about summary refs log tree commit homepage
path: root/lib/PublicInbox/xh_cidx.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/xh_cidx.h')
-rw-r--r-- lib/PublicInbox/xh_cidx.h 48
1 files changed, 43 insertions, 5 deletions
diff --git a/lib/PublicInbox/xh_cidx.h b/lib/PublicInbox/xh_cidx.h
index 2803b3a4..311ca05f 100644
--- a/lib/PublicInbox/xh_cidx.h
+++ b/lib/PublicInbox/xh_cidx.h
@@ -3,16 +3,49 @@
 // This file is only intended to be included by xap_helper.h
 // it implements pieces used by CodeSearchIdx.pm
 
-static void dump_ibx_term(struct req *req, const char *pfx,
+static void term_length_extract(struct req *req) // fills req->lenv[] from digits found in req->pfxv[]
+{
+        req->lenv = (size_t *)calloc(req->pfxc, sizeof(size_t)); // one zero-initialized slot per prefix
+        if (!req->lenv)
+                EABORT("lenv = calloc(%d %zu)", req->pfxc, sizeof(size_t));
+        for (int i = 0; i < req->pfxc; i++) {
+                char *pfx = req->pfxv[i]; // modified in place below when a length is found
+                // extract trailing digits as length (Perl equivalent):
+                // $len = s/([0-9]+)\z// ? ($1+0) : 0
+                for (size_t j = 0; pfx[j]; j++) {
+                        if (pfx[j] < '0' || pfx[j] > '9')
+                                continue; // not a digit, keep scanning
+                        if (j == 0) { // digit in the very first position
+                                warnx("W: `%s' not a valid prefix", pfx);
+                                continue; // NOTE(review): only skips this char; later digits may still be parsed
+                        }
+                        char *end; // set by strtoull to the first unparsed character
+                        unsigned long long tmp = strtoull(pfx + j, &end, 10);
+                        if (*end || tmp >= (unsigned long long)SIZE_MAX) { // text after the digits, or too large
+                                warnx("W: `%s' not recognized", pfx); // no break: scan resumes at j + 1
+                        } else {
+                                req->lenv[i] = (size_t)tmp; // record the parsed length for prefix i
+                                pfx[j] = 0; // cut the digits off the prefix string
+                                break;
+                        }
+                }
+        }
+}
+
+static void dump_ibx_term(struct req *req, int p,
                         Xapian::Document *doc, const char *ibx_id)
 {
         Xapian::TermIterator cur = doc->termlist_begin();
         Xapian::TermIterator end = doc->termlist_end();
+        const char *pfx = req->pfxv[p];
         size_t pfx_len = strlen(pfx);
+        size_t term_len = req->lenv[p];
 
         for (cur.skip_to(pfx); cur != end; cur++) {
                 std::string tn = *cur;
                 if (!starts_with(&tn, pfx, pfx_len)) break;
+                if (term_len > 0 && (tn.length() - pfx_len) != term_len)
+                        continue;
                 fprintf(req->fp[0], "%s %s\n", tn.c_str() + pfx_len, ibx_id);
                 ++req->nr_out;
         }
@@ -24,7 +57,7 @@ static enum exc_iter dump_ibx_iter(struct req *req, const char *ibx_id,
         try {
                 Xapian::Document doc = i->get_document();
                 for (int p = 0; p < req->pfxc; p++)
-                        dump_ibx_term(req, req->pfxv[p], &doc, ibx_id);
+                        dump_ibx_term(req, p, &doc, ibx_id);
         } catch (const Xapian::DatabaseModifiedError & e) {
                 req->srch->db->reopen();
                 return ITER_RETRY;
@@ -46,6 +79,7 @@ static bool cmd_dump_ibx(struct req *req)
                 EABORT("setlinebuf(fp[0])"); // WTF?
         req->asc = true;
         req->sort_col = -1;
+        term_length_extract(req);
         Xapian::MSet mset = mail_mset(req, req->argv[optind + 1]);
 
         // @UNIQ_FOLD in CodeSearchIdx.pm can handle duplicate lines fine
@@ -110,18 +144,22 @@ static bool root2offs_str(struct fbuf *root_offs, Xapian::Document *doc)
 
 // writes term values matching @pfx for a given @doc, ending the line
 // with the contents of @root_offs
-static void dump_roots_term(struct req *req, const char *pfx,
+static void dump_roots_term(struct req *req, int p,
                                 struct dump_roots_tmp *drt,
                                 struct fbuf *root_offs,
                                 Xapian::Document *doc)
 {
         Xapian::TermIterator cur = doc->termlist_begin();
         Xapian::TermIterator end = doc->termlist_end();
+        const char *pfx = req->pfxv[p];
         size_t pfx_len = strlen(pfx);
+        size_t term_len = req->lenv[p];
 
         for (cur.skip_to(pfx); cur != end; cur++) {
                 std::string tn = *cur;
                 if (!starts_with(&tn, pfx, pfx_len)) break;
+                if (term_len > 0 && (tn.length() - pfx_len) != term_len)
+                        continue;
                 fputs(tn.c_str() + pfx_len, drt->wbuf.fp);
                 fwrite(root_offs->ptr, root_offs->len, 1, drt->wbuf.fp);
                 ++req->nr_out;
@@ -163,8 +201,7 @@ static enum exc_iter dump_roots_iter(struct req *req,
                 if (!root2offs_str(&root_offs, &doc))
                         return ITER_ABORT; // bad request, abort
                 for (int p = 0; p < req->pfxc; p++)
-                        dump_roots_term(req, req->pfxv[p], drt,
-                                        &root_offs, &doc);
+                        dump_roots_term(req, p, drt, &root_offs, &doc);
         } catch (const Xapian::DatabaseModifiedError & e) {
                 req->srch->db->reopen();
                 return ITER_RETRY;
@@ -217,6 +254,7 @@ static bool cmd_dump_roots(struct req *req)
         req->asc = true;
         req->sort_col = -1;
         Xapian::MSet mset = commit_mset(req, req->argv[optind + 1]);
+        term_length_extract(req);
 
         fbuf_init(&drt.wbuf);