#include "refer.h" #ifndef D1 #define D1 0 #endif typedef struct Taglist { List drop; /* candidate tag pointers not yet eliminated */ List count; /* count.el[i] = number of query items having drop.el[i] in list */ } Taglist; static Taglist sprev; /* List space re-used to reduce number of alloc req */ static Taglist scurr; /* ... */ static List hh; /* ... query item hash codes */ static long *hfreq; /* frequency reference for hcomp */ #define Reset(mp) ((mp)->drop.n = (mp)->count.n = 0) static int hcomp(int, int); static void hexch(int, int); #if D1 static void dnote(int, Taglist *, char *); #define D(x) x #else #define dnote(a,b,c) #define D(x) #endif List *doquery(Index *ind, FILE *fb, int nitem, char *qitem[]) { Taglist *cur = &scurr, *prev = &sprev; Fpos *hpt; int best, nterm, i, j; Fpos lp, k; hpt = ind->hash.el; hfreq = ind->freq.el; D(fprintf(stderr, "doquery; nitem %d\n", nitem)); growlist(&hh, nitem); hh.n = 0; for (i = 0; i < nitem; i++) hh.el[i] = hash(qitem[i]) % ind->n; if (prfreqs || D1) for (i = 0; i < nitem; i++) fprintf(stderr, "item %s hash %d hfreq %ld\n", qitem[i], hh.el[i], hfreq[hh.el[i]]); /* if possible, sort query: least common hash code first */ if (hfreq) shell(nitem, hcomp, hexch); /* * read the tag list for the item with the shortest tag list */ lp = hpt[hh.el[0]]; D(fprintf(stderr, "first item hash %d lp %ld\n", hh.el[0], lp)); Reset(cur); xseek(fb, lp, 0); while ((lp = getl(fb)) != -1) { applist(&cur->drop, lp); applist(&cur->count, 1); /* ie, lp has appeared once */ } /* * intersect it with the tag list for each remaining item. * since the tag lists are sorted by key, the intersection * looks like a 2-way merge. note that when the co-ordination * level is non-zero (not all items need match), new items can be added * to the current tag list. */ for (nterm = 1; nterm < nitem; nterm++) { int nf; D(fprintf(stderr, "item %d, hash %d\n", nterm, hh.el[nterm])); { Taglist *mp = prev; prev = cur; cur = mp; } /* exchange */ Reset(cur); nf = prev->drop.n; /* scan prev to build new cur */ lp = hpt[hh.el[nterm]]; /* tag list for this query item's hash code */ xseek(fb, lp, 0); D(fprintf(stderr, "item %d hash %d seek to %ld\n", nterm, hh.el[nterm], lp)); for (j=0; (k=getl(fb)) != -1;) { /* each tag reference */ D(fprintf(stderr, "next term finds tag %ld\n", k)); /* skip entries that can't match this one, unless colevel gives them a chance */ for (; j < nf && prev->drop.el[j] < k; j++) if (prev->count.el[j] + colevel > nterm) { applist(&cur->drop, prev->drop.el[j]); applist(&cur->count, prev->count.el[j]); dnote(hh.el[nterm], cur, "keep/co"); } if (colevel == 0 && j >= nf) break; if (j < nf && prev->drop.el[j] == k) { applist(&cur->drop, k); applist(&cur->count, prev->count.el[j] + 1); j++; dnote(hh.el[nterm], cur, "keep"); } else if (colevel >= nterm) { /* previous entries don't include this, */ /* but nitem-colevel entries might match later */ applist(&cur->drop, k); applist(&cur->count, 1); dnote(hh.el[nterm], cur, "add/co"); } } if (colevel > 0) for (; j < nf; j++) if (prev->count.el[j] + colevel > nterm) { applist(&cur->drop, prev->drop.el[j]); applist(&cur->count, prev->count.el[j]); dnote(hh.el[nterm], cur, "copy/co"); } D(fprintf(stderr, "now have %d items\n", cur->drop.n)); } if (colevel > 0) { /* save just the "best" entries in cur */ best = 0; for (j = 0; j < cur->drop.n; j++) if (cur->count.el[j] > best) best = cur->count.el[j]; D(fprintf(stderr, "colevel %d best %d\n", colevel, best)); reached = best; { Taglist *mp = prev; prev = cur; cur = mp; } /* exchange */ Reset(cur); for (j = 0; j < prev->drop.n; j++) if (prev->count.el[j] == best) applist(&cur->drop, prev->drop.el[j]); D(fprintf(stderr, "now have %d items\n", cur->drop.n)); } D(for (j = 0; j < cur->drop.n; j++) fprintf(stderr, ":%ld\n", cur->drop.el[j])); return &cur->drop; } static int hcomp(int n1, int n2) { return hfreq[hh.el[n1]] <= hfreq[hh.el[n2]]; } static void hexch(int n1, int n2) { int t; t = hh.el[n1]; hh.el[n1] = hh.el[n2]; hh.el[n2] = t; } #if D1 static void dnote(int i, Taglist *mp, char *why) { fprintf(stderr, "item %d %s tag %ld (#%d)\n", i, why, mp->drop.el[mp->drop.n-1], mp->count.el[mp->drop.n-1]); } #endif