#include "stdinc.h" #include "dat.h" #include "fns.h" /* * An IEStream is a sorted list of index entries. */ struct IEStream { Part *part; u64int off; /* read position within part */ u64int n; /* number of valid ientries left to read */ u32int size; /* allocated space in buffer */ u8int *buf; u8int *pos; /* current place in buffer */ u8int *epos; /* end of valid buffer contents */ }; IEStream* initiestream(Part *part, u64int off, u64int clumps, u32int size) { IEStream *ies; /* out of memory? */ ies = MKZ(IEStream); ies->buf = MKN(u8int, size); ies->epos = ies->buf; ies->pos = ies->epos; ies->off = off; ies->n = clumps; ies->size = size; ies->part = part; return ies; } void freeiestream(IEStream *ies) { if(ies == nil) return; free(ies->buf); free(ies); } /* * Return the next IEntry (still packed) in the stream. */ static u8int* peekientry(IEStream *ies) { u32int n, nn; n = ies->epos - ies->pos; if(n < IEntrySize){ memmove(ies->buf, ies->pos, n); ies->epos = &ies->buf[n]; ies->pos = ies->buf; nn = ies->size; if(nn > ies->n * IEntrySize) nn = ies->n * IEntrySize; nn -= n; if(nn == 0) return nil; //fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos); if(readpart(ies->part, ies->off, ies->epos, nn) < 0){ seterr(EOk, "can't read sorted index entries: %r"); return nil; } ies->epos += nn; ies->off += nn; } return ies->pos; } /* * Compute the bucket number for the given IEntry. * Knows that the score is the first thing in the packed * representation. */ static u32int iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies) { USED(ies); USED(ib); return hashbits(b, 32) / ix->div; } /* * Fill ib with the next bucket in the stream. */ u32int buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata) { IEntry ie1, ie2; u8int *b; u32int buck; buck = TWID32; ib->n = 0; while(ies->n){ b = peekientry(ies); if(b == nil) return TWID32; /* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */ if(ib->n == 0) buck = iebuck(ix, b, ib, ies); else{ if(buck != iebuck(ix, b, ib, ies)) break; if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){ /* * guess that the larger address is the correct one to use */ unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]); unpackientry(&ie2, b); seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type); ib->n--; if(ie1.ia.addr > ie2.ia.addr) memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize); } } if((ib->n+1)*IEntrySize > maxdata){ seterr(EOk, "bucket overflow"); return TWID32; } memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize); ib->n++; ies->n--; ies->pos += IEntrySize; } return buck; }