/*
 * Write the dirty icache entries to disk.  Random seeks are
 * so expensive that it makes sense to wait until we have
 * a lot and then just make a sequential pass over the disk.
 */
#include "stdinc.h"
#include "dat.h"
#include "fns.h"

static void icachewriteproc(void*);
static void icachewritecoord(void*);
static IEntry *iesort(IEntry*);

/* pause taken before each chunk in icachewritesect */
int icachesleeptime = 1000;	/* milliseconds */
/* lower bound applied to icachesleeptime in icachewritesect */
int minicachesleeptime = 0;

enum
{
	/*
	 * Chunks built by nextchunk keep every bucket block at an
	 * offset below Bufsize from the first block of the chunk.
	 */
	Bufsize = 8*1024*1024
};

typedef struct IWrite IWrite;
struct IWrite
{
	Round round;	/* kicked by flushicache, kickicache, delaykickicache */
	AState as;	/* icache state captured for the current flush */
};

static IWrite iwrite;

/*
 * Set up the write round and start the helper procs:
 * one icachewriteproc per index section (each with its own
 * request and completion channel), the coordinator, and
 * the delayed-kick proc for the round.
 */
void
initicachewrite(void)
{
	int i;
	Index *ix;

	initround(&iwrite.round, "icache", 120*60*1000);
	ix = mainindex;
	/* NOTE(review): loop bound reconstructed from damaged text - confirm */
	for(i=0; i<ix->nsects; i++){
		ix->sects[i]->writechan = chancreate(sizeof(ulong), 1);
		ix->sects[i]->writedonechan = chancreate(sizeof(ulong), 1);
		vtproc(icachewriteproc, ix->sects[i]);
	}
	vtproc(icachewritecoord, nil);
	vtproc(delaykickroundproc, &iwrite.round);
}

/*
 * Return the disk address of the bucket block that holds ie
 * within section is: the bucket number is the top 32 hash bits
 * of the score divided by ix->div, taken relative to is->start,
 * scaled by the block size and offset by is->blockbase.
 */
static u64int
ie2diskaddr(Index *ix, ISect *is, IEntry *ie)
{
	u64int bucket, addr;

	bucket = hashbits(ie->score, 32)/ix->div;
	addr = is->blockbase + ((bucket - is->start) << is->blocklog);
	return addr;
}

/*
 * Detach the next run of entries from the front of *pie.
 * The run ends at the first entry whose bucket address is
 * Bufsize or more past the first entry's address.
 *
 * On return *pie is the rest of the list (nil when exhausted),
 * *paddr is the address of the first bucket block, and *pnbuf
 * is the byte count from there through the end of the last
 * bucket block in the run.  The detached, nil-terminated run
 * is returned.  *pie must be non-nil on entry.
 */
static IEntry*
nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
{
	u64int addr, naddr;
	uint nbuf;
	int bsize;
	IEntry *iefirst, *ie, **l;

	/* NOTE(review): shift reconstructed from damaged text - confirm */
	bsize = 1<<is->blocklog;
	iefirst = *pie;
	addr = ie2diskaddr(ix, is, iefirst);
	nbuf = 0;
	for(l = &iefirst->nextdirty; (ie = *l) != nil; l = &(*l)->nextdirty){
		naddr = ie2diskaddr(ix, is, ie);
		if(naddr - addr >= Bufsize)
			break;
		nbuf = naddr - addr;
	}
	/* nbuf was the offset of the last block; include that block */
	nbuf += bsize;

	*l = nil;
	*pie = ie;
	*paddr = addr;
	*pnbuf = nbuf;
	return iefirst;
}

/*
 * Write the dirty entries belonging to section is.
 *
 * The entries obtained from icachedirty for the section's hash
 * range are sorted with iesort and handled a chunk at a time:
 * read the covering disk range into buf, merge each entry into
 * its bucket, write the range back and flush, copy the new
 * block contents into any block _getdblock returns, and pass
 * the chunk to icacheclean.
 *
 * buf must hold at least Bufsize bytes.
 * Returns 0 on success, -1 if any read, write, or bucket
 * update failed.  Entries dropped at skipit are unlinked from
 * the chunk and so are not passed to icacheclean.
 */
static int
icachewritesect(Index *ix, ISect *is, u8int *buf)
{
	int err, i, werr, h, bsize, t;
	u32int lo, hi;
	u64int addr, naddr;
	uint nbuf, off;
	DBlock *b;
	IBucket ib;
	IEntry *ie, *iedirty, **l, *chunk;

	/* hash range [lo, hi] covered by this section */
	lo = is->start * ix->div;
	if(TWID32/ix->div < is->stop)
		hi = TWID32;
	else
		hi = is->stop * ix->div - 1;

	trace(TraceProc, "icachewritesect enter %ud %ud %llud",
		lo, hi, iwrite.as.aa);

	iedirty = icachedirty(lo, hi, iwrite.as.aa);
	iedirty = iesort(iedirty);
	bsize = 1 << is->blocklog;
	err = 0;

	while(iedirty){
		disksched();
		/* SleepForever suspends writing until the setting changes */
		while((t = icachesleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		if(t < minicachesleeptime)
			t = minicachesleeptime;
		if(t > 0)
			sleep(t);
		trace(TraceProc, "icachewritesect nextchunk");
		chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);

		trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux",
			addr, nbuf);
		if(readpart(is->part, addr, buf, nbuf) < 0){
			fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
				"readpart: %r\n", argv0, is->part->name, addr);
			err = -1;
			continue;
		}
		trace(TraceProc, "icachewritesect updatebuf");
		addstat(StatIsectReadBytes, nbuf);
		addstat(StatIsectRead, 1);

		for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
again:
			naddr = ie2diskaddr(ix, is, ie);
			off = naddr - addr;
			if(off+bsize > nbuf){
				fprint(2, "%s: whoops! addr=0x%llux nbuf=%ud "
					"addr+nbuf=0x%llux naddr=0x%llux\n",
					argv0, addr, nbuf, addr+nbuf, naddr);
				assert(off+bsize <= nbuf);
			}
			unpackibucket(&ib, buf+off, is->bucketmagic);
			if(okibucket(&ib, is) < 0){
				fprint(2, "%s: bad bucket XXX\n", argv0);
				goto skipit;
			}
			trace(TraceProc, "icachewritesect add %V at 0x%llux",
				ie->score, naddr);
			/*
			 * The low bit of bucklook's result is used as a
			 * "found" flag; the remaining bits are the byte
			 * offset into ib.data.
			 */
			h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
			if(h & 1){
				/* already present: overwrite in place */
				h ^= 1;
				packientry(ie, &ib.data[h]);
			}else if(ib.n < is->buckmax){
				/* open a slot at h by shifting the tail up */
				memmove(&ib.data[h + IEntrySize], &ib.data[h],
					ib.n*IEntrySize - h);
				ib.n++;
				packientry(ie, &ib.data[h]);
			}else{
				fprint(2, "%s: bucket overflow XXX\n", argv0);
skipit:
				/* unlink ie from the chunk and retry with its successor */
				err = -1;
				*l = ie->nextdirty;
				ie = *l;
				if(ie)
					goto again;
				else
					break;
			}
			packibucket(&ib, buf+off, is->bucketmagic);
		}

		diskaccess(1);

		/* format verbs added so addr and nbuf are actually reported */
		trace(TraceProc, "icachewritesect writepart 0x%llux+0x%ux",
			addr, nbuf);
		werr = 0;
		if(writepart(is->part, addr, buf, nbuf) < 0 || flushpart(is->part) < 0)
			werr = -1;

		/*
		 * Copy the new contents into each block _getdblock
		 * returns.  This is done before werr is examined.
		 * NOTE(review): loop header and the _getdblock name were
		 * reconstructed from damaged text - confirm.
		 */
		for(i=0; i<nbuf; i+=bsize){
			if((b = _getdblock(is->part, addr+i, ORDWR, 0)) != nil){
				memmove(b->data, buf+i, bsize);
				putdblock(b);
			}
		}

		if(werr < 0){
			fprint(2, "%s: part %s addr 0x%llux: icachewritesect "
				"writepart: %r\n", argv0, is->part->name, addr);
			err = -1;
			continue;
		}

		addstat(StatIsectWriteBytes, nbuf);
		addstat(StatIsectWrite, 1);
		icacheclean(chunk);
	}

	trace(TraceProc, "icachewritesect done");
	return err;
}

/*
 * Per-section writer proc; v is the ISect to serve.
 * Allocates a buffer aligned to the section's block size once,
 * then loops forever: wait for a request on is->writechan, run
 * icachewritesect, and send its result on is->writedonechan.
 */
static void
icachewriteproc(void *v)
{
	int ret;
	uint bsize;
	ISect *is;
	Index *ix;
	u8int *buf;

	ix = mainindex;
	is = v;
	threadsetname("icachewriteproc:%s", is->part->name);

	/* NOTE(review): shift reconstructed from damaged text - confirm */
	bsize = 1<<is->blocklog;
	/* over-allocate by bsize, then round the pointer up to a bsize boundary */
	buf = emalloc(Bufsize+bsize);
	buf = (u8int*)(((uintptr)buf+bsize-1)&~(uintptr)(bsize-1));

	for(;;){
		trace(TraceProc, "icachewriteproc recv");
		recv(is->writechan, 0);
		trace(TraceWork, "start");
		ret = icachewritesect(ix, is, buf);
		trace(TraceProc, "icachewriteproc send");
		trace(TraceWork, "finish");
		sendul(is->writedonechan, ret);
	}
}

/*
 * Coordinator proc.  Each time the round is kicked it compares
 * the current icache state with the one recorded at the last
 * flush.  If the two match it only calls kickdcache; otherwise
 * it records the new state and, when as.arena is set, signals
 * every section writer (and the bloom filter writer, if any),
 * collects their results, and calls setatailstate only if all
 * of them reported success.  Either way it finishes with
 * icacheclean(nil) to wake waiters.
 */
static void
icachewritecoord(void *v)
{
	int i, err;
	Index *ix;
	AState as;

	USED(v);

	threadsetname("icachewritecoord");

	ix = mainindex;
	iwrite.as = icachestate();

	for(;;){
		trace(TraceProc, "icachewritecoord sleep");
		waitforkick(&iwrite.round);
		trace(TraceWork, "start");
		as = icachestate();
		if(as.arena==iwrite.as.arena && as.aa==iwrite.as.aa){
			/* will not be able to do anything more than last flush - kick disk */
			trace(TraceProc, "icachewritecoord kick dcache");
			kickdcache();
			trace(TraceProc, "icachewritecoord kicked dcache");
			goto SkipWork;	/* won't do anything; don't bother rewriting bloom filter */
		}
		iwrite.as = as;

		trace(TraceProc, "icachewritecoord start flush");
		if(iwrite.as.arena){
			/* NOTE(review): both loop bounds reconstructed from damaged text - confirm */
			for(i=0; i<ix->nsects; i++)
				send(ix->sects[i]->writechan, 0);
			if(ix->bloom)
				send(ix->bloom->writechan, 0);

			err = 0;
			for(i=0; i<ix->nsects; i++)
				err |= recvul(ix->sects[i]->writedonechan);
			if(ix->bloom)
				err |= recvul(ix->bloom->writedonechan);

			trace(TraceProc, "icachewritecoord donewrite err=%d", err);
			if(err == 0){
				setatailstate(&iwrite.as);
			}
		}
	SkipWork:
		icacheclean(nil);	/* wake up anyone waiting */
		trace(TraceWork, "finish");
		addstat(StatIcacheFlush, 1);
	}
}

/*
 * Kick the write round with second argument 1.
 * NOTE(review): presumably 1 means wait for the round to
 * complete - confirm against kickround.
 */
void
flushicache(void)
{
	trace(TraceProc, "flushicache enter");
	kickround(&iwrite.round, 1);
	trace(TraceProc, "flushicache exit");
}

/*
 * Kick the write round with second argument 0.
 */
void
kickicache(void)
{
	kickround(&iwrite.round, 0);
}

/*
 * Request a delayed kick of the write round.
 */
void
delaykickicache(void)
{
	delaykickround(&iwrite.round);
}

/*
 * Merge sort the nextdirty-linked list ie by score (scorecmp)
 * and return the new head.  On equal scores the element from
 * the first half is emitted first.
 */
static IEntry*
iesort(IEntry *ie)
{
	int cmp;
	IEntry **l;
	IEntry *ie1, *ie2, *sorted;

	if(ie == nil || ie->nextdirty == nil)
		return ie;

	/* split the lists */
	ie1 = ie;
	ie2 = ie;
	if(ie2)
		ie2 = ie2->nextdirty;
	if(ie2)
		ie2 = ie2->nextdirty;
	/* ie2 advances two links per step, ie1 one; ie1 stops near the middle */
	while(ie1 && ie2){
		ie1 = ie1->nextdirty;
		ie2 = ie2->nextdirty;
		if(ie2)
			ie2 = ie2->nextdirty;
	}
	if(ie1){
		ie2 = ie1->nextdirty;
		ie1->nextdirty = nil;
	}

	/* sort the lists */
	ie1 = iesort(ie);
	ie2 = iesort(ie2);

	/* merge the lists */
	sorted = nil;
	l = &sorted;
	cmp = 0;
	while(ie1 || ie2){
		if(ie1 && ie2)
			cmp = scorecmp(ie1->score, ie2->score);
		if(ie1==nil || (ie2 && cmp > 0)){
			*l = ie2;
			l = &ie2->nextdirty;
			ie2 = ie2->nextdirty;
		}else{
			*l = ie1;
			l = &ie1->nextdirty;
			ie1 = ie1->nextdirty;
		}
	}
	*l = nil;
	return sorted;
}