/*
 * msgclass: per its usage string, run as
 *	upas/msgclass [-a] [-d name dbfile]... [-l lockfile] [-m mul] [-t thresh] [tokenfile ...]
 * It reads token counts from the token files (or fd 0), and prints either the
 * name of one -d class or "inconclusive"; with -a it also adds the input
 * counts to the chosen class's database.
 *
 * NOTE(review): this copy is collapsed onto two physical lines and appears
 * damaged. The header names after each #include are missing, and several
 * loops in main() break off at a '<' and resume after a later '>'
 * (e.g. "for(i=0; i= 0){"), so code in between seems to be lost.  The
 * "//fprint" debug comment also swallows the rest of its physical line.
 * Restore from a pristine copy before building -- TODO confirm.
 */
#include #include #include #include #include "msgdb.h" /* usage: print the usage message on fd 2 and exit with status "usage". */ void usage(void) { fprint(2, "usage: upas/msgclass [-a] [-d name dbfile]... [-l lockfile] [-m mul] [-t thresh] [tokenfile ...]\n"); exits("usage"); } /* Table limits: MAXBEST sizes best[], MAXLEN sizes Word.s, MAXTAB sizes db[] and the per-class arrays in Word. */ enum { MAXBEST = 32, MAXLEN = 64, MAXTAB = 256, }; /* Ndb: one class given with -d; name and file are taken from the command line in main(), p is the value main() compares against thresh. */ typedef struct Ndb Ndb; struct Ndb { char *name; char *file; Msgdb *db; double p; long nmsg; }; /* Word: a token (s) with per-class count[] and p[] entries; mp is the largest p[] value and mi its index. */ typedef struct Word Word; struct Word { char s[MAXLEN]; int count[MAXTAB]; double p[MAXTAB]; double mp; int mi; /* w.p[w.mi] = w.mp */ int nmsg; }; /* db[0..ndb-1]: classes from -d.  add: set by -a.  mul: set by -m (NOTE(review): nothing visible gives it a non-zero default -- confirm).  indb: counts accumulated by process().  best[0..nbest-1]: words kept by noteword(), at most mbest of them. */ Ndb db[MAXTAB]; int ndb; int add; int mul; Msgdb *indb; Word best[MAXBEST]; int mbest = 15; int nbest; void process(Biobuf*, char*); void lockfile(char*); /* noteword: insert a copy of *w, with its name set to s (truncated to fit MAXLEN), into best[].  best[] is kept in descending order of mp; a new word goes ahead of existing entries whose mp is equal or smaller.  When best[] already holds mbest entries the last one is dropped, and a word that would land at index mbest or beyond is ignored. */ void noteword(Word *w, char *s) { int i; for(i=nbest-1; i>=0; i--) if(w->mp < best[i].mp) break; i++; if(i >= mbest) return; if(nbest == mbest) nbest--; if(i < nbest) memmove(&best[i+1], &best[i], (nbest-i)*sizeof(best[0])); best[i] = *w; strecpy(best[i].s, best[i].s+MAXLEN, s); nbest++; } /* main: parse the options (-a sets add, each -d appends a name/file pair to db[], -l names the lock file, -m sets mul, -t sets thresh), require at least one -d, then load token counts into indb via process() from fd 0 when no file arguments remain.  NOTE(review): from the "for(i=0; i= 0){" fragment onward the body is incomplete in this copy; the file-argument loop, any call to lockfile(lf), and the opening of db[i].db are not visible here -- confirm against a pristine copy. */ void main(int argc, char **argv) { int i, bad, m, tot, nn, j; Biobuf bin, *b, bout; char *s, *lf; double totp, p, thresh; long n; Word w; lf = nil; thresh = 0; ARGBEGIN{ case 'a': add = 1; break; case 'd': if(ndb >= MAXTAB) sysfatal("too many db classes"); db[ndb].name = EARGF(usage()); db[ndb].file = EARGF(usage()); ndb++; break; case 'l': lf = EARGF(usage()); break; case 'm': mul = atoi(EARGF(usage())); break; case 't': thresh = atof(EARGF(usage())); break; default: usage(); }ARGEND if(ndb == 0){ fprint(2, "must have at least one -d option\n"); usage(); } indb = mdopen(nil, 1); if(argc == 0){ Binit(&bin, 0, OREAD); process(&bin, ""); Bterm(&bin); }else{ bad = 0; for(i=0; i= 0){ tot = 0; totp = 0.0; for(i=0; i= 1.0) p = 1.0; w.p[i] = p; totp += p; } //fprint(2, "%s tot %d totp %g\n", s, tot, totp); if(tot < 2) continue; w.mp = 0.0; for(i=0; i 0.999) p = 0.999; if(p > w.mp){ w.mp = p; w.mi = i; } w.p[i] = p; } noteword(&w, s); } /* compute conditional probabilities of message classes using 15 most specific */ totp = 0.0; for(i=0; i 
/* NOTE(review): the text joining the previous physical line to this one appears to be missing; what follows is the tail of main(): m indexes the class with the largest db[].p, and is set to -1 when that value is below thresh. */
db[m].p) m = i; Binit(&bout, 1, OWRITE); if(db[m].p < thresh) m = -1; if(m >= 0) Bprint(&bout, "%s", db[m].name); else Bprint(&bout, "inconclusive"); /* NOTE(review): the rest of the output code is truncated in this copy. */ for(j=0; j= 0 && add){ /* with -a and a conclusive result: add each input token's count, scaled by mul, to the chosen class's database, then close it */ mdenum(indb); while(mdnext(indb, &s, &n) >= 0) mdput(db[m].db, s, mdget(db[m].db, s)+n*mul); mdclose(db[m].db); } exits(nil); } /* process: read b one line at a time.  The trailing newline is overwritten with 0; if the line contains a space, the text after the last space is parsed with atoi as the count and cut off, otherwise the count is 1.  The count is added to indb's entry for the remaining text.  The second (unnamed) parameter is not used. */ void process(Biobuf *b, char*) { char *s; char *p; long n; while((s = Brdline(b, '\n')) != nil){ s[Blinelen(b)-1] = 0; if((p = strrchr(s, ' ')) != nil){ *p++ = 0; n = atoi(p); }else n = 1; mdput(indb, s, mdget(indb, s)+n); } } /* tpid: pid of the child forked by lockfile().  killtickle: post the note "die" to that process; lockfile() registers it with atexit. */ int tpid; void killtickle(void) { postnote(PNPROC, tpid, "die"); } /* lockfile: do nothing if s is nil.  Otherwise open s for reading, retrying while the error string contains "file is locked" or "exclusive lock"; the sleep between tries starts at 50ms and grows by half until it reaches at least 1000ms, and retrying stops once more than 120 seconds have been slept.  Any other open error stops the loop at once; a failed open is fatal.  On success a child is forked that calls dirfstat on the descriptor every 30 seconds (presumably to keep the lock from going stale -- confirm), the parent closes its own copy, and killtickle is registered to run at exit.  NOTE(review): the strstr condition below has one more ')' than '(' and will not compile as written. */ void lockfile(char *s) { int fd, t, w; char err[ERRMAX]; if(s == nil) return; w = 50; t = 0; for(;;){ fd = open(s, OREAD); if(fd >= 0) break; rerrstr(err, sizeof err); if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)) break; sleep(w); t += w; if(w < 1000) w = (w*3)/2; if(t > 120*1000) break; } if(fd < 0) sysfatal("could not lock %s", s); switch(tpid = fork()){ case -1: sysfatal("fork: %r"); case 0: for(;;){ sleep(30*1000); free(dirfstat(fd)); } _exits(nil); default: break; } close(fd); atexit(killtickle); }