# include "stdio.h" # include "error.h" #include "inv.h" void main(int argc, char **argv) { /* make inverted file indexes. Reads a stream from mkey which gives record pointer items and keys. Generates a set of files a. NHASH pointers to file b. b. lists of record numbers. c. record pointer items. these files are named xxx.ia, xxx.ib, xxx.ic; where xxx is taken from arg1. If the files exist they are updated. */ FILE * fa, *fb, *fc, *fta, *ftb, *fd; int nhash = 256; int appflg = 1; int keepkey = 0, pipein = 0; char nma[100], nmb[100], nmc[100], com[100], nmd[100]; char tmpa[20], tmpb[20], tmpc[20]; char *remf = NULL; int chatty = 0, docs, hashes, status; long keys; char *sortdir; progname = mkprogname(argv[0]); sortdir = "/tmp"; for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) { switch (argv[1][1]) { case 'h': /* size of hash table */ nhash = atoi (argv[1] + 2); break; case 'n': /* new, don't append */ appflg = 0; break; case 'a': /* append to old file */ appflg = 1; break; case 'v': /* verbose output */ chatty = 1; break; case 'd': /* keep keys on file .id for check on searching */ keepkey = 1; break; case 'p': /* pipe into sort (saves space, costs time)*/ pipein = 1; break; case 'i': /* input is on file, not stdin */ close(0); if (open(argv[2], 0) != 0) err("can't read input %s", argv[2]); if (argv[1][2] == 'u') /* remove */ remf = argv[2]; argc--; argv++; break; case 'T': sortdir = &argv[1][2]; break; } } strcpy(nma, argc >= 2 ? argv[1] : "Index"); strcpy(nmb, nma); strcpy(nmc, nma); strcpy(nmd, nma); strcat(nma, ".ia"); strcat(nmb, ".ib"); strcat(nmc, ".ic"); strcat(nmd, ".id"); sprintf(tmpa, "junk%di", getpid()); if (pipein) { sprintf(com, "/bin/sort -T %s -o %s", sortdir, tmpa); if ((fta = popen(com, "w")) == NULL) err("can't pipe into %s", com); } else fta = efopen(tmpa, "w"); /* use tmp file */ if (appflg) { if ((fb = fopen(nmb, "r")) != NULL) { sprintf(tmpb, "junk%dj", getpid()); ftb = efopen(tmpb, "w"); nhash = recopy(ftb, fb, fopen(nma, "r"), nhash); fclose(ftb); fclose(fb); } else appflg = 0; } fc = efopen(nmc, appflg ? "a" : "w"); fd = keepkey? efopen(nmd, "w") : 0; docs = newkeys(fta, stdin, fc, nhash, fd); fclose(stdin); if (remf != NULL) remove(remf); if (pipein) { status = pclose(fta); if (status != 0) err("pipe into sort failed"); } else { fclose(fta); sprintf(com, "/bin/sort -T %s %s -o %s", sortdir, tmpa, tmpa); system(com); } if (appflg) { sprintf(tmpc, "junk%dk", getpid()); rename(tmpa, tmpc); sprintf(com, "/bin/sort -T %s -m %s %s -o %s", sortdir, tmpb, tmpc, tmpa); system(com); } fta = efopen(tmpa, "r"); fa = efopen(nma, "w"); fb = efopen(nmb, "w"); whash(fta, fa, fb, nhash, &keys, &hashes); fclose(fta); # ifndef D1 remove(tmpa); # endif if (appflg) { remove(tmpb); remove(tmpc); } if (chatty) printf ("%ld key occurrences, %d hashes, %d docs\n", keys, hashes, docs); exit(0); }