static char copyright[] = "@(#)Copyright (c) 1986, Greg McGary"; static char sccsid[] = "@(#)mkid.c 1.4 86/11/06"; /* #define register */ #include "bool.h" #ifdef UNIX #include #include #endif #include #include "string.h" #include #include "id.h" #include "bitops.h" #include #include "extern.h" #ifdef AMIGA #include #include #endif int idnHashCmp(); int idnQsortCmp(); int round2(); struct idname *newIdName(); void extractId(); void fileIdArgs(); void initHashTable(); void oldIdArgs(); void rehash(); void updateID(); void writeID(); long NameCount; /* Count of names in database */ long NumberCount; /* Count of numbers in database */ long StringCount; /* Count of strings in database */ long SoloCount; /* Count of identifiers that occur only once */ long HashSize; /* Total Slots in hash table */ long HashMaxLoad; /* Maximum loading of hash table */ long HashFill; /* Number of keys inserted in table */ long HashProbes; /* Total number of probes */ long HashSearches; /* Total number of searches */ struct idname **HashTable; /* Vector of idname pointers */ bool Verbose = FALSE; int ArgsCount = 0; /* Count of args to save */ int ScanCount = 0; /* Count of files to scan */ int PathCount = 0; /* Count of files covered in database */ int BitArraySize; /* Size of bit array slice (per name) */ char *MyName; static void usage() { fprintf(stderr, "Usage: %s [-f] [-s] [-r] [(+|-)l[]] [-v] [(+|-)S] [-a] [-] [-u] [files...]\n", MyName); exit(1); } main(argc, argv) int argc; char **argv; { char *arg; int op; FILE *argFILE = NULL; char *idFile = IDFILE; char *rcsDir = NULL; char *sccsDir = NULL; struct idarg *idArgs, *idArgHead; bool keepLang = FALSE; int argsFrom = 0; #define AF_CMDLINE 0x1 /* file args came on command line */ #define AF_FILE 0x2 /* file args came from a file (-f) */ #define AF_IDFILE 0x4 /* file args came from an old ID file (-u) */ #define AF_QUERY 0x8 /* no file args necessary: usage query */ MyName = basename(GETARG(argc, argv)); #ifdef ERRLINEBUF setlinebuf(stderr); #endif idArgs = idArgHead = NEW(struct idarg); /* Process some arguments, and snarf-up some others for processing later. */ while (argc) { arg = GETARG(argc, argv); if (*arg != '-' && *arg != '+') { argsFrom |= AF_CMDLINE; idArgs->ida_arg = arg; idArgs->ida_flags = IDA_SCAN|IDA_PATH; idArgs->ida_index = postIncr(&PathCount); ScanCount++; idArgs = (idArgs->ida_next = NEW(struct idarg)); continue; } op = *arg++; switch (*arg++) { case 'u': argsFrom |= AF_IDFILE; oldIdArgs(idFile, &idArgs); break; case '\0': argsFrom |= AF_FILE; fileIdArgs(stdin, &idArgs); break; case 'a': if ((argFILE = fopen(arg, "r")) == NULL) { filerr("open", arg); exit(1); } argsFrom |= AF_FILE; fileIdArgs(argFILE, &idArgs); fclose(argFILE); /* added REJ */ break; case 'f': idFile = arg; break; case 'v': Verbose = TRUE; break; case 'S': if (strchr(&arg[-2], '?')) { setScanArgs(op, arg); argsFrom |= AF_QUERY; } /*FALLTHROUGH*/ case 'l': case 's': case 'r': idArgs->ida_arg = &arg[-2]; idArgs->ida_index = -1; idArgs->ida_flags = IDA_ARG; idArgs = (idArgs->ida_next = NEW(struct idarg)); ArgsCount++; break; default: usage(); } } if (argsFrom & AF_QUERY) exit(0); /* File args should only come from one place. Ding the user if arguments came from multiple places, or if none were supplied at all. */ switch (argsFrom) { case AF_CMDLINE: case AF_FILE: case AF_IDFILE: if (PathCount > 0) break; /*FALLTHROUGH*/ case 0: fprintf(stderr, "%s: Use -u, -f, or cmd-line for file args!\n", MyName); usage(); default: fprintf(stderr, "%s: Use only one of: -u, -f, or cmd-line for file args!\n", MyName); usage(); } if (ScanCount == 0) exit(0); BitArraySize = (PathCount + 7) >> 3; initHashTable(ScanCount); if (access(idFile, 06) < 0 && (errno != ENOENT || access(dirname(idFile), 06) < 0)) { filerr("modify", idFile); exit(1); } for (idArgs = idArgHead; idArgs->ida_next; idArgs = idArgs->ida_next) { char *(*scanner)(); FILE *srcFILE; char *arg, *lang = NULL, *suff; /* added = NULL - it assumed locals were zeroed! REJ */ arg = idArgs->ida_arg; if (idArgs->ida_flags & IDA_ARG) { op = *arg++; switch (*arg++) { case 'l': if (*arg == '\0') { keepLang = FALSE; lang = NULL; break; } if (op == '+') keepLang = TRUE; lang = arg; break; case 's': sccsDir = arg; break; case 'r': rcsDir = arg; break; case 'S': setScanArgs(op, strsav(arg)); break; default: usage(); } continue; } if (!(idArgs->ida_flags & IDA_SCAN)) goto skip; if (lang == NULL) { if ((suff = strrchr(arg, '.')) == NULL) suff = ""; if ((lang = getLanguage(suff)) == NULL) { fprintf(stderr, "%s: No language assigned to suffix: `%s'\n", MyName, suff); goto skip; } } if ((scanner = getScanner(lang)) == NULL) { fprintf(stderr, "%s: No scanner for language: `%s'\n", MyName, lang); goto skip; } if ((srcFILE = openSrcFILE(arg, sccsDir, rcsDir)) == NULL) goto skip; if (Verbose) fprintf(stderr, "%s: %s\n", lang, arg); extractId(scanner, srcFILE, idArgs->ida_index); fclose(srcFILE); skip: if (!keepLang) lang = NULL; } if (HashFill == 0) exit(0); if (Verbose) fprintf(stderr, "Compressing Hash Table...\n"); hashCompress(HashTable, HashSize); if (Verbose) fprintf(stderr, "Sorting Hash Table...\n"); qsort(HashTable, HashFill, sizeof(struct idname *), idnQsortCmp); if (argsFrom == AF_IDFILE) { if (Verbose) fprintf(stderr, "Merging Tables...\n"); updateID(idFile, idArgHead); } if (Verbose) fprintf(stderr, "Writing `%s'...\n", idFile); writeID(idFile, idArgHead); if (Verbose) { float loadFactor = (float)HashFill / (float)HashSize; float aveProbes = (float)HashProbes / (float)HashSearches; float aveOccur = (float)HashSearches / (float)HashFill; fprintf(stderr, "Names: %ld, ", NameCount); fprintf(stderr, "Numbers: %ld, ", NumberCount); fprintf(stderr, "Strings: %ld, ", StringCount); fprintf(stderr, "Solo: %ld, ", SoloCount); fprintf(stderr, "Total: %ld\n", HashFill); fprintf(stderr, "Occurances: %.2f, ", aveOccur); fprintf(stderr, "Load: %.2f, ", loadFactor); fprintf(stderr, "Probes: %.2f\n", aveProbes); } exit(0); } void extractId(getId, srcFILE, index) register char *(*getId)(); register FILE *srcFILE; int index; { register struct idname **slot; register char *key; int flags; while ((key = (*getId)(srcFILE, &flags)) != NULL) { slot = (struct idname **)hashSearch(key, HashTable, HashSize, sizeof(struct idname *), h1str, h2str, idnHashCmp, &HashProbes); HashSearches++; if (*slot != NULL) { (*slot)->idn_flags |= flags; BITSET((*slot)->idn_bitv, index); continue; } *slot = newIdName(key); (*slot)->idn_flags = IDN_SOLO|flags; BITSET((*slot)->idn_bitv, index); if (HashFill++ >= HashMaxLoad) rehash(); } } void writeID(idFile, idArgs) char *idFile; struct idarg *idArgs; { register struct idname **idnp; register struct idname *idn; register int i; char *vecBuf; FILE *idFILE; int count; int lasti; long before, after; int length, longest; struct idhead idh; /* note: for -u to work on non-unix, updateID MUST close idFILE so open */ /* can succeed REJ */ if ((idFILE = fopen(idFile, "w+")) == NULL) { filerr("create", idFile); exit(1); } fseek(idFILE, (long)sizeof(struct idhead), 0); /* write out the list of pathnames */ idh.idh_argo = ftell(idFILE); for (i = lasti = 0; idArgs->ida_next; idArgs = idArgs->ida_next) { if (idArgs->ida_index > 0) while (++lasti < idArgs->ida_index) i++, putc('\0', idFILE); fputs(idArgs->ida_arg, idFILE); i++, putc('\0', idFILE); } idh.idh_argc = i; idh.idh_pthc = PathCount; /* write out the list of identifiers */ i = 1; if (idh.idh_pthc >= 0x000000ff) i++; if (idh.idh_pthc >= 0x0000ffff) i++; if (idh.idh_pthc >= 0x00ffffff) i++; idh.idh_vecc = i; vecBuf = malloc((idh.idh_pthc + 1) * idh.idh_vecc); putc('\377', idFILE); before = idh.idh_namo = ftell(idFILE); longest = 0; for (idnp = HashTable, i = 0; i < HashFill; i++, idnp++) { idn = *idnp; if (idn->idn_name[0] == '\0') { HashFill--; i--; continue; } if (idn->idn_flags & IDN_SOLO) SoloCount++; if (idn->idn_flags & IDN_NUMBER) NumberCount++; if (idn->idn_flags & IDN_NAME) NameCount++; if (idn->idn_flags & IDN_STRING) StringCount++; putc((*idnp)->idn_flags, idFILE); fputs(idn->idn_name, idFILE); putc('\0', idFILE); count = bitsToVec(vecBuf, (*idnp)->idn_bitv, idh.idh_pthc, idh.idh_vecc); fwrite(vecBuf, idh.idh_vecc, count, idFILE); putc('\377', idFILE); after = ftell(idFILE); if ((length = (after - before)) > longest) longest = length; before = after; } idh.idh_namc = i; putc('\377', idFILE); idh.idh_endo = ftell(idFILE); idh.idh_bsiz = longest; /* write out the header */ strncpy(idh.idh_magic, IDH_MAGIC, sizeof(idh.idh_magic)); idh.idh_vers = IDH_VERS; fseek(idFILE, 0L, 0); fwrite((char *) &idh, sizeof(struct idhead), 1, idFILE); fclose(idFILE); } /* Build an idarg vector from pathnames contained in an existing id file. Only include pathnames for files whose modification time is later than that of the id file itself. */ void oldIdArgs(idFile, idArgsP) char *idFile; struct idarg **idArgsP; { #ifdef UNIX struct stat statBuf; #endif struct idhead idh; FILE *idFILE; register int i; register char *strings; time_t idModTime; #ifdef AMIGA time_t tmptime; #endif if ((idFILE = fopen(idFile, "r")) == NULL) { filerr("open", idFile); usage(); } /* * Open the id file, get its mod-time, and read its header. */ #ifdef UNIX if (fstat(fileno(idFILE), &statBuf) < 0) { filerr("stat", idFile); usage(); } idModTime = statBuf.st_mtime; #endif #ifdef AMIGA if ((idModTime = getft(idFile)) == -1) { filerr("getft(stat)",idFile); usage(); } #endif fread((char *) &idh, sizeof(struct idhead), 1, idFILE); if (!strnequ(idh.idh_magic, IDH_MAGIC, sizeof(idh.idh_magic))) { fprintf(stderr, "%s: Not an id file: `%s'\n", MyName, idFile); exit(1); } if (idh.idh_vers != IDH_VERS) { fprintf(stderr, "%s: ID version mismatch (%ld,%ld)\n", MyName, idh.idh_vers, IDH_VERS); exit(1); } /* * Read in the id pathnames, compare their mod-times with * the id file, and incorporate the pathnames of recently modified * files in the idarg vector. Also, construct a mask of * bit array positions we want to turn off when we build the * initial hash-table. */ fseek(idFILE, idh.idh_argo, 0); strings = malloc(i = idh.idh_namo - idh.idh_argo); fread(strings, i, 1, idFILE); ScanCount = 0; for (i = 0; i < idh.idh_argc; i++) { (*idArgsP)->ida_arg = strings; if (*strings == '+' || *strings == '-') { (*idArgsP)->ida_flags = IDA_ARG; (*idArgsP)->ida_index = -1; } else { (*idArgsP)->ida_flags = IDA_PATH; (*idArgsP)->ida_index = postIncr(&PathCount); #ifdef UNIX if (stat(strings, &statBuf) < 0) { filerr("stat", strings); } else if (statBuf.st_mtime >= idModTime) { #endif #ifdef AMIGA if ((tmptime = getft(strings)) == -1) filerr("getft(stat)",strings); else if (tmptime >= idModTime) { #endif (*idArgsP)->ida_flags |= IDA_SCAN; ScanCount++; } } (*idArgsP) = ((*idArgsP)->ida_next = NEW(struct idarg)); while (*strings++) ; } fclose(idFILE); if (ScanCount == 0) { exit(0); } } void updateID(idFile, idArgs) char *idFile; struct idarg *idArgs; { struct idname *idn; struct idhead idh; register char *bitArray; char *entry0; register int i; FILE *idFILE; int cmp, count, size; char *bitsOff; struct idname **newTable, **mergeTable; struct idname **t1, **t2, **tm; if ((idFILE = fopen(idFile, "r")) == NULL) filerr("open", idFile); fread((char *) &idh, sizeof(struct idhead), 1, idFILE); entry0 = malloc(idh.idh_bsiz); bitsOff = malloc(BitArraySize); bzero(bitsOff, BitArraySize); for (i = 0; idArgs->ida_next; idArgs = idArgs->ida_next) if (idArgs->ida_flags & IDA_SCAN) BITSET(bitsOff, idArgs->ida_index); bitArray = malloc(BitArraySize); bzero(bitArray, BitArraySize); t2 = newTable = (struct idname **)malloc((idh.idh_namc + 1) * sizeof(struct idname *)); #ifdef UNIX fseek(idFILE, idh.idh_namo, 0); #endif #ifdef AMIGA /* paranoia */ if (fseek(idFILE, idh.idh_namo, 0) == -1) filerr("fseek",idFILE); #endif count = 0; for (i = 0; i < idh.idh_namc; i++) { size = 1 + fgets0(entry0, idh.idh_bsiz, idFILE); getsFF(&entry0[size], idFILE); vecToBits(bitArray, &entry0[size], idh.idh_vecc); bitsclr(bitArray, bitsOff, BitArraySize); if (!bitsany(bitArray, BitArraySize)) continue; *t2 = newIdName(ID_STRING(entry0)); bitsset((*t2)->idn_bitv, bitArray, BitArraySize); (*t2)->idn_flags = ID_FLAGS(entry0); bzero(bitArray, BitArraySize); t2++; count++; } *t2 = NULL; #ifdef AMIGA /* and probably others! */ if (fclose(idFILE) == -1) filerr("close",idFILE); #endif t1 = HashTable; t2 = newTable; tm = mergeTable = (struct idname **)calloc(HashFill + count + 1, sizeof(struct idname *)); while (*t1 && *t2) { cmp = strcmp((*t1)->idn_name, (*t2)->idn_name); if (cmp < 0) *tm++ = *t1++; else if (cmp > 0) *tm++ = *t2++; else { (*t1)->idn_flags |= (*t2)->idn_flags; (*t1)->idn_flags &= ~IDN_SOLO; bitsset((*t1)->idn_bitv, (*t2)->idn_bitv, BitArraySize); *tm++ = *t1; t1++, t2++; } } while (*t1) *tm++ = *t1++; while (*t2) *tm++ = *t2++; *tm = NULL; HashTable = mergeTable; HashFill = tm - mergeTable; } /* Cons up a list of idArgs as supplied in a file. */ void fileIdArgs(argFILE, idArgsP) FILE *argFILE; struct idarg **idArgsP; { /* int fileCount; not really used REJ */ char buf[BUFSIZ]; char *arg; /* fileCount = 0; see? REJ */ while (fgets(buf, sizeof(buf), argFILE)) { if (strlen(buf) <= 1) continue; /* don't save null args REJ */ (*idArgsP)->ida_arg = arg = strnsav(buf, strlen(buf)-1); if (*arg == '+' || *arg == '-') { (*idArgsP)->ida_flags = IDA_ARG; (*idArgsP)->ida_index = -1; } else { (*idArgsP)->ida_flags = IDA_SCAN|IDA_PATH; (*idArgsP)->ida_index = postIncr(&PathCount); ScanCount++; } (*idArgsP) = ((*idArgsP)->ida_next = NEW(struct idarg)); } } void initHashTable(pathCount) int pathCount; { if ((HashSize = round2((pathCount << 6) + 511)) > 0x8000) HashSize = 0x8000; HashMaxLoad = HashSize - (HashSize >> 4); /* about 94% */ HashTable = (struct idname **)calloc(HashSize, sizeof(struct idname *)); } /* Double the size of the hash table in the event of overflow... */ void rehash() { long oldHashSize = HashSize; struct idname **oldHashTable = HashTable; register struct idname **htp; register struct idname **slot; HashSize *= 2; if (Verbose) fprintf(stderr, "Rehashing... (doubling size to %ld)\n", HashSize); HashMaxLoad = HashSize - (HashSize >> 4); HashTable = (struct idname **)calloc(HashSize, sizeof(struct idname *)); HashFill = 0; for (htp = oldHashTable; htp < &oldHashTable[oldHashSize]; htp++) { if (*htp == NULL) continue; slot = (struct idname **)hashSearch((*htp)->idn_name, (char *)HashTable, HashSize, sizeof(struct idname *), h1str, h2str, idnHashCmp, &HashProbes); if (*slot) { fprintf(stderr, "%s: Duplicate hash entry!\n"); exit(1); } *slot = *htp; HashSearches++; HashFill++; } free(oldHashTable); } /* Round a given number up to the nearest power of 2. */ int round2(rough) int rough; { int round; round = 1; while (rough) { round <<= 1; rough >>= 1; } return round; } /* `compar' function for hashSearch() */ int idnHashCmp(key, idn) char *key; struct idname **idn; { int collate; if (*idn == NULL) return 0; if ((collate = strcmp(key, (*idn)->idn_name)) == 0) (*idn)->idn_flags &= ~IDN_SOLO; /* we found another occurance */ return collate; } /* `compar' function for qsort(). */ int idnQsortCmp(idn1, idn2) struct idname **idn1; struct idname **idn2; { if (*idn1 == *idn2) return 0; if (*idn1 == NULL) return 1; if (*idn2 == NULL) return -1; return strcmp((*idn1)->idn_name, (*idn2)->idn_name); } /* Allocate a new idname struct and fill in the name field. We allocate memory in large chunks to avoid frequent calls to malloc() which is a major pig. */ struct idname * newIdName(name) char *name; { register struct idname *idn; register char *allocp; register int allocsiz; static char *allocBuf = NULL; static char *allocEnd = NULL; #define ALLOCSIZ (8*1024) allocsiz = sizeof(struct idname) + strlen(name) + 1 + BitArraySize; allocsiz += (sizeof(long) - 1); allocsiz &= ~(sizeof(long) - 1); allocp = allocBuf; allocBuf += allocsiz; if (allocBuf > allocEnd) { allocBuf = malloc(ALLOCSIZ); allocEnd = &allocBuf[ALLOCSIZ]; allocp = allocBuf; allocBuf += allocsiz; } idn = (struct idname *)allocp; allocp += sizeof(struct idname); idn->idn_bitv = allocp; for (allocsiz = BitArraySize; allocsiz--; allocp++) *allocp = '\0'; idn->idn_name = strcpy(allocp, name); return idn; } int postIncr(ip) int *ip; { register int i; int save; save = *ip; i = save + 1; if ((i & 0x00ff) == 0x00ff) i++; if ((i & 0xff00) == 0xff00) /* This isn't bloody likely */ i += 0x100; *ip = i; return save; } /* Move all non-NULL table entries to the front of the table. return the number of non-NULL elements in the table. */ int hashCompress(table, size) char **table; int size; { register char **front; register char **back; front = &table[-1]; back = &table[size]; for (;;) { while (*--back == NULL) ; if (back < front) break; while (*++front != NULL) ; if (back < front) break; *front = *back; } return (back - table + 1); }