/* -*- Mode: Text -*- */ /* * buildhash.c - make a hash table for ispell * * Pace Willisson, 1983 */ #include #ifdef unix # include # include #else # ifdef AMIGA # include "types.h" # include "stat.h" # else # include # include # endif #endif #include "ispell.h" #define DICT "dict.191" #define COUNT "count.191" #define STATS "stats.191" #define NSTAT 100 struct stat dstat, cstat; int numwords, hashsize; char *malloc(); struct dent *hashtbl; main () { FILE *countf; FILE *statf; int stats[NSTAT]; int i; if (stat (DICT, &dstat) < 0) { fprintf (stderr, "No dictionary (%s)\n", DICT); exit (1); } if (stat (COUNT, &cstat) < 0 || dstat.st_mtime > cstat.st_mtime) newcount (); if ((countf = fopen (COUNT, "r")) == NULL) { fprintf (stderr, "No count file\n"); exit (1); } numwords = 0; fscanf (countf, "%d", &numwords); fclose (countf); if (numwords == 0) { fprintf (stderr, "Bad count file\n"); exit (1); } hashsize = numwords; readdict (); if ((statf = fopen (STATS, "w")) == NULL) { fprintf (stderr, "Can't create %s\n", STATS); exit (1); } for (i = 0; i < NSTAT; i++) stats[i] = 0; for (i = 0; i < hashsize; i++) { struct dent *dp; int j; if (hashtbl[i].used == 0) { stats[0]++; } else { for (j = 1, dp = &hashtbl[i]; dp->next != NULL; j++, dp = dp->next) ; if (j >= NSTAT) j = NSTAT - 1; stats[j]++; } } for (i = 0; i < NSTAT; i++) fprintf (statf, "%d: %d\n", i, stats[i]); fclose (statf); filltable (); output (); } output () { FILE *outfile; struct hashheader hashheader; int strptr, n, i; if ((outfile = fopen ("ispell.hash", "w")) == NULL) { fprintf (stderr, "can't create ispell.hash\n"); return; } hashheader.magic = MAGIC; hashheader.stringsize = 0; hashheader.tblsize = hashsize; fwrite (&hashheader, sizeof hashheader, 1, outfile); strptr = 0; for (i = 0; i < hashsize; i++) { n = strlen (hashtbl[i].word) + 1; fwrite (hashtbl[i].word, n, 1, outfile); hashtbl[i].word = (char *)strptr; strptr += n; } for (i = 0; i < hashsize; i++) { if (hashtbl[i].next != 0) { int x; x = hashtbl[i].next - hashtbl; hashtbl[i].next = (struct dent *)x; } else { hashtbl[i].next = (struct dent *)-1; } } fwrite (hashtbl, sizeof (struct dent), hashsize, outfile); hashheader.stringsize = strptr; rewind (outfile); fwrite (&hashheader, sizeof hashheader, 1, outfile); fclose (outfile); } filltable () { struct dent *freepointer, *nextword, *dp; int i; for (freepointer = hashtbl; freepointer->used; freepointer++) ; for (nextword = hashtbl, i = numwords; i != 0; nextword++, i--) { if (nextword->used == 0) { continue; } if (nextword->next == NULL) { continue; } if (nextword->next >= hashtbl && nextword->next < hashtbl + hashsize) { continue; } dp = nextword; while (dp->next) { if (freepointer > hashtbl + hashsize) { fprintf (stderr, "table overflow\n"); getchar (); break; } *freepointer = *(dp->next); dp->next = freepointer; dp = freepointer; while (freepointer->used) freepointer++; } } } readdict () { struct dent d; char lbuf[100]; FILE *dictf; int i; int h; char *p; if ((dictf = fopen (DICT, "r")) == NULL) { fprintf (stderr, "Can't open dictionary\n"); exit (1); } hashtbl = (struct dent *) calloc (numwords, sizeof (struct dent)); if (hashtbl == NULL) { fprintf (stderr, "couldn't allocate hash table\n"); exit (1); } i = 0; while (fgets (lbuf, sizeof lbuf, dictf) != NULL) { if (i % 1000 == 0) { printf ("%d ", i); fflush (stdout); } i++; p = &lbuf [ strlen (lbuf) - 1 ]; if (*p == '\n') *p = 0; if (makedent (lbuf, &d) < 0) continue; d.word = malloc (strlen (lbuf) + 1); if (d.word == NULL) { fprintf (stderr, "couldn't allocate space for word %s\n", lbuf); exit (1); } strcpy (d.word, lbuf); h = hash (lbuf, strlen (lbuf), hashsize); if (hashtbl[h].used == 0) { hashtbl[h] = d; } else { struct dent *dp; dp = (struct dent *) malloc (sizeof (struct dent)); if (dp == NULL) { fprintf (stderr, "couldn't allocate space for collision\n"); exit (1); } *dp = d; dp->next = hashtbl[h].next; hashtbl[h].next = dp; } } printf ("\n"); } /* * fill in the flags in d, and put a null after the word in s */ makedent (lbuf, d) char *lbuf; struct dent *d; { char *p, *index(); d->next = NULL; d->used = 1; d->v_flag = 0; d->n_flag = 0; d->x_flag = 0; d->h_flag = 0; d->y_flag = 0; d->g_flag = 0; d->j_flag = 0; d->d_flag = 0; d->t_flag = 0; d->r_flag = 0; d->z_flag = 0; d->s_flag = 0; d->p_flag = 0; d->m_flag = 0; p = index (lbuf, '/'); if (p != NULL) *p = 0; if (strlen (lbuf) > WORDLEN - 1) { printf ("%s: word too big\n"); return (-1); } if (p == NULL) return (0); p++; while (*p != NULL) { switch (*p) { case 'V': d->v_flag = 1; break; case 'N': d->n_flag = 1; break; case 'X': d->x_flag = 1; break; case 'H': d->h_flag = 1; break; case 'Y': d->y_flag = 1; break; case 'G': d->g_flag = 1; break; case 'J': d->j_flag = 1; break; case 'D': d->d_flag = 1; break; case 'T': d->t_flag = 1; break; case 'R': d->r_flag = 1; break; case 'Z': d->z_flag = 1; break; case 'S': d->s_flag = 1; break; case 'P': d->p_flag = 1; break; case 'M': d->m_flag = 1; break; case 0: fprintf (stderr, "no key word %s\n", lbuf); continue; default: fprintf (stderr, "unknown flag %c word %s\n", *p, lbuf); break; } p++; if (*p != '/' && *p != NULL && *p != '\n') { fprintf (stderr, "bad format %s (%c 0%o)\n", lbuf, *p, *p); break; } if (*p) p++; } return (0); } newcount () { char buf[200]; FILE *d; int i; fprintf (stderr, "Counting words in dictionary ...\n"); if ((d = fopen (DICT, "r")) == NULL) { fprintf (stderr, "Can't open dictionary\n"); exit (1); } i = 0; while (fgets (buf, sizeof buf, d) != NULL) { i++; if (i % 1000 == 0) { printf ("%d ", i); fflush (stdout); } } fclose (d); printf ("\n%d words\n", i); if ((d = fopen (COUNT, "w")) == NULL) { fprintf (stderr, "can't create %s\n", COUNT); exit (1); } fprintf (d, "%d\n", i); fclose (d); }