/************************************************************************ * * * Copyright (c) 1982, Fred Fish * * All Rights Reserved * * * * This software and/or documentation is released for public * * distribution for personal, non-commercial use only. * * Limited rights to use, modify, and redistribute are hereby * * granted for non-commercial purposes, provided that all * * copyright notices remain intact and all changes are clearly * * documented. The author makes no warranty of any kind with * * respect to this product and explicitly disclaims any implied * * warranties of merchantability or fitness for any particular * * purpose. * * * ************************************************************************ */ /* * FILE * * hashtbl.c generic hash table utility routines * * KEYWORDS * * hash table * hash function * * DESCRIPTION * * In many programming applications, a need exists for * a relatively simple set of utility routines for * managing a lookup table. Typically, information * is accessed by a single "key", such as a person's * name, a programming variable character string, etc. * * These utility routines provide useful table manipulation * routines for small to medium sized tables. The method * used is a hash table, with all entries hashing to a * particular value stored in a linked list. * * Each table entry points to an arbitrary structure (tbl_data) * defined externally, which contains the actual information * associated with that entry (including the name of the entry). * To keep these routines generic, only pointers to the * structure are manipulated. Specifically note that the * actual storage associated with the information in each * entry must be allocated external to these routines. * * FUNCTIONS * * tbl_find find a table entry and return pointer * tbl_add add an entry to the table * dump_tbl dump the hash table for debugging * * BUGS * * Current hash function is very primitive and may give * a very unbalanced table for some classes of strings. * * AUTHOR * * Fred Fish * */ #include #include "hashtbl.h" struct tbl_entry { /* Structure for each table entry */ char *name; /* Table entry name */ struct tbl_data *tdp; /* Structure defined in "hashtbl.h" */ struct tbl_entry *next; /* Next entry with same hash value */ }; #define SCR_WIDTH 80 /* Screen width for table dump */ #define SMATCH 0 /* Value when strings are same */ extern int debug; /* Debug flag */ extern int trace; /* Trace flag */ static struct tbl_entry *hash_table[HASH_SIZE]; /* * FUNCTION * * tbl_find find a table entry and return struct pointer * * KEY WORDS * * table lookup * hash table * * SYNOPSIS * * struct tbl_data *tbl_find(name) * char *name; * * DESCRIPTION * * Given the name of an entry in the table, this routine * attempts to look it up and return a pointer to it's * associated structure. If no entry of the specified * name is found then a NULL is returned. * * The table is structured as a number of linked lists * with the pointer to the first link stored in a hash table. * Each entry in a specific linked list hashes to the same * value. For example: * * HASH TABLE LINKED LIST * ---------- ----------------------- * * . * . * NULL * "12" => name1 => name2 => name3 * NULL * "31" => name4 * "32" => name5 => name6 * . * . * * RETURNS * * Returns pointer to structure if the entry is found. Returns * NULL if no entry has specified name. * */ /* * PSEUDO CODE * * Begin tbl_find. * If pointer to name is not NULL then * Get pointer to first entry for current hash value. * While there is a table entry to test for a name match * If the names match then * Return pointer to the structure for the entry. * Else * Lookup the next table entry for the hash value. * End if * End while * End if * Return NULL for no entry found. * End tbl_find. * */ struct tbl_data *tbl_find(name) char *name; { struct tbl_entry *lookup; if(trace) {printf("beginning table_find\n");} if (name != NULL) { lookup = hash_table[hash(name)]; while (lookup != NULL) { if (strcmp(lookup->name,name) == SMATCH) { return (lookup->tdp); } else { lookup = lookup->next; } } } return ((struct tbl_data *)NULL); } /* * FUNCTION * * tbl_add add an entry to the table * * KEY WORDS * * hash table * table insertions * * SYNOPSIS * * struct tbl_data *tbl_add(new_data) * struct tbl_data *new_data; * * DESCRIPTION * * This routine is responsible for adding entries to the table. * Note that to avoid any possibility of duplicate entries, * tbl_add first calls tbl_find to see if an entry exists. * This usually results in duplication of effort since most * calls to tbl_add are already preceded by a call to * tbl_find. However duplicate entries are a serious matter * and every effort should be made to avoid them. * * RETURNS * * Returns pointer to the table data structure if add is * successful, NULL otherwise (such as duplicate entry). * */ /* * PSEUDO CODE * * Begin tbl_add. * If the pointer to the entry name is not valid then * Return a NULL. * Else if the entry already exists then * Return a NULL. * Else * Allocate new memory for the new table entry. * If allocation fails then * Return a NULL. * Else * Save pointer to the entry name in the table entry. * Save the structure pointer in the table entry. * Initialize the pointer to the next table entry to NULL. * Compute the hash value for the entrie's name. * If the hash table currently contains no entry for value * Start a new table entry chain with hash value. * Else * While there is another link in the current chain * Skip over current and go to the next link. * End while * Add the new link at the end of the chain. * End if * Return pointer to struct (for success). * End if * End if * End tbl_add * */ struct tbl_data *tbl_add(new_data) struct tbl_data *new_data; { struct tbl_entry *new, *scan; int hash_value; char *get_memory(); if(trace) {printf("beginning tbl_add\n");} if (new_data->name == NULL) { return((struct tbl_data *)NULL); } else if (tbl_find(new_data->name) != NULL) { return((struct tbl_data *)NULL); } else { new = (struct tbl_entry *) get_memory(sizeof(struct tbl_entry)); if (new == NULL) { return((struct tbl_data *)NULL); } else { new->name = new_data->name; new->tdp = new_data; new->next = NULL; hash_value = hash(new->name); if ((scan = hash_table[hash_value]) == NULL) { hash_table[hash_value] = new; } else { while (scan->next != NULL) { scan = scan->next; } scan->next = new; } return(new_data); } } } /* * FUNCTION * * hash compute hash value for a string * * KEY WORDS * * hash table * hash function * * SYNOPSIS * * int hash(string) * char *string; * * DESCRIPTION * * Takes an input string and returns a hash value * for that string. Note that the algorithm used has the merit * of extreme simplicity but is by no means the best. * * RETURNS * * Hash value of string. * */ /* * PSEUDO CODE * * Begin hash. * Initialize hash value to be zero. * While there is a character left in input string. * Sum the character's value into the hash value. * End while * Return the value (modulo the hash table size). * End hash. * */ static int hash(string) char *string; { register int hash_value; if(trace) {printf("beginning hash\n");} hash_value = 0; while (*string != NULL) { hash_value += *string++; } hash_value %= HASH_SIZE; return (hash_value); } /* * FUNCTION * * dump_tbl dump hash table for debugging purposes * * KEY WORDS * * hash table * * SYNOPSIS * * dump_tbl() * * DESCRIPTION * * Prints the table contents on the standard output * in the format: * * nnn: name : name : name : name ... * * Where "nnn" is the hash value (index into the table) and * "name" is the name of an entry. The names are printed * in the same order they are encountered in the linked list, and * all have the same hash value. * * Note that the distribution of names can give some indication of * the suitability of the particular hash function used. * */ /* * PSEUDO CODE * * Begin dump_tbl * For each hash table entry in the table * If the hash table entry points to a chain then * Initialize output buffer with hash value output. * For each entry in the linked list chain * If adding name would overflow buffer then * Print the buffer. * Reinitialize buffer for additional line. * End if * Add entry name to buffer * End for * Print the buffer. * End if * End for * End dump_tbl * */ dump_tbl() { char buffer[SCR_WIDTH]; int hash_index; struct tbl_entry *tp; for (hash_index=0; hash_indexnext) { if ((strlen(tp->name) + strlen(buffer) + 4) > SCR_WIDTH) { printf("%s\n",buffer); sprintf(buffer,"\t"); } strcat(buffer," : "); strcat(buffer,tp->name); } printf("%s\n",buffer); } } } /* * FUNCTION * * get_memory allocate more main memory and zero it * * KEY WORDS * * memory allocator * * SYNOPSIS * * char *get_memory(how_much) * unsigned int how_much; * * DESCRIPTION * * This routine is responsible for all memory allocation * requests. If the amount of memory requested cannot be * allocated for any reason, an error message is printed * and NULL is returned. * * To avoid any question about the contents of the newly * allocated memory (some systems zero it, others don't), * we explicitly zero the memory. * * RETURNS * * Pointer to allocated memory or NULL if memory allocation * fails. * */ /* * PSEUDO CODE * * Begin get_memory * Attempt to allocate the requested number of bytes. * If allocation attempt failed then * Print memory allocation failure message. * Return NULL. * Else * For each byte in new memory. * Zero the byte. * End for * End if * Return pointer to the memory which was allocated. * End get_memory * */ char *get_memory(how_much) unsigned int how_much; { char *memory, *mem; char *malloc(); int count; if(trace) {printf("beginning get_memory\n");} memory = malloc(how_much); if (memory == NULL) { fprintf(stderr,"?hashtbl: unable to allocate more memory!\n"); return(NULL); } else { mem = memory; for (count=0; count