/* unjumble.c - try to make words from a jumbled string of letters. Ron Charlton 9002 Balcor Circle Knoxville, TN 37923 Phone: (615)694-0800 PLINK: R*CHARLTON BINTNET: charltr@utkvx1 05-Jul-90 This program is in the public domain. It may be used for any purpose. Algorithm: Generate all of the permutations of the letters in a string and test each one for its "trigram weight". (A trigram is a group of three letters, e.g., AAA, EWE, QQQ.) Use a table of trigram weights (frequencies) derived from a 38,500 word dictionary. Calculate an n-letter string's trigram weight as the sum of the weights of its (n - 2) trigrams. A trigram's weight is the log base 2 of its frequency of occurrence in the dictionary. Keep a table of the twenty highest weights while avoiding duplicates due to multiple occurrences of a single letter. Print out only the non-zero entries in the table. This all works because some trigrams (such as "ING") are more likely than others (such as "QQQ"). The trigram data are stored on disk as a nybble_table and a bit_table for compactness, then converted to an array (nn[][][]) for execution speed. The string to be unjumbled is adjusted by making 'A' 1, 'B' 2, ... 'Z' 26 so that the characters can be used as indices into nn[][][]. This allows nn[][][] to be as small as possible (leaving '\0' available as the end-of-string indicator). history: v1.3 - don't include any 'word' with any individual trigram weight of 0 v1.4 - general cleanup (& change to strupr) v1.5 - added exit() from main() & editorial comments & misc. tweakings v1.6 - changed to structure for table of words and weights v1.7 - changed do{}while() to for() in print_table(); READ_NYBBLE(); test_bit v1.8 - TEST_BIT() as macro; removed unused variables in main() v1.9 - rearranged main(), changed function names, modified intro(), strlwr() v1.10 removed strlwr() definition v1.11 changed temp variable to static in insert() v1.12 eliminated second strcpy() in permute() (faster!!) v1.13 allow 8-letter words, add 'nothing found' v1.14 adjust MAXLEN, restored strlwr() definition, conbined printf's in intro() */ char version[] = "v1.14"; #include #include #define CSI 0x9b #define CLS 0x0c #define FALSE 0 #define TRUE (!FALSE) #define MAPVALUE ('a'-1) /* 'A' for uppercase, 'a' for lowercase */ #define MAXLEN 10 /* max. length of string to unjumble */ #define TABLESIZE 20 /* size of table of guesses */ /* read a nybble from a table of nybbles (32-bit longs in table) */ #define READ_NYBBLE( nyb_number ) \ ( (nybble_table [ nyb_number >> 3 ] >> ((nyb_number & 7L) << 2)) & 0xFL ) /* test bit n in table of 32-bit ints */ #define TEST_BIT( n ) \ ( bit_table [ n >> 5 ] & ( 1L << (n & 0x1FL) ) ? 1 : 0 ) /* the follow two lines are associated with tables.c */ extern int bit_table_size, nybble_table_size; extern unsigned long bit_table[], nybble_table[]; /* structure array to hold candidate words and their weights */ struct { char word [ MAXLEN + 1 ]; int weight; } table [ TABLESIZE ]; unsigned char nn[27][27][27]; /* array of probabilities of trigrams */ int min_weight; /* used in insert() for minimum weight */ int min_weight_ptr; /* used in insert() to point at minimum */ char *strlwr(); /*------------------------------------------------------------------------*/ main() { char buf [ 255+1 ]; int length; intro(); get_weights(); /* read weights into nn[][][] */ for (;;) { init_table(); printf ( "\tEnter 3 to 8 letters ( to quit ): "); if ( ! gets ( buf ) ) break; length = strlen ( buf ); if ( length == 0 ) break; else if ( !all_letters ( buf ) ) printf ( "\tYou may only use letters (a-z; A-Z).\n\n" ); else if ( length > 8 ) printf ( "\tToo many letters.\n\n" ); else if ( length < 3 ) printf ( "\tToo few letters.\n\n" ); else { /* do the unjumbling */ strlwr ( buf ); map ( buf ); /* convert to array indices */ permute ( buf, 0, length ); /* do all permutations */ print_table(); /* display the results */ } } exit ( 0 ); } /*------------------------------------------------------------------------*/ intro() { #ifdef MCH_AMIGA printf ( "%c", CLS ); /* clear screen */ printf ( "%c0;33;40m", CSI ); /* color 3,0 */ #endif printf ( "\n\t\t\t Unjumble %s\n\n" "\t\t\t by\n\n" "\t\t\t Ron Charlton\n" "\t\t\t 9002 Balcor Circle\n" "\t\t\t Knoxville, TN 37923\n" "\t\t\t Plink: R*CHARLTON\n" "\t\t\t BITNET: charltr@utkvx1\n" "\t\t Internet: charltr@utkvx1.utk.edu\n\n", version ); #ifdef MCH_AMIGA printf ( "%c0;31;40m", CSI ); /* color 1,0 */ #endif printf ( "\tUnjumble tries to create words out of a group of letters\n" "\tthat you enter. It will print out up to twenty guesses in\n" "\torder with each guess' weight before it. Three to 8 letters at\n" "\ta time are accepted. Eight letters requires several seconds of \n" "\tcalculation. More letters are harder to unjumble.\n\n" ); } /*------------------------------------------------------------------------*/ /* test for all letters in a string */ all_letters ( str ) char *str; { while ( isalpha ( *str++ ) ) ; return ( ! *--str ); /* pointing at '\0' if all alpha */ } /*------------------------------------------------------------------------*/ /* convert letters to array indices (A-->1, B-->2 ... Z-->26) */ map ( str ) char *str; { while ( * str ) * str++ -= MAPVALUE; } /*------------------------------------------------------------------------*/ /* find sum of weights of trigrams in a string. */ weigh ( str ) char *str; { register int weight = 0, len, pos, triweight; if ( (len = strlen ( str ) ) >= 3 ) for ( pos = 0; pos <= len - 3; pos++) { triweight = nn [ str [pos] ] [ str [pos+1] ] [ str [pos+2] ]; if ( triweight > 0 ) weight += triweight; else { /* eliminate those with any trigram of 0 weight */ weight = 0; break; } } return ( weight ); } /*------------------------------------------------------------------------*/ /* find all permutations of a string */ permute ( a, k, n ) char *a; int k, n; { int i; char b [ MAXLEN ]; static char temp; if (k == n) insert ( a ); /* insert into table if weighty */ else { strcpy(b, a); for (i = k; i < n; i++) { temp = b [ k ]; b [ k ] = b [ i ]; b [ i ] = temp; permute ( b, k+1, n ); } } } /*------------------------------------------------------------------------*/ /* convert array indices to alphabetic 1-->A, 2-->B ... 26-->Z */ unmap ( str ) char *str; { while ( * str ) *str++ += MAPVALUE; } /*------------------------------------------------------------------------*/ /* initialize the guess table to empty */ init_table() { int i; min_weight = 0; min_weight_ptr = 0; for ( i = 0; i < TABLESIZE; i++) { table[i].weight = 0; table[i].word[0] = '\0'; } } /*------------------------------------------------------------------------*/ /* if this is a weighty word, insert it in the table */ insert ( str ) char *str; { register int weight, i; weight = weigh ( str ); if ( weight > min_weight ) /* does this word weigh more than table min.? */ { for ( i = 0; i < TABLESIZE; i++ ) /* avoid duplicates (BIGgER BIgGER) */ if ( ! strcmp ( table[i].word, str ) ) return; /* it's already in the table */ /* put this one in table in place of old minimum */ table [ min_weight_ptr ].weight = weight; strcpy ( table [ min_weight_ptr ].word, str ); /* search for new minimum in the table */ min_weight_ptr = 0; min_weight = table [ 0 ].weight; for ( i = 1; i < TABLESIZE; i++ ) if ( table [ i ].weight < min_weight ) { min_weight_ptr = i; min_weight = table [ i ].weight; } } } /*------------------------------------------------------------------------*/ /* print the non-zero table entries in descending order. */ /* this is a bogus, but adequate, sort */ print_table() { int max_entry, i, first_pass = TRUE; for(;;) { max_entry = 0; /* point at maximum weight */ for ( i = 1; i < TABLESIZE; i++ ) if ( table [ i ].weight > table [ max_entry ].weight ) max_entry = i; if ( first_pass && table [ max_entry ].weight == 0 ) { printf ( "\t\tNothing found.\n" ); break; } if ( table [ max_entry ].weight ) { unmap ( table [ max_entry ].word ); printf("\t\t%5d %s\n", table [ max_entry ].weight, table [ max_entry ].word ); table [ max_entry ].weight = 0; /* 'delete' from table */ } else break; first_pass = FALSE; } } /*------------------------------------------------------------------------*/ /* get trigram weights from nybble_table and bit_table into nn[][][] */ get_weights() { register int i, j, k, m = 0, ptr = 0; for ( i = 1; i <= 26; i++ ) for ( j = 1; j <= 26; j++ ) for ( k = 1; k <= 26; k++ ) { /* beware of ++x side effects */ if ( TEST_BIT ( m ) ) { nn [ i ] [ j ] [ k ] = READ_NYBBLE ( ptr ); ++ptr; } ++m; } } /*------------------------------------------------------------------------*/ /* * Convert a string to lower case in place and return a pointer * to the resulting string. */ char * strlwr ( char *str ) { register char *s = str; while ( *s ) { if (*s >= 'A' && *s <= 'Z') *s = *s - 'A' + 'a'; ++s; } return ( str ); } /*-------------------------- END OF FILE ---------------------------------*/