/* * Bawk main program */ #define MAIN 1 #include #include "bawk.h" /* * Main program */ main( argc, argv ) int argc; char **argv; { char gotrules, didfile, getstdin; getstdin = didfile = gotrules = 0; /* * Initialize global variables: */ Beginact = 0; Endact = 0; Rules = 0; Rulep = 0; #ifdef DEBUG Debug = 0; #endif Filename = 0; Linecount = 0; Saw_break = 0; Stackptr = Stackbtm - 1; Stacktop = Stackbtm + MAXSTACKSZ; Nextvar = Vartab; strcpy( Fieldsep, " \t" ); strcpy( Recordsep, "\n" ); /* * Parse command line */ while ( --argc ) { if ( **(++argv) == '-' ) { /* * Process dash options. */ switch ( tolower( *(++(*argv)) ) ) { #ifdef DEBUG case 'd': ++Debug; break; #endif case 0: ++getstdin; --argv; goto dosomething; break; default: usage(); } } else { dosomething: if ( gotrules ) { /* * Already read rules file - assume this is * is a text file for processing. */ if ( ++didfile == 1 && Beginact ) doaction( Beginact ); if ( getstdin ) { --getstdin; newfile( 0 ); } else newfile( *argv ); process(); } else { /* * First file name argument on command line * is assumed to be a rules file - attempt to * compile it. */ if ( getstdin ) { --getstdin; newfile( 0 ); } else newfile( *argv ); compile(); gotrules = 1; } } } if ( !gotrules ) usage(); if ( ! didfile ) { /* * Didn't process any files yet - process stdin. */ newfile( 0 ); if ( Beginact ) doaction( Beginact ); process(); } if ( Endact ) doaction( Endact ); } /* * Regular expression/action file compilation routines. */ compile() { /* * Compile regular expressions and C actions into Rules struct, * reading from current input file "Fileptr". */ int c, len; #ifdef DEBUG if ( Debug ) error( "compiling...", 0 ); #endif while ( (c = getcharacter()) != -1 ) { if ( c==' ' || c=='\t' || c=='\n' ) /* swallow whitespace */ ; else if ( c=='#' ) { /* * Swallow comments */ while ( (c=getcharacter()) != -1 && c!='\n' ) ; } else if ( c=='{' ) { #ifdef DEBUG if ( Debug ) error( "action", 0 ); #endif /* * Compile (tokenize) the action string into our * global work buffer, then allocate some memory * for it and copy it over. */ ungetcharacter( '{' ); len = act_compile( Workbuf ); if ( Rulep && Rulep->action ) { Rulep->nextrule = getmem( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; fillmem( Rulep, sizeof( *Rulep ), 0 ); } if ( !Rulep ) { /* * This is the first action encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = getmem( sizeof( *Rulep ) ); fillmem( Rulep, sizeof( *Rulep ), 0 ); } Rulep->action = getmem( len ); movemem( Workbuf, Rulep->action, len ); } else if ( c==',' ) { #ifdef DEBUG if ( Debug ) error( "stop pattern", 0 ); #endif /* * It's (hopefully) the second part of a two-part * pattern string. Swallow the comma and start * compiling an action string. */ if ( !Rulep || !Rulep->pattern.start ) error( "stop pattern without a start", RE_ERROR ); if ( Rulep->pattern.stop ) error( "already have a stop pattern", RE_ERROR ); len = pat_compile( Workbuf ); Rulep->pattern.stop = getmem( len ); movemem( Workbuf, Rulep->pattern.stop, len ); } else { /* * Assume it's a regular expression pattern */ #ifdef DEBUG if ( Debug ) error( "start pattern", 0 ); #endif ungetcharacter( c ); len = pat_compile( Workbuf ); if ( *Workbuf == T_BEGIN ) { /* * Saw a "BEGIN" keyword - compile following * action into special "Beginact" buffer. */ len = act_compile( Workbuf ); Beginact = getmem( len ); movemem( Workbuf, Beginact, len ); continue; } if ( *Workbuf == T_END ) { /* * Saw an "END" keyword - compile following * action into special "Endact" buffer. */ len = act_compile( Workbuf ); Endact = getmem( len ); movemem( Workbuf, Endact, len ); continue; } if ( Rulep ) { /* * Already saw a pattern/action - link in * another Rules structure. */ Rulep->nextrule = getmem( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; fillmem( Rulep, sizeof( *Rulep ), 0 ); } if ( !Rulep ) { /* * This is the first pattern encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = getmem( sizeof( *Rulep ) ); fillmem( Rulep, sizeof( *Rulep ), 0 ); } if ( Rulep->pattern.start ) error( "already have a start pattern", RE_ERROR ); Rulep->pattern.start = getmem( len ); movemem( Workbuf, Rulep->pattern.start, len ); } } endfile(); } /* * Text file main processing loop. */ process() { /* * Read a line at a time from current input file at "Fileptr", * then apply each rule in the Rules chain to the input line. */ int i; #ifdef DEBUG if ( Debug ) error( "processing...", 0 ); #endif Recordcount = 0; while ( getline() ) { /* * Parse the input line. */ Fieldcount = parse( Linebuf, Fields, Fieldsep ); #ifdef DEBUG if ( Debug>1 ) { printf( "parsed %d words:\n", Fieldcount ); for(i=0; i\n", Fields[i] ); } #endif Rulep = Rules; do { if ( ! Rulep->pattern.start ) { /* * No pattern given - perform action on * every input line. */ doaction( Rulep->action ); } else if ( Rulep->pattern.startseen ) { /* * Start pattern already found - perform * action then check if line matches * stop pattern. */ doaction( Rulep->action ); if ( dopattern( Rulep->pattern.stop ) ) Rulep->pattern.startseen = 0; } else if ( dopattern( Rulep->pattern.start ) ) { /* * Matched start pattern - perform action. * If a stop pattern was given, set "start * pattern seen" flag and process every input * line until stop pattern found. */ doaction( Rulep->action ); if ( Rulep->pattern.stop ) Rulep->pattern.startseen = 1; } } while ( Rulep = Rulep->nextrule ); /* * Release memory allocated by parse(). */ while ( Fieldcount ) free( Fields[ --Fieldcount ] ); } } /* * Miscellaneous functions */ parse( str, wrdlst, delim ) char *str; char *wrdlst[]; char *delim; { /* * Parse the string of words in "str" into the word list at "wrdlst". * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". * Returns the number of words parsed. * CAUTION: the memory for the words in "wrdlst" is allocated * by malloc() and should eventually be returned by free()... */ int wrdcnt, wrdlen; char wrdbuf[ MAXLINELEN ], c; wrdcnt = 0; while ( *str ) { while ( instr( *str, delim ) ) ++str; if ( !*str ) break; wrdlen = 0; while ( (c = *str) && !instr( c, delim ) ) { wrdbuf[ wrdlen++ ] = c; ++str; } wrdbuf[ wrdlen++ ] = 0; /* * NOTE: allocate a MAXLINELEN sized buffer for every * word, just in case user wants to copy a larger string * into a field. */ wrdlst[ wrdcnt ] = getmem( MAXLINELEN ); strcpy( wrdlst[ wrdcnt++ ], wrdbuf ); } return wrdcnt; } unparse( wrdlst, wrdcnt, str, delim ) char *wrdlst[]; int wrdcnt; char *str; char *delim; { /* * Replace all the words in "str" with the words in "wrdlst", * maintaining the same word seperation distance as found in * the string. * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". */ int wc; char strbuf[ MAXLINELEN ], *sp, *wp, *start; wc = 0; /* next word in "wrdlst" */ sp = strbuf; /* points to our local string */ start = str; /* save start address of "str" for later... */ while ( *str ) { /* * Copy the field delimiters from the original string to * our local version. */ while ( instr( *str, delim ) ) *sp++ = *str++; if ( !*str ) break; /* * Skip over the field in the original string and... */ while ( *str && !instr( *str, delim ) ) ++str; if ( wc < wrdcnt ) { /* * ...copy in the field in the wordlist instead. */ wp = wrdlst[ wc++ ]; while ( *wp ) *sp++ = *wp++; } } /* * Tie off the local string, then copy it back to caller's string. */ *sp = 0; strcpy( start, strbuf ); } instr( c, s ) char c, *s; { while ( *s ) if ( c==*s++ ) return 1; return 0; } char * getmem( len ) unsigned len; { char *cp; if ( cp=malloc( len ) ) return cp; error( "out of memory", MEM_ERROR ); } char *newfile( s ) char *s; { Linecount = 0; if ( Filename = s ) { #ifdef BDS_C if ( fopen( s, Fileptr = Curfbuf ) == -1 ) #else if ( !(Fileptr = fopen( s, "r" )) ) #endif error( "file not found", FILE_ERROR ); } else { /* * No file name given - process standard input. */ Fileptr = stdin; Filename = "standard input"; } } getline() { /* * Read a line of text from current input file. Strip off * trailing record seperator (newline). */ int rtn, len; for ( len=0; len */ if ( (c = getc( Fileptr )) == '\r' ) { if ( (c = getc( Fileptr )) != '\n' ) { ungetc( c ); c = '\r'; } } else if ( c == 26 ) /* ^Z */ c = -1; #else c = getc( Fileptr ); #endif if ( c == *Recordsep ) ++Recordcount; if ( c=='\n' ) ++Linecount; return c; } ungetcharacter( c ) { /* * Push a character back into the input stream. * If the character is a record seperator, or a newline character, * the record and line counters are adjusted appropriately. */ if ( c == *Recordsep ) --Recordcount; if ( c=='\n' ) --Linecount; return ungetc( c, Fileptr ); } endfile() { fclose( Fileptr ); Filename = Linecount = 0; } error( s, severe ) char *s; int severe; { char *cp, *errat; if ( Filename ) fprintf( stderr, "%s:", Filename ); if ( Linecount ) fprintf( stderr, " line %d:", Linecount ); fprintf( stderr, " %s\n", s ); if ( severe ) exit( severe ); } usage() { error( "Usage: bawk [ ...]\n", USAGE_ERROR ); } movemem( from, to, count ) char *from, *to; int count; { while ( count-- > 0 ) *to++ = *from++; } fillmem( array, count, value ) char *array, value; int count; { while ( count-- > 0 ) *array++ = value; } strncmp( s, t, n ) char *s, *t; int n; { while ( --n>0 && *s && *t && *s==*t ) { ++s; ++t; } if ( *s || *t ) return *s - *t; return 0; } num( c ) char c; { return '0'<=c && c<='9'; } alpha( c ) char c; { return ('a'<=c && c<='z') || ('A'<=c && c<='Z') || c=='_'; } alphanum( c ) char c; { return alpha( c ) || num( c ); }