/* * Bawk main program */ #define MAIN 1 #include #include "bawk.h" static char *pattern_arg = NULL; /* Command line bawk program pattern */ static int ungetc_arg = 0; static char eof_seen = 0; static int max_field_count = 0; /* * Main program */ main( argc, argv ) register int argc; register char **argv; { register char gotrules = 0, didfile = 0, getstdin = 0; register char rule_file_flag = 0; DBUG_ENTER("main"); /* * Initialize global variables: */ Beginact = 0; Endact = 0; Rules = 0; Rulep = 0; Filename = 0; Linecount = 0; Saw_break = 0; Stackptr = Stackbtm - 1; Stacktop = Stackbtm + MAXSTACKSZ; Nextvar = Vartab; init_pop_array(); strcpy( Fieldsep, " \t" ); strcpy( Recordsep, "\n" ); /* * Parse command line */ while ( --argc ) { if ( **(++argv) == '-' ) { /* * Process dash options. */ switch ( tolower( argv[0][1] ) ) { case '#': DBUG_PUSH(&argv[0][2]); continue; case 'f': if(!gotrules) { rule_file_flag++; argv++; argc--; } else usage(); break; case 0: if(!gotrules) rule_file_flag++; getstdin++; break; default: usage(); } } if ( gotrules ) { /* * Already read rules file - assume this is * is a text file for processing. */ if ( ++didfile == 1 && Beginact ) doaction( Beginact ); if ( getstdin ) { getstdin--; newfile( 0 ); } else newfile( *argv ); process(); } else { if(rule_file_flag) { if ( getstdin ) { getstdin--; newfile( 0 ); } else newfile( *argv ); } else pattern_arg = *argv; compile(); pattern_arg = NULL; gotrules = 1; } } if ( !gotrules ) usage(); if ( ! didfile ) { /* * Didn't process any files yet - process stdin. */ newfile( 0 ); if ( Beginact ) doaction( Beginact ); process(); } if ( Endact ) doaction( Endact ); DBUG_RETURN(0); } /* * Regular expression/action file compilation routines. */ void compile() { /* * Compile regular expressions and C actions into Rules struct, * reading from current input file "Fileptr". */ register int c; register EXPR_NODE *root; DBUG_ENTER("compile"); while ( (c = getcharacter()) != -1 ) { if ( c==' ' || c=='\t' || c=='\n' ) /* swallow whitespace */ ; else if ( c=='#' ) { /* * Swallow comments */ while ( (c=getcharacter()) != -1 && c!='\n' ) ; } else if ( c=='{' ) { DBUG_PRINT("compile",("action")); /* * Compile the action string into a parse tree */ ungetcharacter( (char) '{' ); if ( Rulep && Rulep->action ) { Rulep->nextrule = (RULE *) get_clear_memory( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; } if ( !Rulep ) { /* * This is the first action encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = (RULE *) get_clear_memory( sizeof( *Rulep ) ); } Rulep->action = act_compile( Workbuf ); } else if ( c==',' ) { DBUG_PRINT("compile",("stop pattern")); /* * It's (hopefully) the second part of a two-part * pattern string. Swallow the comma and start * compiling an action string. */ if ( !Rulep || !Rulep->pattern.start ) error( "stop pattern without a start", RE_ERROR ); if ( Rulep->pattern.stop ) error( "already have a stop pattern", RE_ERROR ); Rulep->pattern.stop = pat_compile( Workbuf ); } else { /* * Assume it's a regular expression pattern */ DBUG_PRINT("compile",("start pattern")); ungetcharacter( (char) c ); root = pat_compile( Workbuf ); if ( *Workbuf == T_BEGIN ) { /* * Saw a "BEGIN" keyword - compile following * action into special "Beginact" parse tree. */ Beginact = act_compile( Workbuf ); continue; } if ( *Workbuf == T_END ) { /* * Saw an "END" keyword - compile following * action into special "Endact" parse tree. */ Endact = act_compile( Workbuf ); continue; } if ( Rulep ) { /* * Already saw a pattern/action - link in * another Rules structure. */ Rulep->nextrule = (RULE *) get_clear_memory( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; } if ( !Rulep ) { /* * This is the first pattern encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = (RULE *) get_clear_memory( sizeof( *Rulep ) ); } if ( Rulep->pattern.start ) error( "already have a start pattern", RE_ERROR ); Rulep->pattern.start = root; } } for(Rulep = Rules; Rulep; Rulep = Rulep->nextrule) { if(!Rulep->action) { pattern_arg = "{printf \"%s\n\", $0}"; Rulep->action = act_compile( Workbuf ); pattern_arg = NULL; } } endfile(); DBUG_VOID_RETURN; } /* * Text file main processing loop. */ void process() { /* * Read a line at a time from current input file at "Fileptr", * then apply each rule in the Rules chain to the input line. */ register int i; DBUG_ENTER("process"); Recordcount = 0; while ( getline() ) { /* * Parse the input line. */ if(! *Recordsep ) strcpy(Fieldsep," \t\n"); Fieldcount = parse( Linebuf, Fields, Fieldsep ); DBUG_PRINT("process",( "parsed %d words:", Fieldcount )); DBUG_EXECUTE("process",for(i=0; i",Fields[i]));); Rulep = Rules; while(Rulep) { if ( ! Rulep->pattern.start ) { /* * No pattern given - perform action on * every input line. */ doaction( Rulep->action ); } else if ( Rulep->pattern.startseen ) { /* * Start pattern already found - perform * action then check if line matches * stop pattern. */ doaction( Rulep->action ); if ( dopattern( Rulep->pattern.stop ) ) Rulep->pattern.startseen = 0; } else if ( dopattern( Rulep->pattern.start ) ) { /* * Matched start pattern - perform action. * If a stop pattern was given, set "start * pattern seen" flag and process every input * line until stop pattern found. */ doaction( Rulep->action ); if ( Rulep->pattern.stop ) Rulep->pattern.startseen = 1; } Rulep = Rulep->nextrule; } } DBUG_VOID_RETURN; } /* * Miscellaneous functions */ parse( str, wrdlst, delim ) register char *str; char *wrdlst[]; char *delim; { /* * Parse the string of words in "str" into the word list at "wrdlst". * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". * Returns the number of words parsed. */ register int wrdcnt; register char *cp, *wrdcp, c; char wrdbuf[ MAXLINELEN+1 ]; DBUG_ENTER("parse"); wrdcnt = 0; while ( *str ) { while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(! *cp) break; } str--; if ( !*str ) break; wrdcp = wrdbuf; while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(*cp) break; *wrdcp++ = c; } str--; *wrdcp = 0; /* * NOTE: allocate a MAXLINELEN sized buffer for every * word, just in case user wants to copy a larger string * into a field. */ if(wrdcnt == max_field_count) { wrdlst[ wrdcnt ] = getmemory( MAXLINELEN+1 ); max_field_count++; } strcpy( wrdlst[ wrdcnt++ ], wrdbuf ); } DBUG_RETURN(wrdcnt); } void unparse( wrdlst, wrdcnt, str, delim ) char *wrdlst[]; register int wrdcnt; register char *str; char *delim; { /* * Replace all the words in "str" with the words in "wrdlst", * maintaining the same word seperation distance as found in * the string. * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". */ register int wc; register char *sp, *cp, c; char strbuf[ MAXLINELEN+1 ], *start; DBUG_ENTER("unparse"); wc = 0; /* next word in "wrdlst" */ sp = strbuf; /* points to our local string */ start = str; /* save start address of "str" for later... */ while ( *str ) { /* * Copy the field delimiters from the original string to * our local version. */ while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(!*cp) break; *sp++ = c; } str--; if ( !*str ) break; /* * Skip over the field in the original string and... */ while(c = *str++) { cp = delim; while(*cp && c != *cp) cp++; if(*cp) break; } str--; if ( wc < wrdcnt ) { /* * ...copy in the field in the wordlist instead. */ cp = wrdlst[ wc++ ]; while(*sp++ = *cp++); sp--; } } /* * Tie off the local string, then copy it back to caller's string. */ *sp = 0; strcpy( start, strbuf ); DBUG_VOID_RETURN; } char * getmemory( len ) register unsigned len; { register char *cp; DBUG_ENTER("getmemory"); if ( cp=malloc( len ) ) DBUG_RETURN(cp); error( "out of memory", MEM_ERROR ); DBUG_RETURN(NULL); } char * get_clear_memory( len ) register unsigned len; { register char *cp; DBUG_ENTER("getmemory"); if ( cp=calloc( 1, len ) ) DBUG_RETURN(cp); error( "out of memory", MEM_ERROR ); DBUG_RETURN(NULL); } EXPR_NODE *get_expr_node(operator) char operator; { register EXPR_NODE *node; DBUG_ENTER("get_expr_node"); node = (EXPR_NODE *) getmemory(sizeof(EXPR_NODE)); node->left = node->right = NULL; node->operator = operator; DBUG_PRINT("get_expr_node",("operator = '%s'",token_name[operator])); DBUG_RETURN(node); } void newfile( s ) register char *s; { DBUG_ENTER("newfile"); Linecount = 0; if ( Filename = s ) { #ifdef BDS_C if ( fopen( s, Fileptr = Curfbuf ) == -1 ) #else if ( !(Fileptr = fopen( s, "r" )) ) #endif error( "file not found", FILE_ERROR ); } else { /* * No file name given - process standard input. */ Fileptr = stdin; Filename = "standard input"; } DBUG_VOID_RETURN; } getline() { /* * Read a record from current input file. */ register int rtn, len = 0; register char *cp = Linebuf, *last_nl, *sep = Recordsep; DBUG_ENTER("getline"); if(eof_seen) { endfile(); DBUG_RETURN(0); } if(*sep) { while((*cp++ = rtn = getcharacter()) != *sep++ && rtn != -1) { while(*sep) { if(rtn == *sep++) break; } if( ++len == MAXLINELEN ) error("Input record too long", RECORD_ERROR); sep = Recordsep; } } else /* Treat an empty line as record separator. */ { while(1) { last_nl = cp; while((*cp++ = rtn = getcharacter()) != '\n' && rtn != -1) { if( ++len == MAXLINELEN ) error("Input record too long", RECORD_ERROR); } if(((cp - last_nl) == 1) || (rtn == -1)) break; } } *(--cp) = 0; if ( rtn == -1 ) { if(len) eof_seen = 1; else { endfile(); DBUG_RETURN(0); } } ++Recordcount; DBUG_RETURN(1); } int getcharacter() { /* * Read a character from curren input file. * WARNING: your getc() must convert lines that end with CR+LF * to LF and CP/M's EOF character (^Z) to a -1. * Also, getc() must return a -1 when attempting to read from * an unopened file. */ register int c; DBUG_ENTER("getcharacter"); if(pattern_arg) { if(ungetc_arg) { c = ungetc_arg; ungetc_arg = 0; } else if(*pattern_arg) c = *pattern_arg++; else c = EOF; } else { #ifdef BDS_C /* * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions * */ if ( (c = getc( Fileptr )) == '\r' ) { if ( (c = getc( Fileptr )) != '\n' ) { ungetc( c ); c = '\r'; } } else if ( c == 26 ) /* ^Z */ c = -1; #else c = getc( Fileptr ); #endif if ( c=='\n' ) ++Linecount; } DBUG_PRINT("getcharacter",("'%c'", c)); DBUG_RETURN(c); } ungetcharacter( c ) register char c; { /* * Push a character back into the input stream. * If the character is a record seperator, or a newline character, * the record and line counters are adjusted appropriately. */ DBUG_ENTER("ungetcharacter"); if ( c == *Recordsep ) --Recordcount; if ( c=='\n' ) --Linecount; DBUG_PRINT("ungetcharacter",("'%c'", c)); if(pattern_arg) DBUG_RETURN(ungetc_arg = c); DBUG_RETURN(ungetc( c, Fileptr )); } void endfile() { DBUG_ENTER("endfile"); fclose( Fileptr ); eof_seen = 0; Filename = NULL; Linecount = 0; DBUG_VOID_RETURN; } void error( s, severe ) register char *s; register int severe; { DBUG_ENTER("error"); if ( Filename ) fprintf( stderr, "%s:", Filename ); if ( Linecount ) fprintf( stderr, " line %d:", Linecount ); fprintf( stderr, " %s\n", s ); if ( severe ) exit( severe ); DBUG_VOID_RETURN; } void usage() { DBUG_ENTER("usage"); error( "Usage: bawk { action | - | -f } ...", USAGE_ERROR ); DBUG_VOID_RETURN; }