/* * Bawk C actions compiler */ #include #include "bawk.h" act_compile( actbuf ) char *actbuf; /* where tokenized actions are compiled into */ { Where = ACTION; return stmt_compile( actbuf ); } pat_compile( actbuf ) char *actbuf; /* where tokenized actions are compiled into */ { Where = PATTERN; return stmt_compile( actbuf ); } stmt_compile( actbuf ) char *actbuf; /* where tokenized actions are compiled into */ { /* * Read and tokenize C actions from current input file into the * action buffer. Strip out comments and whitespace in the * process. */ char *actptr, /* actbuf pointer */ *cp, /* work pointer */ buf[MAXLINELEN];/* string buffer */ int braces, /* counts '{}' pairs - return when 0 */ parens, /* counts '()' pairs */ i, /* temp */ c; /* current input character */ braces = parens = 0; actptr = actbuf; while ( (c = getcharacter()) != -1 ) { /* * Skip over spaces, tabs and newlines */ if ( c==' ' || c=='\t' || c=='\n' ) continue; if ( c=='#' ) { /* * Skip comments. Comments start with a '#' and * end at the next newline. */ while ( (c = getcharacter()) != -1 && c!='\n' ) ; continue; } if ( c=='{' ) { if ( Where==PATTERN ) { /* * We're compiling a pattern. The '{' marks * the beginning of an action statement. * Push the character back and return. */ ungetcharacter( '{' ); break; } else { /* * We must be compiling an action statement. * '{'s mark beginning of action or compound * statements. */ ++braces; *actptr++ = T_LBRACE; } } else if ( c=='}' ) { *actptr++ = T_RBRACE; if ( ! --braces ) /* * Found the end of the action string */ break; } else if ( c=='(' ) { ++parens; *actptr++ = T_LPAREN; } else if ( c==')' ) { if ( --parens < 0 ) error( "mismatched '()'", ACT_ERROR ); *actptr++ = T_RPAREN; } else if ( c==',' && !braces && !parens && Where==PATTERN ) { /* * found a comma outside of any braces or parens- * this must be a regular expression seperator. */ ungetcharacter( ',' ); break; } /* * Check if it's a regular expression: */ else if ( c=='/' ) { /* * A '/' inside a pattern string starts a regular * expression. Inside action strings, a '/' is * the division operator. */ if ( Where == PATTERN ) goto dopattern; else *actptr++ = T_DIV; } else if ( c=='@' ) { dopattern: /* * Within action strings, only the '@' may be used to * delimit regular expressions */ *actptr++ = T_REGEXP; ungetcharacter( c ); actptr += re_compile( actptr ); } /* * symbol, string or constant: */ else if ( alpha( c ) ) { /* * It's a symbol reference. Copy the symbol into * string buffer. */ cp = buf; do *cp++ = c; while ( (c=getcharacter()) != -1 && alphanum( c ) ); ungetcharacter( c ); *cp = 0; /* * Check if a keyword, builtin function or variable. */ if ( c = iskeyword( buf ) ) *actptr++ = c; else if ( i = isfunction( buf ) ) { *actptr++ = T_FUNCTION; storeint( actptr, i ); actptr += sizeof( i ); } else { /* * It's a symbol name. */ *actptr++ = T_VARIABLE; if ( !(cp = findvar( buf )) ) cp = addvar( buf ); storeptr( actptr, cp ); actptr += sizeof( cp ); } } else if ( c == '"' ) { /* * It's a string constant */ *actptr++ = T_STRING; actptr = str_compile( actptr, '"' ); } else if ( c == '\'' ) { /* * It's a character constant */ *actptr++ = T_CONSTANT; str_compile( buf, '\'' ); storeint( actptr, *buf ); actptr += sizeof( i ); } else if ( num( c ) ) { /* * It's a numeric constant */ *actptr++ = T_CONSTANT; cp = buf; do *cp++ = c; while ( (c=getcharacter()) != -1 && num(c) ); ungetcharacter( c ); *cp = 0; storeint( actptr, atoi( buf ) ); actptr += sizeof( i ); } /* * unary operator: */ else if ( c == '$' ) *actptr++ = T_DOLLAR; /* * or binary operator: */ else if ( c == '=' ) { if ( (c=getcharacter()) == '=' ) *actptr++ = T_EQ; else { ungetcharacter( c ); *actptr++ = T_ASSIGN; } } else if ( c == '!' ) { if ( (c=getcharacter()) == '=' ) *actptr++ = T_NE; else { ungetcharacter( c ); *actptr++ = T_LNOT; } } else if ( c == '<' ) { if ( (c=getcharacter()) == '<' ) *actptr++ = T_SHL; else if ( c == '=' ) *actptr++ = T_LE; else { ungetcharacter( c ); *actptr++ = T_LT; } } else if ( c == '>' ) { if ( (c=getcharacter()) == '>' ) *actptr++ = T_SHR; else if ( c == '=' ) *actptr++ = T_GE; else { ungetcharacter( c ); *actptr++ = T_GT; } } else if ( c == '&' ) { if ( (c=getcharacter()) == '&' ) *actptr++ = T_LAND; else { ungetcharacter( c ); *actptr++ = T_AND; } } else if ( c == '|' ) { if ( (c=getcharacter()) == '|' ) *actptr++ = T_LIOR; else { ungetcharacter( c ); *actptr++ = T_IOR; } } else if ( c == '+' ) { if ( (c=getcharacter()) == '+' ) *actptr++ = T_INCR; else { ungetcharacter( c ); *actptr++ = T_ADD; } } else if ( c == '-' ) { if ( (c=getcharacter()) == '-' ) *actptr++ = T_DECR; else { ungetcharacter( c ); *actptr++ = T_SUB; } } /* * punctuation */ else if ( instr( c, "[](),;*/%+-^~" ) ) *actptr++ = c; else { /* * Bad character in input line */ error( "lexical error", ACT_ERROR ); } if ( actptr >= Workbuf + MAXWORKBUFLEN ) error( "action too long", MEM_ERROR ); } if ( braces || parens ) error( "mismatched '{}' or '()'", ACT_ERROR ); *actptr++ = T_EOF; return actptr - actbuf; } char * str_compile( str, delim ) char *str, delim; { /* * Compile a string from current input file into the given string * buffer. Stop when input character is the delimiter in "delim". * Returns a pointer to the first character after the string. */ int c; char buf[ MAXLINELEN ]; while ( (c = getcharacter()) != -1 && c != delim) { if ( c == '\\' ) { switch ( c = getcharacter() ) { case -1: goto err; case 'b': c = '\b'; break; case 'n': c = '\n'; break; case 't': c = '\t'; break; case 'f': c = '\f'; break; case 'r': c = '\r'; break; case '0': case '1': case '2': case '3': *buf = c; for ( c=1; c<3; ++c ) { if ( (buf[c]=getcharacter()) == -1 ) goto err; } buf[c] = 0; sscanf( buf, "%o", &c ); break; case '\n': if ( getcharacter() == -1 ) goto err; default: if ( (c = getcharacter()) == -1 ) goto err; } } *str++ = c; } *str++ = 0; return str; err: sprintf( buf, "missing %c delimiter", delim ); error( buf, 4 ); } storeint( ip, i ) int *ip, i; { return *ip = i; } storeptr( pp, p ) char **pp, *p; { return *pp = p; } fetchint( ip ) int *ip; { return *ip; } char * fetchptr( pp ) char **pp; { return *pp; } getoken() { char *cp; int i; switch ( Token = *Actptr++ ) { case T_STRING: case T_REGEXP: Value.dptr = Actptr; Actptr += strlen( Actptr ) + 1; break; case T_VARIABLE: Value.dptr = fetchptr( Actptr ); Actptr += sizeof( cp ); break; case T_FUNCTION: case T_CONSTANT: Value.ival = fetchint( Actptr ); Actptr += sizeof( i ); break; case T_EOF: --Actptr; default: Value.dptr = 0; } #ifdef DEBUG if ( Debug > 1 ) printf( "Token='%c' (0x%x), Value=%d\n", Token,Token,Value.ival ); #endif return Token; }