/* * Bawk C actions compiler */ #include #include "bawk.h" EXPR_NODE *act_compile( actbuf ) register char *actbuf;/* where tokenized actions are compiled into */ { DBUG_ENTER("act_compile"); Where = ACTION; stmt_lex( actbuf ); Actptr = actbuf; getoken(); DBUG_RETURN(stmt_parse()); } EXPR_NODE *pat_compile( actbuf ) register char *actbuf;/* where tokenized actions are compiled into */ { DBUG_ENTER("pat_compile"); Where = PATTERN; stmt_lex( actbuf ); Actptr = actbuf; getoken(); DBUG_RETURN(stmt_parse()); } void stmt_lex( actbuf ) register char *actbuf;/* where tokenized actions are compiled into */ { /* * Read and tokenize C actions from current input file into the * action buffer. Strip out comments and whitespace in the * process. */ register char *actptr, /* actbuf pointer */ *cp; /* work pointer */ char buf[MAXLINELEN+1];/* string buffer */ register int braces = 0,/* counts '{}' pairs - return when 0 */ parens = 0, /* counts '()' pairs */ i, /* temp */ c, /* current input character */ finished = 0; DBUG_ENTER("stmt_lex"); actptr = actbuf; while ( !finished && ((c = getcharacter()) != -1) ) { switch(c) { case ' ': case '\t': case '\n': /* * Skip over spaces, tabs and newlines */ break; case '#': /* * Skip comments. Comments start with a '#' and * end at the next newline. */ while ( (c = getcharacter()) != -1 && c!='\n' ) ; break; case '{': if ( Where==PATTERN ) { /* * We're compiling a pattern. The '{' marks * the beginning of an action statement. * Push the character back and return. */ ungetcharacter( (char) '{' ); finished = 1; } else { /* * We must be compiling an action statement. * '{'s mark beginning of action or compound * statements. */ ++braces; *actptr++ = T_LBRACE; } break; case '}': *actptr++ = T_RBRACE; finished = (! --braces ); break; case '(': ++parens; *actptr++ = T_LPAREN; break; case ')': if ( --parens < 0 ) error( "mismatched '()'", ACT_ERROR ); *actptr++ = T_RPAREN; break; case ',': if ( !braces && !parens ) { /* * found a comma outside of any braces or * parens - this must be a regular * expression seperator. */ ungetcharacter( (char) ',' ); finished = 1; } else *actptr++ = T_COMMA; break; case '/': *actptr++ = T_DIV; break; case '@': *actptr++ = T_REGEXP; ungetcharacter( (char) c ); actptr += re_compile( actptr ); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': /* * It's a symbol reference. Copy the symbol into * string buffer. */ cp = buf; do *cp++ = c; while ( (c=getcharacter()) != -1 && (isalnum( c ) || (c == '_'))); ungetcharacter( (char) c ); *cp = 0; /* * Check if a keyword, builtin function or variable. */ if ( c = iskeyword( buf ) ) *actptr++ = c; else if ( i = isfunction( buf ) ) { *actptr++ = T_FUNCTION; storeint( actptr, i ); actptr += sizeof( i ); } else { /* * It's a symbol name. */ *actptr++ = T_VARIABLE; if ( !(cp = (char *) findvar( buf )) ) cp = (char *) addvar( buf ); storeptr( actptr, cp ); actptr += sizeof( cp ); } break; #ifdef QUOTE_STRING_HACK case '`': #endif case '"': /* * It's a string constant */ *actptr++ = T_STRING; actptr = str_compile( actptr, c ); break; case '\'': /* * It's a character constant */ *actptr++ = T_CONSTANT; str_compile( buf, (char) '\'' ); storeint( actptr, *buf ); actptr += sizeof( i ); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* * It's a numeric constant */ *actptr++ = T_CONSTANT; cp = buf; do *cp++ = c; while ( (c=getcharacter()) != -1 && isdigit(c) ); ungetcharacter( (char) c ); *cp = 0; storeint( actptr, atoi( buf ) ); actptr += sizeof( i ); break; case '$': *actptr++ = T_DOLLAR; break; case '=': if ( (c=getcharacter()) == '=' ) *actptr++ = T_EQ; else { ungetcharacter( (char) c ); *actptr++ = T_ASSIGN; } break; case '!': if ( (c=getcharacter()) == '=' ) *actptr++ = T_NE; else { ungetcharacter( (char) c ); *actptr++ = T_LNOT; } break; case '<': if ( (c=getcharacter()) == '<' ) *actptr++ = T_SHL; else if ( c == '=' ) *actptr++ = T_LE; else { ungetcharacter( (char) c ); *actptr++ = T_LT; } break; case '>': if ( (c=getcharacter()) == '>' ) *actptr++ = T_SHR; else if ( c == '=' ) *actptr++ = T_GE; else { ungetcharacter( (char) c ); *actptr++ = T_GT; } break; case '&': if ( (c=getcharacter()) == '&' ) *actptr++ = T_LAND; else { ungetcharacter( (char) c ); *actptr++ = T_AND; } break; case '|': if ( (c=getcharacter()) == '|' ) *actptr++ = T_LOR; else { ungetcharacter( (char) c ); *actptr++ = T_OR; } break; case '+': if ( (c=getcharacter()) == '+' ) *actptr++ = T_INCR; else { ungetcharacter( (char) c ); *actptr++ = T_ADD; } break; case '-': if ( (c=getcharacter()) == '-' ) *actptr++ = T_DECR; else { ungetcharacter( (char) c ); *actptr++ = T_SUB; } break; case '[': *actptr++ = T_LBRACKET; break; case ']': *actptr++ = T_RBRACKET; break; case ';': *actptr++ = T_SEMICOLON; break; case '*': *actptr++ = T_MUL; break; case '%': *actptr++ = T_MOD; break; case '^': *actptr++ = T_XOR; break; case '~': *actptr++ = T_NOT; break; default: /* * Bad character in input line */ error( "lexical error", ACT_ERROR ); } if ( actptr >= Workbuf + MAXWORKBUFLEN ) error( "action too long", MEM_ERROR ); } if ( braces || parens ) error( "mismatched '{}' or '()'", ACT_ERROR ); *actptr++ = T_EOF; DBUG_VOID_RETURN; } char * str_compile( str, delim ) register char *str, delim; { /* * Compile a string from current input file into the given string * buffer. Stop when input character is the delimiter in "delim". * Returns a pointer to the first character after the string. */ int tmpc; /* can not be a register variable */ register int c; register char buf[4]; DBUG_ENTER("str_compile"); while ( (c = getcharacter()) != -1 && c != delim) { if ( c == '\\' ) { switch ( c = getcharacter() ) { case -1: goto err; case 'b': c = '\b'; break; case 'n': c = '\n'; break; case 't': c = '\t'; break; case 'f': c = '\f'; break; case 'r': c = '\r'; break; case '0': case '1': case '2': case '3': *buf = c; for ( c=1; c<3; c++ ) { if ( (buf[c]=getcharacter()) == -1 ) goto err; } buf[c] = 0; sscanf( buf, "%o", &tmpc ); c = tmpc; break; case '\n': if ( getcharacter() == -1 ) goto err; default: if ( (c = getcharacter()) == -1 ) goto err; } } *str++ = c; } *str++ = 0; DBUG_RETURN(str); err: sprintf( buf, "missing %c delimiter", delim ); error( buf, 4 ); DBUG_RETURN(NULL); } void storeint( ip, i ) char *ip; int i; { DBUG_ENTER("storeint"); movmem((char *) &i, ip, sizeof(i)); DBUG_VOID_RETURN; } void storeptr( pp, p ) char *pp, *p; { DBUG_ENTER("storeptr"); movmem((char *) &p, pp, sizeof(p)); DBUG_VOID_RETURN; } int fetchint( ip ) register char *ip; { int i; DBUG_ENTER("fetchint"); movmem(ip, (char *) &i, sizeof(i)); DBUG_RETURN(i); } char * fetchptr( pp ) register char *pp; { char *p; DBUG_ENTER("fetchptr"); movmem(pp, (char *) &p, sizeof(p)); DBUG_RETURN(p); } #ifndef DBUG_OFF char *token_name[] = { 0, "CHAR", "BOL", "EOL", "ANY", "CLASS", "NCLASS", "STAR", "PLUS", "MINUS", "ALPHA", "DIGIT", "NALPHA", "PUNCT", "RANGE", "ENDPAT", "T_STRING", "T_DOLLAR", "T_REGEXP", "T_REGEXP_ARG", "T_CONSTANT", "T_VARIABLE", "T_FUNCTION", "T_SEMICOLON", "T_EOF", "T_LBRACE", "T_RBRACE", "T_LPAREN", "T_RPAREN", "T_LBRACKET", "T_RBRACKET", "T_COMMA", "T_ASSIGN", "T_STAR", "T_MUL", "T_DIV", "T_MOD", "T_ADD", "T_UMINUS", "T_SUB", "T_SHL", "T_SHR", "T_LT", "T_LE", "T_GT", "T_GE", "T_EQ", "T_NE", "T_NOT", "T_ADDROF", "T_AND", "T_XOR", "T_OR", "T_LNOT", "T_LAND", "T_LOR", "T_INCR", "T_DECR", "T_POSTINCR", "T_POSTDECR", "T_IF", "T_ELSE", "T_WHILE", "T_BREAK", "T_CHAR", "T_INT", "T_BEGIN", "T_END", "T_NF", "T_NR", "T_FS", "T_RS", "T_FILENAME", "T_STATEMENT", "T_DECLARE", "T_ARRAY_DECLARE" }; #endif char getoken() { register char *cp; register int i; DBUG_ENTER("getoken"); switch ( Token = *Actptr++ ) { case T_STRING: case T_REGEXP: Value.dptr = Actptr; Actptr += strlen( Actptr ) + 1; break; case T_VARIABLE: Value.dptr = fetchptr( Actptr ); Actptr += sizeof( cp ); break; case T_FUNCTION: case T_CONSTANT: Value.ival = fetchint( Actptr ); Actptr += sizeof( i ); break; case T_EOF: --Actptr; default: Value.dptr = 0; } DBUG_PRINT("getoken", ("Token='%s' (%d), Value=%d",token_name[Token],Token,Value.ival)); DBUG_RETURN(Token); }