/* Copyright © 1991, 1992 by Walter Rothe. You may freely use and modify this * program, but not for commercial profit. A modest fee for distribution is * allowed. Derivative works must be released with source along with the * executable or provisions made to provide the user source, if requested. * Uploading source to a major bulletin board system within 6 months of the * time of the request satisfies this requirement. This copyright notice * must not be deleted from the source. */ /* */ /* Scan */ /* */ /* This program scans file(s) looking for a pattern(s). It supports many */ /* wildcard characters in patterns(*,?,[],[^],[-],&,..,+,|) and can scan */ /* for up to 125 patterns simultaneously with little speed degradation. */ /* If a match is found, a whole article can be printed out instead of just */ /* a number of lines around the match. It also supports recursive directory */ /* scanning and inverted pattern matching. */ /* */ /* Limitations: 1. Total file pathname must be less than LONGWIDTH wide. */ /* Aborts otherwise. */ /* 2. Article separator must be less than MAXKWSZ and LWIDTH */ /* characters long. Aborts otherwise. */ /* 3. Sentence scan only looks OVERLAP chars to left or right */ /* of keyword match to see if rest of stuff matches. */ /* 4. Patterns are a max of LWIDTH chars wide. If not, it */ /* aborts. */ /* 5. A max of 125 major terms is supported. Aborts otherwise. */ /* 6. A major term must have at least 1 set of 2 consequtive */ /* non wildcard characters. Otherwise, it will abort. */ /* 7. Article separator must be at least 2 chars long and have */ /* at least 2 unique chars. Otherwise, it will abort. */ /* 8. If article size is > window size(buffer size), there */ /* is a chance that not all of it will be printed out. A */ /* warning will be printed out if this occurs. At least one */ /* buffers worth will always be printed out. The part of the */ /* article in the current buffer will also be printed out. */ /* 9. The ".." 
wildcard causes a match on either the left or */ /* right MT if the LineScan option is set. */ /* 10.Buffer size must be in longword increments. */ /* */ #include #include #include #include #include #include #include "fcntl.h" #include "ctype.h" #include #include "scan.h" #include #ifndef LATTICE #endif clock_t StrtTime; clock_t EndTime; clock_t TotTime=0; extern FastSearch(); /* Inner loop search(returns maj term #) */ extern Lines *GetMajTrms(); /* Get list of Maj Trms from Min Terms */ extern int NumOfMinTrms; extern int NumOfMajTrms; extern XFI *xfropen(); extern long xfrread(); extern xfrclose(); extern FindRestOfMT(); extern struct Library *OpenLibrary(); char SubPat[65536]; /* Major term index from two char index */ unsigned short CurPat; unsigned short SavEPat, SavTEPat; unsigned short SavOPat, SavTOPat; unsigned short frstuppr, scnduppr; /* "rest of" signifies the characters remaining after the 2 char subkey is */ /* removed. Lets say the keyword is "never" and the subkey is "ne". In this */ /* case, the rest is "ver". If the subkey was "ev", the rest is "never", */ /* since it doesn't do separate compares. If the keyword is "ab" then the */ /* subkey is also "ab" and the rest is "ab". 
We need at least 2 chars since */ /* tables FrstBt and ScndBt are used, without testing 4 finished, for speed */ unsigned char DsplTb[2*MAXMTS]; /* Displ 2 1st char of rest of key from MTIdx*/ char FrstBt[2*MAXMTS]; /* 1st char of rest of keyword from maj term index */ char ScndBt[2*MAXMTS]; /* 2nd char of rest of keyword from maj term index */ char LowrCs[256]; /* Lower case char from mixed case chars */ int MajTrm2MinTrm[MAXMTS]; /* Minterm number indexed by major term num */ int MajTrm2BitNum[MAXMTS]; /* Bit number in MinSatTbl from maj trm # */ int MinSatByMTOnly[MAXMTS]; /* 1 if min term is satisfied by 1 maj trm */ int MajTrm2MaxKeyLen[MAXMTS]; /* Length of longest keyword in major term */ int MTNumSortedByKeyLen[MAXMTS];/* Maj Trm #'s sorted by length of keyword */ char *FreeItList[256]; /* Max number of bracket expressions */ int FrI=0; int NextMT; int PrntPrevPrev = 0; int CurLen; int TokLen; int CurDirModified=0; int LineScan; int LineNum = 0; /* Print out line number with match if 1 */ int TermLp = 0; int NumDup; int FrstPnt; int LastInLast; /* zero if nothing printed out in present buf yet */ int PrntWidth=0; int AlwaysPrint = 0; /* if 1, always print file pathname scanned */ int LenSt; int SzPtr; int WroteOverIt = 0; int Inv; int DKWIdx; int SavEIdx, SavTEIdx; int SavOIdx, SavTOIdx; int NumBefore=0, NumAfter=1; int RealFile; /* if 1, outputting to real file instead of screen */ int InvertMatch=0; /* When 1, outputs articles that dont match */ int ColReq=0; /* Column that article separator must be in. 
0 -> ignore */ int ColOk; int FDsplTbl[MAXDUP]; /* Full Displ to strt of keywrd by MT Index */ int ArtInPrevBuf; /* Start of article is in previous buffer */ int BlkSize=1; /* Size of block to write to output stream */ long NumBlksToWrt; /* Num of blocks to write to output stream */ long NumBlksToWrt2; char *WhereToStrt; /* Pntr 2 start of article to write out */ char *EndOfPrevBuf; /* Pntr 2 end of buffer used b4 current buf */ char *iii; int bufchr; char *SOCB; /* Pntr 2 start of current buf being used */ char *ASOB; /* Pntr 2 absolute start of current buffer */ long CurOddOH; long CurEvenOH; long TotOH; long MulFct; long OH; char *KWTbl[2*MAXMTS][2]; /* Pntrs 2 1st & last char of rest of keywd */ char *SvTbl[2*MAXMTS]; /* Saves pntr to strt of keyword for later */ char *DKWTbl[MAXDUP][4]; /* Pntrs 2 1st/last/Dspl/link 2 next dup 4 rest kwd */ char *EOCB; /* Pntr 2 end last byte in current buf + 1 */ char *SavEOCB, *SavSOCB; char **TmpPtr; char ArticleSep[] = "\nArticle"; char *ArtSep = ArticleSep; /* Pntr 2 rest of article separator */ char *EOASep = ArticleSep + 7; /* Pntr 2 last char of rest of article sep */ unsigned char DFASep[2]; /* Displ for art sep into buffer */ char *CurArtStrt; /* Pntr 2 strt of article in a buffer */ char EOBK[] = "$-$"; /* Keyword indicating end of buffer */ char LineSrchDelim[] = "%!%"; /* Article separator when doing line search */ char *PntPtr; char *RightStrt; char *LastPntEnd; long WinSiz = 16384; /* Size in bytes of each of the 3 buffers */ FILE *StrmPtr = stdout; /* Output stream */ FILE *OutFile=NULL; /* Output file */ FILE *ConfigF=NULL; /* Configuration file */ char ArtBuf[LWIDTH]; /* Buffer to put article separator in */ Lines MinArray[MAXMTS]; Lines *MinTerm = &MinArray[0]; Lines *EndMin; Lines *MinPtr; long MinSatTbl[MAXMTS][2]; /* 32 bit fields & masks of MT's satisfied */ int LastMTTbl[MAXMTS]; /* Daisy chain of major terms found in art */ int MTIndx; int LastMT; /* Index into LastMTTbl of last maj trm fnd */ Lines 
Garb1; char *Stf1 = (char *)&Garb1; Lines Garb2; char *Stf2 = (char *)&Garb2; Lines Garb3; char *Stf3 = (char *)&Garb3; char *TmpTrm; Lines MTArray[MAXMTS]; Lines NoBrakArray[MAXMTS]; /* Array of MT's with stuff between [] removed */ /* Also ] is removed. This array winds up */ /* having the *,&, and [ chars changed to zero */ Lines NoBrakAry[MAXMTS]; /* Same as NoBrakArray except the *,&, and [ */ /* are not overwritten with 0. */ int TokStrt[2*MAXMTS]; int TokEnd[2*MAXMTS]; char *NBAryEnd[MAXMTS]; /* pntrs to end of MT strings with no ] */ Lines *MajTerm = &MTArray[0]; Lines *LstMajTrm; Lines *EndMT; char *InName, *OutName, *LastArg; char *Tok, *SavTok, *SavTTok; DIB *CurDirPtr=NULL; int i, j, k, ll, ci, t1; /* counter variables */ int FndBrak; int StrtBrk; BrakTyp1 *BrakIdx=NULL; BrakTyp2 *BrakPtr=NULL; char *myptr; int FndALOGKeyWrd; int OutArt = 0; int Indx; int LFCnt; int Mtch1st = 0; int MatchFnd=1; int MatchNotFnd=0; XFI *FHandle=0; char *FName; char *TmpCS, *TmpCS2; long ReadNum; char *malloc(), *strcpy(), *strcat(); int HasCnfgF; unsigned char MTNum; /* Major term number. */ unsigned char MTN; /* Major term index. MTN = MTIdx>>1 */ unsigned char MTIdx; /* Major term index. 
MTNum = MTIdx>>1 */ char **MTTmp; char *BufIdx; char *TmpP, *BP; int CmdLnArgIdx, LstCmdLnArgIdx, RecursFlg = 0; int NumOfCR = 0; /* Num of line feeds in all buffs already scanned */ int NumOfCRInCurBuf = 0; /* Num of line feeds found so far in current buf */ char *LstOut = NULL; /* Pntr to last char outputed in current buffer + 1 */ long NumTot=-1; static int LnSz; int Trunc=0; int TextWidth; /* Width of current window in characters */ int HasKeyWrd=1; int NoKeyW=0; char *HighLightColor="\x9B\x33\x32m"; char *PathNmColor="\x9B\x33\x33m"; char *DlydPntStart=NULL; /* Pntr 2 right context that still needs 2b printed */ int DlydPntRightContext=0; long DlydPntSize=0; int DlydPntNextBuf=0; char *KeyStrt; char *WrdStrt; char *DlydPntEnd; int SizeDiff; int TotTokLen; int KeyWrdOvlp = MAXKWSZ; /* Used in asyncread.c for main srch buf overlap */ int OpenNew = 0; /* 1 if finished with internal LZH archive file */ int LON; int StrtLFC; char *StrtPP; int CutIt = 0; /* binary file cut since could not find a line feed in time */ int MayNeedLF = 0; char *LZHFileName = ""; /* Internal filename of lzh file being scanned */ char *WildLZH = "*"; /* Wildcard pattern of which lzh int files to search */ int EnableLZHDecomp = 0; /* When true, enables decompression of .lzh files */ int TwoCharArtSep = 0; /* If 1, the article separator is only 2 chars long. */ int SPIdx; int iij; int FndX; int TmpT; long ZeroLong=0; extern int ItsALZH; FILE *zero=NULL; void ClrSubPat() /* This should be done in assembly */ { long i; for (i=0; i<65536; i++) { SubPat[i] = 0; } } int MaxVal(a, b) int *a, *b; { if (MajTrm2MaxKeyLen[*a] < MajTrm2MaxKeyLen[*b]) { return(-1); } else { return( MajTrm2MaxKeyLen[*a] > MajTrm2MaxKeyLen[*b]); } } int WindowSize() { char c; int n = 0, width; char buffer[32]; set_raw(); printf("\2330 q"); /* get window bounds */ n = 0; while( (buffer[n] = getchar()) != 'r' && n++ < 32); c = buffer[n-3]; width = ( (c <= '9' && c > '0') ? 
(c - '0') * 10 : 0 ) + buffer[n-2] - '0'; buffer[n-1] = '\0'; set_con(); return(width); } void fxwrite( Buf, BlkSiz, Count, Strm, KeyWrdPrs ) /* if "line number" option selected, output line number with output text, */ /* otherwise just output text. fx is called for present buf, fy for prev. */ /* Note that at each buffer switch, the number of line feeds in the prev */ /* buffer is added to the previous total(NumOfCR). Also truncate lines */ /* that are wider than window, if -t option set. Change color of word */ /* containing the matched keyword so it's highlighted. */ char *Buf; size_t BlkSiz, Count; FILE *Strm; int KeyWrdPrs; /* set by caller if keyword is present at end of buffer */ { long NumBytInRec; int OnSameLine=0; static int ColorOn=0; int ColorEnable=0; int DfSz; char *SavLO; int TurnColorOff=0; int MinBefore; /* min # of chars before keyword needing color highlight */ if (!RealFile) { /* set all nonprinting chars to blanks */ for (iii=Buf; iii 32 || (*iii < 0 && *iii > -97)) && *iii != '\n') || *iii == 0x7E) *iii = ' '; } } if( LineNum ) { if( Count == 0) return; if( Buf < SOCB ) { ErrP("Error: program bug!!! Buf < SOCB \n"); } if( Buf > EOCB ) { ErrP("Error: program bug!!! 
Buf > EOCB \n"); } if( LstOut < SOCB ) { ErrP("Error: LstOut < SOCB \n"); } if( LstOut > EOCB ) { ErrP("Error: LstOut > EOCB \n"); } /* count line feeds from last output to start of current output */ CntCRInCurBuf( Buf); if( NumTot == NumOfCR + NumOfCRInCurBuf ) OnSameLine = 1; while( Count != 0 ) { SavLO = LstOut; if( ColorOn && OnSameLine ) { while( Count != 0 && isalnum((int)(*LstOut))){ LstOut++; Count--; } fprintf( Strm, HighLightColor ); if( Count != 0) TurnColorOff = 1; } else { while( Count != 0 && *LstOut != '\n') { LstOut++; Count--; } } NumTot = NumOfCR + NumOfCRInCurBuf; if( Count != 0 && *LstOut == '\n') { NumOfCRInCurBuf++; LstOut++; Count--; } if( ColorEnable && (!Trunc || LnSz <= TextWidth) ) { ColorEnable = 0; ColorOn = 1; OnSameLine = 1; fprintf( Strm, HighLightColor ); } if( KeyWrdPrs && Count == 0 && !ColorOn && !RealFile) { MinBefore = TotTokLen; while( (isalnum((int)(*(LstOut-1))) || MinBefore > 0) && LstOut != SavLO) { Count++; LstOut--; MinBefore--; } ColorEnable = 1; } /* if last time we printed a match, we were on the same line as */ /* current match, don't put out line #. 
*/ if( !OnSameLine ) { if( NumTot > 99999 ) { if( Trunc ) { /* truncate line if line is too long */ DfSz = LstOut - SavLO; LnSz = DfSz + 8; /* size of data plus size of line # */ if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 8; } if( LnSz >= TextWidth && ColorEnable ) { fprintf( Strm, HighLightColor ); } fprintf( Strm, "%7ld ", NumTot+1 ); if( LnSz >= TextWidth && ColorEnable ) { fprintf( Strm, "\2330m"); } fwrite( SavLO, BlkSiz, DfSz, Strm ); } else { fprintf( Strm, "%7ld ", NumTot+1 ); fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); } } else { if( Trunc ) { DfSz = LstOut - SavLO; LnSz = DfSz + 6; if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 6; } if( LnSz >= TextWidth && ColorEnable ) { fprintf( Strm, HighLightColor ); } fprintf( Strm, "%5ld ", NumTot+1 ); if( LnSz >= TextWidth && ColorEnable ) { fprintf( Strm, "\2330m"); } fwrite( SavLO, BlkSiz, DfSz, Strm ); } else { fprintf( Strm, "%5ld ", NumTot+1 ); fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); } } OnSameLine = 0; } else { if( Trunc ) { DfSz = LstOut - SavLO; if( DfSz + LnSz > TextWidth ) { DfSz = TextWidth - LnSz; LnSz = TextWidth; } else { LnSz += DfSz; } fwrite( SavLO, BlkSiz, DfSz, Strm ); } else { fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); } OnSameLine = 0; if( ColorOn ) { fprintf( Strm, "\2330m"); if( *(LstOut-1) == '\n' ) { ColorOn = 0; } else { if( TurnColorOff ) OnSameLine = 1; } if( TurnColorOff ) { TurnColorOff = 0; ColorOn = 0; } } } } } else { /* no line numbers */ SavLO = Buf; if( ColorOn ) { /* Change color of the part of matched keyword to the right of */ /* the two char subpat up to the 1st space or line feed. 
*/ LstOut = Buf; while( Count != 0 && isalnum((int)(*LstOut)) ) { LstOut++; Count--; } fprintf( Strm, HighLightColor ); fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); SavLO = LstOut; LstOut += Count; fprintf( Strm, "\2330m"); if( Count != 0) ColorOn = 0; } else { LstOut = SavLO + Count; } if( KeyWrdPrs && !RealFile) { /* Change color of the part of matched keyword to the left of and */ /* including the two char subpat. Stop moving left on space or */ /* start of buffer. Note that when this routine is called with */ /* KeyWrdPrs set, the two char subpat is always the last item in */ /* the buffer. The left context is before it. */ Count = 0; MinBefore = TotTokLen; while( (isalnum((int)(*(LstOut-1))) || MinBefore > 0) && LstOut != SavLO) { Count++; LstOut--; MinBefore--; } fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); fprintf( Strm, HighLightColor ); SavLO = LstOut; LstOut += Count; fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); fprintf( Strm, "\2330m"); if( *(LstOut-1) != '\n' ) ColorOn = 1; } else { fwrite( SavLO, BlkSiz, LstOut - SavLO, Strm ); } } if( CutIt ) { if( LnSz != TextWidth) fprintf( Strm, "\n"); CutIt = 0; } fflush( Strm ); return; } void fywrite( Buf, BlkSiz, Count, Strm ) /* if "line number" option selected, output line number with output text */ /* otherwise just output text. fy is called for prev buf, fx for present */ char *Buf; size_t BlkSiz, Count; FILE *Strm; { char *CurPtr; int OnSameLine=0; long NumCR2EndOfPBuf=0; char *SavLO; int DfSz; if (!RealFile) { /* set all nonprinting chars to blanks */ for (iii=Buf; iii 32 || (*iii < 0 && *iii > -97)) && *iii != '\n') || *iii == 0x7E) *iii = ' '; } } if( LineNum ) { if( Buf < SavSOCB ) { ErrP("Error: program bug!!! Buf < Prev SOCB \n"); } if( Buf > SavEOCB ) { ErrP("Error: program bug!!! 
Buf > Prev EOCB \n"); } if( Count == 0 ) { ErrP("Error2: Count to fywrite = 0 \n"); } if( LstOut < SOCB ) { ErrP("Error2: LstOut < SOCB \n"); } if( LstOut > EOCB ) { ErrP("Error2: LstOut > EOCB \n"); } /* count line feeds from start of output in prev buf 2 end of prev buf */ CurPtr = SavEOCB; for(; CurPtr>=Buf; CurPtr-- ) { if( *CurPtr == '\n') NumCR2EndOfPBuf++; } CurPtr++; if( NumTot == NumOfCR - NumCR2EndOfPBuf ) OnSameLine = 1; while( Count != 0 ) { SavLO = CurPtr; while( (--Count != 0) && (*CurPtr != '\n') ) { CurPtr++; } NumTot = NumOfCR - NumCR2EndOfPBuf; if( *CurPtr != '\n') NumCR2EndOfPBuf--; CurPtr++; /* if last time we printed a match, we were on the same line as */ /* current match, don't put out line #. */ if( !OnSameLine ) { if( NumTot > 99999 ) { fprintf( Strm, "%7ld ", NumTot+1 ); if( Trunc ) { /* truncate line if line is too long */ DfSz = CurPtr - SavLO; LnSz = DfSz + 8; /* size of data plus size of line # */ if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 8; } fwrite( SavLO, BlkSiz, DfSz, Strm ); } else { fwrite( SavLO, BlkSiz, CurPtr - SavLO, Strm ); } } else { fprintf( Strm, "%5ld ", NumTot+1 ); if( Trunc ) { DfSz = CurPtr - SavLO; LnSz = DfSz + 6; if( LnSz > TextWidth ) { LnSz = TextWidth; DfSz = LnSz - 6; } fwrite( SavLO, BlkSiz, DfSz, Strm ); } else { fwrite( SavLO, BlkSiz, CurPtr - SavLO, Strm ); } } OnSameLine = 0; } else { if( Trunc ) { DfSz = CurPtr - SavLO; if( DfSz + LnSz > TextWidth ) { DfSz = TextWidth - LnSz; LnSz = 0; } else { LnSz += DfSz; } fwrite( SavLO, BlkSiz, DfSz, Strm ); } else { fwrite( SavLO, BlkSiz, CurPtr - SavLO, Strm ); } OnSameLine = 0; } } NumTot = NumOfCR; } else { fwrite( Buf, BlkSiz, Count, Strm ); }; if( CutIt ) { if( LnSz != TextWidth) fprintf( Strm, "\n"); CutIt = 0; } return; }

/* _abort: run CleanIt() and then abort().                              */
/* NOTE(review): abort() does not return in standard C, so Exit(2) is   */
/* presumably never reached - confirm against the compiler's library.   */
void _abort()
{
   CleanIt();
   abort();
   Exit(2);
}

/* BackSlash: convert, in place, the backslash escape sequences in the  */
/* string pointed to by str into the characters they stand for.  The    */
/* result is never longer than the input.  Sequences handled:           */
/*    \a \b \f \n \r \t \v \" \' \\   standard C escapes                */
/*    \xH.. or \XH..   hex digits, value must be 1..255                 */
/*    \D, \DD, \DDD    up to 3 decimal digits, value must be < 256      */
/* Anything else after a backslash is reported through ErrP().          */
/* Always returns 0.                                                    */
int BackSlash(str)
char *str;
{
   int i = 0;         /* read position in str */
   int k = 0;         /* write position in str, never ahead of i */
   int t, c, h;
   char subst[4];     /* up to 3 decimal digits plus NUL.  This used to */
                      /* be a pointer to the literal "000", which must  */
                      /* not be written to.                             */

   while( str[i] ) {
      if( str[i] == '\\' ) {
         switch( str[++i] ) {
            case('a'):  str[i] = '\a'; break;
            case('b'):  str[i] = '\b'; break;
            case('f'):  str[i] = '\f'; break;
            case('n'):  str[i] = '\n'; break;
            case('r'):  str[i] = '\r'; break;
            case('t'):  str[i] = '\t'; break;
            case('v'):  str[i] = '\v'; break;
            case('"'):  str[i] = '\"'; break;
            case('\''): str[i] = '\''; break;
            case('\\'): str[i] = '\\'; break;
            case('x'):
            case('X'):
               /* Read the hex digits by hand so that t is always       */
               /* defined and c is the number of digits really used     */
               /* (leading zeros included).  Stop as soon as the value  */
               /* no longer fits in a character.                        */
               t = 0;
               c = 0;
               while( t < 256 && isxdigit((unsigned char)str[i+1+c]) ) {
                  h = (unsigned char)str[i+1+c];
                  t = (t << 4) + (isdigit(h) ? h - '0' : toupper(h) - 'A' + 10);
                  c++;
               }
               if(( t > 0) && ( t < 256)) {
                  i += c;              /* i now sits on the last digit */
                  str[i] = (char)t;
               } else {
                  ErrP("Error: only 2 hex digit allowed after \\x \n");
               }
               break;
            case('0'): case('1'): case('2'): case('3'): case('4'):
            case('5'): case('6'): case('7'): case('8'): case('9'):
               /* Gather at most 3 digits; i ends one past the last one */
               c = 0;
               do {
                  subst[c++] = str[i++];
               } while( (c < 3) && (str[i] >= '0') && (str[i] <= '9') );
               subst[c] = '\0';
               if( (t = atoi( subst)) < 256) {
                  str[--i] = (char)t;  /* put result on the last digit */
               } else {
                  ErrP("Error: decimal # after \\ must be < 256\n");
               }
               break;
            default:
               ErrP("Error: illegal character after backslash\n");
         }
      }
      str[k++] = str[i++];
   }
   str[k] = '\0';
   return(0);
}

CleanIt() { BPTR TLock; DIB *TempPtr; #ifdef DEBUGCLEAN printf("1st statement in cleanit\n"); fflush( zero ); #endif /* pop up any dir levels and restore original current directory */ while ( CurDirPtr != NULL ) { if (CurDirPtr->CurLock != 0 ) { UnLock( CurDirPtr->CurLock ); #ifdef DEBUGCLEAN printf("unlocked\n"); fflush( zero ); #endif } if (CurDirPtr->OldLock != 0 ) { TLock = CurrentDir( CurDirPtr->OldLock ); #ifdef DEBUGCLEAN printf("set currentdir\n"); fflush( zero ); #endif } if (CurDirPtr->BackLink != NULL) { TempPtr = CurDirPtr->BackLink; FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = TempPtr; #ifdef DEBUGCLEAN printf("freed what CurDirPtr points to\n"); fflush( zero ); #endif } else { FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = NULL; #ifdef DEBUGCLEAN printf("last free of what CurDirPtr points to\n"); fflush( 
zero ); #endif } } xfrclose( FHandle); NumOfCR = 0; if( ConfigF != NULL ) { fclose( ConfigF ); ConfigF = 0; } if( OutFile != NULL ) { fclose( OutFile ); OutFile = 0; } if( BrakIdx != NULL ) { FreeMem( BrakIdx, (long)(MAXMTS*sizeof(BrakTyp1))); BrakIdx = 0; #ifdef DEBUGCLEAN printf("freed mem for braketidx\n"); fflush( zero ); #endif } if( BrakPtr != NULL ) { FreeMem( BrakPtr, (long)(MAXMTS*sizeof(BrakTyp2))); BrakPtr = 0; #ifdef DEBUGCLEAN printf("freed mem for braketptr\n"); fflush( zero ); #endif } while( FrI > 0 ) FreeMem( FreeItList[--FrI], 256 ); } /* Normal strncpy with NULL added if n < length of s */ char *mystrncpy(d, s, n) char *d; const char *s; size_t n; { char c; char *t = d; while(n && (c = *s)) { *d = c; ++s; ++d; --n; } if( n) { *d = 0; } else { if( d-- > t) *d = 0; } return(t); } ErrP(msg) char *msg; { fprintf(stderr,msg); CleanIt(); exit(1); } PathPrint(Matched) /* Print pathname of file being searched. "AlwaysPrint" signifies that the */ /* pathname should be printed even if a match is not found in a file. If */ /* a match is not found in a file, it's pathname is overwritten by the */ /* next file scanned. When doing screen output, the color of the pathname */ /* printed is changed. 
*/ int Matched; { static unsigned long BlkSz = 1; static unsigned long Wrt1 = 1; static unsigned long Wrt2 = 2; static unsigned long Wrt9 = 9; static char LastFileNm[LONGWIDTH] = "\0"; LLines TempFN; extern int Mtch1st; /* flag set after 1st match in a file */ int pad; strcpy( TempFN, FName); if( ItsALZH ) { strcat( TempFN, "(" ); strcat( TempFN, LZHFileName ); strcat( TempFN, ")" ); } if(Matched) { /* a match was found before this routine was called */ if(RealFile) { if(strcmp(LastFileNm, TempFN)) { /* not = so havent printed it yet */ fwrite("->>>>>>> ",BlkSz,Wrt9,StrmPtr); fwrite(TempFN,BlkSz,strlen(TempFN),StrmPtr); fwrite("\n",BlkSz,Wrt1,StrmPtr); strcpy(LastFileNm, TempFN); Mtch1st = 1; } else { if(!Mtch1st) { fwrite("\n",BlkSz,Wrt1,StrmPtr); Mtch1st = 1; } } } else { if(strcmp(LastFileNm, TempFN)) { /* not = so havent printed it yet */ printf(PathNmColor); fwrite(TempFN,BlkSz,strlen(TempFN),StrmPtr); fwrite("\n",BlkSz,Wrt1,StrmPtr); printf("\2330m"); strcpy(LastFileNm, TempFN); Mtch1st = 1; } else { if(!Mtch1st) { fwrite("\n",BlkSz,Wrt1,StrmPtr); Mtch1st = 1; } } } } else { if(!RealFile && AlwaysPrint) { if(strcmp(LastFileNm, TempFN)) { /* not = so havent printed it yet */ fwrite("\r",BlkSz,Wrt1,StrmPtr); printf(PathNmColor); fwrite(TempFN,BlkSz,strlen(TempFN),StrmPtr); printf("\2330m"); pad = strlen(TempFN) - strlen(LastFileNm); while (pad < 0) { fputs(" ",StrmPtr); pad++; } fflush(StrmPtr); strcpy(LastFileNm, TempFN); Mtch1st = 0; } } } } /* This procedure initializes the "beginning" and "end arg" numbers for the */ /* NextFile function. Some command line interpreters */ /* automatically expand wild cards which makes it hard to know which */ /* absolute argument ends the list of file names. This function terminates */ /* the list with the 1st "-" or when a certain # of arguments are left. */ /* Input parameters NumBefore and NumAfter are constants used to make this */ /* routine generic for different CLI command formats. 
*/ int InitNextFile(argv, argc, NumBefore, NumAfter) int argc; char *argv[]; int NumBefore; /* Number of arguments before filenames start but not */ /* including the cmd itself or any "-" arguments. */ /* This is provided to support generic reusability. */ int NumAfter; /* Number of arguments after filenames end. No "-" args */ { extern int CmdLnArgIdx, LstCmdLnArgIdx, RecursFlg; int ArgCSav; char **ArgVSav; int NumB, NumA; char *CmdPtr; CmdLnArgIdx = 0; NumB = NumBefore; ArgVSav = argv; ArgCSav = argc; /* Assign CmdLnArgIdx the index of the first filename on the command line */ while (( NumB >= 0) && (CmdLnArgIdx < ArgCSav)) { CmdPtr = *(++ArgVSav); CmdLnArgIdx++; if (*CmdPtr != '-') NumB--; } if (CmdLnArgIdx == ArgCSav) { ErrP("InitNextFile: # before too large\n"); } NumA = NumAfter; ArgVSav = &argv[argc]; /* Assign LstCmdLnArgIdx the index of the last filename on the command line */ LstCmdLnArgIdx = argc; while (( NumA >= 0) && (LstCmdLnArgIdx > 0)) { CmdPtr = *(--ArgVSav); LstCmdLnArgIdx--; if (*CmdPtr != '-') NumA--; else { NumA--; } } if (LstCmdLnArgIdx == 0) { ErrP("InitNextFile: # args after too large\n"); } } /* This function is used to parse the command line for filenames. The */ /* InitNextFile procedure does the initial work to find absolute beginning */ /* and ending argument numbers and this function uses those numbers to */ /* retrieves a directory and wildcard pattern from the command line and */ /* search for any files matching the pattern. Each time it is called, it */ /* returns a pointer to another full pathname until no more match. Then it */ /* does the same thing with the next command line argument until all command */ /* line args are tried. A NULL pointer returned means no more matches exist. 
*/ char *NextFile(ArgVSav) char *ArgVSav[]; { extern int CmdLnArgIdx, LstCmdLnArgIdx, RecursFlg; static FnshdRecurs = 1; /* implicit int; 1 -> fetch and decode the next cmd line arg */ static LLines LstDirNm = "\0"; /* Init to empty string */ static LLines FullPathNm; /* pathname handed back to the caller */ DIB *TmpPtr; static int StopOnMatch; DIB *NewPtr; FIB *TmpFIBPtr; BPTR CLock, NewLock, TLock; char *PathNmPtr1; char *PathNmPtr2; char *EndOfDir; static char *CurFileNm; static char *ScanAll = "*"; static LLines CurDirName; static LLines SavDirName; char *StrtOfPathNm; int FileNtFnd = 1; int Len; int ii; char *Sidx; char *Sidx2; char *FName; do { if( CmdLnArgIdx > LstCmdLnArgIdx ) { /* all filename args used up: release the top directory and stop */ if( LstDirNm[0] != '\0') { UnLock( CurDirPtr->CurLock ); TLock = CurrentDir( CurDirPtr->OldLock ); FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = NULL; } return(NULL); } if (FnshdRecurs) { /* get and decode next cmd line argument */ StrtOfPathNm = ArgVSav[ CmdLnArgIdx ]; if( *StrtOfPathNm == '-' ) { CmdLnArgIdx++; continue; } FnshdRecurs = 0; if( strpbrk( StrtOfPathNm, ":/") != NULL) { /* has dir info */ /* get directory pathname from command line */ if( (unsigned long)(PathNmPtr1 = strrchr( StrtOfPathNm, ':')) > (unsigned long)(PathNmPtr2 = strrchr( StrtOfPathNm, '/')) ) { EndOfDir = PathNmPtr1 + 1; } else { EndOfDir = PathNmPtr2 + 1; } if( (EndOfDir - StrtOfPathNm + 1) > LONGWIDTH ) ErrP("File pathname too long. 
Aborting...\n"); mystrncpy( CurDirName, StrtOfPathNm, EndOfDir - StrtOfPathNm + 1); /* get pointer to start of filename in pathname */ CurFileNm = EndOfDir; } else { /* cmd ln arg only has file info */ /* get pointer to start of filename in pathname */ CurFileNm = StrtOfPathNm; CurDirName[0] = ' '; /* flag as default directory */ CurDirName[1] = '\0'; } /* if only given dir name, fill in filename to match any file in dir */ if( CurFileNm[0] == '\0' ) { CurFileNm = ScanAll; } if( strpbrk( CurFileNm, "*?.") == NULL ) { if( CurDirName[0] == ' ' ) { strcpy( CurDirName, CurFileNm); /* NOTE(review): no length check on this copy */ } else { if( strlen(CurDirName) + strlen(CurFileNm) >= LONGWIDTH ) ErrP("Total file pathname too long. Aborting...\n"); strcat( CurDirName, CurFileNm); } strcat( CurDirName, "/" ); /* NOTE(review): the length test above does not count this "/" - confirm CurDirName has room */ CurFileNm[0] = '*'; CurFileNm[1] = '\0'; } StopOnMatch = 1; if( (strpbrk( CurFileNm, "*?") != NULL) || RecursFlg) { StopOnMatch = 0; /* has wildcard or is recursive */ } if(strcmp(LstDirNm,CurDirName)){/*lstdir!sameas cur */ /* if last dir was locked, then free it */ if( LstDirNm[0] != '\0') { UnLock( CurDirPtr->CurLock ); TLock = CurrentDir( CurDirPtr->OldLock ); FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = NULL; } strcpy(LstDirNm, CurDirName); /* The FileInfoBlock structure has to be long-word aligned, so */ /* we should use AllocMem to ensure this condition is met. 
*/ CurDirPtr = AllocMem((long)sizeof(struct DirInfoBlock), MEMF_PUBLIC | MEMF_CLEAR); /* NOTE(review): AllocMem() result is used below without a NULL check */ CurDirPtr->BackLink = NULL; /* anchor */ /* Get a lock on the upper level Directory */ if((strpbrk( CurDirName, ":") == NULL)) { /* lock the current default directory */ if(!strcmp(CurDirName, " ")) CurDirName[0] = '\0'; CurDirPtr->CurLock = Lock( (UBYTE *)CurDirName, ACCESS_READ ); if (!CurDirPtr->CurLock) { fprintf(stderr,"Err with dir-> %s\n",CurDirName); ErrP("Could not get lock on upper directory\n"); } CurDirPtr->OldLock = CurrentDir(CurDirPtr->CurLock); TmpFIBPtr = (FIB *)AllocMem((long)sizeof(FIB), MEMF_PUBLIC | MEMF_CLEAR); CLock = CurDirPtr->CurLock; ii = LONGWIDTH - 1; /* build full pathname of current default directory */ /* names are written right to left into CurDirName while walking up through ParentDir() */ while (CLock) { NewLock = ParentDir(CLock); Examine(CLock, TmpFIBPtr); FName = TmpFIBPtr->fib_FileName; if (*FName == '\0') FName = "ram"; Len = strlen(FName); if (NewLock) { ii -= Len + 1; if( ii < 0 ) ErrP("Full pathname too long. Aborting...\n"); memcpy(CurDirName + ii, FName, Len); CurDirName[ii+Len] = '/'; } else { ii -= Len + 1; if( ii < 0 ) ErrP("Full pathname too long. 
Aborting...\n"); memcpy(CurDirName + ii, FName, Len); CurDirName[ii+Len] = ':'; } if(CLock != CurDirPtr->CurLock) { UnLock(CLock); } CLock = NewLock; } FreeMem(TmpFIBPtr, (long)sizeof(FIB)); memmove(CurDirName, CurDirName + ii, LONGWIDTH - ii); CurDirName[LONGWIDTH - ii - 1] = '\0'; strcpy(SavDirName, CurDirName); TLock = CurrentDir(CurDirPtr->OldLock); } else { strcpy(SavDirName, CurDirName); CurDirPtr->CurLock = Lock((UBYTE *)CurDirName, ACCESS_READ ); if (!CurDirPtr->CurLock) { fprintf(stderr,"Error with dir-> %s\n",CurDirName); ErrP("Could not get lock on upper directory\n"); } CurDirPtr->OldLock = CurrentDir(CurDirPtr->CurLock); if( CurDirPtr->OldLock == 0) { fprintf(stderr,"Error with dir-> %s\n",CurDirName); ErrP("Could not set current directory\n"); } } } else { /* last directory name is same as current so copy it */ strcpy(CurDirName, SavDirName); } if ( !Examine( CurDirPtr->CurLock, (FIB *)CurDirPtr )) { /* upper directory is empty so terminate this tree */ FnshdRecurs = 1; } if( ((FIB *)CurDirPtr)->fib_DirEntryType <= 0) { /* Program normally assumes last part of a pathname is a */ /* directory unless there is a "*", "?", or "." in it. It comes */ /* here if it thought it is a directory when it's really a file. 
*/ FnshdRecurs = 0; UnLock( CurDirPtr->CurLock ); CurDirName[strlen(CurDirName)-1] = '\0'; /* Get position of last "/" or ":" in current dir name, */ /* whichever is greater */ Sidx = strchr(CurDirName,':'); if( (Sidx2=strrchr(CurDirName,'/')) != NULL ) Sidx = Sidx2; /* NOTE(review): Sidx is still NULL here if CurDirName holds neither ':' nor '/' */ /* Copy stuff after last "/" or ":" to filename string */ strcpy( CurFileNm, Sidx+1 ); /* Delete it from current directory name */ *(Sidx+1) = '\0'; CurDirPtr->CurLock = Lock( (UBYTE *)CurDirName, ACCESS_READ ); if (!CurDirPtr->CurLock) { fprintf(stderr,"Err with dir-> %s\n",CurDirName); ErrP("Could not get lock on upper directory\n"); } if ( !Examine( CurDirPtr->CurLock, (FIB *)CurDirPtr )) { /* upper directory is empty so terminate this tree */ FnshdRecurs = 1; } } } while ( !ExNext( CurDirPtr->CurLock, (FIB *)CurDirPtr)) { if (CurDirPtr->BackLink != NULL) { /* pop directory */ UnLock( CurDirPtr->CurLock ); TLock = CurrentDir( CurDirPtr->OldLock ); TmpPtr = CurDirPtr->BackLink; FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = TmpPtr; } else { /* Dont pop from upper most dir since we may need it later */ FnshdRecurs = 1; break; } } while (!FnshdRecurs && (((FIB *)CurDirPtr)->fib_DirEntryType > 0)) { /* current entry is a directory */ if (RecursFlg) { /* push a new level for it */ TmpPtr = CurDirPtr; CurDirPtr = AllocMem((long)sizeof(struct DirInfoBlock), MEMF_PUBLIC | MEMF_CLEAR); /* NOTE(review): not checked for NULL */ CurDirPtr->BackLink = TmpPtr; /* Get a lock on the Current Directory */ CurDirPtr->CurLock = Lock((UBYTE *)(((FIB *)TmpPtr)->fib_FileName),ACCESS_READ); if (CurDirPtr->CurLock == 0) { fprintf(stderr,"Could not get lock on intermediate dir\n"); TmpPtr = CurDirPtr->BackLink; FreeMem( CurDirPtr, ( long )sizeof( struct DirInfoBlock )); CurDirPtr = TmpPtr; } else { CurDirPtr->OldLock = CurrentDir(CurDirPtr->CurLock); if ( !Examine( CurDirPtr->CurLock, (FIB *)CurDirPtr )) { if (CurDirPtr->BackLink != NULL) { /* pop dir */ UnLock( CurDirPtr->CurLock ); TLock = CurrentDir( CurDirPtr->OldLock ); TmpPtr = CurDirPtr->BackLink; FreeMem( CurDirPtr, ( long )sizeof( struct 
DirInfoBlock )); CurDirPtr = TmpPtr; } else { /* Dont pop from upper most dir since we may need it later */ FnshdRecurs = 1; break; } } } } while ( !ExNext( CurDirPtr->CurLock, (FIB *)CurDirPtr)) { /* pop dir */ if (CurDirPtr->BackLink != NULL) { UnLock( CurDirPtr->CurLock ); TLock = CurrentDir( CurDirPtr->OldLock ); TmpPtr = CurDirPtr->BackLink; FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = TmpPtr; } else { /* Dont pop from upper most dir since we may need it later */ FnshdRecurs = 1; break; } } } if( FnshdRecurs ) { CmdLnArgIdx++; } else { /* test if file matches pattern */ /* printf("%s\n", (((FIB *)CurDirPtr)->fib_FileName) ); */ if( newwildcmp( CurFileNm, ((FIB *)CurDirPtr)->fib_FileName)) { FileNtFnd = 0; } } } while( FileNtFnd ); /* build the full pathname right to left in FullPathNm, following the BackLink chain from the matched entry */ ii = LONGWIDTH - 1; NewPtr = CurDirPtr; do { TmpPtr = NewPtr; FName = ((FIB *)TmpPtr)->fib_FileName; Len = strlen(FName); if (ii == LONGWIDTH - 1) { ii -= Len; if( ii < 0 ) ErrP("Full pathname too long. Aborting...\n"); memcpy(FullPathNm + ii, FName, Len); } else { ii -= Len + 1; if( ii < 0 ) ErrP("Full pathname too long. Aborting...\n"); memcpy(FullPathNm + ii, FName, Len); FullPathNm[ii+Len] = '/'; } NewPtr = TmpPtr->BackLink; } while( TmpPtr->BackLink != NULL ); Len = strlen(CurDirName); ii -= Len; if( ii < 0 ) ErrP("Full pathname too long. 
Aborting...\n"); memcpy(FullPathNm + ii, CurDirName, Len); memmove(FullPathNm, FullPathNm + ii, LONGWIDTH - ii); FullPathNm[LONGWIDTH - ii - 1] = '\0'; if (StopOnMatch) { FnshdRecurs = 1; while (CurDirPtr->BackLink != NULL) { /* pop directory */ UnLock( CurDirPtr->CurLock ); TLock = CurrentDir( CurDirPtr->OldLock ); TmpPtr = CurDirPtr->BackLink; FreeMem( CurDirPtr, ( long ) sizeof ( struct DirInfoBlock ) ); CurDirPtr = TmpPtr; } CmdLnArgIdx++; StopOnMatch = 0; } return( FullPathNm ); } long SubPatFreq(SubPat) /* LSB is 0.000001 percent */ unsigned short SubPat; { /* Expected frequency of char in hundredth of percents */ int Freq[256] = { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, /* " " ! " # $ % & ' */ 1000, 10, 100, 10, 100, 10, 10, 10, /* ( ) * + , - . / */ 100, 100, 100, 10, 100, 10, 100, 100, /* 0 1 2 3 4 5 6 7 */ 500, 500, 500, 500, 500, 500, 500, 500, /* 8 9 : ; < = > ? */ 500, 500, 100, 50, 100, 100, 100, 100, /* @ A B C D E F G */ 100, 810, 140, 270, 380, 1300, 290, 200, /* H I J K L M N O */ 520, 630, 13, 40, 340, 250, 710, 790, /* P Q R S T U V W */ 190, 11, 680, 610, 1050, 240, 90, 150, /* X Y Z [ \ ] ^ _ */ 150, 190, 07, 100, 100, 100, 10, 10, /* " " a b c d e f g */ 100, 810, 140, 270, 380, 1300, 290, 200, /* h i j k l m n o */ 520, 630, 13, 40, 340, 250, 710, 790, /* p q r s t u v w */ 190, 11, 680, 610, 1050, 240, 90, 150, /* x y z { |_ } */ 150, 190, 7, 100, 100, 100, 100, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; return( Freq[ (SubPat >> 8) & 255 ] * Freq[ SubPat & 255 ]); } PrntHlp() { fprintf(stderr,"scan's pattern matching algorithm:\n" " ? Matches any single character except newline\n" " [chars] Match any characters within braces\n" " [c1-c2] Match any characters from c1 to c2\n" " [^chars] Match any characters not within braces\n" " \\xYY Matches hex number YY as a character\n" " \\Y Matches the standard C escape sequence Y\n" " \\YYY Matches the decimal number YYY as a character\n" " | Either pattern on left or right must match\n" " + Same as | \n" " * Pattern on left and right must both match and be in same\n" " word. Match on left must come before match on right.\n" " #? Same as *.\n" " & Pattern on left and right must both match and be in same\n" " sentence. Match on left must come before match on right.\n" " .. Pattern on left and right must both match and be in same\n" " article. Order of left and right matches is not important.\n" " This is alot faster than &. This is only useful during\n" " article scans.\n" "scan's config file format(there is an implicit | after each pattern):\n" " article separator\n" " column article separator must be in. 0 -> ignore\n" " invert match flag. 1 -> invert match. 
0 -> normal\n" " window size in bytes\n" " search pattern1\n" " .\n" " search patternN\n" ); } main(argc, argv) int argc; char *argv[]; { int ArgCSav; char **ArgVSav; /* StrtTime = clock(); */ // printf("%ld %s %s %s %s %s\n",argc,argv[1],argv[2],argv[3],argv[4],argv[5]); /* Scan the command line and initialize based on different options */ ArgVSav = argv; ArgCSav = argc; HasCnfgF = 0; NumAfter = 1; LineScan = 1; for (++argv; --argc; ++argv) { PndQues2Ast( *argv ); InName = *argv; if (*InName++ == '-') { TermLp = 0; while((*InName != 0) && !TermLp) { switch(*InName++) { case('a'): LineScan = 0; break; case('c'): if (sscanf(InName,"%d",&ColReq)) { while( isdigit( (int)(*InName))) InName++; break; } else { ErrP("Bad column for article separator\n"); } case('f'): HasCnfgF = 1; NumAfter = 0; if ((ConfigF = fopen(InName,"r")) == NULL) { if(*InName != 0) ErrP("Bad config file name\n"); if((ConfigF = fopen("s:scan.config","r")) == NULL) ErrP("Could not open s:scan.config\n"); } /* Handle initialization of article separator, */ /* article column, window size, common words, and */ /* inverse pattern from configuration file. */ if (fscanf(ConfigF,"%s\n",ArtBuf) != 1) ErrP("Could" " not read article separator from config file\n"); ArtSep = EOASep = ArtBuf; while (*EOASep != '\0') EOASep++; --EOASep; if (fscanf(ConfigF,"%d\n",&ColReq) != 1) ErrP("Bad column for article separator\n"); if (fscanf(ConfigF,"%d\n",&InvertMatch) != 1) ErrP("Err reading match inversion flag\n"); if (fscanf(ConfigF,"%ld\n",&WinSiz) != 1) ErrP( "Could not read window size from config file\n"); /* Initialize Min Term Array with pattern to */ /* search for from config file */ EndMin = &MinArray[0] + sizeof(MinArray); while ((fgets(Stf1, LWIDTH-1, ConfigF) > 0) && (MinTerm != EndMin)) if(*Stf1!='\n') strcpy((char *)MinTerm++, strtok(Stf1,";\n\0")); if( MinTerm == EndMin) ErrP( "Too many patterns to search for. Aborting...\n"); if (ferror(ConfigF)) ErrP("Error reading" " config file search pattern. 
Aborting...\n"); fclose(ConfigF); ConfigF = 0; LineScan = 0; TermLp = 1; break; case('h'): if( isdigit( (int)(*InName))) { HighLightColor[2] = *InName++; if( isdigit( (int)(*InName))) { PathNmColor[2] = *InName++; } } break; case('i'): InvertMatch = 1; break; /* Do a line search instead of an article search. Print out xx lines around */ /* match, where xx is a number following the -l. */ case('l'): LineScan = 1; PrntWidth = atoi(InName); while( isdigit( (int)(*InName))) InName++; break; case('n'): LineNum = 1; break; /* Without -o option, strmptr defaults to stdout */ case('o'): if ((OutFile = fopen(InName,"w")) == NULL) ErrP("Could not open output file\n"); RealFile = 1; StrmPtr = OutFile; TermLp = 1; break; case('p'): AlwaysPrint = 1; break; case('r'): RecursFlg = 1; break; /* Without -s option, the article separator defaults to "Article" */ case('s'): ArtSep = InName; while(*InName != 0) { InName++; } EOASep = --InName; TermLp = 1; break; case('t'): Trunc = 1; break; case('v'): fprintf(stderr,"Scan version 1.0 by Walter Rothe, " "Copyright © 1991,1992.\n"); CleanIt(); exit(1); /* Without -w option, window size defaults to 16kb */ case('w'): if(sscanf(InName,"%ld",&WinSiz)) { while( isdigit( (int)(*InName))) InName++; break; } else { ErrP("Bad window size\n"); } case('x'): PrntHlp(); CleanIt(); exit(1); case('z'): if( *InName ) WildLZH = InName; EnableLZHDecomp = 1; TermLp = 1; break; } } } else { LastArg = --InName; } /* save last non "-" arg on cmd line */ } if (ArgCSav < 3) { fprintf(stderr, " Copyright © 1991,1992 by Walter Rothe.\n" " + Scan SrchFile(s) Pattern -[hlnoprtwz] OR\n" " - Scan SrchFile(s) -f[CnfgFile] -[orz] OR\n" " @ Scan SrchFile(s) Pattern -a[cioprswz] OR\n" " # Scan -[vx]\n" " SrchFile :+-@ Pathname of file(s) to be searched\n" " Pattern :+@ What to srch for; Ex. sale..d*paint[3i]+paint&prog\n" " -a :@ Article scan. 
Prints out all articles with matches.\n" " -cColumn :@ Column article separtor must be in(1..?)\n" " -fCnfgFile:@ Get parms from config file\n" " -f :@ Get parms from s:scan.config\n" " -hxy :+ Highlight match with x color and pathname, y color.\n" " -i :@ Invert matching so nonmatching articles are printed\n" " -lxx :+ Line search with xx lines around target printed\n" " -n :+ Print line numbers with matched text(slower)\n" " -oOutFile :+-@ Send output to file\n" " -p :+-@ Always print file pathnames scanned\n" " -r :+-@ Recursively scan down directories\n" " -sArtSep :@ Article separator(def Article)\n" " -t :+ Truncate output to window width. Only works with -n\n" " -v :# Print version number. Other options nulled.\n" " -wWinSize :+@ Window size(def 16384 bytes). Mod(size,4) must be 0\n" " -x :# Print out more help info. Nulls other options\n" " -zWildPat :+-@ Decompres .lzh/.lha with int files matching WildPat\n" " -z :+-@ Decompress all .lzh and .lha files before scanning.\n" ); CleanIt(); exit(1); } /* If doing a line search, turn off inverted pattern matching. and set */ /* article separator to a pattern that should never occur. */ if (LineScan) { InvertMatch = 0; ArtSep = EOASep = LineSrchDelim; while (*EOASep != '\0') { EOASep++; } --EOASep; } else { if( (ColReq > LWIDTH) || (ColReq < 0) ) { ErrP("Column # for article sep too large or neg. Aborting...\n"); } } /* For LH5 decompression, window size must be at least 8192. if( EnableLZHDecomp ) WinSiz = 16384; /* Init pointers and data so NextFile function can be called sequentially */ InitNextFile(ArgVSav, ArgCSav, NumBefore, NumAfter); /* If inverted matching, set to not print out when 1st article header found */ if (InvertMatch) { OutArt = 1; } /* Find 1st filename to scan and open it and read in 1st buffer and start */ /* an asyncronous read to fill the 2nd buffer. 
*/ if ((FName = NextFile(ArgVSav)) != NULL) { if ((FHandle = xfropen(FName,&ReadNum)) == NULL) { ErrP("Problems opening 1st file to scan\n"); } if (ReadNum < 0) { ErrP("Could not read any data from 1st file\n"); } } else { ErrP("No files found to scan\n"); } /* Get text width for later */ if( !RealFile ) { TextWidth = WindowSize(); } PathPrint(MatchNotFnd); LstOut = SOCB; /* parse out the min terms from command line argument */ if (!HasCnfgF) { TmpTrm = strtok(LastArg++,"+|"); if( strlen(TmpTrm) >= LWIDTH) { ErrP("A minterm in the pattern is too long. Aborting...\n"); } while (TmpTrm != 0) { strcpy((char *)(MinTerm++), TmpTrm ); TmpTrm = strtok(NULL,"+|"); } } /* Convert any backslashes in minterm array to their equivalent char */ MinPtr = &MinArray[0]; while ( MinPtr != MinTerm ) { BackSlash(MinPtr); PndQues2Ast(MinPtr++); } /* Convert any backslashes in article separator to their equivalent char */ BackSlash(ArtSep); EOASep = ArtSep; while (*EOASep != '\0') { EOASep++; } /* get pntr to end of art sep */ --EOASep; if( (EOASep - ArtSep + 1 >= LWIDTH) || (EOASep - ArtSep + 1 >= MAXKWSZ)) ErrP("Article separator too long. Aborting...\n"); /* printout list of min terms, art sep, win size */ #ifdef DEBUG printf("Window size is %ld \n", WinSiz); printf("Article separator is %s \n", ArtSep); MinPtr = &MinArray[0]; while ( MinPtr != MinTerm ) { printf("%s \n",MinPtr++); } #endif /* Generate list of major terms from list of minterms. Note that 1 is */ /* reserved for the article separator and 2 for the end of buffer key. */ /* 0 is not used. If there are more than 127, there will be a slowdown */ /* since the duplicate keyword algorithm must be used. */ /* Create a table, indexed by major term number of what it's minterm */ /* number is. It is called MajTrm2MinTrm. */ /* Create a table, indexed by major term number, mapping it to a bit */ /* number in the MinSatTbl. 
This is a number from 0 to 31 and is */ /* assigned according to its order; the 1st major term encountered */ /* for a new minterm in the pattern is assigned 0, and the next 1, etc. */ /* This table is called MajTrm2BitNum. */ /* Create a table, indexed by major term number, indicating if the */ /* associated minterm is satisfied by this major term only, or not. */ /* This table is called MinSatByMTOnly and contains 0 or 1. */ /* Create a table, indexed by minterm number, containing a bit field, */ /* and mask for the bit field. Every time a major term is found, a bit */ /* is set in the appropriate bit field and the mask applied. If the */ /* result is -1, then the minterm is satisfied. This table is called */ /* MinSatTbl. */ MinPtr = &MTArray[0]; strcpy((char *)(MinPtr++), ArtSep); strcpy((char *)(MinPtr), EOBK); if ((LstMajTrm = GetMajTrms()) == NULL) { ErrP("Error building major term table\n"); } MinPtr = &MTArray[0]; #ifdef DEBUG while ( MinPtr != LstMajTrm ) { printf("%s \n",MinPtr++); } printf("number of minterms is %d \n",NumOfMinTrms); printf("number of majterms is %d \n",NumOfMajTrms); #endif /* Create a table, indexed by major term number, containing daisy */ /* chained pointers of major terms found in the article. When the */ /* article delimiter is found, this table is traversed to determine */ /* which bit fields to reset. This table is initially nulled out so */ /* detection of loops is possible. When the article delimiter is found, */ /* the pointers are set back to null again. If a loop is detected, it's */ /* pointer is not modified and the variable LastMT is not modified. */ /* This table is called LastMTTbl. 
*/ for (i=0; i= MAXBRAKS) { ErrP("Too many bracket wildcards in major term\n"); } } } else { if (Stf1[j] == '[') { FndBrak = 1; StrtBrk = j; } if (i < 3) { Stf1[k++] = Stf1[j]; /* Dont convert art sep */ } /* to lower case */ else { Stf1[k++] = LowrCs[Stf1[j]]; /* Keep [ in string for */ } /* use as a delimiter */ Stf3[j] = LowrCs[Stf3[j]]; } j++; } Stf1[k] = 0; LenSt = strlen(Stf1); NBAryEnd[i] = (char *)&NoBrakAry[i] + LenSt - 1; strcpy( (char *)&NoBrakArray[i], Stf1); /* Save MT with no */ /* bracket stuff */ strcpy( (char *)&NoBrakAry[i], Stf1); /* This array will */ /* not be overwritten. */ Tok = strtok( Stf1, "?&*["); /* Tokenize Stf1 with */ while (Tok != NULL) { /* delimiters of ?&*[. */ if (strlen(Tok) > MajTrm2MaxKeyLen[i]) { /* Find longest token. */ MajTrm2MaxKeyLen[i] = strlen(Tok); } Tok = strtok( NULL, "?&*["); } if (MajTrm2MaxKeyLen[i] < 2) { fprintf(stderr,"No keyword found in MT longer than 1 char, so\n"); fprintf(stderr,"MT>>> %s <<= 3) { /* SubPat must have > 2 chars */ CurOddOH = 2147483647; /* Init to max pos overhead */ CurEvenOH = 2147483647; FndALOGKeyWrd = 0; /* Found At Least One Good Key Word in token */ for (j=0; j 5) { /* Not Article separator or End Of Buf */ if ( DsplTb[MTNum] != 0) { /* One element in SubPat tbl */ /* Overhead must take into account extra overhead 4 */ /* current element and duplicate overhead for new el.*/ /* OH = SubPatFreq*((t3+t2-t1) + (t3+2*t2)) */ MulFct = 2*T3 + 3*T2 - T1; if ((OH = SubPatFreq(CurPat)*MulFct) < CurEvenOH) { /* if token is identical to token currently in */ /* keyword table, bypass this keyword. */ if (!memcmp(Tok,SvTbl[MTNum],TokLen)){ continue; } CurEvenOH = OH; SavEPat = CurPat; SavTok = Tok; SavEIdx = j; FndALOGKeyWrd = 1; } } else { /* More than 1 element in table(duplicates). 
*/ NumDup = 1; TmpPtr = &DKWTbl[MTNum][3]; /* Find number of duplicated subpatterns */ while((TmpPtr=(char **)*(TmpPtr+3)) != NULL){NumDup++;} /* Overhead is for a duplicated subpattern */ if( (NumDup+1) << ((1<<29)/1000000/T3) ) { MulFct = T2 + ((NumDup+1)*T3); } else { MulFct = (1<<29)/1000000/T3 + NumDup; } /* If overhead is less than previous least overhead */ /* for other even subpats, then save pntr to CurPat */ if ((OH = SubPatFreq(CurPat)*MulFct) < CurEvenOH) { /* if token is identical to token of another */ /* duplicated keyword, bypass this keyword. */ TmpPtr = &DKWTbl[MTNum][0]; do { if (!memcmp(Tok,TmpPtr,TokLen)){ continue; } } while((TmpPtr=(char **)*(TmpPtr+3)) != NULL); CurEvenOH = OH; SavEPat = CurPat; SavTok = Tok; SavEIdx = j; FndALOGKeyWrd = 1; } } } } } /* Now that we're finished finding the even subpattern with the */ /* least overhead in the token, now find the same thing for the */ /* odd subpat in the same token. */ if (FndALOGKeyWrd) { FndALOGKeyWrd = 0; for (j=1; j 5) { /* Not Article separator or End Of Buf */ if ( DsplTb[MTNum] != 0) { if ((OH = SubPatFreq(CurPat)*T1) < CurOddOH) { /* if token is identical to token currently in */ /* keyword table, bypass this keyword. */ if (!memcmp(Tok,SvTbl[MTNum],TokLen)){ continue; } CurOddOH = OH; SavOPat = CurPat; SavOIdx = j; FndALOGKeyWrd = 1; } } else { NumDup = 1; TmpPtr = &DKWTbl[MTNum][3]; /* Find number of duplicated subpatterns */ while((TmpPtr=(char **)*(TmpPtr+3)) != NULL) { NumDup++; } if( (NumDup+1) << ((1<<29)/1000000/T3) ) { MulFct = T2 + ((NumDup+1)*T3); } else { MulFct = (1<<29)/1000000/T3 + NumDup; } if ((OH = SubPatFreq(CurPat)*MulFct) < CurOddOH) { /* if token is identical to token of another */ /* duplicated keyword, bypass this keyword. 
*/ TmpPtr = &DKWTbl[MTNum][0]; do { if (!memcmp(Tok,TmpPtr,TokLen)){ continue; } } while((TmpPtr=(char **)*(TmpPtr+3)) != NULL); CurOddOH = OH; SavOPat = CurPat; SavOIdx = j; FndALOGKeyWrd = 1; } } } } } } /* If the total minimum overhead for the current token is less */ /* than that of the previously computed tokens then save that */ /* tokens info. */ if ((CurEvenOH + CurOddOH < TotOH) && FndALOGKeyWrd) { /* Total overhead in usec for 20000 characters searched */ TotOH = CurEvenOH + CurOddOH; SavTOPat = SavOPat; /* Even SubPat with least overhead */ SavTEPat = SavEPat; /* Odd SubPat with least overhead */ SavTOIdx = SavOIdx; /* Index into token of odd SubPat */ SavTEIdx = SavEIdx; /* Index into token of even SubPat */ /* pntr to start of best token(not subpat) in MT */ SavTTok = SavTok; #ifdef DEBUG printf("MT %d has best even subpat of %c %c\n",NextMT, SavTTok[SavTEIdx], SavTTok[SavTEIdx+1]); printf("MT %d has best odd subpat of %c %c\n",NextMT, SavTTok[SavTOIdx], SavTTok[SavTOIdx+1]); #endif } } else { if(TokLen == 2 && NextMT > 2 ) { /* SubPat must have > 1 chars */ /* Handle 2 character sub patterns. Dont allow duplicate */ /* keyword chaining, for now. */ CurOddOH = 2147483647; /* Init to max pos overhead */ CurEvenOH = 2147483647; FndALOGKeyWrd = 0; /* Fnd At Least One Good Key Word in token */ CurPat = (Tok[0] << 8) | Tok[1]; /* Construct test subpat */ /* If SubPat table does not yet have an entry for CurPat, */ /* and if the limit of 128 Odd and 128 Even SubPats has not */ /* been reached, then compute the overhead for CurPat and if */ /* less than the previous smallest SubPat overhead(for this */ /* token) then save pointers to CurPat. 
*/ if ((SubPat[CurPat] == 0) && (i < 128)) { CurEvenOH = SubPatFreq(CurPat)*T1; SavEPat = CurPat; SavTok = Tok; SavEIdx = 0; FndALOGKeyWrd = 1; } if (FndALOGKeyWrd) { for( j=0; j<256; j++ ) { CurPat = (Tok[1] << 8) | j; if( (CurPat == SavEPat) || (i >= 128) || (SubPat[CurPat] != 0) ) FndALOGKeyWrd = 0; } if( FndALOGKeyWrd == 1 ) { CurPat = ((CurPat>>8) << 8) | 0x41; CurOddOH = 1000000000 + SubPatFreq(CurPat)*T1; SavOPat = CurPat; SavOIdx = 1; } FndX = 1; for( j=0; j<256; j++ ) { CurPat = Tok[0] | j<<8; if( (CurPat == SavEPat) || (i >= 128) || (SubPat[CurPat] != 0) ) FndX = 0; } if( FndX == 1 && !TwoCharArtSep) { CurPat = (CurPat & 255) | 0x41<<8; if( 1000000000 + SubPatFreq(CurPat)*T1 < CurOddOH ) { CurOddOH = 1000000000 + SubPatFreq(CurPat)*T1; SavOPat = CurPat; SavOIdx = -1; FndALOGKeyWrd = 1; } } } /* If the total minimum overhead for the current token is less */ /* than that of the previously computed tokens then save that */ /* tokens info. */ if ((CurEvenOH + CurOddOH < TotOH) && FndALOGKeyWrd) { /* Total overhead in usec for 20000 characters searched */ TotOH = CurEvenOH + CurOddOH; SavTOPat = SavOPat; /* Even SubPat with least overhead */ SavTEPat = SavEPat; /* Odd SubPat with least overhead */ SavTOIdx = SavOIdx; /* Index into token of odd SubPat */ SavTEIdx = SavEIdx; /* Index into token of even SubPat */ /* pntr to start of best token(not subpat) in MT */ SavTTok = SavTok; #ifdef DEBUG printf("MT %d has best even subpat of %c %c\n",NextMT, SavTTok[SavTEIdx], SavTTok[SavTEIdx+1]); printf("MT %d has best odd subpat of %c %c\n",NextMT, SavTTok[SavTOIdx], SavTTok[SavTOIdx+1]); #endif } } } Tok = strtok( NULL, "?&*["); /* Get next token of MT to look at */ } Tok = SavTTok; /* If we have found the best odd and even SubPat of the MT, then put */ /* the SubPats in the various tables for faster searching later. 
*/ if (TotOH != 2147483647) { /* Have indeed found the best SubPats */ /* 1st do even SubPat */ if ((MTNum = SubPat[SavTEPat]) == 0) { /* Non duplicated keyword */ MTN = SubPat[SavTEPat] = NextMT<<1; /* Need to use */ /* MTNumSortedByKeyLen[SubPat(x)] to */ /* find Major Term Number later. */ frstuppr = toupper(SavTEPat >> 8); scnduppr = toupper(SavTEPat & 255); SubPat[(frstuppr*256) + (SavTEPat&255)] = MTN; SubPat[(SavTEPat&0xFF00) + scnduppr] = MTN; SubPat[(frstuppr*256) + scnduppr] = MTN; TokStrt[MTN] = Tok - (char *)&NoBrakArray[NextMT]; TokEnd[MTN] = TokStrt[MTN] + strlen(Tok) - 1; if (SavTEIdx == 0) { /* SubPat is 1st 2 chars of keyword(token) */ if (strlen(Tok) < 4) { /* Keyword is 2 or 3 chars long */ KWTbl[MTN][0] = Tok + 3; /* Pntr to start of rest of keywrd*/ KWTbl[MTN][1] = Tok + 2; /* Pntr to end of rest of keyword */ if( strlen(Tok) == 3 ) { /* Keyword is 3 chars long */ FrstBt[MTN] = Tok[1]; /* 1st byte of rest of keyword */ ScndBt[MTN] = Tok[2]; /* 2nd byte of rest of keyword */ /* Displacemnt 2 where "rest of keywrd" should b in buf */ DsplTb[MTN] = 128 - 1 + 0; } else { FrstBt[MTN] = Tok[0]; /* 1st byte of rest of keyword */ ScndBt[MTN] = Tok[1]; /* 2nd byte of rest of keyword */ /* Displacemnt 2 where "rest of keywrd" should b in buf */ DsplTb[MTN] = 128 - 2 + 0; } SvTbl[MTN] = Tok; /* Displacement to start of keyword in buffer */ FDsplTbl[MTN] = -2; } else { KWTbl[MTN][0] = Tok + 4; KWTbl[MTN][1] = Tok + strlen(Tok) - 1; FrstBt[MTN] = Tok[2]; ScndBt[MTN] = Tok[3]; DsplTb[MTN] = 128 - 0 + 0; SvTbl[MTN] = Tok; FDsplTbl[MTN] = -2; } } else { KWTbl[MTN][0] = Tok + 2; /* Pntr to start of rest of keyword */ KWTbl[MTN][1] = Tok + strlen(Tok) - 1; FrstBt[MTN] = Tok[0]; ScndBt[MTN] = Tok[1]; DsplTb[MTN] = 128 - 2 - SavTEIdx; SvTbl[MTN] = Tok; FDsplTbl[MTN] = - SavTEIdx - 2; } if (MTN < 6) { DsplTb[MTN] = 0; /* EOB or article separator */ if (MTN == 2 || MTN == 3) { DFASep[0] = 128 - 2 - SavTEIdx; } } } else { /* Duplicated keyword(more than 1 keyword for 
1 subpattern) */ MTN = SubPat[SavTEPat]; /* Need to use */ /* MTNumSortedByKeyLen[SubPat(x)] to */ /* find Major Term Number later. */ DKWIdx = NextMT << 1; TokStrt[DKWIdx] = Tok - (char *)&NoBrakArray[NextMT]; TokEnd[DKWIdx] = TokStrt[DKWIdx] + strlen(Tok) - 1; if (DsplTb[MTN] != 0) { /* Non Duplicated single entry */ /* 1st remove the single entry and put it in the dupl tble */ /* then put the new element in the dupl table also */ DsplTb[MTN] = 0; /* Adding 2nd keyword that has same subpat */ DKWTbl[MTN][0] = SvTbl[MTN]; /* Pntr 2 strt of rest of kwd */ DKWTbl[MTN][1] = KWTbl[MTN][1]; /* Pntr 2 end of rest of kwd */ DKWTbl[MTN][2] = (char *)FDsplTbl[MTN] + 128; DKWTbl[MTN][3] = (char *)&DKWTbl[DKWIdx]; DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */ DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */ DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTEIdx); /* Displace */ DKWTbl[DKWIdx][3] = NULL; /* link to next */ FDsplTbl[DKWIdx] = - SavTEIdx - 2; } else { /* Duplicated entry */ /* Put another duplicated entry in the dup keyword table */ TmpPtr = &DKWTbl[MTN][0]; while((TmpPtr=(char **)*(TmpPtr+3)) != NULL) { MTTmp = TmpPtr; } *(MTTmp+3) = (char *)&DKWTbl[DKWIdx]; DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */ DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */ DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTEIdx); /* Displace */ DKWTbl[DKWIdx][3] = NULL; /* link to next */ FDsplTbl[DKWIdx] = - SavTEIdx - 2; } } /* Now put stuff from odd SubPat into various tables */ if ((MTNum = SubPat[SavTOPat]) == 0) { if( strlen(SavTTok) > 2 ) { MTN = SubPat[SavTOPat] = (NextMT<<1)+1; /* Need to use */ /* MTNumSortedByKeyLen[SubPat(x)] to*/ /* find Major Term Number later. 
*/ frstuppr = toupper(SavTOPat >> 8); scnduppr = toupper(SavTOPat & 255); SubPat[(frstuppr*256) + (SavTOPat&255)] = MTN; SubPat[(SavTOPat&0xFF00) + scnduppr] = MTN; SubPat[(frstuppr*256) + scnduppr] = MTN; TokStrt[MTN] = Tok - (char *)&NoBrakArray[NextMT]; TokEnd[MTN] = TokStrt[MTN] + strlen(Tok) - 1; KWTbl[MTN][0] = Tok + 2; /* Pntr to strt of rest of keyword */ if (strlen(Tok) == 3) { /* Set to less than pntr 2 start so nothing chckd, since it*/ KWTbl[MTN][1] = Tok; /* should already have been verified */ } else { KWTbl[MTN][1] = Tok + strlen(Tok) - 1; /* Pntr 2 rest kwd */ } FrstBt[MTN] = Tok[0]; /* 1st byte of rest of keyword */ ScndBt[MTN] = Tok[1]; /* 2nd byte of rest of keyword */ /* Displacement to where "rest of keywrd" should be in buffer */ DsplTb[MTN] = 128 - 2 - SavTOIdx; SvTbl[MTN] = Tok; FDsplTbl[MTN] = - SavTOIdx - 2; if (MTN < 6) { DsplTb[MTN] = 0; /* EOB or article separator */ if (MTN == 2 || MTN == 3) { DFASep[1] = 128 - 2 - SavTOIdx; } } } else { MTN = (NextMT<<1)+1; for( iij = 0; iij < 256; iij++ ) { if( SavTOIdx == -1 ) { CurPat = (SavTOPat & 255) | iij<<8; } else { CurPat = ((SavTOPat>>8)<<8) | iij; } SubPat[CurPat] = MTN; frstuppr = toupper(CurPat >> 8); scnduppr = toupper(CurPat & 255); SubPat[(frstuppr*256) + (CurPat&255)] = MTN; SubPat[(CurPat&0xFF00) + scnduppr] = MTN; SubPat[(frstuppr*256) + scnduppr] = MTN; } TokStrt[MTN] = Tok - (char *)&NoBrakArray[NextMT]; TokEnd[MTN] = TokStrt[MTN] + strlen(Tok) - 1; KWTbl[MTN][0] = Tok+1; /* Pntr to strt of rest of keyword */ /* Set to less than pntr 2 start so nothing chckd, since it */ KWTbl[MTN][1] = Tok; /* should already have been verified */ FrstBt[MTN] = (char)(SavTEPat>>8); /* 1st byteofrestof keywrd */ ScndBt[MTN] = (char)SavTEPat; /* 2nd byte of rest of keyword */ /* Displacement to where "rest of keywrd" should be in buf */ DsplTb[MTN] = 128 - 2 - SavTOIdx; SvTbl[MTN] = Tok; FDsplTbl[MTN] = - 2 - SavTOIdx; } } else { /* Duplicated keyword(more than 1 keyword for 1 subpattern) */ MTN = 
SubPat[SavTOPat]; /* Need to use */ /* MTNumSortedByKeyLen[SubPat(x)] to */ /* find Major Term Number later. */ DKWIdx = (NextMT << 1) + 1; TokStrt[DKWIdx] = Tok - (char *)&NoBrakArray[NextMT]; TokEnd[DKWIdx] = TokStrt[DKWIdx] + strlen(Tok) - 1; if (DsplTb[MTN] != 0) { /* Non Duplicated single entry */ /* 1st remove the single entry and put it in the dupl tble */ /* then put the new element in the dupl table also */ DsplTb[MTN] = 0; /* Adding 2nd keyword that has same subpat */ DKWTbl[MTN][0] = SvTbl[MTN]; /* Pntr 2 strt of rest of kwd */ DKWTbl[MTN][1] = KWTbl[MTN][1]; /* Pntr 2 end of rest of kwd */ DKWTbl[MTN][2] = (char *)FDsplTbl[MTN] + 128; DKWTbl[MTN][3] = (char *)&DKWTbl[DKWIdx]; DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */ DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */ DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTOIdx); /* Displace */ DKWTbl[DKWIdx][3] = NULL; /* link to next */ FDsplTbl[DKWIdx] = - SavTOIdx - 2; } else { /* Duplicated entry */ /* Put another duplicated entry in the dup keyword table */ TmpPtr = &DKWTbl[MTN][0]; while((TmpPtr=(char **)*(TmpPtr+3)) != NULL) {MTTmp = TmpPtr;} *(MTTmp+3) = (char *)&DKWTbl[DKWIdx]; DKWTbl[DKWIdx][0] = Tok; /* Pointer 2 strt of rest of keyword */ DKWTbl[DKWIdx][1] = Tok + strlen(Tok) - 1; /* Pntr to end kw */ DKWTbl[DKWIdx][2] = (char *)(128 - 2 - SavTOIdx); /* Displace */ DKWTbl[DKWIdx][3] = NULL; /* link to next */ FDsplTbl[DKWIdx] = - SavTOIdx - 2; } } } /* Could not find at least one good keyword in all the tokens of a */ /* major term. A good keyword has at least one set of unique even and */ /* odd 2 char subpats and the keyword must not be the same as any */ /* other keyword already in the table that has the same subpat. */ /* For right now, print out an error and exit. Needs to be handled */ /* better later. This should not happen very often at all. */ else { if(i==1) { fprintf(stderr,"Article separators must have at least 2 unique\n"); fprintf(stderr,"characters. 
Try adding a \\n or space to it.\n"); ErrP("Aborting...\n"); } else { fprintf(stderr,"For search pattern %s, didn't\n", &MTArray[NextMT-1]); fprintf(stderr,"find a unique even & odd 2 char subpat. MT=%d.\n", NextMT); fprintf(stderr,"Try adding a unique char to search pattern.\n"); ErrP("Aborting...\n"); } } } /* if article separator is only two char long, reset its end pointer back */ /* back to what it originally was. Also copy subpat table value for the */ /* 0x20 prefix to all other possible prefixes. */ if( TwoCharArtSep ) { EOASep--; *(ArtSep + 2) = 0x00; SPIdx = *(ArtSep + 1) << 8; for( iij = 0; iij < 256; iij++ ) { if( SubPat[SPIdx + iij] == 0 ) { SubPat[SPIdx + iij] = 3; } else { if( SubPat[SPIdx + iij] > 3 ) { fprintf(stderr,"\nOdd Art sep srch pat(hex): %lx",SPIdx+iij); if( SubPat[SPIdx+iij] & 1 == 1 ) { fprintf(stderr,"\nThe odd part(don't care) of the following " "srch pat conflicts(hex): %lx", *(short *)SvTbl[SubPat[SPIdx+iij]] ); } else { fprintf(stderr,"\nEven srch pat that conflicts is(hex): %lx", *SvTbl[SubPat[SPIdx+iij]] ); } fprintf(stderr,"\nSrch pat is in major term %d\n", (unsigned int)(SubPat[SPIdx+iij] >> 1)); ErrP("Err:Try adding another char to conflicting srch pat.\n"); } } } } LastInLast = 0; LastPntEnd = NULL; /* default start of article so nothing bad will happen if there are no */ /* article separators in the file */ CurArtStrt = BufIdx; /* StrtTime = clock(); */ while (1==1) { MTIdx = FastSearch(BufIdx); MTNum = MTIdx >> 1; if (MTIdx > 5) { /* Since keyword was found, need to make sure the rest of the major term */ /* matches also, before declaring major term satisfied. */ if (!OutArt && FindRestOfMT(BufIdx, MTIdx)) { if (!MinSatByMTOnly[MTNum] && !LineScan) { /* Set bit indicating MT satisfied. */ TmpT = MajTrm2BitNum[MTNum]; MinSatTbl[MajTrm2MinTrm[MTNum]][1] |= 1 << TmpT; /* Since major term was satisfied, need to check MinSatTbl to see if */ /* minterm is fully satisfied. 
*/ Indx = MajTrm2MinTrm[ MTNum]; if ((MinSatTbl[Indx][0] | MinSatTbl[Indx][1]) == -1 ) { /* Since minterm was satisfied, we set a flag "OutArt" so that when next */ /* article separator or "End of Data" is encountered, the article will */ /* be printed out. */ OutArt = 1; } /* Add major term to chain of those needing to be reset at "End Of Article" */ /* First check for loop. Loop is when a maj trm occured more than once */ /* in the article. */ if (!LastMTTbl[ MTNum] && (MTNum != LastMT)) { LastMTTbl[ LastMT] = MTNum; LastMT = MTNum; } } /* Comes here if only 1 maj term needs to be satisfied for minterm to be */ /* satisfied. In this case, set flag to print out article at next EOA. */ /* Also comes here if the Line Scan flag is set. */ else { if (LineScan) { PathPrint(MatchFnd); /* if match found in overlapped portion at end of buffer,*/ /* set next overlap size so the match will not be found */ /* again once the new buffer is switched to. */ if( EOCB - BufIdx < MAXKWSZ) KeyWrdOvlp = EOCB - BufIdx; /* Find start of word with match in it for delayed print */ /* This is needed so that the front part of the matched */ /* word will be highlighted when right context from the */ /* previous match overlaps this match. The front would */ /* not normally be highlighted if the two char keyword */ /* does not occur at the start of the matched word. */ KeyStrt = BufIdx + FDsplTbl[MTIdx]; WrdStrt = KeyStrt - 1; while( isalnum((int)(*WrdStrt)) && WrdStrt >= SOCB) { WrdStrt--; } WrdStrt++; if( KeyStrt - WrdStrt > 256 ) WrdStrt = KeyStrt - 256; SizeDiff = KeyStrt - WrdStrt; if( DlydPntNextBuf ) { /* Comes here if the right context from the previous match overlapped two */ /* buffers. The part in the previous buffer has been printed out already. */ /* Determine where the right context stops in pres buf by counting line */ /* feeds. 
*/ DlydPntNextBuf = 0; PntPtr = SOCB; StrtLFC = LFCnt - 1; while((PntPtr < EOCB) && (LFCnt <= PrntWidth)) { if(*PntPtr == '\n') { LFCnt++; } if( PntPtr - SOCB > 256 * (LFCnt - StrtLFC) ) { LFCnt = PrntWidth + 1; CutIt = 1; } PntPtr++; } DlydPntStart = SOCB; DlydPntSize = PntPtr - SOCB; DlydPntRightContext = 1; LastInLast = 0; /* 1st print in pres buf */ } DlydPntEnd = DlydPntStart + DlydPntSize; if( DlydPntRightContext && DlydPntEnd < WrdStrt) { /* Comes here if we know that the right context from the previous match */ /* does not extend up to the present match. Print it out. */ fxwrite(DlydPntStart,BlkSize,DlydPntSize,StrmPtr,NoKeyW); DlydPntRightContext = 0; RightStrt = DlydPntEnd; LastPntEnd = DlydPntEnd - 1; } if( DlydPntRightContext && DlydPntEnd >= WrdStrt) { /* Comes here if we know that the right context from the previous match */ /* extends all the way to the present match, and farther. Chop it off so */ /* it only extends up to the start of the present matched word and print */ /* the context. */ DlydPntRightContext = 0; DlydPntSize = WrdStrt - DlydPntStart; if( DlydPntSize < 0 ) { fxwrite(DlydPntStart,BlkSize,ZeroLong,StrmPtr,NoKeyW); NumBlksToWrt = TokEnd[MTIdx] - TokStrt[MTIdx] + 1 + SizeDiff + DlydPntSize; RightStrt = DlydPntStart + NumBlksToWrt; TotTokLen = TokEnd[MTIdx] - TokStrt[MTIdx] + 1; /* Now print out matched word with keyword in it for present match */ fxwrite(DlydPntStart,BlkSize,NumBlksToWrt,StrmPtr,HasKeyWrd); } else { /* print stuff between last and present match */ fxwrite(DlydPntStart,BlkSize,DlydPntSize,StrmPtr,NoKeyW); NumBlksToWrt = TokEnd[MTIdx] - TokStrt[MTIdx] + 1 + SizeDiff; RightStrt = WrdStrt + NumBlksToWrt; TotTokLen = TokEnd[MTIdx] - TokStrt[MTIdx] + 1; /* Now print out matched word with keyword in it for present match */ fxwrite(WrdStrt,BlkSize,NumBlksToWrt,StrmPtr,HasKeyWrd); } } else { /* Print out plus and minus PrntWidth lines around match. 
*/ FrstPnt = 1; LFCnt = 0; StrtLFC = LFCnt - 1; PntPtr = BufIdx + FDsplTbl[MTIdx] - 1; StrtPP = PntPtr; while((PntPtr >= SOCB) && (LFCnt <= PrntWidth) && ((PntPtr > LastPntEnd) || LastInLast)) { if(*PntPtr == '\n') { LFCnt++; } if( StrtPP - PntPtr > 256 * (LFCnt - StrtLFC) ) LFCnt = PrntWidth + 1; PntPtr--; } /* print out any context info in previous buffer */ if((PntPtr < SOCB) && (CurArtStrt != SOCB) && (LFCnt <= PrntWidth)) { StrtLFC = LFCnt - 1; PntPtr = EndOfPrevBuf + (PntPtr - SOCB); StrtPP = PntPtr; while((PntPtr >= CurArtStrt) && (LFCnt <= PrntWidth) && ((PntPtr > LastPntEnd) && LastInLast)) { if(*PntPtr == '\n') { LFCnt++; } if( StrtPP - PntPtr > 256 * (LFCnt - StrtLFC) ) LFCnt = PrntWidth + 1; PntPtr--; } if(PntPtr + 1 < EndOfPrevBuf) { NumBlksToWrt = EndOfPrevBuf - PntPtr - 1; if(FrstPnt && (*(PntPtr+1) == '\n')) { NumBlksToWrt--; PntPtr++; FrstPnt = 0; } if(NumBlksToWrt > 0) { fywrite(PntPtr+1,BlkSize,NumBlksToWrt,StrmPtr); } } PntPtr = SOCB - 1; LastPntEnd = PntPtr; } /* print out context info in current buffer to the left of, and including */ /* the matched keyword. */ PntPtr++; NumBlksToWrt = BufIdx + FDsplTbl[MTIdx] + TokEnd[MTIdx] - TokStrt[MTIdx] - PntPtr + 1; RightStrt = PntPtr + NumBlksToWrt; if(PntPtr + NumBlksToWrt - 1 > LastPntEnd || LastInLast) { if (!LastInLast && (PntPtr <= LastPntEnd)) { PntPtr = LastPntEnd + 1; NumBlksToWrt = RightStrt - PntPtr; } if(FrstPnt && (*PntPtr == '\n')) { NumBlksToWrt--; PntPtr++; FrstPnt = 0; } if(NumBlksToWrt > 0) { TotTokLen = TokEnd[MTIdx] - TokStrt[MTIdx] + 1; fxwrite(PntPtr,BlkSize,NumBlksToWrt,StrmPtr, HasKeyWrd); RightStrt = PntPtr + NumBlksToWrt; } } } /* save pointer to last item printed to it wont be */ /* printed again. */ LastPntEnd = RightStrt - 1; /* print out context info in the present buf, to the right of the matched */ /* keyword. 
*/ LFCnt = 0; StrtLFC = -1; PntPtr = RightStrt; while((PntPtr < EOCB) && (LFCnt <= PrntWidth)) { if(*PntPtr == '\n') { LFCnt++; } if( PntPtr - RightStrt > 256 * (LFCnt - StrtLFC) ) { LFCnt = PrntWidth + 1; CutIt = 1; } PntPtr++; } NumBlksToWrt = PntPtr - RightStrt; if( NumBlksToWrt > 0 || LastInLast || PntPtr==EOCB) { /* Set flag so that when EOCB or next match is */ /* found, right context will be printed out. */ DlydPntRightContext = 1; DlydPntStart = RightStrt; DlydPntSize = NumBlksToWrt; } LastInLast = 0; } else { OutArt = 1; } } } } else { if (MTIdx < 4) { /* MT of 2 or 3 indicates article separator */ /* If flag is set specifying that article separator needs to be in a */ /* certain column, then check for this. */ ColOk = 1; if (ColReq) { /* if article separator is not in right column, reset ColOk to 0 */ if( *(BufIdx + FDsplTbl[MTIdx] - ColReq) == '\n') { TmpP = BufIdx + FDsplTbl[MTIdx]; for( BP=TmpP-ColReq+1; BPTmpCS2-LWIDTH; iii--) { if( *iii == '\n') { TmpCS = iii + 1; break; } } if( !PrntPrevPrev ) { TmpCS2 = CurArtStrt; /* if article sep is found in the middle of line, back */ /* up until line feed is found and use as strt of art. */ for( iii=TmpCS2; iii>TmpCS2-LWIDTH; iii--) { if( *iii == '\n') { CurArtStrt = iii + 1; break; } } } if (ArtInPrevBuf) { /* take care of case where article sep is found at */ /* the very end of previous buf and the same article*/ /* separator found before the start of current buf. */ NumBlksToWrt = EndOfPrevBuf - CurArtStrt; NumBlksToWrt2 = TmpCS - SOCB; if (NumBlksToWrt + NumBlksToWrt2 > MAXKWSZ) { if (!RealFile) { /* set all nonprinting chars to blanks */ for (iii=CurArtStrt; iii 0) { fwrite(CurArtStrt,BlkSize,NumBlksToWrt,StrmPtr); } } WroteOverIt = 0; PrntPrevPrev = 0; OutArt = 0; ArtInPrevBuf = 0; /* Reset list of partially satisfied minterms since article separator */ /* was found. 
*/ Indx = 0; while (MTIndx = LastMTTbl[ Indx]) { TmpT = MajTrm2BitNum[MTIndx]; MinSatTbl[MajTrm2MinTrm[MTIndx]][1] ^= 1 << TmpT; LastMTTbl[ Indx] = 0; Indx = MTIndx; } LastMT = 0; /* Mark start of article so if pattern is found in it, the whole article */ /* can be printed out */ CurArtStrt = BufIdx + FDsplTbl[MTIdx]; /* If, over 5 articles, a few two-char subpatterns dominate(# of subpat */ /* hits is > threshold), reselect a new two-char subpat for the */ /* corresponding major terms. */ } } else { /* Must be at End Of Buffer */ /* Switch buffers since EOB found */ /* Note that this could possibly be sped up if open on next file can */ /* occur while last buffer of current file is scanned. */ /* EndTime = clock(); */ /* printf("time1= %lu and time2= %lu \n",StrtTime,EndTime); */ /* TotTime += EndTime - StrtTime; */ /* If we had a match in the buffer before the buffer before the */ /* one we are about to switch to, or earlier, print it out the */ /* previous buffers contents. */ if (ArtInPrevBuf) { if (OutArt != InvertMatch && !LineScan) { PathPrint(MatchFnd); if( WroteOverIt ) { fprintf(stderr,"Warning: Article was too long. 
Did " "not print out front part of article.\n Suggest" " increasing window size if not LHA.\n"); /* if( !RealFile ) ErrP("Aborting...\n"); */ } if (!RealFile) { /* set all nonprinting chars to blanks */ for (iii=CurArtStrt; iii 256 * (LFCnt - StrtLFC) ) { LFCnt = PrntWidth + 1; CutIt = 1; } PntPtr++; } DlydPntStart = SOCB; DlydPntSize = PntPtr - SOCB; DlydPntRightContext = 1; } if( DlydPntRightContext) { /* print right context from previous match if line-scan */ /* option set */ fxwrite(DlydPntStart,BlkSize,DlydPntSize,StrmPtr,NoKeyW); DlydPntRightContext = 0; if( LFCnt <= PrntWidth ) DlydPntNextBuf = 1; if( (DlydPntStart + DlydPntSize) == EOCB && *(EOCB-1) != '\n') MayNeedLF = 1; } SavEOCB = EOCB; SavSOCB = SOCB; if( LineNum ) { CountCR(); } /* Note that on both normal, LZH archive files, and internal */ /* LZH archive files, an extra read of -1 bytes follows the */ /* last read of a nonzero # of bytes. When OpenNew is on return,*/ /* it sets things up as if it did a read of zero bytes, even */ /* though it returns -1. */ if( xfrread(FHandle) <= 0 ) { KeyWrdOvlp = MAXKWSZ; if (OutArt != InvertMatch && !LineScan) { PathPrint(MatchFnd); if( !PrntPrevPrev ) { TmpCS2 = CurArtStrt; /* if article sep is found in the middle of line, back */ /* up until line feed is found and use as strt of art. */ for( iii=TmpCS2; iii>TmpCS2-LWIDTH; iii--) { if( *iii == '\n') { CurArtStrt = iii + 1; break; } } } NumBlksToWrt = EndOfPrevBuf - CurArtStrt; if (!RealFile) { /* set all nonprinting chars to blanks */ for (iii=CurArtStrt; iii