/* Copyright © 1991, 1992 by Walter Rothe. You may freely use and modify this
 * program, but not for commercial profit. A modest fee for distribution is
 * allowed. Derivative works must be released with source along with the
 * executable or provisions made to provide the user source, if requested.
 * Uploading source to a major bulletin board system within 6 months of the
 * time of the request satisfies this requirement. This copyright notice
 * must not be deleted from the source. */

/* This program makes sure the rest of a major term matches the specified  */
/* pattern.                                                                 */

/* The "&" function requires that patterns on the left and right of the    */
/* "&" match what they are supposed to and that both are in the same       */
/* sentence or context. A sentence or context is delimited by the          */
/* following items:                                                         */
/*                                                                          */
/* 1) period followed by a space or line feed                               */
/* 2) a maximum of OVERLAP characters                                       */
/* 3) two newline chars with no chars but ">" between them                  */
/* 4) start or end of article                                               */
/* 5) newline before a colon                                                */

/* The "*" function requires that the pattern on the left and right of the */
/* "*" match what they are supposed to and that both are in the same word. */
/* A space or newline char delimits words.                                  */

/* Note: Since end of article delimiter is not known until later, there is */
/* a possibility of matching something in the next article during a right  */
/* look. Normally this should not present a major problem since the data   */
/* around an article separator is usually not used to search on and a      */
/* sentence delimiter will usually be very quickly found. This should not  */
/* be handled in this routine. BP should be saved on exit from this pgm    */
/* and used to determine if a real match occurred when the next article    */
/* delimiter is found. At that point, you can rescan the article.
You */ /* could also modify scan.c so that once OutArt is set, you continue to */ /* check for matches and when you find the article sep, determine if a */ /* real match occured. */ #include #include #include #include #include #include #include "fcntl.h" #include "ctype.h" #include #include "scan.h" #define FRSTWRDLCKSTP 0 #define FRSTWRDLCKSTP1 1 #define FRSTWRDLCKSTP2 2 #define FRSTWRDLCKSTP3 3 #define WRDLOCKSTEP 4 #define WRDLOCKSTEP1 5 #define WRDLOCKSTEP2 6 #define SENTENCELOOK 7 #define WORDLOOK 8 #define WORDLOOK1 9 #define FRSTWRDLOOK 10 #define FRSTWRDLOOK1 11 #define FRSTWRDLOOK2 12 int FindRestOfMT(BI,MTI) char *BI; /* Buffer Index */ unsigned char MTI; /* Major Term Index */ { extern int FDsplTbl[]; /* offset from BI to strt of token */ extern Lines NoBrakAry[]; /* same as above but no 0's for &,*,?,[ */ extern char *NBAryEnd[]; /* pntrs to end of MT string in NBAry */ extern int TokStrt[]; /* indx 2 1st byte of token in nobrakarray */ extern int TokEnd[]; /* indx 2 last byte of token in nobrak */ extern char *EOCB; /* end of current buffer + 1 */ extern int ArtInPrevBuf; /* 1 if article strt is in previous buffer */ extern char *CurArtStrt; /* Pntr 2 strt of article in a buffer */ extern ErrP(); extern char LowrCs[]; /* Convert mixed case to lower case */ extern BrakTyp1 *BrakIdx; extern BrakTyp2 *BrakPtr; char *BP; /* Buffer pointer */ char *BPPntr; char *FStrt; char *FEnd; char *LookSrcP; char *NBAS; char *SenStrt; /* earliest into buffer that sentence could start */ char *SenEnd; char *LookStrt,*LookStrt1,*LookStrt2; char *BPStrt,*BPStrt1,*BPStrt2; unsigned char MTN; int FoundColon=0; unsigned char Mode; MTN = MTI >> 1; BP = BI + FDsplTbl[MTI]; LookSrcP = &NoBrakAry[MTN][TokStrt[MTI]] - 1; FStrt = (char *)&NoBrakAry[MTN]; Mode = FRSTWRDLCKSTP; SenStrt = BI - OVERLAP + MAXKWSZ; /* if article start is in current buffer and is bigger than the earliest */ /* into the buff that the sentence could start, set sentence start to it */ if (!ArtInPrevBuf && 
(CurArtStrt > SenStrt)) { SenStrt = CurArtStrt; } while (LookSrcP >= FStrt) { switch(*LookSrcP) { case('*'): LookSrcP--; if (LookSrcP < FStrt) { continue; } switch(Mode) { case(FRSTWRDLCKSTP): LookStrt = LookSrcP; Mode = FRSTWRDLOOK; break; case(FRSTWRDLCKSTP1): LookStrt1 = LookSrcP; Mode = FRSTWRDLOOK1; break; case(FRSTWRDLCKSTP2): LookStrt2 = LookSrcP; Mode = FRSTWRDLOOK2; break; case(FRSTWRDLCKSTP3): ErrP("Only supports up to 3 * in a word\n"); case(WRDLOCKSTEP): LookStrt1 = LookSrcP; Mode = WORDLOOK; break; case(WRDLOCKSTEP1): LookStrt2 = LookSrcP; Mode = WORDLOOK1; break; case(WRDLOCKSTEP2): ErrP("Only supports up to 3 * in a word\n"); } break; case('&'): if (--LookSrcP < FStrt) { continue; } if (*LookSrcP == '*') { if (--LookSrcP < FStrt) { continue; } } LookStrt = LookSrcP; Mode = SENTENCELOOK; break; case('['): NBAS = &NoBrakAry[MTN][0]; break; default: { break; } } BP--; switch(Mode) { case(SENTENCELOOK): /* check if BP is passed a sentence/context delimiter */ if (BP < SenStrt) { return(0); } switch(*BP) { case(':'): FoundColon = 1; break; case('\n'): if(FoundColon) { return(0); } BPPntr = BP + 1; while (*BPPntr == '>') { BPPntr++; } if (*BPPntr == '\n') { return(0); } break; case('.'): if(*(BP+1) == ' ' || *(BP+1) == '\n') { return(0); } break; } switch(*LookSrcP) { case('?'): Mode = WRDLOCKSTEP; BPStrt = BP; LookSrcP--; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = WRDLOCKSTEP; BPStrt = BP; LookSrcP--; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = WRDLOCKSTEP; BPStrt = BP; LookSrcP--; } break; } break; case(WORDLOOK) : if ((BP < SenStrt) || (*BP==' ') || (*BP=='\n')) { /* word did not match */ LookSrcP = LookStrt; Mode = SENTENCELOOK; BP = BPStrt; } else { switch(*LookSrcP) { case('?'): Mode = WRDLOCKSTEP1; BPStrt1 = BP; LookSrcP--; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = WRDLOCKSTEP1; BPStrt1 = BP; LookSrcP--; } break; default: if (LowrCs[*BP] == 
LowrCs[*LookSrcP]) { Mode = WRDLOCKSTEP1; BPStrt1 = BP; LookSrcP--; } break; } } break; case(WORDLOOK1) : if ((BP < SenStrt) || (*BP==' ') || (*BP=='\n')) { /* word did not match */ LookSrcP = LookStrt; Mode = WORDLOOK; BP = BPStrt1; } else { switch(*LookSrcP) { case('?'): Mode = WRDLOCKSTEP2; BPStrt2 = BP; LookSrcP--; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = WRDLOCKSTEP2; BPStrt2 = BP; LookSrcP--; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = WRDLOCKSTEP2; BPStrt2 = BP; LookSrcP--; } break; } } break; case(FRSTWRDLOOK) : if ((*BP==' ') || (*BP=='\n')) { /* word did not match */ return(0); } else { switch(*LookSrcP) { case('?'): Mode = FRSTWRDLCKSTP1; BPStrt = BP; LookSrcP--; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLCKSTP1; BPStrt = BP; LookSrcP--; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = FRSTWRDLCKSTP1; BPStrt = BP; LookSrcP--; } break; } } break; case(FRSTWRDLOOK1) : if ((*BP==' ') || (*BP=='\n')) { /* word did not match */ Mode = FRSTWRDLOOK; BP = BPStrt; LookSrcP = LookStrt; } else { switch(*LookSrcP) { case('?'): Mode = FRSTWRDLCKSTP2; BPStrt1 = BP; LookSrcP--; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLCKSTP2; BPStrt1 = BP; LookSrcP--; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = FRSTWRDLCKSTP2; BPStrt1 = BP; LookSrcP--; } break; } } break; case(FRSTWRDLOOK2) : if ((*BP==' ') || (*BP=='\n')) { /* word did not match */ Mode = FRSTWRDLOOK1; BP = BPStrt1; LookSrcP = LookStrt1; } else { switch(*LookSrcP) { case('?'): Mode = FRSTWRDLCKSTP3; BPStrt2 = BP; LookSrcP--; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLCKSTP3; BPStrt2 = BP; LookSrcP--; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = FRSTWRDLCKSTP3; BPStrt2 = BP; LookSrcP--; } break; } } break; case(WRDLOCKSTEP) : switch(*LookSrcP) { case('?'): LookSrcP--; 
break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { LookSrcP = LookStrt; Mode = SENTENCELOOK; BP = BPStrt; } else { LookSrcP--; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { LookSrcP = LookStrt; Mode = SENTENCELOOK; BP = BPStrt; } else { LookSrcP--; } break; } break; case(WRDLOCKSTEP1) : switch(*LookSrcP) { case('?'): LookSrcP--; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { LookSrcP = LookStrt1; Mode = WORDLOOK; BP = BPStrt1; } else { LookSrcP--; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { LookSrcP = LookStrt1; Mode = WORDLOOK; BP = BPStrt1; } else { LookSrcP--; } break; } break; case(WRDLOCKSTEP2) : switch(*LookSrcP) { case('?'): LookSrcP--; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { LookSrcP = LookStrt2; Mode = WORDLOOK1; BP = BPStrt2; } else { LookSrcP--; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { LookSrcP = LookStrt2; Mode = WORDLOOK1; BP = BPStrt2; } else { LookSrcP--; } break; } break; case(FRSTWRDLCKSTP) : switch(*LookSrcP) { case('?'): LookSrcP--; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { return(0); } LookSrcP--; break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { return(0); } LookSrcP--; break; } break; case(FRSTWRDLCKSTP1) : switch(*LookSrcP) { case('?'): LookSrcP--; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLOOK; LookSrcP = LookStrt; BP = BPStrt; } else { LookSrcP--; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { Mode = FRSTWRDLOOK; LookSrcP = LookStrt; BP = BPStrt; } else { LookSrcP--; } break; } break; case(FRSTWRDLCKSTP2) : switch(*LookSrcP) { case('?'): LookSrcP--; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLOOK1; LookSrcP = LookStrt1; BP = BPStrt1; } else { LookSrcP--; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { Mode = FRSTWRDLOOK1; LookSrcP = LookStrt1; BP = BPStrt1; } else { 
LookSrcP--; } break; } break; case(FRSTWRDLCKSTP3) : switch(*LookSrcP) { case('?'): LookSrcP--; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLOOK2; LookSrcP = LookStrt2; BP = BPStrt2; } else { LookSrcP--; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { Mode = FRSTWRDLOOK2; LookSrcP = LookStrt2; BP = BPStrt2; } else { LookSrcP--; } break; } break; } } BP = BI + FDsplTbl[MTI] + TokEnd[MTI] - TokStrt[MTI]; LookSrcP = &NoBrakAry[MTN][TokEnd[MTI]] + 1; FEnd = NBAryEnd[MTN]; Mode = FRSTWRDLCKSTP; SenEnd = BI + OVERLAP - MAXKWSZ; if (EOCB <= SenEnd) { SenEnd = EOCB - 1; } while (LookSrcP <= FEnd) { switch(*LookSrcP) { case('*'): LookSrcP++; if (LookSrcP > FEnd) { continue; } switch(Mode) { case(FRSTWRDLCKSTP): LookStrt = LookSrcP; Mode = FRSTWRDLOOK; break; case(FRSTWRDLCKSTP1): LookStrt1 = LookSrcP; Mode = FRSTWRDLOOK1; break; case(FRSTWRDLCKSTP2): LookStrt2 = LookSrcP; Mode = FRSTWRDLOOK2; break; case(FRSTWRDLCKSTP3): ErrP("Only supports up to 3 * in a word\n"); case(WRDLOCKSTEP): LookStrt1 = LookSrcP; Mode = WORDLOOK; break; case(WRDLOCKSTEP1): LookStrt2 = LookSrcP; Mode = WORDLOOK1; break; case(WRDLOCKSTEP2): ErrP("Only supports up to 3 * in a word\n"); } break; case('&'): if (++LookSrcP > FEnd) { continue; } if (*LookSrcP == '*') { if (++LookSrcP > FEnd) { continue; } } LookStrt = LookSrcP; Mode = SENTENCELOOK; break; case('['): NBAS = &NoBrakAry[MTN][0]; break; default: { break; } } BP++; switch(Mode) { case(SENTENCELOOK): /* check if BP is passed a sentence/context delimiter */ if (BP > SenEnd) { return(0); } switch(*BP) { case('\n'): BPPntr = BP - 1; if(*(BPPntr) == '.') { return(0); } while (*BPPntr == '>') { BPPntr--; } if (*BPPntr == '\n') { return(0); } break; case(' '): if(*(BP-1) == '.') { return(0); } break; } switch(*LookSrcP) { case('?'): Mode = WRDLOCKSTEP; BPStrt = BP; LookSrcP++; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = WRDLOCKSTEP; BPStrt = BP; LookSrcP++; } 
break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = WRDLOCKSTEP; BPStrt = BP; LookSrcP++; } break; } break; case(WORDLOOK) : if ((BP > SenEnd) || (*BP==' ') || (*BP=='\n')) { /* word did not match */ LookSrcP = LookStrt; Mode = SENTENCELOOK; BP = BPStrt; } else { switch(*LookSrcP) { case('?'): Mode = WRDLOCKSTEP1; BPStrt1 = BP; LookSrcP++; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = WRDLOCKSTEP1; BPStrt1 = BP; LookSrcP++; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = WRDLOCKSTEP1; BPStrt1 = BP; LookSrcP++; } break; } } break; case(WORDLOOK1) : if ((BP > SenEnd) || (*BP==' ') || (*BP=='\n')) { /* word did not match */ LookSrcP = LookStrt; Mode = WORDLOOK; BP = BPStrt1; } else { switch(*LookSrcP) { case('?'): Mode = WRDLOCKSTEP2; BPStrt2 = BP; LookSrcP++; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = WRDLOCKSTEP2; BPStrt2 = BP; LookSrcP++; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = WRDLOCKSTEP2; BPStrt2 = BP; LookSrcP++; } break; } } break; case(FRSTWRDLOOK) : if ((*BP==' ') || (*BP=='\n')) { /* word did not match */ return(0); } else { switch(*LookSrcP) { case('?'): Mode = FRSTWRDLCKSTP1; BPStrt = BP; LookSrcP++; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLCKSTP1; BPStrt = BP; LookSrcP++; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = FRSTWRDLCKSTP1; BPStrt = BP; LookSrcP++; } break; } } break; case(FRSTWRDLOOK1) : if ((*BP==' ') || (*BP=='\n')) { /* word did not match */ Mode = FRSTWRDLOOK; BP = BPStrt; LookSrcP = LookStrt; } else { switch(*LookSrcP) { case('?'): Mode = FRSTWRDLCKSTP2; BPStrt1 = BP; LookSrcP++; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLCKSTP2; BPStrt1 = BP; LookSrcP++; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = FRSTWRDLCKSTP2; BPStrt1 = BP; LookSrcP++; } break; } } break; case(FRSTWRDLOOK2) : if ((*BP==' 
') || (*BP=='\n')) { /* word did not match */ Mode = FRSTWRDLOOK1; BP = BPStrt1; LookSrcP = LookStrt1; } else { switch(*LookSrcP) { case('?'): Mode = FRSTWRDLCKSTP3; BPStrt2 = BP; LookSrcP++; break; case('['): if (BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLCKSTP3; BPStrt2 = BP; LookSrcP++; } break; default: if (LowrCs[*BP] == LowrCs[*LookSrcP]) { Mode = FRSTWRDLCKSTP3; BPStrt2 = BP; LookSrcP++; } break; } } break; case(WRDLOCKSTEP) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { LookSrcP = LookStrt; Mode = SENTENCELOOK; BP = BPStrt; } else { LookSrcP++; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { LookSrcP = LookStrt; Mode = SENTENCELOOK; BP = BPStrt; } else { LookSrcP++; } break; } break; case(WRDLOCKSTEP1) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { LookSrcP = LookStrt1; Mode = WORDLOOK; BP = BPStrt1; } else { LookSrcP++; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { LookSrcP = LookStrt1; Mode = WORDLOOK; BP = BPStrt1; } else { LookSrcP++; } break; } break; case(WRDLOCKSTEP2) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { LookSrcP = LookStrt2; Mode = WORDLOOK1; BP = BPStrt2; } else { LookSrcP++; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { LookSrcP = LookStrt2; Mode = WORDLOOK1; BP = BPStrt2; } else { LookSrcP++; } break; } break; case(FRSTWRDLCKSTP) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { return(0); } LookSrcP++; break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { return(0); } LookSrcP++; break; } break; case(FRSTWRDLCKSTP1) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLOOK; LookSrcP = LookStrt; BP = BPStrt; } else { 
LookSrcP++; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { Mode = FRSTWRDLOOK; LookSrcP = LookStrt; BP = BPStrt; } else { LookSrcP++; } break; } break; case(FRSTWRDLCKSTP2) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLOOK1; LookSrcP = LookStrt1; BP = BPStrt1; } else { LookSrcP++; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { Mode = FRSTWRDLOOK1; LookSrcP = LookStrt1; BP = BPStrt1; } else { LookSrcP++; } break; } break; case(FRSTWRDLCKSTP3) : switch(*LookSrcP) { case('?'): LookSrcP++; break; case('['): if (!BrakPtr[MTN][BrakIdx[MTN][LookSrcP-NBAS]][*BP]) { Mode = FRSTWRDLOOK2; LookSrcP = LookStrt2; BP = BPStrt2; } else { LookSrcP++; } break; default: if (LowrCs[*BP] != LowrCs[*LookSrcP]) { Mode = FRSTWRDLOOK2; LookSrcP = LookStrt2; BP = BPStrt2; } else { LookSrcP++; } break; } break; } } return(1); }