/* * mult.c * dennis bednar 08 08 85 Original creation. * dennis bednar 01 09 86 Added -F flag, added debug flag. * report bugs/suggestions etc. to dennis@rlgvax.uucp * * mult read the input (stdin or file(s)), comparing adjacent lines. * In the normal case, the second, and succeeding copies of repeated * lines are output to stdout. * Note that repeated lines must be adjacent, see sort(1). * This tool is sort of the opposite of uniq. * * -fn = use field number n in each line for the comparison, n = 1 = first. * Note - in the 2 lines " abc def" and "abc def", "abc" is field # 1, * and "def" is field number 2, multiple white space chars are field separators. * * -a = output 1st of multiple occurences * Note - this flag is very useful in conjunction with -fn flag. * Example: trying to find all include files which are in multiple dirs: * with input sorted by 1st column: stdio.h /usr/include stdio.h /tmp/junk * we would use both "-f1 -a" flags to print only those lines in which * include files were in more than one directory, but not outputing * those lines in which include files were in only one directory. * */ #include char *cmd; /* in case of error */ int aflag; /* 1 if -a */ int dflag; /* 1 if -d debug */ int fflag; /* 1 if -fn */ char Fflag = '\0'; /* field separator, 0 = white space, else one char */ int fieldnum; /* value of # in -f# option, valid if fflag == 1 */ extern char *u_errmesg(); /* f/w ref */ char *find_field(); main(argc, argv) int argc; char **argv; { int i; FILE *infp; cmd = argv[0]; /* loop thru args, stopping at end of args or first file name */ for (i = 1; i < argc; ++i) { if (argv[i][0] != '-') break; /* found first non-option, ie 1st filename */ if (strcmp(argv[i], "-a") == 0) { aflag=1; continue; } /* get debug flag */ if (strcmp(argv[i], "-d") == 0) { ++dflag; /* enable debugging */ printf("Debugging on\n"); continue; /* goto next argument */ } /* get field number */ if (strncmp(argv[i], "-f", 2) == 0) { if (fflag) goto usage; /* only one -fn allowed */ fflag = 1; if (argv[i][2] == '\0') goto usage; fieldnum = atoi(argv[i]+2); if (fieldnum <= 0) { fprintf(stderr, "%s: 'field number' must be positive\n", cmd); goto usage; } continue; } /* get field separator character */ if (strncmp(argv[i], "-F", 2) == 0) { if (Fflag) goto usage; /* only one -Fc allowed */ Fflag = argv[i][2]; /* save field separator char */ if (argv[i][2] == '\0') goto usage; /* no field separator */ continue; } usage: fprintf(stderr, "usage: %s [-a] [-d] [-fn] [-Fc] [file ...]\n", cmd); fprintf(stderr, " outputs 2nd, 3rd, ... of multiple lines\n"); fprintf(stderr, " -a = also output 1st one of multiple lines\n"); fprintf(stderr, " -d = debug\n"); fprintf(stderr, " -fn = use field number n to compare instead of line, 1=1st field,\n"); fprintf(stderr, " with white space as field separator\n"); fprintf(stderr, " -Fc = means use character 'c' as the field separator\n"); exit(1); } if (i == argc) /* no file names given */ mult(stdin); /* so read from stdin */ else for ( ;i < argc; ++i) /* use given file names */ { infp = fopen( argv[i], "r"); if (infp == (FILE *)NULL) { fprintf(stderr, "%s: cant open %s: %s\n", cmd, argv[i], u_errmesg()); continue; } mult( infp ); fclose(infp); } } /* save the lines here */ struct t_line { #define LINESIZE 2048 char linebuf [ LINESIZE ]; } line [2]; /* use index for faster copy!! */ int old = 0; /* index of old line */ int new = 1; /* index of new line */ /* state flag to help decide actions based on state transitions */ #define S_START 0 #define S_UNIQLINE 1 /* saw 1st line or new one different than the old */ #define S_MULTLINE 2 /* saw new line which is same as the first */ int state = S_START; /* address of the first character in each line buffer */ #define OLDLINE line[old].linebuf #define NEWLINE line[new].linebuf mult( infp ) FILE *infp; { int isdiff; /* 1 iff old line != new line */ /* keep reading lines until eof */ while (1) { /* this is not very efficient, but its the only way * I could think of, otherwise main() gets ugly. */ /* read in next line from input */ if (fgets(NEWLINE, LINESIZE, infp) == NULL) return; /* EOF - no state transition */ stripnl(NEWLINE); /* remove ending newline from string */ /* first time mult() is called, we must save the 1st line * read as the 'oldline' for comparing against future 'newline's */ if (state == S_START) { swapline(); /* copy new line to old line */ state = S_UNIQLINE; continue; /* get next line */ } /* compare the old vs new line, since needed in both states */ /* compute it once to make code more efficient */ #define DIFF strcmp if (fflag) /* compare by field ? */ /* yes, pass the global fieldnum so that same_field() * is kept modular, and reusable in other applications */ isdiff = !same_field(OLDLINE, NEWLINE, fieldnum); else /* no compare entire line */ isdiff = (DIFF(OLDLINE, NEWLINE)); if (state == S_UNIQLINE) { if (isdiff) { swapline(); /* stay in same state */ } else { if (aflag) printf("%s\n", OLDLINE); printf("%s\n", NEWLINE); swapline(); state = S_MULTLINE; } } else if (state == S_MULTLINE) { if (isdiff) { swapline(); state = S_UNIQLINE; } else { printf("%s\n", NEWLINE); swapline(); /* stay in multiple line state */ } } } } /* * swap old line with new line * Called after read into new line, so that effect is same as copying * newline to old line, and discarding newline. */ swapline() { register int t; /* temp */ t = old; old = new; new = t; } /* * return 1 iff field number 'fieldnum' (1=1st) is same in * old line vs. new line. */ same_field(oldline, newline, fieldnum) char *oldline, *newline; int fieldnum; { char *op, /* old field ptr */ *np; /* new field ptr */ op = find_field(oldline, fieldnum); if (dflag) /* debug */ { /* dump out the fields being compared */ char *cp; printf("Old field %d = <", fieldnum); if (*op == '\0') /* past last field in line */ printf("UNDEF"); else for (cp = op; *cp && !field_dlm(*cp); ++cp) printf("%c", *cp); printf("> "); printf("Old line = <%s>\n", oldline); } np = find_field(newline, fieldnum); if (dflag) { char *cp; printf("New field %d = <", fieldnum); if (*np == '\0') /* past last field in line */ printf("UNDEF"); else for (cp = np; *cp && !field_dlm(*cp); ++cp) printf("%c", *cp); printf("> "); printf("New line = <%s>\n", newline); } if (*op == '\0' || *np == '\0') /* is either field non-existent ? */ return 0; /* assume failed to match */ /* compare fields until either one ends */ /* a field ends with either a non-zero delimiter or a '\0' char */ for ( ; *op || *np; ++op, ++np) /* both strings not exhausted */ { /* Important: Please note that field_dlm() checks for '\0' also */ if (field_dlm(*op) && field_dlm(*np)) /* both reached end */ return 1; /* hit end of field */ /* next cmp will handle case when only one field delimiter */ if (*op != *np) /* cmp both chars in the field */ return 0; /* failed to match */ /* both matched, keep going */ } /* both strings hit EOS, so matched that way */ return 1; /* matched */ } /* * return 1 iff a field delimiter such as white space or end of string * a null char is always a field delimiter, because the null replaces * the last newline after the line has been read in. */ field_dlm(c) char c; { if (c == '\0') /* is it a null at End of String ? */ return 1; /* yes, return true, because a delimiter */ if (Fflag) /* field separator defined ? */ return (c == Fflag); /* yes, see if it matches the one given */ else /* no, must check for white space */ return (c == ' ' || c == '\t' || c == '\n'); } /* * return ptr to 'num' nth field, 1 = first field in the buffer. * return ptr to '\0' if ask for a field not present */ char * find_field (line, num) char *line; int num; { char *cp; /* ptr to return */ /* must ask for valid field number */ if (num < 1) return (line+strlen(line)); /* '\0' */ /* beginning of line */ cp = line; while ( num-- > 0) { /* skip poss leading white space */ #define iswhite(c) ( (((c) & 0xff) == '\t') || (((c) & 0xff) == ' ') ) if (Fflag) /* using non-white field delimiter */ ; /* so first char is field 1 */ else /* using white space field dlm */ { while (*cp && iswhite(*cp)) ++cp; /* cp is now at '\0' EOS or 1st non-white */ } /* stop if at beginning of desired field */ if (num <= 0) break; /* else skip over this symbol to either End of String * or next white space , or next delimiter. */ /* now find the last char of this symbol */ if (Fflag) /* non-white field delimiter */ { while (*cp && !field_dlm(*cp)) ++cp; /* hit '\0' EOS or field delimiter */ if (*cp) /* fld */ ++cp; /* so make it point to begin of next field */ else ; /* don't go past end of string !!! */ } else /* white space delimiter */ { while (*cp && !iswhite(*cp)) ++cp; /* cp points to EOS or next white space char */ } } return cp; } /* * strip ending new line from string returned by fgets. * If not present as last char , then line too long. */ stripnl(s) char *s; { char *cp; cp = &s[strlen(s) - 1]; /* ptr to last char of string */ if (*cp == '\n') /* is last char a new line */ *cp = '\0'; /* yes, remove it */ else { fprintf(stderr, "%s: error line <%s>... was too long\n", cmd, s); exit(1); } } /* * return the error message string using errno * More flexibility than perror(3). */ char * u_errmesg() { #ifdef unix extern int errno; extern int sys_nerr; extern char *sys_errlist[]; static char buffer[50]; if (errno < 0 || errno >= sys_nerr) { sprintf( buffer, "errno %d undefined (%d=max)", errno, sys_nerr); return(buffer); } return( sys_errlist[errno] ); #else return ("unknown error"); #endif }