diff -ru multisort-1.1/multisort.c multisort-1.1-tt//multisort.c --- multisort-1.1/multisort.c 1999-10-28 22:12:36.000000000 +0300 +++ multisort-1.1-tt//multisort.c 2011-04-07 15:25:06.000000000 +0300 @@ -1,7 +1,6 @@ /*********************************************************************** * - * multisort - sort multiple Common Log Format files into a single, - * date-ordered file + * multisort - merge multiple sorted log files together * * $Id: multisort.c,v 1.7 1999/10/28 19:11:48 xach Exp $ * @@ -11,6 +10,11 @@ * * - Fixed some potential segfaults by checking the return values * of various functions. + * + * Modifications by Timo Teräs + * - Support syslog format in addition to common log format + * - Some speed improvements + * * * Zachary Beane * @@ -42,7 +46,7 @@ int enabled; char *name; FILE *in_fh; - long atime; + unsigned long atime; char buf[BUFSIZ + 1]; }; @@ -148,7 +152,7 @@ { register const char *s = wordlist[key].name; - if (*str == *s && !strcmp (str + 1, s + 1)) + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } @@ -174,6 +178,19 @@ #endif +static int +get_int(const char *s, int len) +{ + int val = 0, i; + + for (i = 0; i < len; i++) { + if (s[i] >= '0' && s[i] <= '9') + val = (val * 10) + s[i] - '0'; + } + return val; + +} + /* conv_time * Take a common log format string and return a sortable value. Use a hash * table for month lookup. @@ -182,92 +199,89 @@ * return values and string lengths and such. Lazy bastard. * */ - -long -conv_time(char *s) + +unsigned long +convert_time_common_log_file(const char *s) { - char *ptr; - char *orig_ptr; - int year; - int mon; - int mday; - int hour; - int min; - int sec; + /* 127.198.54.33 - - [14/Jan/1999:13:54:37 -0500] "GET /mint/images/top_maindesigns_on.jpg HTTP/1.0" 200 7164 */ + const char *ptr; + int year, mon, day; + int hour, min, sec; struct month *m; - ptr = s; - ptr = strchr(ptr, '['); - + ptr = strchr(s, '['); if (ptr == NULL) return 0; - ptr++; /* skip the bracket */ + /* skip the bracket */ + ptr++; - if (strlen(ptr) < 21) + /* too short string? */ + if (memchr(ptr, 0, 21) != NULL) return 0; - orig_ptr = ptr; - ptr[2] = '\0'; - ptr[6] = '\0'; - ptr[11] = '\0'; - ptr[14] = '\0'; - ptr[17] = '\0'; - ptr[20] = '\0'; - - mday = atoi(ptr); - - ptr += 3; - m = in_word_set(ptr, 3); - if (m == NULL) { + m = in_word_set(&ptr[3], 3); + if (m == NULL) return 0; - } else { - mon = m->pos; - } - - - ptr += 4; - year = atoi(ptr) - 1990; - - ptr += 5; - hour = atoi(ptr); - ptr += 3; - min = atoi(ptr); - - ptr += 3; - sec = atoi(ptr); - - /* Restore the characters to their original state (this saves us a - strdup!) Yay. */ - - ptr = orig_ptr; - ptr[2] = '/'; - ptr[6] = '/'; - ptr[11] = ':'; - ptr[14] = ':'; - ptr[17] = ':'; - ptr[20] = ' '; + mon = m->pos; + day = get_int(&ptr[0], 2); + year = get_int(&ptr[7], 4) - 1990; + hour = get_int(&ptr[12], 2); + min = get_int(&ptr[15], 2); + sec = get_int(&ptr[18], 2); return((year * 31104000) + (mon * 2592000) - + (mday * 86400) + + (day * 86400) + (hour * 3600) + (min * 60) + sec); } +unsigned long +convert_time_syslog_file(const char *s) +{ + /* Apr 7 02:00:14 blah blah */ + int mon, day, hour, min, sec; + struct month *m; + + /* Too short string? */ + if (memchr(s, 0, 15) != NULL) + return 0; + + m = in_word_set(&s[0], 3); + if (m == NULL) + return 0; + + mon = m->pos; + day = get_int(&s[4], 2); + hour = get_int(&s[7], 2); + min = get_int(&s[10], 2); + sec = get_int(&s[13], 2); + + return((mon * 2592000) + + (day * 86400) + + (hour * 3600) + + (min * 60) + + sec); +} + void usage(void) { - fprintf(stderr, "usage: multisort LOGFILE1 LOGFILE2 [LOGFILEn ...]\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "multisort 1.1 Copyright (C) 1999 Zachary Beane\n"); - fprintf(stderr, "This program has NO WARRANTY and is licensed " + fprintf(stderr, "usage: multisort [-format] LOGFILE1 LOGFILE2 [LOGFILEn ...]\n" + "\n" + "multisort 1.1 Copyright (C) 1999 Zachary Beane\n" + "This program has NO WARRANTY and is licensed " "under the terms of the\nGNU General Public License.\n" - "http://www.xach.com/multisort/ - bugs to xach@mint.net\n"); - + "http://www.xach.com/multisort/ - bugs to xach@mint.net\n" + "\n" + "Supported formats currently:\n" + " -clf Common Log Format (default)\n" + " -syslog Syslog Format\n" + ); exit(1); } @@ -279,9 +293,10 @@ int if_count = 0; /* number of total input files */ int if_nr = 0; /* number of active input files */ char *ret = NULL; - long min_time = 0; + unsigned long min_time = 0; int min_index = 0; int i, j; + unsigned long (*conv_time)(const char *s); if (argc < 3) { usage(); @@ -295,7 +310,19 @@ } /* Open up all the files */ - for (i = 1, j = 0; i < argc; i++, j++) { + conv_time = convert_time_common_log_file; + i = 1; + if (argv[i][0] == '-') { + if (strcmp(argv[i], "-clf") == 0) + conv_time = convert_time_common_log_file; + else if (strcmp(argv[i], "-syslog") == 0) + conv_time = convert_time_syslog_file; + else + usage(); + i++; + } + + for (j = 0; i < argc; i++, j++) { if_list[j] = (InputFile *)malloc(sizeof(InputFile)); if (if_list[j] == NULL) { perror("malloc"); @@ -319,19 +346,18 @@ if_list[j]->name); exit(1); } - + if_list[j]->atime = conv_time(if_list[j]->buf); } if_count = if_nr = j; while (if_nr) { min_index = 0; - min_time = 900000000L; + min_time = (unsigned long) -1; for (i = 0; i < if_count; i++) { if (!if_list[i]->enabled) continue; - if_list[i]->atime = conv_time(if_list[i]->buf); if (if_list[i]->atime < min_time) { min_time = if_list[i]->atime; min_index = i; @@ -345,7 +371,9 @@ /* refill the buffer */ ret = fgets(if_list[min_index]->buf, BUFSIZ, if_list[min_index]->in_fh); - if (ret == NULL) { + if (ret != NULL) { + if_list[min_index]->atime = conv_time(if_list[min_index]->buf); + } else { if_list[min_index]->enabled = 0; fclose(if_list[min_index]->in_fh); if_nr--; @@ -355,5 +383,3 @@ exit(0); } - -