aboutsummaryrefslogtreecommitdiffstats
path: root/editcap.c
diff options
context:
space:
mode:
authorBalint Reczey <balint.reczey@ericsson.com>2009-04-17 15:21:46 +0000
committerBalint Reczey <balint.reczey@ericsson.com>2009-04-17 15:21:46 +0000
commit14fa469df3be6ce40abeff352da8e6a658269364 (patch)
tree231d9a1bfd4412aac68e0d0de000ba566bf28eff /editcap.c
parentc5db90ba6095a59c2ee2ac58cca5a73e710d3a63 (diff)
From Jim Young <jyoung@gsu.edu>:
- New duplicate packet removal options for editcap https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=3168 I changed the patch a bit: - Adapted to 80 chars wide screen - Merged -w and -W parameters svn path=/trunk/; revision=28074
Diffstat (limited to 'editcap.c')
-rw-r--r--editcap.c323
1 files changed, 303 insertions, 20 deletions
diff --git a/editcap.c b/editcap.c
index c6cf7c2cba..66df3f0ba1 100644
--- a/editcap.c
+++ b/editcap.c
@@ -84,13 +84,18 @@ struct select_item {
typedef struct _fd_hash_t {
md5_byte_t digest[16];
guint32 len;
+ nstime_t time;
} fd_hash_t;
-#define DUP_DEPTH 5
-fd_hash_t fd_hash[DUP_DEPTH];
-int cur_dup = 0;
+#define DEFAULT_DUP_DEPTH 5 /* Used with -d */
+#define MAX_DUP_DEPTH 1000000 /* the maximum window (and actual size of fd_hash[]) for de-duplication */
+
+fd_hash_t fd_hash[MAX_DUP_DEPTH];
+int dup_window = DEFAULT_DUP_DEPTH;
+int cur_dup_entry = 0;
#define ONE_MILLION 1000000
+#define ONE_BILLION 1000000000
/* Weights of different errors we can introduce */
/* We should probably make these command-line arguments */
@@ -119,11 +124,13 @@ static int out_file_type = WTAP_FILE_PCAP; /* default to "libpcap" */
static int out_frame_type = -2; /* Leave frame type alone */
static int verbose = 0; /* Not so verbose */
static struct time_adjustment time_adj = {{0, 0}, 0}; /* no adjustment */
+static nstime_t relative_time_window = {0, 0}; /* de-dup time window */
static double err_prob = 0.0;
static time_t starttime = 0;
static time_t stoptime = 0;
static gboolean check_startstop = FALSE;
static gboolean dup_detect = FALSE;
+static gboolean dup_detect_by_time = FALSE;
static int find_dct2000_real_data(guint8 *buf);
@@ -266,29 +273,200 @@ set_time_adjustment(char *optarg)
time_adj.tv.tv_usec = val;
}
+static void
+set_rel_time(char *optarg)
+{
+ char *frac, *end;
+ long val;
+ int frac_digits;
+
+ if (!optarg)
+ return;
+
+ /* skip leading whitespace */
+ while (*optarg == ' ' || *optarg == '\t') {
+ optarg++;
+ }
+
+ /* ignore negative adjustment */
+ if (*optarg == '-') {
+ optarg++;
+ }
+
+ /* collect whole number of seconds, if any */
+ if (*optarg == '.') { /* only fractional (i.e., .5 is ok) */
+ val = 0;
+ frac = optarg;
+ } else {
+ val = strtol(optarg, &frac, 10);
+ if (frac == NULL || frac == optarg || val == LONG_MIN || val == LONG_MAX) {
+ fprintf(stderr, "1: editcap: \"%s\" isn't a valid rel time value\n",
+ optarg);
+ exit(1);
+ }
+ if (val < 0) { /* implies '--' since we caught '-' above */
+ fprintf(stderr, "2: editcap: \"%s\" isn't a valid rel time value\n",
+ optarg);
+ exit(1);
+ }
+ }
+ relative_time_window.secs = val;
+
+ /* now collect the partial seconds, if any */
+ if (*frac != '\0') { /* chars left, so get fractional part */
+ val = strtol(&(frac[1]), &end, 10);
+ if (*frac != '.' || end == NULL || end == frac
+ || val < 0 || val > ONE_BILLION || val == LONG_MIN || val == LONG_MAX) {
+ fprintf(stderr, "3: editcap: \"%s\" isn't a valid rel time value\n",
+ optarg);
+ exit(1);
+ }
+ }
+ else {
+ return; /* no fractional digits */
+ }
+
+ /* adjust fractional portion from fractional to numerator
+ * e.g., in "1.5" from 5 to 500000000 since .5*10^9 = 500000000 */
+ if (frac && end) { /* both are valid */
+ frac_digits = end - frac - 1; /* fractional digit count (remember '.') */
+ while(frac_digits < 9) { /* this is frac of 10^9 */
+ val *= 10;
+ frac_digits++;
+ }
+ }
+ relative_time_window.nsecs = val;
+}
+
static gboolean
is_duplicate(guint8* fd, guint32 len) {
int i;
md5_state_t ms;
- cur_dup++;
- if (cur_dup >= DUP_DEPTH)
- cur_dup = 0;
+ cur_dup_entry++;
+ if (cur_dup_entry >= dup_window)
+ cur_dup_entry = 0;
/* Calculate our digest */
md5_init(&ms);
md5_append(&ms, fd, len);
- md5_finish(&ms, fd_hash[cur_dup].digest);
+ md5_finish(&ms, fd_hash[cur_dup_entry].digest);
- fd_hash[cur_dup].len = len;
+ fd_hash[cur_dup_entry].len = len;
/* Look for duplicates */
- for (i = 0; i < DUP_DEPTH; i++) {
- if (i == cur_dup)
+ for (i = 0; i < dup_window; i++) {
+ if (i == cur_dup_entry)
continue;
- if (fd_hash[i].len == fd_hash[cur_dup].len &&
- memcmp(fd_hash[i].digest, fd_hash[cur_dup].digest, 16) == 0) {
+ if (fd_hash[i].len == fd_hash[cur_dup_entry].len &&
+ memcmp(fd_hash[i].digest, fd_hash[cur_dup_entry].digest, 16) == 0) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static gboolean
+is_duplicate_rel_time(guint8* fd, guint32 len, const nstime_t *current) {
+ int i;
+ md5_state_t ms;
+
+ cur_dup_entry++;
+ if (cur_dup_entry >= dup_window)
+ cur_dup_entry = 0;
+
+ /* Calculate our digest */
+ md5_init(&ms);
+ md5_append(&ms, fd, len);
+ md5_finish(&ms, fd_hash[cur_dup_entry].digest);
+
+ fd_hash[cur_dup_entry].len = len;
+ fd_hash[cur_dup_entry].time.secs = current->secs;
+ fd_hash[cur_dup_entry].time.nsecs = current->nsecs;
+
+ /*
+ * Look for relative time related duplicates.
+ * This is hopefully a reasonably efficient mechanism for
+ * finding duplicates by rel time in the fd_hash[] cache.
+ * We check starting from the most recently added hash
+ * entries and work backwards towards older packets.
+ * This approach allows the dup test to be terminated
+ * when the relative time of a cached entry is found to
+ * be beyond the dup time window.
+ *
+ * Of course this assumes that the input trace file is
+ * "well-formed" in the sense that the packet timestamps are
+ * in strict chronologically increasing order (which is NOT
+ * always the case!!).
+ *
+ * The fd_hash[] table was deliberately created large (1,000,000).
+ * Looking for time related duplicates in large trace files with
+ * non-fractional dup time window values can potentially take
+ * a long time to complete.
+ */
+
+ for (i = cur_dup_entry - 1;; i--) {
+ nstime_t delta;
+ int cmp;
+
+ if (i < 0) {
+ i = dup_window - 1;
+ }
+
+ if (i == cur_dup_entry) {
+ /*
+ * We've decremented back to where we started.
+ * Check no more!
+ */
+ break;
+ }
+
+ if (nstime_is_unset(&(fd_hash[i].time))) {
+ /*
+ * We've decremented to an unused fd_hash[] entry.
+ * Check no more!
+ */
+ break;
+ }
+
+ nstime_delta(&delta, current, &fd_hash[i].time);
+
+ if(delta.secs < 0 || delta.nsecs < 0)
+ {
+ /*
+ * A negative delta implies that the current packet
+ * has an absolute timestamp less than the cached packet
+ * that it is being compared to. This is NOT a normal
+ * situation since trace files usually have packets in
+ * chronological order (oldest to newest).
+ *
+ * There are several possible ways to deal with this:
+ * 1. 'continue' dup checking with the next cached frame.
+ * 2. 'break' from looking for a duplicate of the current frame.
+ * 3. Take the absolute value of the delta and see if that
+ * falls within the specified dup time window.
+ *
+ * Currently this code does option 1. But it would be pretty
+ * easy to add yet-another-editcap-option to select one of
+ * the other behaviors for dealing with out-of-sequence
+ * packets.
+ */
+ continue;
+ }
+
+ cmp = nstime_cmp(&delta, &relative_time_window);
+
+ if(cmp > 0) {
+ /*
+ * The delta time indicates that we are now looking at
+ * cached packets beyond the specified dup time window.
+ * Check no more!
+ */
+ break;
+ } else if (fd_hash[i].len == fd_hash[cur_dup_entry].len &&
+ memcmp(fd_hash[i].digest, fd_hash[cur_dup_entry].digest, 16) == 0) {
return TRUE;
}
}
@@ -317,7 +495,22 @@ usage(void)
fprintf(stderr, " given time (format as YYYY-MM-DD hh:mm:ss)\n");
fprintf(stderr, " -B <stop time> don't output packets whose timestamp is after the\n");
fprintf(stderr, " given time (format as YYYY-MM-DD hh:mm:ss)\n");
- fprintf(stderr, " -d remove duplicate packets\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Duplicate packet removal:\n");
+ fprintf(stderr, " -d remove packet if duplicate (window == %d).\n", DEFAULT_DUP_DEPTH);
+ fprintf(stderr, " -D <dup window> remove packet if duplicate, configurable <dup window>.\n");
+ fprintf(stderr, " Valid <dup window> values are 0 to %d.\n", MAX_DUP_DEPTH);
+ fprintf(stderr, " NOTE: A <dup window> of 0 with -v (verbose option) is\n");
+ fprintf(stderr, " useful to print MD5 hashes.\n");
+ fprintf(stderr, " -w <dup time window> remove packet if duplicate packet is found EQUAL TO OR\n");
+ fprintf(stderr, " LESS THAN <dup time window> prior to current packet.\n");
+ fprintf(stderr, " A <dup time window> is specified in relative seconds\n");
+ fprintf(stderr, " (e.g. 0.000001)\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " NOTE: The use of the 'Duplicate packet removal' options with\n");
+ fprintf(stderr, " other editcap options except -v may not always work as expected.\n");
+ fprintf(stderr, " Specifically the -r and -t options will very likely NOT have the\n");
+ fprintf(stderr, " desired effect if combined with the -d, -D or -w.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Packet manipulation:\n");
fprintf(stderr, " -s <snaplen> truncate each packet to max. <snaplen> bytes of data\n");
@@ -343,6 +536,9 @@ usage(void)
fprintf(stderr, "Miscellaneous:\n");
fprintf(stderr, " -h display this help and exit\n");
fprintf(stderr, " -v verbose output\n");
+ fprintf(stderr, " If -v is used with any of the 'Duplicate Packet\n");
+ fprintf(stderr, " Removal' options (-d, -D or -w) then Packet lengths\n");
+ fprintf(stderr, " and MD5 hashes are printed to standard-out.\n");
fprintf(stderr, "\n");
}
@@ -399,6 +595,7 @@ main(int argc, char *argv[])
unsigned int choplen = 0; /* No chop */
wtap_dumper *pdh;
int count = 1;
+ unsigned duplicate_count = 0;
gint64 data_offset;
struct wtap_pkthdr snap_phdr;
const struct wtap_pkthdr *phdr;
@@ -434,7 +631,7 @@ main(int argc, char *argv[])
#endif
/* Process the options */
- while ((opt = getopt(argc, argv, "A:B:c:C:dE:F:hrs:i:t:T:v")) !=-1) {
+ while ((opt = getopt(argc, argv, "A:B:c:C:dD:E:F:hrs:i:t:T:vw:")) !=-1) {
switch (opt) {
@@ -483,10 +680,31 @@ main(int argc, char *argv[])
case 'd':
dup_detect = TRUE;
- for (i = 0; i < DUP_DEPTH; i++) {
- memset(&fd_hash[i].digest, 0, 16);
- fd_hash[i].len = 0;
+ dup_detect_by_time = FALSE;
+ dup_window = DEFAULT_DUP_DEPTH;
+ break;
+
+ case 'D':
+ dup_detect = TRUE;
+ dup_detect_by_time = FALSE;
+ dup_window = strtol(optarg, &p, 10);
+ if (p == optarg || *p != '\0') {
+ fprintf(stderr, "editcap: \"%s\" isn't a valid duplicate window value\n",
+ optarg);
+ exit(1);
}
+ if (dup_window < 0 || dup_window > MAX_DUP_DEPTH) {
+ fprintf(stderr, "editcap: \"%d\" duplicate window value must be between 0 and %d inclusive.\n",
+ dup_window, MAX_DUP_DEPTH);
+ exit(1);
+ }
+ break;
+
+ case 'w':
+ dup_detect = FALSE;
+ dup_detect_by_time = TRUE;
+ dup_window = MAX_DUP_DEPTH;
+ set_rel_time(optarg);
break;
case '?': /* Bad options if GNU getopt */
@@ -686,6 +904,14 @@ main(int argc, char *argv[])
if (add_selection(argv[i]) == FALSE)
break;
+ if (dup_detect || dup_detect_by_time) {
+ for (i = 0; i < dup_window; i++) {
+ memset(&fd_hash[i].digest, 0, 16);
+ fd_hash[i].len = 0;
+ nstime_set_unset(&fd_hash[i].time);
+ }
+ }
+
while (wtap_read(wth, &err, &err_info, &data_offset)) {
if (secs_per_block > 0) {
@@ -752,7 +978,7 @@ main(int argc, char *argv[])
if ( ((check_startstop && check_ts) || (!check_startstop && !check_ts)) && ((!selected(count) && !keep_em) ||
(selected(count) && keep_em)) ) {
- if (verbose)
+ if (verbose && !dup_detect && !dup_detect_by_time)
printf("Packet: %u\n", count);
/* We simply write it, perhaps after truncating it; we could do other
@@ -805,13 +1031,59 @@ main(int argc, char *argv[])
phdr = &snap_phdr;
}
+ /* suppress duplicates by packet window */
if (dup_detect) {
buf = wtap_buf_ptr(wth);
if (is_duplicate(buf, phdr->caplen)) {
- if (verbose)
- printf("Skipping duplicate: %u\n", count);
+ if (verbose) {
+ fprintf(stdout, "Skipped: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+ duplicate_count++;
+ count++;
+ continue;
+ } else {
+ if (verbose) {
+ fprintf(stdout, "Packet: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+ }
+ }
+
+ /* suppress duplicates by time window */
+ if (dup_detect_by_time) {
+ nstime_t current;
+
+ current.secs = phdr->ts.secs;
+ current.nsecs = phdr->ts.nsecs;
+
+ buf = wtap_buf_ptr(wth);
+
+ if (is_duplicate_rel_time(buf, phdr->caplen, &current)) {
+ if (verbose) {
+ fprintf(stdout, "Skipped: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+ duplicate_count++;
count++;
continue;
+ } else {
+ if (verbose) {
+ fprintf(stdout, "Packet: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
}
}
@@ -902,6 +1174,17 @@ main(int argc, char *argv[])
}
}
+ if (dup_detect) {
+ fprintf(stdout, "%u packet%s seen, %u packet%s skipped with duplicate window of %u packets.\n",
+ count - 1, plurality(count - 1, "", "s"),
+ duplicate_count, plurality(duplicate_count, "", "s"), dup_window);
+ } else if (dup_detect_by_time) {
+ fprintf(stdout, "%u packet%s seen, %u packet%s skipped with duplicate time window equal to or less than %ld.%09ld seconds.\n",
+ count - 1, plurality(count - 1, "", "s"),
+ duplicate_count, plurality(duplicate_count, "", "s"),
+ (long)relative_time_window.secs, (long int)relative_time_window.nsecs);
+ }
+
return 0;
}