aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/editcap.pod114
-rw-r--r--editcap.c323
2 files changed, 402 insertions, 35 deletions
diff --git a/doc/editcap.pod b/doc/editcap.pod
index 138835f8e1..0c43d2c4f3 100644
--- a/doc/editcap.pod
+++ b/doc/editcap.pod
@@ -8,7 +8,6 @@ editcap - Edit and/or translate the format of capture files
B<editcap>
S<[ B<-c> E<lt>packets per fileE<gt> ]>
S<[ B<-C> E<lt>choplenE<gt> ]>
-S<[ B<-d> ]>
S<[ B<-E> E<lt>error probabilityE<gt> ]>
S<[ B<-F> E<lt>file formatE<gt> ]>
S<[ B<-A> E<lt>start timeE<gt> ]>
@@ -23,6 +22,14 @@ I<infile>
I<outfile>
S<[ I<packet#>[-I<packet#>] ... ]>
+B<editcap>
+S< B<-d> > |
+S< B<-D> E<lt>dup windowE<gt> > |
+S< B<-w> E<lt>dup time windowE<gt> >
+S<[ B<-v> ]>
+I<infile>
+I<outfile>
+
=head1 DESCRIPTION
B<Editcap> is a program that reads some or all of the captured packets from the
@@ -32,13 +39,17 @@ resulting packets to the capture I<outfile> (or outfiles).
By default, it reads all packets from the I<infile> and writes them to the
I<outfile> in libpcap file format.
-A list of packet numbers can be specified on the command line; ranges of
-packet numbers can be specified as I<start>-I<end>, referring to all packets
-from I<start> to I<end>.
-The selected packets with those numbers will I<not> be written to the
-capture file.
-If the B<-r> flag is specified, the whole packet selection is reversed;
-in that case I<only> the selected packets will be written to the capture file.
+An optional list of packet numbers can be specified on the command line;
+individual packet numbers separated by whitespace and/or ranges of packet
+numbers can be specified as I<start>-I<end>, referring to all packets from
+I<start> to I<end>. By default the selected packets with those numbers will
+I<not> be written to the capture file. If the B<-r> flag is specified, the
+whole packet selection is reversed; in that case I<only> the selected packets
+will be written to the capture file.
+
+B<Editcap> can also be used to remove duplicate packets. Several different
+options (B<-d>, B<-D> and B<-w>) are used to control the packet window
+or relative time window to be used for duplicate comparison.
B<Editcap> is able to detect, read and write the same capture files that
are supported by B<Wireshark>.
@@ -74,9 +85,49 @@ formats leaves some random bytes at the end of each packet.
=item -d
-Attempts to remove duplicate packets. The length and MD5 sum of the
-current packet are compared to the previous four packets. If a match
-is found, the packet is skipped.
+Attempts to remove duplicate packets. The length and MD5 hash of the
+current packet are compared to the previous four (4) packets. If a
+match is found, the current packet is skipped. This option is equivalent
+to using the option B<-D 5>.
+
+=item -D E<lt>dup windowE<gt>
+
+Attempts to remove duplicate packets. The length and MD5 hash of the
+current packet are compared to the previous <dup window> - 1 packets.
+If a match is found, the current packet is skipped.
+
+The use of the option B<-D 0> combined with the B<-v> option is useful
+in that each packet's Packet number, Len and MD5 Hash will be printed
+to standard out. This verbose output (specifically the MD5 hash strings)
+can be useful in scripts to identify duplicate packets across trace
+files.
+
+The <dup window> is specified as an integer value between 0 and 1000000 (inclusive).
+
+NOTE: Specifying large <dup window> values with large tracefiles can
+result in very long processing times for B<editcap>.
+
+=item -w E<lt>dup time windowE<gt>
+
+Attempts to remove duplicate packets. The current packet's arrival time
+is compared with up to 1000000 previous packets. If the packet's relative
+arrival time is I<less than> the <dup time window> of a previous packet
+and the packet length and MD5 hash of the current packet are the same then
+the packet is skipped. The duplicate comparison test stops when
+the current packet's relative arrival time is greater than <dup time window>.
+
+The <dup time window> is specified as I<seconds>[I<.fractional seconds>].
+
+The [.fractional seconds] component can be specified to nine (9) decimal
+places (billionths of a second) but most typical trace files have resolution
+to six (6) decimal places (millionths of a second).
+
+NOTE: Specifying large <dup time window> values with large tracefiles can
+result in very long processing times for B<editcap>.
+
+NOTE: The B<-w> option assumes that the packets are in chronological order.
+If the packets are NOT in chronological order then the B<-w> duplication
+removal option may not identify some duplicates.
=item -E E<lt>error probabilityE<gt>
@@ -166,6 +217,10 @@ packet, you will need od(1)/text2pcap(1).
Causes B<editcap> to print verbose messages while it's working.
+Use of B<-v> with the de-duplication switches of B<-d>, B<-D> or B<-w>
+will cause all MD5 hashes to be printed whether the packet is skipped
+or not.
+
=back
=head1 EXAMPLES
@@ -188,15 +243,44 @@ To limit a capture file to packets from number 200 to 750 (inclusive) use:
To get all packets from number 1-500 (inclusive) use:
- editcap -r capture.pcap 500.pcap 1-500
+ editcap -r capture.pcap first500.pcap 1-500
or
- editcap capture.pcap 500.pcap 501-9999999
+ editcap capture.pcap first500.pcap 501-9999999
+
+To exclude packets 1, 5, 10 to 20 and 30 to 40 from the new file use:
+
+ editcap capture.pcap exclude.pcap 1 5 10-20 30-40
+
+To select just packets 1, 5, 10 to 20 and 30 to 40 for the new file use:
+
+ editcap -r capture.pcap select.pcap 1 5 10-20 30-40
+
+To remove duplicate packets seen within the prior four frames use:
+
+ editcap -d capture.pcap dedup.pcap
+
+To remove duplicate packets seen within the prior 100 frames use:
+
+ editcap -D 101 capture.pcap dedup.pcap
+
+To remove duplicate packets seen I<less than> 1/10th of a second:
+
+ editcap -w 0.1 capture.pcap dedup.pcap
+
+To remove duplicate packets seen I<equal to or less than> 1/10th of a second:
+
+ editcap -w 0.1 capture.pcap dedup.pcap
+
+To display the MD5 hash for all of the packets (and NOT generate any
+real output file):
+
+ editcap -v -D 0 capture.pcap /dev/null
-To filter out packets 10 to 20 and 30 to 40 into a new file use:
+or on Windows systems
- editcap capture.pcap selection.pcap 10-20 30-40
+ editcap -v -D 0 capture.pcap NUL
To introduce 5% random errors in a capture file use:
diff --git a/editcap.c b/editcap.c
index c6cf7c2cba..66df3f0ba1 100644
--- a/editcap.c
+++ b/editcap.c
@@ -84,13 +84,18 @@ struct select_item {
typedef struct _fd_hash_t {
md5_byte_t digest[16];
guint32 len;
+ nstime_t time;
} fd_hash_t;
-#define DUP_DEPTH 5
-fd_hash_t fd_hash[DUP_DEPTH];
-int cur_dup = 0;
+#define DEFAULT_DUP_DEPTH 5 /* Used with -d */
+#define MAX_DUP_DEPTH 1000000 /* the maximum window (and actual size of fd_hash[]) for de-duplication */
+
+fd_hash_t fd_hash[MAX_DUP_DEPTH];
+int dup_window = DEFAULT_DUP_DEPTH;
+int cur_dup_entry = 0;
#define ONE_MILLION 1000000
+#define ONE_BILLION 1000000000
/* Weights of different errors we can introduce */
/* We should probably make these command-line arguments */
@@ -119,11 +124,13 @@ static int out_file_type = WTAP_FILE_PCAP; /* default to "libpcap" */
static int out_frame_type = -2; /* Leave frame type alone */
static int verbose = 0; /* Not so verbose */
static struct time_adjustment time_adj = {{0, 0}, 0}; /* no adjustment */
+static nstime_t relative_time_window = {0, 0}; /* de-dup time window */
static double err_prob = 0.0;
static time_t starttime = 0;
static time_t stoptime = 0;
static gboolean check_startstop = FALSE;
static gboolean dup_detect = FALSE;
+static gboolean dup_detect_by_time = FALSE;
static int find_dct2000_real_data(guint8 *buf);
@@ -266,29 +273,200 @@ set_time_adjustment(char *optarg)
time_adj.tv.tv_usec = val;
}
+static void
+set_rel_time(char *optarg)
+{
+ char *frac, *end;
+ long val;
+ int frac_digits;
+
+ if (!optarg)
+ return;
+
+ /* skip leading whitespace */
+ while (*optarg == ' ' || *optarg == '\t') {
+ optarg++;
+ }
+
+ /* ignore negative adjustment */
+ if (*optarg == '-') {
+ optarg++;
+ }
+
+ /* collect whole number of seconds, if any */
+ if (*optarg == '.') { /* only fractional (i.e., .5 is ok) */
+ val = 0;
+ frac = optarg;
+ } else {
+ val = strtol(optarg, &frac, 10);
+ if (frac == NULL || frac == optarg || val == LONG_MIN || val == LONG_MAX) {
+ fprintf(stderr, "1: editcap: \"%s\" isn't a valid rel time value\n",
+ optarg);
+ exit(1);
+ }
+ if (val < 0) { /* implies '--' since we caught '-' above */
+ fprintf(stderr, "2: editcap: \"%s\" isn't a valid rel time value\n",
+ optarg);
+ exit(1);
+ }
+ }
+ relative_time_window.secs = val;
+
+ /* now collect the partial seconds, if any */
+ if (*frac != '\0') { /* chars left, so get fractional part */
+ val = strtol(&(frac[1]), &end, 10);
+ if (*frac != '.' || end == NULL || end == frac
+ || val < 0 || val > ONE_BILLION || val == LONG_MIN || val == LONG_MAX) {
+ fprintf(stderr, "3: editcap: \"%s\" isn't a valid rel time value\n",
+ optarg);
+ exit(1);
+ }
+ }
+ else {
+ return; /* no fractional digits */
+ }
+
+ /* adjust fractional portion from fractional to numerator
+ * e.g., in "1.5" from 5 to 500000000 since .5*10^9 = 500000000 */
+ if (frac && end) { /* both are valid */
+ frac_digits = end - frac - 1; /* fractional digit count (remember '.') */
+ while(frac_digits < 9) { /* this is frac of 10^9 */
+ val *= 10;
+ frac_digits++;
+ }
+ }
+ relative_time_window.nsecs = val;
+}
+
static gboolean
is_duplicate(guint8* fd, guint32 len) {
int i;
md5_state_t ms;
- cur_dup++;
- if (cur_dup >= DUP_DEPTH)
- cur_dup = 0;
+ cur_dup_entry++;
+ if (cur_dup_entry >= dup_window)
+ cur_dup_entry = 0;
/* Calculate our digest */
md5_init(&ms);
md5_append(&ms, fd, len);
- md5_finish(&ms, fd_hash[cur_dup].digest);
+ md5_finish(&ms, fd_hash[cur_dup_entry].digest);
- fd_hash[cur_dup].len = len;
+ fd_hash[cur_dup_entry].len = len;
/* Look for duplicates */
- for (i = 0; i < DUP_DEPTH; i++) {
- if (i == cur_dup)
+ for (i = 0; i < dup_window; i++) {
+ if (i == cur_dup_entry)
continue;
- if (fd_hash[i].len == fd_hash[cur_dup].len &&
- memcmp(fd_hash[i].digest, fd_hash[cur_dup].digest, 16) == 0) {
+ if (fd_hash[i].len == fd_hash[cur_dup_entry].len &&
+ memcmp(fd_hash[i].digest, fd_hash[cur_dup_entry].digest, 16) == 0) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static gboolean
+is_duplicate_rel_time(guint8* fd, guint32 len, const nstime_t *current) {
+ int i;
+ md5_state_t ms;
+
+ cur_dup_entry++;
+ if (cur_dup_entry >= dup_window)
+ cur_dup_entry = 0;
+
+ /* Calculate our digest */
+ md5_init(&ms);
+ md5_append(&ms, fd, len);
+ md5_finish(&ms, fd_hash[cur_dup_entry].digest);
+
+ fd_hash[cur_dup_entry].len = len;
+ fd_hash[cur_dup_entry].time.secs = current->secs;
+ fd_hash[cur_dup_entry].time.nsecs = current->nsecs;
+
+ /*
+ * Look for relative time related duplicates.
+ * This is hopefully a reasonably efficient mechanism for
+ * finding duplicates by rel time in the fd_hash[] cache.
+ * We check starting from the most recently added hash
+ * entries and work backwards towards older packets.
+ * This approach allows the dup test to be terminated
+ * when the relative time of a cached entry is found to
+ * be beyond the dup time window.
+ *
+ * Of course this assumes that the input trace file is
+ * "well-formed" in the sense that the packet timestamps are
+ * in strict chronologically increasing order (which is NOT
+ * always the case!!).
+ *
+ * The fd_hash[] table was deliberately created large (1,000,000).
+ * Looking for time related duplicates in large trace files with
+ * non-fractional dup time window values can potentially take
+ * a long time to complete.
+ */
+
+ for (i = cur_dup_entry - 1;; i--) {
+ nstime_t delta;
+ int cmp;
+
+ if (i < 0) {
+ i = dup_window - 1;
+ }
+
+ if (i == cur_dup_entry) {
+ /*
+ * We've decremented back to where we started.
+ * Check no more!
+ */
+ break;
+ }
+
+ if (nstime_is_unset(&(fd_hash[i].time))) {
+ /*
+ * We've decremented to an unused fd_hash[] entry.
+ * Check no more!
+ */
+ break;
+ }
+
+ nstime_delta(&delta, current, &fd_hash[i].time);
+
+ if(delta.secs < 0 || delta.nsecs < 0)
+ {
+ /*
+ * A negative delta implies that the current packet
+ * has an absolute timestamp less than the cached packet
+ * that it is being compared to. This is NOT a normal
+ * situation since trace files usually have packets in
+ * chronological order (oldest to newest).
+ *
+ * There are several possible ways to deal with this:
+ * 1. 'continue' dup checking with the next cached frame.
+ * 2. 'break' from looking for a duplicate of the current frame.
+ * 3. Take the absolute value of the delta and see if that
+ * falls within the specified dup time window.
+ *
+ * Currently this code does option 1. But it would pretty
+ * easy to add yet-another-editcap-option to select one of
+ * the other behaviors for dealing with out-of-sequence
+ * packets.
+ */
+ continue;
+ }
+
+ cmp = nstime_cmp(&delta, &relative_time_window);
+
+ if(cmp > 0) {
+ /*
+ * The delta time indicates that we are now looking at
+ * cached packets beyond the specified dup time window.
+ * Check no more!
+ */
+ break;
+ } else if (fd_hash[i].len == fd_hash[cur_dup_entry].len &&
+ memcmp(fd_hash[i].digest, fd_hash[cur_dup_entry].digest, 16) == 0) {
return TRUE;
}
}
@@ -317,7 +495,22 @@ usage(void)
fprintf(stderr, " given time (format as YYYY-MM-DD hh:mm:ss)\n");
fprintf(stderr, " -B <stop time> don't output packets whose timestamp is after the\n");
fprintf(stderr, " given time (format as YYYY-MM-DD hh:mm:ss)\n");
- fprintf(stderr, " -d remove duplicate packets\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Duplicate packet removal:\n");
+ fprintf(stderr, " -d remove packet if duplicate (window == %d).\n", DEFAULT_DUP_DEPTH);
+ fprintf(stderr, " -D <dup window> remove packet if duplicate, configurable <dup window>.\n");
+ fprintf(stderr, " Valid <dup window> values are 0 to %d.\n", MAX_DUP_DEPTH);
+ fprintf(stderr, " NOTE: A <dup window> of 0 with -v (verbose option) is\n");
+ fprintf(stderr, " useful to print MD5 hashes.\n");
+ fprintf(stderr, " -w <dup time window> remove packet if duplicate packet is found EQUAL TO OR\n");
+ fprintf(stderr, " LESS THAN <dup time window> prior to current packet.\n");
+ fprintf(stderr, " A <dup time window> is specified in relative seconds\n");
+ fprintf(stderr, " (e.g. 0.000001)\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " NOTE: The use of the 'Duplicate packet removal' options with\n");
+ fprintf(stderr, " other editcap options except -v may not always work as expected.\n");
+ fprintf(stderr, " Specifically the -r and -t options will very likely NOT have the\n");
+ fprintf(stderr, " desired effect if combined with the -d, -D or -w.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Packet manipulation:\n");
fprintf(stderr, " -s <snaplen> truncate each packet to max. <snaplen> bytes of data\n");
@@ -343,6 +536,9 @@ usage(void)
fprintf(stderr, "Miscellaneous:\n");
fprintf(stderr, " -h display this help and exit\n");
fprintf(stderr, " -v verbose output\n");
+ fprintf(stderr, " If -v is used with any of the 'Duplicate Packet\n");
+ fprintf(stderr, " Removal' options (-d, -D or -w) then Packet lengths\n");
+ fprintf(stderr, " and MD5 hashes are printed to standard-out.\n");
fprintf(stderr, "\n");
}
@@ -399,6 +595,7 @@ main(int argc, char *argv[])
unsigned int choplen = 0; /* No chop */
wtap_dumper *pdh;
int count = 1;
+ unsigned duplicate_count = 0;
gint64 data_offset;
struct wtap_pkthdr snap_phdr;
const struct wtap_pkthdr *phdr;
@@ -434,7 +631,7 @@ main(int argc, char *argv[])
#endif
/* Process the options */
- while ((opt = getopt(argc, argv, "A:B:c:C:dE:F:hrs:i:t:T:v")) !=-1) {
+ while ((opt = getopt(argc, argv, "A:B:c:C:dD:E:F:hrs:i:t:T:vw:")) !=-1) {
switch (opt) {
@@ -483,10 +680,31 @@ main(int argc, char *argv[])
case 'd':
dup_detect = TRUE;
- for (i = 0; i < DUP_DEPTH; i++) {
- memset(&fd_hash[i].digest, 0, 16);
- fd_hash[i].len = 0;
+ dup_detect_by_time = FALSE;
+ dup_window = DEFAULT_DUP_DEPTH;
+ break;
+
+ case 'D':
+ dup_detect = TRUE;
+ dup_detect_by_time = FALSE;
+ dup_window = strtol(optarg, &p, 10);
+ if (p == optarg || *p != '\0') {
+ fprintf(stderr, "editcap: \"%s\" isn't a valid duplicate window value\n",
+ optarg);
+ exit(1);
}
+ if (dup_window < 0 || dup_window > MAX_DUP_DEPTH) {
+ fprintf(stderr, "editcap: \"%d\" duplicate window value must be between 0 and %d inclusive.\n",
+ dup_window, MAX_DUP_DEPTH);
+ exit(1);
+ }
+ break;
+
+ case 'w':
+ dup_detect = FALSE;
+ dup_detect_by_time = TRUE;
+ dup_window = MAX_DUP_DEPTH;
+ set_rel_time(optarg);
break;
case '?': /* Bad options if GNU getopt */
@@ -686,6 +904,14 @@ main(int argc, char *argv[])
if (add_selection(argv[i]) == FALSE)
break;
+ if (dup_detect || dup_detect_by_time) {
+ for (i = 0; i < dup_window; i++) {
+ memset(&fd_hash[i].digest, 0, 16);
+ fd_hash[i].len = 0;
+ nstime_set_unset(&fd_hash[i].time);
+ }
+ }
+
while (wtap_read(wth, &err, &err_info, &data_offset)) {
if (secs_per_block > 0) {
@@ -752,7 +978,7 @@ main(int argc, char *argv[])
if ( ((check_startstop && check_ts) || (!check_startstop && !check_ts)) && ((!selected(count) && !keep_em) ||
(selected(count) && keep_em)) ) {
- if (verbose)
+ if (verbose && !dup_detect && !dup_detect_by_time)
printf("Packet: %u\n", count);
/* We simply write it, perhaps after truncating it; we could do other
@@ -805,13 +1031,59 @@ main(int argc, char *argv[])
phdr = &snap_phdr;
}
+ /* suppress duplicates by packet window */
if (dup_detect) {
buf = wtap_buf_ptr(wth);
if (is_duplicate(buf, phdr->caplen)) {
- if (verbose)
- printf("Skipping duplicate: %u\n", count);
+ if (verbose) {
+ fprintf(stdout, "Skipped: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+ duplicate_count++;
+ count++;
+ continue;
+ } else {
+ if (verbose) {
+ fprintf(stdout, "Packet: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+ }
+ }
+
+ /* suppress duplicates by time window */
+ if (dup_detect_by_time) {
+ nstime_t current;
+
+ current.secs = phdr->ts.secs;
+ current.nsecs = phdr->ts.nsecs;
+
+ buf = wtap_buf_ptr(wth);
+
+ if (is_duplicate_rel_time(buf, phdr->caplen, &current)) {
+ if (verbose) {
+ fprintf(stdout, "Skipped: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
+ duplicate_count++;
count++;
continue;
+ } else {
+ if (verbose) {
+ fprintf(stdout, "Packet: %u, Len: %u, MD5 Hash: ", count, phdr->caplen);
+ for (i = 0; i < 16; i++) {
+ fprintf(stdout, "%02x", (unsigned char)fd_hash[cur_dup_entry].digest[i]);
+ }
+ fprintf(stdout, "\n");
+ }
}
}
@@ -902,6 +1174,17 @@ main(int argc, char *argv[])
}
}
+ if (dup_detect) {
+ fprintf(stdout, "%u packet%s seen, %u packet%s skipped with duplicate window of %u packets.\n",
+ count - 1, plurality(count - 1, "", "s"),
+ duplicate_count, plurality(duplicate_count, "", "s"), dup_window);
+ } else if (dup_detect_by_time) {
+ fprintf(stdout, "%u packet%s seen, %u packet%s skipped with duplicate time window equal to or less than %ld.%09ld seconds.\n",
+ count - 1, plurality(count - 1, "", "s"),
+ duplicate_count, plurality(duplicate_count, "", "s"),
+ (long)relative_time_window.secs, (long int)relative_time_window.nsecs);
+ }
+
return 0;
}