diff options
author | David Perry <boolean263@protonmail.com> | 2021-01-08 09:18:39 +0000 |
---|---|---|
committer | AndersBroman <a.broman58@gmail.com> | 2021-01-08 09:18:39 +0000 |
commit | b758fdaede7960db9f86786b91c7b23e80210e6f (patch) | |
tree | 5e5d61dc5da72185a540f0abe4efdbf9224c57f6 | |
parent | 73cef353bf4bf6ed5b59af2089112e0144fd9b94 (diff) |
Add iso8601_to_nstime() for editcap and nettrace
This adds a function to parse a date-time string in ISO 8601 format into
an `nstime_t` structure. It's based on code from epan/tvbuff.c and
wiretap/nettrace_3gpp_32_423.c and is meant to eventually replace both.
(Currently it only replaces the latter.)
Since most of Wireshark expects ISO 8601 date-times to fit a fairly
strict pattern, iso8601_to_nstime() currently rejects date-times without
separators between the components, even though ISO 8601 actually permits
this. This could be revisited later.
This also uses iso8601_to_nstime() in editcap to parse the -A/-B options,
thus allowing the user to specify a time zone if desired. (See #17110)
-rw-r--r-- | debian/libwsutil0.symbols | 2 | ||||
-rw-r--r-- | doc/editcap.pod | 14 | ||||
-rw-r--r-- | editcap.c | 88 | ||||
-rw-r--r-- | wiretap/nettrace_3gpp_32_423.c | 121 | ||||
-rw-r--r-- | wsutil/nstime.c | 189 | ||||
-rw-r--r-- | wsutil/nstime.h | 6 | ||||
-rw-r--r-- | wsutil/time_util.c | 41 | ||||
-rw-r--r-- | wsutil/time_util.h | 12 |
8 files changed, 279 insertions, 194 deletions
diff --git a/debian/libwsutil0.symbols b/debian/libwsutil0.symbols index f180cfb8a4..2aa4359818 100644 --- a/debian/libwsutil0.symbols +++ b/debian/libwsutil0.symbols @@ -104,6 +104,7 @@ libwsutil.so.0 libwsutil0 #MINVER# init_report_message@Base 2.3.0 is_default_profile@Base 1.12.0~rc1 isdigit_string@Base 1.10.0 + iso8601_to_nstime@Base 3.5.0 isprint_string@Base 1.10.0 isprint_utf8_string@Base 2.6.1 json_decode_string_inplace@Base 2.9.0 @@ -181,6 +182,7 @@ libwsutil.so.0 libwsutil0 #MINVER# started_with_special_privs@Base 1.10.0 test_for_directory@Base 1.12.0~rc1 test_for_fifo@Base 1.12.0~rc1 + tm_is_valid@Base 3.5.0 type_util_gdouble_to_guint64@Base 1.10.0 type_util_guint64_to_gdouble@Base 1.10.0 ulaw2linear@Base 1.12.0~rc1 diff --git a/doc/editcap.pod b/doc/editcap.pod index dcd7aae81c..402c710176 100644 --- a/doc/editcap.pod +++ b/doc/editcap.pod @@ -101,14 +101,20 @@ strings that include spaces. =item -A E<lt>start timeE<gt> Reads only the packets whose timestamp is on or after start time. -The time is given in the following format YYYY-MM-DD HH:MM:SS[.nnnnnnnnn] -(the decimal and fractional seconds are optional). +The time is given in ISO 8601 format, either +YYYY-MM-DD HH:MM:SS[.nnnnnnnnn][Z|±hh:mm] or +YYYY-MM-DDTHH:MM:SS[.nnnnnnnnn][Z|±hh:mm] . +The fractional seconds are optional, as is the time zone offset from UTC +(in which case local time is assumed). =item -B E<lt>stop timeE<gt> Reads only the packets whose timestamp is before stop time. -The time is given in the following format YYYY-MM-DD HH:MM:SS[.nnnnnnnnn] -(the decimal and fractional seconds are optional). +The time is given in ISO 8601 format, either +YYYY-MM-DD HH:MM:SS[.nnnnnnnnn][Z|±hh:mm] or +YYYY-MM-DDTHH:MM:SS[.nnnnnnnnn][Z|±hh:mm] . +The fractional seconds are optional, as is the time zone offset from UTC +(in which case local time is assumed). 
=item -c E<lt>packets per fileE<gt> @@ -759,9 +759,11 @@ print_usage(FILE *output) fprintf(output, "Packet selection:\n"); fprintf(output, " -r keep the selected packets; default is to delete them.\n"); fprintf(output, " -A <start time> only read packets whose timestamp is after (or equal\n"); - fprintf(output, " to) the given time (format as YYYY-MM-DD hh:mm:ss[.nnnnnnnnn]).\n"); + fprintf(output, " to) the given time.\n"); fprintf(output, " -B <stop time> only read packets whose timestamp is before the\n"); - fprintf(output, " given time (format as YYYY-MM-DD hh:mm:ss[.nnnnnnnnn]).\n"); + fprintf(output, " given time.\n"); + fprintf(output, " Time format for -A/-B options is\n"); + fprintf(output, " YYYY-MM-DDThh:mm:ss[.nnnnnnnnn][Z|+-hh:mm]\n"); fprintf(output, "\n"); fprintf(output, "Duplicate packet removal:\n"); fprintf(output, " --novlan remove vlan info from packets before checking for duplicates.\n"); @@ -1279,77 +1281,25 @@ main(int argc, char *argv[]) case 'A': case 'B': { -#define NSEC_MAXLEN 9 - struct tm st_tm; - guint32 nsec = 0; - char *och; + nstime_t in_time; - memset(&st_tm,0,sizeof(struct tm)); - - if (!(och=strptime(optarg,"%Y-%m-%d %T", &st_tm))) { - goto invalid_time; - } - - /* Sub-second support: see if the time is followed by a '.' */ - if (och != NULL && *och != '\0') { - char *c; - char subsec[NSEC_MAXLEN+1] = ""; - int nchars; - - if (*och != '.') { - goto invalid_time; - } - och++; - c = subsec; - - /* Ensure that only 1-9 digits follow the '.' */ - for (nchars = 0; *och != '\0' && nchars < NSEC_MAXLEN; nchars++) { - if (!g_ascii_isdigit(*och)) { - goto invalid_time; - } - *c++ = *och++; - } - if (*och != '\0') { - goto invalid_time; - } - /* Right-pad what we do have, so eg. 
5 = 500,000,000 ns */ - for (; nchars < NSEC_MAXLEN; nchars++) { - *c++ = '0'; - } - *c = '\0'; - if (!ws_strtou32(subsec, NULL, &nsec) || nsec >= NANOSECS_PER_SEC) { - goto invalid_time; + check_startstop = TRUE; + if (0 < iso8601_to_nstime(&in_time, optarg)) { + if (opt == 'A') { + nstime_copy(&starttime, &in_time); + have_starttime = TRUE; + } else { + nstime_copy(&stoptime, &in_time); + have_stoptime = TRUE; } + break; } - - check_startstop = TRUE; - st_tm.tm_isdst = -1; - - /* - * XXX - this will normalize invalid dates rather than - * returning an error, so you could specify, for example, - * 2020-10-40 (to quote the macOS and probably *BSD manual - * page for ctime()/localtime()/mktime()/etc., "October 40 - * is changed into November 9"). - * - * Is that a bug or a feature? - */ - if (opt == 'A') { - starttime.secs = mktime(&st_tm); - starttime.nsecs = nsec; - have_starttime = TRUE; - } else { - stoptime.secs = mktime(&st_tm); - stoptime.nsecs = nsec; - have_stoptime = TRUE; + else { + fprintf(stderr, "editcap: \"%s\" isn't a valid date and time\n\n", + optarg); + ret = INVALID_OPTION; + goto clean_exit; } - break; - -invalid_time: - fprintf(stderr, "editcap: \"%s\" isn't a valid date and time\n\n", - optarg); - ret = INVALID_OPTION; - goto clean_exit; } case 'c': diff --git a/wiretap/nettrace_3gpp_32_423.c b/wiretap/nettrace_3gpp_32_423.c index 825b7c1805..6fa13b7f62 100644 --- a/wiretap/nettrace_3gpp_32_423.c +++ b/wiretap/nettrace_3gpp_32_423.c @@ -762,125 +762,6 @@ nettrace_close(wtap *wth) } } -/* This attribute specification contains a timestamp that refers to the start of the -* first trace data that is stored in this file. -* -* It is a complete timestamp including day, time and delta UTC hour. E.g. -* "2001-09-11T09:30:47-05:00". 
-*/ - -#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) - -static char* -nettrace_parse_begin_time(char *curr_pos, size_t n, nstime_t *ts) -{ - /* Time vars */ - guint year, month, day, hour, minute, second, frac; - int UTCdiffh, UTCdiffm = 0; - int time_length = 0; - int scan_found; - static const guint days_in_month[12] = { - 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 - }; - struct tm tm; - char *end_pos; - int length; - - nstime_set_unset(ts); /* mark time as invalid, until successful converted */ - - end_pos = g_strstr_len(curr_pos, n, "\"/>"); - length = (int)(end_pos - curr_pos); - - if (length < 2) { - return end_pos + 3; - } - - /* Scan for this format: 2001-09-11T09:30:47 Then we will parse any fractions and UTC offset */ - scan_found = sscanf(curr_pos, "%4u-%2u-%2uT%2u:%2u:%2u%n", - &year, &month, &day, &hour, &minute, &second, &time_length); - if (scan_found == 6 && time_length == 19) { - /* Fill in the fields and return it in a time_t */ - tm.tm_year = year - 1900; - if (month < 1 || month > 12) { - /* g_warning("Failed to parse time, month is %u", month); */ - return curr_pos; - } - tm.tm_mon = month - 1; /* Zero count*/ - if (day > ((month == 2 && isleap(year)) ? 29 : days_in_month[month - 1])) { - /* g_warning("Failed to parse time, %u-%02u-%2u is not a valid day", year, month, day); */ - return curr_pos; - } - tm.tm_mday = day; - if (hour > 23) { - /* g_warning("Failed to parse time, hour is %u", hour); */ - return curr_pos; - } - tm.tm_hour = hour; - if (minute > 59) { - /* g_warning("Failed to parse time, minute is %u", minute); */ - return curr_pos; - } - tm.tm_min = minute; - if (second > 60) { - /* - * Yes, 60, for leap seconds - POSIX's and Windows' - * refusal to believe in them nonwithstanding. 
- */ - /* g_warning("Failed to parse time, second is %u", second); */ - return curr_pos; - } - tm.tm_sec = second; - tm.tm_isdst = -1; /* daylight saving time info not known */ - - /* Move curr_pos to end of parsed object and get that character 2019-01-10T10:14:56 */ - curr_pos += time_length; - if (*curr_pos == '.' || *curr_pos == ',') { - /* We have fractions */ - curr_pos++; - if (1 == sscanf(curr_pos, "%u%n", &frac, &time_length)) { - if ((frac >= 1000000000) || (frac == 0)) { - ts->nsecs = 0; - } else { - switch (time_length) { /* including leading zeros */ - case 1: ts->nsecs = frac * 100000000; break; - case 2: ts->nsecs = frac * 10000000; break; - case 3: ts->nsecs = frac * 1000000; break; - case 4: ts->nsecs = frac * 100000; break; - case 5: ts->nsecs = frac * 10000; break; - case 6: ts->nsecs = frac * 1000; break; - case 7: ts->nsecs = frac * 100; break; - case 8: ts->nsecs = frac * 10; break; - default: ts->nsecs = frac; - } - } - curr_pos += time_length; - } - } - - if (*curr_pos == '-' || *curr_pos == '+' || *curr_pos == 'Z') { - /* We have UTC offset */ - if (1 <= sscanf(curr_pos, "%3d:%2d", &UTCdiffh, &UTCdiffm)) { - /* adjust for timezone */ - tm.tm_hour -= UTCdiffh; - tm.tm_min -= UTCdiffh < 0 ? -UTCdiffm: UTCdiffm; - } /* else 'Z' for Zero time */ - /* convert to UTC time */ -#ifdef _WIN32 - ts->secs = _mkgmtime(&tm); -#else - ts->secs = timegm(&tm); -#endif - } else { - /* no UTC offset means localtime in ISO 8601 */ - ts->secs = mktime(&tm); - } - /* } else { - g_warning("Failed to parse time, only %u fields", scan_found); */ - } - - return curr_pos; -} - /* Test the current file to see if it's one we can read. * Set in file_access.c as the function to be called for this file type. */ @@ -929,7 +810,7 @@ nettrace_3gpp_32_423_file_open(wtap *wth, int *err, gchar **err_info) /* Ok it's our file. 
From here we'll need to free memory */ file_info = g_new0(nettrace_3gpp_32_423_file_info_t, 1); - curr_pos = nettrace_parse_begin_time(curr_pos, (guint)(bytes_read - (curr_pos - magic_buf)), &file_info->start_time); + curr_pos += iso8601_to_nstime(&file_info->start_time, curr_pos); file_info->start_offset = start_offset + (curr_pos - magic_buf); file_info->buffer = g_byte_array_sized_new(RINGBUFFER_START_SIZE); g_byte_array_append(file_info->buffer, curr_pos, (guint)(bytes_read - (curr_pos - magic_buf))); diff --git a/wsutil/nstime.c b/wsutil/nstime.c index 16d6e0adcc..a2939a4c30 100644 --- a/wsutil/nstime.c +++ b/wsutil/nstime.c @@ -10,9 +10,12 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ +#include <stdio.h> +#include <string.h> #include <glib.h> #include "nstime.h" #include "epochs.h" +#include "time_util.h" /* this is #defined so that we can clearly see that we have the right number of zeros, rather than as a guard against the number of nanoseconds in a second @@ -265,6 +268,192 @@ nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime) } /* + * function: iso8601_to_nstime + * parses a character string for a date and time given in + * ISO 8601 date-time format (eg: 2014-04-07T05:41:56.782+00:00) + * and converts to an nstime_t + * returns number of chars parsed on success, or 0 on failure + * + * NB. ISO 8601 is actually a lot more flexible than the above format, + * much to a developer's chagrin. The -/T/: separators are technically + * optional. + * Code is here to allow for that, but short-circuited for now since + * our callers assume they're there. + * + * Future improvements could parse other ISO 8601 formats, such as + * YYYY-Www-D, YYYY-DDD, etc. 
For a relatively easy introduction to + * these formats, see wikipedia: https://en.wikipedia.org/wiki/ISO_8601 + */ +guint8 +iso8601_to_nstime(nstime_t *nstime, const char *ptr) +{ + struct tm tm; + gint n_scanned = 0; + gint n_chars = 0; + guint frac = 0; + gint off_hr = 0; + gint off_min = 0; + guint8 ret_val = 0; + const char *start = ptr; + gboolean has_separator = FALSE; + gboolean have_offset = FALSE; + + memset(&tm, 0, sizeof(tm)); + tm.tm_isdst = -1; + nstime_set_unset(nstime); + + /* There may be 2 or 0 dashes between the date parts */ + has_separator = (*(ptr+4) == '-'); + + /* For now we require the separator to remove ambiguity */ + if (!has_separator) return 0; + + /* Note: sscanf is known to be inconsistent across platforms with respect + to whether a %n is counted as a return value or not, so we use '<'/'>=' + */ + n_scanned = sscanf(ptr, has_separator ? "%4u-%2u-%2u%n" : "%4u%2u%2u%n", + &tm.tm_year, + &tm.tm_mon, + &tm.tm_mday, + &n_chars); + if (n_scanned >= 3) { + /* Got year, month, and day */ + tm.tm_mon--; /* struct tm expects 0-based month */ + tm.tm_year -= 1900; /* struct tm expects number of years since 1900 */ + ptr += n_chars; + } + else { + return 0; + } + + if (*ptr == 'T' || *ptr == ' ') { + /* The 'T' between date and time is optional if the meaning is + unambiguous. We also allow for ' ' here to support formats + such as editcap's -A/-B options */ + ptr++; + } + else { + /* For now we require the separator to remove ambiguity; + remove this entire 'else' when we wish to change that */ + return 0; + } + + /* Now we're on to the time part. We'll require a minimum of hours and + minutes. + Test for a possible ':' */ + has_separator = (*(ptr+2) == ':'); + if (!has_separator) return 0; + + n_scanned = sscanf(ptr, has_separator ? 
"%2u:%2u%n" : "%2u%2u%n", + &tm.tm_hour, + &tm.tm_min, + &n_chars); + if (n_scanned >= 2) { + ptr += n_chars; + } + else { + /* didn't get hours and minutes */ + return 0; + } + + /* Test for (whole) seconds */ + if ((has_separator && *ptr == ':') || + (!has_separator && g_ascii_isdigit(*ptr))) { + /* Looks like we should have them */ + if (1 > sscanf(ptr, has_separator ? ":%2u%n" : "%2u%n", + &tm.tm_sec, &n_chars)) { + /* Couldn't get them */ + return 0; + } + ptr += n_chars; + + /* Now let's test for fractional seconds */ + if (*ptr == '.' || *ptr == ',') { + /* Get fractional seconds */ + ptr++; + if (1 <= sscanf(ptr, "%u%n", &frac, &n_chars)) { + /* normalize frac to nanoseconds */ + if ((frac >= 1000000000) || (frac == 0)) { + frac = 0; + } else { + switch (n_chars) { /* including leading zeros */ + case 1: frac *= 100000000; break; + case 2: frac *= 10000000; break; + case 3: frac *= 1000000; break; + case 4: frac *= 100000; break; + case 5: frac *= 10000; break; + case 6: frac *= 1000; break; + case 7: frac *= 100; break; + case 8: frac *= 10; break; + default: break; + } + } + ptr += n_chars; + } + /* If we didn't get frac, it's still its default of 0 */ + } + } + else { + tm.tm_sec = 0; + } + + /* Validate what we got so far. mktime() doesn't care about strange + values (and we use this to our advantage when calculating the + time zone offset) but we should at least start with something valid */ + if (!tm_is_valid(&tm)) { + return 0; + } + + /* Check for a time zone offset */ + if (*ptr == '-' || *ptr == '+' || *ptr == 'Z') { + /* We have a UTC-relative offset */ + if (*ptr == 'Z') { + off_hr = off_min = n_scanned = 0; + have_offset = TRUE; + ptr++; + } + else { + has_separator = (*(ptr+3) == ':'); + if (!has_separator) return 0; + n_scanned = sscanf(ptr, has_separator ? 
"%3d:%2d%n" : "%3d%2d%n", + &off_hr, + &off_min, + &n_chars); + if (n_scanned >= 1) { + /* Definitely got hours */ + have_offset = TRUE; + if (n_scanned >= 2) { + /* Got minutes too */ + ptr += n_chars; + } + else { + /* Only got hours, just move ptr past the +hh or whatever */ + off_min = 0; + ptr += 3; + } + } + else { + /* Didn't get a valid offset, treat as if there's none at all */ + off_hr = off_min = n_scanned = 0; + have_offset = FALSE; + } + } + } + if (have_offset) { + tm.tm_hour -= off_hr; + tm.tm_min -= (off_hr < 0 ? -off_min : off_min); + nstime->secs = mktime_utc(&tm); + } + else { + /* No UTC offset given; ISO 8601 says this means localtime */ + nstime->secs = mktime(&tm); + } + nstime->nsecs = frac; + ret_val = (guint)(ptr-start); + return ret_val; +} + +/* * Editor modelines * * Local Variables: diff --git a/wsutil/nstime.h b/wsutil/nstime.h index 6c50e1fb92..2292b6f811 100644 --- a/wsutil/nstime.h +++ b/wsutil/nstime.h @@ -11,6 +11,7 @@ #ifndef __NSTIME_H__ #define __NSTIME_H__ +#include <glib.h> #include <time.h> #include "ws_symbol_export.h" @@ -122,6 +123,11 @@ WS_DLL_PUBLIC gboolean filetime_to_nstime(nstime_t *nstime, guint64 filetime); FALSE on failure */ WS_DLL_PUBLIC gboolean nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime); +/** parse an ISO 8601 format datetime string to nstime, returns number of + chars parsed on success, 0 on failure. 
+ Note that nstime is set to unset in the case of failure */ +WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/wsutil/time_util.c b/wsutil/time_util.c index 98df723a9b..a842acbaac 100644 --- a/wsutil/time_util.c +++ b/wsutil/time_util.c @@ -23,6 +23,9 @@ #include <windows.h> #endif +/* Test if the given year is a leap year */ +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + /* converts a broken down date representation, relative to UTC, * to a timestamp; it uses timegm() if it's available. * Copied from Glib source gtimer.c @@ -47,7 +50,7 @@ mktime_utc(struct tm *tm) /* count number of leap years */ yr = tm->tm_year + 1900; - if (tm->tm_mon + 1 < 3 && (yr % 4) == 0 && ((yr % 100) != 0 || (yr % 400) == 0)) + if (tm->tm_mon + 1 < 3 && isleap(yr)) yr--; retval += (((yr / 4) - (yr / 100) + (yr / 400)) - 477); /* 477 = ((1970 / 4) - (1970 / 100) + (1970 / 400)) */ @@ -61,6 +64,42 @@ mktime_utc(struct tm *tm) #endif /* !HAVE_TIMEGM */ } +/* Validate the values in a time_t + * Currently checks tm_year, tm_mon, tm_mday, tm_hour, tm_min, and tm_sec; + * disregards tm_wday, tm_yday, and tm_isdst. + * Use this in situations where you wish to return an error rather than + * normalizing invalid dates; otherwise you could specify, for example, + * 2020-10-40 (to quote the macOS and probably *BSD manual + * page for ctime()/localtime()/mktime()/etc., "October 40 + * is changed into November 9"). + */ +gboolean +tm_is_valid(struct tm *tm) +{ + static const gint8 days_in_month[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 + }; + + if (tm->tm_mon < 0 || tm->tm_mon > 11) { + return FALSE; + } + if (tm->tm_mday < 0 || tm->tm_mday > + ((tm->tm_mon == 1 && isleap(tm->tm_year)) ? 
29 : days_in_month[tm->tm_mon])) { + return FALSE; + } + if (tm->tm_hour < 0 || tm->tm_hour > 23) { + return FALSE; + } + if (tm->tm_min < 0 || tm->tm_min > 59) { + return FALSE; + } + if (tm->tm_sec < 0 || tm->tm_sec > 60) { + /* 60, not 59, to account for leap seconds */ + return FALSE; + } + return TRUE; +} + void get_resource_usage(double *user_time, double *sys_time) { #ifndef _WIN32 struct rusage ru; diff --git a/wsutil/time_util.h b/wsutil/time_util.h index 1cd430dedf..99d97d662e 100644 --- a/wsutil/time_util.h +++ b/wsutil/time_util.h @@ -18,9 +18,21 @@ extern "C" { #include <time.h> +/** Converts a broken down date representation, relative to UTC, + * to a timestamp + */ WS_DLL_PUBLIC time_t mktime_utc(struct tm *tm); +/** Validate the values in a time_t. + * Currently checks tm_year, tm_mon, tm_mday, tm_hour, tm_min, and tm_sec; + * disregards tm_wday, tm_yday, and tm_isdst. + * + * @param tm The struct tm to validate. + */ +WS_DLL_PUBLIC +gboolean tm_is_valid(struct tm *tm); + /** Fetch the process CPU time. * * Fetch the current process user and system CPU times, convert them to |