aboutsummaryrefslogtreecommitdiffstats
path: root/wiretap
diff options
context:
space:
mode:
author Guy Harris <guy@alum.mit.edu> 2013-12-30 06:43:19 +0000
committer Guy Harris <guy@alum.mit.edu> 2013-12-30 06:43:19 +0000
commit 5946c5236bc36965bd72911c6c4a1d8f432ad707 (patch)
tree d127339df7c403480345f51c0d26c4e0104e1d39 /wiretap
parent 8724d5de0f9905cf2cfa20a0bff1cc6625c78587 (diff)
For file formats that don't have magic numbers, and so are recognized by
heuristics, but that do have a file extension that files of that format are
likely to have, use the extension of the file we're opening, if it has one,
as a hint for which heuristics to try first.

svn path=/trunk/; revision=54495
Diffstat (limited to 'wiretap')
-rw-r--r-- wiretap/file_access.c 386
-rw-r--r-- wiretap/wtap.h 54
2 files changed, 375 insertions, 65 deletions
diff --git a/wiretap/file_access.c b/wiretap/file_access.c
index 0987ea0b66..ecfc038d00 100644
--- a/wiretap/file_access.c
+++ b/wiretap/file_access.c
@@ -299,21 +299,15 @@ GSList *wtap_get_all_file_extensions_list(void)
* However, the caller does have to free the private data pointer when
* returning 0, since the next file type will be called and will likely
* just overwrite the pointer.
- *
- * Put the trace files that are merely saved telnet-sessions last, since it's
- * possible that you could have captured someone a router telnet-session
- * using another tool. So, a libpcap trace of an toshiba "snoop" session
- * should be discovered as a libpcap file, not a toshiba file.
*/
-static wtap_open_routine_t open_routines_base[] = {
- /* Files that have magic bytes in fixed locations. These
- * are easy to identify.
- */
+/* Files that have magic bytes in fixed locations. These
+ * are easy to identify. Only an open routine is needed.
+ */
+static const wtap_open_routine_t magic_number_open_routines_base[] = {
libpcap_open,
pcapng_open,
- lanalyzer_open,
ngsniffer_open,
snoop_open,
iptrace_open,
@@ -328,24 +322,65 @@ static wtap_open_routine_t open_routines_base[] = {
dbs_etherwatch_open,
k12_open,
catapult_dct2000_open,
- ber_open, /* XXX - this is really a heuristic */
aethra_open,
btsnoop_open,
eyesdn_open,
- packetlogger_open, /* This type does not have a magic number, but its
- * files are sometimes grabbed by mpeg_open. */
- mpeg_open,
tnef_open,
- dct3trace_open,
- daintree_sna_open,
- mime_file_open,
- stanag4607_open,
- /* Files that don't have magic bytes at a fixed location,
- * but that instead require a heuristic of some sort to
- * identify them. This includes the ASCII trace files that
- * would be, for example, saved copies of a Telnet session
- * to some box.
+};
+#define N_MAGIC_FILE_TYPES (sizeof magic_number_open_routines_base / sizeof magic_number_open_routines_base[0])
+
+static wtap_open_routine_t* magic_number_open_routines = NULL;
+
+static GArray* magic_number_open_routines_arr = NULL;
+
+/*
+ * Initialize the magic-number open routines array if it has not been
+ * initialized yet.
+ */
+static void init_magic_number_open_routines(void) {
+
+ if (magic_number_open_routines_arr) return;
+
+ magic_number_open_routines_arr = g_array_new(FALSE,TRUE,sizeof(wtap_open_routine_t));
+
+ g_array_append_vals(magic_number_open_routines_arr,magic_number_open_routines_base,N_MAGIC_FILE_TYPES);
+
+ magic_number_open_routines = (wtap_open_routine_t*)(void *)magic_number_open_routines_arr->data;
+}
+
+void wtap_register_magic_number_open_routine(wtap_open_routine_t open_routine) {
+ init_magic_number_open_routines();
+
+ g_array_append_val(magic_number_open_routines_arr,open_routine);
+
+ magic_number_open_routines = (wtap_open_routine_t*)(void *)magic_number_open_routines_arr->data;
+}
+
+/* Files that don't have magic bytes at a fixed location,
+ * but that instead require a heuristic of some sort to
+ * identify them. This includes ASCII trace files.
+ *
+ * Entries for the ASCII trace files that would be, for example,
+ * saved copies of a Telnet session to some box are put after
+ * most of the other entries, as we don't want to treat a capture
+ * of such a session as a trace file from such a session
+ * merely because it has the right text in it. They still
+ * appear before the *really* weak entries, such as the VWR entry.
+ */
+static const struct heuristic_open_info heuristic_open_info_base[] = {
+ { lanalyzer_open, "tr1", },
+ /*
+ * PacketLogger must come before MPEG, because its files
+ * are sometimes grabbed by mpeg_open.
*/
+ { packetlogger_open, "pklg" },
+ /* Some MPEG files have magic numbers, others just have heuristics. */
+ { mpeg_open, "mpg;mp3" },
+ { dct3trace_open, "xml" },
+ { daintree_sna_open, "dcf" },
+ { mime_file_open, NULL },
+ { stanag4607_open, NULL },
+ { ber_open, NULL },
/* I put NetScreen *before* erf, because there were some
* false positives with my test-files (Sake Blok, July 2007)
@@ -359,55 +394,56 @@ static wtap_open_routine_t open_routines_base[] = {
* because there were some cases where files of those types were
* misidentified as vwr files (Guy Harris, December 2013)
*/
- netscreen_open,
- erf_open,
- ipfix_open,
- k12text_open,
- peekclassic_open,
- pppdump_open,
- iseries_open,
- ascend_open,
- toshiba_open,
- i4btrace_open,
- mp2t_open,
- csids_open,
- vms_open,
- cosine_open,
- hcidump_open,
- commview_open,
- nstrace_open,
- vwr_open,
- camins_open
+ { netscreen_open, "txt" },
+ { erf_open, "erf" },
+ { ipfix_open, "pfx;ipfix" },
+ { k12text_open, "txt" },
+ { peekclassic_open, "pkt;tpc;apc;wpz" },
+ { pppdump_open, NULL },
+ { iseries_open, "txt" },
+ { i4btrace_open, NULL },
+ { mp2t_open, "ts;mpg" },
+ { csids_open, NULL },
+ { vms_open, "txt" },
+ { cosine_open, "txt" },
+ { hcidump_open, NULL },
+ { commview_open, "ncf" },
+ { nstrace_open, "txt" },
+
+ /* ASCII trace files from Telnet sessions. */
+ { ascend_open, "txt" },
+ { toshiba_open, "txt" },
+
+ /* Extremely weak heuristics - put them at the end. */
+ { vwr_open, "vwr" },
+ { camins_open, "camins" },
};
+#define N_HEURISTIC_FILE_TYPES (sizeof heuristic_open_info_base / sizeof heuristic_open_info_base[0])
-#define N_FILE_TYPES (sizeof open_routines_base / sizeof open_routines_base[0])
-
-static wtap_open_routine_t* open_routines = NULL;
+static const struct heuristic_open_info* heuristic_open_info = NULL;
-static GArray* open_routines_arr = NULL;
+static GArray* heuristic_open_info_arr = NULL;
+/*
+ * Initialize the heuristics array if it has not been initialized yet.
+ */
+static void init_heuristic_open_info(void) {
-/* initialize the open routines array if it has not been initialized yet */
-static void init_open_routines(void) {
-
- if (open_routines_arr) return;
+ if (heuristic_open_info_arr) return;
- open_routines_arr = g_array_new(FALSE,TRUE,sizeof(wtap_open_routine_t));
+ heuristic_open_info_arr = g_array_new(FALSE,TRUE,sizeof(struct heuristic_open_info));
- g_array_append_vals(open_routines_arr,open_routines_base,N_FILE_TYPES);
+ g_array_append_vals(heuristic_open_info_arr,heuristic_open_info_base,N_HEURISTIC_FILE_TYPES);
- open_routines = (wtap_open_routine_t*)(void *)open_routines_arr->data;
+ heuristic_open_info = (const struct heuristic_open_info*)(void *)heuristic_open_info_arr->data;
}
-void wtap_register_open_routine(wtap_open_routine_t open_routine, gboolean has_magic) {
- init_open_routines();
+void wtap_register_heuristic_open_info(const struct heuristic_open_info *hi) {
+ init_heuristic_open_info();
- if (has_magic)
- g_array_prepend_val(open_routines_arr,open_routine);
- else
- g_array_append_val(open_routines_arr,open_routine);
+ g_array_append_val(heuristic_open_info_arr,*hi);
- open_routines = (wtap_open_routine_t*)(void *)open_routines_arr->data;
+ heuristic_open_info = (const struct heuristic_open_info*)(void *)heuristic_open_info_arr->data;
}
/*
@@ -429,6 +465,129 @@ void wtap_register_open_routine(wtap_open_routine_t open_routine, gboolean has_m
#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
#endif
+static char *get_file_extension(const char *pathname)
+{
+ gchar *filename;
+ gchar **components;
+ size_t ncomponents;
+ GSList *compressed_file_extensions, *compressed_file_extension;
+ gchar *extensionp;
+
+ /*
+ * Is the pathname empty?
+ */
+ if (strcmp(pathname, "") == 0)
+ return NULL; /* no extension */
+
+ /*
+ * Find the last component of the pathname.
+ */
+ filename = g_path_get_basename(pathname);
+
+ /*
+ * Does it have an extension?
+ */
+ if (strchr(filename, '.') == NULL) {
+ g_free(filename);
+ return NULL; /* no extension whatsoever */
+ }
+
+ /*
+ * Yes. Split it into components separated by ".".
+ */
+ components = g_strsplit(filename, ".", 0);
+ g_free(filename);
+
+ /*
+ * Count the components.
+ */
+ for (ncomponents = 0; components[ncomponents] != NULL; ncomponents++)
+ ;
+
+ if (ncomponents == 0) {
+ g_strfreev(components);
+ return NULL; /* no components */
+ }
+ if (ncomponents == 1) {
+ g_strfreev(components);
+ return NULL; /* only one component, with no "." */
+ }
+
+ /*
+ * Is the last component one of the extensions used for compressed
+ * files?
+ */
+ compressed_file_extensions = wtap_get_compressed_file_extensions();
+ if (compressed_file_extensions == NULL) {
+ /*
+ * We don't support reading compressed files, so just
+ * return a copy of whatever extension we did find.
+ */
+ extensionp = g_strdup(components[ncomponents - 1]);
+ g_strfreev(components);
+ return extensionp;
+ }
+ extensionp = components[ncomponents - 1];
+ for (compressed_file_extension = compressed_file_extensions;
+ compressed_file_extension != NULL;
+ compressed_file_extension = g_slist_next(compressed_file_extension)) {
+ if (strcmp(extensionp, (char *)compressed_file_extension->data) == 0) {
+ /*
+ * Yes, it's one of the compressed-file extensions.
+ * Is there an extension before that?
+ */
+ if (ncomponents == 2) {
+ g_strfreev(components);
+ return NULL; /* no, only two components */
+ }
+
+ /*
+ * Yes, return that extension.
+ */
+ extensionp = g_strdup(components[ncomponents - 2]);
+ g_strfreev(components);
+ return extensionp;
+ }
+ }
+
+ /*
+ * The extension isn't one of the compressed-file extensions;
+ * return it.
+ */
+ extensionp = g_strdup(extensionp);
+ g_strfreev(components);
+ return extensionp;
+}
+
+gboolean heuristic_uses_extension(unsigned int i, const char *extension)
+{
+ gchar **extensions_set, **extensionp;
+
+ /*
+ * Does this file type *have* any extensions?
+ */
+ if (heuristic_open_info[i].extensions == NULL)
+ return FALSE; /* no */
+
+ /*
+ * Get a list of the extensions used by the specified file type.
+ */
+ extensions_set = g_strsplit(heuristic_open_info[i].extensions, ";", 0);
+
+ /*
+ * Check each of them against the specified extension.
+ */
+ for (extensionp = extensions_set; *extensionp != NULL;
+ extensionp++) {
+ if (strcmp(extension, *extensionp) == 0) {
+ g_strfreev(extensions_set);
+ return TRUE; /* it's one of them */
+ }
+ }
+ g_strfreev(extensions_set);
+ return FALSE; /* it's not one of them */
+}
+
/* Opens a file and prepares a wtap struct.
If "do_random" is TRUE, it opens the file twice; the second open
allows the application to do random-access I/O without moving
@@ -444,6 +603,7 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
wtap *wth;
unsigned int i;
gboolean use_stdin = FALSE;
+ gchar *extension;
/* open standard input if filename is '-' */
if (strcmp(filename, "-") == 0)
@@ -559,7 +719,8 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
wth->tsprecision = WTAP_FILE_TSPREC_USEC;
wth->priv = NULL;
- init_open_routines();
+ init_magic_number_open_routines();
+ init_heuristic_open_info();
if (wth->random_fh) {
wth->fast_seek = g_ptr_array_new();
@@ -567,8 +728,8 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
file_set_random_access(wth->random_fh, TRUE, wth->fast_seek);
}
- /* Try all file types */
- for (i = 0; i < open_routines_arr->len; i++) {
+ /* Try all file types that support magic numbers */
+ for (i = 0; i < magic_number_open_routines_arr->len; i++) {
/* Seek back to the beginning of the file; the open routine
for the previous file type may have left the file
position somewhere other than the beginning, and the
@@ -582,7 +743,7 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
return NULL;
}
- switch ((*open_routines[i])(wth, err, err_info)) {
+ switch ((*magic_number_open_routines[i])(wth, err, err_info)) {
case -1:
/* I/O error - give up */
@@ -599,6 +760,103 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
}
}
+ /* Does this file's name have an extension? */
+ extension = get_file_extension(filename);
+ if (extension != NULL) {
+ /* Yes - try the heuristic types that use that extension first. */
+ for (i = 0; i < heuristic_open_info_arr->len; i++) {
+ /* Does this type use that extension? */
+ if (heuristic_uses_extension(i, extension)) {
+ /* Yes. */
+ if (file_seek(wth->fh, 0, SEEK_SET, err) == -1) {
+ /* I/O error - give up */
+ g_free(extension);
+ wtap_close(wth);
+ return NULL;
+ }
+
+ switch ((*heuristic_open_info[i].open_routine)(wth,
+ err, err_info)) {
+
+ case -1:
+ /* I/O error - give up */
+ g_free(extension);
+ wtap_close(wth);
+ return NULL;
+
+ case 0:
+ /* No I/O error, but not that type of file */
+ break;
+
+ case 1:
+ /* We found the file type */
+ g_free(extension);
+ goto success;
+ }
+ }
+ }
+
+ /* Now try the ones that don't use it. */
+ for (i = 0; i < heuristic_open_info_arr->len; i++) {
+ /* Does this type use that extension? */
+ if (!heuristic_uses_extension(i, extension)) {
+ /* No. */
+ if (file_seek(wth->fh, 0, SEEK_SET, err) == -1) {
+ /* I/O error - give up */
+ g_free(extension);
+ wtap_close(wth);
+ return NULL;
+ }
+
+ switch ((*heuristic_open_info[i].open_routine)(wth,
+ err, err_info)) {
+
+ case -1:
+ /* I/O error - give up */
+ g_free(extension);
+ wtap_close(wth);
+ return NULL;
+
+ case 0:
+ /* No I/O error, but not that type of file */
+ break;
+
+ case 1:
+ /* We found the file type */
+ g_free(extension);
+ goto success;
+ }
+ }
+ }
+ g_free(extension);
+ } else {
+ /* No - try all the heuristics types in order. */
+ for (i = 0; i < heuristic_open_info_arr->len; i++) {
+ if (file_seek(wth->fh, 0, SEEK_SET, err) == -1) {
+ /* I/O error - give up */
+ wtap_close(wth);
+ return NULL;
+ }
+
+ switch ((*heuristic_open_info[i].open_routine)(wth,
+ err, err_info)) {
+
+ case -1:
+ /* I/O error - give up */
+ wtap_close(wth);
+ return NULL;
+
+ case 0:
+ /* No I/O error, but not that type of file */
+ break;
+
+ case 1:
+ /* We found the file type */
+ goto success;
+ }
+ }
+ }
+
/* Well, it's not one of the types of file we know about. */
wtap_close(wth);
*err = WTAP_ERR_FILE_UNKNOWN_FORMAT;
diff --git a/wiretap/wtap.h b/wiretap/wtap.h
index 9b4ec88e0b..719ed0aeb5 100644
--- a/wiretap/wtap.h
+++ b/wiretap/wtap.h
@@ -1133,9 +1133,59 @@ struct file_extension_info {
const char *extensions;
};
+/*
+ * For registering file types that we can open.
+ *
+ * Each file type has an open routine and an optional list of extensions
+ * the file might have.
+ *
+ * The open routine should return:
+ *
+ * -1 on an I/O error;
+ *
+ * 1 if the file it's reading is one of the types it handles;
+ *
+ * 0 if the file it's reading isn't the type it handles.
+ *
+ * If the routine handles this type of file, it should set the "file_type"
+ * field in the "struct wtap" to the type of the file.
+ *
+ * Note that the routine does not have to free the private data pointer on
+ * error. The caller takes care of that by calling wtap_close on error.
+ * (See https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=8518)
+ *
+ * However, the routine does have to free the private data pointer when
+ * returning 0, since the next file type will be called and will likely
+ * just overwrite the pointer.
+ */
+
+/*
+ * Some file formats have defined magic numbers at fixed offsets from
+ * the beginning of the file; those routines should return 1 if and
+ * only if the file has the magic number at that offset. (pcap-ng
+ * is a bit of a special case, as it has both the Section Header Block
+ * type field and its byte-order magic field; it checks for both.)
+ * Those file formats do not require a file name extension in order
+ * to recognize them or to avoid recognizing other file types as that
+ * type, and have no extensions specified for them.
+ */
typedef int (*wtap_open_routine_t)(struct wtap*, int *, char **);
/*
+ * Some file formats don't have defined magic numbers at fixed offsets,
+ * so a heuristic is required. If that file format has any file name
+ * extensions used for it, a list of those extensions should be
+ * specified, so that, if the name of the file being opened has an
+ * extension, the file formats that use that extension are tried before
+ * the ones that don't, to handle the case where a file of one type
+ * might be recognized by the heuristics for a different file type.
+ */
+struct heuristic_open_info {
+ wtap_open_routine_t open_routine;
+ const char *extensions;
+};
+
+/*
* Types of comments.
*/
#define WTAP_COMMENT_PER_SECTION 0x00000001 /* per-file/per-file-section */
@@ -1402,7 +1452,9 @@ void register_all_wiretap_modules(void);
WS_DLL_PUBLIC
void wtap_register_file_type_extension(const struct file_extension_info *ei);
WS_DLL_PUBLIC
-void wtap_register_open_routine(wtap_open_routine_t, gboolean has_magic);
+void wtap_register_magic_number_open_routine(wtap_open_routine_t open_routine);
+WS_DLL_PUBLIC
+void wtap_register_heuristic_open_info(const struct heuristic_open_info *oi);
WS_DLL_PUBLIC
int wtap_register_file_type_subtypes(const struct file_type_subtype_info* fi);
WS_DLL_PUBLIC