From 1830974f2c97ec4e9fc423dcf20ad2d745663af5 Mon Sep 17 00:00:00 2001 From: lego Date: Thu, 9 Feb 2006 13:05:32 +0000 Subject: Add heuristic dissection of XML git-svn-id: http://anonsvn.wireshark.org/wireshark/trunk@17228 f5534014-38df-0310-8fa8-9805f1628bb7 --- epan/dissectors/packet-media.c | 7 +++++++ epan/dissectors/packet-xml.c | 40 +++++++++++++++++++++++++++++++++++----- epan/tvbparse.c | 36 ++++++++++++++++++++++++++++++++++-- epan/tvbparse.h | 13 ++++++++++++- 4 files changed, 88 insertions(+), 8 deletions(-) (limited to 'epan') diff --git a/epan/dissectors/packet-media.c b/epan/dissectors/packet-media.c index d9b595b920..75c2a4d2b7 100644 --- a/epan/dissectors/packet-media.c +++ b/epan/dissectors/packet-media.c @@ -39,12 +39,17 @@ * print routines */ int proto_media = -1; +static heur_dissector_list_t heur_subdissector_list; static void dissect_media(tvbuff_t *tvb, packet_info *pinfo , proto_tree *tree) { int bytes; + if (dissector_try_heuristic(heur_subdissector_list, tvb, pinfo, tree)) { + return; + } + /* Add media type to the INFO column if it is visible */ if (check_col(pinfo->cinfo, COL_INFO)) { col_append_fstr(pinfo->cinfo, COL_INFO, " (%s)", pinfo->match_string); @@ -79,6 +84,8 @@ proto_register_media(void) "media" /* abbrev */ ); register_dissector("media", dissect_media, proto_media); + register_heur_dissector_list("media", &heur_subdissector_list); + /* * "Media" is used to dissect something whose normal dissector diff --git a/epan/dissectors/packet-xml.c b/epan/dissectors/packet-xml.c index e3923ec09c..5f619449fc 100644 --- a/epan/dissectors/packet-xml.c +++ b/epan/dissectors/packet-xml.c @@ -51,6 +51,7 @@ #include #include #include +#include typedef struct _xml_ns_t { /* the name of this namespace */ @@ -111,6 +112,7 @@ static dissector_handle_t xml_handle; /* parser definitions */ static tvbparse_wanted_t* want; static tvbparse_wanted_t* want_ignore; +static tvbparse_wanted_t* want_heur; static GHashTable* xmpli_names; static GHashTable* media_types; @@ -119,6 +121,8 @@ static xml_ns_t xml_ns = {"xml","/",-1,-1,-1,NULL,NULL,NULL}; static xml_ns_t unknown_ns = {"unknown","?",-1,-1,-1,NULL,NULL,NULL}; static xml_ns_t* root_ns; +static gboolean pref_heuristic = FALSE; + #define XML_CDATA -1000 #define XML_SCOPED_NAME -1001 @@ -152,7 +156,6 @@ static const gchar* default_media_types[] = { "application/resource-lists+xml", }; - static void dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) { @@ -191,6 +194,14 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) while(( tok = tvbparse_get(tt, want) )) ; } +static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) { + if ( pref_heuristic && tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) { + dissect_xml(tvb, pinfo, tree); + return TRUE; + } else { + return FALSE; + } +} static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) { GPtrArray* stack = tvbparse_data; @@ -568,6 +579,13 @@ static void init_xml_parser(void) { tvbparse_not_chars(-1,1,0," \t\r\n",NULL,NULL,unrecognized_token), NULL); + want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL, + want_comment, + want_xmlpi, + want_doctype_start, + want_dtd_tag, + want_tag, + NULL); } @@ -1146,9 +1164,15 @@ static void init_xml_names(void) { #endif } +static void apply_prefs(void) { + if (pref_heuristic) { + heur_dissector_add("http", dissect_xml_heur, xml_ns.hf_tag); + heur_dissector_add("media", dissect_xml_heur, xml_ns.hf_tag); + } +} + void proto_register_xml(void) { - static gint *ett_base[] = { &unknown_ns.ett, &xml_ns.ett, @@ -1165,7 +1189,8 @@ proto_register_xml(void) { { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, { &xml_ns.hf_cdata, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }} - }; + }; + module_t* xml_module; hf_arr = g_array_new(FALSE,FALSE,sizeof(hf_register_info)); ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*)); @@ -1179,7 +1204,12 @@ proto_register_xml(void) { proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)hf_arr->data, hf_arr->len); proto_register_subtree_array((gint**)ett_arr->data, ett_arr->len); - + + xml_module = prefs_register_protocol(xml_ns.hf_tag,apply_prefs); + prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics", + "Try to recognize XML for unknown HTTP media types", + &pref_heuristic); + g_array_free(hf_arr,FALSE); g_array_free(ett_arr,TRUE); @@ -1200,5 +1230,5 @@ proto_reg_handoff_xml(void) xml_handle = find_dissector("xml"); g_hash_table_foreach(media_types,add_dissector_media,NULL); - + } diff --git a/epan/tvbparse.c b/epan/tvbparse.c index 06b3f094f6..5b2a02797d 100644 --- a/epan/tvbparse.c +++ b/epan/tvbparse.c @@ -67,8 +67,8 @@ #define TVBPARSE_DEBUG_FIND 0x00000100 #define TVBPARSE_DEBUG_NEWTOK 0x00000080 #define TVBPARSE_DEBUG_IGNORE 0x00000040 -/*#define TVBPARSE_DEBUG_ 0x00000020 -#define TVBPARSE_DEBUG_ 0x00000010 +#define TVBPARSE_DEBUG_PEEK 0x00000020 +/*#define TVBPARSE_DEBUG_ 0x00000010 #define TVBPARSE_DEBUG_ 0x00000008 #define TVBPARSE_DEBUG_ 0x00000004 #define TVBPARSE_DEBUG_ 0x00000002 @@ -1272,6 +1272,38 @@ static void execute_callbacks(tvbparse_t* tt, tvbparse_elem_t* curr) { } +gboolean tvbparse_peek(tvbparse_t* tt, + const tvbparse_wanted_t* wanted) { + tvbparse_elem_t* tok = NULL; + int consumed; + int offset = tt->offset; + +#ifdef TVBPARSE_DEBUG + if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: ENTER offset=%i",offset); +#endif + + offset += ignore(tt,offset); + +#ifdef TVBPARSE_DEBUG + if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: after ignore offset=%i",offset); +#endif + + consumed = wanted->condition(tt,offset,wanted,&tok); + + if (consumed >= 0) { +#ifdef TVBPARSE_DEBUG + if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: GOT len=%i",consumed); +#endif + return TRUE; + } else { +#ifdef TVBPARSE_DEBUG + if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: NOT GOT"); +#endif + return FALSE; + } + +} + tvbparse_elem_t* tvbparse_get(tvbparse_t* tt, const tvbparse_wanted_t* wanted) { tvbparse_elem_t* tok = NULL; diff --git a/epan/tvbparse.h b/epan/tvbparse.h index 742363a0aa..2afc4eecfe 100644 --- a/epan/tvbparse.h +++ b/epan/tvbparse.h @@ -440,8 +440,19 @@ gboolean tvbparse_reset(tvbparse_t* tt, int offset, int len); guint tvbparse_curr_offset(tvbparse_t* tt); guint tvbparse_len_left(tvbparse_t* tt); + + +/* + * This will look for the wanted token at the current offset or after any given + * number of ignored tokens returning FALSE if there's no match or TRUE if there + * is a match. + * The parser will be left in its original state and no callbacks will be called. + */ +gboolean tvbparse_peek(tvbparse_t* tt, + const tvbparse_wanted_t* wanted); + /* - * This ill look for the wanted token at the current offset or after any given + * This will look for the wanted token at the current offset or after any given * number of ignored tokens returning NULL if there's no match. * if there is a match it will set the offset of the current parser after * the end of the token -- cgit v1.2.3