aboutsummaryrefslogtreecommitdiffstats
path: root/epan
diff options
context:
space:
mode:
authorLuis Ontanon <luis.ontanon@gmail.com>2006-02-09 13:05:32 +0000
committerLuis Ontanon <luis.ontanon@gmail.com>2006-02-09 13:05:32 +0000
commit0bb1b874ec85463c8671e7d2b5e9041c0f33bb07 (patch)
treea282d9d8967008f4d0cfc86b56ae5bacc587fa66 /epan
parent118e05db0bb080813a919dadcf2b1de9aa711fb8 (diff)
Add heuristic dissection of XML
svn path=/trunk/; revision=17228
Diffstat (limited to 'epan')
-rw-r--r--epan/dissectors/packet-media.c7
-rw-r--r--epan/dissectors/packet-xml.c40
-rw-r--r--epan/tvbparse.c36
-rw-r--r--epan/tvbparse.h13
4 files changed, 88 insertions, 8 deletions
diff --git a/epan/dissectors/packet-media.c b/epan/dissectors/packet-media.c
index d9b595b920..75c2a4d2b7 100644
--- a/epan/dissectors/packet-media.c
+++ b/epan/dissectors/packet-media.c
@@ -39,12 +39,17 @@
* print routines
*/
int proto_media = -1;
+static heur_dissector_list_t heur_subdissector_list;
static void
dissect_media(tvbuff_t *tvb, packet_info *pinfo , proto_tree *tree)
{
int bytes;
+ if (dissector_try_heuristic(heur_subdissector_list, tvb, pinfo, tree)) {
+ return;
+ }
+
/* Add media type to the INFO column if it is visible */
if (check_col(pinfo->cinfo, COL_INFO)) {
col_append_fstr(pinfo->cinfo, COL_INFO, " (%s)", pinfo->match_string);
@@ -79,6 +84,8 @@ proto_register_media(void)
"media" /* abbrev */
);
register_dissector("media", dissect_media, proto_media);
+ register_heur_dissector_list("media", &heur_subdissector_list);
+
/*
* "Media" is used to dissect something whose normal dissector
diff --git a/epan/dissectors/packet-xml.c b/epan/dissectors/packet-xml.c
index e3923ec09c..5f619449fc 100644
--- a/epan/dissectors/packet-xml.c
+++ b/epan/dissectors/packet-xml.c
@@ -51,6 +51,7 @@
#include <epan/dtd.h>
#include <epan/report_err.h>
#include <epan/filesystem.h>
+#include <epan/prefs.h>
typedef struct _xml_ns_t {
/* the name of this namespace */
@@ -111,6 +112,7 @@ static dissector_handle_t xml_handle;
/* parser definitions */
static tvbparse_wanted_t* want;
static tvbparse_wanted_t* want_ignore;
+static tvbparse_wanted_t* want_heur;
static GHashTable* xmpli_names;
static GHashTable* media_types;
@@ -119,6 +121,8 @@ static xml_ns_t xml_ns = {"xml","/",-1,-1,-1,NULL,NULL,NULL};
static xml_ns_t unknown_ns = {"unknown","?",-1,-1,-1,NULL,NULL,NULL};
static xml_ns_t* root_ns;
+static gboolean pref_heuristic = FALSE;
+
#define XML_CDATA -1000
#define XML_SCOPED_NAME -1001
@@ -152,7 +156,6 @@ static const gchar* default_media_types[] = {
"application/resource-lists+xml",
};
-
static void
dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
{
@@ -191,6 +194,14 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
while(( tok = tvbparse_get(tt, want) )) ;
}
+static gboolean dissect_xml_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) {
+ if ( pref_heuristic && tvbparse_peek(tvbparse_init(tvb,0,-1,NULL,want_ignore), want_heur)) {
+ dissect_xml(tvb, pinfo, tree);
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
GPtrArray* stack = tvbparse_data;
@@ -568,6 +579,13 @@ static void init_xml_parser(void) {
tvbparse_not_chars(-1,1,0," \t\r\n",NULL,NULL,unrecognized_token),
NULL);
+ want_heur = tvbparse_set_oneof(-1, NULL, NULL, NULL,
+ want_comment,
+ want_xmlpi,
+ want_doctype_start,
+ want_dtd_tag,
+ want_tag,
+ NULL);
}
@@ -1146,9 +1164,15 @@ static void init_xml_names(void) {
#endif
}
+static void apply_prefs(void) {
+ if (pref_heuristic) {
+ heur_dissector_add("http", dissect_xml_heur, xml_ns.hf_tag);
+ heur_dissector_add("media", dissect_xml_heur, xml_ns.hf_tag);
+ }
+}
+
void
proto_register_xml(void) {
-
static gint *ett_base[] = {
&unknown_ns.ett,
&xml_ns.ett,
@@ -1165,7 +1189,8 @@ proto_register_xml(void) {
{ &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
{ &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
{ &xml_ns.hf_cdata, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
- };
+ };
+ module_t* xml_module;
hf_arr = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
@@ -1179,7 +1204,12 @@ proto_register_xml(void) {
proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)hf_arr->data, hf_arr->len);
proto_register_subtree_array((gint**)ett_arr->data, ett_arr->len);
-
+
+ xml_module = prefs_register_protocol(xml_ns.hf_tag,apply_prefs);
+ prefs_register_bool_preference(xml_module, "heuristic", "Use Heuristics",
+ "Try to recognize XML for unknown HTTP media types",
+ &pref_heuristic);
+
g_array_free(hf_arr,FALSE);
g_array_free(ett_arr,TRUE);
@@ -1200,5 +1230,5 @@ proto_reg_handoff_xml(void)
xml_handle = find_dissector("xml");
g_hash_table_foreach(media_types,add_dissector_media,NULL);
-
+
}
diff --git a/epan/tvbparse.c b/epan/tvbparse.c
index 06b3f094f6..5b2a02797d 100644
--- a/epan/tvbparse.c
+++ b/epan/tvbparse.c
@@ -67,8 +67,8 @@
#define TVBPARSE_DEBUG_FIND 0x00000100
#define TVBPARSE_DEBUG_NEWTOK 0x00000080
#define TVBPARSE_DEBUG_IGNORE 0x00000040
-/*#define TVBPARSE_DEBUG_ 0x00000020
-#define TVBPARSE_DEBUG_ 0x00000010
+#define TVBPARSE_DEBUG_PEEK 0x00000020
+/*#define TVBPARSE_DEBUG_ 0x00000010
#define TVBPARSE_DEBUG_ 0x00000008
#define TVBPARSE_DEBUG_ 0x00000004
#define TVBPARSE_DEBUG_ 0x00000002
@@ -1272,6 +1272,38 @@ static void execute_callbacks(tvbparse_t* tt, tvbparse_elem_t* curr) {
}
+gboolean tvbparse_peek(tvbparse_t* tt,
+ const tvbparse_wanted_t* wanted) {
+ tvbparse_elem_t* tok = NULL;
+ int consumed;
+ int offset = tt->offset;
+
+#ifdef TVBPARSE_DEBUG
+ if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: ENTER offset=%i",offset);
+#endif
+
+ offset += ignore(tt,offset);
+
+#ifdef TVBPARSE_DEBUG
+ if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: after ignore offset=%i",offset);
+#endif
+
+ consumed = wanted->condition(tt,offset,wanted,&tok);
+
+ if (consumed >= 0) {
+#ifdef TVBPARSE_DEBUG
+ if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: GOT len=%i",consumed);
+#endif
+ return TRUE;
+ } else {
+#ifdef TVBPARSE_DEBUG
+ if (TVBPARSE_DEBUG & TVBPARSE_DEBUG_PEEK) g_warning("tvbparse_peek: NOT GOT");
+#endif
+ return FALSE;
+ }
+
+}
+
tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
const tvbparse_wanted_t* wanted) {
tvbparse_elem_t* tok = NULL;
diff --git a/epan/tvbparse.h b/epan/tvbparse.h
index 742363a0aa..2afc4eecfe 100644
--- a/epan/tvbparse.h
+++ b/epan/tvbparse.h
@@ -440,8 +440,19 @@ gboolean tvbparse_reset(tvbparse_t* tt, int offset, int len);
guint tvbparse_curr_offset(tvbparse_t* tt);
guint tvbparse_len_left(tvbparse_t* tt);
+
+
+/*
+ * This will look for the wanted token at the current offset or after any given
+ * number of ignored tokens returning FALSE if there's no match or TRUE if there
+ * is a match.
+ * The parser will be left in its original state and no callbacks will be called.
+ */
+gboolean tvbparse_peek(tvbparse_t* tt,
+ const tvbparse_wanted_t* wanted);
+
/*
- * This ill look for the wanted token at the current offset or after any given
+ * This will look for the wanted token at the current offset or after any given
* number of ignored tokens returning NULL if there's no match.
* if there is a match it will set the offset of the current parser after
* the end of the token