diff options
author | Luis Ontanon <luis.ontanon@gmail.com> | 2005-09-10 17:29:15 +0000 |
---|---|---|
committer | Luis Ontanon <luis.ontanon@gmail.com> | 2005-09-10 17:29:15 +0000 |
commit | 96326c0b8617db336cb85d122b1e1e5a00644f2b (patch) | |
tree | ac7c2065c289737d9c131c768c1c153e55747890 | |
parent | 541fd750b86a0fa3666c4ec2d917ff6c23a332f9 (diff) |
the dtd parser (still missing the glue) and few fixes to packet-xml.c
svn path=/trunk/; revision=15745
-rw-r--r-- | epan/Makefile.am | 21 | ||||
-rw-r--r-- | epan/Makefile.common | 5 | ||||
-rw-r--r-- | epan/Makefile.nmake | 14 | ||||
-rw-r--r-- | epan/dissectors/packet-xml.c | 307 | ||||
-rw-r--r-- | epan/dtd.h | 61 | ||||
-rw-r--r-- | epan/dtd_grammar.lemon | 151 | ||||
-rw-r--r-- | epan/dtd_parse.l | 316 | ||||
-rw-r--r-- | epan/dtd_preparse.l | 258 |
8 files changed, 1061 insertions, 72 deletions
diff --git a/epan/Makefile.am b/epan/Makefile.am index 56efd3acdb..6faee21134 100644 --- a/epan/Makefile.am +++ b/epan/Makefile.am @@ -50,6 +50,9 @@ EXTRA_libethereal_la_SOURCES = \ inet_v6defs.h EXTRA_DIST = \ + dtd_grammar.lemon \ + dtd_parse.l \ + dtd_preparse.l \ enterprise-numbers \ libethereal.def \ Makefile.common \ @@ -66,7 +69,12 @@ CLEANFILES = \ *~ DISTCLEANFILES = \ + dtd_grammar.c \ + dtd_grammar.h \ + dtd_parse.c \ + dtd_preparse.c \ radius_dict.c + MAINTAINERCLEANFILES = \ Makefile.in @@ -85,7 +93,20 @@ exntest: exntest.o except.o radius_dict.c: radius_dict.l $(LEX) $^ + +dtd_parse.c : dtd_parse.l + $(LEX) -odtd_parse.c $(srcdir)/dtd_parse.l +dtd_preparse.c : dtd_preparse.l + $(LEX) -odtd_preparse.c $(srcdir)/dtd_preparse.l + +dtd_grammar.h: dtd_grammar.c + +LEMON=../tools/lemon + +dtd_grammar.c: dtd_grammar.lemon + $(LEMON)/lemon t=$(srcdir)/$(LEMON)/lempar.c $^ + tvbtest.o exntest.o: exceptions.h sminmpec.c: enterprise-numbers make-sminmpec.pl diff --git a/epan/Makefile.common b/epan/Makefile.common index 2f20c4c08d..150140a00a 100644 --- a/epan/Makefile.common +++ b/epan/Makefile.common @@ -43,6 +43,9 @@ LIBETHEREAL_SRC = \ crypt-md4.c \ crypt-md5.c \ crypt-rc4.c \ + dtd_grammar.c \ + dtd_parse.c \ + dtd_preparse.c \ emem.c \ epan.c \ except.c \ @@ -105,6 +108,8 @@ LIBETHEREAL_INCLUDES = \ crypt-md4.h \ crypt-md5.h \ crypt-rc4.h \ + dtd.h \ + dtd_grammar.h \ emem.h \ epan.h \ epan_dissect.h \ diff --git a/epan/Makefile.nmake b/epan/Makefile.nmake index a5d9e98906..a6f05223ee 100644 --- a/epan/Makefile.nmake +++ b/epan/Makefile.nmake @@ -153,3 +153,17 @@ radius_dict.c : radius_dict.l sminmpec.c: enterprise-numbers make-sminmpec.pl $(PERL) make-sminmpec.pl enterprise-numbers sminmpec.c + +dtd_parse.c : dtd_parse.l + $(LEX) -odtd_parse.c $(srcdir)/dtd_parse.l + +dtd_preparse.c : dtd_preparse.l + $(LEX) -odtd_preparse.c $(srcdir)/dtd_preparse.l + +dtd_grammar.h: dtd_grammar.c + +LEMON=../tools/lemon + +dtd_grammar.c: dtd_grammar.lemon + $(LEMON)/lemon 
t=$(srcdir)/$(LEMON)/lempar.c $^ + diff --git a/epan/dissectors/packet-xml.c b/epan/dissectors/packet-xml.c index ca4d2a4f1e..b75dfee282 100644 --- a/epan/dissectors/packet-xml.c +++ b/epan/dissectors/packet-xml.c @@ -36,34 +36,46 @@ #include <string.h> #include <stdarg.h> +#include <stdio.h> + #include <glib.h> #include <epan/emem.h> #include <epan/packet.h> #include <epan/strutil.h> #include <epan/tvbparse.h> +#include <epan/dtd.h> + +typedef struct _xml_names_t { + gchar* name; + gchar* longname; + gchar* blurb; + int hf_tag; + int hf_cdata; + gint ett; + + gboolean is_root; + + GHashTable* attributes; + GHashTable* elements; +} xml_names_t; typedef struct { proto_tree* tree; proto_item* item; proto_item* last_item; + xml_names_t* ns; int start_offset; } xml_frame_t; -static int proto_xml = -1; - -static gint ett_i = -1; -static gint ett_tag = -1; static gint ett_dtd = -1; +static gint ett_xmpli = -1; -static int hf_what = -1; -static int hf_attrib = -1; -static int hf_cdata = -1; +static int hf_junk = -1; +static int hf_unknowwn_attrib = -1; static int hf_comment = -1; static int hf_xmlpi = -1; -static int hf_tag = -1; static int hf_dtd_tag = -1; static int hf_doctype = -1; -static int hf_entity = -1; /* Dissector handles */ static dissector_handle_t xml_handle; @@ -72,11 +84,20 @@ static dissector_handle_t xml_handle; static tvbparse_wanted_t* want; static tvbparse_wanted_t* want_ignore; +static GHashTable* xmpli_names; +static GHashTable* media_types; +static xml_names_t xml_ns = {"xml","eXtesible Markup Language","XML",-1,-1,-1,TRUE,NULL,NULL}; +static xml_names_t unknown_ns = {"","","",-1,-1,-1,TRUE,NULL,NULL}; +static xml_names_t* root_ns; +#define XML_CDATA -1000 + +GArray* hf; +GArray* ett_arr; static void -dissect_xml(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree) +dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) { tvbparse_t* tt; tvbparse_elem_t* tok = NULL; @@ -93,12 +114,20 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo 
_U_, proto_tree *tree) g_ptr_array_add(stack,current_frame); tt = tvbparse_init(tvb,0,-1,stack,want_ignore); + current_frame->start_offset = 0; + + root_ns = g_hash_table_lookup(media_types,pinfo->match_string); + + if (! root_ns ) { + root_ns = &unknown_ns; + } + + current_frame->ns = root_ns; - current_frame->item = proto_tree_add_item(tree,proto_xml,tvb,0,-1,FALSE); - current_frame->tree = proto_item_add_subtree(current_frame->item,ett_i); + current_frame->item = proto_tree_add_item(tree,xml_ns.hf_tag,tvb,0,-1,FALSE); + current_frame->tree = proto_item_add_subtree(current_frame->item,xml_ns.ett); current_frame->last_item = current_frame->item; - current_frame->start_offset = 0; - + while(( tok = tvbparse_get(tt, want) )) ; } @@ -109,15 +138,18 @@ static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbpar int hfid; proto_item* pi; - if (tok->id > 0) + if (tok->id == XML_CDATA) { + hfid = current_frame->ns->hf_cdata; + } else if ( tok->id > 0) { hfid = tok->id; - else - hfid = hf_what; + } else { + hfid = hf_junk; + } pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, FALSE); proto_item_set_text(pi, "%s", - tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len)); + tvb_format_text(tok->tvb,tok->offset,tok->len)); } static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) { @@ -126,18 +158,32 @@ static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbpa proto_item* pi; proto_tree* pt; tvbparse_elem_t* name_tok = tok->sub->next; + gchar* name = g_strdown(tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len)); + xml_names_t* ns = g_hash_table_lookup(xmpli_names,name); + int hf_tag; + gint ett; + + if (!ns) { + hf_tag = hf_xmlpi; + ett = ett_xmpli; + } else { + hf_tag = ns->hf_tag; + ett = ns->ett; + } - pi = proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,-1, - "<? 
%s", - tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len)); - pt = proto_item_add_subtree(pi,ett_tag); + pi = proto_tree_add_item(current_frame->tree,hf_tag,tok->tvb,tok->offset,tok->len,FALSE); + + proto_item_set_text(pi,tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len)); + + pt = proto_item_add_subtree(pi,ett); current_frame = ep_alloc(sizeof(xml_frame_t)); current_frame->item = pi; current_frame->last_item = pi; current_frame->tree = pt; current_frame->start_offset = tok->offset; - + current_frame->ns = ns; + g_ptr_array_add(stack,current_frame); } @@ -146,42 +192,53 @@ static void after_xmlpi(void* tvbparse_data, const void* wanted_data _U_, tvbpar GPtrArray* stack = tvbparse_data; xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1); - proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len); - proto_item_append_text(current_frame->last_item," ?>"); + proto_tree_add_text(current_frame->tree, + tok->tvb, tok->offset, tok->len, + tvb_format_text(tok->tvb,tok->offset,tok->len)); if (stack->len > 1) { g_ptr_array_remove_index_fast(stack,stack->len - 1); } else { - proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]"); + proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened xmpli tag ]"); } } static void before_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) { GPtrArray* stack = tvbparse_data; xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1); + tvbparse_elem_t* name_tok = tok->sub->next; + gchar* name = g_strdown(tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len)); + xml_names_t* ns = g_hash_table_lookup(current_frame->ns->elements,name); + xml_frame_t* new_frame; proto_item* pi; proto_tree* pt; - tvbparse_elem_t* name_tok = tok->sub->next; - gchar* name = 
tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len); - pi = proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,-1,"<%s",name); - pt = proto_item_add_subtree(pi,ett_tag); + if (!ns) { + if (! ( ns = g_hash_table_lookup(root_ns->elements,name) ) ) { + ns = &unknown_ns; + } + } - current_frame = ep_alloc(sizeof(xml_frame_t)); - current_frame->item = pi; - current_frame->last_item = pi; - current_frame->tree = pt; - current_frame->start_offset = tok->offset; + pi = proto_tree_add_item(current_frame->tree,ns->hf_tag,tok->tvb,tok->offset,tok->len,FALSE); + proto_item_set_text(pi,tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len)); - g_ptr_array_add(stack,current_frame); + pt = proto_item_add_subtree(pi,ns->ett); + + new_frame = ep_alloc(sizeof(xml_frame_t)); + new_frame->item = pi; + new_frame->last_item = pi; + new_frame->tree = pt; + new_frame->start_offset = tok->offset; + new_frame->ns = ns; + + g_ptr_array_add(stack,new_frame); } -static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) { +static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) { GPtrArray* stack = tvbparse_data; xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1); - proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len); proto_item_append_text(current_frame->last_item,">"); } @@ -189,7 +246,6 @@ static void after_closed_tag(void* tvbparse_data, const void* wanted_data _U_, t GPtrArray* stack = tvbparse_data; xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1); - proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len); proto_item_append_text(current_frame->last_item,"/>"); if (stack->len > 1) { @@ -206,7 +262,7 @@ void after_untag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem proto_item_set_len(current_frame->item, 
(tok->offset - current_frame->start_offset) + tok->len); proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s", - tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len)); + tvb_format_text(tok->tvb,tok->offset,tok->len)); if (stack->len > 1) { g_ptr_array_remove_index_fast(stack,stack->len - 1); @@ -223,13 +279,14 @@ static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_, proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype, name_tok->tvb, name_tok->offset, name_tok->len, FALSE); - proto_item_set_text(dtd_item,"%s",tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len)); + proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len)); current_frame = ep_alloc(sizeof(xml_frame_t)); current_frame->item = dtd_item; current_frame->last_item = dtd_item; current_frame->tree = proto_item_add_subtree(dtd_item,ett_dtd); current_frame->start_offset = tok->offset; + current_frame->ns = NULL; g_ptr_array_add(stack,current_frame); } @@ -251,7 +308,7 @@ static void after_dtd_close(void* tvbparse_data, const void* wanted_data _U_, tv xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1); proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s", - tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len)); + tvb_format_text(tok->tvb,tok->offset,tok->len)); if (stack->len > 1) { g_ptr_array_remove_index_fast(stack,stack->len - 1); } else { @@ -266,13 +323,20 @@ static void get_attrib_value(void* tvbparse_data _U_, const void* wanted_data _U static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) { GPtrArray* stack = tvbparse_data; xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1); - gchar* name = tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len); + gchar* name = g_strdown(tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len)); tvbparse_elem_t* value = 
tok->sub->next->next->data; + int* hfidp; + int hfid; - name = name; + if(current_frame->ns && (hfidp = g_hash_table_lookup(current_frame->ns->attributes,g_strdown(name)) )) { + hfid = *hfidp; + } else { + hfid = hf_unknowwn_attrib; + value = tok; + } - current_frame->last_item = proto_tree_add_item(current_frame->tree,hf_attrib,value->tvb,value->offset,value->len,FALSE); - proto_item_set_text(current_frame->last_item, "%s", tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len)); + current_frame->last_item = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE); + proto_item_set_text(current_frame->last_item, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len)); } @@ -287,10 +351,10 @@ static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_, void init_xml_parser(void) { - tvbparse_wanted_t* want_name = tvbparse_chars(-1,0,0,"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-",NULL,NULL,NULL); + tvbparse_wanted_t* want_name = tvbparse_chars(-1,0,0,"abcdefghijklmnopqrstuvwxyz-_:ABCDEFGHIJKLMNOPQRSTUVWXYZ",NULL,NULL,NULL); tvbparse_wanted_t* want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL, - tvbparse_set_seq(hf_attrib, NULL, NULL, after_attrib, + tvbparse_set_seq(-1, NULL, NULL, after_attrib, want_name, tvbparse_char(-1,"=",NULL,NULL,NULL), tvbparse_set_oneof(0, NULL, NULL, get_attrib_value, @@ -306,7 +370,7 @@ void init_xml_parser(void) { tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag), NULL); - tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,NULL); + tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,after_xmlpi); want_ignore = tvbparse_chars(-1,0,0," \t\r\n",NULL,NULL,NULL); @@ -317,7 +381,7 @@ void init_xml_parser(void) { tvbparse_string(-1,"-->",NULL,NULL,NULL), TRUE), NULL), - tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,after_xmlpi, + tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,NULL, tvbparse_string(-1,"<?",NULL,NULL,NULL), want_name, 
tvbparse_set_oneof(-1,NULL,NULL,NULL, @@ -377,12 +441,7 @@ void init_xml_parser(void) { want_stoptag, NULL), NULL), - tvbparse_set_seq(hf_entity,NULL,NULL,after_token, - tvbparse_char(4,"&",NULL,NULL,NULL), - want_name, - tvbparse_char(4,";",NULL,NULL,NULL), - NULL), - tvbparse_not_chars(hf_cdata,0,0,"<",NULL,NULL,after_token), + tvbparse_not_chars(XML_CDATA,0,0,"<",NULL,NULL,after_token), tvbparse_not_chars(-1,0,0," \t\r\n",NULL,NULL,unrecognized_token), NULL); @@ -390,35 +449,139 @@ void init_xml_parser(void) { } +xml_names_t* xml_new_namespace(GHashTable* hash, gchar* name, gchar* longname, gchar* blurb, ...) { + xml_names_t* ns = g_malloc(sizeof(xml_names_t)); + va_list ap; + gchar* attr_name; + + ns->name = g_strdup(name); + ns->longname = g_strdup(longname); + ns->blurb = g_strdup(blurb); + ns->hf_tag = -1; + ns->hf_cdata = -1; + ns->ett = -1; + ns->attributes = g_hash_table_new(g_str_hash,g_str_equal); + ns->elements = g_hash_table_new(g_str_hash,g_str_equal); + + va_start(ap,blurb); + + while(( attr_name = va_arg(ap,gchar*) )) { + int* hfp = g_malloc(sizeof(int)); + *hfp = -1; + g_hash_table_insert(ns->attributes,g_strdup(attr_name),hfp); + }; + + va_end(ap); + + g_hash_table_insert(hash,ns->name,ns); + + return ns; +} + +void add_xml_attribute_names(gpointer k, gpointer v, gpointer p) { + gchar* basename = g_strdup_printf("%s.%s",(gchar*)p,(gchar*)k); + hf_register_info hfri; + + hfri.p_id = (int*)v; + hfri.hfinfo.name = basename; + hfri.hfinfo.abbrev = basename; + hfri.hfinfo.type = FT_STRING; + hfri.hfinfo.display = BASE_NONE; + hfri.hfinfo.strings = NULL; + hfri.hfinfo.bitmask = 0x0; + hfri.hfinfo.blurb = basename; + hfri.hfinfo.id = 0; + hfri.hfinfo.parent = 0; + hfri.hfinfo.ref_count = 0; + hfri.hfinfo.bitshift = 0; + hfri.hfinfo.same_name_next = NULL; + hfri.hfinfo.same_name_prev = NULL; + + g_array_append_val(hf,hfri); +} + +void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p) { + xml_names_t* ns = v; + hf_register_info hfri; + gchar* 
basename = g_strdup_printf("%s.%s",(gchar*)p,ns->name); + gint* ett_p = &(ns->ett); + + hfri.p_id = &(ns->hf_tag); + hfri.hfinfo.name = basename; + hfri.hfinfo.abbrev = basename; + hfri.hfinfo.type = FT_STRING; + hfri.hfinfo.display = BASE_NONE; + hfri.hfinfo.strings = NULL; + hfri.hfinfo.bitmask = 0x0; + hfri.hfinfo.blurb = basename; + hfri.hfinfo.id = 0; + hfri.hfinfo.parent = 0; + hfri.hfinfo.ref_count = 0; + hfri.hfinfo.bitshift = 0; + hfri.hfinfo.same_name_next = NULL; + hfri.hfinfo.same_name_prev = NULL; + + g_array_append_val(hf,hfri); + g_array_append_val(ett_arr,ett_p); + + g_hash_table_foreach(ns->attributes,add_xml_attribute_names,basename); + +} + +void init_xml_names(void) { + xml_names_t* xmlpi_xml_ns; + + xmpli_names = g_hash_table_new(g_str_hash,g_str_equal); + media_types = g_hash_table_new(g_str_hash,g_str_equal); + + unknown_ns.elements = g_hash_table_new(g_str_hash,g_str_equal); + unknown_ns.attributes = g_hash_table_new(g_str_hash,g_str_equal); + + xmlpi_xml_ns = xml_new_namespace(xmpli_names,"xml","XML XMLPI","XML XMLPI", + "version","encoding","standalone",NULL); + + g_hash_table_destroy(xmlpi_xml_ns->elements); + xmlpi_xml_ns->elements = NULL; + + g_hash_table_foreach(xmpli_names,add_xmlpi_namespace,"xml.xmlpi"); +} + + void proto_register_xml(void) { - static gint *ett[] = { - &ett_i, - &ett_tag, + static gint *ett_base[] = { + &unknown_ns.ett, + &xml_ns.ett, &ett_dtd, + &ett_xmpli }; - static hf_register_info hf[] = { - { &hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, + static hf_register_info hf_base[] = { { &hf_xmlpi, {"XMLPI", "xml.xmlpi", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, - { &hf_entity, {"Entity", "xml.entity", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, - { &hf_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, { &hf_comment, {"Comment", "xml.comment", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, - { &hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, 
"", HFILL }}, + { &hf_unknowwn_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, { &hf_doctype, {"Doctype", "xml.doctype", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, { &hf_dtd_tag, {"DTD Tag", "xml.dtdtag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, - { &hf_what, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }} + { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, + { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}, + { &hf_junk, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }} }; + + hf = g_array_new(FALSE,FALSE,sizeof(hf_register_info)); + ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*)); + + g_array_append_vals(hf,hf_base,array_length(hf_base)); + g_array_append_vals(ett_arr,ett_base,array_length(ett_base)); - proto_xml = proto_register_protocol("eXtensible Markup Language", - "XML", - "xml"); - - proto_register_field_array(proto_xml, hf, array_length(hf)); - proto_register_subtree_array(ett, array_length(ett)); + init_xml_names(); + + xml_ns.hf_tag = proto_register_protocol(xml_ns.blurb, xml_ns.longname, xml_ns.name); + + proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)hf->data, hf->len); + proto_register_subtree_array((gint**)ett_arr->data, ett_arr->len); - register_dissector("xml", dissect_xml, proto_xml); + register_dissector("xml", dissect_xml, xml_ns.hf_tag); init_xml_parser(); } diff --git a/epan/dtd.h b/epan/dtd.h new file mode 100644 index 0000000000..1db10fd371 --- /dev/null +++ b/epan/dtd.h @@ -0,0 +1,61 @@ +/* + * dtd.h + * + * XML dissector for ethereal + * DTD import declarations + * + * Copyright 2005, Luis E. 
Garcia Ontanon <luis.ontanon@gmail.com> + * + * $Id $ + * + * Ethereal - Network traffic analyzer + * By Gerald Combs <gerald@ethereal.com> + * Copyright 1998 Gerald Combs + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _DTD_H_ +#define _DTD_H_ + +#include <glib.h> + +typedef struct _dtd_build_data_t { + gchar* proto_name; + gchar* media_type; + gchar* description; + gchar* proto_root; + + GPtrArray* elements; + GPtrArray* attributes; + + gchar* location; + GString* error; +} dtd_build_data_t; + +typedef struct _dtd_token_data_t { + gchar* text; + gchar* location; +} dtd_token_data_t; + +typedef struct _dtd_named_list_t { + gchar* name; + GPtrArray* list; +} dtd_named_list_t; + +extern GString* dtd_preparse(gchar* dname, gchar* fname, GString* err); +extern dtd_build_data_t* dtd_parse(GString* s); + +#endif diff --git a/epan/dtd_grammar.lemon b/epan/dtd_grammar.lemon new file mode 100644 index 0000000000..fc98472c99 --- /dev/null +++ b/epan/dtd_grammar.lemon @@ -0,0 +1,151 @@ +%include { + +/* dtd_parser.lemon +* XML dissector for ethereal +* XML's DTD grammar +* +* Copyright 2005, Luis E. 
Garcia Ontanon <luis.ontanon@gmail.com> +* +* $Id $ +* +* Ethereal - Network traffic analyzer +* By Gerald Combs <gerald@ethereal.com> +* Copyright 1998 Gerald Combs +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +#include <glib.h> +#include "dtd.h" + + +static dtd_named_list_t* dtd_named_list_new(gchar* name, GPtrArray* list) { + dtd_named_list_t* nl = g_malloc(sizeof(dtd_named_list_t)); + + nl->name = name; + nl->list = list; + + return nl; +} + +static GPtrArray* g_ptr_array_join(GPtrArray* a, GPtrArray* b){ + + while(b->len > 0) { + g_ptr_array_add(a,g_ptr_array_remove_index_fast(b,0)); + } + + g_ptr_array_free(b,FALSE); + + return a; +} + +} + +%name DtdParse + +%extra_argument { dtd_build_data_t *bd } + +%token_destructor { + if ($$) { + if ($$->text) g_free($$->text); + if ($$->location) g_free($$->location); + g_free($$); + } +} + +%syntax_error { + if (!TOKEN) + g_string_sprintfa(bd->error,"syntax error at end of file"); + else + g_string_sprintfa(bd->error,"syntax error in %s at or before '%s': \n", bd->location,TOKEN->text); +} + +%parse_failure { + g_string_sprintfa(bd->error,"DTD parsing failure at %s\n",bd->location); +} + +%token_prefix TOKEN_ + +%token_type { dtd_token_data_t* } + +dtd ::= doctype. +dtd ::= dtd_parts. 
+ +doctype ::= TAG_START DOCTYPE_KW NAME(Name) OPEN_BRACKET dtd_parts CLOSE_BRACKET TAG_STOP. { + bd->proto_name = g_strdown(g_strdup(Name->text)); +} + +dtd_parts ::= dtd_parts element(Element). { g_ptr_array_add(bd->elements,Element); } +dtd_parts ::= dtd_parts attlist(Attlist). { g_ptr_array_add(bd->attributes,Attlist); } +dtd_parts ::= element(Element). { g_ptr_array_add(bd->elements,Element); } +dtd_parts ::= attlist(Attlist). { g_ptr_array_add(bd->attributes,Attlist); } + +%type attlist { dtd_named_list_t* } +attlist(A) ::= TAG_START ATTLIST_KW NAME(B) attrib_list(TheList) TAG_STOP. { A = dtd_named_list_new(B->text,TheList); } + +%type element { dtd_named_list_t* } +element(A) ::= TAG_START ELEMENT_KW NAME(B) sub_elements(C) TAG_STOP. { A = dtd_named_list_new(B->text,C); } + +%type attrib_list { GPtrArray* } +attrib_list(A) ::= attrib_list(B) attrib(C). { g_ptr_array_add(B,C); A = B; } +attrib_list(A) ::= attrib(B). { A = g_ptr_array_new(); g_ptr_array_add(A,B); } + +%type attrib { gchar* } +attrib(A) ::= NAME(B) att_type att_default. { A = g_strdown(g_strdup(B->text)); } + +att_type ::= ATT_TYPE. +att_type ::= enumeration. + +att_default ::= ATT_DEF. +att_default ::= ATT_DEF_WITH_VALUE QUOTED. +att_default ::= QUOTED. +att_default ::= IMPLIED_KW. +att_default ::= REQUIRED_KW. + +enumeration ::= OPEN_PARENS enum_list CLOSE_PARENS. + +enum_list ::= enum_list PIPE enum_item. +enum_list ::= enum_item. +enum_list ::= enumeration. +enum_list ::= enum_list PIPE enumeration. + +enum_item ::= NAME. +enum_item ::= QUOTED. + + +%type sub_elements { GPtrArray* } +sub_elements(A) ::= sub_elements(B) STAR. {A=B;} +sub_elements(A) ::= sub_elements(B) PLUS. {A=B;} +sub_elements(A) ::= sub_elements(B) QUESTION. {A=B;} +sub_elements(A) ::= OPEN_PARENS ELEM_DATA CLOSE_PARENS. { A = g_ptr_array_new(); } +sub_elements(A) ::= OPEN_PARENS element_list(B) COMMA ELEM_DATA CLOSE_PARENS. { A = B; } +sub_elements(A) ::= OPEN_PARENS element_list(B) PIPE ELEM_DATA CLOSE_PARENS. 
{ A = B; } +sub_elements(A) ::= OPEN_PARENS element_list(B) CLOSE_PARENS. { A = B; } +sub_elements(A) ::= EMPTY_KW. { A = g_ptr_array_new(); } + +%type element_list { GPtrArray* } +element_list(A) ::= element_list(B) COMMA element_child(C). { g_ptr_array_add(B,C); A = B; } +element_list(A) ::= element_list(B) PIPE element_child(C). { g_ptr_array_add(B,C); A = B; } +element_list(A) ::= element_child(B). { A = g_ptr_array_new(); g_ptr_array_add(A,B); } +element_list(A) ::= sub_elements(B). { A = B; } +element_list(A) ::= element_list(B) COMMA sub_elements(C). { A = g_ptr_array_join(B,C); } +element_list(A) ::= element_list(B) PIPE sub_elements(C). { A = g_ptr_array_join(B,C); } + +%type element_child { gchar* } +element_child(A) ::= NAME(B). { A = g_strdown(g_strdup(B->text)); } +element_child(A) ::= NAME(B) STAR. { A = g_strdown(g_strdup(B->text)); } +element_child(A) ::= NAME(B) QUESTION. { A = g_strdown(g_strdup(B->text)); } +element_child(A) ::= NAME(B) PLUS. { A = g_strdown(g_strdup(B->text)); } + diff --git a/epan/dtd_parse.l b/epan/dtd_parse.l new file mode 100644 index 0000000000..8c33ee26e6 --- /dev/null +++ b/epan/dtd_parse.l @@ -0,0 +1,316 @@ +%option noyywrap +%option nounput +%option outfile="dtd_parse.c" +%option prefix="Dtd_Parse_" +%option never-interactive + +%{ + + /* dtd_lexer.l + * an XML dissector for ethereal + * lexical analyzer for DTDs + * + * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com> + * + * $Id$ + * + * Ethereal - Network traffic analyzer + * By Gerald Combs <gerald@ethereal.com> + * Copyright 1998 Gerald Combs + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <glib.h> +#include <string.h> + +#include "dtd.h" +#include "dtd_grammar.h" + + struct _proto_xmlpi_attr { + gchar* name; + void (*act)(gchar*); + }; + + void DtdParse(void*,int,dtd_token_data_t*,dtd_build_data_t*); + void *DtdParseAlloc(void *(*)(gulong)); + void DtdParseFree( void*, void(*)(void*) ); + void DtdParseTrace(FILE *TraceFILE, char *zTracePrompt); + void* pParser; + GString* input_string; + guint offset; + guint len; + gchar* location; + gchar* attr_name; + + static int my_yyinput(char* buff,guint size); + + static dtd_token_data_t* new_token(gchar*); + + static dtd_build_data_t* build_data; + + static void set_proto_name (gchar* val) { if(build_data->proto_name) g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); } + static void set_media_type (gchar* val) { if(build_data->media_type) g_free(build_data->media_type); build_data->media_type = g_strdup(val); } + static void set_proto_root (gchar* val) { if(build_data->proto_root) g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); } + static void set_description (gchar* val) { if(build_data->description) g_free(build_data->description); build_data->description = g_strdup(val); } + + struct _proto_xmlpi_attr proto_attrs[] = + { + { "name", set_proto_name }, + { "media", set_media_type }, + { "root", set_proto_root }, + { "description", set_description }, + {NULL,NULL} + }; + +#define DTD_PARSE(token_type) \ + { build_data->location = location; \ + DtdParse(pParser, (token_type), 
new_token(yytext), build_data); \
+		if(build_data->error->len > 0) yyterminate(); \
+	}
+
+/* Feed the scanner from the in-memory GString instead of a FILE. */
+#define YY_INPUT(buff,result,max_size) ( (result) = my_yyinput((buff),(max_size)) )
+
+%}
+
+/* "<? ethereal:location ... ?>" and "<? ethereal:protocol ... ?>" processing instructions */
+start_xmlpi "<?"
+
+location_xmlpi "ethereal:location"
+protocol_xmlpi "ethereal:protocol"
+
+get_attr_quote =[:blank:]*["]
+avoid_editor_bug ["]
+
+get_location_xmlpi [^[:blank:]]+
+
+stop_xmlpi "?>"
+
+/* DTD markup and keywords */
+special_start "<!"
+special_stop ">"
+whitespace [[:blank:]\r\n]+
+newline \n
+attlist_kw ATTLIST
+doctype_kw DOCTYPE
+element_kw ELEMENT
+
+pcdata #PCDATA
+any ANY
+cdata #CDATA
+
+iD ID
+idref IDREF
+idrefs IDREFS
+nmtoken NMTOKEN
+nmtokens NMTOKENS
+entity ENTITY
+entities ENTITIES
+notation NOTATION
+cdata_t CDATA
+
+empty EMPTY
+defaulT #DEFAULT
+fixed #FIXED
+required #REQUIRED
+implied #IMPLIED
+
+star "*"
+question "?"
+plus "+"
+open_parens "("
+close_parens ")"
+open_bracket "["
+close_bracket "]"
+comma ","
+pipe "|"
+dquote ["]
+
+name [a-z][-a-z0-9_]*
+dquoted ["][^\"]*["]
+squoted ['][^\']*[']
+
+%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE
+%%
+
+{whitespace} ;
+
+<DTD>{start_xmlpi} {
+	BEGIN XMLPI;
+}
+
+<XMLPI>{location_xmlpi} {
+	/*
+	 * Drop the previous location and forget the pointer: if the input
+	 * ends before a new value is read, a stale pointer would be freed
+	 * again by the next location PI.
+	 */
+	g_free(location);
+	location = NULL;
+	BEGIN LOCATION;
+}
+
+<XMLPI>{protocol_xmlpi} {
+	BEGIN PROTOCOL;
+}
+
+	/* NOTE(review): the pattern below is "<.>" rather than a bare "." - confirm that this is intended. */
+<XMLPI><.> ;
+<XMLPI>{stop_xmlpi} BEGIN DTD;
+
+<LOCATION>{get_location_xmlpi} {
+	location = g_strdup(yytext);
+	BEGIN DONE;
+}
+
+<DONE>{stop_xmlpi} BEGIN DTD;
+
+<PROTOCOL>{name} {
+	attr_name = g_strdup(yytext);
+	BEGIN GET_ATTR_QUOTE;
+}
+
+<GET_ATTR_QUOTE>{get_attr_quote} { BEGIN GET_ATTR_VAL; }
+
+<GET_ATTR_QUOTE>. {
+	g_string_sprintfa(build_data->error,
+		"error in ethereal:protocol xmpli at %s : could not find attribute value!",
+		location);
+	/* attr_name was duplicated by the <PROTOCOL>{name} rule; release it before bailing out */
+	g_free(attr_name);
+	yyterminate();
+}
+
+<GET_ATTR_VAL>[^"]+ {
+	/*"*/
+	struct _proto_xmlpi_attr* pa;
+	gboolean got_it = FALSE;
+
+	/* dispatch the value to the handler registered for this attribute name */
+	for(pa = proto_attrs; pa->name; pa++) {
+		if (g_strcasecmp(attr_name,pa->name) == 0) {
+			pa->act(yytext);
+			got_it = TRUE;
+			break;
+		}
+	}
+
+	if (! got_it) {
+		g_string_sprintfa(build_data->error,
+			"error in ethereal:protocol xmpli at %s : no such parameter %s!",
+			location, attr_name);
+		g_free(attr_name);
+		yyterminate();
+	}
+
+	g_free(attr_name);
+
+	BEGIN GET_ATTR_CLOSE_QUOTE;
+}
+
+<GET_ATTR_CLOSE_QUOTE>{dquote} { BEGIN PROTOCOL;}
+
+<PROTOCOL>{stop_xmlpi} BEGIN DTD;
+
+<DTD>{special_start}	{ DTD_PARSE(TOKEN_TAG_START); }
+<DTD>{special_stop}	{ DTD_PARSE(TOKEN_TAG_STOP); }
+
+<DTD>{attlist_kw}	{ DTD_PARSE(TOKEN_ATTLIST_KW); }
+<DTD>{element_kw}	{ DTD_PARSE(TOKEN_ELEMENT_KW); }
+<DTD>{doctype_kw}	{ DTD_PARSE(TOKEN_DOCTYPE_KW); }
+
+<DTD>{pcdata}		{ DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{any}		{ DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{cdata}		{ DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{empty}		{ DTD_PARSE(TOKEN_EMPTY_KW); }
+
+<DTD>{iD}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{idref}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{idrefs}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{nmtoken}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{nmtokens}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{entity}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{entities}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{notation}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{cdata_t}		{ DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{defaulT}		{ DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
+<DTD>{fixed}		{ DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
+<DTD>{required}		{ DTD_PARSE(TOKEN_ATT_DEF); }
+<DTD>{implied}		{ DTD_PARSE(TOKEN_ATT_DEF); }
+
+<DTD>{star}		{ DTD_PARSE(TOKEN_STAR); }
+<DTD>{question}		{ DTD_PARSE(TOKEN_QUESTION); }
+<DTD>{plus}		{ DTD_PARSE(TOKEN_PLUS); }
+<DTD>{comma}		{ DTD_PARSE(TOKEN_COMMA); }
+<DTD>{open_parens}	{ DTD_PARSE(TOKEN_OPEN_PARENS); }
+<DTD>{close_parens}	{ DTD_PARSE(TOKEN_CLOSE_PARENS); }
+<DTD>{open_bracket}	{ DTD_PARSE(TOKEN_OPEN_BRACKET); }
+<DTD>{close_bracket}	{ DTD_PARSE(TOKEN_CLOSE_BRACKET); }
+<DTD>{pipe}		{ DTD_PARSE(TOKEN_PIPE); }
+
+<DTD>{dquoted} |
+<DTD>{squoted}		{ DTD_PARSE(TOKEN_QUOTED); }
+<DTD>{name}		{ DTD_PARSE(TOKEN_NAME); }
+
+%%
+
+/*
+ * Allocate a token for the grammar: it carries a private copy of the
+ * matched text and of the current location string (g_strdup() yields
+ * NULL when no location has been seen yet).
+ * NOTE(review): the token is handed to DtdParse(); who frees it is not
+ * visible from this file - presumably the grammar's destructor.
+ */
+static dtd_token_data_t* new_token(gchar* text) {
+	dtd_token_data_t* t = g_malloc(sizeof(dtd_token_data_t));
+
+	t->text = g_strdup(text);
+	t->location = g_strdup(location);
+
+	return t;
+}
+
+
+/*
+ * YY_INPUT backend: copy at most `size' bytes of the not yet consumed
+ * part of input_string into `buff' and advance the read offset.
+ * Returns the number of bytes copied, or YY_NULL once everything has
+ * been consumed.
+ */
+static int my_yyinput(char* buff, guint size) {
+
+	if (offset >= len ) {
+		return YY_NULL;
+	}
+
+	/* clamp against the remainder instead of computing offset + size, which could wrap */
+	if ( size > len - offset ) {
+		size = len - offset;
+	}
+
+	memcpy(buff, input_string->str + offset,size);
+	offset += size;
+
+	return size;
+}
+
+/*
+ * Tokenize the (preparsed) DTD text in `s' and push every token through
+ * the Lemon generated parser.
+ *
+ * Returns a newly allocated dtd_build_data_t; its `error' GString is
+ * non-empty when scanning or parsing reported a problem.  The result is
+ * returned to the caller, who is presumably responsible for releasing it.
+ */
+extern dtd_build_data_t* dtd_parse(GString* s) {
+
+	input_string = s;
+	offset = 0;
+	len = input_string->len;
+
+	pParser = DtdParseAlloc(g_malloc);
+
+	build_data = g_malloc(sizeof(dtd_build_data_t));
+
+	build_data->proto_name = NULL;
+	build_data->media_type = NULL;
+	build_data->description = NULL;
+	build_data->proto_root = NULL;
+
+	build_data->elements = g_ptr_array_new();
+	build_data->attributes = g_ptr_array_new();
+
+	build_data->location = NULL;
+	build_data->error = g_string_new("");
+
+	BEGIN DTD;
+
+	yylex();
+
+	/* token 0 tells the parser that the input is finished */
+	DtdParse(pParser, 0, NULL,build_data);
+
+	yyrestart(NULL);
+
+	DtdParseFree(pParser, g_free );
+
+	return build_data;
+}
diff --git a/epan/dtd_preparse.l b/epan/dtd_preparse.l
new file mode 100644
index 0000000000..101a9161a4
--- /dev/null
+++ b/epan/dtd_preparse.l
@@ -0,0 +1,258 @@
+%option noyywrap
+%option nounput
+%option prefix="Dtd_PreParse_"
+%option never-interactive
+%option caseless
+%option outfile="dtd_preparse.c"
+
+%{
+	/*
+	 * dtd_preparser.l
+	 *
+	 * an XML dissector for ethereal
+	 *
+	 * DTD Preparser - import a dtd file into a GString
+	 *                 including
files, removing comments
+	 *                 and resolving %entities;
+	 *
+	 * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+	 *
+	 * $Id$
+	 *
+	 * Ethereal - Network traffic analyzer
+	 * By Gerald Combs <gerald@ethereal.com>
+	 * Copyright 1998 Gerald Combs
+	 *
+	 * This program is free software; you can redistribute it and/or
+	 * modify it under the terms of the GNU General Public License
+	 * as published by the Free Software Foundation; either version 2
+	 * of the License, or (at your option) any later version.
+	 *
+	 * This program is distributed in the hope that it will be useful,
+	 * but WITHOUT ANY WARRANTY; without even the implied warranty of
+	 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	 * GNU General Public License for more details.
+	 *
+	 * You should have received a copy of the GNU General Public License
+	 * along with this program; if not, write to the Free Software
+	 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+	 */
+
+#include <glib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include "dtd.h"
+
+#define MAX_INCLUDE_DEPTH 10
+/* NOTE(review): include_stack is not referenced by the code visible in this file; left as declared. */
+YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
+static int include_stack_ptr = 0;
+
+/* text not matched by any rule is appended to the GString being filled */
+#define ECHO g_string_append(current,yytext);
+
+/*
+ * Scanner state.  Kept file-local: names as generic as `dirname',
+ * `filename' or `error' must not be exported from the library
+ * (`dirname' is also the name of a C library function).
+ */
+static GString* current;	/* GString receiving text; NULL while inside a comment */
+static GString* output;		/* the preparsed DTD returned by dtd_preparse() */
+static GHashTable* entities;	/* "%name;" -> GString* with the replacement text */
+static gchar* entity_name;	/* key of the entity currently being defined */
+static GString* error;		/* caller supplied error buffer */
+
+static gchar* dirname;		/* prefix prepended to file names before fopen() */
+static gchar* filename;		/* file being read, used in location markers */
+static guint linenum;		/* line being read, used in location markers */
+
+static gchar* replace_entity(gchar* s);
+static const gchar* location(void);
+static gchar* load_entity_file(gchar* filename);
+	/* [:blank:]+file[:blank:]*=[:blank:]*["] */
+
+%}
+xmlpi_start "<?"
+xmlpi_stop "?>"
+xmlpi_chars .
+
+comment_start "<!--"
+comment_stop "-->"
+special_start "<!"
+special_stop ">"
+
+entity_start "<!"[[:blank:]\n]*entity[[:blank:]\n]*"%"
+system SYSTEM
+filename [^"]+
+
+
+name [A-Za-z][-:A-Za-z0-9_]*
+
+quote "\""
+percent [%]
+escaped_quote "\\\""
+non_quote [^"%]+
+
+avoid_editor_bug ["]
+
+entity [%&][A-Za-z][-A-Za-z0-9_]*;
+
+/* was "[[blank:]]+", which is the class "[blank:" followed by "]+" and never matched blanks */
+whitespace [[:blank:]]+
+newline \n
+%START OUTSIDE IN_COMMENT IN_ENTITY NAMED_ENTITY IN_QUOTE ENTITY_DONE GET_FNAME_OPEN_QUOTE GET_FNAME GET_FNAME_CLOSE_QUOTE XMLPI
+%%
+
+
+{entity}		if (current) g_string_sprintfa(current,"%s\n%s\n",replace_entity(yytext),location());
+
+{whitespace}		if (current) g_string_append(current," ");
+
+<OUTSIDE>{xmlpi_start}		{ g_string_append(current,yytext); BEGIN XMLPI; }
+<XMLPI>{xmlpi_chars}		{ g_string_append(current,yytext); }
+<XMLPI>{newline}		{ g_string_append(current,yytext); }
+<XMLPI>{xmlpi_stop}		{ g_string_append(current,yytext); BEGIN OUTSIDE; }
+
+	/* comments are dropped: `current' is NULL while inside one */
+<OUTSIDE>{comment_start}	{ current = NULL; BEGIN IN_COMMENT; }
+<IN_COMMENT>[^-]?		|
+<IN_COMMENT>[-]			;
+<IN_COMMENT>{comment_stop}	{ current = output; BEGIN OUTSIDE; }
+
+{newline} {
+	linenum++;
+	if (current) g_string_sprintfa(current,"%s\n",location());
+}
+
+
+<OUTSIDE>{entity_start}		{ BEGIN IN_ENTITY; }
+<IN_ENTITY>{name}		{ entity_name = g_strdup_printf("%%%s;",yytext); BEGIN NAMED_ENTITY; }
+<NAMED_ENTITY>{quote}		{ current = g_string_new(location()); BEGIN IN_QUOTE; }
+<IN_QUOTE>{quote}		{ g_hash_table_insert(entities,entity_name,current); BEGIN ENTITY_DONE; }
+<IN_QUOTE>{percent}		|
+<IN_QUOTE>{non_quote}		|
+<IN_QUOTE>{escaped_quote}	g_string_append(current,yytext);
+<NAMED_ENTITY>{system}		{ BEGIN GET_FNAME_OPEN_QUOTE; }
+<GET_FNAME_OPEN_QUOTE>{quote}	{ BEGIN GET_FNAME; }
+<GET_FNAME>{filename} {
+	/*
+	 * Every value in `entities' is read through GString::str and is
+	 * released with g_string_free(), so the file text has to be
+	 * wrapped in a GString too.
+	 */
+	gchar* text = load_entity_file(yytext);
+
+	g_hash_table_insert(entities,entity_name,g_string_new(text));
+	g_free(text);
+	BEGIN GET_FNAME_CLOSE_QUOTE;
+}
+<GET_FNAME_CLOSE_QUOTE>{quote}	{ BEGIN ENTITY_DONE; }
+<ENTITY_DONE>{special_stop}	{ current = output; g_string_append(current,"\n"); BEGIN OUTSIDE; }
+
+%%
+
+/*
+ * Read the file `fname' (relative to `dirname') for a SYSTEM entity.
+ *
+ * Returns a newly allocated string holding the file's text with a
+ * location marker in front of every line; the caller must g_free() it.
+ * On failure a message is appended to `error' (when one was supplied)
+ * and an empty, still g_free()able, string is returned.
+ * `filename' and `linenum' describe the included file while it is being
+ * read and are restored before returning.
+ */
+static gchar* load_entity_file(gchar* fname) {
+	gchar* fullname = g_strdup_printf("%s%s",dirname,fname);
+	gchar* save_filename = filename;
+	guint save_linenum = linenum;
+	FILE* fp = fopen(fullname,"r");
+	int open_errno = errno;	/* taken before g_free() gets a chance to change errno */
+	GString* filetext;
+	gchar* retstr;
+	gchar buf[1024];
+	gboolean at_line_start = TRUE;
+
+	g_free(fullname);
+
+	if (!fp) {
+		if (error)
+			g_string_sprintfa(error,"at %s:%u: could not load file %s: %s", filename, linenum, fname, strerror(open_errno));
+		return g_strdup("");
+	}
+
+	filename = fname;
+	linenum = 1;
+
+	filetext = g_string_new(location());
+
+	/*
+	 * fgets() instead of the BSD-only fgetln(): a line longer than the
+	 * buffer arrives in several chunks, so the marker is emitted and the
+	 * line counted only at real line boundaries.
+	 */
+	while ( fgets(buf,sizeof(buf),fp) ) {
+		size_t chunk_len = strlen(buf);
+
+		if (at_line_start)
+			g_string_append(filetext,location());
+
+		g_string_append(filetext,buf);
+
+		at_line_start = (chunk_len > 0 && buf[chunk_len - 1] == '\n');
+
+		if (at_line_start)
+			linenum++;
+	}
+
+	if ( ferror(fp) && error ) {
+		g_string_sprintfa(error,"at %s:%u: problem reading file %s: %s", filename, linenum, fname, strerror(errno));
+	}
+
+	fclose(fp);
+
+	/* keep the character data, drop only the GString wrapper */
+	retstr = g_string_free(filetext,FALSE);
+
+	filename = save_filename;
+	linenum = save_linenum;
+
+	return retstr;
+}
+
+/*
+ * Look up the replacement text of the entity reference in `entity'
+ * ("%name;" or "&name;").  The first character is overwritten with '%'
+ * in place, because entities are stored under their "%name;" form.
+ * Returns the stored text, or "" after reporting an unknown entity.
+ */
+static gchar* replace_entity(gchar* entity) {
+	GString* replacement;
+
+	*entity = '%';
+
+	replacement = g_hash_table_lookup(entities,entity);
+
+	if (replacement) {
+		return replacement->str;
+	} else {
+		if (error)
+			g_string_sprintfa(error,"dtd_preparse: in file '%s': %s does not exists\n", filename, entity);
+		return "";
+	}
+
+}
+
+/*
+ * Build the "<? ethereal:location file:line ?>" marker for the current
+ * read position.  The result lives in a static buffer that is
+ * overwritten by the next call.
+ */
+static const gchar* location(void) {
+	static GString* loc = NULL;
+	guint i = include_stack_ptr + 1;
+
+	if (loc) {
+		g_string_truncate(loc,0);
+	} else {
+		loc = g_string_new("");
+	}
+
+	g_string_sprintfa(loc,"<? ethereal:location ");
+
+	while (i--) {
+		g_string_sprintfa(loc, "%s:%u from",
+			filename,
+			linenum);
+	}
+
+	/* cut the trailing "from" of the last entry */
+	g_string_truncate(loc,(loc->len) - 4);
+
+	g_string_sprintfa(loc,"?>");
+
+	return loc->str;
+}
+
+/* GHRFunc: release one entity (g_strdup'ed key, GString value); TRUE removes the entry. */
+static gboolean free_gstring_hash_items(gpointer k,gpointer v,gpointer p _U_) {
+	g_free(k);
+	g_string_free(v,TRUE);
+	return TRUE;
+}
+
+/*
+ * Preparse the DTD file `fname' found in directory prefix `dname'.
+ *
+ * Returns a newly allocated GString with the DTD text (comments removed,
+ * entity references replaced, location markers inserted), or NULL when
+ * the file cannot be opened.  Problems are appended to `err', which may
+ * be NULL.
+ */
+extern GString* dtd_preparse(gchar* dname, gchar* fname, GString* err) {
+	gchar* fullname = g_strdup_printf("%s%s",dname,fname);
+	FILE* fp;
+	int open_errno;
+
+	dirname = dname;
+	filename = fname;
+
+	fp = fopen(fullname,"r");
+	open_errno = errno;	/* taken before g_free() gets a chance to change errno */
+	yyin = fp;
+
+	g_free(fullname);
+
+	if (!fp) {
+		if (err)
+			g_string_sprintfa(err, "Could not open file: '%s', error: %s",filename,strerror(open_errno));
+
+		return NULL;
+	}
+
+	linenum = 1;
+
+	error = err;
+
+	entities = g_hash_table_new(g_str_hash,g_str_equal);
+	current = output = g_string_new(location());
+
+	BEGIN OUTSIDE;
+
+	yylex();
+
+	/* the scanner is done with the file; close it before detaching it */
+	fclose(fp);
+
+	yyrestart(NULL);
+
+	g_hash_table_foreach_remove(entities,free_gstring_hash_items,NULL);
+	g_hash_table_destroy(entities);
+
+	return output;
+} |