aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Luis Ontanon <luis.ontanon@gmail.com> 2005-09-10 17:29:15 +0000
committer Luis Ontanon <luis.ontanon@gmail.com> 2005-09-10 17:29:15 +0000
commit 96326c0b8617db336cb85d122b1e1e5a00644f2b (patch)
tree ac7c2065c289737d9c131c768c1c153e55747890
parent 541fd750b86a0fa3666c4ec2d917ff6c23a332f9 (diff)
the dtd parser (still missing the glue) and few fixes to packet-xml.c
svn path=/trunk/; revision=15745
-rw-r--r-- epan/Makefile.am 21
-rw-r--r-- epan/Makefile.common 5
-rw-r--r-- epan/Makefile.nmake 14
-rw-r--r-- epan/dissectors/packet-xml.c 307
-rw-r--r-- epan/dtd.h 61
-rw-r--r-- epan/dtd_grammar.lemon 151
-rw-r--r-- epan/dtd_parse.l 316
-rw-r--r-- epan/dtd_preparse.l 258
8 files changed, 1061 insertions, 72 deletions
diff --git a/epan/Makefile.am b/epan/Makefile.am
index 56efd3acdb..6faee21134 100644
--- a/epan/Makefile.am
+++ b/epan/Makefile.am
@@ -50,6 +50,9 @@ EXTRA_libethereal_la_SOURCES = \
inet_v6defs.h
EXTRA_DIST = \
+ dtd_grammar.lemon \
+ dtd_parse.l \
+ dtd_preparse.l \
enterprise-numbers \
libethereal.def \
Makefile.common \
@@ -66,7 +69,12 @@ CLEANFILES = \
*~
DISTCLEANFILES = \
+ dtd_grammar.c \
+ dtd_grammar.h \
+ dtd_parse.c \
+ dtd_preparse.c \
radius_dict.c
+
MAINTAINERCLEANFILES = \
Makefile.in
@@ -85,7 +93,20 @@ exntest: exntest.o except.o
radius_dict.c: radius_dict.l
$(LEX) $^
+
+dtd_parse.c : dtd_parse.l
+ $(LEX) -odtd_parse.c $(srcdir)/dtd_parse.l
+dtd_preparse.c : dtd_preparse.l
+ $(LEX) -odtd_preparse.c $(srcdir)/dtd_preparse.l
+
+dtd_grammar.h: dtd_grammar.c
+
+LEMON=../tools/lemon
+
+dtd_grammar.c: dtd_grammar.lemon
+ $(LEMON)/lemon t=$(srcdir)/$(LEMON)/lempar.c $^
+
tvbtest.o exntest.o: exceptions.h
sminmpec.c: enterprise-numbers make-sminmpec.pl
diff --git a/epan/Makefile.common b/epan/Makefile.common
index 2f20c4c08d..150140a00a 100644
--- a/epan/Makefile.common
+++ b/epan/Makefile.common
@@ -43,6 +43,9 @@ LIBETHEREAL_SRC = \
crypt-md4.c \
crypt-md5.c \
crypt-rc4.c \
+ dtd_grammar.c \
+ dtd_parse.c \
+ dtd_preparse.c \
emem.c \
epan.c \
except.c \
@@ -105,6 +108,8 @@ LIBETHEREAL_INCLUDES = \
crypt-md4.h \
crypt-md5.h \
crypt-rc4.h \
+ dtd.h \
+ dtd_grammar.h \
emem.h \
epan.h \
epan_dissect.h \
diff --git a/epan/Makefile.nmake b/epan/Makefile.nmake
index a5d9e98906..a6f05223ee 100644
--- a/epan/Makefile.nmake
+++ b/epan/Makefile.nmake
@@ -153,3 +153,17 @@ radius_dict.c : radius_dict.l
sminmpec.c: enterprise-numbers make-sminmpec.pl
$(PERL) make-sminmpec.pl enterprise-numbers sminmpec.c
+
+# $(srcdir) and $^ are automake/GNU make constructs, not NMAKE ones, so the
+# input files are named explicitly here.
+dtd_parse.c : dtd_parse.l
+ $(LEX) -odtd_parse.c dtd_parse.l
+
+dtd_preparse.c : dtd_preparse.l
+ $(LEX) -odtd_preparse.c dtd_preparse.l
+
+dtd_grammar.h: dtd_grammar.c
+
+LEMON=..\tools\lemon
+
+dtd_grammar.c: dtd_grammar.lemon
+ $(LEMON)\lemon t=$(LEMON)\lempar.c dtd_grammar.lemon
+
diff --git a/epan/dissectors/packet-xml.c b/epan/dissectors/packet-xml.c
index ca4d2a4f1e..b75dfee282 100644
--- a/epan/dissectors/packet-xml.c
+++ b/epan/dissectors/packet-xml.c
@@ -36,34 +36,46 @@
#include <string.h>
#include <stdarg.h>
+#include <stdio.h>
+
#include <glib.h>
#include <epan/emem.h>
#include <epan/packet.h>
#include <epan/strutil.h>
#include <epan/tvbparse.h>
+#include <epan/dtd.h>
+
+typedef struct _xml_names_t {
+ gchar* name;
+ gchar* longname;
+ gchar* blurb;
+ int hf_tag;
+ int hf_cdata;
+ gint ett;
+
+ gboolean is_root;
+
+ GHashTable* attributes;
+ GHashTable* elements;
+} xml_names_t;
typedef struct {
proto_tree* tree;
proto_item* item;
proto_item* last_item;
+ xml_names_t* ns;
int start_offset;
} xml_frame_t;
-static int proto_xml = -1;
-
-static gint ett_i = -1;
-static gint ett_tag = -1;
static gint ett_dtd = -1;
+static gint ett_xmpli = -1;
-static int hf_what = -1;
-static int hf_attrib = -1;
-static int hf_cdata = -1;
+static int hf_junk = -1;
+static int hf_unknowwn_attrib = -1;
static int hf_comment = -1;
static int hf_xmlpi = -1;
-static int hf_tag = -1;
static int hf_dtd_tag = -1;
static int hf_doctype = -1;
-static int hf_entity = -1;
/* Dissector handles */
static dissector_handle_t xml_handle;
@@ -72,11 +84,20 @@ static dissector_handle_t xml_handle;
static tvbparse_wanted_t* want;
static tvbparse_wanted_t* want_ignore;
+static GHashTable* xmpli_names;
+static GHashTable* media_types;
+/* Names and ids of the top-level XML protocol; the strings are handed to proto_register_protocol() in proto_register_xml() */
+static xml_names_t xml_ns = {"xml","eXtensible Markup Language","XML",-1,-1,-1,TRUE,NULL,NULL};
+static xml_names_t unknown_ns = {"","","",-1,-1,-1,TRUE,NULL,NULL};
+static xml_names_t* root_ns;
+#define XML_CDATA -1000
+
+GArray* hf;
+GArray* ett_arr;
static void
-dissect_xml(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree)
+dissect_xml(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
{
tvbparse_t* tt;
tvbparse_elem_t* tok = NULL;
@@ -93,12 +114,20 @@ dissect_xml(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree)
g_ptr_array_add(stack,current_frame);
tt = tvbparse_init(tvb,0,-1,stack,want_ignore);
+ current_frame->start_offset = 0;
+
+ root_ns = g_hash_table_lookup(media_types,pinfo->match_string);
+
+ if (! root_ns ) {
+ root_ns = &unknown_ns;
+ }
+
+ current_frame->ns = root_ns;
- current_frame->item = proto_tree_add_item(tree,proto_xml,tvb,0,-1,FALSE);
- current_frame->tree = proto_item_add_subtree(current_frame->item,ett_i);
+ current_frame->item = proto_tree_add_item(tree,xml_ns.hf_tag,tvb,0,-1,FALSE);
+ current_frame->tree = proto_item_add_subtree(current_frame->item,xml_ns.ett);
current_frame->last_item = current_frame->item;
- current_frame->start_offset = 0;
-
+
while(( tok = tvbparse_get(tt, want) )) ;
}
@@ -109,15 +138,18 @@ static void after_token(void* tvbparse_data, const void* wanted_data _U_, tvbpar
int hfid;
proto_item* pi;
- if (tok->id > 0)
+ if (tok->id == XML_CDATA) {
+ hfid = current_frame->ns->hf_cdata;
+ } else if ( tok->id > 0) {
hfid = tok->id;
- else
- hfid = hf_what;
+ } else {
+ hfid = hf_junk;
+ }
pi = proto_tree_add_item(current_frame->tree, hfid, tok->tvb, tok->offset, tok->len, FALSE);
proto_item_set_text(pi, "%s",
- tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+ tvb_format_text(tok->tvb,tok->offset,tok->len));
}
static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
@@ -126,18 +158,32 @@ static void before_xmpli(void* tvbparse_data, const void* wanted_data _U_, tvbpa
proto_item* pi;
proto_tree* pt;
tvbparse_elem_t* name_tok = tok->sub->next;
+ gchar* name = g_strdown(tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len));
+ xml_names_t* ns = g_hash_table_lookup(xmpli_names,name);
+ int hf_tag;
+ gint ett;
+
+ if (!ns) {
+ hf_tag = hf_xmlpi;
+ ett = ett_xmpli;
+ } else {
+ hf_tag = ns->hf_tag;
+ ett = ns->ett;
+ }
- pi = proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,-1,
- "<? %s",
- tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len));
- pt = proto_item_add_subtree(pi,ett_tag);
+ pi = proto_tree_add_item(current_frame->tree,hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
+
+ proto_item_set_text(pi,tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
+
+ pt = proto_item_add_subtree(pi,ett);
current_frame = ep_alloc(sizeof(xml_frame_t));
current_frame->item = pi;
current_frame->last_item = pi;
current_frame->tree = pt;
current_frame->start_offset = tok->offset;
-
+ current_frame->ns = ns;
+
g_ptr_array_add(stack,current_frame);
}
@@ -146,42 +192,53 @@ static void after_xmlpi(void* tvbparse_data, const void* wanted_data _U_, tvbpar
GPtrArray* stack = tvbparse_data;
xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
- proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
- proto_item_append_text(current_frame->last_item," ?>");
+ proto_tree_add_text(current_frame->tree,
+ tok->tvb, tok->offset, tok->len,
+ tvb_format_text(tok->tvb,tok->offset,tok->len));
if (stack->len > 1) {
g_ptr_array_remove_index_fast(stack,stack->len - 1);
} else {
- proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened tag ]");
+ proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"[ ERROR: Closing an unopened xmpli tag ]");
}
}
+/*
+ * tvbparse callback for a tag (presumably the "before" hook of the tag
+ * sequence - confirm against init_xml_parser): looks up the lower-cased tag
+ * name, adds an item for the tag under the current frame and pushes a new
+ * frame for it onto the stack.
+ */
static void before_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
GPtrArray* stack = tvbparse_data;
xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
+ tvbparse_elem_t* name_tok = tok->sub->next;
+ gchar* name = g_strdown(tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len));
+ xml_names_t* ns = NULL;
+ xml_frame_t* new_frame;
proto_item* pi;
proto_tree* pt;
- tvbparse_elem_t* name_tok = tok->sub->next;
- gchar* name = tvb_get_ephemeral_string(name_tok->tvb,name_tok->offset,name_tok->len);
- pi = proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,-1,"<%s",name);
- pt = proto_item_add_subtree(pi,ett_tag);
+ /* xmlpi and doctype frames may store a NULL ns, and an ns may have no elements table */
+ if (current_frame->ns && current_frame->ns->elements) {
+ ns = g_hash_table_lookup(current_frame->ns->elements,name);
+ }
+
+ if (!ns) {
+ if (! ( ns = g_hash_table_lookup(root_ns->elements,name) ) ) {
+ ns = &unknown_ns;
+ }
+ }
- current_frame = ep_alloc(sizeof(xml_frame_t));
- current_frame->item = pi;
- current_frame->last_item = pi;
- current_frame->tree = pt;
- current_frame->start_offset = tok->offset;
+ pi = proto_tree_add_item(current_frame->tree,ns->hf_tag,tok->tvb,tok->offset,tok->len,FALSE);
+ /* the text comes from the packet, so it must never be used as the format string */
+ proto_item_set_text(pi,"%s",tvb_format_text(tok->tvb,tok->offset,(name_tok->offset - tok->offset) + name_tok->len));
- g_ptr_array_add(stack,current_frame);
+ pt = proto_item_add_subtree(pi,ns->ett);
+
+ new_frame = ep_alloc(sizeof(xml_frame_t));
+ new_frame->item = pi;
+ new_frame->last_item = pi;
+ new_frame->tree = pt;
+ new_frame->start_offset = tok->offset;
+ new_frame->ns = ns;
+
+ g_ptr_array_add(stack,new_frame);
}
+/* tvbparse callback: appends ">" to the text of the last item of the frame on top of the stack; the token itself is no longer used */
-static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
+static void after_open_tag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok _U_) {
GPtrArray* stack = tvbparse_data;
xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
- proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
proto_item_append_text(current_frame->last_item,">");
}
@@ -189,7 +246,6 @@ static void after_closed_tag(void* tvbparse_data, const void* wanted_data _U_, t
GPtrArray* stack = tvbparse_data;
xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
- proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
proto_item_append_text(current_frame->last_item,"/>");
if (stack->len > 1) {
@@ -206,7 +262,7 @@ void after_untag(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem
proto_item_set_len(current_frame->item, (tok->offset - current_frame->start_offset) + tok->len);
proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
- tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+ tvb_format_text(tok->tvb,tok->offset,tok->len));
if (stack->len > 1) {
g_ptr_array_remove_index_fast(stack,stack->len - 1);
@@ -223,13 +279,14 @@ static void before_dtd_doctype(void* tvbparse_data, const void* wanted_data _U_,
proto_tree* dtd_item = proto_tree_add_item(current_frame->tree, hf_doctype,
name_tok->tvb, name_tok->offset, name_tok->len, FALSE);
- proto_item_set_text(dtd_item,"%s",tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+ proto_item_set_text(dtd_item,"%s",tvb_format_text(tok->tvb,tok->offset,tok->len));
current_frame = ep_alloc(sizeof(xml_frame_t));
current_frame->item = dtd_item;
current_frame->last_item = dtd_item;
current_frame->tree = proto_item_add_subtree(dtd_item,ett_dtd);
current_frame->start_offset = tok->offset;
+ current_frame->ns = NULL;
g_ptr_array_add(stack,current_frame);
}
@@ -251,7 +308,7 @@ static void after_dtd_close(void* tvbparse_data, const void* wanted_data _U_, tv
xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
proto_tree_add_text(current_frame->tree,tok->tvb,tok->offset,tok->len,"%s",
- tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+ tvb_format_text(tok->tvb,tok->offset,tok->len));
if (stack->len > 1) {
g_ptr_array_remove_index_fast(stack,stack->len - 1);
} else {
@@ -266,13 +323,20 @@ static void get_attrib_value(void* tvbparse_data _U_, const void* wanted_data _U
+/*
+ * tvbparse callback run after an attribute (name, "=", value) has been
+ * matched. If the current frame's ns knows the lower-cased attribute name,
+ * the item uses that attribute's field and covers only the value; otherwise
+ * the unknown-attribute field is used and the item covers the whole token.
+ */
static void after_attrib(void* tvbparse_data, const void* wanted_data _U_, tvbparse_elem_t* tok) {
GPtrArray* stack = tvbparse_data;
xml_frame_t* current_frame = g_ptr_array_index(stack,stack->len - 1);
- gchar* name = tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len);
+ gchar* name = g_strdown(tvb_get_ephemeral_string(tok->sub->tvb,tok->sub->offset,tok->sub->len));
tvbparse_elem_t* value = tok->sub->next->next->data;
+ int* hfidp;
+ int hfid;
- name = name;
+ /* name is already lower-cased above, so it is looked up as is */
+ if(current_frame->ns && (hfidp = g_hash_table_lookup(current_frame->ns->attributes,name) )) {
+ hfid = *hfidp;
+ } else {
+ hfid = hf_unknowwn_attrib;
+ value = tok;
+ }
- current_frame->last_item = proto_tree_add_item(current_frame->tree,hf_attrib,value->tvb,value->offset,value->len,FALSE);
- proto_item_set_text(current_frame->last_item, "%s", tvb_get_ephemeral_string(tok->tvb,tok->offset,tok->len));
+ current_frame->last_item = proto_tree_add_item(current_frame->tree,hfid,value->tvb,value->offset,value->len,FALSE);
+ proto_item_set_text(current_frame->last_item, "%s", tvb_format_text(tok->tvb,tok->offset,tok->len));
}
@@ -287,10 +351,10 @@ static void unrecognized_token(void* tvbparse_data, const void* wanted_data _U_,
void init_xml_parser(void) {
- tvbparse_wanted_t* want_name = tvbparse_chars(-1,0,0,"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-",NULL,NULL,NULL);
+ tvbparse_wanted_t* want_name = tvbparse_chars(-1,0,0,"abcdefghijklmnopqrstuvwxyz-_:ABCDEFGHIJKLMNOPQRSTUVWXYZ",NULL,NULL,NULL);
tvbparse_wanted_t* want_attributes = tvbparse_one_or_more(-1, NULL, NULL, NULL,
- tvbparse_set_seq(hf_attrib, NULL, NULL, after_attrib,
+ tvbparse_set_seq(-1, NULL, NULL, after_attrib,
want_name,
tvbparse_char(-1,"=",NULL,NULL,NULL),
tvbparse_set_oneof(0, NULL, NULL, get_attrib_value,
@@ -306,7 +370,7 @@ void init_xml_parser(void) {
tvbparse_string(-1, "/>", NULL, NULL, after_closed_tag),
NULL);
- tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,NULL);
+ tvbparse_wanted_t* want_stopxmlpi = tvbparse_string(-1,"?>",NULL,NULL,after_xmlpi);
want_ignore = tvbparse_chars(-1,0,0," \t\r\n",NULL,NULL,NULL);
@@ -317,7 +381,7 @@ void init_xml_parser(void) {
tvbparse_string(-1,"-->",NULL,NULL,NULL),
TRUE),
NULL),
- tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,after_xmlpi,
+ tvbparse_set_seq(hf_xmlpi,NULL,before_xmpli,NULL,
tvbparse_string(-1,"<?",NULL,NULL,NULL),
want_name,
tvbparse_set_oneof(-1,NULL,NULL,NULL,
@@ -377,12 +441,7 @@ void init_xml_parser(void) {
want_stoptag,
NULL),
NULL),
- tvbparse_set_seq(hf_entity,NULL,NULL,after_token,
- tvbparse_char(4,"&",NULL,NULL,NULL),
- want_name,
- tvbparse_char(4,";",NULL,NULL,NULL),
- NULL),
- tvbparse_not_chars(hf_cdata,0,0,"<",NULL,NULL,after_token),
+ tvbparse_not_chars(XML_CDATA,0,0,"<",NULL,NULL,after_token),
tvbparse_not_chars(-1,0,0," \t\r\n",NULL,NULL,unrecognized_token),
NULL);
@@ -390,35 +449,139 @@ void init_xml_parser(void) {
}
+/*
+ * Allocates a new xml_names_t, inserts it into 'hash' keyed by its own copy
+ * of 'name' and returns it.
+ *
+ * The variable arguments are a NULL-terminated list of attribute names; each
+ * one is added to the new entry's attributes table with a freshly allocated
+ * int (set to -1) as its value. The elements table is created empty.
+ */
+xml_names_t* xml_new_namespace(GHashTable* hash, gchar* name, gchar* longname, gchar* blurb, ...) {
+ xml_names_t* ns = g_malloc(sizeof(xml_names_t));
+ va_list ap;
+ gchar* attr_name;
+
+ ns->name = g_strdup(name);
+ ns->longname = g_strdup(longname);
+ ns->blurb = g_strdup(blurb);
+ ns->hf_tag = -1;
+ ns->hf_cdata = -1;
+ ns->ett = -1;
+ /* g_malloc() does not clear the block, so give is_root a defined value */
+ ns->is_root = FALSE;
+ ns->attributes = g_hash_table_new(g_str_hash,g_str_equal);
+ ns->elements = g_hash_table_new(g_str_hash,g_str_equal);
+
+ va_start(ap,blurb);
+
+ while(( attr_name = va_arg(ap,gchar*) )) {
+ int* hfp = g_malloc(sizeof(int));
+ *hfp = -1;
+ g_hash_table_insert(ns->attributes,g_strdup(attr_name),hfp);
+ }
+
+ va_end(ap);
+
+ g_hash_table_insert(hash,ns->name,ns);
+
+ return ns;
+}
+
+/*
+ * GHFunc for an attributes table: k is the attribute name, v is the int*
+ * used as the field's p_id and p is the prefix of the field name.
+ * Appends an FT_STRING field named "<p>.<k>" to the global hf array.
+ */
+void add_xml_attribute_names(gpointer k, gpointer v, gpointer p) {
+ hf_register_info attr_hf;
+ gchar* field_name = g_strdup_printf("%s.%s",(gchar*)p,(gchar*)k);
+
+ attr_hf.p_id = (int*)v;
+
+ /* the dotted name serves as name, abbreviation and blurb alike */
+ attr_hf.hfinfo.name = field_name;
+ attr_hf.hfinfo.abbrev = field_name;
+ attr_hf.hfinfo.blurb = field_name;
+
+ attr_hf.hfinfo.type = FT_STRING;
+ attr_hf.hfinfo.display = BASE_NONE;
+ attr_hf.hfinfo.strings = NULL;
+ attr_hf.hfinfo.bitmask = 0x0;
+
+ /* remaining members start out zeroed / unlinked */
+ attr_hf.hfinfo.id = 0;
+ attr_hf.hfinfo.parent = 0;
+ attr_hf.hfinfo.ref_count = 0;
+ attr_hf.hfinfo.bitshift = 0;
+ attr_hf.hfinfo.same_name_next = NULL;
+ attr_hf.hfinfo.same_name_prev = NULL;
+
+ g_array_append_val(hf,attr_hf);
+}
+
+/*
+ * GHFunc for the xmlpi names table: v is the xml_names_t entry and p the
+ * prefix of the field name. Appends an FT_STRING field named "<p>.<name>"
+ * for the entry's hf_tag and the address of its ett to the global arrays,
+ * then adds one field per attribute of the entry under that same name.
+ */
+void add_xmlpi_namespace(gpointer k _U_, gpointer v, gpointer p) {
+ xml_names_t* entry = v;
+ gchar* field_name = g_strdup_printf("%s.%s",(gchar*)p,entry->name);
+ gint* entry_ett = &(entry->ett);
+ hf_register_info tag_hf;
+
+ tag_hf.p_id = &(entry->hf_tag);
+
+ /* the dotted name serves as name, abbreviation and blurb alike */
+ tag_hf.hfinfo.name = field_name;
+ tag_hf.hfinfo.abbrev = field_name;
+ tag_hf.hfinfo.blurb = field_name;
+
+ tag_hf.hfinfo.type = FT_STRING;
+ tag_hf.hfinfo.display = BASE_NONE;
+ tag_hf.hfinfo.strings = NULL;
+ tag_hf.hfinfo.bitmask = 0x0;
+
+ /* remaining members start out zeroed / unlinked */
+ tag_hf.hfinfo.id = 0;
+ tag_hf.hfinfo.parent = 0;
+ tag_hf.hfinfo.ref_count = 0;
+ tag_hf.hfinfo.bitshift = 0;
+ tag_hf.hfinfo.same_name_next = NULL;
+ tag_hf.hfinfo.same_name_prev = NULL;
+
+ g_array_append_val(hf,tag_hf);
+ g_array_append_val(ett_arr,entry_ett);
+
+ g_hash_table_foreach(entry->attributes,add_xml_attribute_names,field_name);
+
+}
+
+/*
+ * Creates the lookup tables used by the dissector: the tables of unknown_ns,
+ * the media type table and the xmlpi names table. The latter gets one entry,
+ * "xml", with the attributes version, encoding and standalone and no
+ * elements table; its fields are then queued for registration.
+ */
+void init_xml_names(void) {
+ xml_names_t* xml_decl_ns;
+
+ unknown_ns.attributes = g_hash_table_new(g_str_hash,g_str_equal);
+ unknown_ns.elements = g_hash_table_new(g_str_hash,g_str_equal);
+
+ media_types = g_hash_table_new(g_str_hash,g_str_equal);
+ xmpli_names = g_hash_table_new(g_str_hash,g_str_equal);
+
+ xml_decl_ns = xml_new_namespace(xmpli_names,"xml","XML XMLPI","XML XMLPI",
+ "version","encoding","standalone",NULL);
+
+ /* this entry has no child elements: drop the table xml_new_namespace() made */
+ g_hash_table_destroy(xml_decl_ns->elements);
+ xml_decl_ns->elements = NULL;
+
+ g_hash_table_foreach(xmpli_names,add_xmlpi_namespace,"xml.xmlpi");
+}
+
+
+/*
+ * Registers the XML protocol: collects the fixed fields and subtrees plus
+ * those queued by init_xml_names() into the hf / ett_arr arrays, registers
+ * the protocol, its fields, its subtrees and the "xml" dissector, and then
+ * builds the tvbparse grammar.
+ */
void
proto_register_xml(void) {
- static gint *ett[] = {
- &ett_i,
- &ett_tag,
+ static gint *ett_base[] = {
+ &unknown_ns.ett,
+ &xml_ns.ett,
&ett_dtd,
+ &ett_xmpli
};
- static hf_register_info hf[] = {
- { &hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+ static hf_register_info hf_base[] = {
{ &hf_xmlpi, {"XMLPI", "xml.xmlpi", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
- { &hf_entity, {"Entity", "xml.entity", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
- { &hf_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
{ &hf_comment, {"Comment", "xml.comment", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
- { &hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+ { &hf_unknowwn_attrib, {"Attribute", "xml.attribute", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
{ &hf_doctype, {"Doctype", "xml.doctype", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
{ &hf_dtd_tag, {"DTD Tag", "xml.dtdtag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
- { &hf_what, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
+ { &unknown_ns.hf_cdata, {"CDATA", "xml.cdata", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+ { &unknown_ns.hf_tag, {"Tag", "xml.tag", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }},
+ { &hf_junk, {"Unknown", "xml.unknown", FT_STRING, BASE_NONE, NULL, 0, "", HFILL }}
};
+
+ hf = g_array_new(FALSE,FALSE,sizeof(hf_register_info));
+ ett_arr = g_array_new(FALSE,FALSE,sizeof(gint*));
+
+ g_array_append_vals(hf,hf_base,array_length(hf_base));
+ g_array_append_vals(ett_arr,ett_base,array_length(ett_base));
- proto_xml = proto_register_protocol("eXtensible Markup Language",
- "XML",
- "xml");
-
- proto_register_field_array(proto_xml, hf, array_length(hf));
- proto_register_subtree_array(ett, array_length(ett));
+ init_xml_names();
+
+ /* same argument order as the call this replaces: long name, short name, filter name */
+ xml_ns.hf_tag = proto_register_protocol(xml_ns.longname, xml_ns.blurb, xml_ns.name);
+
+ proto_register_field_array(xml_ns.hf_tag, (hf_register_info*)hf->data, hf->len);
+ proto_register_subtree_array((gint**)ett_arr->data, ett_arr->len);
- register_dissector("xml", dissect_xml, proto_xml);
+ register_dissector("xml", dissect_xml, xml_ns.hf_tag);
init_xml_parser();
}
diff --git a/epan/dtd.h b/epan/dtd.h
new file mode 100644
index 0000000000..1db10fd371
--- /dev/null
+++ b/epan/dtd.h
@@ -0,0 +1,61 @@
+/*
+ * dtd.h
+ *
+ * XML dissector for ethereal
+ * DTD import declarations
+ *
+ * Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+ *
+ * $Id $
+ *
+ * Ethereal - Network traffic analyzer
+ * By Gerald Combs <gerald@ethereal.com>
+ * Copyright 1998 Gerald Combs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _DTD_H_
+#define _DTD_H_
+
+#include <glib.h>
+
+/* Result of parsing a DTD, filled in by the lexer (dtd_parse.l) and the grammar (dtd_grammar.lemon) */
+typedef struct _dtd_build_data_t {
+ /* set from the "name" attribute of the ethereal:protocol xmlpi, or from the lower-cased DOCTYPE name */
+ gchar* proto_name;
+ /* set from the "media" attribute of the ethereal:protocol xmlpi */
+ gchar* media_type;
+ /* set from the "description" attribute of the ethereal:protocol xmlpi */
+ gchar* description;
+ /* set from the "root" attribute of the ethereal:protocol xmlpi */
+ gchar* proto_root;
+
+ /* dtd_named_list_t* entries appended by the grammar, one per ELEMENT declaration */
+ GPtrArray* elements;
+ /* dtd_named_list_t* entries appended by the grammar, one per ATTLIST declaration */
+ GPtrArray* attributes;
+
+ /* location string of the lexer, refreshed before every token is handed to the grammar */
+ gchar* location;
+ /* error messages are appended here; the lexer stops once it is non-empty */
+ GString* error;
+} dtd_build_data_t;
+
+/* Token value passed from the lexer to the grammar; both strings are copies owned by the token */
+typedef struct _dtd_token_data_t {
+ /* copy of the text matched by the lexer */
+ gchar* text;
+ /* copy of the lexer's location string at the time the token was made (may be NULL) */
+ gchar* location;
+} dtd_token_data_t;
+
+/* A name with a list of strings; the grammar builds one per ELEMENT (child element names) and per ATTLIST (attribute names) */
+typedef struct _dtd_named_list_t {
+ /* text of the NAME token of the declaration */
+ gchar* name;
+ /* array of gchar* names */
+ GPtrArray* list;
+} dtd_named_list_t;
+
+/* NOTE(review): presumably implemented in dtd_preparse.l, whose header describes importing a dtd file into a GString; parameter meanings to be confirmed there */
+extern GString* dtd_preparse(gchar* dname, gchar* fname, GString* err);
+/* Runs the DTD lexer and grammar over s and returns a newly allocated dtd_build_data_t; problems are reported through its error string */
+extern dtd_build_data_t* dtd_parse(GString* s);
+
+#endif
diff --git a/epan/dtd_grammar.lemon b/epan/dtd_grammar.lemon
new file mode 100644
index 0000000000..fc98472c99
--- /dev/null
+++ b/epan/dtd_grammar.lemon
@@ -0,0 +1,151 @@
+%include {
+
+/* dtd_grammar.lemon
+* XML dissector for ethereal
+* XML's DTD grammar
+*
+* Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+*
+* $Id $
+*
+* Ethereal - Network traffic analyzer
+* By Gerald Combs <gerald@ethereal.com>
+* Copyright 1998 Gerald Combs
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <glib.h>
+#include "dtd.h"
+
+
+/* Wraps name and list in a newly allocated dtd_named_list_t; neither argument is copied */
+static dtd_named_list_t* dtd_named_list_new(gchar* name, GPtrArray* list) {
+ dtd_named_list_t* named = g_malloc(sizeof(dtd_named_list_t));
+
+ named->list = list;
+ named->name = name;
+
+ return named;
+}
+
+/*
+ * Appends every pointer held by b to a, keeping their order, then frees b
+ * (the array and its pointer segment, not the elements, which now belong
+ * to a). Returns a.
+ */
+static GPtrArray* g_ptr_array_join(GPtrArray* a, GPtrArray* b){
+ guint i;
+
+ for (i = 0; i < b->len; i++) {
+ g_ptr_array_add(a,g_ptr_array_index(b,i));
+ }
+
+ /* TRUE also releases b's pointer segment, which FALSE would leave allocated */
+ g_ptr_array_free(b,TRUE);
+
+ return a;
+}
+
+}
+
+%name DtdParse
+
+%extra_argument { dtd_build_data_t *bd }
+
+%token_destructor {
+ if ($$) {
+ if ($$->text) g_free($$->text);
+ if ($$->location) g_free($$->location);
+ g_free($$);
+ }
+}
+
+%syntax_error {
+ if (!TOKEN)
+ g_string_sprintfa(bd->error,"syntax error at end of file");
+ else
+ g_string_sprintfa(bd->error,"syntax error in %s at or before '%s': \n", bd->location,TOKEN->text);
+}
+
+%parse_failure {
+ g_string_sprintfa(bd->error,"DTD parsing failure at %s\n",bd->location);
+}
+
+%token_prefix TOKEN_
+
+%token_type { dtd_token_data_t* }
+
+dtd ::= doctype.
+dtd ::= dtd_parts.
+
+doctype ::= TAG_START DOCTYPE_KW NAME(Name) OPEN_BRACKET dtd_parts CLOSE_BRACKET TAG_STOP. {
+ bd->proto_name = g_strdown(g_strdup(Name->text));
+}
+
+dtd_parts ::= dtd_parts element(Element). { g_ptr_array_add(bd->elements,Element); }
+dtd_parts ::= dtd_parts attlist(Attlist). { g_ptr_array_add(bd->attributes,Attlist); }
+dtd_parts ::= element(Element). { g_ptr_array_add(bd->elements,Element); }
+dtd_parts ::= attlist(Attlist). { g_ptr_array_add(bd->attributes,Attlist); }
+
+%type attlist { dtd_named_list_t* }
+attlist(A) ::= TAG_START ATTLIST_KW NAME(B) attrib_list(TheList) TAG_STOP. { A = dtd_named_list_new(B->text,TheList); }
+
+%type element { dtd_named_list_t* }
+element(A) ::= TAG_START ELEMENT_KW NAME(B) sub_elements(C) TAG_STOP. { A = dtd_named_list_new(B->text,C); }
+
+%type attrib_list { GPtrArray* }
+attrib_list(A) ::= attrib_list(B) attrib(C). { g_ptr_array_add(B,C); A = B; }
+attrib_list(A) ::= attrib(B). { A = g_ptr_array_new(); g_ptr_array_add(A,B); }
+
+%type attrib { gchar* }
+attrib(A) ::= NAME(B) att_type att_default. { A = g_strdown(g_strdup(B->text)); }
+
+att_type ::= ATT_TYPE.
+att_type ::= enumeration.
+
+att_default ::= ATT_DEF.
+att_default ::= ATT_DEF_WITH_VALUE QUOTED.
+att_default ::= QUOTED.
+att_default ::= IMPLIED_KW.
+att_default ::= REQUIRED_KW.
+
+enumeration ::= OPEN_PARENS enum_list CLOSE_PARENS.
+
+enum_list ::= enum_list PIPE enum_item.
+enum_list ::= enum_item.
+enum_list ::= enumeration.
+enum_list ::= enum_list PIPE enumeration.
+
+enum_item ::= NAME.
+enum_item ::= QUOTED.
+
+
+%type sub_elements { GPtrArray* }
+sub_elements(A) ::= sub_elements(B) STAR. {A=B;}
+sub_elements(A) ::= sub_elements(B) PLUS. {A=B;}
+sub_elements(A) ::= sub_elements(B) QUESTION. {A=B;}
+sub_elements(A) ::= OPEN_PARENS ELEM_DATA CLOSE_PARENS. { A = g_ptr_array_new(); }
+sub_elements(A) ::= OPEN_PARENS element_list(B) COMMA ELEM_DATA CLOSE_PARENS. { A = B; }
+sub_elements(A) ::= OPEN_PARENS element_list(B) PIPE ELEM_DATA CLOSE_PARENS. { A = B; }
+sub_elements(A) ::= OPEN_PARENS element_list(B) CLOSE_PARENS. { A = B; }
+sub_elements(A) ::= EMPTY_KW. { A = g_ptr_array_new(); }
+
+%type element_list { GPtrArray* }
+element_list(A) ::= element_list(B) COMMA element_child(C). { g_ptr_array_add(B,C); A = B; }
+element_list(A) ::= element_list(B) PIPE element_child(C). { g_ptr_array_add(B,C); A = B; }
+element_list(A) ::= element_child(B). { A = g_ptr_array_new(); g_ptr_array_add(A,B); }
+element_list(A) ::= sub_elements(B). { A = B; }
+element_list(A) ::= element_list(B) COMMA sub_elements(C). { A = g_ptr_array_join(B,C); }
+element_list(A) ::= element_list(B) PIPE sub_elements(C). { A = g_ptr_array_join(B,C); }
+
+%type element_child { gchar* }
+element_child(A) ::= NAME(B). { A = g_strdown(g_strdup(B->text)); }
+element_child(A) ::= NAME(B) STAR. { A = g_strdown(g_strdup(B->text)); }
+element_child(A) ::= NAME(B) QUESTION. { A = g_strdown(g_strdup(B->text)); }
+element_child(A) ::= NAME(B) PLUS. { A = g_strdown(g_strdup(B->text)); }
+
diff --git a/epan/dtd_parse.l b/epan/dtd_parse.l
new file mode 100644
index 0000000000..8c33ee26e6
--- /dev/null
+++ b/epan/dtd_parse.l
@@ -0,0 +1,316 @@
+%option noyywrap
+%option nounput
+%option outfile="dtd_parse.c"
+%option prefix="Dtd_Parse_"
+%option never-interactive
+
+%{
+
+ /* dtd_parse.l
+ * an XML dissector for ethereal
+ * lexical analyzer for DTDs
+ *
+ * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+ *
+ * $Id$
+ *
+ * Ethereal - Network traffic analyzer
+ * By Gerald Combs <gerald@ethereal.com>
+ * Copyright 1998 Gerald Combs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <glib.h>
+#include <string.h>
+
+#include "dtd.h"
+#include "dtd_grammar.h"
+
+ struct _proto_xmlpi_attr {
+ gchar* name;
+ void (*act)(gchar*);
+ };
+
+ void DtdParse(void*,int,dtd_token_data_t*,dtd_build_data_t*);
+ void *DtdParseAlloc(void *(*)(gulong));
+ void DtdParseFree( void*, void(*)(void*) );
+ void DtdParseTrace(FILE *TraceFILE, char *zTracePrompt);
+ void* pParser;
+ GString* input_string;
+ guint offset;
+ guint len;
+ gchar* location;
+ gchar* attr_name;
+
+ static int my_yyinput(char* buff,guint size);
+
+ static dtd_token_data_t* new_token(gchar*);
+
+ static dtd_build_data_t* build_data;
+
+ static void set_proto_name (gchar* val) { if(build_data->proto_name) g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); }
+ static void set_media_type (gchar* val) { if(build_data->media_type) g_free(build_data->media_type); build_data->media_type = g_strdup(val); }
+ static void set_proto_root (gchar* val) { if(build_data->proto_root) g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); }
+ static void set_description (gchar* val) { if(build_data->description) g_free(build_data->description); build_data->description = g_strdup(val); }
+
+ struct _proto_xmlpi_attr proto_attrs[] =
+ {
+ { "name", set_proto_name },
+ { "media", set_media_type },
+ { "root", set_proto_root },
+ { "description", set_description },
+ {NULL,NULL}
+ };
+
+#define DTD_PARSE(token_type) \
+ { build_data->location = location; \
+ DtdParse(pParser, (token_type), new_token(yytext), build_data); \
+ if(build_data->error->len > 0) yyterminate(); \
+ }
+
+#define YY_INPUT(buff,result,max_size) ( (result) = my_yyinput((buff),(max_size)) )
+
+%}
+
+start_xmlpi "<?"
+
+location_xmlpi "ethereal:location"
+protocol_xmlpi "ethereal:protocol"
+
+/* "=", optional blanks, then the opening quote; [:blank:] is only a character class inside a bracket expression */
+get_attr_quote =[[:blank:]]*["]
+avoid_editor_bug ["]
+
+get_location_xmlpi [^[:blank:]]+
+
+stop_xmlpi "?>"
+
+special_start "<!"
+special_stop ">"
+whitespace [[:blank:]\r\n]+
+newline \n
+attlist_kw ATTLIST
+doctype_kw DOCTYPE
+element_kw ELEMENT
+
+pcdata #PCDATA
+any ANY
+cdata #CDATA
+
+iD ID
+idref IDREF
+idrefs IDREFS
+nmtoken NMTOKEN
+nmtokens NMTOKENS
+entity ENTITY
+entities ENTITIES
+notation NOTATION
+cdata_t CDATA
+
+empty EMPTY
+defaulT #DEFAULT
+fixed #FIXED
+required #REQUIRED
+implied #IMPLIED
+
+star "*"
+question "?"
+plus "+"
+open_parens "("
+close_parens ")"
+open_bracket "["
+close_bracket "]"
+comma ","
+pipe "|"
+dquote ["]
+
+name [a-z][-a-z0-9_]*
+dquoted ["][^\"]*["]
+squoted ['][^\']*[']
+
+%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE
+%%
+
+{whitespace} ;
+
+<DTD>{start_xmlpi} {
+ BEGIN XMLPI;
+}
+
+<XMLPI>{location_xmlpi} {
+ /* drop the previous location; clear both pointers to it so that neither dangles until the new one is read */
+ g_free(location);
+ location = NULL;
+ build_data->location = NULL;
+ BEGIN LOCATION;
+}
+
+<XMLPI>{protocol_xmlpi} {
+ BEGIN PROTOCOL;
+}
+
+<XMLPI>. ; /* skip any other character of an xmlpi that is not one of ours */
+<XMLPI>{stop_xmlpi} BEGIN DTD;
+
+<LOCATION>{get_location_xmlpi} {
+ location = g_strdup(yytext);
+ BEGIN DONE;
+}
+
+<DONE>{stop_xmlpi} BEGIN DTD;
+
+<PROTOCOL>{name} {
+ attr_name = g_strdup(yytext);
+ BEGIN GET_ATTR_QUOTE;
+}
+
+<GET_ATTR_QUOTE>{get_attr_quote} { BEGIN GET_ATTR_VAL; }
+
+<GET_ATTR_QUOTE>. {
+ g_string_sprintfa(build_data->error,
+ "error in ethereal:protocol xmpli at %s : could not find attribute value!",
+ location);
+ yyterminate();
+}
+
+<GET_ATTR_VAL>[^"]+ {
+ /*"*/
+ struct _proto_xmlpi_attr* pa;
+ gboolean got_it = FALSE;
+
+ for(pa = proto_attrs; pa->name; pa++) {
+ if (g_strcasecmp(attr_name,pa->name) == 0) {
+ pa->act(yytext);
+ got_it = TRUE;
+ break;
+ }
+ }
+
+ if (! got_it) {
+ g_string_sprintfa(build_data->error,
+ "error in ethereal:protocol xmpli at %s : no such parameter %s!",
+ location, attr_name);
+ g_free(attr_name);
+ yyterminate();
+ }
+
+ g_free(attr_name);
+
+ BEGIN GET_ATTR_CLOSE_QUOTE;
+}
+
+<GET_ATTR_CLOSE_QUOTE>{dquote} { BEGIN PROTOCOL;}
+
+<PROTOCOL>{stop_xmlpi} BEGIN DTD;
+
+<DTD>{special_start} { DTD_PARSE(TOKEN_TAG_START); }
+<DTD>{special_stop} { DTD_PARSE(TOKEN_TAG_STOP); }
+
+<DTD>{attlist_kw} { DTD_PARSE(TOKEN_ATTLIST_KW); }
+<DTD>{element_kw} { DTD_PARSE(TOKEN_ELEMENT_KW); }
+<DTD>{doctype_kw} { DTD_PARSE(TOKEN_DOCTYPE_KW); }
+
+<DTD>{pcdata} { DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{any} { DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{cdata} { DTD_PARSE(TOKEN_ELEM_DATA); }
+<DTD>{empty} { DTD_PARSE(TOKEN_EMPTY_KW); }
+
+<DTD>{iD} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{idref} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{idrefs} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{nmtoken} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{nmtokens} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{entity} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{entities} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{notation} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{cdata_t} { DTD_PARSE(TOKEN_ATT_TYPE); }
+<DTD>{defaulT} { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
+<DTD>{fixed} { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
+<DTD>{required} { DTD_PARSE(TOKEN_ATT_DEF); }
+<DTD>{implied} { DTD_PARSE(TOKEN_ATT_DEF); }
+
+<DTD>{star} { DTD_PARSE(TOKEN_STAR); }
+<DTD>{question} { DTD_PARSE(TOKEN_QUESTION); }
+<DTD>{plus} { DTD_PARSE(TOKEN_PLUS); }
+<DTD>{comma} { DTD_PARSE(TOKEN_COMMA); }
+<DTD>{open_parens} { DTD_PARSE(TOKEN_OPEN_PARENS); }
+<DTD>{close_parens} { DTD_PARSE(TOKEN_CLOSE_PARENS); }
+<DTD>{open_bracket} { DTD_PARSE(TOKEN_OPEN_BRACKET); }
+<DTD>{close_bracket} { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
+<DTD>{pipe} { DTD_PARSE(TOKEN_PIPE); }
+
+<DTD>{dquoted} |
+<DTD>{squoted} { DTD_PARSE(TOKEN_QUOTED); }
+<DTD>{name} { DTD_PARSE(TOKEN_NAME); }
+
+%%
+
+/* Builds a token for the grammar holding copies of text and of the current location string */
+static dtd_token_data_t* new_token(gchar* text) {
+ dtd_token_data_t* tok = g_malloc(sizeof(dtd_token_data_t));
+
+ tok->location = g_strdup(location);
+ tok->text = g_strdup(text);
+
+ return tok;
+}
+
+
+
+/*
+ * Input routine behind YY_INPUT: copies up to size bytes of input_string,
+ * starting at offset, into buff and advances offset.
+ * Returns the number of bytes copied, or YY_NULL once the input is used up.
+ */
+static int my_yyinput(char* buff, guint size) {
+ guint remaining;
+
+ if (offset >= len ) {
+ return YY_NULL;
+ }
+
+ /* clamp against what is left rather than testing offset + size, which could wrap */
+ remaining = len - offset;
+ if (size > remaining) {
+ size = remaining;
+ }
+
+ memcpy(buff, input_string->str + offset,size);
+ offset += size;
+
+ return size;
+}
+
+/*
+ * Parses the DTD text held in s.
+ * Returns a newly allocated dtd_build_data_t; messages for any problems
+ * found are appended to its error string.
+ */
+extern dtd_build_data_t* dtd_parse(GString* s) {
+
+ /* the lexer reads s through my_yyinput() using these globals */
+ input_string = s;
+ offset = 0;
+ len = input_string->len;
+
+ pParser = DtdParseAlloc(g_malloc);
+
+ build_data = g_malloc(sizeof(dtd_build_data_t));
+
+ build_data->proto_name = NULL;
+ build_data->media_type = NULL;
+ build_data->description = NULL;
+ build_data->proto_root = NULL;
+
+ build_data->elements = g_ptr_array_new();
+ build_data->attributes = g_ptr_array_new();
+
+ build_data->location = NULL;
+ build_data->error = g_string_new("");
+
+ BEGIN DTD;
+
+ /* the lexer actions feed each token to the grammar as it is matched */
+ yylex();
+
+ /* token 0 tells the grammar that the input has ended */
+ DtdParse(pParser, 0, NULL,build_data);
+
+ yyrestart(NULL);
+
+ DtdParseFree(pParser, g_free );
+
+ return build_data;
+}
diff --git a/epan/dtd_preparse.l b/epan/dtd_preparse.l
new file mode 100644
index 0000000000..101a9161a4
--- /dev/null
+++ b/epan/dtd_preparse.l
@@ -0,0 +1,258 @@
+%option noyywrap
+%option nounput
+%option prefix="Dtd_PreParse_"
+%option never-interactive
+%option caseless
+%option outfile="dtd_preparse.c"
+
+%{
+ /*
+ * dtd_preparse.l
+ *
+ * an XML dissector for ethereal
+ *
+ * DTD Preparser - import a dtd file into a GString
+ * including files, removing comments
+ * and resolving %entities;
+ *
+ * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
+ *
+ * $Id$
+ *
+ * Ethereal - Network traffic analyzer
+ * By Gerald Combs <gerald@ethereal.com>
+ * Copyright 1998 Gerald Combs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include "dtd.h"
+
+#define MAX_INCLUDE_DEPTH 10 /* number of slots in include_stack */
+YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
+int include_stack_ptr = 0; /* location() emits include_stack_ptr + 1 "file:line" entries */
+
+#define ECHO g_string_append(current,yytext); /* overrides flex's ECHO: the text is appended to `current` */
+
+GString* current; /* buffer the rules append to; set to NULL while inside a comment */
+GString* output; /* the preprocessed text returned by dtd_preparse() */
+GHashTable* entities; /* entity definitions, keyed by "%name;" */
+gchar* entity_name; /* key built for the entity currently being defined */
+GString* error; /* error messages are appended here; set from dtd_preparse()'s err argument */
+
+gchar* dirname; /* prefix prepended to file names before fopen() */
+gchar* filename; /* file name reported by location() */
+guint linenum; /* line number reported by location() */
+
+static gchar* replace_entity(gchar* s); /* looks an entity reference up in `entities`; "" if unknown */
+static const gchar* location(void); /* builds a "<? ethereal:location ...?>" marker from filename and linenum */
+static gchar* load_entity_file(gchar* filename); /* reads the file named by a SYSTEM entity into a string */
+ /* [:blank:]+file[:blank:]*=[:blank:]*["] */
+
+%}
+/* delimiters and body characters of an XML processing instruction */
+xmlpi_start "<?"
+xmlpi_stop "?>"
+xmlpi_chars .
+
+/* comment and markup-declaration delimiters */
+comment_start "<!--"
+comment_stop "-->"
+special_start "<!"
+special_stop ">"
+
+/* start of a parameter-entity declaration, and the pieces of a SYSTEM one */
+entity_start "<!"[[:blank:]\n]*entity[[:blank:]\n]*"%"
+system SYSTEM
+filename [^"]+
+
+
+name [A-Za-z][-:A-Za-z0-9_]*
+
+/* pieces of a double-quoted entity value */
+quote "\""
+percent [%]
+escaped_quote "\\\""
+non_quote [^"%]+
+
+avoid_editor_bug ["]
+
+/* an entity reference, introduced by either '%' or '&' */
+entity [%&][A-Za-z][-A-Za-z0-9_]*;
+
+/*
+ * A run of blanks. The POSIX class must be spelled with both colons: without
+ * the leading one the pattern is an ordinary bracket expression followed by
+ * a literal ']' and never matches spaces or tabs.
+ */
+whitespace [[:blank:]]+
+newline \n
+%START OUTSIDE IN_COMMENT IN_ENTITY NAMED_ENTITY IN_QUOTE ENTITY_DONE GET_FNAME_OPEN_QUOTE GET_FNAME GET_FNAME_CLOSE_QUOTE XMLPI
+%%
+
+
+    /*
+     * Rules without a start condition are active in every state, because
+     * all states are declared with %START (inclusive).  `current` is NULL
+     * while inside a comment, hence the guards on those rules.
+     */
+
+{entity} if (current) g_string_sprintfa(current,"%s\n%s\n",replace_entity(yytext),location());
+
+{whitespace} if (current) g_string_append(current," ");
+
+    /* XML processing instructions are appended to `current` */
+<OUTSIDE>{xmlpi_start} { g_string_append(current,yytext); BEGIN XMLPI; }
+<XMLPI>{xmlpi_chars} { g_string_append(current,yytext); }
+<XMLPI>{newline} {
+    /* this rule shadows the generic newline rule, so count the line here */
+    linenum++;
+    g_string_append(current,yytext);
+}
+<XMLPI>{xmlpi_stop} { g_string_append(current,yytext); BEGIN OUTSIDE; }
+
+    /*
+     * Comments are dropped.  The negated class excludes the newline so that
+     * line breaks inside a comment reach the generic newline rule below and
+     * are still counted in linenum.
+     */
+<OUTSIDE>{comment_start} { current = NULL; BEGIN IN_COMMENT; }
+<IN_COMMENT>[^-\n]? |
+<IN_COMMENT>[-] ;
+<IN_COMMENT>{comment_stop} { current = output; BEGIN OUTSIDE; }
+
+{newline} {
+ linenum++;
+ if (current) g_string_sprintfa(current,"%s\n",location());
+}
+
+
+    /*
+     * Parameter entity declarations: the value is either a quoted string,
+     * collected into a fresh GString, or the contents of a SYSTEM file.
+     */
+<OUTSIDE>{entity_start} { BEGIN IN_ENTITY; }
+<IN_ENTITY>{name} { entity_name = g_strdup_printf("%%%s;",yytext); BEGIN NAMED_ENTITY; }
+<NAMED_ENTITY>{quote} { current = g_string_new(location()); BEGIN IN_QUOTE; }
+<IN_QUOTE>{quote} { g_hash_table_insert(entities,entity_name,current); BEGIN ENTITY_DONE; }
+<IN_QUOTE>{percent} |
+<IN_QUOTE>{non_quote} |
+<IN_QUOTE>{escaped_quote} g_string_append(current,yytext);
+<NAMED_ENTITY>{system} { BEGIN GET_FNAME_OPEN_QUOTE; }
+<GET_FNAME_OPEN_QUOTE>{quote} { BEGIN GET_FNAME; }
+<GET_FNAME>{filename} {
+    /*
+     * Every value stored in `entities` has to be a GString*: replace_entity()
+     * reads ->str from it and the cleanup callback passes it to
+     * g_string_free().  The loaded text is therefore copied into a GString;
+     * the returned buffer is not freed here because load_entity_file() may
+     * hand back a string literal.
+     */
+    g_hash_table_insert(entities,entity_name,g_string_new(load_entity_file(yytext)));
+    BEGIN GET_FNAME_CLOSE_QUOTE;
+}
+<GET_FNAME_CLOSE_QUOTE>{quote} { BEGIN ENTITY_DONE; }
+<ENTITY_DONE>{special_stop} { current = output; g_string_append(current,"\n"); BEGIN OUTSIDE; }
+
+%%
+
+/*
+ * Read the file `fname`, looked up under the `dirname` prefix, into a newly
+ * allocated string.  The text starts with a location() marker and every
+ * line of the file is preceded by another one.
+ *
+ * While the file is being read, `filename` and `linenum` describe the loaded
+ * file so that location() reports positions inside it; both are restored
+ * before returning.
+ *
+ * Returns the collected text, or "" (a string literal) when the file cannot
+ * be opened.  Failures are described by appending to `error` when it is set.
+ */
+static gchar* load_entity_file(gchar* fname) {
+    gchar* fullname = g_strdup_printf("%s%s",dirname,fname);
+    gchar* save_filename = filename;
+    guint save_linenum = linenum;
+    FILE* fp = fopen(fullname,"r");
+    int saved_errno = errno; /* captured before g_free() gets a chance to change it */
+    GString* filetext;
+    gchar* retstr;
+    gchar chunk[1024];
+    gboolean at_line_start = TRUE;
+
+    g_free(fullname);
+
+    if (!fp) {
+        if (error) {
+            g_string_sprintfa(error,"at %s:%u: could not load file %s: %s", filename, linenum, fname, strerror(saved_errno));
+        }
+        return "";
+    }
+
+    filename = fname;
+    linenum = 1;
+
+    filetext = g_string_new(location());
+
+    /*
+     * fgets() replaces the BSD-only fgetln().  A line longer than the chunk
+     * buffer arrives in several pieces, so the location marker is only
+     * emitted, and the line only counted, at real line boundaries.
+     */
+    while (fgets(chunk, sizeof chunk, fp)) {
+        size_t chunklen = strlen(chunk);
+
+        if (at_line_start) {
+            g_string_append(filetext,location());
+        }
+        g_string_append(filetext,chunk);
+
+        at_line_start = (chunklen > 0 && chunk[chunklen - 1] == '\n');
+        if (at_line_start) {
+            linenum++;
+        }
+    }
+
+    if ( ferror(fp) ) {
+        saved_errno = errno;
+        if (error) {
+            g_string_sprintfa(error,"at %s:%u: problem reading file %s: %s", filename, linenum, fname, strerror(saved_errno));
+        }
+    }
+
+    fclose(fp);
+
+    /* keep the character data, release only the GString wrapper */
+    retstr = filetext->str;
+    g_string_free(filetext,FALSE);
+
+    /* back to the position in the including file */
+    filename = save_filename;
+    linenum = save_linenum;
+
+    return retstr;
+}
+
+/*
+ * Look up the replacement text for the entity reference `entity`.
+ *
+ * The first character of `entity` is overwritten with '%' because the table
+ * keys are built as "%name;", so a reference written as "&name;" is looked
+ * up under the same key.
+ *
+ * Returns the stored text, or "" when the entity is unknown; in that case a
+ * message is appended to `error` if one was supplied.
+ */
+static gchar* replace_entity(gchar* entity) {
+    GString* replacement;
+
+    *entity = '%';
+
+    replacement = g_hash_table_lookup(entities,entity);
+
+    if (replacement) {
+        return replacement->str;
+    }
+
+    /* dtd_preparse() accepts a NULL error string, so it may be unset here */
+    if (error) {
+        g_string_sprintfa(error,"dtd_preparse: in file '%s': %s does not exists\n", filename, entity);
+    }
+
+    return "";
+}
+
+/*
+ * Format the current position as a "<? ethereal:location ...?>" marker.
+ *
+ * The text lives in a function-static GString that is reused on every call,
+ * so the returned pointer is only valid until the next call.  One
+ * "filename:linenum from" entry is written per include level
+ * (include_stack_ptr + 1 of them) and the final "from" is cut off again.
+ */
+static const gchar* location(void) {
+    static GString* loc = NULL;
+    guint remaining;
+
+    if (!loc) {
+        loc = g_string_new("");
+    } else {
+        g_string_truncate(loc,0);
+    }
+
+    g_string_append(loc,"<? ethereal:location ");
+
+    for (remaining = include_stack_ptr + 1; remaining > 0; remaining--) {
+        g_string_sprintfa(loc, "%s:%u from", filename, linenum);
+    }
+
+    /* drop the trailing "from" left by the last entry */
+    g_string_truncate(loc,(loc->len) - 4);
+
+    g_string_append(loc,"?>");
+
+    return loc->str;
+}
+
+/*
+ * g_hash_table_foreach_remove() callback: frees the key with g_free() and
+ * the value, taken to be a GString, together with its character data.
+ * Always returns TRUE so that every entry is removed from the table.
+ */
+static gboolean free_gstring_hash_items(gpointer k,gpointer v,gpointer p _U_) {
+    GString* value = v;
+
+    g_string_free(value,TRUE);
+    g_free(k);
+
+    return TRUE;
+}
+
+/*
+ * Preprocess the DTD file `fname`, opened as `dname` followed by `fname`,
+ * by running it through the scanner in this file.
+ *
+ * dname: prefix prepended to this file name and to SYSTEM entity file names
+ * fname: name of the file to read
+ * err:   string that error messages are appended to; may be NULL
+ *
+ * Returns a newly created GString holding the scanner's output, or NULL when
+ * the file cannot be opened.
+ */
+extern GString* dtd_preparse(gchar* dname, gchar* fname, GString* err) {
+    gchar* fullname = g_strdup_printf("%s%s",dname,fname);
+    FILE* fp = fopen(fullname,"r");
+    int open_errno = errno; /* captured before g_free() gets a chance to change it */
+    GString* scratch_err = NULL;
+
+    g_free(fullname);
+
+    dirname = dname;
+    filename = fname;
+    yyin = fp;
+
+    if (!fp) {
+        if (err)
+            g_string_sprintfa(err, "Could not open file: '%s', error: %s",filename,strerror(open_errno));
+
+        return NULL;
+    }
+
+    linenum = 1;
+
+    /*
+     * The helpers append to `error`; give them a throw-away buffer when the
+     * caller is not interested in the messages.
+     */
+    if (!err) {
+        scratch_err = g_string_new("");
+    }
+    error = err ? err : scratch_err;
+
+    entities = g_hash_table_new(g_str_hash,g_str_equal);
+    current = output = g_string_new(location());
+
+    BEGIN OUTSIDE;
+
+    yylex();
+
+    yyrestart(NULL);
+
+    /* yyrestart(NULL) only detaches the stream, it does not close it */
+    fclose(fp);
+
+    g_hash_table_foreach_remove(entities,free_gstring_hash_items,NULL);
+    g_hash_table_destroy(entities);
+
+    if (scratch_err) {
+        g_string_free(scratch_err,TRUE);
+        error = NULL;
+    }
+
+    return output;
+}