diff options
author | Olivier Biot <obiot.ethereal@gmail.com> | 2003-12-09 23:02:40 +0000 |
---|---|---|
committer | Olivier Biot <obiot.ethereal@gmail.com> | 2003-12-09 23:02:40 +0000 |
commit | 0f18533b75208fe7969246fa614645962ae9c5d3 (patch) | |
tree | 59d317e93032ee308bcd87aa1d60729e31d09b69 | |
parent | 4b890b83fd0259b22fed5bad4b6a9070cfdb9176 (diff) |
Internal PCRE field type for efficient RE parsing in dfilters.
svn path=/trunk/; revision=9224
-rw-r--r-- | epan/dfilter/semcheck.c | 72 | ||||
-rw-r--r-- | epan/ftypes/Makefile.am | 3 | ||||
-rwxr-xr-x | epan/ftypes/ftype-pcre.c | 235 | ||||
-rw-r--r-- | epan/ftypes/ftype-string.c | 50 | ||||
-rw-r--r-- | epan/ftypes/ftypes.c | 4 | ||||
-rw-r--r-- | epan/ftypes/ftypes.h | 18 |
6 files changed, 333 insertions, 49 deletions
diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c index 865d74deb2..8ccfe41a4c 100644 --- a/epan/dfilter/semcheck.c +++ b/epan/dfilter/semcheck.c @@ -1,5 +1,5 @@ /* - * $Id: semcheck.c,v 1.20 2003/12/06 16:35:19 gram Exp $ + * $Id: semcheck.c,v 1.21 2003/12/09 23:02:40 obiot Exp $ * * Ethereal - Network traffic analyzer * By Gerald Combs <gerald@ethereal.com> @@ -34,6 +34,16 @@ #include <epan/exceptions.h> #include <epan/packet.h> +/* Usage: DebugLog(("Error: string=%s\n", str)); */ +#ifdef DEBUG_dfilter +#define DebugLog(x) \ + printf("%s:%u: ", __FILE__, __LINE__); \ + printf x; \ + fflush(stdout) +#else +#define DebugLog(x) ; +#endif + static void semcheck(stnode_t *st_node); @@ -101,6 +111,7 @@ compatible_ftypes(ftenum_t a, ftenum_t b) return FALSE; } + case FT_PCRE: case FT_NUM_TYPES: g_assert_not_reached(); } @@ -121,7 +132,6 @@ mk_uint32_fvalue(guint32 val) return fv; } - /* Try to make an fvalue from a string using a value_string or true_false_string. * This works only for ftypes that are integers. Returns the created fvalue_t* * or NULL if impossible. */ @@ -151,6 +161,7 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s) case FT_UINT_STRING: case FT_UINT64: case FT_INT64: + case FT_PCRE: return FALSE; case FT_BOOLEAN: @@ -212,7 +223,6 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s) return FALSE; } - static gboolean is_bytes_type(enum ftenum type) { @@ -246,6 +256,7 @@ is_bytes_type(enum ftenum type) case FT_INT24: case FT_INT32: case FT_INT64: + case FT_PCRE: return FALSE; case FT_NUM_TYPES: @@ -284,7 +295,6 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func, THROW(TypeError); } - if (type2 == STTYPE_FIELD) { hfinfo2 = stnode_data(st_arg2); ftype2 = hfinfo2->type; @@ -304,14 +314,19 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func, } else if (type2 == STTYPE_STRING) { s = stnode_data(st_arg2); - fvalue = fvalue_from_string(ftype1, s, dfilter_fail); - if (!fvalue) { - /* check value_string */ - fvalue = mk_fvalue_from_val_string(hfinfo1, s); + if (strcmp(relation_string, "matches") == 0) { + /* Convert to a FT_PCRE */ + fvalue = fvalue_from_string(FT_PCRE, s, dfilter_fail); + } else { + fvalue = fvalue_from_string(ftype1, s, dfilter_fail); if (!fvalue) { - THROW(TypeError); + /* check value_string */ + fvalue = mk_fvalue_from_val_string(hfinfo1, s); } } + if (!fvalue) { + THROW(TypeError); + } new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); @@ -319,14 +334,19 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func, } else if (type2 == STTYPE_UNPARSED) { s = stnode_data(st_arg2); - fvalue = fvalue_from_unparsed(ftype1, s, allow_partial_value, dfilter_fail); - if (!fvalue) { - /* check value_string */ - fvalue = mk_fvalue_from_val_string(hfinfo1, s); + if (strcmp(relation_string, "matches") == 0) { + /* Convert to a FT_PCRE */ + fvalue = fvalue_from_unparsed(FT_PCRE, s, FALSE, dfilter_fail); + } else { + fvalue = fvalue_from_unparsed(ftype1, s, allow_partial_value, dfilter_fail); if (!fvalue) { - THROW(TypeError); + /* check value_string */ + fvalue = mk_fvalue_from_val_string(hfinfo1, s); } } + if (!fvalue) { + THROW(TypeError); + } new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); @@ -618,6 +638,11 @@ check_relation(const char *relation_string, gboolean allow_partial_value, FtypeCanFunc can_func, stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2) { +#ifdef DEBUG_dfilter + static guint i = 0; +#endif + + DebugLog((" 4 check_relation(\"%s\") [%u]\n", relation_string, i++)); switch (stnode_type_id(st_arg1)) { case STTYPE_FIELD: check_relation_LHS_FIELD(relation_string, can_func, @@ -651,6 +676,11 @@ check_test(stnode_t *st_node) { test_op_t st_op; stnode_t *st_arg1, *st_arg2; +#ifdef DEBUG_dfilter + static guint i = 0; +#endif + + DebugLog((" 3 check_test(stnode_t *st_node = %p) [%u]\n", st_node, i)); sttype_test_get(st_node, &st_op, &st_arg1, &st_arg2); @@ -706,6 +736,7 @@ check_test(stnode_t *st_node) default: g_assert_not_reached(); } + DebugLog((" 3 check_test(stnode_t *st_node = %p) [%u] - End\n", st_node, i++)); } @@ -713,6 +744,10 @@ check_test(stnode_t *st_node) static void semcheck(stnode_t *st_node) { +#ifdef DEBUG_dfilter + static guint i = 0; +#endif + DebugLog((" 2 semcheck(stnode_t *st_node = %p) [%u]\n", st_node, i++)); /* The parser assures that the top-most syntax-tree * node will be a TEST node, no matter what. So assert that. */ switch (stnode_type_id(st_node)) { @@ -731,6 +766,11 @@ semcheck(stnode_t *st_node) gboolean dfw_semcheck(dfwork_t *dfw) { +#ifdef DEBUG_dfilter + static guint i = 0; +#endif + + DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u]\n", dfw, i)); /* Instead of having to check for errors at every stage of * the semantic-checking, the semantic-checking code will * throw an exception if a problem is found. */ @@ -738,9 +778,13 @@ dfw_semcheck(dfwork_t *dfw) semcheck(dfw->st_root); } CATCH(TypeError) { + DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns FALSE\n", + dfw, i++)); return FALSE; } ENDTRY; + DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns FALSE\n", + dfw, i++)); return TRUE; } diff --git a/epan/ftypes/Makefile.am b/epan/ftypes/Makefile.am index de18d0c22a..5a95647b75 100644 --- a/epan/ftypes/Makefile.am +++ b/epan/ftypes/Makefile.am @@ -1,6 +1,6 @@ # Makefile.am # -# $Id: Makefile.am,v 1.4 2001/03/05 22:53:40 gram Exp $ +# $Id: Makefile.am,v 1.5 2003/12/09 23:02:39 obiot Exp $ # # Ethereal - Network traffic analyzer # By Gerald Combs <gerald@zing.org> @@ -42,6 +42,7 @@ libftypes_a_SOURCES = \ ftype-integer.c \ ftype-ipv4.c \ ftype-none.c \ + ftype-pcre.c \ ftype-string.c \ ftype-time.c \ ftype-tvbuff.c diff --git a/epan/ftypes/ftype-pcre.c b/epan/ftypes/ftype-pcre.c new file mode 100755 index 0000000000..079adddb7b --- /dev/null +++ b/epan/ftypes/ftype-pcre.c @@ -0,0 +1,235 @@ +/* + * $Id: ftype-pcre.c,v 1.1 2003/12/09 23:02:39 obiot Exp $ + * + * Ethereal - Network traffic analyzer + * By Gerald Combs <gerald@ethereal.com> + * Copyright 2001 Gerald Combs + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* Perl-Compatible Regular Expression (PCRE) internal field type. + * Used with the "matches" dfilter operator, allowing efficient + * compilation and studying of a PCRE pattern in dfilters. + * + * PCRE is provided with libpcre (http://www.pcre.org/). + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <ftypes-int.h> + +#ifdef HAVE_LIBPCRE +#include <pcre.h> + +/* Create a pcre_tuple_t object based on the given string pattern */ +static pcre_tuple_t * +pcre_tuple_new(const char *value) +{ + pcre_tuple_t *tuple; + const char *pcre_error_text; + int pcre_error_offset; + + tuple = g_malloc(sizeof(pcre_tuple_t)); + tuple->string = g_strdup(value); /* The RE as string */ + /* Compile the RE */ + tuple->re = pcre_compile( + value, /* pattern */ + 0, /* PCRE options */ + &pcre_error_text, /* PCRE constant error string */ + &pcre_error_offset, /* Start offset of error in pattern */ + NULL /* Default char tables (C locale) */ + ); + if (pcre_error_text) { + tuple->error = g_strdup_printf("In regular expression \"%s\":\n" + "%s (character position %u)", + (char *)value, pcre_error_text, pcre_error_offset); + return tuple; + } else { + tuple->error = NULL; + } + /* Study the RE */ + tuple->ex = pcre_study(tuple->re, 0, &pcre_error_text); + if (pcre_error_text) { + if (tuple->error) { + tuple->error = g_strdup_printf("In regular expression \"%s\":\n" + "%s. %s", + (char *)value, tuple->error, pcre_error_text); + } else { + tuple->error = g_strdup_printf("In regular expression \"%s\":\n" + "%s", + (char *)value, pcre_error_text); + } + } + return tuple; +} + +static void +pcre_tuple_free(pcre_tuple_t *tuple) +{ + if (tuple) { + if (tuple->string) g_free(tuple->string); + if (tuple->re) g_free(tuple->re); + if (tuple->ex) g_free(tuple->ex); + if (tuple->error) g_free(tuple->error); + g_free(tuple); + } +} + +static void +pcre_fvalue_new(fvalue_t *fv) +{ + fv->value.re = NULL; +} + +static void +pcre_fvalue_free(fvalue_t *fv) +{ + if (fv->value.re) { + pcre_tuple_free(fv->value.re); + } +} + +/* Generate a FT_PCRE from a parsed string pattern. + * Uses the specified logfunc() to report errors. */ +static gboolean +val_from_string(fvalue_t *fv, char *pattern, LogFunc logfunc) +{ + /* Free up the old value, if we have one */ + pcre_fvalue_free(fv); + + fv->value.re = pcre_tuple_new(pattern); + if (fv->value.re->error) { + logfunc(fv->value.re->error); + return FALSE; + } + return TRUE; +} + +/* Generate a FT_PCRE from an unparsed string pattern. + * Uses the specified logfunc() to report errors. */ +static gboolean +val_from_unparsed(fvalue_t *fv, char *pattern, gboolean allow_partial_value _U_, LogFunc logfunc) +{ + /* Free up the old value, if we have one */ + pcre_fvalue_free(fv); + g_assert(! allow_partial_value); + + fv->value.re = pcre_tuple_new(pattern); + if (fv->value.re->error) { + logfunc(fv->value.re->error); + return FALSE; + } + return TRUE; +} + +/* BEHOLD - value contains the string representation of the regular expression, + * and we want to store the compiled PCRE RE object into the value. */ +static void +pcre_fvalue_set(fvalue_t *fv, gpointer value, gboolean already_copied) +{ + g_assert(value != NULL); + /* Free up the old value, if we have one */ + pcre_fvalue_free(fv); + g_assert(! already_copied); + fv->value.re = pcre_tuple_new(value); +} + +static gpointer +pcre_fvalue_get(fvalue_t *fv) +{ + return fv->value.re; +} + +void +ftype_register_pcre(void) +{ + static ftype_t pcre_type = { + "FT_PCRE", + "Compiled Perl-Compatible Regular Expression object", + 0, /* wire_size */ + pcre_fvalue_new, /* new_value */ + pcre_fvalue_free, /* free_value */ + val_from_unparsed, /* val_from_unparsed */ + val_from_string, /* val_from_string */ + NULL, /* val_to_string_repr */ + NULL, /* len_string_repr */ + + pcre_fvalue_set, /* set_value */ + NULL, /* set_value_integer */ + NULL, /* set_value_floating */ + + pcre_fvalue_get, /* get_value */ + NULL, /* get_value_integer */ + NULL, /* get_value_floating */ + + NULL, /* cmp_eq */ + NULL, /* cmp_ne */ + NULL, /* cmp_gt */ + NULL, /* cmp_ge */ + NULL, /* cmp_lt */ + NULL, /* cmp_le */ + NULL, /* cmp_contains */ + NULL, /* cmp_matches */ + + NULL, /* len */ + NULL, /* slice */ + }; + ftype_register(FT_PCRE, &pcre_type); +} + +#else /* HAVE_LIBPCRE */ + +void +ftype_register_pcre(void) +{ + static ftype_t pcre_type = { + "FT_PCRE", + "Compiled Perl-Compatible Regular Expression object", + 0, /* wire_size */ + NULL, /* new_value */ + NULL, /* free_value */ + NULL, /* val_from_unparsed */ + NULL, /* val_from_string */ + NULL, /* val_to_string_repr */ + NULL, /* len_string_repr */ + + NULL, /* set_value */ + NULL, /* set_value_integer */ + NULL, /* set_value_floating */ + + NULL, /* get_value */ + NULL, /* get_value_integer */ + NULL, /* get_value_floating */ + + NULL, /* cmp_eq */ + NULL, /* cmp_ne */ + NULL, /* cmp_gt */ + NULL, /* cmp_ge */ + NULL, /* cmp_lt */ + NULL, /* cmp_le */ + NULL, /* cmp_contains */ + NULL, /* cmp_matches */ + + NULL, /* len */ + NULL, /* slice */ + }; + ftype_register(FT_PCRE, &pcre_type); +} + +#endif /* HAVE_LIBPCRE */ diff --git a/epan/ftypes/ftype-string.c b/epan/ftypes/ftype-string.c index a70425a733..1af5c38184 100644 --- a/epan/ftypes/ftype-string.c +++ b/epan/ftypes/ftype-string.c @@ -1,5 +1,5 @@ /* - * $Id: ftype-string.c,v 1.16 2003/12/06 16:35:20 gram Exp $ + * $Id: ftype-string.c,v 1.17 2003/12/09 23:02:39 obiot Exp $ * * Ethereal - Network traffic analyzer * By Gerald Combs <gerald@ethereal.com> @@ -237,46 +237,32 @@ cmp_contains(fvalue_t *fv_a, fvalue_t *fv_b) static gboolean cmp_matches(fvalue_t *fv_a, fvalue_t *fv_b) { - pcre *re; - const char *pcre_error_text; - int pcre_error_offset; int options = 0; int rc; - pcre_extra *pe = NULL; /* TODO - pcre_study() */ - - re = pcre_compile( - fv_b->value.string, /* pattern */ - options, /* PCRE options */ - &pcre_error_text, /* PCRE constant error string */ - &pcre_error_offset, /* Start offset of error in pattern */ - NULL /* Default char tables (C locale) */ - ); - if (re == NULL) { - /* TODO - Do something with pcre_error and pcre_error_offset */ + + /* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have + * warned us. For the same reason (and because we're using g_malloc()), + * fv_b->value.re is not NULL. + */ + if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) { return FALSE; } - /* TODO - Study the RE *if* the compile & study only happens once * / - pe = pcre_study(re, 0, &pcre_error_text); - if (pcre_error != NULL) { - / * TODO - Do something with pcre_error and pcre_error_offset * / + if (! fv_b->value.re) { return FALSE; } - */ rc = pcre_exec( - re, /* Compiled PCRE */ - pe, /* PCRE extra from pcre_study() */ - fv_a->value.string, /* The data to check for the pattern */ - (int)strlen(fv_a->value.string), /* and its length */ - 0, /* Start offset within data */ - options, /* PCRE options */ - NULL, /* We are not interested in the matched string */ - 0 /* of the pattern; only in success or failure. */ + (fv_b->value.re)->re, /* Compiled PCRE */ + (fv_b->value.re)->ex, /* PCRE extra from pcre_study() */ + fv_a->value.string, /* The data to check for the pattern... */ + (int)strlen(fv_a->value.string), /* ... and its length */ + 0, /* Start offset within data */ + options, /* PCRE options */ + NULL, /* We are not interested in the matched string */ + 0 /* of the pattern; only in success or failure. */ ); - /* if (pe != NULL) - g_free(pe); */ - g_free(re); - if (rc == 0) + if (rc == 0) { return TRUE; + } return FALSE; } #endif diff --git a/epan/ftypes/ftypes.c b/epan/ftypes/ftypes.c index c294ed75bb..a723113bd5 100644 --- a/epan/ftypes/ftypes.c +++ b/epan/ftypes/ftypes.c @@ -1,5 +1,5 @@ /* - * $Id: ftypes.c,v 1.19 2003/12/06 16:35:20 gram Exp $ + * $Id: ftypes.c,v 1.20 2003/12/09 23:02:39 obiot Exp $ * * Ethereal - Network traffic analyzer * By Gerald Combs <gerald@ethereal.com> @@ -46,6 +46,7 @@ void ftype_register_none(void); void ftype_register_string(void); void ftype_register_time(void); void ftype_register_tvbuff(void); +void ftype_register_pcre(void); /* Initialize the ftype module. */ void @@ -59,6 +60,7 @@ ftypes_initialize(void) ftype_register_string(); ftype_register_time(); ftype_register_tvbuff(); + ftype_register_pcre(); } /* Each ftype_t is registered via this function */ diff --git a/epan/ftypes/ftypes.h b/epan/ftypes/ftypes.h index 1ffbef7bff..dfd7f9f3e5 100644 --- a/epan/ftypes/ftypes.h +++ b/epan/ftypes/ftypes.h @@ -1,7 +1,7 @@ /* ftypes.h * Definitions for field types * - * $Id: ftypes.h,v 1.26 2003/12/06 16:35:20 gram Exp $ + * $Id: ftypes.h,v 1.27 2003/12/09 23:02:39 obiot Exp $ * * Ethereal - Network traffic analyzer * By Gerald Combs <gerald@ethereal.com> @@ -26,6 +26,10 @@ #ifndef FTYPES_H #define FTYPES_H +#ifdef HAVE_LIBPCRE +#include <pcre.h> +#endif /* HAVE_LIBPCRE */ + #include <glib.h> #include "../slab.h" @@ -59,6 +63,7 @@ enum ftenum { FT_IPv6, FT_IPXNET, FT_FRAMENUM, /* a UINT32, but if selected lets you go to frame with that numbe */ + FT_PCRE, /* a compiled Perl-Compatible Regular Expression object */ FT_NUM_TYPES /* last item number plus one */ }; @@ -73,6 +78,14 @@ enum ftrepr { typedef enum ftrepr ftrepr_t; +#ifdef HAVE_LIBPCRE +typedef struct _pcre_tuple_t { + char *string; + pcre *re; + pcre_extra *ex; + char *error; +} pcre_tuple_t; +#endif /* HAVE_LIBPCRE */ /* Initialize the ftypes subsytem. Called once. */ void @@ -141,6 +154,9 @@ typedef struct _fvalue_t { ipv4_addr ipv4; nstime_t time; tvbuff_t *tvb; +#ifdef HAVE_LIBPCRE + pcre_tuple_t *re; +#endif /* HAVE_LIBPCRE */ } value; /* The following is provided for private use |