diff options
-rw-r--r-- | docbook/release-notes.adoc | 2 | ||||
-rw-r--r-- | epan/dfilter/dfilter-int.h | 3 | ||||
-rw-r--r-- | epan/dfilter/dfilter.c | 36 | ||||
-rw-r--r-- | epan/dfilter/grammar.lemon | 7 | ||||
-rw-r--r-- | epan/dfilter/semcheck.c | 156 | ||||
-rw-r--r-- | epan/dfilter/syntax-tree.c | 2 | ||||
-rw-r--r-- | test/suite_dfilter/group_syntax.py | 12 | ||||
-rw-r--r-- | wsutil/ws_assert.h | 4 |
8 files changed, 105 insertions, 117 deletions
diff --git a/docbook/release-notes.adoc b/docbook/release-notes.adoc index 7253ac654c..124565f7ab 100644 --- a/docbook/release-notes.adoc +++ b/docbook/release-notes.adoc @@ -108,6 +108,8 @@ The following features are new (or have been significantly updated) since versio match a valid byte array specification. Now this is a syntax error. Use double-quotes to match literal strings. ** For string comparisons literal byte arrays are always interpreted as unquoted literal strings. This avoids unexpected results with embedded NUL bytes. For example "http.user_agent contains aa:bb" tries to match "aa:bb". Avoid this usage, always use double-quotes: http.user_agent contains "\xaa\xbb". +** Regular expressions (using "matches" or "~") must be specified using character strings. It is a syntax error to omit the double-quotes around + the regular expression. Before the syntax rules of an unquoted regex string could be difficult to predict. // === Removed Features and Support diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h index 9e6cef482c..44d1a448ae 100644 --- a/epan/dfilter/dfilter-int.h +++ b/epan/dfilter/dfilter-int.h @@ -86,6 +86,9 @@ DfilterTrace(FILE *TraceFILE, char *zTracePrompt); stnode_t * dfilter_new_function(dfwork_t *dfw, const char *name); +stnode_t * +dfilter_new_regex(dfwork_t *dfw, const char *patt); + gboolean dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint); diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c index 5cbf570ce2..9dd34f1163 100644 --- a/epan/dfilter/dfilter.c +++ b/epan/dfilter/dfilter.c @@ -78,6 +78,42 @@ dfilter_new_function(dfwork_t *dfw, const char *name) return stnode_new(STTYPE_FUNCTION, def, name); } +/* Gets a GRegex from a string, and sets the error message on failure. */ +stnode_t * +dfilter_new_regex(dfwork_t *dfw, const char *patt) +{ + GError *regex_error = NULL; + GRegex *pcre; + + ws_debug("Compile regex pattern: %s", patt); + + /* + * As a string is not guaranteed to contain valid UTF-8, + * we have to disable support for UTF-8 patterns and treat + * every pattern and subject as raw bytes. + * + * Should support for UTF-8 patterns be necessary, then we + * should compile a pattern without G_REGEX_RAW. Additionally, + * we MUST use g_utf8_validate() before calling g_regex_match_full() + * or risk crashes. + */ + GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW; + + pcre = g_regex_new( + patt, /* pattern */ + cflags, /* Compile options */ + 0, /* Match options */ + ®ex_error); /* Compile / study errors */ + + if (regex_error) { + dfilter_parse_fail(dfw, "%s", regex_error->message); + g_error_free(regex_error); + pcre = NULL; + } + + return stnode_new(STTYPE_PCRE, pcre, patt); +} + gboolean dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint) { diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon index d3dc79ea9c..b48d76da9c 100644 --- a/epan/dfilter/grammar.lemon +++ b/epan/dfilter/grammar.lemon @@ -302,10 +302,13 @@ relation_test(T) ::= entity(E) rel_binop(O) relation_test(R). } /* "matches" does not chain with other relational tests. */ -relation_test(T) ::= entity(E) TEST_MATCHES entity(F). +relation_test(T) ::= entity(E) TEST_MATCHES STRING(S). { + stnode_t *R = dfilter_new_regex(dfw, stnode_token_value(S)); + stnode_free(S); + T = stnode_new(STTYPE_TEST, NULL, NULL); - sttype_test_set2(T, TEST_OP_MATCHES, E, F); + sttype_test_set2(T, TEST_OP_MATCHES, E, R); } relation_test(T) ::= entity(E) TEST_IN LBRACE set_node_list(L) RBRACE. diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c index be9a5ec145..bef49649d1 100644 --- a/epan/dfilter/semcheck.c +++ b/epan/dfilter/semcheck.c @@ -424,50 +424,6 @@ is_bytes_type(enum ftenum type) return FALSE; } -/* Gets a GRegex from a string, and sets the error message on failure. */ -WS_RETNONNULL -static GRegex* -dfilter_g_regex_from_string(dfwork_t *dfw, stnode_t *st) -{ - GError *regex_error = NULL; - GRegexCompileFlags cflags = (GRegexCompileFlags)(G_REGEX_CASELESS | G_REGEX_OPTIMIZE); - GRegex *pcre; - const char *s = stnode_data(st); - - /* - * As FT_BYTES and FT_PROTOCOL contain arbitrary binary data - * and FT_STRING is not guaranteed to contain valid UTF-8, - * we have to disable support for UTF-8 patterns and treat - * every pattern and subject as raw bytes. - * - * Should support for UTF-8 patterns be necessary, then we - * should compile a pattern without G_REGEX_RAW. Additionally, - * we MUST use g_utf8_validate() before calling g_regex_match_full() - * or risk crashes. - */ - cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW); - - ws_debug("Compile regex pattern: %s", s); - - pcre = g_regex_new( - s, /* pattern */ - cflags, /* Compile options */ - (GRegexMatchFlags)0, /* Match options */ - ®ex_error /* Compile / study errors */ - ); - - if (regex_error) { - if (dfw->error_message == NULL) - dfw->error_message = g_strdup(regex_error->message); - g_error_free(regex_error); - if (pcre) { - g_regex_unref(pcre); - } - THROW(TypeError); - } - return pcre; -} - /* Check the semantics of an existence test. */ static void check_exists(dfwork_t *dfw, stnode_t *st_arg1) @@ -738,7 +694,6 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, df_func_def_t *funcdef; ftenum_t ftype1, ftype2; fvalue_t *fvalue; - GRegex *pcre; type2 = stnode_type_id(st_arg2); @@ -777,34 +732,28 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, } else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) { - if (strcmp(relation_string, "matches") == 0) { - /* Convert to a GRegex */ - pcre = dfilter_g_regex_from_string(dfw, st_arg2); - stnode_replace(st_arg2, STTYPE_PCRE, pcre); - } else { - /* Skip incompatible fields */ - while (hfinfo1->same_name_prev_id != -1 && - ((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) || - (type2 != STTYPE_STRING && (ftype1 == FT_STRING || ftype1== FT_STRINGZ)))) { - hfinfo1 = proto_registrar_get_nth(hfinfo1->same_name_prev_id); - ftype1 = hfinfo1->type; - } + /* Skip incompatible fields */ + while (hfinfo1->same_name_prev_id != -1 && + ((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) || + (type2 != STTYPE_STRING && (ftype1 == FT_STRING || ftype1== FT_STRINGZ)))) { + hfinfo1 = proto_registrar_get_nth(hfinfo1->same_name_prev_id); + ftype1 = hfinfo1->type; + } - if (type2 == STTYPE_STRING) { - fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1); - } - else if (type2 == STTYPE_CHARCONST && - strcmp(relation_string, "contains") == 0) { - /* The RHS should be the same type as the LHS, - * but a character is just a one-byte byte - * string. */ - fvalue = dfilter_fvalue_from_charconst_string(dfw, ftype1, st_arg2, allow_partial_value); - } - else { - fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1); - } - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); + if (type2 == STTYPE_STRING) { + fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1); } + else if (type2 == STTYPE_CHARCONST && + strcmp(relation_string, "contains") == 0) { + /* The RHS should be the same type as the LHS, + * but a character is just a one-byte byte + * string. */ + fvalue = dfilter_fvalue_from_charconst_string(dfw, ftype1, st_arg2, allow_partial_value); + } + else { + fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1); + } + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } else if (type2 == STTYPE_RANGE) { check_drange_sanity(dfw, st_arg2); @@ -881,6 +830,9 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, nodelist = g_slist_next(nodelist); } } + else if (type2 == STTYPE_PCRE) { + ws_assert_streq(relation_string, "matches"); + } else { ws_assert_not_reached(); } @@ -1032,7 +984,6 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, header_field_info *hfinfo2; ftenum_t ftype2; fvalue_t *fvalue; - GRegex *pcre; ws_debug("5 check_relation_LHS_RANGE(%s)", relation_string); @@ -1059,38 +1010,20 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, } else if (type2 == STTYPE_STRING) { ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_STRING)"); - if (strcmp(relation_string, "matches") == 0) { - /* Convert to a GRegex * */ - pcre = dfilter_g_regex_from_string(dfw, st_arg2); - stnode_replace(st_arg2, STTYPE_PCRE, pcre); - } else { - fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL); - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); - } + fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } else if (type2 == STTYPE_UNPARSED) { ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED)"); - if (strcmp(relation_string, "matches") == 0) { - /* Convert to a GRegex */ - pcre = dfilter_g_regex_from_string(dfw, st_arg2); - stnode_replace(st_arg2, STTYPE_PCRE, pcre); - } else { - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL); - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); - } + fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } else if (type2 == STTYPE_CHARCONST) { ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_CHARCONST)"); - if (strcmp(relation_string, "matches") == 0) { - /* Convert to a GRegex */ - pcre = dfilter_g_regex_from_string(dfw, st_arg2); - stnode_replace(st_arg2, STTYPE_PCRE, pcre); - } else { - /* The RHS should be FT_BYTES, but a character is just a - * one-byte byte string. */ - fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, st_arg2, allow_partial_value); - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); - } + /* The RHS should be FT_BYTES, but a character is just a + * one-byte byte string. */ + fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, st_arg2, allow_partial_value); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } else if (type2 == STTYPE_RANGE) { ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_RANGE)"); @@ -1118,6 +1051,9 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, dfilter_fail(dfw, "Only a field may be tested for membership in a set."); THROW(TypeError); } + else if (type2 == STTYPE_PCRE) { + ws_assert_streq(relation_string, "matches"); + } else { ws_assert_not_reached(); } @@ -1154,7 +1090,6 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string, header_field_info *hfinfo2; ftenum_t ftype1, ftype2; fvalue_t *fvalue; - GRegex *pcre; df_func_def_t *funcdef; df_func_def_t *funcdef2; /* GSList *params; */ @@ -1192,24 +1127,12 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string, } } else if (type2 == STTYPE_STRING) { - if (strcmp(relation_string, "matches") == 0) { - /* Convert to a GRegex */ - pcre = dfilter_g_regex_from_string(dfw, st_arg2); - stnode_replace(st_arg2, STTYPE_PCRE, pcre); - } else { - fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL); - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); - } + fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } else if (type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) { - if (strcmp(relation_string, "matches") == 0) { - /* Convert to a GRegex */ - pcre = dfilter_g_regex_from_string(dfw, st_arg2); - stnode_replace(st_arg2, STTYPE_PCRE, pcre); - } else { - fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL); - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); - } + fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL); + stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } else if (type2 == STTYPE_RANGE) { check_drange_sanity(dfw, st_arg2); @@ -1249,6 +1172,9 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string, dfilter_fail(dfw, "Only a field may be tested for membership in a set."); THROW(TypeError); } + else if (type2 == STTYPE_PCRE) { + ws_assert_streq(relation_string, "matches"); + } else { ws_assert_not_reached(); } diff --git a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c index 6638cc09dd..1bd88f88fd 100644 --- a/epan/dfilter/syntax-tree.c +++ b/epan/dfilter/syntax-tree.c @@ -119,6 +119,8 @@ _node_init(stnode_t *node, sttype_id_t type_id, gpointer data) node->data = NULL; } else { + /* Creating an initialized node with a NULL pointer is + * allowed and needs to be safe. The parser relies on that. */ type = sttype_lookup(type_id); ws_assert(type); node->type = type; diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py index 7e1cfdacfd..23b39052ac 100644 --- a/test/suite_dfilter/group_syntax.py +++ b/test/suite_dfilter/group_syntax.py @@ -30,3 +30,15 @@ class case_syntax(unittest.TestCase): def test_value_string_1(self, checkDFilterSucceed): dfilter = 'eth.fcs.status=="Bad"' checkDFilterSucceed(dfilter) + + def test_matches_1(self, checkDFilterSucceed): + dfilter = 'http.request.method matches "^HEAD"' + checkDFilterSucceed(dfilter) + + def test_matches_2(self, checkDFilterFail): + dfilter = 'http.request.method matches HEAD' + checkDFilterFail(dfilter, '"HEAD" was unexpected in this context') + + def test_matches_3(self, checkDFilterFail): + dfilter = 'http.request.method matches "^HEAD" matches "^POST"' + checkDFilterFail(dfilter, '"matches" was unexpected in this context.') diff --git a/wsutil/ws_assert.h b/wsutil/ws_assert.h index e2727b589c..90e4647279 100644 --- a/wsutil/ws_assert.h +++ b/wsutil/ws_assert.h @@ -13,6 +13,7 @@ #include <ws_symbol_export.h> #include <ws_attributes.h> #include <stdbool.h> +#include <string.h> #ifdef WS_LOG_DOMAIN #define _ASSERT_DOMAIN WS_LOG_DOMAIN @@ -58,6 +59,9 @@ void ws_assert_failed(const char *file, int line, const char *function, #define ws_assert(expr) ws_abort_if_fail(expr) #endif +#define ws_assert_streq(s1, s2) \ + ws_assert((s1) && (s2) && strcmp((s1), (s2)) == 0) + /* * We don't want to disable ws_assert_not_reached() with WS_DISABLE_ASSERT. * That would blast compiler warnings everywhere for no benefit, not |