aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docbook/release-notes.adoc2
-rw-r--r--epan/dfilter/dfilter-int.h3
-rw-r--r--epan/dfilter/dfilter.c36
-rw-r--r--epan/dfilter/grammar.lemon7
-rw-r--r--epan/dfilter/semcheck.c156
-rw-r--r--epan/dfilter/syntax-tree.c2
-rw-r--r--test/suite_dfilter/group_syntax.py12
-rw-r--r--wsutil/ws_assert.h4
8 files changed, 105 insertions, 117 deletions
diff --git a/docbook/release-notes.adoc b/docbook/release-notes.adoc
index 7253ac654c..124565f7ab 100644
--- a/docbook/release-notes.adoc
+++ b/docbook/release-notes.adoc
@@ -108,6 +108,8 @@ The following features are new (or have been significantly updated) since versio
match a valid byte array specification. Now this is a syntax error. Use double-quotes to match literal strings.
** For string comparisons literal byte arrays are always interpreted as unquoted literal strings. This avoids unexpected results with embedded NUL bytes.
For example "http.user_agent contains aa:bb" tries to match "aa:bb". Avoid this usage, always use double-quotes: http.user_agent contains "\xaa\xbb".
+** Regular expressions (using "matches" or "~") must be specified using character strings. It is a syntax error to omit the double-quotes around
+ the regular expression. Before the syntax rules of an unquoted regex string could be difficult to predict.
// === Removed Features and Support
diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h
index 9e6cef482c..44d1a448ae 100644
--- a/epan/dfilter/dfilter-int.h
+++ b/epan/dfilter/dfilter-int.h
@@ -86,6 +86,9 @@ DfilterTrace(FILE *TraceFILE, char *zTracePrompt);
stnode_t *
dfilter_new_function(dfwork_t *dfw, const char *name);
+stnode_t *
+dfilter_new_regex(dfwork_t *dfw, const char *patt);
+
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c
index 5cbf570ce2..9dd34f1163 100644
--- a/epan/dfilter/dfilter.c
+++ b/epan/dfilter/dfilter.c
@@ -78,6 +78,42 @@ dfilter_new_function(dfwork_t *dfw, const char *name)
return stnode_new(STTYPE_FUNCTION, def, name);
}
+/* Gets a GRegex from a string, and sets the error message on failure. */
+stnode_t *
+dfilter_new_regex(dfwork_t *dfw, const char *patt)
+{
+ GError *regex_error = NULL;
+ GRegex *pcre;
+
+ ws_debug("Compile regex pattern: %s", patt);
+
+ /*
+ * As a string is not guaranteed to contain valid UTF-8,
+ * we have to disable support for UTF-8 patterns and treat
+ * every pattern and subject as raw bytes.
+ *
+ * Should support for UTF-8 patterns be necessary, then we
+ * should compile a pattern without G_REGEX_RAW. Additionally,
+ * we MUST use g_utf8_validate() before calling g_regex_match_full()
+ * or risk crashes.
+ */
+ GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
+
+ pcre = g_regex_new(
+ patt, /* pattern */
+ cflags, /* Compile options */
+ 0, /* Match options */
+ &regex_error); /* Compile / study errors */
+
+ if (regex_error) {
+ dfilter_parse_fail(dfw, "%s", regex_error->message);
+ g_error_free(regex_error);
+ pcre = NULL;
+ }
+
+ return stnode_new(STTYPE_PCRE, pcre, patt);
+}
+
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
{
diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon
index d3dc79ea9c..b48d76da9c 100644
--- a/epan/dfilter/grammar.lemon
+++ b/epan/dfilter/grammar.lemon
@@ -302,10 +302,13 @@ relation_test(T) ::= entity(E) rel_binop(O) relation_test(R).
}
/* "matches" does not chain with other relational tests. */
-relation_test(T) ::= entity(E) TEST_MATCHES entity(F).
+relation_test(T) ::= entity(E) TEST_MATCHES STRING(S).
{
+ stnode_t *R = dfilter_new_regex(dfw, stnode_token_value(S));
+ stnode_free(S);
+
T = stnode_new(STTYPE_TEST, NULL, NULL);
- sttype_test_set2(T, TEST_OP_MATCHES, E, F);
+ sttype_test_set2(T, TEST_OP_MATCHES, E, R);
}
relation_test(T) ::= entity(E) TEST_IN LBRACE set_node_list(L) RBRACE.
diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c
index be9a5ec145..bef49649d1 100644
--- a/epan/dfilter/semcheck.c
+++ b/epan/dfilter/semcheck.c
@@ -424,50 +424,6 @@ is_bytes_type(enum ftenum type)
return FALSE;
}
-/* Gets a GRegex from a string, and sets the error message on failure. */
-WS_RETNONNULL
-static GRegex*
-dfilter_g_regex_from_string(dfwork_t *dfw, stnode_t *st)
-{
- GError *regex_error = NULL;
- GRegexCompileFlags cflags = (GRegexCompileFlags)(G_REGEX_CASELESS | G_REGEX_OPTIMIZE);
- GRegex *pcre;
- const char *s = stnode_data(st);
-
- /*
- * As FT_BYTES and FT_PROTOCOL contain arbitrary binary data
- * and FT_STRING is not guaranteed to contain valid UTF-8,
- * we have to disable support for UTF-8 patterns and treat
- * every pattern and subject as raw bytes.
- *
- * Should support for UTF-8 patterns be necessary, then we
- * should compile a pattern without G_REGEX_RAW. Additionally,
- * we MUST use g_utf8_validate() before calling g_regex_match_full()
- * or risk crashes.
- */
- cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW);
-
- ws_debug("Compile regex pattern: %s", s);
-
- pcre = g_regex_new(
- s, /* pattern */
- cflags, /* Compile options */
- (GRegexMatchFlags)0, /* Match options */
- &regex_error /* Compile / study errors */
- );
-
- if (regex_error) {
- if (dfw->error_message == NULL)
- dfw->error_message = g_strdup(regex_error->message);
- g_error_free(regex_error);
- if (pcre) {
- g_regex_unref(pcre);
- }
- THROW(TypeError);
- }
- return pcre;
-}
-
/* Check the semantics of an existence test. */
static void
check_exists(dfwork_t *dfw, stnode_t *st_arg1)
@@ -738,7 +694,6 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
df_func_def_t *funcdef;
ftenum_t ftype1, ftype2;
fvalue_t *fvalue;
- GRegex *pcre;
type2 = stnode_type_id(st_arg2);
@@ -777,34 +732,28 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
- if (strcmp(relation_string, "matches") == 0) {
- /* Convert to a GRegex */
- pcre = dfilter_g_regex_from_string(dfw, st_arg2);
- stnode_replace(st_arg2, STTYPE_PCRE, pcre);
- } else {
- /* Skip incompatible fields */
- while (hfinfo1->same_name_prev_id != -1 &&
- ((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) ||
- (type2 != STTYPE_STRING && (ftype1 == FT_STRING || ftype1== FT_STRINGZ)))) {
- hfinfo1 = proto_registrar_get_nth(hfinfo1->same_name_prev_id);
- ftype1 = hfinfo1->type;
- }
+ /* Skip incompatible fields */
+ while (hfinfo1->same_name_prev_id != -1 &&
+ ((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) ||
+ (type2 != STTYPE_STRING && (ftype1 == FT_STRING || ftype1== FT_STRINGZ)))) {
+ hfinfo1 = proto_registrar_get_nth(hfinfo1->same_name_prev_id);
+ ftype1 = hfinfo1->type;
+ }
- if (type2 == STTYPE_STRING) {
- fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1);
- }
- else if (type2 == STTYPE_CHARCONST &&
- strcmp(relation_string, "contains") == 0) {
- /* The RHS should be the same type as the LHS,
- * but a character is just a one-byte byte
- * string. */
- fvalue = dfilter_fvalue_from_charconst_string(dfw, ftype1, st_arg2, allow_partial_value);
- }
- else {
- fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1);
- }
- stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
+ if (type2 == STTYPE_STRING) {
+ fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1);
}
+ else if (type2 == STTYPE_CHARCONST &&
+ strcmp(relation_string, "contains") == 0) {
+ /* The RHS should be the same type as the LHS,
+ * but a character is just a one-byte byte
+ * string. */
+ fvalue = dfilter_fvalue_from_charconst_string(dfw, ftype1, st_arg2, allow_partial_value);
+ }
+ else {
+ fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1);
+ }
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_RANGE) {
check_drange_sanity(dfw, st_arg2);
@@ -881,6 +830,9 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
nodelist = g_slist_next(nodelist);
}
}
+ else if (type2 == STTYPE_PCRE) {
+ ws_assert_streq(relation_string, "matches");
+ }
else {
ws_assert_not_reached();
}
@@ -1032,7 +984,6 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
header_field_info *hfinfo2;
ftenum_t ftype2;
fvalue_t *fvalue;
- GRegex *pcre;
ws_debug("5 check_relation_LHS_RANGE(%s)", relation_string);
@@ -1059,38 +1010,20 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
}
else if (type2 == STTYPE_STRING) {
ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_STRING)");
- if (strcmp(relation_string, "matches") == 0) {
- /* Convert to a GRegex * */
- pcre = dfilter_g_regex_from_string(dfw, st_arg2);
- stnode_replace(st_arg2, STTYPE_PCRE, pcre);
- } else {
- fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL);
- stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
- }
+ fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_UNPARSED) {
ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED)");
- if (strcmp(relation_string, "matches") == 0) {
- /* Convert to a GRegex */
- pcre = dfilter_g_regex_from_string(dfw, st_arg2);
- stnode_replace(st_arg2, STTYPE_PCRE, pcre);
- } else {
- fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
- stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
- }
+ fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_CHARCONST) {
ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_CHARCONST)");
- if (strcmp(relation_string, "matches") == 0) {
- /* Convert to a GRegex */
- pcre = dfilter_g_regex_from_string(dfw, st_arg2);
- stnode_replace(st_arg2, STTYPE_PCRE, pcre);
- } else {
- /* The RHS should be FT_BYTES, but a character is just a
- * one-byte byte string. */
- fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, st_arg2, allow_partial_value);
- stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
- }
+ /* The RHS should be FT_BYTES, but a character is just a
+ * one-byte byte string. */
+ fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, st_arg2, allow_partial_value);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_RANGE) {
ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_RANGE)");
@@ -1118,6 +1051,9 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
dfilter_fail(dfw, "Only a field may be tested for membership in a set.");
THROW(TypeError);
}
+ else if (type2 == STTYPE_PCRE) {
+ ws_assert_streq(relation_string, "matches");
+ }
else {
ws_assert_not_reached();
}
@@ -1154,7 +1090,6 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string,
header_field_info *hfinfo2;
ftenum_t ftype1, ftype2;
fvalue_t *fvalue;
- GRegex *pcre;
df_func_def_t *funcdef;
df_func_def_t *funcdef2;
/* GSList *params; */
@@ -1192,24 +1127,12 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string,
}
}
else if (type2 == STTYPE_STRING) {
- if (strcmp(relation_string, "matches") == 0) {
- /* Convert to a GRegex */
- pcre = dfilter_g_regex_from_string(dfw, st_arg2);
- stnode_replace(st_arg2, STTYPE_PCRE, pcre);
- } else {
- fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
- stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
- }
+ fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) {
- if (strcmp(relation_string, "matches") == 0) {
- /* Convert to a GRegex */
- pcre = dfilter_g_regex_from_string(dfw, st_arg2);
- stnode_replace(st_arg2, STTYPE_PCRE, pcre);
- } else {
- fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL);
- stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
- }
+ fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL);
+ stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_RANGE) {
check_drange_sanity(dfw, st_arg2);
@@ -1249,6 +1172,9 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string,
dfilter_fail(dfw, "Only a field may be tested for membership in a set.");
THROW(TypeError);
}
+ else if (type2 == STTYPE_PCRE) {
+ ws_assert_streq(relation_string, "matches");
+ }
else {
ws_assert_not_reached();
}
diff --git a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c
index 6638cc09dd..1bd88f88fd 100644
--- a/epan/dfilter/syntax-tree.c
+++ b/epan/dfilter/syntax-tree.c
@@ -119,6 +119,8 @@ _node_init(stnode_t *node, sttype_id_t type_id, gpointer data)
node->data = NULL;
}
else {
+ /* Creating an initialized node with a NULL pointer is
+ * allowed and needs to be safe. The parser relies on that. */
type = sttype_lookup(type_id);
ws_assert(type);
node->type = type;
diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py
index 7e1cfdacfd..23b39052ac 100644
--- a/test/suite_dfilter/group_syntax.py
+++ b/test/suite_dfilter/group_syntax.py
@@ -30,3 +30,15 @@ class case_syntax(unittest.TestCase):
def test_value_string_1(self, checkDFilterSucceed):
dfilter = 'eth.fcs.status=="Bad"'
checkDFilterSucceed(dfilter)
+
+ def test_matches_1(self, checkDFilterSucceed):
+ dfilter = 'http.request.method matches "^HEAD"'
+ checkDFilterSucceed(dfilter)
+
+ def test_matches_2(self, checkDFilterFail):
+ dfilter = 'http.request.method matches HEAD'
+ checkDFilterFail(dfilter, '"HEAD" was unexpected in this context')
+
+ def test_matches_3(self, checkDFilterFail):
+ dfilter = 'http.request.method matches "^HEAD" matches "^POST"'
+ checkDFilterFail(dfilter, '"matches" was unexpected in this context.')
diff --git a/wsutil/ws_assert.h b/wsutil/ws_assert.h
index e2727b589c..90e4647279 100644
--- a/wsutil/ws_assert.h
+++ b/wsutil/ws_assert.h
@@ -13,6 +13,7 @@
#include <ws_symbol_export.h>
#include <ws_attributes.h>
#include <stdbool.h>
+#include <string.h>
#ifdef WS_LOG_DOMAIN
#define _ASSERT_DOMAIN WS_LOG_DOMAIN
@@ -58,6 +59,9 @@ void ws_assert_failed(const char *file, int line, const char *function,
#define ws_assert(expr) ws_abort_if_fail(expr)
#endif
+#define ws_assert_streq(s1, s2) \
+ ws_assert((s1) && (s2) && strcmp((s1), (s2)) == 0)
+
/*
* We don't want to disable ws_assert_not_reached() with WS_DISABLE_ASSERT.
* That would blast compiler warnings everywhere for no benefit, not