aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoão Valverde <j@v6e.pt>2021-10-15 23:47:28 +0100
committerWireshark GitLab Utility <gerald+gitlab-utility@wireshark.org>2021-10-18 12:09:36 +0000
commite8800ff3c49e0b8a7a04d06ea68a39a16bab778f (patch)
tree2a79cf7b04c746e5c8f145b59b0a5f8c66060f09
parent2e048df011a56eaf2a5b3d86b76a8b5675f9e113 (diff)
dfilter: Add a thin encapsulation layer for REs
-rw-r--r--epan/dfilter/dfilter.c39
-rw-r--r--epan/dfilter/dfvm.c10
-rw-r--r--epan/dfilter/dfvm.h2
-rw-r--r--epan/dfilter/gencode.c4
-rw-r--r--epan/dfilter/sttype-pointer.c8
-rw-r--r--epan/ftypes/ftype-bytes.c13
-rw-r--r--epan/ftypes/ftype-protocol.c23
-rw-r--r--epan/ftypes/ftype-string.c12
-rw-r--r--epan/ftypes/ftypes-int.h2
-rw-r--r--epan/ftypes/ftypes.c61
-rw-r--r--epan/ftypes/ftypes.h17
-rw-r--r--test/suite_dfilter/group_syntax.py4
12 files changed, 106 insertions, 89 deletions
diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c
index 263b5c192c..1836aa8efa 100644
--- a/epan/dfilter/dfilter.c
+++ b/epan/dfilter/dfilter.c
@@ -78,46 +78,25 @@ dfilter_new_function(dfwork_t *dfw, const char *name)
return stnode_new(STTYPE_FUNCTION, def, name);
}
-/* Gets a GRegex from a string, and sets the error message on failure. */
+/* Gets a regex from a string, and sets the error message on failure. */
stnode_t *
dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
{
- GError *regex_error = NULL;
- GRegex *pcre;
- const char *patt;
+ fvalue_regex_t *pcre;
+ char *errmsg = NULL;
- if (stnode_type_id(node) == STTYPE_STRING) {
- patt = stnode_data(node);
- }
- else {
+ if (stnode_type_id(node) != STTYPE_STRING) {
dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node));
return node;
}
- patt = stnode_data(node);
+ const char *patt = stnode_data(node);
ws_debug("Compile regex pattern: %s", patt);
- /*
- * As a string is not guaranteed to contain valid UTF-8,
- * we have to disable support for UTF-8 patterns and treat
- * every pattern and subject as raw bytes.
- *
- * Should support for UTF-8 patterns be necessary, then we
- * should compile a pattern without G_REGEX_RAW. Additionally,
- * we MUST use g_utf8_validate() before calling g_regex_match_full()
- * or risk crashes.
- */
- GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
-
- pcre = g_regex_new(
- patt, /* pattern */
- cflags, /* Compile options */
- 0, /* Match options */
- &regex_error); /* Compile / study errors */
-
- if (regex_error) {
- dfilter_parse_fail(dfw, "%s", regex_error->message);
- g_error_free(regex_error);
+ pcre = fvalue_regex_compile(patt, &errmsg);
+ if (errmsg) {
+ dfilter_parse_fail(dfw, "%s", errmsg);
+ g_free(errmsg);
return node;
}
diff --git a/epan/dfilter/dfvm.c b/epan/dfilter/dfvm.c
index 42f29e0150..47278e4169 100644
--- a/epan/dfilter/dfvm.c
+++ b/epan/dfilter/dfvm.c
@@ -38,7 +38,7 @@ dfvm_value_free(dfvm_value_t *v)
drange_free(v->value.drange);
break;
case PCRE:
- g_regex_unref(v->value.pcre);
+ fvalue_regex_free(v->value.pcre);
break;
default:
/* nothing */
@@ -110,9 +110,9 @@ dfvm_dump(FILE *f, dfilter_t *df)
wmem_free(NULL, value_str);
break;
case PUT_PCRE:
- fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n",
+ fprintf(f, "%05d PUT_PCRE \t%s <GRegex> -> reg#%u\n",
id,
- g_regex_get_pattern(arg1->value.pcre),
+ fvalue_regex_pattern(arg1->value.pcre),
arg2->value.numeric);
break;
case CHECK_EXISTS:
@@ -365,7 +365,7 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg)
/* Put a constant PCRE in a register. These will not be cleared by
* free_register_overhead. */
static gboolean
-put_pcre(dfilter_t *df, GRegex *pcre, int reg)
+put_pcre(dfilter_t *df, fvalue_regex_t *pcre, int reg)
{
df->registers[reg] = g_list_append(NULL, pcre);
df->owns_memory[reg] = FALSE;
@@ -404,7 +404,7 @@ any_matches(dfilter_t *df, int reg1, int reg2)
while (list_a) {
list_b = df->registers[reg2];
while (list_b) {
- if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) {
+ if (fvalue_matches((fvalue_t *)list_a->data, list_b->data)) {
return TRUE;
}
list_b = g_list_next(list_b);
diff --git a/epan/dfilter/dfvm.h b/epan/dfilter/dfvm.h
index 71e6cd4449..a57333c707 100644
--- a/epan/dfilter/dfvm.h
+++ b/epan/dfilter/dfvm.h
@@ -36,7 +36,7 @@ typedef struct {
drange_t *drange;
header_field_info *hfinfo;
df_func_def_t *funcdef;
- GRegex *pcre;
+ fvalue_regex_t *pcre;
} value;
} dfvm_value_t;
diff --git a/epan/dfilter/gencode.c b/epan/dfilter/gencode.c
index 28cb573104..bb5023710c 100644
--- a/epan/dfilter/gencode.c
+++ b/epan/dfilter/gencode.c
@@ -234,7 +234,7 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
/* returns register number */
static int
-dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre)
+dfw_append_put_pcre(dfwork_t *dfw, fvalue_regex_t *pcre)
{
dfvm_insn_t *insn;
dfvm_value_t *val1, *val2;
@@ -427,7 +427,7 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp)
reg = dfw_append_function(dfw, st_arg, p_jmp);
}
else if (e_type == STTYPE_PCRE) {
- reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg));
+ reg = dfw_append_put_pcre(dfw, stnode_steal_data(st_arg));
}
else {
/* printf("sttype_id is %u\n", (unsigned)e_type); */
diff --git a/epan/dfilter/sttype-pointer.c b/epan/dfilter/sttype-pointer.c
index 1f2a5ffa2b..4feb8b1d45 100644
--- a/epan/dfilter/sttype-pointer.c
+++ b/epan/dfilter/sttype-pointer.c
@@ -27,7 +27,7 @@ fvalue_free(gpointer value)
static void
pcre_free(gpointer value)
{
- GRegex *pcre = (GRegex*)value;
+ fvalue_regex_t *pcre = value;
/* If the data was not claimed with stnode_steal_data(), free it. */
if (pcre) {
@@ -36,7 +36,7 @@ pcre_free(gpointer value)
* count; it'll get freed when the reference count drops
* to 0.
*/
- g_regex_unref(pcre);
+ fvalue_regex_free(pcre);
}
}
@@ -67,9 +67,9 @@ field_tostr(const void *data, gboolean pretty _U_)
static char *
pcre_tostr(const void *data, gboolean pretty _U_)
{
- const GRegex *pcre = (const GRegex *)data;
+ const fvalue_regex_t *pcre = data;
- return g_strdup(g_regex_get_pattern(pcre));
+ return g_strdup(fvalue_regex_pattern(pcre));
}
void
diff --git a/epan/ftypes/ftype-bytes.c b/epan/ftypes/ftype-bytes.c
index 5eed4868a2..d0da0fd482 100644
--- a/epan/ftypes/ftype-bytes.c
+++ b/epan/ftypes/ftype-bytes.c
@@ -586,20 +586,11 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
-cmp_matches(const fvalue_t *fv, const GRegex *regex)
+cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
{
GByteArray *a = fv->value.bytes;
- return g_regex_match_full(
- regex, /* Compiled PCRE */
- (char *)a->data, /* The data to check for the pattern... */
- (int)a->len, /* ... and its length */
- 0, /* Start offset within data */
- (GRegexMatchFlags)0, /* GRegexMatchFlags */
- NULL, /* We are not interested in the match information */
- NULL /* We don't want error information */
- );
- /* NOTE - DO NOT g_free(data) */
+ return fvalue_regex_matches(regex, a->data, a->len);
}
void
diff --git a/epan/ftypes/ftype-protocol.c b/epan/ftypes/ftype-protocol.c
index 98d38dcc7d..9af382e773 100644
--- a/epan/ftypes/ftype-protocol.c
+++ b/epan/ftypes/ftype-protocol.c
@@ -257,7 +257,7 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
-cmp_matches(const fvalue_t *fv, const GRegex *regex)
+cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
{
const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
volatile gboolean rc = FALSE;
@@ -271,26 +271,9 @@ cmp_matches(const fvalue_t *fv, const GRegex *regex)
if (a->tvb != NULL) {
tvb_len = tvb_captured_length(a->tvb);
data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len);
- rc = g_regex_match_full(
- regex, /* Compiled PCRE */
- data, /* The data to check for the pattern... */
- tvb_len, /* ... and its length */
- 0, /* Start offset within data */
- (GRegexMatchFlags)0, /* GRegexMatchFlags */
- NULL, /* We are not interested in the match information */
- NULL /* We don't want error information */
- );
- /* NOTE - DO NOT g_free(data) */
+ rc = fvalue_regex_matches(regex, data, tvb_len);
} else {
- rc = g_regex_match_full(
- regex, /* Compiled PCRE */
- a->proto_string, /* The data to check for the pattern... */
- (int)strlen(a->proto_string), /* ... and its length */
- 0, /* Start offset within data */
- (GRegexMatchFlags)0, /* GRegexMatchFlags */
- NULL, /* We are not interested in the match information */
- NULL /* We don't want error information */
- );
+ rc = fvalue_regex_matches(regex, a->proto_string, -1);
}
}
CATCH_ALL {
diff --git a/epan/ftypes/ftype-string.c b/epan/ftypes/ftype-string.c
index 21c76dfa8b..ccc3f97733 100644
--- a/epan/ftypes/ftype-string.c
+++ b/epan/ftypes/ftype-string.c
@@ -138,22 +138,14 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
-cmp_matches(const fvalue_t *fv, const GRegex *regex)
+cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
{
char *str = fv->value.string;
if (! regex) {
return FALSE;
}
- return g_regex_match_full(
- regex, /* Compiled PCRE */
- str, /* The data to check for the pattern... */
- (int)strlen(str), /* ... and its length */
- 0, /* Start offset within data */
- (GRegexMatchFlags)0, /* GRegexMatchFlags */
- NULL, /* We are not interested in the match information */
- NULL /* We don't want error information */
- );
+ return fvalue_regex_matches(regex, str, -1);
}
void
diff --git a/epan/ftypes/ftypes-int.h b/epan/ftypes/ftypes-int.h
index 704b5d55cc..df01c95c12 100644
--- a/epan/ftypes/ftypes-int.h
+++ b/epan/ftypes/ftypes-int.h
@@ -64,7 +64,7 @@ typedef double (*FvalueGetFloatingFunc)(fvalue_t*);
typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*);
-typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*);
+typedef gboolean (*FvalueMatches)(const fvalue_t*, const fvalue_regex_t*);
typedef guint (*FvalueLen)(fvalue_t*);
typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);
diff --git a/epan/ftypes/ftypes.c b/epan/ftypes/ftypes.c
index e10a38513d..12324c2d3e 100644
--- a/epan/ftypes/ftypes.c
+++ b/epan/ftypes/ftypes.c
@@ -8,12 +8,14 @@
#include "config.h"
-#include <ftypes-int.h>
-#include <glib.h>
+#include "ftypes-int.h"
-#include "ftypes.h"
#include <wsutil/ws_assert.h>
+struct _fvalue_regex_t {
+ GRegex *code;
+};
+
/* Keep track of ftype_t's via their ftenum number */
static ftype_t* type_list[FT_NUM_TYPES];
@@ -747,13 +749,64 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b)
}
gboolean
-fvalue_matches(const fvalue_t *a, const GRegex *b)
+fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b)
{
/* XXX - check compatibility of a and b */
ws_assert(a->ftype->cmp_matches);
return a->ftype->cmp_matches(a, b);
}
+fvalue_regex_t *
+fvalue_regex_compile(const char *patt, char **errmsg)
+{
+ GError *regex_error = NULL;
+ GRegex *pcre;
+
+ /*
+ * As a string is not guaranteed to contain valid UTF-8,
+ * we have to disable support for UTF-8 patterns and treat
+ * every pattern and subject as raw bytes.
+ *
+ * Should support for UTF-8 patterns be necessary, then we
+ * should compile a pattern without G_REGEX_RAW. Additionally,
+ * we MUST use g_utf8_validate() before calling g_regex_match_full()
+ * or risk crashes.
+ */
+ GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
+
+ pcre = g_regex_new(patt, cflags, 0, &regex_error);
+
+ if (regex_error) {
+ *errmsg = g_strdup(regex_error->message);
+ g_error_free(regex_error);
+ return NULL;
+ }
+
+ struct _fvalue_regex_t *re = g_new(struct _fvalue_regex_t, 1);
+ re->code = pcre;
+
+ return re;
+}
+
+gboolean
+fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size)
+{
+ return g_regex_match_full(regex->code, subj, subj_size, 0, 0, NULL, NULL);
+}
+
+void
+fvalue_regex_free(fvalue_regex_t *regex)
+{
+ g_regex_unref(regex->code);
+ g_free(regex);
+}
+
+const char *
+fvalue_regex_pattern(const fvalue_regex_t *regex)
+{
+ return g_regex_get_pattern(regex->code);
+}
+
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
diff --git a/epan/ftypes/ftypes.h b/epan/ftypes/ftypes.h
index a43b655d84..9659bd0e4b 100644
--- a/epan/ftypes/ftypes.h
+++ b/epan/ftypes/ftypes.h
@@ -131,6 +131,9 @@ typedef enum ft_framenum_type ft_framenum_type_t;
struct _ftype_t;
typedef struct _ftype_t ftype_t;
+struct _fvalue_regex_t;
+typedef struct _fvalue_regex_t fvalue_regex_t;
+
/* String representation types. */
enum ftrepr {
FTREPR_DISPLAY,
@@ -368,7 +371,19 @@ gboolean
fvalue_contains(const fvalue_t *a, const fvalue_t *b);
gboolean
-fvalue_matches(const fvalue_t *a, const GRegex *b);
+fvalue_matches(const fvalue_t *a, const fvalue_regex_t *re);
+
+fvalue_regex_t *
+fvalue_regex_compile(const char *patt, char **errmsg);
+
+gboolean
+fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size);
+
+void
+fvalue_regex_free(fvalue_regex_t *regex);
+
+const char *
+fvalue_regex_pattern(const fvalue_regex_t *regex);
guint
fvalue_length(fvalue_t *fv);
diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py
index 42a7099e68..204851e7d0 100644
--- a/test/suite_dfilter/group_syntax.py
+++ b/test/suite_dfilter/group_syntax.py
@@ -42,3 +42,7 @@ class case_syntax(unittest.TestCase):
def test_matches_3(self, checkDFilterFail):
dfilter = 'http.request.method matches "^HEAD" matches "^POST"'
checkDFilterFail(dfilter, '"matches" was unexpected in this context.')
+
+ def test_matches_4(self, checkDFilterCount):
+ dfilter = r'http.host matches r"update\.microsoft\.c.."'
+ checkDFilterCount(dfilter, 1)