diff options
author | Guy Harris <gharris@sonic.net> | 2021-03-21 03:06:17 -0700 |
---|---|---|
committer | Guy Harris <gharris@sonic.net> | 2021-03-21 03:27:44 -0700 |
commit | b61fd6d76a2ce97f2bae40d8a8eacebc91e617ea (patch) | |
tree | d229e30e3f969bb916d1dd50f1d98a60b33252d0 | |
parent | ce611792c37f5c80692bc927e07d3d4f36cd5b65 (diff) |
dfilter, ftypes: get rid of FT_PCRE.
It's not a valid field type, it's only a hack to support regular
expression matching in packet-matching expressions.
Instead, in the packet-matching code, have a separate syntax tree type
for Perl-compatible regular expressions, and a separate instruction to
load one into a register, and have the "matching" operator for field
types take a GRegex * as the second argument.
-rw-r--r-- | epan/dfilter/dfvm.c | 58 | ||||
-rw-r--r-- | epan/dfilter/dfvm.h | 5 | ||||
-rw-r--r-- | epan/dfilter/gencode.c | 24 | ||||
-rw-r--r-- | epan/dfilter/grammar.lemon | 1 | ||||
-rw-r--r-- | epan/dfilter/semcheck.c | 211 | ||||
-rw-r--r-- | epan/dfilter/sttype-pointer.c | 24 | ||||
-rw-r--r-- | epan/dfilter/syntax-tree.h | 1 | ||||
-rw-r--r-- | epan/ftypes/CMakeLists.txt | 1 | ||||
-rw-r--r-- | epan/ftypes/ftype-bytes.c | 15 | ||||
-rw-r--r-- | epan/ftypes/ftype-pcre.c | 164 | ||||
-rw-r--r-- | epan/ftypes/ftype-protocol.c | 12 | ||||
-rw-r--r-- | epan/ftypes/ftype-string.c | 14 | ||||
-rw-r--r-- | epan/ftypes/ftypes-int.h | 3 | ||||
-rw-r--r-- | epan/ftypes/ftypes.c | 7 | ||||
-rw-r--r-- | epan/ftypes/ftypes.h | 3 | ||||
-rw-r--r-- | epan/proto.c | 17 | ||||
-rw-r--r-- | epan/wslua/wslua_field.c | 2 | ||||
-rw-r--r-- | epan/wslua/wslua_proto_field.c | 2 | ||||
-rw-r--r-- | test/lua/globals_2.2.txt | 1 | ||||
-rwxr-xr-x | tools/convert_proto_tree_add_text.pl | 2 | ||||
-rwxr-xr-x | tools/fix-encoding-args.pl | 1 |
21 files changed, 265 insertions, 303 deletions
diff --git a/epan/dfilter/dfvm.c b/epan/dfilter/dfvm.c index 5bbcbaccb6..e944478d3a 100644 --- a/epan/dfilter/dfvm.c +++ b/epan/dfilter/dfvm.c @@ -36,6 +36,9 @@ dfvm_value_free(dfvm_value_t *v) case DRANGE: drange_free(v->value.drange); break; + case PCRE: + g_regex_unref(v->value.pcre); + break; default: /* nothing */ ; @@ -105,6 +108,12 @@ dfvm_dump(FILE *f, dfilter_t *df) arg2->value.numeric); wmem_free(NULL, value_str); break; + case PUT_PCRE: + fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n", + id, + g_regex_get_pattern(arg1->value.pcre), + arg2->value.numeric); + break; case CHECK_EXISTS: case READ_TREE: case CALL_FUNCTION: @@ -169,6 +178,11 @@ dfvm_dump(FILE *f, dfilter_t *df) g_assert_not_reached(); break; + case PUT_PCRE: + /* We already dumped these */ + g_assert_not_reached(); + break; + case MK_RANGE: arg3 = insn->arg3; fprintf(f, "%05d MK_RANGE\t\treg#%u[", @@ -347,6 +361,16 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg) return TRUE; } +/* Put a constant PCRE in a register. These will not be cleared by + * free_register_overhead. 
*/ +static gboolean +put_pcre(dfilter_t *df, GRegex *pcre, int reg) +{ + df->registers[reg] = g_list_append(NULL, pcre); + df->owns_memory[reg] = FALSE; + return TRUE; +} + typedef gboolean (*FvalueCmpFunc)(const fvalue_t*, const fvalue_t*); static gboolean @@ -370,6 +394,26 @@ any_test(dfilter_t *df, FvalueCmpFunc cmp, int reg1, int reg2) } static gboolean +any_matches(dfilter_t *df, int reg1, int reg2) +{ + GList *list_a, *list_b; + + list_a = df->registers[reg1]; + + while (list_a) { + list_b = df->registers[reg2]; + while (list_b) { + if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) { + return TRUE; + } + list_b = g_list_next(list_b); + } + list_a = g_list_next(list_a); + } + return FALSE; +} + +static gboolean any_in_range(dfilter_t *df, int reg1, int reg2, int reg3) { GList *list1, *list_low, *list_high; @@ -568,7 +612,7 @@ dfvm_apply(dfilter_t *df, proto_tree *tree) break; case ANY_MATCHES: - accum = any_test(df, fvalue_matches, + accum = any_matches(df, arg1->value.numeric, arg2->value.numeric); break; @@ -609,6 +653,14 @@ dfvm_apply(dfilter_t *df, proto_tree *tree) break; #endif + case PUT_PCRE: +#if 0 + /* These were handled in the constants initialization */ + accum = put_pcre(df, + arg1->value.pcre, arg2->value.numeric); + break; +#endif + default: g_assert_not_reached(); break; @@ -640,6 +692,10 @@ dfvm_init_const(dfilter_t *df) put_fvalue(df, arg1->value.fvalue, arg2->value.numeric); break; + case PUT_PCRE: + put_pcre(df, + arg1->value.pcre, arg2->value.numeric); + break; case CHECK_EXISTS: case READ_TREE: case CALL_FUNCTION: diff --git a/epan/dfilter/dfvm.h b/epan/dfilter/dfvm.h index cfc5ce51f0..71e6cd4449 100644 --- a/epan/dfilter/dfvm.h +++ b/epan/dfilter/dfvm.h @@ -23,7 +23,8 @@ typedef enum { REGISTER, INTEGER, DRANGE, - FUNCTION_DEF + FUNCTION_DEF, + PCRE } dfvm_value_type_t; typedef struct { @@ -35,6 +36,7 @@ typedef struct { drange_t *drange; header_field_info *hfinfo; df_func_def_t *funcdef; + GRegex *pcre; } value; } 
dfvm_value_t; @@ -49,6 +51,7 @@ typedef enum { RETURN, READ_TREE, PUT_FVALUE, + PUT_PCRE, ANY_EQ, ANY_NE, ANY_GT, diff --git a/epan/dfilter/gencode.c b/epan/dfilter/gencode.c index ecc929a738..cfd22c920a 100644 --- a/epan/dfilter/gencode.c +++ b/epan/dfilter/gencode.c @@ -231,6 +231,27 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp) return val2->value.numeric; } +/* returns register number */ +static int +dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre) +{ + dfvm_insn_t *insn; + dfvm_value_t *val1, *val2; + int reg; + + insn = dfvm_insn_new(PUT_PCRE); + val1 = dfvm_value_new(PCRE); + val1->value.pcre = pcre; + val2 = dfvm_value_new(REGISTER); + reg = dfw->first_constant--; + val2->value.numeric = reg; + insn->arg1 = val1; + insn->arg2 = val2; + dfw_append_const(dfw, insn); + + return reg; +} + /** * Adds an instruction for a relation operator where the values are already @@ -404,6 +425,9 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp) else if (e_type == STTYPE_FUNCTION) { reg = dfw_append_function(dfw, st_arg, p_jmp); } + else if (e_type == STTYPE_PCRE) { + reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg)); + } else { /* printf("sttype_id is %u\n", (unsigned)e_type); */ g_assert_not_reached(); diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon index c29b91a592..4ea3a2b673 100644 --- a/epan/dfilter/grammar.lemon +++ b/epan/dfilter/grammar.lemon @@ -118,6 +118,7 @@ any "error" symbols are shifted, if possible. 
*/ case STTYPE_NUM_TYPES: case STTYPE_RANGE: case STTYPE_FVALUE: + case STTYPE_PCRE: g_assert_not_reached(); break; } diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c index 7837c3be28..c1be5f3a56 100644 --- a/epan/dfilter/semcheck.c +++ b/epan/dfilter/semcheck.c @@ -133,7 +133,6 @@ compatible_ftypes(ftenum_t a, ftenum_t b) return FALSE; } - case FT_PCRE: case FT_NUM_TYPES: g_assert_not_reached(); } @@ -219,7 +218,6 @@ mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, char *s) case FT_STRINGZPAD: case FT_STRINGZTRUNC: case FT_EUI64: - case FT_PCRE: case FT_GUID: case FT_OID: case FT_REL_OID: @@ -383,7 +381,6 @@ is_bytes_type(enum ftenum type) case FT_INT48: case FT_INT56: case FT_INT64: - case FT_PCRE: case FT_EUI64: return FALSE; @@ -395,6 +392,46 @@ is_bytes_type(enum ftenum type) return FALSE; } +/* Gets a GRegex from a string, and sets the error message on failure. */ +static GRegex* +dfilter_g_regex_from_string(dfwork_t *dfw, const char *s) +{ + GError *regex_error = NULL; + GRegexCompileFlags cflags = (GRegexCompileFlags)(G_REGEX_CASELESS | G_REGEX_OPTIMIZE); + GRegex *pcre; + + /* + * As FT_BYTES and FT_PROTOCOL contain arbitrary binary data + * and FT_STRING is not guaranteed to contain valid UTF-8, + * we have to disable support for UTF-8 patterns and treat + * every pattern and subject as raw bytes. + * + * Should support for UTF-8 patterns be necessary, then we + * should compile a pattern without G_REGEX_RAW. Additionally, + * we MUST use g_utf8_validate() before calling g_regex_match_full() + * or risk crashes. 
+ */ + cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW); + + pcre = g_regex_new( + s, /* pattern */ + cflags, /* Compile options */ + (GRegexMatchFlags)0, /* Match options */ + &regex_error /* Compile / study errors */ + ); + + if (regex_error) { + if (dfw->error_message == NULL) + dfw->error_message = g_strdup(regex_error->message); + g_error_free(regex_error); + if (pcre) { + g_regex_unref(pcre); + } + return NULL; + } + return pcre; +} + /* Check the semantics of an existence test. */ static void check_exists(dfwork_t *dfw, stnode_t *st_arg1) @@ -439,6 +476,7 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1) case STTYPE_INTEGER: case STTYPE_FVALUE: case STTYPE_SET: + case STTYPE_PCRE: case STTYPE_NUM_TYPES: g_assert_not_reached(); } @@ -636,6 +674,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, df_func_def_t *funcdef; ftenum_t ftype1, ftype2; fvalue_t *fvalue; + GRegex *pcre; char *s; type2 = stnode_type_id(st_arg2); @@ -677,11 +716,12 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, type2 == STTYPE_CHARCONST) { s = (char *)stnode_data(st_arg2); if (strcmp(relation_string, "matches") == 0) { - /* Convert to a FT_PCRE */ - if (type2 == STTYPE_STRING) - fvalue = dfilter_fvalue_from_string(dfw, FT_PCRE, s); - else - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE); + /* Convert to a GRegex */ + pcre = dfilter_g_regex_from_string(dfw, s); + if (!pcre) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_PCRE, pcre); } else { /* Skip incompatible fields */ while (hfinfo1->same_name_prev_id != -1 && @@ -720,13 +760,11 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, dfw->error_message = NULL; } } + if (!fvalue) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_FVALUE, fvalue); } - - if (!fvalue) { - THROW(TypeError); - } - - new_st = stnode_new(STTYPE_FVALUE, fvalue); if (stnode_type_id(st_node) == STTYPE_TEST) { sttype_test_set2_args(st_node, st_arg1, new_st); } else { @@ 
-1018,6 +1056,7 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, header_field_info *hfinfo1, *hfinfo2; ftenum_t ftype1, ftype2; fvalue_t *fvalue; + GRegex *pcre; char *s; int len_range; @@ -1081,16 +1120,20 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_STRING)\n")); s = (char*)stnode_data(st_arg2); if (strcmp(relation_string, "matches") == 0) { - /* Convert to a FT_PCRE */ - fvalue = dfilter_fvalue_from_string(dfw, FT_PCRE, s); + /* Convert to a GRegex * */ + pcre = dfilter_g_regex_from_string(dfw, s); + if (!pcre) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_PCRE, pcre); } else { fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, s); + if (!fvalue) { + DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_STRING): Could not convert from string!\n")); + THROW(TypeError); + } + new_st = stnode_new(STTYPE_FVALUE, fvalue); } - if (!fvalue) { - DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_STRING): Could not convert from string!\n")); - THROW(TypeError); - } - new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); stnode_free(st_arg2); } @@ -1099,36 +1142,51 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, s = (char*)stnode_data(st_arg2); len_range = drange_get_total_length(sttype_range_drange(st_arg1)); if (strcmp(relation_string, "matches") == 0) { - /* Convert to a FT_PCRE */ - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE); - } - - /* The RHS should be FT_BYTES. However, there is a special case where - * the range slice on the LHS is one byte long. In that case, it is natural - * for the user to specify a normal hex integer on the RHS, with the "0x" - * notation, as in "slice[0] == 0x10". We can't allow this for any - * slices that are longer than one byte, because then we'd have to know - * which endianness the byte string should be in. 
*/ - else if (len_range == 1 && strlen(s) == 4 && strncmp(s, "0x", 2) == 0) { - /* Even if the RHS string starts with "0x", it still could fail to - * be an integer. Try converting it here. */ - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_UINT8, s, allow_partial_value); - if (fvalue) { - FVALUE_FREE(fvalue); - /* The value doees indeed fit into 8 bits. Create a BYTE_STRING - * from it. Since we know that the last 2 characters are a valid - * hex string, just use those directly. */ - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s+2, allow_partial_value); - } - } - else { - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s, allow_partial_value); - } - if (!fvalue) { - DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n")); - THROW(TypeError); + /* Convert to a GRegex */ + pcre = dfilter_g_regex_from_string(dfw, s); + if (!pcre) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_PCRE, pcre); + } else { + /* + * The RHS should be FT_BYTES. However, there is a + * special case where the range slice on the LHS is + * one byte long. In that case, it is natural + * for the user to specify a normal hex integer + * on the RHS, with the "0x" notation, as in + * "slice[0] == 0x10". We can't allow this for any + * slices that are longer than one byte, because + * then we'd have to know which endianness the + * byte string should be in. + */ + if (len_range == 1 && strlen(s) == 4 && strncmp(s, "0x", 2) == 0) { + /* + * Even if the RHS string starts with "0x", + * it still could fail to be an integer. + * Try converting it here. + */ + fvalue = dfilter_fvalue_from_unparsed(dfw, FT_UINT8, s, allow_partial_value); + if (fvalue) { + FVALUE_FREE(fvalue); + /* + * The value doees indeed fit into + * 8 bits. Create a BYTE_STRING + * from it. Since we know that + * the last 2 characters are a valid + * hex string, just use those directly. 
+ */ + fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s+2, allow_partial_value); + } + } else { + fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, s, allow_partial_value); + } + if (!fvalue) { + DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n")); + THROW(TypeError); + } + new_st = stnode_new(STTYPE_FVALUE, fvalue); } - new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); stnode_free(st_arg2); } @@ -1136,18 +1194,22 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_CHARCONST)\n")); s = (char*)stnode_data(st_arg2); if (strcmp(relation_string, "matches") == 0) { - /* Convert to a FT_PCRE */ - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE); + /* Convert to a GRegex */ + pcre = dfilter_g_regex_from_string(dfw, s); + if (!pcre) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_PCRE, pcre); } else { /* The RHS should be FT_BYTES, but a character is just a * one-byte byte string. 
*/ fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, s, allow_partial_value); + if (!fvalue) { + DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n")); + THROW(TypeError); + } + new_st = stnode_new(STTYPE_FVALUE, fvalue); } - if (!fvalue) { - DebugLog((" 5 check_relation_LHS_RANGE(type2 = STTYPE_UNPARSED): Could not convert from string!\n")); - THROW(TypeError); - } - new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); stnode_free(st_arg2); } @@ -1222,6 +1284,7 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string, header_field_info *hfinfo2; ftenum_t ftype1, ftype2; fvalue_t *fvalue; + GRegex *pcre; char *s; df_func_def_t *funcdef; df_func_def_t *funcdef2; @@ -1264,32 +1327,38 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string, else if (type2 == STTYPE_STRING) { s = (char*)stnode_data(st_arg2); if (strcmp(relation_string, "matches") == 0) { - /* Convert to a FT_PCRE */ - fvalue = dfilter_fvalue_from_string(dfw, FT_PCRE, s); + /* Convert to a GRegex */ + pcre = dfilter_g_regex_from_string(dfw, s); + if (!pcre) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_PCRE, pcre); } else { fvalue = dfilter_fvalue_from_string(dfw, ftype1, s); + if (!fvalue) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_FVALUE, fvalue); } - if (!fvalue) { - THROW(TypeError); - } - - new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); stnode_free(st_arg2); } else if (type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) { s = (char*)stnode_data(st_arg2); if (strcmp(relation_string, "matches") == 0) { - /* Convert to a FT_PCRE */ - fvalue = dfilter_fvalue_from_unparsed(dfw, FT_PCRE, s, FALSE); + /* Convert to a GRegex */ + pcre = dfilter_g_regex_from_string(dfw, s); + if (!pcre) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_PCRE, pcre); } else { fvalue = dfilter_fvalue_from_unparsed(dfw, 
ftype1, s, allow_partial_value); + if (!fvalue) { + THROW(TypeError); + } + new_st = stnode_new(STTYPE_FVALUE, fvalue); } - if (!fvalue) { - THROW(TypeError); - } - - new_st = stnode_new(STTYPE_FVALUE, fvalue); sttype_test_set2_args(st_node, st_arg1, new_st); stnode_free(st_arg2); } diff --git a/epan/dfilter/sttype-pointer.c b/epan/dfilter/sttype-pointer.c index 0ee0d4b0cc..489db05872 100644 --- a/epan/dfilter/sttype-pointer.c +++ b/epan/dfilter/sttype-pointer.c @@ -24,6 +24,22 @@ fvalue_free(gpointer value) } } +static void +pcre_free(gpointer value) +{ + GRegex *pcre = (GRegex*)value; + + /* If the data was not claimed with stnode_steal_data(), free it. */ + if (pcre) { + /* + * They're reference-counted, so just drop the reference + * count; it'll get freed when the reference count drops + * to 0. + */ + g_regex_unref(pcre); + } +} + void sttype_register_pointer(void) { @@ -41,9 +57,17 @@ sttype_register_pointer(void) fvalue_free, NULL }; + static sttype_t pcre_type = { + STTYPE_PCRE, + "PCRE", + NULL, + pcre_free, + NULL + }; sttype_register(&field_type); sttype_register(&fvalue_type); + sttype_register(&pcre_type); } /* diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h index 555915664d..3123b49589 100644 --- a/epan/dfilter/syntax-tree.h +++ b/epan/dfilter/syntax-tree.h @@ -27,6 +27,7 @@ typedef enum { STTYPE_RANGE, STTYPE_FUNCTION, STTYPE_SET, + STTYPE_PCRE, STTYPE_NUM_TYPES } sttype_id_t; diff --git a/epan/ftypes/CMakeLists.txt b/epan/ftypes/CMakeLists.txt index 31a4607889..0346c1ccf2 100644 --- a/epan/ftypes/CMakeLists.txt +++ b/epan/ftypes/CMakeLists.txt @@ -26,7 +26,6 @@ set(FTYPE_FILES ftype-ipv6.c ftype-guid.c ftype-none.c - ftype-pcre.c ftype-protocol.c ftype-string.c ftype-time.c diff --git a/epan/ftypes/ftype-bytes.c b/epan/ftypes/ftype-bytes.c index ee15aa508e..dc414e3e62 100644 --- a/epan/ftypes/ftype-bytes.c +++ b/epan/ftypes/ftype-bytes.c @@ -650,21 +650,10 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b) } static 
gboolean -cmp_matches(const fvalue_t *fv_a, const fvalue_t *fv_b) +cmp_matches(const fvalue_t *fv, const GRegex *regex) { - GByteArray *a = fv_a->value.bytes; - GRegex *regex = fv_b->value.re; + GByteArray *a = fv->value.bytes; - /* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have - * warned us. For the same reason (and because we're using g_malloc()), - * fv_b->value.re is not NULL. - */ - if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) { - return FALSE; - } - if (! regex) { - return FALSE; - } return g_regex_match_full( regex, /* Compiled PCRE */ (char *)a->data, /* The data to check for the pattern... */ diff --git a/epan/ftypes/ftype-pcre.c b/epan/ftypes/ftype-pcre.c deleted file mode 100644 index 854d957ee4..0000000000 --- a/epan/ftypes/ftype-pcre.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Wireshark - Network traffic analyzer - * By Gerald Combs <gerald@wireshark.org> - * Copyright 2001 Gerald Combs - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ - -/* Perl-Compatible Regular Expression (PCRE) internal field type. - * Used with the "matches" dfilter operator, allowing efficient - * compilation and studying of a PCRE pattern in dfilters. - */ - -#include "config.h" - -#include <ftypes-int.h> - -#include <glib.h> -#include <string.h> - -static void -gregex_fvalue_new(fvalue_t *fv) -{ - fv->value.re = NULL; -} - -static void -gregex_fvalue_free(fvalue_t *fv) -{ - if (fv->value.re) { - g_regex_unref(fv->value.re); - fv->value.re = NULL; - } -} - -/* Generate a FT_PCRE from a parsed string pattern. - * On failure, if err_msg is non-null, set *err_msg to point to a - * g_malloc()ed error message. 
*/ -static gboolean -val_from_string(fvalue_t *fv, const char *pattern, gchar **err_msg) -{ - GError *regex_error = NULL; - GRegexCompileFlags cflags = (GRegexCompileFlags)(G_REGEX_CASELESS | G_REGEX_OPTIMIZE); - - /* - * As FT_BYTES and FT_PROTOCOL contain arbitrary binary data and FT_STRING - * is not guaranteed to contain valid UTF-8, we have to disable support for - * UTF-8 patterns and treat every pattern and subject as raw bytes. - * - * Should support for UTF-8 patterns be necessary, then we should compile a - * pattern without G_REGEX_RAW. Additionally, we MUST use g_utf8_validate() - * before calling g_regex_match_full() or risk crashes. - */ - cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW); - - /* Free up the old value, if we have one */ - gregex_fvalue_free(fv); - - fv->value.re = g_regex_new( - pattern, /* pattern */ - cflags, /* Compile options */ - (GRegexMatchFlags)0, /* Match options */ - &regex_error /* Compile / study errors */ - ); - - if (regex_error) { - if (err_msg) { - *err_msg = g_strdup(regex_error->message); - } - g_error_free(regex_error); - if (fv->value.re) { - g_regex_unref(fv->value.re); - } - return FALSE; - } - return TRUE; -} - -/* Generate a FT_PCRE from an unparsed string pattern. - * On failure, if err_msg is non-null, set *err_msg to point to a - * g_malloc()ed error message. */ -static gboolean -val_from_unparsed(fvalue_t *fv, const char *pattern, gboolean allow_partial_value, gchar **err_msg) -{ - g_assert(! 
allow_partial_value); - - return val_from_string(fv, pattern, err_msg); -} - -static int -gregex_repr_len(fvalue_t *fv, ftrepr_t rtype _U_, int field_display _U_) -{ - return (int)strlen(g_regex_get_pattern(fv->value.re)); -} - -static void -gregex_to_repr(fvalue_t *fv, ftrepr_t rtype _U_, int field_display _U_, char *buf, unsigned int size) -{ - g_strlcpy(buf, g_regex_get_pattern(fv->value.re), size); -} - -/* BEHOLD - value contains the string representation of the regular expression, - * and we want to store the compiled PCRE RE object into the value. */ -static void -gregex_fvalue_set(fvalue_t *fv, const char *value) -{ - g_assert(value != NULL); - /* Free up the old value, if we have one */ - gregex_fvalue_free(fv); - val_from_unparsed(fv, value, FALSE, NULL); -} - -static gpointer -gregex_fvalue_get(fvalue_t *fv) -{ - return fv->value.re; -} - -void -ftype_register_pcre(void) -{ - static ftype_t pcre_type = { - FT_PCRE, /* ftype */ - "FT_PCRE", /* name */ - "Compiled Perl-Compatible Regular Expression (GRegex) object", /* pretty_name */ - 0, /* wire_size */ - gregex_fvalue_new, /* new_value */ - gregex_fvalue_free, /* free_value */ - val_from_unparsed, /* val_from_unparsed */ - val_from_string, /* val_from_string */ - gregex_to_repr, /* val_to_string_repr */ - gregex_repr_len, /* len_string_repr */ - - { .set_value_string = gregex_fvalue_set }, /* union set_value */ - { .get_value_ptr = gregex_fvalue_get }, /* union get_value */ - - NULL, /* cmp_eq */ - NULL, /* cmp_ne */ - NULL, /* cmp_gt */ - NULL, /* cmp_ge */ - NULL, /* cmp_lt */ - NULL, /* cmp_le */ - NULL, /* cmp_bitwise_and */ - NULL, /* cmp_contains */ - NULL, /* cmp_matches */ - - NULL, /* len */ - NULL, /* slice */ - }; - ftype_register(FT_PCRE, &pcre_type); -} - -/* - * Editor modelines - https://www.wireshark.org/tools/modelines.html - * - * Local variables: - * c-basic-offset: 4 - * tab-width: 8 - * indent-tabs-mode: nil - * End: - * - * vi: set shiftwidth=4 tabstop=8 expandtab: - * 
:indentSize=4:tabSize=8:noTabs=true: - */ diff --git a/epan/ftypes/ftype-protocol.c b/epan/ftypes/ftype-protocol.c index 71ffae00c0..b3dc5328a5 100644 --- a/epan/ftypes/ftype-protocol.c +++ b/epan/ftypes/ftype-protocol.c @@ -383,21 +383,13 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b) } static gboolean -cmp_matches(const fvalue_t *fv_a, const fvalue_t *fv_b) +cmp_matches(const fvalue_t *fv, const GRegex *regex) { - const protocol_value_t *a = (const protocol_value_t *)&fv_a->value.protocol; - GRegex *regex = fv_b->value.re; + const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol; volatile gboolean rc = FALSE; const char *data = NULL; /* tvb data */ guint32 tvb_len; /* tvb length */ - /* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have - * warned us. For the same reason (and because we're using g_malloc()), - * fv_b->value.re is not NULL. - */ - if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) { - return FALSE; - } if (! regex) { return FALSE; } diff --git a/epan/ftypes/ftype-string.c b/epan/ftypes/ftype-string.c index f6813d3b30..c42f08d928 100644 --- a/epan/ftypes/ftype-string.c +++ b/epan/ftypes/ftype-string.c @@ -186,18 +186,10 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b) } static gboolean -cmp_matches(const fvalue_t *fv_a, const fvalue_t *fv_b) +cmp_matches(const fvalue_t *fv, const GRegex *regex) { - char *str = fv_a->value.string; - GRegex *regex = fv_b->value.re; - - /* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have - * warned us. For the same reason (and because we're using g_malloc()), - * fv_b->value.re is not NULL. - */ - if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) { - return FALSE; - } + char *str = fv->value.string; + if (! 
regex) { return FALSE; } diff --git a/epan/ftypes/ftypes-int.h b/epan/ftypes/ftypes-int.h index 30051acd41..98af5997c2 100644 --- a/epan/ftypes/ftypes-int.h +++ b/epan/ftypes/ftypes-int.h @@ -62,6 +62,7 @@ typedef gint64 (*FvalueGetSignedInteger64Func)(fvalue_t*); typedef double (*FvalueGetFloatingFunc)(fvalue_t*); typedef gboolean (*FvalueCmp)(const fvalue_t*, const fvalue_t*); +typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*); typedef guint (*FvalueLen)(fvalue_t*); typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length); @@ -109,7 +110,7 @@ struct _ftype_t { FvalueCmp cmp_le; FvalueCmp cmp_bitwise_and; FvalueCmp cmp_contains; - FvalueCmp cmp_matches; + FvalueMatches cmp_matches; FvalueLen len; FvalueSlice slice; diff --git a/epan/ftypes/ftypes.c b/epan/ftypes/ftypes.c index e62582ab79..a8a4838ffa 100644 --- a/epan/ftypes/ftypes.c +++ b/epan/ftypes/ftypes.c @@ -31,7 +31,6 @@ ftypes_initialize(void) ftype_register_string(); ftype_register_time(); ftype_register_tvbuff(); - ftype_register_pcre(); } /* Each ftype_t is registered via this function */ @@ -528,8 +527,7 @@ void fvalue_set_string(fvalue_t *fv, const gchar *value) { g_assert(IS_FT_STRING(fv->ftype->ftype) || - fv->ftype->ftype == FT_UINT_STRING || - fv->ftype->ftype == FT_PCRE); + fv->ftype->ftype == FT_UINT_STRING); g_assert(fv->ftype->set_value.set_value_string); fv->ftype->set_value.set_value_string(fv, value); } @@ -618,7 +616,6 @@ fvalue_get(fvalue_t *fv) fv->ftype->ftype == FT_FCWWN || fv->ftype->ftype == FT_GUID || fv->ftype->ftype == FT_IPv6 || - fv->ftype->ftype == FT_PCRE || fv->ftype->ftype == FT_PROTOCOL || IS_FT_STRING(fv->ftype->ftype) || fv->ftype->ftype == FT_UINT_STRING || @@ -753,7 +750,7 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b) } gboolean -fvalue_matches(const fvalue_t *a, const fvalue_t *b) +fvalue_matches(const fvalue_t *a, const GRegex *b) { /* XXX - check compatibility of a and b */ g_assert(a->ftype->cmp_matches); diff --git 
a/epan/ftypes/ftypes.h b/epan/ftypes/ftypes.h index 197c1d58a2..740b0e0280 100644 --- a/epan/ftypes/ftypes.h +++ b/epan/ftypes/ftypes.h @@ -58,7 +58,6 @@ enum ftenum { FT_IPv6, FT_IPXNET, FT_FRAMENUM, /* a UINT32, but if selected lets you go to frame with that number */ - FT_PCRE, /* a compiled Perl-Compatible Regular Expression object */ FT_GUID, /* GUID, UUID */ FT_OID, /* OBJECT IDENTIFIER */ FT_EUI64, @@ -370,7 +369,7 @@ gboolean fvalue_contains(const fvalue_t *a, const fvalue_t *b); gboolean -fvalue_matches(const fvalue_t *a, const fvalue_t *b); +fvalue_matches(const fvalue_t *a, const GRegex *b); guint fvalue_length(fvalue_t *fv); diff --git a/epan/proto.c b/epan/proto.c index 08994bae86..3e013e9192 100644 --- a/epan/proto.c +++ b/epan/proto.c @@ -7101,7 +7101,6 @@ proto_item_get_display_repr(wmem_allocator_t *scope, proto_item *pi) return ""; fi = PITEM_FINFO(pi); DISSECTOR_ASSERT(fi->hfinfo != NULL); - DISSECTOR_ASSERT(fi->hfinfo->type != FT_PCRE); return fvalue_to_string_repr(scope, &fi->value, FTREPR_DISPLAY, fi->hfinfo->display); } @@ -8236,16 +8235,6 @@ tmp_fld_check_assert(header_field_info *hfinfo) if (!hfinfo->abbrev || !hfinfo->abbrev[0]) g_error("Field '%s' does not have an abbreviation\n", hfinfo->name); - /* - * FT_PCRE is a special "field" type; it's not for use by - * fields, just by field values in filter expressions. - */ - if (hfinfo->type == FT_PCRE) { - g_error("Field '%s' (%s) is of type FT_PCRE," - " which is not allowed\n", - hfinfo->name, hfinfo->abbrev); - } - /* These types of fields are allowed to have value_strings, * true_false_strings or a protocol_t struct */ @@ -11533,12 +11522,6 @@ construct_match_selected_string(field_info *finfo, epan_dissect_t *edt, } break; - case FT_PCRE: - /* FT_PCRE never appears as a type for a registered field. It is - * only used internally. */ - DISSECTOR_ASSERT_NOT_REACHED(); - break; - /* By default, use the fvalue's "to_string_repr" method. */ default: /* Figure out the string length needed. 
diff --git a/epan/wslua/wslua_field.c b/epan/wslua/wslua_field.c index c683ebacbe..8e1ab66445 100644 --- a/epan/wslua/wslua_field.c +++ b/epan/wslua/wslua_field.c @@ -207,7 +207,7 @@ WSLUA_METAMETHOD FieldInfo__tostring(lua_State* L) { if (fi->ws_fi->value.ftype->val_to_string_repr) { gchar* repr = NULL; - if (fi->ws_fi->hfinfo->type == FT_PROTOCOL || fi->ws_fi->hfinfo->type == FT_PCRE) { + if (fi->ws_fi->hfinfo->type == FT_PROTOCOL) { repr = fvalue_to_string_repr(NULL, &fi->ws_fi->value,FTREPR_DFILTER,BASE_NONE); } else { diff --git a/epan/wslua/wslua_proto_field.c b/epan/wslua/wslua_proto_field.c index e3f63273df..2d4b0d06cd 100644 --- a/epan/wslua/wslua_proto_field.c +++ b/epan/wslua/wslua_proto_field.c @@ -746,8 +746,6 @@ WSLUA_CONSTRUCTOR ProtoField_new(lua_State* L) { case FT_STRINGZTRUNC: WSLUA_ARG_ERROR(ProtoField_new,TYPE,"Unsupported ProtoField field type"); break; - /* FT_PCRE isn't a valid field type. */ - case FT_PCRE: default: WSLUA_ARG_ERROR(ProtoField_new,TYPE,"Invalid ProtoField field type"); break; diff --git a/test/lua/globals_2.2.txt b/test/lua/globals_2.2.txt index e891184fbf..67442b8112 100644 --- a/test/lua/globals_2.2.txt +++ b/test/lua/globals_2.2.txt @@ -767,7 +767,6 @@ ["IPv6"] = 24, ["NONE"] = 0, ["OID"] = 29, - ["PCRE"] = 27, ["PROTOCOL"] = 1, ["RELATIVE_TIME"] = 16, ["REL_OID"] = 33, diff --git a/tools/convert_proto_tree_add_text.pl b/tools/convert_proto_tree_add_text.pl index 055c547398..c54964d6d4 100755 --- a/tools/convert_proto_tree_add_text.pl +++ b/tools/convert_proto_tree_add_text.pl @@ -76,7 +76,7 @@ my %FIELD_TYPE = ('FT_NONE' => "FT_NONE", 'FT_PROTOCOL' => "FT_PROTOCOL", 'FT_BO 'FT_STRING' => "FT_STRING", 'FT_STRINGZ' => "FT_STRINGZ", 'FT_UINT_STRING' => "FT_UINT_STRING", 'FT_ETHER' => "FT_ETHER", 'FT_BYTES' => "FT_BYTES", 'FT_UINT_BYTES' => "FT_UINT_BYTES", 'FT_IPv4' => "FT_IPv4", 'FT_IPv6' => "FT_IPv6", 'FT_IPXNET' => "FT_IPXNET", 'FT_AX25' => "FT_AX25", 'FT_VINES' => "FT_VINES", - 'FT_FRAMENUM' => "FT_FRAMENUM", 'FT_PCRE' 
=> "FT_PCRE", 'FT_GUID' => "FT_GUID", 'FT_OID' => "FT_OID", 'FT_REL_OID' => "FT_REL_OID", 'FT_EUI64' => "FT_EUI64"); + 'FT_FRAMENUM' => "FT_FRAMENUM", 'FT_GUID' => "FT_GUID", 'FT_OID' => "FT_OID", 'FT_REL_OID' => "FT_REL_OID", 'FT_EUI64' => "FT_EUI64"); my %EXPERT_SEVERITY = ('PI_COMMENT' => "PI_COMMENT", 'PI_CHAT' => "PI_CHAT", diff --git a/tools/fix-encoding-args.pl b/tools/fix-encoding-args.pl index e9f5186660..fc98c75bd5 100755 --- a/tools/fix-encoding-args.pl +++ b/tools/fix-encoding-args.pl @@ -167,7 +167,6 @@ my @types_ALL = FT_IPv6 FT_IPXNET FT_FRAMENUM - FT_PCRE FT_GUID FT_OID FT_REL_OID |