aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoão Valverde <j@v6e.pt>2022-12-29 06:04:00 +0000
committerJoão Valverde <j@v6e.pt>2023-01-07 21:15:25 +0000
commit1861679e8132a26ff1eb891e349856fdcea1f8be (patch)
treed1eaaa8be2265158a9cc276ddaefecff122a0f70
parent7641ba7416cdb0232ecef7c5aa3d39a711d1766c (diff)
dfilter: Optimize some scanner patterns
Cleanup flex code. Optimize some patterns to avoid lookups for field matches for values that are not legal field names. Improve warning and add some comments.
-rw-r--r--epan/dfilter/scanner.l212
-rw-r--r--epan/dfilter/semcheck.c18
-rw-r--r--epan/dfilter/syntax-tree.c17
-rw-r--r--epan/dfilter/syntax-tree.h11
-rw-r--r--test/suite_dfilter/group_syntax.py17
5 files changed, 208 insertions, 67 deletions
diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l
index 1943f2acc4..04c57de19a 100644
--- a/epan/dfilter/scanner.l
+++ b/epan/dfilter/scanner.l
@@ -80,13 +80,14 @@ WS_WARN_UNUSED static int set_lval_simple(df_scanner_state_t *state, int token,
#define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
#define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))
-WS_WARN_UNUSED static int set_lval_literal(df_scanner_state_t *state, const char *token_value);
-WS_WARN_UNUSED static int set_lval_unparsed(df_scanner_state_t *state, const char *token_value);
+WS_WARN_UNUSED static int set_lval_literal(df_scanner_state_t *state, const char *value, const char *token_value);
+WS_WARN_UNUSED static int set_lval_identifier(df_scanner_state_t *state, const char *value, const char *token_value);
+WS_WARN_UNUSED static int set_lval_constant(df_scanner_state_t *state, const char *value, const char *token_value);
+WS_WARN_UNUSED static int set_lval_unparsed(df_scanner_state_t *state, const char *value, const char *token_value);
+
+WS_WARN_UNUSED static int set_lval_field(df_scanner_state_t *state, const header_field_info *hfinfo, const char *token_value);
WS_WARN_UNUSED static int set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_charconst(df_scanner_state_t *state, GString *quoted_string);
-WS_WARN_UNUSED static int set_lval_field(df_scanner_state_t *state, const char *token_value, const header_field_info *hfinfo);
-WS_WARN_UNUSED static int set_lval_identifier(df_scanner_state_t *state, const char *token_value);
-WS_WARN_UNUSED static int set_lval_constant(df_scanner_state_t *state, const char *token_value);
static gboolean append_escaped_char(df_scanner_state_t *state, GString *str, char c);
static gboolean append_universal_character_name(df_scanner_state_t *state, GString *str, const char *ucn);
@@ -103,27 +104,43 @@ static void update_string_loc(df_scanner_state_t *state, const char *text);
%}
-Identifier [[:alnum:]_][[:alnum:]_-]*(\.[[:alnum:]_-]+)*
+FunctionIdentifier [[:alpha:]_][[:alnum:]_]*
+
+/*
+ * Cannot start with '-'. Some protocol names can contain '-', for example "mac-lte".
+ * Note that some protocol names start with a number, for example "9p".
+ * Some protocol names contain dots, e.g: _ws.expert
+ * Protocol or protocol field cannot contain DOTDOT anywhere.
+ */
+VarIdentifier [[:alnum:]_][[:alnum:]_-]*
+ProtoFieldIdentifier {VarIdentifier}(\.{VarIdentifier})*
+
+hex2 [[:xdigit:]]{2}
+ColonMacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
+HyphenMacAddress {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
+DotMacAddress {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
-hex2 [[:xdigit:]]{2}
-MacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}|{hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}|{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
+hex4 [[:xdigit:]]{4}
+DotQuadMacAddress {hex4}\.{hex4}\.{hex4}
-hex4 [[:xdigit:]]{4}
-QuadMacAddress {hex4}\.{hex4}\.{hex4}
+ColonBytes ({hex2}:)|({hex2}(:{hex2})+)
+HyphenBytes {hex2}(-{hex2})+
+DotBytes {hex2}(\.{hex2})+
-DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
-IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
+DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
+IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
-h16 [0-9A-Fa-f]{1,4}
-ls32 {h16}:{h16}|{IPv4Address}
-IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
+h16 [0-9A-Fa-f]{1,4}
+ls32 {h16}:{h16}|{IPv4Address}
+IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
-V4CidrPrefix \/[[:digit:]]{1,2}
-V6CidrPrefix \/[[:digit:]]{1,3}
+V4CidrPrefix \/[[:digit:]]{1,2}
+V6CidrPrefix \/[[:digit:]]{1,3}
-ColonBytes ({hex2}:)|({hex2}(:{hex2})+)
-DotBytes {hex2}(\.{hex2})+
-HyphenBytes {hex2}(-{hex2})+
+/* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
+StartAlphabet [[:alnum:]_:]
+Alphabet [[:alnum:]_:/-]
+LiteralValue {StartAlphabet}{Alphabet}*(\.{Alphabet}+)*
%x RANGE
%x LAYER
@@ -403,57 +420,120 @@ HyphenBytes {hex2}(-{hex2})+
g_string_append(yyextra->quoted_string, yytext);
}
+ /* NOTE: None of the patterns below can match ".." anywhere in the token string. */
- /* None of the patterns below can match ".." anywhere in the token string. */
+ /* MAC address. */
-{MacAddress}|{QuadMacAddress} {
+{ColonMacAddress}|{HyphenMacAddress} {
/* MAC Address. */
update_location(yyextra, yytext);
- return set_lval_unparsed(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+{DotMacAddress}|{DotQuadMacAddress} {
+ /* MAC Address, can also be a field. */
+ update_location(yyextra, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext);
}
+ /* IP address. */
+
{IPv4Address}{V4CidrPrefix}? {
/* IPv4 with or without prefix. */
update_location(yyextra, yytext);
- return set_lval_unparsed(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
}
{IPv6Address}{V6CidrPrefix}? {
/* IPv6 with or without prefix. */
update_location(yyextra, yytext);
- return set_lval_unparsed(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+ /* Integer or bytes */
+
+0[bBoOxX][[:xdigit:]]+ {
+ /* Binary/octal/hex integer. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
+}
+
+:[[:xdigit:]]+ {
+ /* Numeric prefixed with ':'. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+}
+
+[[:xdigit:]]+ {
+ /* Numeric or field. */
+ update_location(yyextra, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext);
+}
+
+ /* Floating point. */
+
+[[:digit:]]+\.[[:digit:]]+([eE][+-]?[[:digit:]]+)? {
+ /* Decimal float with optional exponent. */
+ /* Significand cannot have any side omitted. */
+ update_location(yyextra, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext);
+}
+
+0[xX][[:xdigit:]]+\.[[:xdigit:]]+([pP][+-]?[[:digit:]]+)? {
+ /* Hexadecimal float with optional exponent. Can't be a field because
+ * field cannot begin with 0x. */
+ /* Significand cannot have any side omitted. */
+ update_location(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
}
-:?({ColonBytes}|{DotBytes}|{HyphenBytes}) {
+ /* Bytes. */
+
+:?{ColonBytes} {
/* Bytes. */
update_location(yyextra, yytext);
if (yytext[0] == ':')
- return set_lval_literal(yyextra, yytext); /* Keep leading colon. */
- return set_lval_unparsed(yyextra, yytext);
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
}
-:[[:xdigit:]]+ {
- /* Numeric. */
+:?{HyphenBytes} {
+ /* Bytes. */
update_location(yyextra, yytext);
- return set_lval_literal(yyextra, yytext); /* Keep leading colon. */
+ if (yytext[0] == ':')
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+ return set_lval_literal(yyextra, yytext, yytext);
}
-{Identifier} {
- /* Identifier (field or function) or constant (literal). */
+:?{DotBytes} {
+ /* DotBytes, can be a field without ':' prefix. */
+ update_location(yyextra, yytext);
+ if (yytext[0] == ':')
+ return set_lval_literal(yyextra, yytext + 1, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext);
+}
+
+ /* Identifier (protocol/field/function name). */
+
+ /* This must come before ProtoFieldIdentifier to match function names. */
+{FunctionIdentifier} {
+ /* Identifier (field or function) or constant (bytes without separator). */
+ /* We use CONSTANT instead of LITERAL because the difference is significant
+ * in the syntactical grammar. */
update_location(yyextra, yytext);
header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra->dfw, yytext);
if (hfinfo != NULL) {
- return set_lval_identifier(yyextra, yytext);
+ return set_lval_identifier(yyextra, yytext, yytext);
}
df_func_def_t *def = df_func_lookup(yytext);
if (def != NULL) {
- return set_lval_identifier(yyextra, yytext);
+ return set_lval_identifier(yyextra, yytext, yytext);
}
- return set_lval_constant(yyextra, yytext);
+ return set_lval_constant(yyextra, yytext, yytext);
}
-\.{Identifier} {
- /* Field. */
+\.{ProtoFieldIdentifier} {
+ /* Identifier, prefixed with a '.'. */
update_location(yyextra, yytext);
const char *name = yytext + 1;
header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra->dfw, name);
@@ -461,7 +541,21 @@ HyphenBytes {hex2}(-{hex2})+
FAIL("\"%s\" is not a valid protocol or protocol field.", name);
return SCAN_FAILED;
}
- return set_lval_field(yyextra, yytext, hfinfo);
+ return set_lval_field(yyextra, hfinfo, yytext);
+}
+
+{ProtoFieldIdentifier} {
+ /* Catch-all for protocol values. Can also be a literal. */
+ update_location(yyextra, yytext);
+ return set_lval_unparsed(yyextra, yytext, yytext);
+}
+
+{LiteralValue} {
+ /* Catch-all for semantic values. */
+ update_location(yyextra, yytext);
+ /* We use literal here because identifiers (using unparsed) should have
+ * matched one of the previous rules. */
+ return set_lval_literal(yyextra, yytext, yytext);
}
. {
@@ -474,7 +568,6 @@ HyphenBytes {hex2}(-{hex2})+
return SCAN_FAILED;
}
-
%%
/*
@@ -511,34 +604,46 @@ set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, s
}
static int
-set_lval_literal(df_scanner_state_t *state, const char *token_value)
+set_lval_literal(df_scanner_state_t *state, const char *value, const char *token_value)
{
- state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(token_value), g_strdup(token_value), state->location);
+ state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), state->location);
return TOKEN_LITERAL;
}
static int
-set_lval_identifier(df_scanner_state_t *state, const char *token_value)
+set_lval_identifier(df_scanner_state_t *state, const char *value, const char *token_value)
{
- state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(token_value), g_strdup(token_value), state->location);
+ state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), state->location);
return TOKEN_IDENTIFIER;
}
static int
-set_lval_constant(df_scanner_state_t *state, const char *token_value)
+set_lval_constant(df_scanner_state_t *state, const char *value, const char *token_value)
{
- state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(token_value), g_strdup(token_value), state->location);
+ state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), state->location);
return TOKEN_CONSTANT;
}
static int
-set_lval_unparsed(df_scanner_state_t *state, const char *token_value)
+set_lval_unparsed(df_scanner_state_t *state, const char *value, const char *token_value)
{
- const header_field_info *hfinfo = dfilter_resolve_unparsed(state->dfw, token_value);
+ int token;
+ const header_field_info *hfinfo = dfilter_resolve_unparsed(state->dfw, value);
if (hfinfo != NULL) {
- return set_lval_field(state, token_value, hfinfo);
+ token = set_lval_field(state, hfinfo, token_value);
}
- return set_lval_literal(state, token_value);
+ else {
+ token = set_lval_literal(state, value, token_value);
+ }
+ stnode_set_flags(state->df_lval, STFLAG_UNPARSED);
+ return token;
+}
+
+static int
+set_lval_field(df_scanner_state_t *state, const header_field_info *hfinfo, const char *token_value)
+{
+ state->df_lval = stnode_new(STTYPE_FIELD, (gpointer)hfinfo, g_strdup(token_value), state->location);
+ return TOKEN_FIELD;
}
static int
@@ -567,13 +672,6 @@ set_lval_charconst(df_scanner_state_t *state, GString *quoted_string)
return TOKEN_CHARCONST;
}
-static int
-set_lval_field(df_scanner_state_t *state, const char *token_value, const header_field_info *hfinfo)
-{
- state->df_lval = stnode_new(STTYPE_FIELD, (gpointer)hfinfo, g_strdup(token_value), state->location);
- return TOKEN_FIELD;
-}
-
static gboolean
append_escaped_char(df_scanner_state_t *state, GString *str, char c)
{
diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c
index a769ac4ed5..f116050501 100644
--- a/epan/dfilter/semcheck.c
+++ b/epan/dfilter/semcheck.c
@@ -1149,18 +1149,18 @@ check_relation(dfwork_t *dfw, stnode_op_t st_op,
}
static void
-check_relation_contains_RHS_FIELD(dfwork_t *dfw, stnode_t *st_node _U_,
+check_warning_contains_RHS_FIELD(dfwork_t *dfw, stnode_t *st_node _U_,
stnode_t *st_arg1 _U_, stnode_t *st_arg2)
{
const char *token = stnode_token(st_arg2);
- if (token[0] == '.' || token[0] == ':')
- return;
-
header_field_info *hfinfo = sttype_field_hfinfo(st_arg2);
- fvalue_t *fvalue = fvalue_from_literal(FT_BYTES, hfinfo->abbrev, FALSE, NULL);
+ fvalue_t *fvalue = fvalue_from_literal(FT_BYTES, token, TRUE, NULL);
if (fvalue != NULL) {
- add_compile_warning(dfw, "Interpreting \"%s\" as \"%s\". Consider writing :%s or .%s",
- hfinfo->abbrev, hfinfo->name, hfinfo->abbrev, hfinfo->abbrev);
+ char *repr = fvalue_to_string_repr(dfw->dfw_scope, fvalue, FTREPR_DFILTER, 0);
+ add_compile_warning(dfw, "Interpreting \"%s\" as %s instead of %s. "
+ "Consider writing \"%s\" or \".%s\" to remove this warning",
+ token, hfinfo->name, ftype_pretty_name(FT_BYTES),
+ repr, hfinfo->abbrev);
fvalue_free(fvalue);
}
}
@@ -1171,8 +1171,8 @@ check_relation_contains(dfwork_t *dfw, stnode_t *st_node,
{
LOG_NODE(st_node);
- if (stnode_type_id(st_arg2) == STTYPE_FIELD) {
- check_relation_contains_RHS_FIELD(dfw, st_node, st_arg1, st_arg2);
+ if (stnode_type_id(st_arg2) == STTYPE_FIELD && stnode_get_flags(st_arg2, STFLAG_UNPARSED)) {
+ check_warning_contains_RHS_FIELD(dfw, st_node, st_arg1, st_arg2);
}
switch (stnode_type_id(st_arg1)) {
diff --git a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c
index c0f1207e95..6220817899 100644
--- a/epan/dfilter/syntax-tree.c
+++ b/epan/dfilter/syntax-tree.c
@@ -99,6 +99,7 @@ stnode_clear(stnode_t *node)
node->repr_token = NULL;
node->location.col_start = -1;
node->location.col_len = 0;
+ node->flags = 0;
}
void
@@ -113,6 +114,7 @@ stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, char *token, df_
node->repr_debug = NULL;
node->repr_token = token;
node->location = loc;
+ node->flags = 0;
if (type_id == STTYPE_UNINITIALIZED) {
node->type = NULL;
@@ -138,8 +140,10 @@ stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data)
{
char *token = g_strdup(node->repr_token);
df_loc_t loc = node->location;
+ uint16_t flags = node->flags;
stnode_clear(node);
stnode_init(node, type_id, data, token, loc);
+ node->flags = flags;
}
stnode_t*
@@ -174,6 +178,7 @@ stnode_dup(const stnode_t *node)
new->repr_debug = NULL;
new->repr_token = g_strdup(node->repr_token);
new->location = node->location;
+ new->flags = node->flags;
new->type = node->type;
if (node->type == NULL)
@@ -256,6 +261,18 @@ stnode_set_location(stnode_t *node, df_loc_t loc)
node->location = loc;
}
+gboolean
+stnode_get_flags(stnode_t *node, uint16_t flags)
+{
+ return node->flags & flags;
+}
+
+void
+stnode_set_flags(stnode_t *node, uint16_t flags)
+{
+ node->flags |= flags;
+}
+
/* Finds the first and last location from a set and creates
* a new location from start of first (col_start) to end of
* last (col_start + col_len). Sets the result to dst. */
diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h
index ef3dabcc17..3b97fe0bbb 100644
--- a/epan/dfilter/syntax-tree.h
+++ b/epan/dfilter/syntax-tree.h
@@ -54,6 +54,10 @@ typedef struct {
STTypeToStrFunc func_tostr;
} sttype_t;
+
+/* Lexical value is ambiguous (can be a protocol field or a literal). */
+#define STFLAG_UNPARSED (1 << 0)
+
/** Node (type instance) information */
typedef struct {
uint32_t magic;
@@ -63,6 +67,7 @@ typedef struct {
char *repr_display;
char *repr_debug;
df_loc_t location;
+ uint16_t flags;
} stnode_t;
typedef enum {
@@ -159,6 +164,12 @@ stnode_location(stnode_t *node);
void
stnode_set_location(stnode_t *node, df_loc_t loc);
+gboolean
+stnode_get_flags(stnode_t *node, uint16_t flags);
+
+void
+stnode_set_flags(stnode_t *node, uint16_t flags);
+
void
stnode_merge_location(stnode_t *dst, stnode_t *n1, stnode_t *n2);
diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py
index a2494ae84f..77762fb8ea 100644
--- a/test/suite_dfilter/group_syntax.py
+++ b/test/suite_dfilter/group_syntax.py
@@ -193,11 +193,26 @@ class case_equality(unittest.TestCase):
dfilter = 'frame[37] == fc:'
checkDFilterCount(dfilter, 1)
- def test_rhs_literal_bias_4(self, checkDFilterCount):
+ def test_rhs_bias_4(self, checkDFilterCount):
# Protocol "Fibre Channel" on the RHS
dfilter = 'frame[37] == .fc'
checkDFilterCount(dfilter, 0)
+ def test_rhs_bias_5(self, checkDFilterSucceed):
+ # Protocol "Fibre Channel" on the RHS (with warning)
+ dfilter = 'frame contains fc'
+ checkDFilterSucceed(dfilter, 'Interpreting "fc" as Fibre Channel')
+
+ def test_rhs_bias_6(self, checkDFilterSucceed):
+ # Protocol "Fibre Channel" on the RHS (without warning)
+ dfilter = 'frame contains .fc'
+ checkDFilterSucceed(dfilter)
+
+ def test_rhs_bias_7(self, checkDFilterSucceed):
+ # Byte 0xFC on the RHS
+ dfilter = 'frame contains fc:'
+ checkDFilterSucceed(dfilter)
+
@fixtures.uses_fixtures
class case_bitwise(unittest.TestCase):
trace_file = "http.pcap"