aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoão Valverde <j@v6e.pt>2021-09-26 12:05:54 +0100
committerJoão Valverde <j@v6e.pt>2021-10-14 16:45:19 +0100
commite91b5beafdeb8a2a1366e802fee2c2a3e346b054 (patch)
tree3e9231111fdd00887f8066272b7fedfbd5065f0f
parent3e6cc8ce4a17975499d00ffd7d5da9a41bb2f431 (diff)
dfilter: Resolve field names in the parser
The lexical rules for fields and unparsed strings are ambiguous, e.g. "fc" can be the protocol fibre channel or the byte 0xfc. In general a name is determined to be a protocol field or not by checking the registry. Resolving the name in the parser gives more flexibility, for example to use different semantic rules according to the relation between LHS and RHS, and allows function names and protocol names to co-exist without ambiguity. Before: Filter: tcp == 1 Constants: 00000 PUT_FVALUE 01 <FT_PROTOCOL> -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_EQ reg#0 == reg#1 00003 RETURN Filter: tcp() == 1 dftest: Syntax error near "(". After: Filter: tcp == 1 Constants: 00000 PUT_FVALUE 01 <FT_PROTOCOL> -> reg#1 Instructions: (same) Filter: tcp() == 1 dftest: Function 'tcp' does not exist It's also a goal to make it easier to modify the lexer rules. Ping #12810.
-rw-r--r--epan/dfilter/dfilter-int.h3
-rw-r--r--epan/dfilter/dfilter.c33
-rw-r--r--epan/dfilter/grammar.lemon10
-rw-r--r--epan/dfilter/scanner.l23
4 files changed, 37 insertions, 32 deletions
diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h
index 649ad82356..eff2946d98 100644
--- a/epan/dfilter/dfilter-int.h
+++ b/epan/dfilter/dfilter-int.h
@@ -90,6 +90,9 @@ dfilter_new_function(dfwork_t *dfw, const char *name);
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
+stnode_t *
+dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node);
+
const char *tokenstr(int token);
#endif
diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c
index 32afd60c99..e2a1a37069 100644
--- a/epan/dfilter/dfilter.c
+++ b/epan/dfilter/dfilter.c
@@ -129,6 +129,38 @@ dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
return TRUE;
}
+/*
+ * Tries to convert an STTYPE_UNPARSED to a STTYPE_FIELD. If it's not registered as
+ * a field pass UNPARSED to the semantic check.
+ *
+ * The node must be STTYPE_UNPARSED (asserted). Its data is used as a name and
+ * looked up with proto_registrar_get_byname() first, then with
+ * proto_registrar_get_byalias(). On a hit the node is changed in place with
+ * stnode_replace(); an alias hit also records the name in dfw->deprecated.
+ *
+ * Returns the same node pointer that was passed in, whether or not it was
+ * converted.
+ *
+ * NOTE(review): 'name' points at the node's data, and the alias branch records
+ * it before calling stnode_replace(); presumably the replace may invalidate
+ * that pointer, so keep that order -- confirm against stnode_replace().
+ */
+stnode_t *
+dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node)
+{
+ const char *name;
+ header_field_info *hfinfo;
+
+ ws_assert(stnode_type_id(node) == STTYPE_UNPARSED);
+
+ name = stnode_data(node);
+
+ hfinfo = proto_registrar_get_byname(name);
+ if (hfinfo != NULL) {
+ /* It's a field name: turn the node into an STTYPE_FIELD holding hfinfo. */
+ stnode_replace(node, STTYPE_FIELD, hfinfo);
+ return node;
+ }
+
+ hfinfo = proto_registrar_get_byalias(name);
+ if (hfinfo != NULL) {
+ /* It's an aliased field name: record the alias as deprecated, then convert. */
+ add_deprecated_token(dfw->deprecated, name);
+ stnode_replace(node, STTYPE_FIELD, hfinfo);
+ return node;
+ }
+
+ /* It's not a field: hand the node back still typed as UNPARSED. */
+ return node;
+}
/* Initialize the dfilter module */
void
@@ -292,7 +324,6 @@ const char *tokenstr(int token)
case TOKEN_TEST_MATCHES: return "TEST_MATCHES";
case TOKEN_TEST_BITWISE_AND: return "TEST_BITWISE_AND";
case TOKEN_TEST_NOT: return "TEST_NOT";
- case TOKEN_FIELD: return "FIELD";
case TOKEN_STRING: return "STRING";
case TOKEN_CHARCONST: return "CHARCONST";
case TOKEN_UNPARSED: return "UNPARSED";
diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon
index 3965cbae59..b2666710a2 100644
--- a/epan/dfilter/grammar.lemon
+++ b/epan/dfilter/grammar.lemon
@@ -69,8 +69,6 @@
any "error" symbols are shifted, if possible. */
%syntax_error {
- header_field_info *hfinfo;
-
if (!TOKEN) {
dfilter_fail(dfw, "Unexpected end of filter string.");
dfw->syntax_error = TRUE;
@@ -96,14 +94,11 @@ any "error" symbols are shifted, if possible. */
dfilter_fail(dfw, "\"%s\" was unexpected in this context.",
(char *)stnode_data(TOKEN));
break;
- case STTYPE_FIELD:
- hfinfo = (header_field_info *)stnode_data(TOKEN);
- dfilter_fail(dfw, "Syntax error near \"%s\".", hfinfo->abbrev);
- break;
/* These aren't handed to us as terminal tokens from
the scanner, so we can assert that we'll never
see them here. */
case STTYPE_NUM_TYPES:
+ case STTYPE_FIELD:
case STTYPE_FUNCTION:
case STTYPE_RANGE:
case STTYPE_FVALUE:
@@ -168,10 +163,9 @@ logical_test(T) ::= entity(E).
/* Entities, or things that can be compared/tested/checked */
-entity(E) ::= FIELD(F). { E = F; }
entity(E) ::= STRING(S). { E = S; }
entity(E) ::= CHARCONST(C). { E = C; }
-entity(E) ::= UNPARSED(U). { E = U; }
+entity(E) ::= UNPARSED(U). { E = dfilter_resolve_unparsed(dfw, U); }
entity(E) ::= range(R). { E = R; }
entity(E) ::= function(F). { E = F; }
diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l
index f5991cbe7d..8a5c00b9c7 100644
--- a/epan/dfilter/scanner.l
+++ b/epan/dfilter/scanner.l
@@ -85,7 +85,6 @@ DIAG_OFF_FLEX
/*#undef YY_NO_UNPUT*/
static int set_lval_str(int token, const char *token_value);
-static int set_lval_field(int token, header_field_info *hfinfo, const char *token_value);
static int simple(int token, const char *token_value);
#define SIMPLE(token) simple(token, yytext)
@@ -393,7 +392,6 @@ static int simple(int token, const char *token_value);
([.][-+[:alnum:]_:]+)+[.]{0,2} |
[-+[:alnum:]_:]+([.][-+[:alnum:]_:]+)*[.]{0,2} {
/* Is it a field name or some other value (float, integer, bytes, ...)? */
- header_field_info *hfinfo;
/* Trailing dot is allowed for floats, but make sure that trailing ".."
* is interpreted as a token on its own. */
@@ -401,19 +399,6 @@ static int simple(int token, const char *token_value);
yyless(yyleng-2);
}
- hfinfo = proto_registrar_get_byname(yytext);
- if (hfinfo) {
- /* Yes, it's a field name */
- return set_lval_field(TOKEN_FIELD, hfinfo, yytext);
- }
-
- hfinfo = proto_registrar_get_byalias(yytext);
- if (hfinfo) {
- /* Yes, it's an aliased field name */
- add_deprecated_token(yyextra->deprecated, yytext);
- return set_lval_field(TOKEN_FIELD, hfinfo, yytext);
- }
-
/* No match, so treat it as an unparsed string */
return set_lval_str(TOKEN_UNPARSED, yytext);
}
@@ -492,11 +477,3 @@ set_lval_str(int token, const char *token_value)
stnode_init(df_lval, type_id, (gpointer)token_value, token_value);
return token;
}
-
-static int
-set_lval_field(int token, header_field_info *hfinfo, const char *token_value)
-{
- ws_assert(token == TOKEN_FIELD);
- stnode_init(df_lval, STTYPE_FIELD, hfinfo, token_value);
- return token;
-}