diff options
author | João Valverde <j@v6e.pt> | 2021-10-08 13:26:53 +0100 |
---|---|---|
committer | João Valverde <j@v6e.pt> | 2021-10-08 19:18:56 +0100 |
commit | 2c701ddf6f6145a95eb19c9648d3beaedd7c3aa0 (patch) | |
tree | 28df962ed9b1072cde0900fc10832efcd0b4fffa /epan | |
parent | 9d87c4712eb51fefa2ed1f7b023945d18c8cc6ca (diff) |
dfilter: Improve grammar to parse ranges
Do the integer conversion for ranges in the parser. This is more
conventional, I think, and allows removing the unnecessary integer
syntax tree node type.
Try to minimize the number and complexity of lexical rules for
ranges. However, separate scanner states for integers and punctuation
still seem to be needed in order to disambiguate the ranges
[-n-n] and [-n--n].
Diffstat (limited to 'epan')
-rw-r--r-- | epan/dfilter/.editorconfig | 4 | ||||
-rw-r--r-- | epan/dfilter/CMakeLists.txt | 1 | ||||
-rw-r--r-- | epan/dfilter/dfilter-int.h | 3 | ||||
-rw-r--r-- | epan/dfilter/dfilter.c | 52 | ||||
-rw-r--r-- | epan/dfilter/grammar.lemon | 50 | ||||
-rw-r--r-- | epan/dfilter/scanner.l | 109 | ||||
-rw-r--r-- | epan/dfilter/semcheck.c | 2 | ||||
-rw-r--r-- | epan/dfilter/sttype-integer.c | 41 | ||||
-rw-r--r-- | epan/dfilter/syntax-tree.c | 22 | ||||
-rw-r--r-- | epan/dfilter/syntax-tree.h | 12 |
10 files changed, 105 insertions, 191 deletions
diff --git a/epan/dfilter/.editorconfig b/epan/dfilter/.editorconfig index 6538af7689..52f67cb70f 100644 --- a/epan/dfilter/.editorconfig +++ b/epan/dfilter/.editorconfig @@ -28,10 +28,6 @@ indent_size = tab indent_style = tab indent_size = tab -[sttype-integer.[ch]] -indent_style = tab -indent_size = tab - [sttype-pointer.[ch]] indent_style = tab indent_size = tab diff --git a/epan/dfilter/CMakeLists.txt b/epan/dfilter/CMakeLists.txt index 10e21e2a34..95ec4418ad 100644 --- a/epan/dfilter/CMakeLists.txt +++ b/epan/dfilter/CMakeLists.txt @@ -38,7 +38,6 @@ set(DFILTER_NONGENERATED_FILES gencode.c semcheck.c sttype-function.c - sttype-integer.c sttype-pointer.c sttype-range.c sttype-set.c diff --git a/epan/dfilter/dfilter-int.h b/epan/dfilter/dfilter-int.h index 2b982f799d..649ad82356 100644 --- a/epan/dfilter/dfilter-int.h +++ b/epan/dfilter/dfilter-int.h @@ -87,6 +87,9 @@ DfilterTrace(FILE *TraceFILE, char *zTracePrompt); stnode_t * dfilter_new_function(dfwork_t *dfw, const char *name); +gboolean +dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint); + const char *tokenstr(int token); #endif diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c index 287e315bdf..32afd60c99 100644 --- a/epan/dfilter/dfilter.c +++ b/epan/dfilter/dfilter.c @@ -78,6 +78,58 @@ dfilter_new_function(dfwork_t *dfw, const char *name) return stnode_new(STTYPE_FUNCTION, def, name); } +gboolean +dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint) +{ + char *endptr; + long integer; + + errno = 0; + integer = strtol(s, &endptr, 0); + + if (errno == EINVAL || endptr == s || *endptr != '\0') { + /* This isn't a valid number. 
*/ + dfilter_parse_fail(dfw, "\"%s\" is not a valid number.", s); + return FALSE; + } + if (errno == ERANGE) { + if (integer == LONG_MAX) { + dfilter_parse_fail(dfw, "\"%s\" causes an integer overflow.", s); + } + else if (integer == LONG_MIN) { + dfilter_parse_fail(dfw, "\"%s\" causes an integer underflow.", s); + } + else { + /* + * XXX - can "strtol()" set errno to ERANGE without + * returning LONG_MAX or LONG_MIN? + */ + dfilter_parse_fail(dfw, "\"%s\" is not an integer.", s); + } + return FALSE; + } + if (integer > G_MAXINT32) { + /* + * Fits in a long, but not in a gint32 (a long might be + * 64 bits). + */ + dfilter_parse_fail(dfw, "\"%s\" causes an integer overflow.", s); + return FALSE; + } + if (integer < G_MININT32) { + /* + * Fits in a long, but not in a gint32 (a long might be + * 64 bits). + */ + dfilter_parse_fail(dfw, "\"%s\" causes an integer underflow.", s); + return FALSE; + } + + *pint = (gint32)integer; + return TRUE; +} + + /* Initialize the dfilter module */ void dfilter_init(void) diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon index 3f9ae3ea37..3965cbae59 100644 --- a/epan/dfilter/grammar.lemon +++ b/epan/dfilter/grammar.lemon @@ -96,10 +96,6 @@ any "error" symbols are shifted, if possible. */ dfilter_fail(dfw, "\"%s\" was unexpected in this context.", (char *)stnode_data(TOKEN)); break; - case STTYPE_INTEGER: - dfilter_fail(dfw, "The integer %d was unexpected in this context.", - stnode_value(TOKEN)); - break; case STTYPE_FIELD: hfinfo = (header_field_info *)stnode_data(TOKEN); dfilter_fail(dfw, "Syntax error near \"%s\".", hfinfo->abbrev); @@ -201,54 +197,76 @@ range_node_list(L) ::= range_node_list(P) COMMA range_node(D). L = g_slist_append(P, D); } -/* x:y is offset:length */ +/* x:y */ range_node(D) ::= INTEGER(X) COLON INTEGER(Y). 
{ + int32_t start = 0, length = 0; + + dfilter_str_to_gint32(dfw, stnode_token_value(X), &start); + dfilter_str_to_gint32(dfw, stnode_token_value(Y), &length); + D = drange_node_new(); - drange_node_set_start_offset(D, stnode_value(X)); - drange_node_set_length(D, stnode_value(Y)); + drange_node_set_start_offset(D, start); + drange_node_set_length(D, length); stnode_free(X); stnode_free(Y); } -/* x-y == offset:offset */ +/* x-y */ range_node(D) ::= INTEGER(X) HYPHEN INTEGER(Y). { + int32_t start = 0, offset = 0; + + dfilter_str_to_gint32(dfw, stnode_token_value(X), &start); + dfilter_str_to_gint32(dfw, stnode_token_value(Y), &offset); + D = drange_node_new(); - drange_node_set_start_offset(D, stnode_value(X)); - drange_node_set_end_offset(D, stnode_value(Y)); + drange_node_set_start_offset(D, start); + drange_node_set_end_offset(D, offset); stnode_free(X); stnode_free(Y); } -/* :y == from start to offset */ +/* :y = 0:y*/ range_node(D) ::= COLON INTEGER(Y). { + int32_t length = 0; + + dfilter_str_to_gint32(dfw, stnode_token_value(Y), &length); + D = drange_node_new(); drange_node_set_start_offset(D, 0); - drange_node_set_length(D, stnode_value(Y)); + drange_node_set_length(D, length); stnode_free(Y); } -/* x: from offset to end */ +/* x: = x:-1 */ range_node(D) ::= INTEGER(X) COLON. { + int32_t start = 0; + + dfilter_str_to_gint32(dfw, stnode_token_value(X), &start); + D = drange_node_new(); - drange_node_set_start_offset(D, stnode_value(X)); + drange_node_set_start_offset(D, start); drange_node_set_to_the_end(D); stnode_free(X); } -/* x == x:1 */ +/* x = x:1 */ range_node(D) ::= INTEGER(X). 
{ + int32_t start = 0; + + dfilter_str_to_gint32(dfw, stnode_token_value(X), &start); + D = drange_node_new(); - drange_node_set_start_offset(D, stnode_value(X)); + drange_node_set_start_offset(D, start); drange_node_set_length(D, 1); stnode_free(X); diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l index ff0694313e..f5991cbe7d 100644 --- a/epan/dfilter/scanner.l +++ b/epan/dfilter/scanner.l @@ -86,10 +86,8 @@ DIAG_OFF_FLEX static int set_lval_str(int token, const char *token_value); static int set_lval_field(int token, header_field_info *hfinfo, const char *token_value); -static int set_lval_int(dfwork_t *dfw, int token, const char *token_value); static int simple(int token, const char *token_value); #define SIMPLE(token) simple(token, yytext) -static gboolean str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint); /* * Sleazy hack to suppress compiler warnings in yy_fatal_error(). @@ -168,20 +166,20 @@ static gboolean str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint); "or" return SIMPLE(TOKEN_TEST_OR); "in" return SIMPLE(TOKEN_TEST_IN); + /* + * The syntax for ranges must handle slice[-d-d] and slice[-d--5], e.g: + * frame[-10-5] (minus ten to five) + * frame[-10--5] (minus ten to minus 5) + */ "[" { BEGIN(RANGE_INT); return SIMPLE(TOKEN_LBRACKET); } -<RANGE_INT>[+-]?[[:digit:]]+ { - BEGIN(RANGE_PUNCT); - return set_lval_int(yyextra->dfw, TOKEN_INTEGER, yytext); -} - -<RANGE_INT>[+-]?0x[[:xdigit:]]+ { +<RANGE_INT>[+-]?[[:alnum:]]+ { BEGIN(RANGE_PUNCT); - return set_lval_int(yyextra->dfw, TOKEN_INTEGER, yytext); + return set_lval_str(TOKEN_INTEGER, yytext); } <RANGE_INT,RANGE_PUNCT>":" { @@ -207,17 +205,13 @@ static gboolean str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint); /* Error if none of the above while scanning a range (slice) */ <RANGE_PUNCT>[^:\-,\]]+ { - dfilter_fail(yyextra->dfw, "Invalid string \"%s\" found while scanning slice.", yytext); - return SCAN_FAILED; + BEGIN(RANGE_INT); + return set_lval_str(TOKEN_UNPARSED, 
yytext); } - /* XXX It would be nice to be able to match an entire non-integer string, - * but beware of Flex's "match the most text" rule. - */ - -<RANGE_INT>. { - dfilter_fail(yyextra->dfw, "Invalid character \"%s\" found while scanning slice; expected integer.", yytext); - return SCAN_FAILED; +<RANGE_INT>[+-]?[^[:alnum:]\]]+ { + BEGIN(RANGE_PUNCT); + return set_lval_str(TOKEN_UNPARSED, yytext); } [rR]{0,1}\042 { @@ -488,6 +482,10 @@ set_lval_str(int token, const char *token_value) case TOKEN_UNPARSED: type_id = STTYPE_UNPARSED; break; + case TOKEN_INTEGER: + /* Not used in AST. */ + type_id = STTYPE_UNINITIALIZED; + break; default: ws_assert_not_reached(); } @@ -502,78 +500,3 @@ set_lval_field(int token, header_field_info *hfinfo, const char *token_value) stnode_init(df_lval, STTYPE_FIELD, hfinfo, token_value); return token; } - -static int -set_lval_int(dfwork_t *dfw, int token, const char *token_value) -{ - sttype_id_t type_id = STTYPE_UNINITIALIZED; - gint32 val; - - if (!str_to_gint32(dfw, token_value, &val)) { - return SCAN_FAILED; - } - - switch (token) { - case TOKEN_INTEGER: - type_id = STTYPE_INTEGER; - break; - default: - ws_assert_not_reached(); - } - - stnode_init_int(df_lval, type_id, val, token_value); - return token; -} - - -static gboolean -str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint) -{ - char *endptr; - long integer; - - errno = 0; - integer = strtol(s, &endptr, 0); - - if (errno == EINVAL || endptr == s || *endptr != '\0') { - /* This isn't a valid number. */ - dfilter_fail(dfw, "\"%s\" is not a valid number.", s); - return FALSE; - } - if (errno == ERANGE) { - if (integer == LONG_MAX) { - dfilter_fail(dfw, "\"%s\" causes an integer overflow.", s); - } - else if (integer == LONG_MIN) { - dfilter_fail(dfw, "\"%s\" causes an integer underflow.", s); - } - else { - /* - * XXX - can "strtol()" set errno to ERANGE without - * returning LONG_MAX or LONG_MIN? 
- */ - dfilter_fail(dfw, "\"%s\" is not an integer.", s); - } - return FALSE; - } - if (integer > G_MAXINT32) { - /* - * Fits in a long, but not in a gint32 (a long might be - * 64 bits). - */ - dfilter_fail(dfw, "\"%s\" causes an integer overflow.", s); - return FALSE; - } - if (integer < G_MININT32) { - /* - * Fits in a long, but not in a gint32 (a long might be - * 64 bits). - */ - dfilter_fail(dfw, "\"%s\" causes an integer underflow.", s); - return FALSE; - } - - *pint = (gint32)integer; - return TRUE; -} - diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c index 5c905c08a2..6fb869af38 100644 --- a/epan/dfilter/semcheck.c +++ b/epan/dfilter/semcheck.c @@ -511,7 +511,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1) case STTYPE_UNINITIALIZED: case STTYPE_TEST: - case STTYPE_INTEGER: case STTYPE_FVALUE: case STTYPE_SET: case STTYPE_PCRE: @@ -1354,7 +1353,6 @@ check_relation(dfwork_t *dfw, const char *relation_string, case STTYPE_UNINITIALIZED: case STTYPE_TEST: - case STTYPE_INTEGER: case STTYPE_FVALUE: case STTYPE_SET: default: diff --git a/epan/dfilter/sttype-integer.c b/epan/dfilter/sttype-integer.c deleted file mode 100644 index dc58ce984a..0000000000 --- a/epan/dfilter/sttype-integer.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Wireshark - Network traffic analyzer - * By Gerald Combs <gerald@wireshark.org> - * Copyright 2001 Gerald Combs - * - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ - -#include "config.h" - -#include "ftypes/ftypes.h" -#include "syntax-tree.h" - -void -sttype_register_integer(void) -{ - static sttype_t integer_type = { - STTYPE_INTEGER, - "INTEGER", - NULL, - NULL, - NULL, - NULL - }; - - sttype_register(&integer_type); -} - -/* - * Editor modelines - https://www.wireshark.org/tools/modelines.html - * - * Local variables: - * c-basic-offset: 8 - * tab-width: 8 - * indent-tabs-mode: t - * End: - * - * vi: set shiftwidth=8 tabstop=8 noexpandtab: - * :indentSize=8:tabSize=8:noTabs=false: - */ diff --git 
a/epan/dfilter/syntax-tree.c b/epan/dfilter/syntax-tree.c index 244e4b0760..c14bbbb56c 100644 --- a/epan/dfilter/syntax-tree.c +++ b/epan/dfilter/syntax-tree.c @@ -26,7 +26,6 @@ void sttype_init(void) { sttype_register_function(); - sttype_register_integer(); sttype_register_pointer(); sttype_register_range(); sttype_register_set(); @@ -89,7 +88,6 @@ _node_clear(stnode_t *node) node->type = NULL; node->flags = 0; node->data = NULL; - node->value = 0; } void @@ -109,7 +107,6 @@ _node_init(stnode_t *node, sttype_id_t type_id, gpointer data) ws_assert(!node->type); ws_assert(!node->data); node->flags = 0; - node->value = 0; if (type_id == STTYPE_UNINITIALIZED) { node->type = NULL; @@ -137,13 +134,6 @@ stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, const char *tok } void -stnode_init_int(stnode_t *node, sttype_id_t type_id, gint32 value, const char *token_value) -{ - stnode_init(node, type_id, NULL, token_value); - node->value = value; -} - -void stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data) { uint16_t flags = node->flags; /* Save flags. 
*/ @@ -186,7 +176,6 @@ stnode_dup(const stnode_t *org) node->data = type->func_dup(org->data); else node->data = org->data; - node->value = org->value; node->token_value = g_strdup(org->token_value); @@ -238,13 +227,6 @@ stnode_steal_data(stnode_t *node) return data; } -gint32 -stnode_value(stnode_t *node) -{ - ws_assert_magic(node, STNODE_MAGIC); - return node->value; -} - const char * stnode_token_value(stnode_t *node) { @@ -274,9 +256,6 @@ stnode_set_inside_parens(stnode_t *node, gboolean inside) char * stnode_tostr(stnode_t *node) { - if (stnode_type_id(node) == STTYPE_INTEGER) - return g_strdup_printf("%"PRId32, stnode_value(node)); - if (node->type->func_tostr == NULL) return g_strdup("<FIXME>"); @@ -298,7 +277,6 @@ sprint_node(stnode_t *node) s = stnode_tostr(node); wmem_strbuf_append_printf(buf, "\tdata = %s<%s>\n", stnode_type_name(node), s); g_free(s); - wmem_strbuf_append_printf(buf, "\tvalue = %"PRId32"\n", stnode_value(node)); wmem_strbuf_append_printf(buf, "}\n"); return wmem_strbuf_finalize(buf); } diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h index c7728c4707..d8d352c2ab 100644 --- a/epan/dfilter/syntax-tree.h +++ b/epan/dfilter/syntax-tree.h @@ -27,7 +27,6 @@ typedef enum { STTYPE_CHARCONST, STTYPE_FIELD, STTYPE_FVALUE, - STTYPE_INTEGER, STTYPE_RANGE, STTYPE_FUNCTION, STTYPE_SET, @@ -58,12 +57,7 @@ typedef struct { uint32_t magic; sttype_t *type; uint16_t flags; - - /* This could be made an enum, but I haven't - * set aside to time to do so. 
*/ gpointer data; - int32_t value; - char *token_value; } stnode_t; @@ -98,9 +92,6 @@ void stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, const char *token_value); void -stnode_init_int(stnode_t *node, sttype_id_t type_id, gint32 value, const char *token_value); - -void stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data); void @@ -118,9 +109,6 @@ stnode_data(stnode_t *node); gpointer stnode_steal_data(stnode_t *node); -gint32 -stnode_value(stnode_t *node); - const char * stnode_token_value(stnode_t *node); |