diff options
author | Huang Qiangxiong <qiangxiong.huang@qq.com> | 2020-12-27 13:57:17 +0800 |
---|---|---|
committer | AndersBroman <a.broman58@gmail.com> | 2020-12-27 11:32:10 +0000 |
commit | cd2d35c1d2b10fe2916469544cc2951a8979a4dc (patch) | |
tree | 12472adbf303c2e4b2d052c7740924d3a5655cb2 /epan/protobuf_lang_parser.lemon | |
parent | 5778b2403ea4fec9762f961a6ba4dbf33fd2ee05 (diff) |
Protobuf: fix bugs in parsing .proto files with complex syntax
Some .proto files contain complex syntax that is not described on the official protobuf site
(https://developers.google.com/protocol-buffers/docs/reference/proto3-spec).
1. Update 'epan/protobuf_lang_parser.lemon' to:
1) Support complex option names format (EBNF):
optionName = ( ident | "(" fullIdent ")" ) { "." ( ident | "(" fullIdent ")" ) }
for example, "option (complex_opt2).(grault) = 654;".
2) Make enum body support 'reserved' section (EBNF):
enumBody = "{" { reserved | option | enumField | emptyStatement } "}"
3) Allow the value of a field or enumValue option to be "{ ... }" as well as a constant:
enumValueOption = optionName "=" ( constant | customOptionValue ) ";"
fieldOption = optionName "=" ( constant | customOptionValue ) ";"
4) Allow a 'group' section without a 'label' (for example, in a 'oneof' section).
5) Make 'oneof' section support 'option' and 'group' sections (EBNF):
oneof = "oneof" oneofName "{" { oneofField | option | group | emptyStatement } "}"
6) Ignore unused 'extend' section.
7) Fix the bug when one string is split across multiple lines.
2. Update 'epan/protobuf_lang_tree.c' to:
8) Fix the bug in parsing repeated options.
3. Update 'test/suite_dissection.py' to add a test case for parsing complex syntax .proto files:
test/protobuf_lang_files/complex_proto_files/unittest_custom_options.proto
test/protobuf_lang_files/complex_proto_files/complex_syntax.proto
and dependency files:
test/protobuf_lang_files/well_know_types/google/protobuf/any.proto
test/protobuf_lang_files/well_know_types/google/protobuf/descriptor.proto
Refer to issue #17046
Diffstat (limited to 'epan/protobuf_lang_parser.lemon')
-rw-r--r-- | epan/protobuf_lang_parser.lemon | 62 |
1 files changed, 53 insertions, 9 deletions
diff --git a/epan/protobuf_lang_parser.lemon b/epan/protobuf_lang_parser.lemon index 36f9edb506..4aa1b9b85d 100644 --- a/epan/protobuf_lang_parser.lemon +++ b/epan/protobuf_lang_parser.lemon @@ -190,11 +190,17 @@ option ::= PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON. option ::= PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON. /* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */ -optionName ::= exIdent. -optionName(A) ::= PT_LPAREN exIdent(B) PT_RPAREN. +/* Offical PBL bugfix: optionName = ( ident | "(" fullIdent ")" ) { "." ( ident | "(" fullIdent ")" ) } */ +extIdentInParentheses(A) ::= PT_LPAREN exIdent(B) PT_RPAREN. { A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", NULL)); } -optionName(A) ::= PT_LPAREN exIdent(B) PT_RPAREN exIdent(C). /* Note that the exIdent contains "." */ - { A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", C->v, NULL)); } +optionName ::= exIdent. +optionName ::= extIdentInParentheses. +optionName(A) ::= optionName(B) exIdent(C). // Note that the exIdent contains "." + { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, C->v, NULL)); } +optionName(A) ::= optionName(B) PT_DOT extIdentInParentheses(C). + { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); } +optionName(A) ::= optionName(B) extIdentInParentheses(C). + { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); } /* Allow format which not defined in offical PBL specification like: option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" }; @@ -203,9 +209,30 @@ optionName(A) ::= PT_LPAREN exIdent(B) PT_RPAREN exIdent(C). /* Note that the e */ customOptionValue ::= PT_LCURLY customOptionBody PT_RCURLY. +/* The formal EBNF of customOptionBody seems to be */ +/* +customOptionBody ::= . +customOptionBody ::= customOptionBody optionField. +customOptionBody ::= customOptionBody PT_COMMA optionField. 
+customOptionBody ::= customOptionBody PT_SEMICOLON optionField. + +optionField ::= optionName PT_COLON constant. +optionField ::= optionName PT_COLON customOptionValue. +optionField ::= optionName customOptionValue. +optionField ::= optionName PT_COLON array. + +array ::= PT_LBRACKET arrayBody PT_RBRACKET. +arrayBodyConst ::= constant. +arrayBodyConst ::= arrayBody PT_COMMA constant. +arrayBodyCustom ::= customOptionValue. +arrayBodyCustom ::= arrayBody PT_COMMA customOptionValue. +arrayBody ::= arrayBodyConst. +arrayBody ::= arrayBodyCustom. +*/ +/* but for handling unexpected situations, we still use following EBNF */ customOptionBody ::= . customOptionBody ::= customOptionBody exIdent. -customOptionBody ::= customOptionBody strLit. +customOptionBody ::= customOptionBody PT_STRLIT. customOptionBody ::= customOptionBody symbolsWithoutCurly. customOptionBody ::= customOptionBody intLit. customOptionBody ::= customOptionBody customOptionValue. @@ -275,7 +302,9 @@ enum(A) ::= PT_ENUM enumName(B) PT_LCURLY enumBody(C) PT_RCURLY. { A = C; pbl_set_node_name(A, B->ln, B->v); } /* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */ +/* Offical PBL bugfix: enumBody = "{" { reserved | option | enumField | emptyStatement } "}" */ enumBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ENUM, NAME_TO_BE_SET); } +enumBody ::= enumBody reserved. enumBody ::= enumBody option. enumBody(A) ::= enumBody(B) enumField(C). { A = B; pbl_add_child(A, C); } enumBody ::= enumBody emptyStatement. @@ -296,7 +325,9 @@ enumValueOptions ::= enumValueOption. enumValueOptions ::= enumValueOptions PT_COMMA enumValueOption. /* v2/v3: enumValueOption = optionName "=" constant */ +/* Offical PBL bugfix: enumValueOption = optionName "=" ( constant | customOptionValue ) ";" */ enumValueOption ::= optionName PT_ASSIGN constant. +enumValueOption ::= optionName PT_ASSIGN customOptionValue. 
/* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */ /* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */ @@ -373,21 +404,30 @@ fieldOptions(A) ::= fieldOptions(B) PT_COMMA fieldOption(C). { A = B; pbl_add_child(A, C); } /* v2/v3: fieldOption = optionName "=" constant */ +/* Offical PBL bugfix: fieldOption = optionName "=" ( constant | customOptionValue ) ";" */ fieldOption(A) ::= optionName(B) PT_ASSIGN constant(C). { A = pbl_create_option_node(state->file, B->ln, B->v, C); } +fieldOption(A) ::= optionName(B) PT_ASSIGN customOptionValue. + { A = pbl_create_option_node(state->file, B->ln, B->v, pbl_store_string_token(state, g_strdup("{ ... }"))); } /* v2 only: group = label "group" groupName "=" fieldNumber messageBody */ +/* Offical PBL bugfix: there is no label if the 'group' is a member of oneof body */ +group(A) ::= PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY. + { A = C; pbl_set_node_name(A, B->ln, B->v); } group(A) ::= label PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY. { A = C; pbl_set_node_name(A, B->ln, B->v); } groupName ::= exIdent. /* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */ +/* Offical PBL bugfix: oneof = "oneof" oneofName "{" { oneofField | option | group | emptyStatement } "}" */ oneof(A) ::= PT_ONEOF oneofName(B) PT_LCURLY oneofBody(C) PT_RCURLY. { A = C; pbl_set_node_name(A, B->ln, B->v); } oneofBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ONEOF, NAME_TO_BE_SET); } oneofBody(A) ::= oneofBody(B) oneofField(C). { A = B; pbl_add_child(A, C); } +oneofBody ::= oneofBody option. +oneofBody ::= oneofBody group. oneofBody ::= oneofBody emptyStatement. /* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */ @@ -436,11 +476,13 @@ Note that there is an error in BNF definition about reserved fieldName. It's str quoteFieldNames ::= strLit. 
quoteFieldNames ::= quoteFieldNames PT_COMMA strLit. -/* v2 only: extend = "extend" messageType "{" {field | group | emptyStatement} "}" */ -extend(A) ::= PT_EXTEND(X) messageType(B) PT_LCURLY extendBody(C) PT_RCURLY. - { A = C; pbl_set_node_name(A, X->ln, pbl_store_string_token(state, g_strconcat(B, "Extend", NULL))); } +/* v2/v3: extend = "extend" messageType "{" {field | group | emptyStatement} "}" +Note that creating custom options uses extensions, which are permitted only for custom options in proto3. +We don't use custom options while parsing packet, so we just ignore the 'extend'. +*/ +extend(A) ::= PT_EXTEND messageType PT_LCURLY extendBody(B) PT_RCURLY. + { A = NULL; pbl_free_node(B); } -/* v2 only */ extendBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); } extendBody(A) ::= extendBody(B) field(C). { A = B; pbl_add_child(A, C); } extendBody(A) ::= extendBody(B) group(C). { A = B; pbl_add_child(A, C); } @@ -481,6 +523,8 @@ constant(A) ::= PT_MINUS exIdent(B). { A = pbl_store_string_token(state, g_strco exIdent ::= PT_IDENT. strLit(A) ::= PT_STRLIT(B). { A = pbl_store_string_token(state, g_strndup(B->v + 1, strlen(B->v) - 2)); } +/* support one string being splitted into multi-lines */ +strLit(A) ::= strLit(B) PT_STRLIT(C). { A = pbl_store_string_token(state, g_strconcat(B, g_strndup(C->v + 1, strlen(C->v) - 2), NULL)); } %code { |