/* packet-json.c * Routines for JSON dissection * References: * RFC 4627: http://tools.ietf.org/html/rfc4627 * Website: http://json.org/ * * Copyright 2010, Jakub Zawadzki * * Wireshark - Network traffic analyzer * By Gerald Combs * Copyright 1998 Gerald Combs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #define NEW_PROTO_TREE_API #include "config.h" #include #include #include #include #include #include #include "packet-http.h" void proto_register_json(void); void proto_reg_handoff_json(void); static char *json_string_unescape(tvbparse_elem_t *tok); static dissector_handle_t json_handle; static int proto_json = -1; static gint ett_json = -1; static gint ett_json_array = -1; static gint ett_json_object = -1; static gint ett_json_member = -1; static header_field_info *hfi_json = NULL; #define JSON_HFI_INIT HFI_INIT(proto_json) static header_field_info hfi_json_array JSON_HFI_INIT = { "Array", "json.array", FT_NONE, BASE_NONE, NULL, 0x00, "JSON array", HFILL }; static header_field_info hfi_json_object JSON_HFI_INIT = { "Object", "json.object", FT_NONE, BASE_NONE, NULL, 0x00, "JSON object", HFILL }; static header_field_info hfi_json_member JSON_HFI_INIT = { "Member", "json.member", FT_NONE, BASE_NONE, NULL, 0x00, "JSON object member", HFILL }; static header_field_info hfi_json_key JSON_HFI_INIT = { "Key", "json.key", FT_STRING, STR_UNICODE, NULL, 0x00, NULL, HFILL }; static header_field_info hfi_json_value_string JSON_HFI_INIT = /* FT_STRINGZ? */ { "String value", "json.value.string", FT_STRING, STR_UNICODE, NULL, 0x00, "JSON string value", HFILL }; static header_field_info hfi_json_value_number JSON_HFI_INIT = /* FT_DOUBLE/ FT_INT64? */ { "Number value", "json.value.number", FT_STRING, BASE_NONE, NULL, 0x00, "JSON number value", HFILL }; static header_field_info hfi_json_value_false JSON_HFI_INIT = { "False value", "json.value.false", FT_NONE, BASE_NONE, NULL, 0x00, "JSON false value", HFILL }; static header_field_info hfi_json_value_null JSON_HFI_INIT = { "Null value", "json.value.null", FT_NONE, BASE_NONE, NULL, 0x00, "JSON null value", HFILL }; static header_field_info hfi_json_value_true JSON_HFI_INIT = { "True value", "json.value.true", FT_NONE, BASE_NONE, NULL, 0x00, "JSON true value", HFILL }; static tvbparse_wanted_t* want; static tvbparse_wanted_t* want_ignore; static dissector_handle_t text_lines_handle; typedef enum { JSON_TOKEN_INVALID = -1, JSON_TOKEN_NUMBER = 0, JSON_TOKEN_STRING, JSON_TOKEN_FALSE, JSON_TOKEN_NULL, JSON_TOKEN_TRUE, /* not really tokens ... */ JSON_OBJECT, JSON_ARRAY } json_token_type_t; typedef struct { wmem_stack_t *stack; } json_parser_data_t; static int dissect_json(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data) { proto_tree *json_tree = NULL; proto_item *ti = NULL; json_parser_data_t parser_data; tvbparse_t *tt; http_message_info_t *message_info; const char *data_name; int offset; /* JSON dissector can be called in a JSON native file or when transported * by another protocol. We set the column values only if they've not been * already set by someone else. */ wmem_list_frame_t *proto = wmem_list_frame_prev(wmem_list_tail(pinfo->layers)); if (proto) { const char *name = proto_get_protocol_filter_name(GPOINTER_TO_INT(wmem_list_frame_data(proto))); if (!strcmp(name, "frame")) { col_set_str(pinfo->cinfo, COL_PROTOCOL, "JSON"); col_set_str(pinfo->cinfo, COL_INFO, "JavaScript Object Notation"); } } data_name = pinfo->match_string; if (! (data_name && data_name[0])) { /* * No information from "match_string" */ message_info = (http_message_info_t *)data; if (message_info == NULL) { /* * No information from dissector data */ data_name = NULL; } else { data_name = message_info->media_str; if (! (data_name && data_name[0])) { /* * No information from dissector data */ data_name = NULL; } } } if (tree) { ti = proto_tree_add_item(tree, hfi_json, tvb, 0, -1, ENC_NA); json_tree = proto_item_add_subtree(ti, ett_json); if (data_name) proto_item_append_text(ti, ": %s", data_name); } offset = 0; parser_data.stack = wmem_stack_new(wmem_packet_scope()); wmem_stack_push(parser_data.stack, json_tree); tt = tvbparse_init(tvb, offset, -1, &parser_data, want_ignore); /* XXX, only one json in packet? */ while ((tvbparse_get(tt, want))) ; offset = tvbparse_curr_offset(tt); proto_item_set_len(ti, offset); /* if we have some unparsed data, pass to data-text-lines dissector (?) */ if (tvb_reported_length_remaining(tvb, offset) > 0) { tvbuff_t *next_tvb; next_tvb = tvb_new_subset_remaining(tvb, offset); call_dissector_with_data(text_lines_handle, next_tvb, pinfo, tree, data); } else if (data_name) { col_append_sep_fstr(pinfo->cinfo, COL_INFO, " ", "(%s)", data_name); } return tvb_captured_length(tvb); } /* * For dissecting JSON in a file; we don't get passed a media type. */ static int dissect_json_file(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void* data _U_) { return dissect_json(tvb, pinfo, tree, NULL); } static void before_object(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack); proto_tree *subtree; proto_item *ti; ti = proto_tree_add_item(tree, &hfi_json_object, tok->tvb, tok->offset, tok->len, ENC_NA); subtree = proto_item_add_subtree(ti, ett_json_object); wmem_stack_push(data->stack, subtree); } static void after_object(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *elem _U_) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; wmem_stack_pop(data->stack); } static void before_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack); proto_tree *subtree; proto_item *ti; ti = proto_tree_add_item(tree, &hfi_json_member, tok->tvb, tok->offset, tok->len, ENC_NA); subtree = proto_item_add_subtree(ti, ett_json_member); wmem_stack_push(data->stack, subtree); } static void after_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; proto_tree *tree = (proto_tree *)wmem_stack_pop(data->stack); if (tree) { tvbparse_elem_t *key_tok = tok->sub; if (key_tok && key_tok->id == JSON_TOKEN_STRING) { char *key = json_string_unescape(key_tok); proto_tree_add_string(tree, &hfi_json_key, key_tok->tvb, key_tok->offset, key_tok->len, key); proto_item_append_text(tree, " Key: %s", key); } } } static void before_array(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack); proto_tree *subtree; proto_item *ti; ti = proto_tree_add_item(tree, &hfi_json_array, tok->tvb, tok->offset, tok->len, ENC_NA); subtree = proto_item_add_subtree(ti, ett_json_array); wmem_stack_push(data->stack, subtree); } static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *elem _U_) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; wmem_stack_pop(data->stack); } static int json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max) { int len = ws_utf8_char_len((guint8) *buf); /* XXX, before moving to core API check if it's off-by-one safe. * For JSON analyzer it's not a problem * (string always terminated by ", which is not valid UTF-8 continuation character) */ if (len == -1 || ((guint) (offset + len)) >= (guint) offset_max) { *buf = '?'; return 1; } /* assume it's valid UTF-8 */ tvb_memcpy(tvb, buf + 1, offset + 1, len - 1); if (!g_utf8_validate(buf, len, NULL)) { *buf = '?'; return 1; } return len; } static char *json_string_unescape(tvbparse_elem_t *tok) { char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1); int i, j; j = 0; for (i = 1; i < tok->len - 1; i++) { guint8 ch = tvb_get_guint8(tok->tvb, tok->offset + i); int bin; if (ch == '\\') { i++; ch = tvb_get_guint8(tok->tvb, tok->offset + i); switch (ch) { case '\"': case '\\': case '/': str[j++] = ch; break; case 'b': str[j++] = '\b'; break; case 'f': str[j++] = '\f'; break; case 'n': str[j++] = '\n'; break; case 'r': str[j++] = '\r'; break; case 't': str[j++] = '\t'; break; case 'u': { guint32 unicode_hex = 0; gboolean valid = TRUE; int k; for (k = 0; k < 4; k++) { i++; unicode_hex <<= 4; ch = tvb_get_guint8(tok->tvb, tok->offset + i); bin = ws_xton(ch); if (bin == -1) { valid = FALSE; break; } unicode_hex |= bin; } if ((IS_LEAD_SURROGATE(unicode_hex))) { ch = tvb_get_guint8(tok->tvb, tok->offset + i + 1); if (ch == '\\') { i++; ch = tvb_get_guint8(tok->tvb, tok->offset + i + 1); if (ch == 'u') { guint16 lead_surrogate = unicode_hex; guint16 trail_surrogate = 0; i++; for (k = 0; k < 4; k++) { i++; trail_surrogate <<= 4; ch = tvb_get_guint8(tok->tvb, tok->offset + i); bin = ws_xton(ch); if (bin == -1) { valid = FALSE; break; } trail_surrogate |= bin; } if ((IS_TRAIL_SURROGATE(trail_surrogate))) { unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate); } else { valid = FALSE; } } else { valid = FALSE; } } else { valid = FALSE; } } else if ((IS_TRAIL_SURROGATE(unicode_hex))) { i++; valid = FALSE; } if (valid && g_unichar_validate(unicode_hex) && g_unichar_isprint(unicode_hex)) { /* \uXXXX => 6 bytes */ int charlen = g_unichar_to_utf8(unicode_hex, &str[j]); j += charlen; } else str[j++] = '?'; break; } default: /* not valid by JSON grammar (also tvbparse rules should not allow it) */ DISSECTOR_ASSERT_NOT_REACHED(); break; } } else { int utf_len; str[j] = ch; /* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */ utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len); j += utf_len; i += (utf_len - 1); } } str[j] = '\0'; return str; } static void after_value(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *tok) { json_parser_data_t *data = (json_parser_data_t *) tvbparse_data; proto_tree *tree = (proto_tree *)wmem_stack_peek(data->stack); json_token_type_t value_id = JSON_TOKEN_INVALID; if (tok->sub) value_id = (json_token_type_t)tok->sub->id; switch (value_id) { case JSON_TOKEN_STRING: if (tok->len >= 2) proto_tree_add_string(tree, &hfi_json_value_string, tok->tvb, tok->offset, tok->len, json_string_unescape(tok)); else proto_tree_add_item(tree, &hfi_json_value_string, tok->tvb, tok->offset, tok->len, ENC_ASCII|ENC_NA); break; case JSON_TOKEN_NUMBER: /* XXX, convert to number */ proto_tree_add_item(tree, &hfi_json_value_number, tok->tvb, tok->offset, tok->len, ENC_ASCII|ENC_NA); break; case JSON_TOKEN_FALSE: proto_tree_add_item(tree, &hfi_json_value_false, tok->tvb, tok->offset, tok->len, ENC_NA); break; case JSON_TOKEN_NULL: proto_tree_add_item(tree, &hfi_json_value_null, tok->tvb, tok->offset, tok->len, ENC_NA); break; case JSON_TOKEN_TRUE: proto_tree_add_item(tree, &hfi_json_value_true, tok->tvb, tok->offset, tok->len, ENC_NA); break; case JSON_OBJECT: case JSON_ARRAY: /* already added */ break; default: proto_tree_add_format_text(tree, tok->tvb, tok->offset, tok->len); break; } } static void init_json_parser(void) { static tvbparse_wanted_t _want_object; static tvbparse_wanted_t _want_array; tvbparse_wanted_t *want_object, *want_array; tvbparse_wanted_t *want_member; tvbparse_wanted_t *want_string; tvbparse_wanted_t *want_number, *want_int; tvbparse_wanted_t *want_value; tvbparse_wanted_t *want_value_separator; #define tvbparse_optional(id, private_data, before_cb, after_cb, wanted) \ tvbparse_some(id, 0, 1, private_data, before_cb, after_cb, wanted) tvbparse_wanted_t *want_quot = tvbparse_char(-1,"\"",NULL,NULL,NULL); want_string = tvbparse_set_seq(JSON_TOKEN_STRING, NULL, NULL, NULL, want_quot, tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL, tvbparse_set_oneof(-1, NULL, NULL, NULL, tvbparse_not_chars(-1, 0, 0, "\"" "\\", NULL, NULL, NULL), /* XXX, without invalid unicode characters */ tvbparse_set_seq(-1, NULL, NULL, NULL, tvbparse_char(-1, "\\", NULL, NULL, NULL), tvbparse_set_oneof(-1, NULL, NULL, NULL, tvbparse_chars(-1, 0, 1, "\"" "\\" "/bfnrt", NULL, NULL, NULL), tvbparse_set_seq(-1, NULL, NULL, NULL, tvbparse_char(-1, "u", NULL, NULL, NULL), tvbparse_chars(-1, 4, 4, "0123456789abcdefABCDEF", NULL, NULL, NULL), NULL), NULL), NULL), NULL) ), want_quot, NULL); want_value_separator = tvbparse_char(-1, ",", NULL, NULL, NULL); /* int = zero / ( digit1-9 *DIGIT ) */ want_int = tvbparse_set_oneof(-1, NULL, NULL, NULL, tvbparse_char(-1, "0", NULL, NULL, NULL), tvbparse_set_seq(-1, NULL, NULL, NULL, tvbparse_chars(-1, 1, 1, "123456789", NULL, NULL, NULL), tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() don't respect 0 as min_len ;/ */ tvbparse_chars(-1, 0, 0, "0123456789", NULL, NULL, NULL)), NULL), NULL); /* number = [ minus ] int [ frac ] [ exp ] */ want_number = tvbparse_set_seq(JSON_TOKEN_NUMBER, NULL, NULL, NULL, tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() don't respect 0 as min_len ;/ */ tvbparse_chars(-1, 0, 1, "-", NULL, NULL, NULL)), want_int, /* frac = decimal-point 1*DIGIT */ tvbparse_optional(-1, NULL, NULL, NULL, tvbparse_set_seq(-1, NULL, NULL, NULL, tvbparse_char(-1, ".", NULL, NULL, NULL), tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL), NULL)), /* exp = e [ minus / plus ] 1*DIGIT */ tvbparse_optional(-1, NULL, NULL, NULL, tvbparse_set_seq(-1, NULL, NULL, NULL, tvbparse_char(-1, "eE", NULL, NULL, NULL), tvbparse_optional(-1, NULL, NULL, NULL, /* tvbparse_chars() don't respect 0 as min_len ;/ */ tvbparse_chars(-1, 0, 1, "-+", NULL, NULL, NULL)), tvbparse_chars(-1, 1, 0, "0123456789", NULL, NULL, NULL), NULL)), NULL); /* value = false / null / true / object / array / number / string */ want_value = tvbparse_set_oneof(-1, NULL, NULL, after_value, tvbparse_string(JSON_TOKEN_FALSE, "false", NULL, NULL, NULL), tvbparse_string(JSON_TOKEN_NULL, "null", NULL, NULL, NULL), tvbparse_string(JSON_TOKEN_TRUE, "true", NULL, NULL, NULL), &_want_object, &_want_array, want_number, want_string, NULL); /* array = begin-array [ value *( value-separator value ) ] end-array */ want_array = tvbparse_set_seq(JSON_ARRAY, NULL, before_array, after_array, tvbparse_char(-1, "[", NULL, NULL, NULL), tvbparse_optional(-1, NULL, NULL, NULL, tvbparse_set_seq(-1, NULL, NULL, NULL, want_value, tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL, tvbparse_set_seq(-1, NULL, NULL, NULL, want_value_separator, want_value, NULL)), NULL) ), tvbparse_char(-1, "]", NULL, NULL, NULL), NULL); _want_array = *want_array; /* member = string name-separator value */ want_member = tvbparse_set_seq(-1, NULL, before_member, after_member, want_string, tvbparse_char(-1, ":", NULL, NULL, NULL), want_value, NULL); /* object = begin-object [ member *( value-separator member ) ] end-object */ want_object = tvbparse_set_seq(JSON_OBJECT, NULL, before_object, after_object, tvbparse_char(-1, "{", NULL, NULL, NULL), tvbparse_optional(-1, NULL, NULL, NULL, tvbparse_set_seq(-1, NULL, NULL, NULL, want_member, tvbparse_some(-1, 0, G_MAXINT, NULL, NULL, NULL, tvbparse_set_seq(-1, NULL, NULL, NULL, want_value_separator, want_member, NULL)), NULL) ), tvbparse_char(-1, "}", NULL, NULL, NULL), NULL); _want_object = *want_object; want_ignore = tvbparse_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL); /* JSON-text = object / array */ want = tvbparse_set_oneof(-1, NULL, NULL, NULL, want_object, want_array, /* tvbparse_not_chars(-1, 1, 0, " \t\r\n", NULL, NULL, NULL), */ NULL); /* XXX, heur? */ } /* This function leverages the libjsmn to undestand if the payload is json or not */ static gboolean dissect_json_heur(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data) { guint len = tvb_captured_length(tvb); const guint8* buf = tvb_get_string_enc(wmem_packet_scope(), tvb, 0, len, ENC_ASCII); if (jsmn_is_json(buf, len) == FALSE) return FALSE; return (dissect_json(tvb, pinfo, tree, data) != 0); } void proto_register_json(void) { static gint *ett[] = { &ett_json, &ett_json_array, &ett_json_object, &ett_json_member }; #ifndef HAVE_HFI_SECTION_INIT static header_field_info *hfi[] = { &hfi_json_array, &hfi_json_object, &hfi_json_member, &hfi_json_key, &hfi_json_value_string, &hfi_json_value_number, &hfi_json_value_false, &hfi_json_value_null, &hfi_json_value_true, }; #endif proto_json = proto_register_protocol("JavaScript Object Notation", "JSON", "json"); hfi_json = proto_registrar_get_nth(proto_json); proto_register_fields(proto_json, hfi, array_length(hfi)); proto_register_subtree_array(ett, array_length(ett)); json_handle = register_dissector("json", dissect_json, proto_json); init_json_parser(); } void proto_reg_handoff_json(void) { dissector_handle_t json_file_handle = create_dissector_handle(dissect_json_file, proto_json); heur_dissector_add("hpfeeds", dissect_json_heur, "JSON over HPFEEDS", "json_hpfeeds", proto_json, HEURISTIC_ENABLE); heur_dissector_add("db-lsp", dissect_json_heur, "JSON over DB-LSP", "json_db_lsp", proto_json, HEURISTIC_ENABLE); dissector_add_uint("wtap_encap", WTAP_ENCAP_JSON, json_file_handle); dissector_add_string("media_type", "application/json", json_handle); /* RFC 4627 */ dissector_add_string("media_type", "application/json-rpc", json_handle); /* JSON-RPC over HTTP */ dissector_add_string("media_type", "application/jsonrequest", json_handle); /* JSON-RPC over HTTP */ dissector_add_string("media_type", "application/dds-web+json", json_handle); /* DDS Web Integration Service over HTTP */ text_lines_handle = find_dissector_add_dependency("data-text-lines", proto_json); } /* * Editor modelines - http://www.wireshark.org/tools/modelines.html * * Local variables: * c-basic-offset: 8 * tab-width: 8 * indent-tabs-mode: t * End: * * vi: set shiftwidth=8 tabstop=8 noexpandtab: * :indentSize=8:tabSize=8:noTabs=false: */