diff options
author | Jakub Zawadzki <darkjames-ws@darkjames.pl> | 2017-09-02 21:47:14 +0200 |
---|---|---|
committer | Jakub Zawadzki <darkjames-ws@darkjames.pl> | 2017-09-08 18:14:59 +0000 |
commit | 1c6f6c691f52bc3c0a660e6f82453844135acb30 (patch) | |
tree | 64b06491a0bd51fe2c811e6b32c6e08f3d13ad1a | |
parent | 585297b189ba7f39db34f66f57d25429c8daf570 (diff) |
sharkd: implement proper JSON string unescaping, based on JSON dissector.
Change-Id: I749b78b759f98c78526840b8bb1cbccfc17a5611
Reviewed-on: https://code.wireshark.org/review/23365
Petri-Dish: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Dario Lombardo <lomato@gmail.com>
Reviewed-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
-rw-r--r-- | debian/libwsutil0.symbols | 1 | ||||
-rw-r--r-- | sharkd_session.c | 28 | ||||
-rw-r--r-- | wsutil/wsjsmn.c | 103 | ||||
-rw-r--r-- | wsutil/wsjsmn.h | 5 |
4 files changed, 117 insertions, 20 deletions
diff --git a/debian/libwsutil0.symbols b/debian/libwsutil0.symbols
index f81f22c036..5d1b215dab 100644
--- a/debian/libwsutil0.symbols
+++ b/debian/libwsutil0.symbols
@@ -185,3 +185,4 @@ libwsutil.so.0 libwsutil0 #MINVER#
  ws_utf8_char_len@Base 1.12.0~rc1
  ws_xton@Base 1.12.0~rc1
  wsjsmn_parse@Base 2.3.0
+ wsjsmn_unescape_json_string@Base 2.5.0
diff --git a/sharkd_session.c b/sharkd_session.c
index 431e01e9f6..299c711572 100644
--- a/sharkd_session.c
+++ b/sharkd_session.c
@@ -81,26 +81,10 @@
 
 #include "sharkd.h"
 
-static void
+static gboolean
 json_unescape_str(char *input)
 {
-	char *output = input;
-
-	while (*input)
-	{
-		char ch = *input++;
-
-		if (ch == '\\')
-		{
-			/* TODO, add more escaping rules */
-			ch = *input++;
-		}
-
-		*output = ch;
-		output++;
-	}
-
-	*output = '\0';
+	return wsjsmn_unescape_json_string(input, input);
 }
 
 static const char *
@@ -3646,8 +3630,12 @@ sharkd_session_process(char *buf, const jsmntok_t *tokens, int count)
 		buf[tokens[i + 0].end] = '\0';
 		buf[tokens[i + 1].end] = '\0';
 
-		json_unescape_str(&buf[tokens[i + 0].start]);
-		json_unescape_str(&buf[tokens[i + 1].start]);
+		/* unescape only value, as keys are simple strings */
+		if (!json_unescape_str(&buf[tokens[i + 1].start]))
+		{
+			fprintf(stderr, "sanity check(3a): [%d] cannot unescape string\n", i + 1);
+			return;
+		}
 	}
 
 	{
diff --git a/wsutil/wsjsmn.c b/wsutil/wsjsmn.c
index 949b983efc..19d171ca88 100644
--- a/wsutil/wsjsmn.c
+++ b/wsutil/wsjsmn.c
@@ -26,6 +26,8 @@
 #include <string.h>
 
 #include <wsutil/jsmn.h>
+#include <wsutil/str_util.h>
+#include <wsutil/unicode-utils.h>
 #include "log.h"
 
 gboolean jsmn_is_json(const guint8* buf, const size_t len)
@@ -76,6 +78,107 @@ int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens)
 	return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens);
 }
 
+gboolean wsjsmn_unescape_json_string(const char *input, char *output)
+{
+	while (*input) {
+		char ch = *input++;
+
+		if (ch == '\\') {
+			ch = *input++;
+
+			switch (ch) {
+				case '\"':
+				case '\\':
+				case '/':
+					*output++ = ch;
+					break;
+
+				case 'b':
+					*output++ = '\b';
+					break;
+				case 'f':
+					*output++ = '\f';
+					break;
+				case 'n':
+					*output++ = '\n';
+					break;
+				case 'r':
+					*output++ = '\r';
+					break;
+				case 't':
+					*output++ = '\t';
+					break;
+
+				case 'u':
+				{
+					guint32 unicode_hex = 0;
+					int k;
+					int bin;
+
+					for (k = 0; k < 4; k++) {
+						unicode_hex <<= 4;
+
+						ch = *input++;
+						bin = ws_xton(ch);
+						if (bin == -1)
+							return FALSE;
+						unicode_hex |= bin;
+					}
+
+					if ((IS_LEAD_SURROGATE(unicode_hex))) {
+						guint16 lead_surrogate = unicode_hex;
+						guint16 trail_surrogate = 0;
+
+						if (input[0] != '\\' || input[1] != 'u')
+							return FALSE;
+						input += 2;
+
+						for (k = 0; k < 4; k++) {
+							trail_surrogate <<= 4;
+
+							ch = *input++;
+							bin = ws_xton(ch);
+							if (bin == -1)
+								return FALSE;
+							trail_surrogate |= bin;
+						}
+
+						if ((!IS_TRAIL_SURROGATE(trail_surrogate)))
+							return FALSE;
+
+						unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate);
+
+					} else if ((IS_TRAIL_SURROGATE(unicode_hex))) {
+						return FALSE;
+					}
+
+					if (!g_unichar_validate(unicode_hex))
+						return FALSE;
+
+					/* Don't allow NUL byte injection. */
+					if (unicode_hex == 0)
+						return FALSE;
+
+					/* \uXXXX => 6 bytes, and g_unichar_to_utf8() requires to have output buffer at least 6 bytes -> OK. */
+					k = g_unichar_to_utf8(unicode_hex, output);
+					output += k;
+					break;
+				}
+
+				default:
+					return FALSE;
+			}
+
+		} else {
+			*output = ch;
+			output++;
+		}
+	}
+
+	*output = '\0';
+	return TRUE;
+}
+
 /*
  * Editor modelines - https://www.wireshark.org/tools/modelines.html
  *
diff --git a/wsutil/wsjsmn.h b/wsutil/wsjsmn.h
index 4b4d9054db..98084ca5c5 100644
--- a/wsutil/wsjsmn.h
+++ b/wsutil/wsjsmn.h
@@ -41,6 +41,11 @@ WS_DLL_PUBLIC gboolean jsmn_is_json(const guint8* buf, const size_t len);
 
 WS_DLL_PUBLIC int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens);
 
+/**
+ * Try to unescape input JSON string. output can be the same pointer as input, or must have the same buffer size as input.
+ */
+WS_DLL_PUBLIC gboolean wsjsmn_unescape_json_string(const char *input, char *output);
+
 #ifdef __cplusplus
 }
 #endif