diff options
author | Jakub Zawadzki <darkjames-ws@darkjames.pl> | 2017-09-02 21:47:14 +0200 |
---|---|---|
committer | Jakub Zawadzki <darkjames-ws@darkjames.pl> | 2017-09-08 18:14:59 +0000 |
commit | 1c6f6c691f52bc3c0a660e6f82453844135acb30 (patch) | |
tree | 64b06491a0bd51fe2c811e6b32c6e08f3d13ad1a /wsutil | |
parent | 585297b189ba7f39db34f66f57d25429c8daf570 (diff) |
sharkd: implement proper JSON string unescaping, based on JSON dissector.
Change-Id: I749b78b759f98c78526840b8bb1cbccfc17a5611
Reviewed-on: https://code.wireshark.org/review/23365
Petri-Dish: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Dario Lombardo <lomato@gmail.com>
Reviewed-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Diffstat (limited to 'wsutil')
-rw-r--r-- | wsutil/wsjsmn.c | 103 | ||||
-rw-r--r-- | wsutil/wsjsmn.h | 5 |
2 files changed, 108 insertions, 0 deletions
diff --git a/wsutil/wsjsmn.c b/wsutil/wsjsmn.c index 949b983efc..19d171ca88 100644 --- a/wsutil/wsjsmn.c +++ b/wsutil/wsjsmn.c @@ -26,6 +26,8 @@ #include <string.h> #include <wsutil/jsmn.h> +#include <wsutil/str_util.h> +#include <wsutil/unicode-utils.h> #include "log.h" gboolean jsmn_is_json(const guint8* buf, const size_t len) @@ -76,6 +78,107 @@ int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens) return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens); } +gboolean wsjsmn_unescape_json_string(const char *input, char *output) /* Decodes JSON backslash escapes from input into output; TRUE on success, FALSE on the first invalid escape. */ +{ + while (*input) { + char ch = *input++; + + if (ch == '\\') { + ch = *input++; /* the character after the backslash selects the escape */ + + switch (ch) { + case '\"': + case '\\': + case '/': + *output++ = ch; + break; + + case 'b': + *output++ = '\b'; + break; + case 'f': + *output++ = '\f'; + break; + case 'n': + *output++ = '\n'; + break; + case 'r': + *output++ = '\r'; + break; + case 't': + *output++ = '\t'; + break; + + case 'u': + { + guint32 unicode_hex = 0; + int k; + int bin; + + for (k = 0; k < 4; k++) { /* accumulate the four hex digits of \uXXXX, most significant first */ + unicode_hex <<= 4; + + ch = *input++; + bin = ws_xton(ch); + if (bin == -1) /* a ws_xton() result of -1 is treated as an invalid hex digit */ + return FALSE; + unicode_hex |= bin; + } + + if ((IS_LEAD_SURROGATE(unicode_hex))) { /* a lead surrogate is accepted only when a second \uXXXX escape holding a trail surrogate follows immediately */ + guint16 lead_surrogate = unicode_hex; + guint16 trail_surrogate = 0; + + if (input[0] != '\\' || input[1] != 'u') + return FALSE; + input += 2; + + for (k = 0; k < 4; k++) { + trail_surrogate <<= 4; + + ch = *input++; + bin = ws_xton(ch); + if (bin == -1) + return FALSE; + trail_surrogate |= bin; + } + + if ((!IS_TRAIL_SURROGATE(trail_surrogate))) + return FALSE; + + unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate); + + } else if ((IS_TRAIL_SURROGATE(unicode_hex))) { /* trail surrogate with no lead surrogate before it */ + return FALSE; + } + + if (!g_unichar_validate(unicode_hex)) + return FALSE; + + /* Don't allow NUL byte injection. */ + if (unicode_hex == 0) + return FALSE; + + /* A \uXXXX escape takes 6 input bytes, and g_unichar_to_utf8() requires an output buffer of at least 6 bytes, so an output buffer as large as the input is sufficient -> OK.
*/ + k = g_unichar_to_utf8(unicode_hex, output); /* the return value is used as the number of bytes written */ + output += k; + break; + } + + default: /* any other character after a backslash, including the terminating NUL, is rejected */ + return FALSE; + } + + } else { + *output = ch; /* ordinary character: copied through unchanged */ + output++; + } + } + + *output = '\0'; + return TRUE; +} + /* * Editor modelines - https://www.wireshark.org/tools/modelines.html * diff --git a/wsutil/wsjsmn.h b/wsutil/wsjsmn.h index 4b4d9054db..98084ca5c5 100644 --- a/wsutil/wsjsmn.h +++ b/wsutil/wsjsmn.h @@ -41,6 +41,11 @@ WS_DLL_PUBLIC gboolean jsmn_is_json(const guint8* buf, const size_t len); WS_DLL_PUBLIC int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens); +/** + * Unescape a JSON string: decode the backslash escapes in the NUL-terminated input and write the NUL-terminated result to output. output may be the same pointer as input (in-place unescaping); otherwise it must point to a buffer at least as large as the input buffer. Returns TRUE on success. Returns FALSE as soon as an invalid escape is met (unknown escape character, a \u escape whose four digits are not all accepted by ws_xton(), an unpaired surrogate, an invalid code point, or \u0000); in that case output may hold a partial result that is not NUL-terminated, and an in-place input may already be partly overwritten. + */ +WS_DLL_PUBLIC gboolean wsjsmn_unescape_json_string(const char *input, char *output); + #ifdef __cplusplus } #endif |