about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jakub Zawadzki <darkjames-ws@darkjames.pl> 2017-09-02 21:47:14 +0200
committer Jakub Zawadzki <darkjames-ws@darkjames.pl> 2017-09-08 18:14:59 +0000
commit 1c6f6c691f52bc3c0a660e6f82453844135acb30 (patch)
tree 64b06491a0bd51fe2c811e6b32c6e08f3d13ad1a
parent 585297b189ba7f39db34f66f57d25429c8daf570 (diff)
sharkd: implement proper JSON string unescaping, based on JSON dissector.
Change-Id: I749b78b759f98c78526840b8bb1cbccfc17a5611
Reviewed-on: https://code.wireshark.org/review/23365
Petri-Dish: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Dario Lombardo <lomato@gmail.com>
Reviewed-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
-rw-r--r-- debian/libwsutil0.symbols 1
-rw-r--r-- sharkd_session.c 28
-rw-r--r-- wsutil/wsjsmn.c 103
-rw-r--r-- wsutil/wsjsmn.h 5
4 files changed, 117 insertions, 20 deletions
diff --git a/debian/libwsutil0.symbols b/debian/libwsutil0.symbols
index f81f22c036..5d1b215dab 100644
--- a/debian/libwsutil0.symbols
+++ b/debian/libwsutil0.symbols
@@ -185,3 +185,4 @@ libwsutil.so.0 libwsutil0 #MINVER#
ws_utf8_char_len@Base 1.12.0~rc1
ws_xton@Base 1.12.0~rc1
wsjsmn_parse@Base 2.3.0
+ wsjsmn_unescape_json_string@Base 2.5.0
diff --git a/sharkd_session.c b/sharkd_session.c
index 431e01e9f6..299c711572 100644
--- a/sharkd_session.c
+++ b/sharkd_session.c
@@ -81,26 +81,10 @@
#include "sharkd.h"
-static void
+static gboolean
json_unescape_str(char *input)
{
- char *output = input;
-
- while (*input)
- {
- char ch = *input++;
-
- if (ch == '\\')
- {
- /* TODO, add more escaping rules */
- ch = *input++;
- }
-
- *output = ch;
- output++;
- }
-
- *output = '\0';
+ return wsjsmn_unescape_json_string(input, input); /* in place: the same buffer is passed as both source and destination; the callee's gboolean result is returned unchanged */
}
static const char *
@@ -3646,8 +3630,12 @@ sharkd_session_process(char *buf, const jsmntok_t *tokens, int count)
buf[tokens[i + 0].end] = '\0';
buf[tokens[i + 1].end] = '\0';
- json_unescape_str(&buf[tokens[i + 0].start]);
- json_unescape_str(&buf[tokens[i + 1].start]);
+ /* unescape only value, as keys are simple strings */
+ if (!json_unescape_str(&buf[tokens[i + 1].start]))
+ {
+ fprintf(stderr, "sanity check(3a): [%d] cannot unescape string\n", i + 1);
+ return;
+ }
}
{
diff --git a/wsutil/wsjsmn.c b/wsutil/wsjsmn.c
index 949b983efc..19d171ca88 100644
--- a/wsutil/wsjsmn.c
+++ b/wsutil/wsjsmn.c
@@ -26,6 +26,8 @@
#include <string.h>
#include <wsutil/jsmn.h>
+#include <wsutil/str_util.h>
+#include <wsutil/unicode-utils.h>
#include "log.h"
gboolean jsmn_is_json(const guint8* buf, const size_t len)
@@ -76,6 +78,107 @@ int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens)
return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens);
}
+gboolean wsjsmn_unescape_json_string(const char *input, char *output) /* Copies the NUL-terminated input to output while decoding JSON backslash escapes. Returns TRUE after writing the terminating NUL; returns FALSE at the first rejected escape, in which case output has not been NUL-terminated by this function. */
+{
+ while (*input) {
+ char ch = *input++;
+
+ if (ch == '\\') {
+ ch = *input++; /* the character after the backslash selects the escape */
+
+ switch (ch) {
+ case '\"':
+ case '\\':
+ case '/':
+ *output++ = ch; /* these three escapes stand for the character itself */
+ break;
+
+ case 'b':
+ *output++ = '\b';
+ break;
+ case 'f':
+ *output++ = '\f';
+ break;
+ case 'n':
+ *output++ = '\n';
+ break;
+ case 'r':
+ *output++ = '\r';
+ break;
+ case 't':
+ *output++ = '\t';
+ break;
+
+ case 'u': /* \uXXXX: four hex digits, optionally the first half of a surrogate pair */
+ {
+ guint32 unicode_hex = 0;
+ int k;
+ int bin;
+
+ for (k = 0; k < 4; k++) { /* accumulate exactly four hex digits, most significant first */
+ unicode_hex <<= 4;
+
+ ch = *input++;
+ bin = ws_xton(ch);
+ if (bin == -1) /* a ws_xton() result of -1 is treated as "not a hex digit" */
+ return FALSE;
+ unicode_hex |= bin;
+ }
+
+ if ((IS_LEAD_SURROGATE(unicode_hex))) {
+ guint16 lead_surrogate = unicode_hex;
+ guint16 trail_surrogate = 0;
+
+ if (input[0] != '\\' || input[1] != 'u') /* a lead surrogate must be followed immediately by a second \u escape */
+ return FALSE;
+ input += 2;
+
+ for (k = 0; k < 4; k++) { /* same four-digit parse as above, for the second half of the pair */
+ trail_surrogate <<= 4;
+
+ ch = *input++;
+ bin = ws_xton(ch);
+ if (bin == -1)
+ return FALSE;
+ trail_surrogate |= bin;
+ }
+
+ if ((!IS_TRAIL_SURROGATE(trail_surrogate))) /* the second escape must be a trail surrogate */
+ return FALSE;
+
+ unicode_hex = SURROGATE_VALUE(lead_surrogate,trail_surrogate); /* replace the pair by the single value computed by SURROGATE_VALUE() */
+
+ } else if ((IS_TRAIL_SURROGATE(unicode_hex))) { /* a trail surrogate without a preceding lead surrogate is rejected */
+ return FALSE;
+ }
+
+ if (!g_unichar_validate(unicode_hex))
+ return FALSE;
+
+ /* Reject \u0000: decoding it would place a NUL byte inside the output string. */
+ if (unicode_hex == 0)
+ return FALSE;
+
+ /* The \uXXXX escape occupied 6 input bytes, and g_unichar_to_utf8() requires an output buffer of at least 6 bytes, so an output buffer as large as the input (or the input itself) is sufficient. */
+ k = g_unichar_to_utf8(unicode_hex, output); /* k is reused here for the number of bytes written */
+ output += k;
+ break;
+ }
+
+ default: /* any other character after a backslash is rejected; this includes the NUL read when the input ends right after a backslash */
+ return FALSE;
+ }
+
+ } else {
+ *output = ch; /* ordinary character: copied unchanged */
+ output++;
+ }
+ }
+
+ *output = '\0'; /* reached only when the whole input was consumed without error */
+ return TRUE;
+}
+
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
diff --git a/wsutil/wsjsmn.h b/wsutil/wsjsmn.h
index 4b4d9054db..98084ca5c5 100644
--- a/wsutil/wsjsmn.h
+++ b/wsutil/wsjsmn.h
@@ -41,6 +41,11 @@ WS_DLL_PUBLIC gboolean jsmn_is_json(const guint8* buf, const size_t len);
WS_DLL_PUBLIC int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens);
+/**
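+ * Try to unescape the input JSON string. output may be the same pointer as input (in-place unescaping); otherwise it must point to a buffer at least as large as the input buffer. Returns FALSE if the string contains a malformed escape sequence.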
+ */
+WS_DLL_PUBLIC gboolean wsjsmn_unescape_json_string(const char *input, char *output);
+
#ifdef __cplusplus
}
#endif