aboutsummaryrefslogtreecommitdiffstats
path: root/wsutil
diff options
context:
space:
mode:
author Jakub Zawadzki <darkjames-ws@darkjames.pl> 2017-09-02 21:47:14 +0200
committer Jakub Zawadzki <darkjames-ws@darkjames.pl> 2017-09-08 18:14:59 +0000
commit 1c6f6c691f52bc3c0a660e6f82453844135acb30 (patch)
tree 64b06491a0bd51fe2c811e6b32c6e08f3d13ad1a /wsutil
parent 585297b189ba7f39db34f66f57d25429c8daf570 (diff)
sharkd: implement proper JSON string unescaping, based on JSON dissector.
Change-Id: I749b78b759f98c78526840b8bb1cbccfc17a5611
Reviewed-on: https://code.wireshark.org/review/23365
Petri-Dish: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org>
Reviewed-by: Dario Lombardo <lomato@gmail.com>
Reviewed-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Diffstat (limited to 'wsutil')
-rw-r--r-- wsutil/wsjsmn.c 103
-rw-r--r-- wsutil/wsjsmn.h 5
2 files changed, 108 insertions, 0 deletions
diff --git a/wsutil/wsjsmn.c b/wsutil/wsjsmn.c
index 949b983efc..19d171ca88 100644
--- a/wsutil/wsjsmn.c
+++ b/wsutil/wsjsmn.c
@@ -26,6 +26,8 @@
#include <string.h>
#include <wsutil/jsmn.h>
+#include <wsutil/str_util.h>
+#include <wsutil/unicode-utils.h>
#include "log.h"
gboolean jsmn_is_json(const guint8* buf, const size_t len)
@@ -76,6 +78,107 @@ int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens)
return jsmn_parse(&p, buf, strlen(buf), tokens, max_tokens);
}
+/*
+ * Read exactly four hexadecimal digits starting at *cursor.
+ *
+ * On success the decoded value is stored in *value, *cursor is moved past
+ * the four digits and TRUE is returned.  If ws_xton() reports -1 for any of
+ * the four characters, FALSE is returned and neither *cursor nor *value is
+ * modified.
+ */
+static gboolean wsjsmn_unescape_hex4(const char **cursor, guint32 *value)
+{
+	const char *p = *cursor;
+	guint32 acc = 0;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		int nibble = ws_xton(*p++);
+
+		if (nibble == -1)
+			return FALSE;
+		acc = (acc << 4) | (guint32)nibble;
+	}
+
+	*cursor = p;
+	*value = acc;
+	return TRUE;
+}
+
+/*
+ * Decode the backslash escapes of a NUL-terminated JSON string body.
+ *
+ * Recognised escapes are \" \\ \/ \b \f \n \r \t and \uXXXX; a \uXXXX lead
+ * surrogate must be followed immediately by a \uXXXX trail surrogate, and
+ * the pair is combined into one code point.  Code points are written to
+ * output as UTF-8.
+ *
+ * Returns TRUE and NUL-terminates output on success.  Returns FALSE on an
+ * unknown escape, a non-hex digit inside \uXXXX, an unpaired surrogate, a
+ * code point rejected by g_unichar_validate(), or \u0000; in that case
+ * output has been partially written and is NOT NUL-terminated.
+ *
+ * output may alias input: every escape is read completely before its
+ * (never longer) replacement is written.
+ */
+gboolean wsjsmn_unescape_json_string(const char *input, char *output)
+{
+	char ch;
+
+	while ((ch = *input++) != '\0') {
+		guint32 code_point;
+
+		/* Ordinary character: copy through unchanged. */
+		if (ch != '\\') {
+			*output++ = ch;
+			continue;
+		}
+
+		ch = *input++;
+
+		/* Single-character escapes. */
+		if (ch != 'u') {
+			switch (ch) {
+			case '"':
+			case '\\':
+			case '/':
+				/* The escaped character stands for itself. */
+				break;
+			case 'b':
+				ch = '\b';
+				break;
+			case 'f':
+				ch = '\f';
+				break;
+			case 'n':
+				ch = '\n';
+				break;
+			case 'r':
+				ch = '\r';
+				break;
+			case 't':
+				ch = '\t';
+				break;
+			default:
+				/* Unknown escape (including a trailing lone backslash). */
+				return FALSE;
+			}
+
+			*output++ = ch;
+			continue;
+		}
+
+		/* \uXXXX escape. */
+		if (!wsjsmn_unescape_hex4(&input, &code_point))
+			return FALSE;
+
+		if (IS_LEAD_SURROGATE(code_point)) {
+			guint16 lead = (guint16)code_point;
+			guint16 trail;
+			guint32 raw_trail;
+
+			/* A lead surrogate is only valid when a \uXXXX trail follows. */
+			if (input[0] != '\\' || input[1] != 'u')
+				return FALSE;
+			input += 2;
+
+			if (!wsjsmn_unescape_hex4(&input, &raw_trail))
+				return FALSE;
+			trail = (guint16)raw_trail;
+
+			if (!IS_TRAIL_SURROGATE(trail))
+				return FALSE;
+
+			code_point = SURROGATE_VALUE(lead, trail);
+		} else if (IS_TRAIL_SURROGATE(code_point)) {
+			/* Trail surrogate without a preceding lead. */
+			return FALSE;
+		}
+
+		/* Reject invalid code points, and \u0000 to prevent NUL byte injection. */
+		if (!g_unichar_validate(code_point) || code_point == 0)
+			return FALSE;
+
+		/* \uXXXX => 6 bytes, and g_unichar_to_utf8() requires to have output buffer at least 6 bytes -> OK. */
+		output += g_unichar_to_utf8(code_point, output);
+	}
+
+	*output = '\0';
+	return TRUE;
+}
+
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
diff --git a/wsutil/wsjsmn.h b/wsutil/wsjsmn.h
index 4b4d9054db..98084ca5c5 100644
--- a/wsutil/wsjsmn.h
+++ b/wsutil/wsjsmn.h
@@ -41,6 +41,11 @@ WS_DLL_PUBLIC gboolean jsmn_is_json(const guint8* buf, const size_t len);
WS_DLL_PUBLIC int wsjsmn_parse(const char *buf, jsmntok_t *tokens, unsigned int max_tokens);
+/**
+ * Try to unescape a NUL-terminated JSON string.
+ *
+ * Handles the escapes \" \\ \/ \b \f \n \r \t and \uXXXX (including
+ * lead/trail surrogate pairs); decoded code points are written as UTF-8.
+ *
+ * @param input  Escaped string to decode.
+ * @param output Destination buffer. It can be the same pointer as input, or
+ *               must point to a buffer at least as large as input's (the
+ *               unescaped result is never longer than the input).
+ * @return TRUE on success, with output NUL-terminated. FALSE if input
+ *         contains an unknown escape, a malformed \uXXXX sequence, an
+ *         unpaired surrogate, an invalid code point or \u0000; output is
+ *         then left partially written and without a terminating NUL.
+ */
+WS_DLL_PUBLIC gboolean wsjsmn_unescape_json_string(const char *input, char *output);
+
#ifdef __cplusplus
}
#endif