diff options
author | João Valverde <j@v6e.pt> | 2022-10-03 10:49:07 +0100 |
---|---|---|
committer | João Valverde <j@v6e.pt> | 2022-10-05 19:34:47 +0100 |
commit | 51320ae59b80f16a60300e8041c225791f5af409 (patch) | |
tree | 851db67c62f725160f33d4efd1891996eadbfea8 /wsutil/wmem | |
parent | f9aba04431390ad817578dd2c0d2da3efe663b6d (diff) |
wsutil: Improve UTF-8 APIs for debugging
In particular, add a UTF-8-specific wslog API that should
make it easier to interpret invalid encodings.
Diffstat (limited to 'wsutil/wmem')
-rw-r--r-- | wsutil/wmem/wmem_strbuf.c | 31 | ||||
-rw-r--r-- | wsutil/wmem/wmem_strbuf.h | 17 |
2 files changed, 33 insertions, 15 deletions
diff --git a/wsutil/wmem/wmem_strbuf.c b/wsutil/wmem/wmem_strbuf.c index 00edf26264..bb1ea41c83 100644 --- a/wsutil/wmem/wmem_strbuf.c +++ b/wsutil/wmem/wmem_strbuf.c @@ -252,6 +252,23 @@ wmem_strbuf_append_unichar(wmem_strbuf_t *strbuf, const gunichar c) } } +static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + +void +wmem_strbuf_append_hex(wmem_strbuf_t *strbuf, uint8_t ch) +{ + wmem_strbuf_grow(strbuf, 4); + + if (!strbuf->max_size || WMEM_STRBUF_ROOM(strbuf) >= 4) { + strbuf->str[strbuf->len++] = '\\'; + strbuf->str[strbuf->len++] = 'x'; + strbuf->str[strbuf->len++] = hex[(ch >> 4) & 0xF]; + strbuf->str[strbuf->len++] = hex[(ch >> 0) & 0xF]; + strbuf->str[strbuf->len] = '\0'; + } +} + void wmem_strbuf_truncate(wmem_strbuf_t *strbuf, const size_t len) { @@ -332,14 +349,16 @@ wmem_strbuf_destroy(wmem_strbuf_t *strbuf) } bool -wmem_strbuf_sanitize_utf8(wmem_strbuf_t *strbuf) +wmem_strbuf_utf8_validate(wmem_strbuf_t *strbuf, const char **endptr) { - if (g_utf8_validate(strbuf->str, -1, NULL)) { - return false; - } + return g_utf8_validate(strbuf->str, strbuf->len, endptr); +} +void +wmem_strbuf_utf8_make_valid(wmem_strbuf_t *strbuf) +{ /* Sanitize the contents to a temporary string. */ - char *tmp = g_utf8_make_valid(strbuf->str, -1); + char *tmp = g_utf8_make_valid(strbuf->str, strbuf->len); /* Reset the strbuf, keeping the backing memory allocation */ *strbuf->str = '\0'; @@ -348,8 +367,6 @@ wmem_strbuf_sanitize_utf8(wmem_strbuf_t *strbuf) /* Copy the temporary string to the strbuf. 
*/ wmem_strbuf_append(strbuf, tmp); g_free(tmp); - - return true; } /* diff --git a/wsutil/wmem/wmem_strbuf.h b/wsutil/wmem/wmem_strbuf.h index c691eeb160..e9acc215c7 100644 --- a/wsutil/wmem/wmem_strbuf.h +++ b/wsutil/wmem/wmem_strbuf.h @@ -106,6 +106,10 @@ wmem_strbuf_append_unichar(wmem_strbuf_t *strbuf, const gunichar c); WS_DLL_PUBLIC void +wmem_strbuf_append_hex(wmem_strbuf_t *strbuf, uint8_t); + +WS_DLL_PUBLIC +void wmem_strbuf_truncate(wmem_strbuf_t *strbuf, const size_t len); WS_DLL_PUBLIC @@ -137,16 +141,13 @@ WS_DLL_PUBLIC void wmem_strbuf_destroy(wmem_strbuf_t *strbuf); -/** Check the UTF-8 encoded strbuf for validity and sanitize the contents if needed, - * by replacing encoding errors with unicode replacement character. This function is - * intended for debugging purposes and is not optimized for speed. - * - * @param strbuf the strbuf to validate - * @return true if the string was sanitized, false otherwise - */ WS_DLL_PUBLIC bool -wmem_strbuf_sanitize_utf8(wmem_strbuf_t *strbuf); +wmem_strbuf_utf8_validate(wmem_strbuf_t *strbuf, const char **endptr); + +WS_DLL_PUBLIC +void +wmem_strbuf_utf8_make_valid(wmem_strbuf_t *strbuf); /** @} * @} */ |