aboutsummaryrefslogtreecommitdiffstats
path: root/wsutil/wmem
diff options
context:
space:
mode:
authorJoão Valverde <j@v6e.pt>2022-10-03 10:49:07 +0100
committerJoão Valverde <j@v6e.pt>2022-10-05 19:34:47 +0100
commit51320ae59b80f16a60300e8041c225791f5af409 (patch)
tree851db67c62f725160f33d4efd1891996eadbfea8 /wsutil/wmem
parentf9aba04431390ad817578dd2c0d2da3efe663b6d (diff)
wsutil: Improve UTF-8 APIs for debugging
In particular, add a UTF-8-specific wslog API that should make it easier to interpret invalid encodings.
Diffstat (limited to 'wsutil/wmem')
-rw-r--r--wsutil/wmem/wmem_strbuf.c31
-rw-r--r--wsutil/wmem/wmem_strbuf.h17
2 files changed, 33 insertions, 15 deletions
diff --git a/wsutil/wmem/wmem_strbuf.c b/wsutil/wmem/wmem_strbuf.c
index 00edf26264..bb1ea41c83 100644
--- a/wsutil/wmem/wmem_strbuf.c
+++ b/wsutil/wmem/wmem_strbuf.c
@@ -252,6 +252,23 @@ wmem_strbuf_append_unichar(wmem_strbuf_t *strbuf, const gunichar c)
}
}
+static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+
+void
+wmem_strbuf_append_hex(wmem_strbuf_t *strbuf, uint8_t ch)
+{
+ wmem_strbuf_grow(strbuf, 4);
+
+ if (!strbuf->max_size || WMEM_STRBUF_ROOM(strbuf) >= 4) {
+ strbuf->str[strbuf->len++] = '\\';
+ strbuf->str[strbuf->len++] = 'x';
+ strbuf->str[strbuf->len++] = hex[(ch >> 4) & 0xF];
+ strbuf->str[strbuf->len++] = hex[(ch >> 0) & 0xF];
+ strbuf->str[strbuf->len] = '\0';
+ }
+}
+
void
wmem_strbuf_truncate(wmem_strbuf_t *strbuf, const size_t len)
{
@@ -332,14 +349,16 @@ wmem_strbuf_destroy(wmem_strbuf_t *strbuf)
}
bool
-wmem_strbuf_sanitize_utf8(wmem_strbuf_t *strbuf)
+wmem_strbuf_utf8_validate(wmem_strbuf_t *strbuf, const char **endptr)
{
- if (g_utf8_validate(strbuf->str, -1, NULL)) {
- return false;
- }
+ return g_utf8_validate(strbuf->str, strbuf->len, endptr);
+}
+void
+wmem_strbuf_utf8_make_valid(wmem_strbuf_t *strbuf)
+{
/* Sanitize the contents to a temporary string. */
- char *tmp = g_utf8_make_valid(strbuf->str, -1);
+ char *tmp = g_utf8_make_valid(strbuf->str, strbuf->len);
/* Reset the strbuf, keeping the backing memory allocation */
*strbuf->str = '\0';
@@ -348,8 +367,6 @@ wmem_strbuf_sanitize_utf8(wmem_strbuf_t *strbuf)
/* Copy the temporary string to the strbuf. */
wmem_strbuf_append(strbuf, tmp);
g_free(tmp);
-
- return true;
}
/*
diff --git a/wsutil/wmem/wmem_strbuf.h b/wsutil/wmem/wmem_strbuf.h
index c691eeb160..e9acc215c7 100644
--- a/wsutil/wmem/wmem_strbuf.h
+++ b/wsutil/wmem/wmem_strbuf.h
@@ -106,6 +106,10 @@ wmem_strbuf_append_unichar(wmem_strbuf_t *strbuf, const gunichar c);
WS_DLL_PUBLIC
void
+wmem_strbuf_append_hex(wmem_strbuf_t *strbuf, uint8_t);
+
+WS_DLL_PUBLIC
+void
wmem_strbuf_truncate(wmem_strbuf_t *strbuf, const size_t len);
WS_DLL_PUBLIC
@@ -137,16 +141,13 @@ WS_DLL_PUBLIC
void
wmem_strbuf_destroy(wmem_strbuf_t *strbuf);
-/** Check the UTF-8 encoded strbuf for validity and sanitize the contents if needed,
- * by replacing encoding errors with unicode replacement character. This function is
- * intended for debugging purposes and is not optimized for speed.
- *
- * @param strbuf the strbuf to validate
- * @return true if the string was sanitized, false otherwise
- */
WS_DLL_PUBLIC
bool
-wmem_strbuf_sanitize_utf8(wmem_strbuf_t *strbuf);
+wmem_strbuf_utf8_validate(wmem_strbuf_t *strbuf, const char **endptr);
+
+WS_DLL_PUBLIC
+void
+wmem_strbuf_utf8_make_valid(wmem_strbuf_t *strbuf);
/** @}
* @} */