aboutsummaryrefslogtreecommitdiffstats
path: root/wsutil
diff options
context:
space:
mode:
authorJoão Valverde <j@v6e.pt>2023-02-06 17:46:35 +0000
committerJoão Valverde <j@v6e.pt>2023-02-08 11:21:19 +0000
commiteda38f5f2de12268347093c64de4c93d18c70515 (patch)
tree927b0c3105d5f3a24aa59bf60334a5e11c903ef4 /wsutil
parenta66b5080c38dc9d270e1ecde38c47b3545e761c8 (diff)
Replace g_utf8_make_valid() with own function
The function ws_utf8_make_valid() is all-around better and also performs U+FFFD (REPLACEMENT CHARACTER) substitution of maximal subparts.
Diffstat (limited to 'wsutil')
-rw-r--r--wsutil/unicode-utils.c13
-rw-r--r--wsutil/unicode-utils.h6
-rw-r--r--wsutil/wmem/wmem_strbuf.c16
-rw-r--r--wsutil/wmem/wmem_test.c4
4 files changed, 26 insertions, 13 deletions
diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c
index 8af7910554..10c13a62d9 100644
--- a/wsutil/unicode-utils.c
+++ b/wsutil/unicode-utils.c
@@ -189,8 +189,8 @@ utf_8_validate(const guint8 *start, ssize_t length, const guint8 **end)
* indication of whether there was an invalid character (i.e.
* REPLACEMENT CHARACTER was used.)
*/
-guint8 *
-ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length)
+wmem_strbuf_t *
+ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length)
{
wmem_strbuf_t *str;
@@ -215,7 +215,14 @@ ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length)
}
}
- return (guint8 *) wmem_strbuf_finalize(str);
+ return str;
+}
+
+guint8 *
+ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length)
+{
+ wmem_strbuf_t *str = ws_utf8_make_valid_strbuf(scope, ptr, length);
+ return wmem_strbuf_finalize(str);
}
#ifdef _WIN32
diff --git a/wsutil/unicode-utils.h b/wsutil/unicode-utils.h
index 7303f607e0..7c85614bbe 100644
--- a/wsutil/unicode-utils.h
+++ b/wsutil/unicode-utils.h
@@ -70,6 +70,12 @@ int ws_utf8_seqlen[256];
WS_DLL_PUBLIC guint8 *
ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length);
+/*
+ * Same as ws_utf8_make_valid() but returns a wmem_strbuf_t.
+ */
+WS_DLL_PUBLIC wmem_strbuf_t *
+ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length);
+
#ifdef _WIN32
/** Given a UTF-8 string, convert it to UTF-16. This is meant to be used
diff --git a/wsutil/wmem/wmem_strbuf.c b/wsutil/wmem/wmem_strbuf.c
index 3b67ab7b38..11c9548d67 100644
--- a/wsutil/wmem/wmem_strbuf.c
+++ b/wsutil/wmem/wmem_strbuf.c
@@ -18,6 +18,8 @@
#include "wmem-int.h"
#include "wmem_strutl.h"
+#include <wsutil/unicode-utils.h>
+
#define DEFAULT_MINIMUM_SIZE 16
/* _ROOM accounts for the null-terminator, _RAW_ROOM does not.
@@ -444,16 +446,14 @@ wmem_strbuf_utf8_validate(wmem_strbuf_t *strbuf, const char **endpptr)
void
wmem_strbuf_utf8_make_valid(wmem_strbuf_t *strbuf)
{
- /* Sanitize the contents to a temporary string. */
- char *tmp = g_utf8_make_valid(strbuf->str, strbuf->len);
+ wmem_strbuf_t *tmp = ws_utf8_make_valid_strbuf(strbuf->allocator, strbuf->str, strbuf->len);
- /* Reset the strbuf, keeping the backing memory allocation */
- *strbuf->str = '\0';
- strbuf->len = 0;
+ wmem_free(strbuf->allocator, strbuf->str);
+ strbuf->str = tmp->str;
+ strbuf->len = tmp->len;
+ strbuf->alloc_size = tmp->alloc_size;
- /* Copy the temporary string to the strbuf. */
- wmem_strbuf_append(strbuf, tmp);
- g_free(tmp);
+ wmem_free(strbuf->allocator, tmp);
}
/*
diff --git a/wsutil/wmem/wmem_test.c b/wsutil/wmem/wmem_test.c
index c887db9e3d..6ac917fb57 100644
--- a/wsutil/wmem/wmem_test.c
+++ b/wsutil/wmem/wmem_test.c
@@ -1157,7 +1157,7 @@ wmem_test_strbuf_validate(void)
strbuf = wmem_strbuf_new(NULL, "TEST\xEF ABC");
g_assert_false(wmem_strbuf_utf8_validate(strbuf, &endptr));
- g_assert(endptr == &strbuf->str[4]);
+ g_assert_true(endptr == &strbuf->str[4]);
wmem_strbuf_destroy(strbuf);
strbuf = wmem_strbuf_new(NULL, NULL);
@@ -1168,7 +1168,7 @@ wmem_test_strbuf_validate(void)
strbuf = wmem_strbuf_new(NULL, NULL);
wmem_strbuf_append_len(strbuf, "TEST\x00\xEF ABC", 10);
g_assert_false(wmem_strbuf_utf8_validate(strbuf, &endptr));
- g_assert(endptr == &strbuf->str[5]);
+ g_assert_true(endptr == &strbuf->str[5]);
wmem_strbuf_destroy(strbuf);
strbuf = wmem_strbuf_new(NULL, NULL);