diff options
author | João Valverde <j@v6e.pt> | 2023-02-06 17:46:35 +0000 |
---|---|---|
committer | João Valverde <j@v6e.pt> | 2023-02-08 11:21:19 +0000 |
commit | eda38f5f2de12268347093c64de4c93d18c70515 (patch) | |
tree | 927b0c3105d5f3a24aa59bf60334a5e11c903ef4 /wsutil | |
parent | a66b5080c38dc9d270e1ecde38c47b3545e761c8 (diff) |
Replace g_utf8_make_valid() with own function
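The function ws_utf8_make_valid() is all-around better and
also performs substitution of maximal subparts, the practice
recommended by the Unicode Standard for replacing ill-formed
sequences with U+FFFD REPLACEMENT CHARACTER.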
Diffstat (limited to 'wsutil')
-rw-r--r-- | wsutil/unicode-utils.c | 13 | ||||
-rw-r--r-- | wsutil/unicode-utils.h | 6 | ||||
-rw-r--r-- | wsutil/wmem/wmem_strbuf.c | 16 | ||||
-rw-r--r-- | wsutil/wmem/wmem_test.c | 4 |
4 files changed, 26 insertions, 13 deletions
diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c index 8af7910554..10c13a62d9 100644 --- a/wsutil/unicode-utils.c +++ b/wsutil/unicode-utils.c @@ -189,8 +189,8 @@ utf_8_validate(const guint8 *start, ssize_t length, const guint8 **end) * indication of whether there was an invalid character (i.e. * REPLACEMENT CHARACTER was used.) */ -guint8 * -ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length) +wmem_strbuf_t * +ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length) { wmem_strbuf_t *str; @@ -215,7 +215,14 @@ ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length) } } - return (guint8 *) wmem_strbuf_finalize(str); + return str; +} + +guint8 * +ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length) +{ + wmem_strbuf_t *str = ws_utf8_make_valid_strbuf(scope, ptr, length); + return wmem_strbuf_finalize(str); } #ifdef _WIN32 diff --git a/wsutil/unicode-utils.h b/wsutil/unicode-utils.h index 7303f607e0..7c85614bbe 100644 --- a/wsutil/unicode-utils.h +++ b/wsutil/unicode-utils.h @@ -70,6 +70,12 @@ int ws_utf8_seqlen[256]; WS_DLL_PUBLIC guint8 * ws_utf8_make_valid(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length); +/* + * Same as ws_utf8_make_valid() but returns a wmem_strbuf_t. + */ +WS_DLL_PUBLIC wmem_strbuf_t * +ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const guint8 *ptr, ssize_t length); + #ifdef _WIN32 /** Given a UTF-8 string, convert it to UTF-16. This is meant to be used diff --git a/wsutil/wmem/wmem_strbuf.c b/wsutil/wmem/wmem_strbuf.c index 3b67ab7b38..11c9548d67 100644 --- a/wsutil/wmem/wmem_strbuf.c +++ b/wsutil/wmem/wmem_strbuf.c @@ -18,6 +18,8 @@ #include "wmem-int.h" #include "wmem_strutl.h" +#include <wsutil/unicode-utils.h> + #define DEFAULT_MINIMUM_SIZE 16 /* _ROOM accounts for the null-terminator, _RAW_ROOM does not. 
@@ -444,16 +446,14 @@ wmem_strbuf_utf8_validate(wmem_strbuf_t *strbuf, const char **endpptr) void wmem_strbuf_utf8_make_valid(wmem_strbuf_t *strbuf) { - /* Sanitize the contents to a temporary string. */ - char *tmp = g_utf8_make_valid(strbuf->str, strbuf->len); + wmem_strbuf_t *tmp = ws_utf8_make_valid_strbuf(strbuf->allocator, strbuf->str, strbuf->len); - /* Reset the strbuf, keeping the backing memory allocation */ - *strbuf->str = '\0'; - strbuf->len = 0; + wmem_free(strbuf->allocator, strbuf->str); + strbuf->str = tmp->str; + strbuf->len = tmp->len; + strbuf->alloc_size = tmp->alloc_size; - /* Copy the temporary string to the strbuf. */ - wmem_strbuf_append(strbuf, tmp); - g_free(tmp); + wmem_free(strbuf->allocator, tmp); } /* diff --git a/wsutil/wmem/wmem_test.c b/wsutil/wmem/wmem_test.c index c887db9e3d..6ac917fb57 100644 --- a/wsutil/wmem/wmem_test.c +++ b/wsutil/wmem/wmem_test.c @@ -1157,7 +1157,7 @@ wmem_test_strbuf_validate(void) strbuf = wmem_strbuf_new(NULL, "TEST\xEF ABC"); g_assert_false(wmem_strbuf_utf8_validate(strbuf, &endptr)); - g_assert(endptr == &strbuf->str[4]); + g_assert_true(endptr == &strbuf->str[4]); wmem_strbuf_destroy(strbuf); strbuf = wmem_strbuf_new(NULL, NULL); @@ -1168,7 +1168,7 @@ wmem_test_strbuf_validate(void) strbuf = wmem_strbuf_new(NULL, NULL); wmem_strbuf_append_len(strbuf, "TEST\x00\xEF ABC", 10); g_assert_false(wmem_strbuf_utf8_validate(strbuf, &endptr)); - g_assert(endptr == &strbuf->str[5]); + g_assert_true(endptr == &strbuf->str[5]); wmem_strbuf_destroy(strbuf); strbuf = wmem_strbuf_new(NULL, NULL); |