diff options
author | Guy Harris <guy@alum.mit.edu> | 2019-07-24 01:19:15 -0700 |
---|---|---|
committer | Guy Harris <guy@alum.mit.edu> | 2019-07-24 08:44:06 +0000 |
commit | c8933e48f2de4baab10a609736d694e11bd0b53b (patch) | |
tree | 0e3394db96fb1645af70fc71ed6fabed8b240b78 /epan/charsets.c | |
parent | c5dab4efb575c337da59b8440192a223604d6b61 (diff) |
Insert REPLACEMENT CHARACTER for various UTF-16 errors.
Change-Id: I2f62a409548b2c743864ca8da5733f7a73872b3c
Reviewed-on: https://code.wireshark.org/review/34066
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
Diffstat (limited to 'epan/charsets.c')
-rw-r--r-- | epan/charsets.c | 39 |
1 file changed, 19 insertions, 20 deletions
```diff
diff --git a/epan/charsets.c b/epan/charsets.c
index aa6c237703..df4aa30033 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -711,10 +711,7 @@ get_ucs_2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
  *
  * Specify length in bytes.
  *
- * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD).
- * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
- * XXX - if there are an odd number of bytes, should put a
- * REPLACEMENT CHARACTER at the end.
+ * XXX - should map invalid Unicode characters to REPLACEMENT CHARACTERs.
  */
 guint8 *
 get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding)
@@ -741,11 +738,11 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
             if (i + 1 >= length) {
                 /*
                  * Oops, string ends with a lead surrogate.
-                 * Ignore this for now.
-                 * XXX - insert "substitute" character?
-                 * Report the error in some other
-                 * fashion?
+                 *
+                 * Insert a REPLACEMENT CHARACTER to mark the error,
+                 * and quit.
                  */
+                wmem_strbuf_append_unichar(strbuf, UNREPL);
                 break;
             }
             lead_surrogate = uchar2;
@@ -760,23 +757,22 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
             } else {
                 /*
                  * Not a trail surrogate.
-                 * Ignore the entire pair.
-                 * XXX - insert "substitute" character?
-                 * Report the error in some other
-                 * fashion?
+                 *
+                 * Insert a REPLACEMENT CHARACTER to mark the error,
+                 * and continue;
                  */
-                ;
+                wmem_strbuf_append_unichar(strbuf, UNREPL);
             }
         } else {
             if (IS_TRAIL_SURROGATE(uchar2)) {
                 /*
                  * Trail surrogate without a preceding
-                 * lead surrogate. Ignore it.
-                 * XXX - insert "substitute" character?
-                 * Report the error in some other
-                 * fashion?
+                 * lead surrogate.
+                 *
+                 * Insert a REPLACEMENT CHARACTER to mark the error,
+                 * and continue;
                  */
-                ;
+                wmem_strbuf_append_unichar(strbuf, UNREPL);
             } else {
                 /*
                  * Non-surrogate; just append it.
@@ -787,9 +783,12 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
     }
 
     /*
-     * XXX - if i < length, this means we were handed an odd
-     * number of bytes, so we're not a valid UTF-16 string.
+     * If i < length, this means we were handed an odd number of bytes,
+     * so we're not a valid UTF-16 string; insert a REPLACEMENT CHARACTER
+     * to mark the error.
      */
+    if (i < length)
+        wmem_strbuf_append_unichar(strbuf, UNREPL);
     return (guint8 *) wmem_strbuf_finalize(strbuf);
 }
 
```