aboutsummaryrefslogtreecommitdiffstats
path: root/epan
diff options
context:
space:
mode:
author Guy Harris <guy@alum.mit.edu> 2019-07-24 01:19:15 -0700
committer Guy Harris <guy@alum.mit.edu> 2019-07-24 08:44:06 +0000
commit c8933e48f2de4baab10a609736d694e11bd0b53b (patch)
tree 0e3394db96fb1645af70fc71ed6fabed8b240b78 /epan
parent c5dab4efb575c337da59b8440192a223604d6b61 (diff)
Insert REPLACEMENT CHARACTER for various UTF-16 errors.
Change-Id: I2f62a409548b2c743864ca8da5733f7a73872b3c
Reviewed-on: https://code.wireshark.org/review/34066
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
Diffstat (limited to 'epan')
-rw-r--r-- epan/charsets.c 39
1 files changed, 19 insertions, 20 deletions
diff --git a/epan/charsets.c b/epan/charsets.c
index aa6c237703..df4aa30033 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -711,10 +711,7 @@ get_ucs_2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
*
* Specify length in bytes.
*
- * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD).
- * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
- * XXX - if there are an odd number of bytes, should put a
- * REPLACEMENT CHARACTER at the end.
+ * XXX - should map invalid Unicode characters to REPLACEMENT CHARACTERs.
*/
guint8 *
get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding)
@@ -741,11 +738,11 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
if (i + 1 >= length) {
/*
* Oops, string ends with a lead surrogate.
- * Ignore this for now.
- * XXX - insert "substitute" character?
- * Report the error in some other
- * fashion?
+ *
+ * Insert a REPLACEMENT CHARACTER to mark the error,
+ * and quit.
*/
+ wmem_strbuf_append_unichar(strbuf, UNREPL);
break;
}
lead_surrogate = uchar2;
@@ -760,23 +757,22 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
} else {
/*
* Not a trail surrogate.
- * Ignore the entire pair.
- * XXX - insert "substitute" character?
- * Report the error in some other
- * fashion?
+ *
+ * Insert a REPLACEMENT CHARACTER to mark the error,
+ * and continue;
*/
- ;
+ wmem_strbuf_append_unichar(strbuf, UNREPL);
}
} else {
if (IS_TRAIL_SURROGATE(uchar2)) {
/*
* Trail surrogate without a preceding
- * lead surrogate. Ignore it.
- * XXX - insert "substitute" character?
- * Report the error in some other
- * fashion?
+ * lead surrogate.
+ *
+ * Insert a REPLACEMENT CHARACTER to mark the error,
+ * and continue;
*/
- ;
+ wmem_strbuf_append_unichar(strbuf, UNREPL);
} else {
/*
* Non-surrogate; just append it.
@@ -787,9 +783,12 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
}
/*
- * XXX - if i < length, this means we were handed an odd
- * number of bytes, so we're not a valid UTF-16 string.
+ * If i < length, this means we were handed an odd number of bytes,
+ * so we're not a valid UTF-16 string; insert a REPLACEMENT CHARACTER
+ * to mark the error.
*/
+ if (i < length)
+ wmem_strbuf_append_unichar(strbuf, UNREPL);
return (guint8 *) wmem_strbuf_finalize(strbuf);
}