Insert REPLACEMENT CHARACTER for various UTF-16 errors.

Change-Id: I2f62a409548b2c743864ca8da5733f7a73872b3c
Reviewed-on: https://code.wireshark.org/review/34066
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
author: Guy Harris <guy@alum.mit.edu> 2019-07-24 01:19:15 -0700
committer: Guy Harris <guy@alum.mit.edu> 2019-07-24 08:44:06 +0000
commit: c8933e48f2de4baab10a609736d694e11bd0b53b (patch)
tree: 0e3394db96fb1645af70fc71ed6fabed8b240b78 /epan
parent: c5dab4efb575c337da59b8440192a223604d6b61 (diff)
1 file changed, 19 insertions, 20 deletions
diff --git a/epan/charsets.c b/epan/charsets.c
index aa6c237703..df4aa30033 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -711,10 +711,7 @@ get_ucs_2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
  *
  * Specify length in bytes.
  *
- * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD).
- * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
- * XXX - if there are an odd number of bytes, should put a
- * REPLACEMENT CHARACTER at the end.
+ * XXX - should map invalid Unicode characters to REPLACEMENT CHARACTERs.
  */
 guint8 *
 get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding)
@@ -741,11 +738,11 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
             if (i + 1 >= length) {
                 /*
                  * Oops, string ends with a lead surrogate.
-                 * Ignore this for now.
-                 * XXX - insert "substitute" character?
-                 * Report the error in some other
-                 * fashion?
+                 *
+                 * Insert a REPLACEMENT CHARACTER to mark the error,
+                 * and quit.
                  */
+                wmem_strbuf_append_unichar(strbuf, UNREPL);
                 break;
             }
             lead_surrogate = uchar2;
@@ -760,23 +757,22 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
             } else {
                 /*
                  * Not a trail surrogate.
-                 * Ignore the entire pair.
-                 * XXX - insert "substitute" character?
-                 * Report the error in some other
-                 * fashion?
+                 *
+                 * Insert a REPLACEMENT CHARACTER to mark the error,
+                 * and continue.
                  */
-                 ;
+                wmem_strbuf_append_unichar(strbuf, UNREPL);
             }
         } else {
             if (IS_TRAIL_SURROGATE(uchar2)) {
                 /*
                  * Trail surrogate without a preceding
-                 * lead surrogate.  Ignore it.
-                 * XXX - insert "substitute" character?
-                 * Report the error in some other
-                 * fashion?
+                 * lead surrogate.
+                 *
+                 * Insert a REPLACEMENT CHARACTER to mark the error,
+                 * and continue.
                  */
-                ;
+                wmem_strbuf_append_unichar(strbuf, UNREPL);
             } else {
                 /*
                  * Non-surrogate; just append it.
@@ -787,9 +783,12 @@ get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const
     }
 
     /*
-     * XXX - if i < length, this means we were handed an odd
-     * number of bytes, so we're not a valid UTF-16 string.
+     * If i < length, this means we were handed an odd number of bytes,
+     * so the input is not a valid UTF-16 string; insert a REPLACEMENT
+     * CHARACTER to mark the error.
      */
+    if (i < length)
+        wmem_strbuf_append_unichar(strbuf, UNREPL);
     return (guint8 *) wmem_strbuf_finalize(strbuf);
 }
author	Guy Harris <guy@alum.mit.edu>	2019-07-24 01:19:15 -0700
committer	Guy Harris <guy@alum.mit.edu>	2019-07-24 08:44:06 +0000
commit	c8933e48f2de4baab10a609736d694e11bd0b53b (patch)
tree	0e3394db96fb1645af70fc71ed6fabed8b240b78 /epan
parent	c5dab4efb575c337da59b8440192a223604d6b61 (diff)