aboutsummaryrefslogtreecommitdiffstats
path: root/epan/tvbuff.c
diff options
context:
space:
mode:
authorGuy Harris <guy@alum.mit.edu>2019-07-14 20:18:14 -0700
committerGuy Harris <guy@alum.mit.edu>2019-07-15 07:50:30 +0000
commite26e0b4de071caa9bbfbde61dcc682ab6ede099e (patch)
tree74d2e51abaddae97ce8add71750aa03b57fc7206 /epan/tvbuff.c
parent258a5f6a173f7ae5fa9ccf9e709b22bbaa4f190b (diff)
Add support for the ISO 646 "Basic code table" encoding.
The "Basic code table" in ISO 646 is mostly ASCII, but some code points either 1) have more than one glyph that can be assigned to them or 2) have no glyph assigned to them. National versions choose one of the two glyphs for the code points in group 1) and assign specific glyphs to the code points in group 2); the International Reference Version assigns the same glyphs to those code points as does ASCII. For the "Basic code table" encoding, we map the code points in groups 1) and 2) to a REPLACEMENT CHARACTER; additional encodings can be added for the national versions. Add ENC_ISO_646_IRV (International Reference Version) as an alias for ENC_ASCII. Expand some comments, and add some comments, while we're at it. Change-Id: I4f1b5e426ec193775e919731c5cae1224dc65115 Reviewed-on: https://code.wireshark.org/review/33941 Petri-Dish: Guy Harris <guy@alum.mit.edu> Tested-by: Petri Dish Buildbot Reviewed-by: Guy Harris <guy@alum.mit.edu>
Diffstat (limited to 'epan/tvbuff.c')
-rw-r--r--epan/tvbuff.c44
1 files changed, 44 insertions, 0 deletions
diff --git a/epan/tvbuff.c b/epan/tvbuff.c
index ea9e1ffacb..07c0b23472 100644
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@@ -2503,6 +2503,28 @@ tvb_get_ascii_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint l
}
/*
+ * Given a wmem scope, a tvbuff, an offset, a length, and a translation table,
+ * treat the string of bytes referred to by the tvbuff, offset, and length
+ * as a string encoded using one octet per character, with octets with the
+ * high-order bit clear being mapped by the translation table to 2-byte
+ * Unicode Basic Multilingual Plane characters (including REPLACEMENT
+ * CHARACTER) and octets with the high-order bit set being mapped to
+ * REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string,
+ * allocated using the wmem scope.
+ *
+ * Octets with the highest bit set will be converted to the Unicode
+ * REPLACEMENT CHARACTER.
+ */
+static guint8 *
+tvb_get_iso_646_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80])
+{ /* Thin tvbuff front end: fetch the raw bytes, then hand them to get_iso_646_string(). */
+ const guint8 *ptr; /* pointer obtained from ensure_contiguous() for the requested range */
+
+ ptr = ensure_contiguous(tvb, offset, length); /* NOTE(review): presumably raises an exception if the range is not in the tvbuff - confirm */
+ return get_iso_646_string(scope, ptr, length, table); /* scope, length and table are passed through unchanged */
+}
+
+/*
* Given a wmem scope, a tvbuff, an offset, and a length, treat the string
* of bytes referred to by the tvbuff, the offset, and the length as a UTF-8
* string, and return a pointer to that string, allocated using the wmem scope.
@@ -2870,6 +2892,10 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp866);
break;
+ case ENC_ISO_646_BASIC:
+ strptr = tvb_get_iso_646_string(scope, tvb, offset, length, charset_table_iso_646_basic);
+ break;
+
case ENC_3GPP_TS_23_038_7BITS:
{
gint bit_offset = offset << 3;
@@ -2951,6 +2977,20 @@ tvb_get_ascii_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint
}
static guint8 *
+tvb_get_iso_646_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80])
+{ /* Variant of tvb_get_iso_646_string() whose length comes from tvb_strsize() instead of the caller. */
+ guint size; /* value returned by tvb_strsize() for the string starting at offset */
+ const guint8 *ptr; /* pointer obtained from ensure_contiguous() for those size bytes */
+
+ size = tvb_strsize(tvb, offset); /* NOTE(review): presumably counts the terminating NUL as well - confirm against tvb_strsize() */
+ ptr = ensure_contiguous(tvb, offset, size);
+ /* XXX, conversion between signed/unsigned integer */
+ if (lengthp) /* lengthp is optional; it is only written when non-NULL */
+ *lengthp = size; /* guint stored into a gint: the conversion the XXX note above refers to */
+ return get_iso_646_string(scope, ptr, size, table); /* the same size is passed on as the byte count to convert */
+}
+
+static guint8 *
tvb_get_utf_8_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp)
{
guint size;
@@ -3236,6 +3276,10 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp866);
break;
+ case ENC_ISO_646_BASIC:
+ strptr = tvb_get_iso_646_stringz(scope, tvb, offset, lengthp, charset_table_iso_646_basic);
+ break;
+
case ENC_3GPP_TS_23_038_7BITS:
REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
break;