aboutsummaryrefslogtreecommitdiffstats
path: root/epan
diff options
context:
space:
mode:
authorGuy Harris <gharris@sonic.net>2020-09-28 13:16:17 -0700
committerGuy Harris <gharris@sonic.net>2020-09-28 22:30:35 +0000
commitc597927da829f9f9aab3e175347f85649e103a45 (patch)
treef4a73d564337356ba549c55cc6cd3ed49c50ce90 /epan
parentb7c0dc3cb1725fc2d4bb5f6545e7f7ebf014030b (diff)
Add some more string encodings.
Add an encoding for "unpacked" 3GPP TS 23.038 7-bit strings, in which each code position is in a byte of its own, rather than with the code positions packed into 7 bits. Rename the packed encoding to explicitly indicate that it's packed. Add an encoding for ETSI TS 102 221 Annex A strings. Use the new encodings.
Diffstat (limited to 'epan')
-rw-r--r--epan/charsets.c175
-rw-r--r--epan/charsets.h10
-rw-r--r--epan/dissectors/packet-ansi_637.c2
-rw-r--r--epan/dissectors/packet-cell_broadcast.c2
-rw-r--r--epan/dissectors/packet-etsi_card_app_toolkit.c90
-rw-r--r--epan/dissectors/packet-gmr1_rr.c2
-rw-r--r--epan/dissectors/packet-gsm_a_dtap.c2
-rw-r--r--epan/dissectors/packet-gsm_sms.c42
-rw-r--r--epan/dissectors/packet-mbim.c2
-rw-r--r--epan/dissectors/packet-nas_eps.c2
-rw-r--r--epan/proto.c4
-rw-r--r--epan/proto.h88
-rw-r--r--epan/tvbuff.c47
-rw-r--r--epan/tvbuff.h54
14 files changed, 351 insertions, 171 deletions
diff --git a/epan/charsets.c b/epan/charsets.c
index eb32e3730f..d93655d4eb 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -917,23 +917,29 @@ handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
*/
saw_escape = TRUE;
} else {
- /*
- * Have we seen an escape?
- */
- if (saw_escape) {
- saw_escape = FALSE;
- uchar = GSMext_to_UNICHAR(code_point);
+ if (!(code_point & 0x80)) {
+ /*
+ * Code point is valid (7-bit).
+ * Have we seen an escape?
+ */
+ if (saw_escape) {
+ saw_escape = FALSE;
+ uchar = GSMext_to_UNICHAR(code_point);
+ } else {
+ uchar = GSM_to_UNICHAR(code_point);
+ }
+ wmem_strbuf_append_unichar(strbuf, uchar);
} else {
- uchar = GSM_to_UNICHAR(code_point);
+ /* Invalid - put in a REPLACEMENT CHARACTER */
+ wmem_strbuf_append_unichar(strbuf, UNREPL);
}
- wmem_strbuf_append_unichar(strbuf, uchar);
}
return saw_escape;
}
guint8 *
-get_ts_23_038_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
- const gint bit_offset, gint no_of_chars)
+get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const guint8 *ptr,
+ const gint bit_offset, gint no_of_chars)
{
wmem_strbuf_t *strbuf;
gint char_count; /* character counter for string */
@@ -1004,6 +1010,155 @@ get_ts_23_038_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
}
+/*
+ * Convert "length" octets at "ptr", each holding one 3GPP TS 23.038
+ * 7-bit code position ("unpacked": one code position per octet), to a
+ * UTF-8 string allocated in "scope".
+ *
+ * Each octet is handed to handle_ts_23_038_char(), which tracks the
+ * escape state and appends a REPLACEMENT CHARACTER for octets with the
+ * 8th bit set.
+ */
guint8 *
+get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const guint8 *ptr,
+                                    gint length)
+{
+    wmem_strbuf_t *strbuf;
+    gint           i;       /* Byte counter for string */
+    gboolean       saw_escape = FALSE;
+
+    strbuf = wmem_strbuf_sized_new(scope, length+1, 0);
+
+    /*
+     * Index with the byte counter so that every octet of the input is
+     * decoded; dereferencing an un-advanced "ptr" would decode the
+     * first octet "length" times.
+     */
+    for (i = 0; i < length; i++)
+        saw_escape = handle_ts_23_038_char(strbuf, ptr[i], saw_escape);
+
+    return (guint8 *)wmem_strbuf_finalize(strbuf);
+}
+
+/*
+ * ETSI TS 102 221 Annex A.
+ *
+ * Convert "length" octets at "ptr", coded as described in ETSI TS 102 221
+ * Annex A, to a UTF-8 string allocated in "scope".
+ *
+ * The first octet selects the coding scheme:
+ *
+ *   0x80 - the rest of the octets are big-endian UCS-2;
+ *   0x81 - a character count and a one-octet UCS-2 base follow;
+ *   0x82 - a character count and a two-octet UCS-2 base follow.
+ *
+ * If the input is empty, is truncated, or has any other coding scheme
+ * value, whatever has been decoded up to that point (possibly an empty
+ * string) is returned; there is currently no error indication.
+ */
+guint8 *
+get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const guint8 *ptr,
+                                   gint length)
+{
+    guint8         string_type;
+    guint8         string_len;
+    gunichar2      ucs2_base;
+    wmem_strbuf_t *strbuf;
+    guint          i;       /* Character counter for string */
+    gboolean       saw_escape = FALSE;
+
+    /*
+     * get the first octet.
+     */
+    if (length == 0) {
+        /* XXX - return error indication */
+        /* Allocate in the caller's scope; the caller owns the result. */
+        strbuf = wmem_strbuf_new(scope, "");
+        return (guint8 *)wmem_strbuf_finalize(strbuf);
+    }
+    string_type = *ptr;
+    ptr++;
+    length--;
+
+    if (string_type == 0x80) {
+        /*
+         * Annex A, coding scheme 1) - big-endian UCS-2.
+         */
+        return get_ucs_2_string(scope, ptr, length, ENC_BIG_ENDIAN);
+    }
+
+    /*
+     * Annex A, coding schemes 2) and 3):
+     *
+     *    the second byte is the number of characters (characters,
+     *    not octets) in the string;
+     *
+     *    for coding scheme 2), the third byte defines bits 15 to 8
+     *    of all UCS-2 characters in the string (all bit numbers are
+     *    1-origin, so bit 1 is the low-order bit), with bit 16 being 0;
+     *
+     *    for coding scheme 3), the third byte and fourth bytes, treated
+     *    as a big-endian value, define the base value for all UCS-2
+     *    characters in the string;
+     *
+     *    for all subsequent bytes, if bit 8 is 0, it's a character
+     *    in the GSM Default Alphabet, otherwise, it is added to
+     *    the UCS-2 base value to give a UCS-2 character.
+     *
+     * XXX - that doesn't seem to indicate that a byte of 0x1b is
+     * treated as an escape character, it just says that a single octet
+     * with the 8th bit not set is a GSM Default Alphabet character.
+     */
+
+    /*
+     * Get the string length, in characters.
+     */
+    if (length == 0) {
+        /* XXX - return error indication */
+        strbuf = wmem_strbuf_new(scope, "");
+        return (guint8 *)wmem_strbuf_finalize(strbuf);
+    }
+    string_len = *ptr;
+    ptr++;
+    length--;
+
+    strbuf = wmem_strbuf_sized_new(scope, 2*string_len+1, 0);
+
+    /*
+     * Get the UCS-2 base.
+     */
+    if (string_type == 0x81) {
+        if (length == 0) {
+            /* XXX - return error indication */
+            return (guint8 *)wmem_strbuf_finalize(strbuf);
+        }
+        ucs2_base = (*ptr) << 7;
+        ptr++;
+        length--;
+    } else if (string_type == 0x82) {
+        if (length == 0) {
+            /* XXX - return error indication */
+            return (guint8 *)wmem_strbuf_finalize(strbuf);
+        }
+        ucs2_base = (*ptr) << 8;
+        ptr++;
+        length--;
+
+        if (length == 0) {
+            /* XXX - return error indication */
+            return (guint8 *)wmem_strbuf_finalize(strbuf);
+        }
+        ucs2_base |= *ptr;
+        ptr++;
+        length--;
+    } else {
+        /* Invalid string type. */
+        /* XXX - return error indication */
+        return (guint8 *)wmem_strbuf_finalize(strbuf);
+    }
+
+    for (i = 0; i < string_len; i++) {
+        guint8 byte;
+
+        if (length == 0) {
+            /*
+             * The character count says there are more characters,
+             * but we've run out of octets.
+             */
+            /* XXX - return error indication */
+            return (guint8 *)wmem_strbuf_finalize(strbuf);
+        }
+
+        /*
+         * Consume this octet, so that the next pass through the loop
+         * sees the next octet and the out-of-data check above works.
+         */
+        byte = *ptr;
+        ptr++;
+        length--;
+
+        if ((byte & 0x80) == 0) {
+            saw_escape = handle_ts_23_038_char(strbuf, byte, saw_escape);
+        } else {
+            gunichar2 uchar;
+
+            /*
+             * XXX - if saw_escape is true, this is bogus.
+             *
+             * XXX - should map lead and trail surrogate values to
+             * REPLACEMENT CHARACTERs (0xFFFD)?
+             * XXX - if there are an odd number of bytes, should put a
+             * REPLACEMENT CHARACTER at the end.
+             */
+            uchar = ucs2_base + (byte & 0x7f);
+            wmem_strbuf_append_unichar(strbuf, uchar);
+        }
+    }
+
+    return (guint8 *)wmem_strbuf_finalize(strbuf);
+}
+
+guint8 *
get_ascii_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
const gint bit_offset, gint no_of_chars)
{
diff --git a/epan/charsets.h b/epan/charsets.h
index 7b1c1b6ea4..4f824c5aef 100644
--- a/epan/charsets.h
+++ b/epan/charsets.h
@@ -166,10 +166,18 @@ WS_DLL_PUBLIC guint8 *
get_ucs_4_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const guint encoding);
WS_DLL_PUBLIC guint8 *
-get_ts_23_038_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
+get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const guint8 *ptr,
const gint bit_offset, gint no_of_chars);
WS_DLL_PUBLIC guint8 *
+get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const guint8 *ptr,
+ gint length);
+
+WS_DLL_PUBLIC guint8 *
+get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const guint8 *ptr,
+ gint length);
+
+WS_DLL_PUBLIC guint8 *
get_ascii_7bits_string(wmem_allocator_t *scope, const guint8 *ptr,
const gint bit_offset, gint no_of_chars);
diff --git a/epan/dissectors/packet-ansi_637.c b/epan/dissectors/packet-ansi_637.c
index c6a84a6827..97a760e5ed 100644
--- a/epan/dissectors/packet-ansi_637.c
+++ b/epan/dissectors/packet-ansi_637.c
@@ -451,7 +451,7 @@ text_decoder(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
offset = 0;
bit = fill_bits;
- proto_tree_add_ts_23_038_7bits_item(tree, hf_index, tvb_out, (offset << 3) + bit, num_fields);
+ proto_tree_add_ts_23_038_7bits_packed_item(tree, hf_index, tvb_out, (offset << 3) + bit, num_fields);
break;
case 0x10: /* KSC5601 (Korean) */
diff --git a/epan/dissectors/packet-cell_broadcast.c b/epan/dissectors/packet-cell_broadcast.c
index afd86f8040..6914be47d1 100644
--- a/epan/dissectors/packet-cell_broadcast.c
+++ b/epan/dissectors/packet-cell_broadcast.c
@@ -240,7 +240,7 @@ tvbuff_t * dissect_cbs_data(guint8 sms_encoding, tvbuff_t *tvb, proto_tree *tree
switch(sms_encoding){
case SMS_ENCODING_7BIT:
case SMS_ENCODING_7BIT_LANG:
- text = tvb_get_ts_23_038_7bits_string(pinfo->pool, tvb, offset<<3, (length*8)/7);
+ text = tvb_get_ts_23_038_7bits_string_packed(pinfo->pool, tvb, offset<<3, (length*8)/7);
tvb_out = tvb_new_child_real_data(tvb, text, (guint)strlen(text), (guint)strlen(text));
add_new_data_source(pinfo, tvb_out, "unpacked 7 bit data");
break;
diff --git a/epan/dissectors/packet-etsi_card_app_toolkit.c b/epan/dissectors/packet-etsi_card_app_toolkit.c
index 5da5e75526..1c0f97e8bb 100644
--- a/epan/dissectors/packet-etsi_card_app_toolkit.c
+++ b/epan/dissectors/packet-etsi_card_app_toolkit.c
@@ -997,89 +997,25 @@ typedef struct {
cat_nmr_type nmr_type;
} cat_transaction_t;
+/*
+ * ETSI TS 102 221 Annex A.
+ */
static void
dissect_cat_efadn_coding(tvbuff_t *tvb, proto_tree *tree, guint32 pos, guint32 len, int hf_entry)
{
if (len) {
- guint32 i;
-
guint8 first_byte = tvb_get_guint8(tvb, pos);
+
if ((first_byte & 0x80) == 0) {
- wmem_strbuf_t *strbuf = wmem_strbuf_sized_new(wmem_packet_scope(), len+1, 0);
- for (i = 0; i < len; i++) {
- guint8 gsm_chars[2];
- gsm_chars[0] = tvb_get_guint8(tvb, pos+i);
- if (gsm_chars[0] == 0x1b) {
- /* Escape character */
- guint8 second_byte;
- i++;
- second_byte = tvb_get_guint8(tvb, pos+i);
- gsm_chars[0] |= second_byte << 7;
- gsm_chars[1] = second_byte >> 1;
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 2));
- } else {
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 1));
- }
- }
- proto_tree_add_string(tree, hf_entry, tvb, pos, len, wmem_strbuf_finalize(strbuf));
- } else if (first_byte == 0x80) {
- proto_tree_add_item(tree, hf_entry, tvb, pos+1, len-1, ENC_UCS_2|ENC_BIG_ENDIAN);
- } else if (first_byte == 0x81) {
- guint8 string_len = tvb_get_guint8(tvb, pos+1);
- guint16 ucs2_base = tvb_get_guint8(tvb, pos+2) << 7;
- wmem_strbuf_t *strbuf = wmem_strbuf_sized_new(wmem_packet_scope(), 2*string_len+1, 0);
- for (i = 0; i < string_len; i++) {
- guint8 byte = tvb_get_guint8(tvb, pos+3+i);
- if ((byte & 0x80) == 0) {
- guint8 gsm_chars[2];
- gsm_chars[0] = byte;
- if (gsm_chars[0] == 0x1b) {
- /* Escape character */
- guint8 second_byte;
- i++;
- second_byte = tvb_get_guint8(tvb, pos+3+i);
- gsm_chars[0] |= second_byte << 7;
- gsm_chars[1] = second_byte >> 1;
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 2));
- } else {
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 1));
- }
- } else {
- guint8 ucs2_char[2];
- ucs2_char[0] = ucs2_base >> 8;
- ucs2_char[1] = (ucs2_base & 0xff) + (byte & 0x7f);
- wmem_strbuf_append(strbuf, get_ucs_2_string(wmem_packet_scope(), ucs2_char, 2, ENC_BIG_ENDIAN));
- }
- }
- proto_tree_add_string(tree, hf_entry, tvb, pos, len, wmem_strbuf_finalize(strbuf));
- } else if (first_byte == 0x82) {
- guint8 string_len = tvb_get_guint8(tvb, pos+1);
- guint16 ucs2_base = tvb_get_ntohs(tvb, pos+2);
- wmem_strbuf_t *strbuf = wmem_strbuf_sized_new(wmem_packet_scope(), 2*string_len+1, 0);
- for (i = 0; i < string_len; i++) {
- guint8 byte = tvb_get_guint8(tvb, pos+4+i);
- if ((byte & 0x80) == 0) {
- guint8 gsm_chars[2];
- gsm_chars[0] = byte;
- if (gsm_chars[0] == 0x1b) {
- /* Escape character */
- guint8 second_byte;
- i++;
- second_byte = tvb_get_guint8(tvb, pos+4+i);
- gsm_chars[0] |= second_byte << 7;
- gsm_chars[1] = second_byte >> 1;
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 2));
- } else {
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 1));
- }
- } else {
- guint8 ucs2_char[2];
- ucs2_char[0] = ucs2_base >> 8;
- ucs2_char[1] = (ucs2_base & 0xff) + (byte & 0x7f);
- wmem_strbuf_append(strbuf, get_ucs_2_string(wmem_packet_scope(), ucs2_char, 2, ENC_BIG_ENDIAN));
- }
- }
- proto_tree_add_string(tree, hf_entry, tvb, pos, len, wmem_strbuf_finalize(strbuf));
+ /*
+ * Unpacked GSM alphabet.
+ */
+ proto_tree_add_item(tree, hf_entry, tvb, pos, len, ENC_3GPP_TS_23_038_7BITS_UNPACKED|ENC_NA);
+ } else {
+ /*
+ * Annex A.
+ */
+ proto_tree_add_item(tree, hf_entry, tvb, pos, len, ENC_ETSI_TS_102_221_ANNEX_A);
}
}
}
diff --git a/epan/dissectors/packet-gmr1_rr.c b/epan/dissectors/packet-gmr1_rr.c
index 52fb29a5d7..00df109157 100644
--- a/epan/dissectors/packet-gmr1_rr.c
+++ b/epan/dissectors/packet-gmr1_rr.c
@@ -652,7 +652,7 @@ GMR1_IE_FUNC(gmr1_ie_rr_pos_display)
txt_packed_tvb = tvb_new_real_data(txt_packed, 11, 11);
/* Unpack text */
- txt_unpacked = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), txt_packed_tvb, 0, 12);
+ txt_unpacked = tvb_get_ts_23_038_7bits_string_packed(wmem_packet_scope(), txt_packed_tvb, 0, 12);
tvb_free(txt_packed_tvb);
/* Display it */
diff --git a/epan/dissectors/packet-gsm_a_dtap.c b/epan/dissectors/packet-gsm_a_dtap.c
index cb7dd3a0b0..706b069e0f 100644
--- a/epan/dissectors/packet-gsm_a_dtap.c
+++ b/epan/dissectors/packet-gsm_a_dtap.c
@@ -938,7 +938,7 @@ de_network_name(tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo, guint32 off
{
expert_add_info(pinfo, item, &ei_gsm_a_dtap_text_string_not_multiple_of_7);
}
- proto_tree_add_ts_23_038_7bits_item(tree, hf_gsm_a_dtap_text_string, tvb, curr_offset<<3, num_text_bits/7);
+ proto_tree_add_ts_23_038_7bits_packed_item(tree, hf_gsm_a_dtap_text_string, tvb, curr_offset<<3, num_text_bits/7);
break;
case 1:
proto_tree_add_item(tree, hf_gsm_a_dtap_text_string, tvb, curr_offset, len - 1, ENC_UCS_2|ENC_BIG_ENDIAN);
diff --git a/epan/dissectors/packet-gsm_sms.c b/epan/dissectors/packet-gsm_sms.c
index 922f1b2d30..e82976e25d 100644
--- a/epan/dissectors/packet-gsm_sms.c
+++ b/epan/dissectors/packet-gsm_sms.c
@@ -645,7 +645,7 @@ dis_field_addr(tvbuff_t *tvb, packet_info* pinfo, proto_tree *tree, guint32 *off
{
case 0x05: /* "Alphanumeric (coded according to 3GPP TS 23.038 GSM 7-bit default alphabet)" */
addrlength = (addrlength << 2) / 7;
- addrstr = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, offset << 3,
+ addrstr = tvb_get_ts_23_038_7bits_string_packed(wmem_packet_scope(), tvb, offset << 3,
(addrlength > MAX_ADDR_SIZE) ? MAX_ADDR_SIZE : addrlength);
break;
default:
@@ -2131,29 +2131,23 @@ dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
{
if (data && data->stk_packing_required)
{
- /* STK requires SMS packing by the terminal */
- /* Per 3GPP 31.111 chapter 6.4.10: */
- /* It shall use the SMS default 7-bit coded alphabet */
- /* as defined in TS 23.038 with bit 8 set to 0 */
+ /*
+ * STK requires SMS packing by the terminal; this means
+ * that the string here is *not* packed 7 bits per
+ * character, but is unpacked, with each character in
+ * an octet, with the expectation that the recipient
+ * will pack it before sending it on the network.
+ *
+ * Per 3GPP 31.111 chapter 6.4.10:
+ * It shall use the SMS default 7-bit coded alphabet
+ * as defined in TS 23.038 with bit 8 set to 0
+ *
+ * I.e., bit 8 of each octet should be 0.
+ */
if(!(reassembled && pinfo->num == reassembled_in))
{
- wmem_strbuf_t *strbuf = wmem_strbuf_sized_new(wmem_packet_scope(), length+1, 0);
- for (i = 0; i < length; i++) {
- guint8 gsm_chars[2];
- gsm_chars[0] = tvb_get_guint8(tvb, offset+i);
- if (gsm_chars[0] == 0x1b) {
- /* Escape character */
- guint8 second_byte;
- i++;
- second_byte = tvb_get_guint8(tvb, offset+i);
- gsm_chars[0] |= second_byte << 7;
- gsm_chars[1] = second_byte >> 1;
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 2));
- } else {
- wmem_strbuf_append(strbuf, get_ts_23_038_7bits_string(wmem_packet_scope(), gsm_chars, 0, 1));
- }
- }
- proto_tree_add_string(subtree, hf_gsm_sms_text, tvb, offset, length, wmem_strbuf_finalize(strbuf));
+ proto_tree_add_item(subtree, hf_gsm_sms_text, tvb, offset,
+ length, ENC_3GPP_TS_23_038_7BITS_UNPACKED);
}
else
{
@@ -2181,7 +2175,7 @@ dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
if(!(reassembled && pinfo->num == reassembled_in))
{
/* Show unassembled SMS */
- proto_tree_add_ts_23_038_7bits_item(subtree, hf_gsm_sms_text, tvb, (offset<<3)+fill_bits,
+ proto_tree_add_ts_23_038_7bits_packed_item(subtree, hf_gsm_sms_text, tvb, (offset<<3)+fill_bits,
(udl > SMS_MAX_MESSAGE_SIZE ? SMS_MAX_MESSAGE_SIZE : udl));
}
else
@@ -2202,7 +2196,7 @@ dis_field_ud(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset
&frag_params_key);
if (p_frag_params) {
- proto_tree_add_ts_23_038_7bits_item(subtree, hf_gsm_sms_text, sm_tvb,
+ proto_tree_add_ts_23_038_7bits_packed_item(subtree, hf_gsm_sms_text, sm_tvb,
(total_sms_len<<3)+p_frag_params->fill_bits,
(p_frag_params->udl > SMS_MAX_MESSAGE_SIZE ? SMS_MAX_MESSAGE_SIZE : p_frag_params->udl));
diff --git a/epan/dissectors/packet-mbim.c b/epan/dissectors/packet-mbim.c
index 22841f05c1..7ac2249f82 100644
--- a/epan/dissectors/packet-mbim.c
+++ b/epan/dissectors/packet-mbim.c
@@ -3833,7 +3833,7 @@ static void mbim_decode_sms_cdma_text(tvbuff_t *tvb, proto_tree *tree, const int
proto_tree_add_item(tree, hfindex, tvb, offset, size_in_bytes, ENC_ISO_8859_1|ENC_NA);
break;
case MBIM_ENCODING_GSM_7BIT:
- proto_tree_add_ts_23_038_7bits_item(tree, hfindex, tvb, (offset << 3), size_in_chars);
+ proto_tree_add_ts_23_038_7bits_packed_item(tree, hfindex, tvb, (offset << 3), size_in_chars);
break;
default:
break;
diff --git a/epan/dissectors/packet-nas_eps.c b/epan/dissectors/packet-nas_eps.c
index 712ceb1f66..496b1d604c 100644
--- a/epan/dissectors/packet-nas_eps.c
+++ b/epan/dissectors/packet-nas_eps.c
@@ -2356,7 +2356,7 @@ de_emm_ext_emerg_num_list(tvbuff_t *tvb, proto_tree *tree, packet_info *pinfo _U
tvb, curr_offset, 1, ENC_NA, &length);
curr_offset++;
if (length > 0) {
- proto_tree_add_ts_23_038_7bits_item(sub_tree, hf_eps_emm_ext_emerg_num_list_sub_serv_field,
+ proto_tree_add_ts_23_038_7bits_packed_item(sub_tree, hf_eps_emm_ext_emerg_num_list_sub_serv_field,
tvb, curr_offset<<3, (length<<3)/7);
curr_offset += length;
}
diff --git a/epan/proto.c b/epan/proto.c
index a67d10c8bb..7b33532f9c 100644
--- a/epan/proto.c
+++ b/epan/proto.c
@@ -12676,7 +12676,7 @@ proto_tree_add_boolean_bits_format_value64(proto_tree *tree, const int hfindex,
}
proto_item *
-proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
+proto_tree_add_ts_23_038_7bits_packed_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
const guint bit_offset, const gint no_of_chars)
{
proto_item *pi;
@@ -12694,7 +12694,7 @@ proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_
byte_length = (((no_of_chars + 1) * 7) + (bit_offset & 0x07)) >> 3;
byte_offset = bit_offset >> 3;
- string = tvb_get_ts_23_038_7bits_string(wmem_packet_scope(), tvb, bit_offset, no_of_chars);
+ string = tvb_get_ts_23_038_7bits_string_packed(wmem_packet_scope(), tvb, bit_offset, no_of_chars);
if (hfinfo->display == STR_UNICODE) {
DISSECTOR_ASSERT(g_utf8_validate(string, -1, NULL));
diff --git a/epan/proto.h b/epan/proto.h
index b93e8066ee..a4c59e148c 100644
--- a/epan/proto.h
+++ b/epan/proto.h
@@ -385,45 +385,48 @@ void proto_report_dissector_bug(const char *format, ...)
*
* *DO NOT* add anything to this set that is not a character encoding!
*/
-#define ENC_CHARENCODING_MASK 0x0000FFFE /* mask out byte-order bits and other bits used with string encodings */
-#define ENC_ASCII 0x00000000
-#define ENC_ISO_646_IRV ENC_ASCII /* ISO 646 International Reference Version = ASCII */
-#define ENC_UTF_8 0x00000002
-#define ENC_UTF_16 0x00000004
-#define ENC_UCS_2 0x00000006
-#define ENC_UCS_4 0x00000008
-#define ENC_ISO_8859_1 0x0000000A
-#define ENC_ISO_8859_2 0x0000000C
-#define ENC_ISO_8859_3 0x0000000E
-#define ENC_ISO_8859_4 0x00000010
-#define ENC_ISO_8859_5 0x00000012
-#define ENC_ISO_8859_6 0x00000014
-#define ENC_ISO_8859_7 0x00000016
-#define ENC_ISO_8859_8 0x00000018
-#define ENC_ISO_8859_9 0x0000001A
-#define ENC_ISO_8859_10 0x0000001C
-#define ENC_ISO_8859_11 0x0000001E
-/* #define ENC_ISO_8859_12 0x00000020 ISO 8859-12 was abandoned */
-#define ENC_ISO_8859_13 0x00000022
-#define ENC_ISO_8859_14 0x00000024
-#define ENC_ISO_8859_15 0x00000026
-#define ENC_ISO_8859_16 0x00000028
-#define ENC_WINDOWS_1250 0x0000002A
-#define ENC_3GPP_TS_23_038_7BITS 0x0000002C
-#define ENC_EBCDIC 0x0000002E
-#define ENC_MAC_ROMAN 0x00000030
-#define ENC_CP437 0x00000032
-#define ENC_ASCII_7BITS 0x00000034
-#define ENC_T61 0x00000036
-#define ENC_EBCDIC_CP037 0x00000038
-#define ENC_WINDOWS_1252 0x0000003A
-#define ENC_WINDOWS_1251 0x0000003C
-#define ENC_CP855 0x0000003E
-#define ENC_CP866 0x00000040
-#define ENC_ISO_646_BASIC 0x00000042
-#define ENC_BCD_DIGITS_0_9 0x00000044 /* Packed BCD, digits 0-9 */
-#define ENC_KEYPAD_ABC_TBCD 0x00000046 /* Keypad-with-a/b/c "telephony BCD" = 0-9, *, #, a, b, c */
-#define ENC_KEYPAD_BC_TBCD 0x00000048 /* Keypad-with-B/C "telephony BCD" = 0-9, B, C, *, # */
+#define ENC_CHARENCODING_MASK 0x0000FFFE /* mask out byte-order bits and other bits used with string encodings */
+#define ENC_ASCII 0x00000000
+#define ENC_ISO_646_IRV ENC_ASCII /* ISO 646 International Reference Version = ASCII */
+#define ENC_UTF_8 0x00000002
+#define ENC_UTF_16 0x00000004
+#define ENC_UCS_2 0x00000006
+#define ENC_UCS_4 0x00000008
+#define ENC_ISO_8859_1 0x0000000A
+#define ENC_ISO_8859_2 0x0000000C
+#define ENC_ISO_8859_3 0x0000000E
+#define ENC_ISO_8859_4 0x00000010
+#define ENC_ISO_8859_5 0x00000012
+#define ENC_ISO_8859_6 0x00000014
+#define ENC_ISO_8859_7 0x00000016
+#define ENC_ISO_8859_8 0x00000018
+#define ENC_ISO_8859_9 0x0000001A
+#define ENC_ISO_8859_10 0x0000001C
+#define ENC_ISO_8859_11 0x0000001E
+/* #define ENC_ISO_8859_12 0x00000020 ISO 8859-12 was abandoned */
+#define ENC_ISO_8859_13 0x00000022
+#define ENC_ISO_8859_14 0x00000024
+#define ENC_ISO_8859_15 0x00000026
+#define ENC_ISO_8859_16 0x00000028
+#define ENC_WINDOWS_1250 0x0000002A
+#define ENC_3GPP_TS_23_038_7BITS_PACKED 0x0000002C
+#define ENC_3GPP_TS_23_038_7BITS ENC_3GPP_TS_23_038_7BITS_PACKED
+#define ENC_EBCDIC 0x0000002E
+#define ENC_MAC_ROMAN 0x00000030
+#define ENC_CP437 0x00000032
+#define ENC_ASCII_7BITS 0x00000034
+#define ENC_T61 0x00000036
+#define ENC_EBCDIC_CP037 0x00000038
+#define ENC_WINDOWS_1252 0x0000003A
+#define ENC_WINDOWS_1251 0x0000003C
+#define ENC_CP855 0x0000003E
+#define ENC_CP866 0x00000040
+#define ENC_ISO_646_BASIC 0x00000042
+#define ENC_BCD_DIGITS_0_9 0x00000044 /* Packed BCD, digits 0-9 */
+#define ENC_KEYPAD_ABC_TBCD 0x00000046 /* Keypad-with-a/b/c "telephony BCD" = 0-9, *, #, a, b, c */
+#define ENC_KEYPAD_BC_TBCD 0x00000048 /* Keypad-with-B/C "telephony BCD" = 0-9, B, C, *, # */
+#define ENC_3GPP_TS_23_038_7BITS_UNPACKED 0x0000004C
+#define ENC_ETSI_TS_102_221_ANNEX_A 0x0000004E /* ETSI TS 102 221 Annex A */
/*
* TODO:
*
@@ -3167,7 +3170,8 @@ proto_tree_add_float_bits_format_value(proto_tree *tree, const int hf_index, tvb
G_GNUC_PRINTF(7,8);
-/** Add a FT_STRING with ENC_3GPP_TS_23_038_7BITS encoding to a proto_tree.
+/** Add a FT_STRING with ENC_3GPP_TS_23_038_7BITS_PACKED encoding to a
+ proto_tree.
@param tree the tree to append this item to
@param hfindex field index
@param tvb the tv buffer of the current data
@@ -3175,7 +3179,7 @@ proto_tree_add_float_bits_format_value(proto_tree *tree, const int hf_index, tvb
@param no_of_chars number of 7bits characters to display
@return the newly created item */
WS_DLL_PUBLIC proto_item *
-proto_tree_add_ts_23_038_7bits_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
+proto_tree_add_ts_23_038_7bits_packed_item(proto_tree *tree, const int hfindex, tvbuff_t *tvb,
const guint bit_offset, const gint no_of_chars);
/** Add a FT_STRING with ENC_ASCII_7BITS encoding to a proto_tree.
@@ -3525,7 +3529,7 @@ proto_custom_set(proto_tree* tree, GSList *field_id,
#define proto_tree_add_float_bits_format_value(tree, hfinfo, tvb, start, no_of_bits, value, format, ...) \
proto_tree_add_float_bits_format_value(tree, (hfinfo)->id, tvb, start, no_of_bits, value, format, __VA_ARGS__)
-#define proto_tree_add_ts_23_038_7bits_item(tree, hfinfo, tvb, start, no_of_chars) \
+#define proto_tree_add_ts_23_038_7bits_packed_item(tree, hfinfo, tvb, start, no_of_chars) \
proto_tree_add_ts_23_038_7bits(tree, (hfinfo)->id, tvb, start, no_of_chars)
#define proto_tree_add_ascii_7bits_item(tree, hfinfo, tvb, start, no_of_chars) \
diff --git a/epan/tvbuff.c b/epan/tvbuff.c
index 41e3cd0cbf..5ef52e054b 100644
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@@ -1606,7 +1606,8 @@ validate_single_byte_ascii_encoding(const guint encoding)
case ENC_UTF_16:
case ENC_UCS_2:
case ENC_UCS_4:
- case ENC_3GPP_TS_23_038_7BITS:
+ case ENC_3GPP_TS_23_038_7BITS_PACKED:
+ case ENC_ASCII_7BITS:
case ENC_EBCDIC:
REPORT_DISSECTOR_BUG("Invalid string encoding type passed to tvb_get_string_XXX");
break;
@@ -2698,7 +2699,7 @@ tvb_get_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
}
gchar *
-tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
+tvb_get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, tvbuff_t *tvb,
const gint bit_offset, gint no_of_chars)
{
gint in_offset = bit_offset >> 3; /* Current pointer to the input buffer */
@@ -2708,7 +2709,31 @@ tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
DISSECTOR_ASSERT(tvb && tvb->initialized);
ptr = ensure_contiguous(tvb, in_offset, length);
- return get_ts_23_038_7bits_string(scope, ptr, bit_offset, no_of_chars);
+ return get_ts_23_038_7bits_string_packed(scope, ptr, bit_offset, no_of_chars);
+}
+
+gchar *
+tvb_get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, tvbuff_t *tvb,
+    const gint offset, gint length)
+{
+    const guint8 *octets;   /* the "length" octets starting at "offset" */
+
+    DISSECTOR_ASSERT(tvb && tvb->initialized);
+
+    /*
+     * Get a pointer to the raw octets in the tvbuff, then hand them to
+     * the charset routine for unpacked (one code position per octet)
+     * 3GPP TS 23.038 strings, which builds the UTF-8 result in "scope".
+     */
+    octets = ensure_contiguous(tvb, offset, length);
+
+    return get_ts_23_038_7bits_string_unpacked(scope, octets, length);
+}
+
+gchar *
+tvb_get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, tvbuff_t *tvb,
+    const gint offset, gint length)
+{
+    const guint8 *octets;   /* the "length" octets starting at "offset" */
+
+    DISSECTOR_ASSERT(tvb && tvb->initialized);
+
+    /*
+     * Get a pointer to the raw octets in the tvbuff, then hand them to
+     * the charset routine for ETSI TS 102 221 Annex A strings, which
+     * builds the UTF-8 result in "scope".
+     */
+    octets = ensure_contiguous(tvb, offset, length);
+
+    return get_etsi_ts_102_221_annex_a_string(scope, octets, length);
+}
gchar *
@@ -2933,11 +2958,11 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
strptr = tvb_get_iso_646_string(scope, tvb, offset, length, charset_table_iso_646_basic);
break;
- case ENC_3GPP_TS_23_038_7BITS:
+ case ENC_3GPP_TS_23_038_7BITS_PACKED:
{
gint bit_offset = offset << 3;
gint no_of_chars = (length << 3) / 7;
- strptr = tvb_get_ts_23_038_7bits_string(scope, tvb, bit_offset, no_of_chars);
+ strptr = tvb_get_ts_23_038_7bits_string_packed(scope, tvb, bit_offset, no_of_chars);
}
break;
@@ -2991,6 +3016,14 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
*/
strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt_ansi_tbcd, FALSE);
break;
+
+ case ENC_3GPP_TS_23_038_7BITS_UNPACKED:
+ strptr = tvb_get_ts_23_038_7bits_string_unpacked(scope, tvb, offset, length);
+ break;
+
+ case ENC_ETSI_TS_102_221_ANNEX_A:
+ strptr = tvb_get_etsi_ts_102_221_annex_a_string(scope, tvb, offset, length);
+ break;
}
return strptr;
}
@@ -3340,7 +3373,9 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
strptr = tvb_get_iso_646_stringz(scope, tvb, offset, lengthp, charset_table_iso_646_basic);
break;
- case ENC_3GPP_TS_23_038_7BITS:
+ case ENC_3GPP_TS_23_038_7BITS_PACKED:
+ case ENC_3GPP_TS_23_038_7BITS_UNPACKED:
+ case ENC_ETSI_TS_102_221_ANNEX_A:
REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
break;
diff --git a/epan/tvbuff.h b/epan/tvbuff.h
index 388625370f..cd2f5e1bc8 100644
--- a/epan/tvbuff.h
+++ b/epan/tvbuff.h
@@ -594,7 +594,8 @@ WS_DLL_PUBLIC guint8 *tvb_get_string_enc(wmem_allocator_t *scope,
/**
* Given an allocator scope, a tvbuff, a bit offset, and a length in
* 7-bit characters (not octets!), with the specified offset and
- * length referring to a string in the 3GPP TS 23.038 7bits encoding:
+ * length referring to a string in the 3GPP TS 23.038 7bits encoding,
+ * with code points packed into 7 bits:
*
* allocate a buffer using the specified scope;
*
@@ -611,11 +612,58 @@ WS_DLL_PUBLIC guint8 *tvb_get_string_enc(wmem_allocator_t *scope,
* the memory allocated. Otherwise memory is automatically freed when the
* scope lifetime is reached.
*/
-WS_DLL_PUBLIC gchar *tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope,
+WS_DLL_PUBLIC gchar *tvb_get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope,
tvbuff_t *tvb, const gint bit_offset, gint no_of_chars);
/**
- * Given an allocator scope, a tvbuff, a bit offset, and a length in
+ * Given an allocator scope, a tvbuff, an offset, and a length in
+ * octets, with the specified offset and length referring to a string
+ * in the 3GPP TS 23.038 7bits encoding, with one octet per code point
+ * (the 8th bit of each octet should be 0; if not, the octet is invalid):
+ *
+ * allocate a buffer using the specified scope;
+ *
+ * convert the string from the specified encoding to UTF-8, possibly
+ * mapping some characters or invalid octet sequences to the Unicode
+ * REPLACEMENT CHARACTER, and put the resulting UTF-8 string, plus a
+ * trailing '\0', into that buffer;
+ *
+ * and return a pointer to the buffer.
+ *
+ * Throws an exception if the tvbuff ends before the string does.
+ *
+ * If scope is set to NULL it is the user's responsibility to wmem_free()
+ * the memory allocated. Otherwise memory is automatically freed when the
+ * scope lifetime is reached.
+ */
+WS_DLL_PUBLIC gchar *tvb_get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope,
+ tvbuff_t *tvb, const gint offset, gint length);
+
+/**
+ * Given an allocator scope, a tvbuff, an offset, and a length in
+ * octets, with the specified offset and length referring to a string
+ * in one of the ETSI TS 102 221 Annex A encodings:
+ *
+ * allocate a buffer using the specified scope;
+ *
+ * convert the string from the specified encoding to UTF-8, possibly
+ * mapping some characters or invalid octet sequences to the Unicode
+ * REPLACEMENT CHARACTER, and put the resulting UTF-8 string, plus a
+ * trailing '\0', into that buffer;
+ *
+ * and return a pointer to the buffer.
+ *
+ * Throws an exception if the tvbuff ends before the string does.
+ *
+ * If scope is set to NULL it is the user's responsibility to wmem_free()
+ * the memory allocated. Otherwise memory is automatically freed when the
+ * scope lifetime is reached.
+ */
+WS_DLL_PUBLIC gchar *tvb_get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope,
+ tvbuff_t *tvb, const gint offset, gint length);
+
+/**
+ * Given an allocator scope, a tvbuff, an offset, and a length in
* 7-bit characters (not octets!), with the specified offset and
* length referring to a string in the ASCII 7bits encoding:
*