From 4bd8336017373bef48ee18c20829a06734ffad3b Mon Sep 17 00:00:00 2001
From: Jakub Zawadzki
Date: Thu, 24 Apr 2014 21:56:43 +0200
Subject: Move GSM guint8 to unicode conversion functions to charsets.c

charsets.c is already place with huge number of conversion tables.
Also make gsm_default_alphabet gunichar2, all values fits in 2 bytes.

Change-Id: Ia5ab6c176b4fec21ec76b06513c1d00794ba10ef
Reviewed-on: https://code.wireshark.org/review/1328
Reviewed-by: Anders Broman
---
 epan/charsets.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 epan/charsets.h |  6 ++++++
 epan/tvbuff.c   | 60 ++----------------------------------------------------------
 3 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/epan/charsets.c b/epan/charsets.c
index 9774404c0b..de2a842e64 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -26,6 +26,9 @@
 
 #include "charsets.h"
 
+/* REPLACEMENT CHARACTER */
+#define UNREPL 0xFFFD
+
 /*
  * Wikipedia's "Character encoding" template, giving a pile of character encodings and
  * Wikipedia pages for them:
@@ -166,15 +169,68 @@ EBCDIC_to_ASCII1(guint8 c)
     return EBCDIC_translate_ASCII[c];
 }
 
+/*
+ * FROM GNOKII
+ * gsm-encoding.c
+ * gsm-sms.c
+ */
+
+/* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */
+static const gunichar2 gsm_default_alphabet[0x80] = {
+    '@', 0xa3, '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec,
+    0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
+    0x394, '_', 0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
+    0x3a3, 0x398, 0x39e, 0xa0, 0xc6, 0xe6, 0xdf, 0xc9,
+    ' ', '!', '\"', '#', 0xa4, '%', '&', '\'',
+    '(', ')', '*', '+', ',', '-', '.', '/',
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', ':', ';', '<', '=', '>', '?',
+    0xa1, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
+    'X', 'Y', 'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
+    0xbf, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+    'x', 'y', 'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0
+};
+
+gunichar
+GSMext_to_UNICHAR(guint8 c)
+{
+    switch (c)
+    {
+        case 0x0a: return 0x0c; /* form feed */
+        case 0x14: return '^';
+        case 0x28: return '{';
+        case 0x29: return '}';
+        case 0x2f: return '\\';
+        case 0x3c: return '[';
+        case 0x3d: return '~';
+        case 0x3e: return ']';
+        case 0x40: return '|';
+        case 0x65: return 0x20ac; /* euro */
+    }
+
+    return UNREPL; /* invalid character */
+}
+
+gunichar
+GSM_to_UNICHAR(guint8 c)
+{
+    if (c < G_N_ELEMENTS(gsm_default_alphabet))
+        return gsm_default_alphabet[c];
+
+    return UNREPL;
+}
+
+
 /*
  * Translation tables that map the upper 128 code points in single-byte
  * "extended ASCII" character encodings to Unicode code points in the
  * Basic Multilingual Plane.
  */
 
-/* REPLACEMENT CHARACTER */
-#define UNREPL 0xFFFD
-
 /* ISO-8859-2 (http://en.wikipedia.org/wiki/ISO/IEC_8859-2#Code_page_layout) */
 const gunichar2 charset_table_iso_8859_2[0x80] = {
     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 - */
@@ -516,6 +572,7 @@ const gunichar2 charset_table_cp437[0x80] = {
     0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0, /* - 0xFF */
 };
 
+
 /*
  * Editor modelines - http://www.wireshark.org/tools/modelines.html
  *
diff --git a/epan/charsets.h b/epan/charsets.h
index 8b6f752b8b..40ec9d9034 100644
--- a/epan/charsets.h
+++ b/epan/charsets.h
@@ -37,6 +37,12 @@ void EBCDIC_to_ASCII(guint8 *buf, guint bytes);
 WS_DLL_PUBLIC
 guint8 EBCDIC_to_ASCII1(guint8 c);
 
+WS_DLL_PUBLIC gunichar
+GSM_to_UNICHAR(guint8 c);
+
+WS_DLL_PUBLIC gunichar
+GSMext_to_UNICHAR(guint8 c);
+
 /*
  * Translation tables that map the upper 128 code points in single-byte
  * "extended ASCII" character encodings to Unicode code points in the
diff --git a/epan/tvbuff.c b/epan/tvbuff.c
index df0c2878aa..fac825576b 100644
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@@ -2500,70 +2500,14 @@ tvb_get_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
  */
 #define GN_BYTE_MASK ((1 << bits) - 1)
 
-#define GN_CHAR_ALPHABET_SIZE 128
-
 #define GN_CHAR_ESCAPE 0x1b
 
-static const gunichar gsm_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
-
-    /* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */
-
-    '@', 0xa3, '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec,
-    0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
-    0x394, '_', 0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
-    0x3a3, 0x398, 0x39e, 0xa0, 0xc6, 0xe6, 0xdf, 0xc9,
-    ' ', '!', '\"', '#', 0xa4, '%', '&', '\'',
-    '(', ')', '*', '+', ',', '-', '.', '/',
-    '0', '1', '2', '3', '4', '5', '6', '7',
-    '8', '9', ':', ';', '<', '=', '>', '?',
-    0xa1, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
-    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
-    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
-    'X', 'Y', 'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
-    0xbf, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
-    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
-    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
-    'x', 'y', 'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0
-};
-
 static gboolean
 char_is_escape(unsigned char value)
 {
     return (value == GN_CHAR_ESCAPE);
 }
 
-static gunichar
-char_def_alphabet_ext_decode(unsigned char value)
-{
-    switch (value)
-    {
-        case 0x0a: return 0x0c; /* form feed */
-        case 0x14: return '^';
-        case 0x28: return '{';
-        case 0x29: return '}';
-        case 0x2f: return '\\';
-        case 0x3c: return '[';
-        case 0x3d: return '~';
-        case 0x3e: return ']';
-        case 0x40: return '|';
-        case 0x65: return 0x20ac; /* euro */
-        default: return UNREPL; /* invalid character */
-    }
-}
-
-static gunichar
-char_def_alphabet_decode(unsigned char value)
-{
-    if (value < GN_CHAR_ALPHABET_SIZE)
-    {
-        return gsm_default_alphabet[value];
-    }
-    else
-    {
-        return UNREPL;
-    }
-}
-
 static gboolean
 handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
     gboolean saw_escape)
@@ -2584,9 +2528,9 @@ handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
          */
         if (saw_escape) {
             saw_escape = FALSE;
-            uchar = char_def_alphabet_ext_decode(code_point);
+            uchar = GSMext_to_UNICHAR(code_point);
         } else {
-            uchar = char_def_alphabet_decode(code_point);
+            uchar = GSM_to_UNICHAR(code_point);
         }
         wmem_strbuf_append_unichar(strbuf, uchar);
     }
-- 
cgit v1.2.3