diff options
-rw-r--r-- | epan/charsets.c | 2 | ||||
-rw-r--r-- | epan/proto.h | 6 | ||||
-rw-r--r-- | epan/tvbuff.c | 58 |
3 files changed, 63 insertions, 3 deletions
diff --git a/epan/charsets.c b/epan/charsets.c index 800b8705aa..dd13ca1217 100644 --- a/epan/charsets.c +++ b/epan/charsets.c @@ -178,6 +178,7 @@ EBCDIC_to_ASCII1(guint8 c) #define UNREPL 0xFFFD /* ISO-8859-2 */ +/* http://en.wikipedia.org/wiki/ISO/IEC_8859-2#Code_page_layout */ const gunichar2 charset_table_iso_8859_2[0x80] = { UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, /* 0x80 - */ UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, /* - 0x8F */ @@ -198,6 +199,7 @@ const gunichar2 charset_table_iso_8859_2[0x80] = { }; /* Windows-1250 */ +/* http://en.wikipedia.org/wiki/Windows-1250 */ const gunichar2 charset_table_cp1250[0x80] = { 0x20ac, UNREPL, 0x201a, UNREPL, 0x201e, 0x2026, 0x2020, 0x2021, /* 0x80 - */ UNREPL, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179, /* - 0x8F */ diff --git a/epan/proto.h b/epan/proto.h index e5c8af62fe..fa0a365345 100644 --- a/epan/proto.h +++ b/epan/proto.h @@ -284,8 +284,9 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa #define ENC_UTF_16 0x00000004 #define ENC_UCS_2 0x00000006 #define ENC_EBCDIC 0x00000008 -#define ENC_WINDOWS_1250 0x0000000A /* http://en.wikipedia.org/wiki/Windows-1250 */ -#define ENC_ISO_8859_2 0x0000000C /* http://en.wikipedia.org/wiki/ISO/IEC_8859-2#Code_page_layout */ +#define ENC_WINDOWS_1250 0x0000000A +#define ENC_ISO_8859_1 0x0000000C +#define ENC_ISO_8859_2 0x0000000E /* * TODO: @@ -293,7 +294,6 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa * These could probably be used by existing code: * * ENC_UCS_4 - UCS-4 - * ENC_ISO_8859_1 - ISO 8859/1 * ENC_ISO_8859_8 - ISO 8859/8 * - "IBM MS DBCS" * - JIS C 6226 diff --git a/epan/tvbuff.c b/epan/tvbuff.c index c6d5c8c6fa..70a0c2150e 100644 --- a/epan/tvbuff.c +++ b/epan/tvbuff.c @@ -1808,6 +1808,35 @@ tvb_get_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const } static guint8 * +tvb_get_string_8859_1(wmem_allocator_t *scope, 
tvbuff_t *tvb, gint offset, gint length) +{ + wmem_strbuf_t *str; + + str = wmem_strbuf_new(scope, ""); + + while (length > 0) { + guint8 ch = tvb_get_guint8(tvb, offset); + + if (ch < 0x80) + wmem_strbuf_append_c(str, ch); + else { + /* + * Note: we assume here that the code points + * 0x80-0x9F are used for C1 control characters, + * and thus have the same value as the corresponding + * Unicode code points. + */ + wmem_strbuf_append_unichar(str, ch); + } + offset++; + length--; + } + + /* XXX, discarding constness; should we have some function which "takes over" strbuf->str (like when strbuf is no longer needed)? */ + return (guint8 *) wmem_strbuf_get_str(str); +} + +static guint8 * tvb_get_string_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80]) { wmem_strbuf_t *str; @@ -1911,6 +1940,15 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1250); break; + case ENC_ISO_8859_1: + /* + * ISO 8859-1 printable code point values are equal + * to the equivalent Unicode code point value, so + * no translation table is needed. 
+ */ + strbuf = tvb_get_string_8859_1(scope, tvb, offset, length); + break; + case ENC_ISO_8859_2: strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_2); break; @@ -1991,6 +2029,17 @@ tvb_get_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint } static guint8 * +tvb_get_stringz_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp) +{ + guint size; + + /* XXX, conversion between signed/unsigned integer */ + *lengthp = size = tvb_strsize(tvb, offset); + + return tvb_get_string_8859_1(scope, tvb, offset, size); +} + +static guint8 * tvb_get_stringz_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80]) { guint size; @@ -2030,6 +2079,15 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1250); break; + case ENC_ISO_8859_1: + /* + * ISO 8859-1 printable code point values are equal + * to the equivalent Unicode code point value, so + * no translation table is needed. + */ + strptr = tvb_get_stringz_8859_1(scope, tvb, offset, lengthp); + break; + case ENC_ISO_8859_2: strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_2); break; |