aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Guy Harris <guy@alum.mit.edu> 2013-12-08 01:05:35 +0000
committer Guy Harris <guy@alum.mit.edu> 2013-12-08 01:05:35 +0000
commit 562348fbb823bd8ea67e62e45f83db86904b01cf (patch)
tree 720f3fccf07fcd9af7124afc87acb1c302b13205
parent 0f05597ab17ea7fc5161458c670f56a523cb9c42 (diff)
Add ENC_ISO_8859_1.
Move the Wikipedia links for the code page layouts in front of the tables whose contents reflect the code page layouts. svn path=/trunk/; revision=53837
-rw-r--r-- epan/charsets.c 2
-rw-r--r-- epan/proto.h 6
-rw-r--r-- epan/tvbuff.c 58
3 files changed, 63 insertions, 3 deletions
diff --git a/epan/charsets.c b/epan/charsets.c
index 800b8705aa..dd13ca1217 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -178,6 +178,7 @@ EBCDIC_to_ASCII1(guint8 c)
#define UNREPL 0xFFFD
/* ISO-8859-2 */
+/* http://en.wikipedia.org/wiki/ISO/IEC_8859-2#Code_page_layout */
const gunichar2 charset_table_iso_8859_2[0x80] = {
UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, /* 0x80 - */
UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, /* - 0x8F */
@@ -198,6 +199,7 @@ const gunichar2 charset_table_iso_8859_2[0x80] = {
};
/* Windows-1250 */
+/* http://en.wikipedia.org/wiki/Windows-1250 */
const gunichar2 charset_table_cp1250[0x80] = {
0x20ac, UNREPL, 0x201a, UNREPL, 0x201e, 0x2026, 0x2020, 0x2021, /* 0x80 - */
UNREPL, 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179, /* - 0x8F */
diff --git a/epan/proto.h b/epan/proto.h
index e5c8af62fe..fa0a365345 100644
--- a/epan/proto.h
+++ b/epan/proto.h
@@ -284,8 +284,9 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
#define ENC_UTF_16 0x00000004
#define ENC_UCS_2 0x00000006
#define ENC_EBCDIC 0x00000008
-#define ENC_WINDOWS_1250 0x0000000A /* http://en.wikipedia.org/wiki/Windows-1250 */
-#define ENC_ISO_8859_2 0x0000000C /* http://en.wikipedia.org/wiki/ISO/IEC_8859-2#Code_page_layout */
+#define ENC_WINDOWS_1250 0x0000000A
+#define ENC_ISO_8859_1 0x0000000C
+#define ENC_ISO_8859_2 0x0000000E
/*
* TODO:
@@ -293,7 +294,6 @@ WS_DLL_PUBLIC WS_MSVC_NORETURN void proto_report_dissector_bug(const char *messa
* These could probably be used by existing code:
*
* ENC_UCS_4 - UCS-4
- * ENC_ISO_8859_1 - ISO 8859/1
* ENC_ISO_8859_8 - ISO 8859/8
* - "IBM MS DBCS"
* - JIS C 6226
diff --git a/epan/tvbuff.c b/epan/tvbuff.c
index c6d5c8c6fa..70a0c2150e 100644
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@@ -1808,6 +1808,35 @@ tvb_get_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const
}
/*
 * Build a string from `length` bytes of `tvb`, starting at `offset`,
 * treating each byte as an ISO 8859-1 character.
 *
 * The result is accumulated in a wmem_strbuf_t created from `scope`.
 * Bytes below 0x80 are appended as-is; bytes 0x80 and above are appended
 * through wmem_strbuf_append_unichar() using the byte value itself as
 * the code point.
 *
 * If `length` is zero or negative the loop body never runs and the
 * string of the freshly created (empty) buffer is returned.
 *
 * Returns the buffer's string, cast from whatever
 * wmem_strbuf_get_str() yields to a non-const guint8 pointer.
 */
static guint8 *
+tvb_get_string_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
+{
+ wmem_strbuf_t *str;
+
+ str = wmem_strbuf_new(scope, "");
+
+ while (length > 0) {
+ guint8 ch = tvb_get_guint8(tvb, offset); /* one byte per iteration */
+
+ if (ch < 0x80)
+ wmem_strbuf_append_c(str, ch);
+ else {
+ /*
+ * Note: we assume here that the code points
+ * 0x80-0x9F are used for C1 control characters,
+ * and thus have the same value as the corresponding
+ * Unicode code points.
+ */
+ wmem_strbuf_append_unichar(str, ch);
+ }
+ offset++;
+ length--;
+ }
+
+ /* XXX: this discards constness. Should we have a function that "takes over" strbuf->str (e.g. when the strbuf is no longer needed)? */
+ return (guint8 *) wmem_strbuf_get_str(str);
+}
+
+static guint8 *
tvb_get_string_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80])
{
wmem_strbuf_t *str;
@@ -1911,6 +1940,15 @@ tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1250);
break;
+ case ENC_ISO_8859_1:
+ /*
+ * ISO 8859-1 printable code point values are equal
+ * to the equivalent Unicode code point value, so
+ * no translation table is needed.
+ */
+ strbuf = tvb_get_string_8859_1(scope, tvb, offset, length);
+ break;
+
case ENC_ISO_8859_2:
strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_2);
break;
@@ -1991,6 +2029,17 @@ tvb_get_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint
}
/*
 * Fetch the string that starts at `offset` in `tvb` as ISO 8859-1.
 *
 * The size is taken from tvb_strsize(), stored through `lengthp`, and
 * then passed as the byte count to tvb_get_string_8859_1(), whose
 * return value is returned unchanged.
 *
 * Presumably tvb_strsize() counts the terminating NUL as well, so the
 * NUL is part of the converted range -- TODO confirm against its
 * definition.
 */
static guint8 *
+tvb_get_stringz_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
+{
+ guint size;
+
+ /*
+ * XXX: conversion between signed and unsigned integers -- `size`
+ * is a guint, while *lengthp and the `length` parameter of
+ * tvb_get_string_8859_1() are gint.
+ *
+ * NOTE(review): lengthp is dereferenced unconditionally here;
+ * confirm that no caller passes NULL.
+ */
+ *lengthp = size = tvb_strsize(tvb, offset);
+
+ return tvb_get_string_8859_1(scope, tvb, offset, size);
+}
+
+static guint8 *
tvb_get_stringz_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80])
{
guint size;
@@ -2030,6 +2079,15 @@ tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, g
strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1250);
break;
+ case ENC_ISO_8859_1:
+ /*
+ * ISO 8859-1 printable code point values are equal
+ * to the equivalent Unicode code point value, so
+ * no translation table is needed.
+ */
+ strptr = tvb_get_stringz_8859_1(scope, tvb, offset, lengthp);
+ break;
+
case ENC_ISO_8859_2:
strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_2);
break;