diff options
author | Guy Harris <guy@alum.mit.edu> | 2012-05-15 05:42:58 +0000 |
---|---|---|
committer | Guy Harris <guy@alum.mit.edu> | 2012-05-15 05:42:58 +0000 |
commit | 1c2a67f3483c126d378db56c78284d166825ffdc (patch) | |
tree | b7193e1547c3ace2fb30a8db0a9d0e189943202b /epan | |
parent | 1ec0629c1bf276c851c920c7c1af4e1eacaba6f8 (diff) |
Make the encoding argument uniformly guint rather than gint.
Put the code that's currently common to the ENC_ASCII and ENC_UTF_8
cases in tvb_get_ephemeral_string_enc() into tvb_get_ephemeral_string(),
and call tvb_get_ephemeral_string() in those cases. Skip the
tvb_ensure_bytes_exist() and ensure_contiguous() calls in the ENC_UTF_16
and ENC_UCS_2 cases, as they're unnecessary there.
Update the comment for tvb_get_ephemeral_string_enc().
Make tvb_get_ephemeral_stringz_enc() handle the same set of encodings
that tvb_get_ephemeral_string_enc() handles.
svn path=/trunk/; revision=42627
Diffstat (limited to 'epan')
-rw-r--r-- | epan/tvbuff.c | 122 | ||||
-rw-r--r-- | epan/tvbuff.h | 6 |
2 files changed, 95 insertions, 33 deletions
diff --git a/epan/tvbuff.c b/epan/tvbuff.c index 7c4dc08bf5..7fe63c1cc1 100644 --- a/epan/tvbuff.c +++ b/epan/tvbuff.c @@ -2379,9 +2379,9 @@ tvb_get_unicode_string(tvbuff_t *tvb, const gint offset, gint length, const guin /* * Given a tvbuff, an offset, a length, and an encoding, allocate a * buffer big enough to hold a non-null-terminated string of that length - * at that offset, plus a trailing '\0', copy the string into it, and - * return a pointer to the string; if the encoding is EBCDIC, map - * the string from EBCDIC to ASCII. + * at that offset, plus a trailing '\0', copy into the buffer the + * string as converted from the appropriate encoding to UTF-8, and + * return a pointer to the string. * * Throws an exception if the tvbuff ends before the string does. * @@ -2393,14 +2393,11 @@ tvb_get_unicode_string(tvbuff_t *tvb, const gint offset, gint length, const guin */ guint8 * tvb_get_ephemeral_string_enc(tvbuff_t *tvb, const gint offset, - const gint length, const gint encoding) + const gint length, const guint encoding) { const guint8 *ptr; - guint8 *strbuf = NULL; - - tvb_ensure_bytes_exist(tvb, offset, length); + guint8 *strbuf; - ptr = ensure_contiguous(tvb, offset, length); switch (encoding & ENC_CHARENCODING_MASK) { case ENC_ASCII: @@ -2417,10 +2414,7 @@ tvb_get_ephemeral_string_enc(tvbuff_t *tvb, const gint offset, * XXX - should map all octets with the 8th bit * not set to a "substitute" UTF-8 character. */ - strbuf = ep_alloc(length + 1); - if (length != 0) { - memcpy(strbuf, ptr, length); - } + strbuf = tvb_get_ephemeral_string(tvb, offset, length); break; case ENC_UTF_8: @@ -2428,10 +2422,7 @@ tvb_get_ephemeral_string_enc(tvbuff_t *tvb, const gint offset, * XXX - should map all invalid UTF-8 sequences * to a "substitute" UTF-8 character. 
*/ - strbuf = ep_alloc(length + 1); - if (length != 0) { - memcpy(strbuf, ptr, length); - } + strbuf = tvb_get_ephemeral_string(tvb, offset, length); break; case ENC_UTF_16: @@ -2461,29 +2452,36 @@ tvb_get_ephemeral_string_enc(tvbuff_t *tvb, const gint offset, * * XXX - multiple "dialects" of EBCDIC? */ + tvb_ensure_bytes_exist(tvb, offset, length); /* make sure length = -1 fails */ strbuf = ep_alloc(length + 1); if (length != 0) { + ptr = ensure_contiguous(tvb, offset, length); memcpy(strbuf, ptr, length); + EBCDIC_to_ASCII(strbuf, length); } - EBCDIC_to_ASCII(strbuf, length); + strbuf[length] = '\0'; break; } - - strbuf[length] = '\0'; return strbuf; } guint8 * tvb_get_ephemeral_string(tvbuff_t *tvb, const gint offset, const gint length) { - return tvb_get_ephemeral_string_enc(tvb, offset, length, ENC_UTF_8|ENC_NA); + guint8 *strbuf; + + tvb_ensure_bytes_exist(tvb, offset, length); /* make sure length = -1 fails */ + strbuf = ep_alloc(length + 1); + tvb_memcpy(tvb, strbuf, offset, length); + strbuf[length] = '\0'; + return strbuf; } /* * Unicode (UTF-16) version of tvb_get_ephemeral_string() * XXX - this is UCS-2, not UTF-16, as it doesn't handle surrogate pairs * - * Encoding paramter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN + * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN * * Specify length in bytes * @@ -2567,7 +2565,7 @@ tvb_get_seasonal_string(tvbuff_t *tvb, const gint offset, const gint length) * string (including the terminating null) through a pointer. */ guint8 * -tvb_get_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, gint encoding) +tvb_get_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding) { guint size; guint8 *strptr; @@ -2629,25 +2627,89 @@ tvb_get_const_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp) * after the current packet has been dissected. 
*/ guint8 * -tvb_get_ephemeral_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, gint encoding) +tvb_get_ephemeral_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding) { guint size; guint8 *strptr; - size = tvb_strsize(tvb, offset); - strptr = ep_alloc(size); - tvb_memcpy(tvb, strptr, offset, size); - if ((encoding & ENC_CHARENCODING_MASK) == ENC_EBCDIC) + switch (encoding & ENC_CHARENCODING_MASK) { + + case ENC_ASCII: + default: + /* + * For now, we treat bogus values as meaning + * "ASCII" rather than reporting an error, + * for the benefit of old dissectors written + * when the last argument to proto_tree_add_item() + * was a gboolean for the byte order, not an + * encoding value, and passed non-zero values + * other than TRUE to mean "little-endian". + * + * XXX - should map all octets with the 8th bit + * not set to a "substitute" UTF-8 character. + */ + strptr = tvb_get_ephemeral_stringz(tvb, offset, lengthp); + break; + + case ENC_UTF_8: + /* + * XXX - should map all invalid UTF-8 sequences + * to a "substitute" UTF-8 character. + */ + strptr = tvb_get_ephemeral_stringz(tvb, offset, lengthp); + break; + + case ENC_UTF_16: + /* + * XXX - needs to handle surrogate pairs and to map + * invalid characters and sequences to a "substitute" + * UTF-8 character. + */ + strptr = tvb_get_ephemeral_unicode_stringz(tvb, offset, lengthp, + encoding & ENC_LITTLE_ENDIAN); + break; + + case ENC_UCS_2: + /* + * XXX - needs to map values that are not valid UCS-2 + * characters (such as, I think, values used as the + * components of a UTF-16 surrogate pair) to a + * "substitute" UTF-8 character. + */ + strptr = tvb_get_ephemeral_unicode_stringz(tvb, offset, lengthp, + encoding & ENC_LITTLE_ENDIAN); + break; + + case ENC_EBCDIC: + /* + * XXX - do the copy and conversion in one pass. + * + * XXX - multiple "dialects" of EBCDIC? 
+ */ + size = tvb_strsize(tvb, offset); + strptr = ep_alloc(size); + tvb_memcpy(tvb, strptr, offset, size); EBCDIC_to_ASCII(strptr, size); - if (lengthp) - *lengthp = size; + if (lengthp) + *lengthp = size; + break; + } + return strptr; } guint8 * tvb_get_ephemeral_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp) { - return tvb_get_ephemeral_stringz_enc(tvb, offset, lengthp, ENC_UTF_8|ENC_NA); + guint size; + guint8 *strptr; + + size = tvb_strsize(tvb, offset); + strptr = ep_alloc(size); + tvb_memcpy(tvb, strptr, offset, size); + if (lengthp) + *lengthp = size; + return strptr; } /* diff --git a/epan/tvbuff.h b/epan/tvbuff.h index d37f62b15f..fe47945399 100644 --- a/epan/tvbuff.h +++ b/epan/tvbuff.h @@ -482,7 +482,7 @@ extern guint8 *tvb_get_string(tvbuff_t *tvb, const gint offset, const gint lengt extern gchar *tvb_get_unicode_string(tvbuff_t *tvb, const gint offset, gint length, const guint encoding); extern guint8 *tvb_get_ephemeral_string(tvbuff_t *tvb, const gint offset, const gint length); extern guint8 *tvb_get_ephemeral_string_enc(tvbuff_t *tvb, const gint offset, - const gint length, const gint encoding); + const gint length, const guint encoding); extern gchar *tvb_get_ephemeral_unicode_string(tvbuff_t *tvb, const gint offset, gint length, const guint encoding); extern guint8 *tvb_get_seasonal_string(tvbuff_t *tvb, const gint offset, const gint length); @@ -523,10 +523,10 @@ extern guint8 *tvb_get_seasonal_string(tvbuff_t *tvb, const gint offset, const g * or file is opened. 
*/ extern guint8 *tvb_get_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp); -extern guint8 *tvb_get_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, gint encoding); +extern guint8 *tvb_get_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding); extern const guint8 *tvb_get_const_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp); extern guint8 *tvb_get_ephemeral_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp); -extern guint8 *tvb_get_ephemeral_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, gint encoding); +extern guint8 *tvb_get_ephemeral_stringz_enc(tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding); extern gchar *tvb_get_ephemeral_unicode_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding); extern guint8 *tvb_get_seasonal_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp); |