diff options
author | Guy Harris <gharris@sonic.net> | 2020-08-19 23:58:20 -0700 |
---|---|---|
committer | Guy Harris <gharris@sonic.net> | 2020-08-20 07:24:32 +0000 |
commit | 35418a73f7c9cefebe392b1ea0f012fccaf89801 (patch) | |
tree | f6ce6ac12a3fcc952b484455468790b8a79cd7e8 /epan/strutil.c | |
parent | 06ff18fbb45fbf37feb3a3fcf0e22eebfa979079 (diff) |
Add format_text_string(), which gets the length with strlen().
format_text(alloc, string, strlen(string)) is a common idiom; provide
format_text_string(), which does the strlen(string) for you. (Any
string used in a %s to set the text of a protocol tree item, if it was
directly extracted from the packet, should be run through a format_text
routine, to ensure that it's valid UTF-8 and that control characters are
handled correctly.)
Update comments while we're at it.
Change-Id: Ia8549efa1c96510ffce97178ed4ff7be4b02eb6e
Reviewed-on: https://code.wireshark.org/review/38202
Petri-Dish: Guy Harris <gharris@sonic.net>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <gharris@sonic.net>
Diffstat (limited to 'epan/strutil.c')
-rw-r--r-- | epan/strutil.c | 33 |
1 files changed, 29 insertions, 4 deletions
```diff
diff --git a/epan/strutil.c b/epan/strutil.c
index 2e9b47d69b..2565c5fa87 100644
--- a/epan/strutil.c
+++ b/epan/strutil.c
@@ -193,10 +193,11 @@ get_token_len(const guchar *linep, const guchar *lineend,
 #define UNPOOP 0x1F4A9
 
 /*
- * Given a string, expected to be in UTF-8 but possibly containing
- * invalid sequences (as it may have come from packet data), generate
- * a valid UTF-8 string from it, allocated with the specified wmem
- * allocator, that:
+ * Given a wmem scope, a not-necessarily-null-terminated string,
+ * expected to be in UTF-8 but possibly containing invalid sequences
+ * (as it may have come from packet data), and the length of the string,
+ * generate a valid UTF-8 string from it, allocated in the specified
+ * wmem scope, that:
  *
  *   shows printable Unicode characters as themselves;
  *
@@ -486,6 +487,30 @@ format_text(wmem_allocator_t* allocator, const guchar *string, size_t len)
     return fmtbuf;
 }
 
+/** Given a wmem scope and a null-terminated string, expected to be in
+ * UTF-8 but possibly containing invalid sequences (as it may have come
+ * from packet data), and the length of the string, generate a valid
+ * UTF-8 string from it, allocated in the specified wmem scope, that:
+ *
+ *   shows printable Unicode characters as themselves;
+ *
+ *   shows non-printable ASCII characters as C-style escapes (octal
+ *   if not one of the standard ones such as LF -> '\n');
+ *
+ *   shows non-printable Unicode-but-not-ASCII characters as
+ *   their universal character names;
+ *
+ *   shows illegal UTF-8 sequences as a sequence of bytes represented
+ *   as C-style hex escapes;
+ *
+ * and return a pointer to it.
+ */
+gchar *
+format_text_string(wmem_allocator_t* allocator, const guchar *string)
+{
+    return format_text(allocator, string, strlen(string));
+}
+
 /*
  * Given a string, generate a string from it that shows non-printable
  * characters as C-style escapes except a whitespace character
```