diff options
author | Aurelien Aptel <aaptel@suse.com> | 2019-07-05 16:08:18 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2019-07-15 21:00:14 +0000 |
commit | 0db39ae59aaefc13a38ec4e7728da44a647b1a10 (patch) | |
tree | e82f234b0928c832d23bd6fe128958611131c060 /epan/tvbuff_lz77.c | |
parent | 1a91aac9747ec3d937dd20ec962c86a149248876 (diff) |
smb2: add support for decompression
The latest iteration of Microsoft updates to SMB3 added compression to
the protocol. This commit implements decompressing and dissecting
compressed payloads.
The compression algorithms that can be used are "Plain LZ77",
"LZ77+Huffman" and "LZNT1" which you can read more about in the
[MS-XCA] documentation. This set of algorithms is sometimes referred to
as XPRESS.
This commit reuses the existing uncompression API scheme already in
place with zlib and brotli and adds 3 tvb_uncompress_*() functions
implemented in:
* epan/tvbuff_lz77.c
* epan/tvbuff_lz77huff.c
* epan/tvbuff_lznt1.c
A new function wmem_array_try_index() was added to the wmem_array API
to make bound checked reads that fail gracefully. New tests for it
have been added as well.
Since both reads (tvb) and writes (wmem_array) are bound checked the
risk for buffer overruns is drastically reduced. LZ77+Huffman has
decoding tables and special care was taken to bound check these.
Simplified versions of the implementations were successfully tested
against AFL (American Fuzzy Lop) for ~150 million executions each.
The SMB2/3 dissector was changed to deal with the new transform header
for compressed packets (new protocol_id value) and READ request
flags (COMPRESSED). Badly compressed or encrypted packets are now
reported as such, and the decryption test suite was changed to reflect
that.
This commit also adds a test capture with 1 packet compressed with
each algorithm as returned by Windows Server 2019, along with 3
matching tests in test/suite_dissection.py
Change-Id: I2b84f56541f2f4ee7d886152794b993987dd10e7
Reviewed-on: https://code.wireshark.org/review/33855
Petri-Dish: Anders Broman <a.broman58@gmail.com>
Tested-by: Petri Dish Buildbot
Reviewed-by: Peter Wu <peter@lekensteyn.nl>
Diffstat (limited to 'epan/tvbuff_lz77.c')
-rw-r--r-- | epan/tvbuff_lz77.c | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/epan/tvbuff_lz77.c b/epan/tvbuff_lz77.c new file mode 100644 index 0000000000..082e58a133 --- /dev/null +++ b/epan/tvbuff_lz77.c @@ -0,0 +1,155 @@ +/* + * Decompression code for Plain LZ77. This encoding is used by + * Microsoft in various file formats and protocols including SMB3. + * + * Copyright (C) 2019 Aurélien Aptel + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <glib.h> +#include <epan/exceptions.h> +#include <epan/tvbuff.h> +#include <epan/wmem/wmem.h> + +#define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */ + +static gboolean do_uncompress(tvbuff_t *tvb, int offset, int in_size, + wmem_array_t *obuf) +{ + guint buf_flags = 0, buf_flag_count = 0; + int in_off = 0; + int last_length_half_byte = 0; + guint match_bytes, match_len, match_off; + guint i; + + if (!tvb) + return FALSE; + + if (in_size > MAX_INPUT_SIZE) + return FALSE; + + while (1) { + if (buf_flag_count == 0) { + buf_flags = tvb_get_letohl(tvb, offset+in_off); + in_off += 4; + buf_flag_count = 32; + } + buf_flag_count--; + if ((buf_flags & (1 << buf_flag_count)) == 0) { + guint8 v = tvb_get_guint8(tvb, offset+in_off); + wmem_array_append_one(obuf, v); + in_off++; + } else { + if (in_off == in_size) + return TRUE; + match_bytes = tvb_get_letohs(tvb, offset+in_off); + in_off += 2; + match_len = match_bytes % 8; + match_off = (match_bytes/8) + 1; + if (match_len == 7) { + if (last_length_half_byte == 0) { + match_len = tvb_get_guint8(tvb, offset+in_off); + match_len = match_len % 16; + last_length_half_byte = in_off; + in_off++; + } else { + match_len = tvb_get_guint8(tvb, offset+last_length_half_byte); + match_len = match_len / 16; + last_length_half_byte = 0; + } + if (match_len == 15) { + match_len = tvb_get_guint8(tvb, offset+in_off); + in_off++; + if (match_len == 255) { + match_len = tvb_get_letohs(tvb, offset+in_off); + in_off += 2; + if (match_len == 0) { + /* This case isn't documented */ + match_len = tvb_get_letohs(tvb, offset+in_off); + in_off += 4; + } + if 
(match_len < 15+7) + return FALSE; + match_len -= (15 + 7); + } + match_len += 15; + } + match_len += 7; + } + match_len += 3; + for (i = 0; i < match_len; i++) { + guint8 byte; + if (match_off > wmem_array_get_count(obuf)) + return FALSE; + if (wmem_array_try_index(obuf, wmem_array_get_count(obuf)-match_off, &byte)) + return FALSE; + wmem_array_append_one(obuf, byte); + } + } + } + + return TRUE; +} + +tvbuff_t * +tvb_uncompress_lz77(tvbuff_t *tvb, const int offset, int in_size) +{ + volatile gboolean ok = FALSE; + wmem_allocator_t *pool; + wmem_array_t *obuf; + tvbuff_t *out; + + pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE); + obuf = wmem_array_sized_new(pool, 1, in_size*2); + + TRY { + ok = do_uncompress(tvb, offset, in_size, obuf); + } CATCH_ALL { + ok = FALSE; + } + ENDTRY; + + if (ok) { + /* + * Cannot pass a tvb free callback that frees the wmem + * pool, so we make an make an extra copy that uses + * bare pointers. This could be optimized if tvb API + * had a free pool callback of some sort. + */ + guint size = wmem_array_get_count(obuf); + guint8 *p = (guint8 *)g_malloc(size); + memcpy(p, wmem_array_get_raw(obuf), size); + out = tvb_new_real_data(p, size, size); + tvb_set_free_cb(out, g_free); + } else { + out = NULL; + } + + wmem_destroy_allocator(pool); + + return out; +} + +tvbuff_t * +tvb_child_uncompress_lz77(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size) +{ + tvbuff_t *new_tvb = tvb_uncompress_lz77(tvb, offset, in_size); + if (new_tvb) + tvb_set_child_real_data_tvbuff(parent, new_tvb); + return new_tvb; +} + + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ |