aboutsummaryrefslogtreecommitdiffstats
path: root/wsutil
diff options
context:
space:
mode:
authorHadriel Kaplan <hadrielk@yahoo.com>2015-02-06 13:52:37 -0500
committerAnders Broman <a.broman58@gmail.com>2015-02-11 09:14:50 +0000
commita837570d02dca2ad94ff5046b13592d84a12a345 (patch)
tree0a06b1d9a1c7c1e6bc67f57412f7adc3a2a1db71 /wsutil
parenta618f1c0d63fd290cbdc93272beaf1ca7e838027 (diff)
Combine SSE and pre-compiled patterns for faster pbrk
This combines the SSE4.2 instructions usage, with pre-compiled pattern searching usage, for a faster pbrk search method. Testing against large files of HTTP and SIP, there is about a 5% performance improvement by using pre-"compiled" patterns for guint8_pbrk() instead of passing it the search string and having it build the match array every time. Similar to regular expressions, "compiling" the pattern match array in advance only once and using the "compiled" patterns for the searches is faster than compiling it every time. Change-Id: Ifcbc14a6c93f32d15663a10d974bacdca5119a8e Ping-Bug: 10798 Reviewed-on: https://code.wireshark.org/review/6990 Petri-Dish: Hadriel Kaplan <hadrielk@yahoo.com> Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org> Reviewed-by: Anders Broman <a.broman58@gmail.com>
Diffstat (limited to 'wsutil')
-rw-r--r--wsutil/ws_mempbrk.c60
-rw-r--r--wsutil/ws_mempbrk.h27
-rw-r--r--wsutil/ws_mempbrk_sse42.c123
3 files changed, 94 insertions, 116 deletions
diff --git a/wsutil/ws_mempbrk.c b/wsutil/ws_mempbrk.c
index 8ad1a17e78..6ed9ebcd9e 100644
--- a/wsutil/ws_mempbrk.c
+++ b/wsutil/ws_mempbrk.c
@@ -36,50 +36,54 @@
#include <glib.h>
#include "ws_symbol_export.h"
+#include "ws_mempbrk.h"
+
+
+void
+tvb_pbrk_compile(tvb_pbrk_pattern* pattern, const gchar *needles)
+{
+ const gchar *n = needles;
+ while (*n) {
+ pattern->patt[(int)*n] = 1;
+ n++;
+ }
+
#ifdef HAVE_SSE4_2
-#include "ws_cpuid.h"
+ ws_mempbrk_sse42_compile(pattern, needles);
#endif
-#include "ws_mempbrk.h"
+}
+
const guint8 *
-_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles)
+ws_mempbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
- gchar tmp[256] = { 0 };
- const guint8 *haystack_end;
+ const guint8 *haystack_end = haystack + haystacklen;
- while (*needles)
- tmp[*needles++] = 1;
+ while (haystack < haystack_end) {
+ if (pattern->patt[*haystack]) {
+ if (found_needle)
+ *found_needle = *haystack;
+ return haystack;
+ }
+ haystack++;
+ }
- haystack_end = haystack + haystacklen;
- while (haystack < haystack_end) {
- if (tmp[*haystack])
- return haystack;
- haystack++;
- }
-
- return NULL;
+ return NULL;
}
+
WS_DLL_PUBLIC const guint8 *
-ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles)
+tvb_pbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
#ifdef HAVE_SSE4_2
- static int have_sse42 = -1;
+ if (haystacklen >= 16 && pattern->use_sse42)
+ return ws_mempbrk_sse42_exec(haystack, haystacklen, pattern, found_needle);
#endif
- if (*needles == 0)
- return NULL;
-
-#ifdef HAVE_SSE4_2
- if G_UNLIKELY(have_sse42 < 0)
- have_sse42 = ws_cpuid_sse42();
- if (haystacklen >= 16 && have_sse42)
- return _ws_mempbrk_sse42(haystack, haystacklen, needles);
-#endif
-
- return _ws_mempbrk(haystack, haystacklen, needles);
+ return ws_mempbrk_exec(haystack, haystacklen, pattern, found_needle);
}
+
/*
* Editor modelines - http://www.wireshark.org/tools/modelines.html
*
diff --git a/wsutil/ws_mempbrk.h b/wsutil/ws_mempbrk.h
index 72f37d574a..708d53cfff 100644
--- a/wsutil/ws_mempbrk.h
+++ b/wsutil/ws_mempbrk.h
@@ -24,13 +24,30 @@
#include "ws_symbol_export.h"
-WS_DLL_PUBLIC const guint8 *ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
+/** The pattern object used for tvb_pbrk_pattern_guint8().
+ */
+typedef struct {
+ gchar patt[256];
+ gboolean use_sse42;
+ void *mask;
+} tvb_pbrk_pattern;
+
+/** The value to use when initializing a tvb_pbrk_pattern variable.
+ * For example:
+ * static tvb_pbrk_pattern pbrk_mypattern = INIT_PBRK_PATTERN;
+ */
+#define INIT_PBRK_PATTERN { { 0 }, FALSE, NULL }
+
+/** Compile the pattern for the needles to find using tvb_pbrk_pattern_guint8().
+ */
+WS_DLL_PUBLIC void tvb_pbrk_compile(tvb_pbrk_pattern* pattern, const gchar *needles);
+
+WS_DLL_PUBLIC const guint8 *tvb_pbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle);
-#ifdef HAVE_SSE4_2
-const char *_ws_mempbrk_sse42(const char* haystack, size_t haystacklen, const char *needles);
-#endif
+void ws_mempbrk_sse42_compile(tvb_pbrk_pattern* pattern, const gchar *needles);
+const char *ws_mempbrk_sse42_exec(const char* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle);
-const guint8 *_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
+const guint8 *ws_mempbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle);
#endif /* __WS_MEMPBRK_H__ */
diff --git a/wsutil/ws_mempbrk_sse42.c b/wsutil/ws_mempbrk_sse42.c
index 2805c0240e..da37a665aa 100644
--- a/wsutil/ws_mempbrk_sse42.c
+++ b/wsutil/ws_mempbrk_sse42.c
@@ -23,7 +23,7 @@
#ifdef HAVE_SSE4_2
#include <glib.h>
-
+#include "ws_cpuid.h"
#ifdef WIN32
#include <tmmintrin.h>
@@ -59,6 +59,23 @@ __m128i_shift_right (__m128i value, unsigned long int offset)
_mm_loadu_si128 (cast_128aligned__m128i(___m128i_shift_right + offset)));
}
+
+void
+ws_mempbrk_sse42_compile(tvb_pbrk_pattern* pattern, const gchar *needles)
+{
+ size_t length = strlen(needles);
+
+ pattern->use_sse42 = ws_cpuid_sse42() && (length <= 16);
+
+ if (pattern->use_sse42) {
+ __m128i *pmask = NULL;
+ pattern->mask = g_malloc(sizeof(__m128i));
+ pmask = (__m128i *) pattern->mask;
+ *pmask = _mm_setzero_si128();
+ memcpy(pmask, needles, length);
+ }
+}
+
/* We use 0x2:
_SIDD_SBYTE_OPS
| _SIDD_CMP_EQUAL_ANY
@@ -92,81 +109,12 @@ __m128i_shift_right (__m128i value, unsigned long int offset)
X for case 1. */
const char *
-_ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
+ws_mempbrk_sse42_exec(const char *s, size_t slen, const tvb_pbrk_pattern* pattern, guchar *found_needle)
{
const char *aligned;
- __m128i mask;
+ __m128i *pmask = (__m128i *) pattern->mask;
int offset;
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
- {
- /* As 'a' is not guarantueed to have a size of at least 16 bytes, and is not
- * aligned, _mm_load_si128() cannot be used when ASAN is enabled. That
- * triggers a buffer overflow which is harmless as 'a' is guaranteed to be
- * '\0' terminated, and the PCMISTRI instruction always ignored everything
- * starting from EOS ('\0'). A false positive indeed. */
- size_t length;
-
- length = strlen(a);
- /* Don't use SSE4.2 if the length of A > 16. */
- if (length > 16)
- return _ws_mempbrk(s, slen, a);
-
- mask = _mm_setzero_si128();
- memcpy(&mask, a, length);
- }
-#else /* else if ASAN is disabled */
- offset = (int) ((size_t) a & 15);
- aligned = (const char *) ((size_t) a & -16L);
- if (offset != 0)
- {
- int length;
-
- /* Load masks. */
- /* cast safe - _mm_load_si128() it's 16B aligned */
- mask = __m128i_shift_right(_mm_load_si128 (cast_128aligned__m128i(aligned)), offset);
-
- /* Find where the NULL terminator is. */
- length = _mm_cmpistri (mask, mask, 0x3a);
- if (length == 16 - offset)
- {
- /* There is no NULL terminator. */
- __m128i mask1 = _mm_load_si128 (cast_128aligned__m128i(aligned + 16));
- int idx = _mm_cmpistri (mask1, mask1, 0x3a);
- length += idx;
-
- /* Don't use SSE4.2 if the length of A > 16. */
- if (length > 16)
- return _ws_mempbrk(s, slen, a);
-
- if (idx != 0)
- {
- /* Combine mask0 and mask1. We could play games with
- palignr, but frankly this data should be in L1 now
- so do the merge via an unaligned load. */
- mask = _mm_loadu_si128 (cast_128aligned__m128i(a));
- }
- }
- }
- else
- {
- int length;
-
- /* A is aligned. (cast safe) */
- mask = _mm_load_si128 (cast_128aligned__m128i(a));
-
- /* Find where the NULL terminator is. */
- length = _mm_cmpistri (mask, mask, 0x3a);
- if (length == 16)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return _ws_mempbrk(s, slen, a);
- }
- }
-#endif /* ASAN disabled */
-
offset = (int) ((size_t) s & 15);
aligned = (const char *) ((size_t) s & -16L);
if (offset != 0)
@@ -174,18 +122,23 @@ _ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
/* Check partial string. cast safe it's 16B aligned */
__m128i value = __m128i_shift_right (_mm_load_si128 (cast_128aligned__m128i(aligned)), offset);
- int length = _mm_cmpistri (mask, value, 0x2);
+ int length = _mm_cmpistri (*pmask, value, 0x2);
/* No need to check ZFlag since ZFlag is always 1. */
- int cflag = _mm_cmpistrc (mask, value, 0x2);
+ int cflag = _mm_cmpistrc (*pmask, value, 0x2);
+ /* XXX: why does this compare value with value? */
int idx = _mm_cmpistri (value, value, 0x3a);
- if (cflag)
+ if (cflag) {
+ if (found_needle)
+ *found_needle = *(s + length);
return s + length;
+ }
+
/* Find where the NULL terminator is. */
if (idx < 16 - offset)
{
- /* fond NUL @ 'idx', need to switch to slower mempbrk */
- return _ws_mempbrk(s + idx + 1, slen - idx - 1, a); /* slen is bigger than 16 & idx < 16 so no undeflow here */
+ /* found NUL @ 'idx', need to switch to slower mempbrk */
+ return ws_mempbrk_exec(s + idx + 1, slen - idx - 1, pattern, found_needle); /* slen is bigger than 16 & idx < 16 so no underflow here */
}
aligned += 16;
slen -= (16 - offset);
@@ -196,23 +149,27 @@ _ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
while (slen >= 16)
{
__m128i value = _mm_load_si128 (cast_128aligned__m128i(aligned));
- int idx = _mm_cmpistri (mask, value, 0x2);
- int cflag = _mm_cmpistrc (mask, value, 0x2);
- int zflag = _mm_cmpistrz (mask, value, 0x2);
+ int idx = _mm_cmpistri (*pmask, value, 0x2);
+ int cflag = _mm_cmpistrc (*pmask, value, 0x2);
+ int zflag = _mm_cmpistrz (*pmask, value, 0x2);
- if (cflag)
+ if (cflag) {
+ if (found_needle)
+ *found_needle = *(aligned + idx);
return aligned + idx;
+ }
+
if (zflag)
{
/* found NUL, need to switch to slower mempbrk */
- return _ws_mempbrk(aligned, slen, a);
+ return ws_mempbrk_exec(aligned, slen, pattern, found_needle);
}
aligned += 16;
slen -= 16;
}
/* XXX, use mempbrk_slow here? */
- return _ws_mempbrk(aligned, slen, a);
+ return ws_mempbrk_exec(aligned, slen, pattern, found_needle);
}
#endif /* HAVE_SSE4_2 */