aboutsummaryrefslogtreecommitdiffstats
path: root/wsutil/unicode-utils.c
diff options
context:
space:
mode:
author: João Valverde <j@v6e.pt> 2022-10-14 10:25:00 +0100
committer: João Valverde <j@v6e.pt> 2022-10-14 12:19:11 +0100
commit: 3de62e588f3b1da970306fff03f28209bc7fbb2c (patch)
tree: f80bcf789d003583576c779bf7de11a1916e21c0 /wsutil/unicode-utils.c
parent: eea68c7721e8e5f6113c17f542b8408a9791471b (diff)
wsutil: Rewrite ws_utf8_char_len() using a lookup table
Rewrite for speed and correctness. This implementation is stricter about invalid first bytes (continuation bytes, invalid code points and some overlong sequences). It returns 0 instead of -1 for invalid bytes.
Diffstat (limited to 'wsutil/unicode-utils.c')
-rw-r--r-- wsutil/unicode-utils.c 30
1 files changed, 18 insertions, 12 deletions
diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c
index 2ade075ad9..764c0b3c03 100644
--- a/wsutil/unicode-utils.c
+++ b/wsutil/unicode-utils.c
@@ -12,18 +12,24 @@
#include "unicode-utils.h"
-int
-ws_utf8_char_len(guint8 ch) /* Removed version: maps a byte to a length by testing it against descending thresholds. */
-{
- if (ch >= 0xfe) return -1; /* 0xfe and 0xff are the only bytes rejected (-1) */
- if (ch >= 0xfc) return 6; /* 0xfc..0xfd */
- if (ch >= 0xf8) return 5; /* 0xf8..0xfb */
- if (ch >= 0xf0) return 4; /* 0xf0..0xf7 */
- if (ch >= 0xe0) return 3; /* 0xe0..0xef */
- if (ch >= 0xc0) return 2; /* 0xc0..0xdf, including 0xc0 and 0xc1 */
- else return 1; /* every byte below 0xc0, including 0x80..0xbf, yields 1 */
-}
-
+int ws_utf8_seqlen[256] = { /* UTF-8 sequence length keyed by byte value (one entry per value 0x00..0xff).
+ * Per the commit message, this table backs the rewritten ws_utf8_char_len(); how the
+ * lookup is wired up is not shown here (presumably in unicode-utils.h - confirm).
+ *
+ * Entry values:
+ *   0x00..0x7f -> 1
+ *   0x80..0xbf -> 0 (continuation bytes)
+ *   0xc0..0xc1 -> 0 (could only begin overlong two-byte forms)
+ *   0xc2..0xdf -> 2
+ *   0xe0..0xef -> 3
+ *   0xf0..0xf4 -> 4
+ *   0xf5..0xff -> 0
+ * A 0 entry marks a byte that is not accepted as the first byte of a sequence.
+ * Only the first byte is classified; the table says nothing about the bytes that follow it.
+ */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00...0x0f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10...0x1f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20...0x2f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30...0x3f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40...0x4f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50...0x5f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60...0x6f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70...0x7f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80...0x8f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90...0x9f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0...0xaf */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0...0xbf */
+ 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xc0...0xcf */
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xd0...0xdf */
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xe0...0xef */
+ 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xf0...0xff */
+};
#ifdef _WIN32