diff options
author | João Valverde <j@v6e.pt> | 2022-10-14 10:25:00 +0100 |
---|---|---|
committer | João Valverde <j@v6e.pt> | 2022-10-14 12:19:11 +0100 |
commit | 3de62e588f3b1da970306fff03f28209bc7fbb2c (patch) | |
tree | f80bcf789d003583576c779bf7de11a1916e21c0 /wsutil/unicode-utils.c | |
parent | eea68c7721e8e5f6113c17f542b8408a9791471b (diff) |
wsutil: Rewrite ws_utf8_char_len() using a lookup table
Rewrite for speed and correctness.
This implementation is stricter about invalid first bytes: it rejects
continuation bytes (0x80-0xbf), the lead bytes 0xc0 and 0xc1 (which can only
start overlong sequences), and 0xf5-0xff (which cannot begin a valid sequence).
Returns 0 instead of -1 for invalid bytes.
Diffstat (limited to 'wsutil/unicode-utils.c')
-rw-r--r-- | wsutil/unicode-utils.c | 30 |
1 files changed, 18 insertions, 12 deletions
diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c
index 2ade075ad9..764c0b3c03 100644
--- a/wsutil/unicode-utils.c
+++ b/wsutil/unicode-utils.c
@@ -12,18 +12,24 @@
 #include "unicode-utils.h"
-int
-ws_utf8_char_len(guint8 ch)
-{
-    if (ch >= 0xfe) return -1;
-    if (ch >= 0xfc) return 6;
-    if (ch >= 0xf8) return 5;
-    if (ch >= 0xf0) return 4;
-    if (ch >= 0xe0) return 3;
-    if (ch >= 0xc0) return 2;
-    else return 1;
-}
-
+int ws_utf8_seqlen[256] = {
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00...0x0f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10...0x1f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20...0x2f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30...0x3f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40...0x4f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50...0x5f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60...0x6f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70...0x7f */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80...0x8f */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90...0x9f */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0...0xaf */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0...0xbf */
+    0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xc0...0xcf */
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xd0...0xdf */
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xe0...0xef */
+    4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xf0...0xff */
+};
 #ifdef _WIN32