aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHadriel Kaplan <hadrielk@yahoo.com>2014-02-22 00:44:00 -0500
committerAnders Broman <a.broman58@gmail.com>2014-03-10 07:11:12 +0000
commitea46cdc4748d84d5026a7703b25e427f8170833e (patch)
tree97c581cda8563f0b91996fd685c9dc6fb42eeba5
parent81c0091c0ad593c5ded0af4622cf10c19cb18ae7 (diff)
Add GLib's regex library into Lua
While Lua's built-in pattern support is ok for simple things, many people end up wanting a real regex engine. Since Wireshark already includes the GLib Regex library (a wrapper for PCRE), it makes sense to expose that library to Lua scripts. This has been done using Lrexlib, one of the most popular regex bindings for Lua. Lrexlib didn't support binding GLib's Regex in particular - it does for PCRE but GLib is a different API - so I've done that. A fairly thorough testsuite came along with that, which has been incorporated into the wireshark wslua testuites as well in this commit. Change-Id: I05811d1edf7af8d7c9f4f081de6850f31c0717c7 Reviewed-on: https://code.wireshark.org/review/332 Reviewed-by: Anders Broman <a.broman58@gmail.com>
-rw-r--r--docbook/CMakeLists.txt1
-rw-r--r--docbook/wsluarm.xml487
-rw-r--r--epan/wslua/CMakeLists.txt3
-rw-r--r--epan/wslua/Makefile.am7
-rw-r--r--epan/wslua/Makefile.nmake6
-rw-r--r--epan/wslua/lrexlib.c266
-rw-r--r--epan/wslua/lrexlib.h130
-rw-r--r--epan/wslua/lrexlib_algo.h755
-rw-r--r--epan/wslua/lrexlib_glib.c414
-rw-r--r--epan/wslua/lrexlib_glib_f.c138
-rwxr-xr-xepan/wslua/make-reg.pl1
-rw-r--r--epan/wslua/wslua.h1
-rwxr-xr-xtest/lua/common_sets.lua319
-rw-r--r--test/lua/glib_sets.lua204
-rw-r--r--test/lua/gregex.lua285
-rwxr-xr-xtest/lua/luatest.lua174
-rwxr-xr-xtest/lua/pat2pcre.lua87
-rwxr-xr-xtest/lua/pcre_sets.lua179
-rwxr-xr-xtest/lua/pcre_sets2.lua198
-rwxr-xr-xtest/suite-wslua.sh21
20 files changed, 3673 insertions, 3 deletions
diff --git a/docbook/CMakeLists.txt b/docbook/CMakeLists.txt
index bbcf284b2d..0373da5f76 100644
--- a/docbook/CMakeLists.txt
+++ b/docbook/CMakeLists.txt
@@ -310,7 +310,6 @@ set(WSLUA_MODULES
${CMAKE_SOURCE_DIR}/epan/wslua/wslua_tree.c
${CMAKE_SOURCE_DIR}/epan/wslua/wslua_tvb.c
${CMAKE_SOURCE_DIR}/epan/wslua/wslua_util.c
- ${CMAKE_SOURCE_DIR}/epan/wslua/wslua_int64.c
${CMAKE_SOURCE_DIR}/epan/wslua/wslua_struct.c
)
diff --git a/docbook/wsluarm.xml b/docbook/wsluarm.xml
index 261c65fe30..1a01b568d4 100644
--- a/docbook/wsluarm.xml
+++ b/docbook/wsluarm.xml
@@ -179,4 +179,491 @@ end
&WsLuaUtility;
&WsLuaInt64;
&WsLuaStruct;
+
+ <section id='lua_module_GRegex'>
+ <title> GLib Regular Expressions </title>
+ <para>
+ Lua has its own native 'pattern' syntax in the string library, but sometimes a real
+ regex engine is more useful. Wireshark comes with GLib's Regex implementation, which
+ itself is based on Perl Compatible Regular Expressions (PCRE). This engine is exposed
+ into Wireshark's Lua engine through the well-known Lrexlib library, following the
+ same syntax and semantics as the Lrexlib PCRE implementation, with a few differences as follows:
+ <itemizedlist>
+ <listitem>
+ <para> There is no support for using custom locale/chartables </para>
+ </listitem>
+ <listitem>
+ <para> dfa_exec() doesn't take 'ovecsize' nor 'wscount' arguments </para>
+ </listitem>
+ <listitem>
+ <para> dfa_exec() returns boolean true for partial match, without subcapture info </para>
+ </listitem>
+ <listitem>
+ <para> Named subgroups do not return name-keyed entries in the return
+ table (i.e., in match/tfind/exec)
+ </para>
+ </listitem>
+ <listitem>
+ <para> The 'flags()' function still works, returning all flags, but two new
+ functions 'compile_flags()' and 'match_flags()' return just their respective
+ flags, since GLib has a different and smaller set of such flags, for
+ regex compile vs. match functions
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Using some assertions and POSIX character classes against strings with non-ASCII characters
+ might match high-order characters, because glib always sets PCRE_UCP
+ even if G_REGEX_RAW is set. For example, '[:alpha:]' matches certain
+ non-ASCII bytes. The following assertions have this issue: '\b', '\B', '\s', '\S', '\w', '\W'.
+ The following character classes have this issue: [:alpha:], [:alnum:], [:lower:], [:upper:],
+ [:space:], [:word:], and [:graph:].
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The compile flag G_REGEX_RAW is always set/used, even if you didn't specify it. This is because
+ GLib runs PCRE in UTF-8 mode by default, whereas Lua strings are not UTF-aware.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ This page is based on the full documentation for Lrexlib at
+ <ulink url="http://rrthomas.github.io/lrexlib/manual.html">http://rrthomas.github.io/lrexlib/manual.html</ulink>
+ </para>
+ <para>
+ The GLib Regular expression syntax (which is essentially PCRE syntax) can be found at
+ <ulink url="https://developer.gnome.org/glib/2.38/glib-regex-syntax.html">https://developer.gnome.org/glib/2.38/glib-regex-syntax.html</ulink>
+ </para>
+ <section id='lua_class_GRegex'><title>GRegex</title>
+ <para>
+ GLib Regular Expressions based on PCRE.
+ </para>
+ <section id='lua_class_GRegex_notes'><title>Notes</title>
+ <para>
+ All functions that take a regular expression pattern as an argument will
+ generate an error if that pattern is found invalid by the regex library.
+ </para>
+ <para>
+ All functions that take a string-type regex argument accept a compiled regex
+ too. In this case, the compile flags argument is ignored (it should be either supplied as nil or omitted).
+ </para>
+ <para>
+ The compilation flags argument 'cf' may also be supplied as a string, whose characters stand for compilation flags.
+ Combinations of the following characters (case sensitive) are supported:
+ <itemizedlist>
+ <listitem>
+ <para> '<command>i</command>' = G_REGEX_CASELESS - Letters in the pattern match both upper- and lowercase letters.
+ This option can be changed within a pattern by a "(?i)" option setting. </para>
+ </listitem>
+ <listitem>
+ <para> '<command>m</command>' = G_REGEX_MULTILINE - By default, GRegex treats the strings as consisting of a single
+ line of characters (even if it actually contains newlines). The "start of line"
+ metacharacter ("^") matches only at the start of the string, while the "end of line"
+ metacharacter ("$") matches only at the end of the string, or before a terminating newline
+ (unless G_REGEX_DOLLAR_ENDONLY is set). When G_REGEX_MULTILINE is set, the "start of line"
+ and "end of line" constructs match immediately following or immediately before any newline
+ in the string, respectively, as well as at the very start and end. This can be changed
+ within a pattern by a "(?m)" option setting.</para>
+ </listitem>
+ <listitem>
+ <para> '<command>s</command>' = G_REGEX_DOTALL - A dot metacharacter (".") in the pattern matches all characters,
+ including newlines. Without it, newlines are excluded. This option can be changed within
+ a pattern by a "(?s)" option setting. </para>
+ </listitem>
+ <listitem>
+ <para> '<command>x</command>' = G_REGEX_EXTENDED - Whitespace data characters in the pattern are totally ignored
+ except when escaped or inside a character class. Whitespace does not include the VT
+ character (code 11). In addition, characters between an unescaped "#" outside a character
+ class and the next newline character, inclusive, are also ignored. This can be changed
+ within a pattern by a "(?x)" option setting. </para>
+ </listitem>
+ <listitem>
+ <para> '<command>U</command>' = G_REGEX_UNGREEDY - Inverts the "greediness" of the quantifiers so that they are not
+ greedy by default, but become greedy if followed by "?". It can also be set by a "(?U)"
+ option setting within the pattern. </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </section><!-- end of notes -->
+ <section id='lua_fn_GRegex_new_pattern_'>
+ <title>GRegex.new(pattern)</title>
+ <para>Compiles regular expression pattern into a regular expression object whose
+ internal representation corresponds to the library used. The returned
+ result then can be used by the methods, e.g. match, exec, etc. Regular
+ expression objects are automatically garbage collected.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>pattern</term>
+ <listitem><para> A Perl-compatible regular expression pattern string </para></listitem>
+ </varlistentry> <!-- function_arg_footer: pattern -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para> The compiled regular expression (a userdata object) </para>
+ </section> <!-- function_returns_footer: GRegex.new -->
+ <section><title>Errors</title>
+ <itemizedlist>
+ <listitem><para>A malformed pattern generates a Lua error </para></listitem>
+ </itemizedlist>
+ </section> <!-- function_error_footer: GRegex.new -->
+ </section> <!-- function_footer: GRegex.new -->
+ <section id='lua_fn_GRegex_flags__table__'>
+ <title>GRegex.flags([table])</title>
+ <para>Returns a table containing the numeric values of the constants defined by
+ the regex library, with the keys being the (string) names of the
+ constants. If the table argument is supplied then it is used as the
+ output table, otherwise a new table is created. The constants contained
+ in the returned table can then be used in most functions and methods where
+ compilation flags or execution flags can be specified. They can also be
+ used for comparing with return codes of some functions and methods for
+ determining the reason of failure.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>table (optional)</term>
+ <listitem><para> A table for placing results into </para></listitem>
+ </varlistentry> <!-- function_arg_footer: table (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>A table filled with the results.</para>
+ </section> <!-- function_returns_footer: GRegex.flags -->
+ </section> <!-- function_footer: GRegex.flags -->
+ <section id='lua_fn_GRegex_compile_flags__table__'>
+ <title>GRegex.compile_flags([table])</title>
+ <para>Returns a table containing the numeric values of the constants defined by
+ the regex library for compile flags, with the keys being the (string) names of the
+ constants. If the table argument is supplied then it is used as the
+ output table, otherwise a new table is created.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>table (optional)</term>
+ <listitem><para> A table for placing results into </para></listitem>
+ </varlistentry> <!-- function_arg_footer: table (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>A table filled with the results.</para>
+ </section> <!-- function_returns_footer: GRegex.compile_flags -->
+ </section> <!-- function_footer: GRegex.compile_flags -->
+ <section id='lua_fn_GRegex_match_flags__table__'>
+ <title>GRegex.match_flags([table])</title>
+ <para>Returns a table containing the numeric values of the constants defined by
+ the regex library for match flags, with the keys being the (string) names of the
+ constants. If the table argument is supplied then it is used as the
+ output table, otherwise a new table is created.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>table (optional)</term>
+ <listitem><para> A table for placing results into </para></listitem>
+ </varlistentry> <!-- function_arg_footer: table (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>A table filled with the results.</para>
+ </section> <!-- function_returns_footer: GRegex.match_flags -->
+ </section> <!-- function_footer: GRegex.match_flags -->
+ <section id='lua_fn_GRegex_match_subject__pattern___init____cf____ef__'>
+ <title>GRegex.match(subject, pattern, [init], [cf], [ef])</title>
+ <para>Searches for the first match of the regexp pattern in the string subject, starting
+ from offset init, subject to flags cf and ef. The pattern is compiled each time this is
+ called, unlike the class method 'match' function.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>pattern</term>
+ <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem>
+ </varlistentry> <!-- function_arg_footer: pattern -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>cf (optional)</term>
+ <listitem><para> compilation flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: cf (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns all substring matches ("captures"), in the order they appear in the pattern.
+ false is returned for sub-patterns that did not participate in the match. If
+ the pattern specified no captures then the whole matched substring is
+ returned. On failure, returns nil.
+ </para>
+ </section> <!-- function_returns_footer: GRegex.match -->
+ </section> <!-- function_footer: GRegex.match -->
+ <section id='lua_fn_GRegex_find_subject__pattern___init____cf____ef__'>
+ <title>GRegex.find(subject, pattern, [init], [cf], [ef])</title>
+ <para>Searches for the first match of the regexp pattern in the string subject, starting
+ from offset init, subject to flags cf and ef. The pattern is compiled each time this is
+ called, unlike the class method 'find' function.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>pattern</term>
+ <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem>
+ </varlistentry> <!-- function_arg_footer: pattern -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>cf (optional)</term>
+ <listitem><para> compilation flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: cf (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns the start point of the match (a number), the
+ end point of the match (a number), and all substring matches ("captures"), in
+ the order they appear in the pattern. false is returned for sub-patterns that did
+ not participate in the match. On failure, returns nil.
+ </para>
+ </section> <!-- function_returns_footer: GRegex.find -->
+ </section> <!-- function_footer: GRegex.find -->
+ <section id='lua_fn_GRegex_gmatch_subject__pattern___init____cf____ef__'>
+ <title>GRegex.gmatch(subject, pattern, [init], [cf], [ef])</title>
+ <para>Returns an iterator for repeated matching of the pattern in the string subject, subject
+ to flags cf and ef. The function is intended for use in the generic for Lua construct.
+ The pattern can be a string or a GRegex object previously compiled with GRegex.new().
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>pattern</term>
+ <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem>
+ </varlistentry> <!-- function_arg_footer: pattern -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>cf (optional)</term>
+ <listitem><para> compilation flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: cf (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>The iterator function is called by Lua. On every iteration (that is, on every
+ match), it returns all captures in the order they appear in the pattern (or
+ the entire match if the pattern specified no captures). The iteration will
+ continue till the subject fails to match.
+ </para>
+ </section> <!-- function_returns_footer: GRegex.gmatch -->
+ </section> <!-- function_footer: GRegex.gmatch -->
+ <section id='lua_fn_GRegex_gsub_subject__pattern___repl____max____cf____ef__'>
+ <title>GRegex.gsub(subject, pattern, [repl], [max], [cf], [ef])</title>
+ <para>Searches for all matches of the pattern in the string subject and replaces them according
+ to the parameters repl and max.
+ The pattern can be a string or a GRegex object previously compiled with GRegex.new().
+ </para>
+ <para> For details see:
+ <ulink url="http://rrthomas.github.io/lrexlib/manual.html#gsub">http://rrthomas.github.io/lrexlib/manual.html#gsub</ulink>
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>pattern</term>
+ <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem>
+ </varlistentry> <!-- function_arg_footer: pattern -->
+ <varlistentry><term>repl (optional)</term>
+ <listitem><para> Substitution source string, function, table, false or nil </para></listitem>
+ </varlistentry> <!-- function_arg_footer: repl (optional) -->
+ <varlistentry><term>max (optional)</term>
+ <listitem><para> Maximum number of matches to search for, or control function, or nil </para></listitem>
+ </varlistentry> <!-- function_arg_footer: max (optional) -->
+ <varlistentry><term>cf (optional)</term>
+ <listitem><para> Compilation flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: cf (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> Match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns the subject string with the substitutions made,
+ the number of matches found, and the number of substitutions made.
+ </para>
+ </section> <!-- function_returns_footer: GRegex.gsub -->
+ </section> <!-- function_footer: GRegex.gsub -->
+ <section id='lua_fn_GRegex_split_subject__sep___cf____ef__'>
+ <title>GRegex.split(subject, sep, [cf], [ef])</title>
+ <para>Splits the subject string into parts (sections). The sep parameter
+ is a regular expression pattern representing separators between the sections.
+ The function is intended for use in the generic for Lua construct.
+ The function returns an iterator for repeated matching of the pattern sep in
+ the string subject, subject to flags cf and ef.
+ The sep pattern can be a string or a GRegex object previously compiled with GRegex.new().
+ Unlike gmatch, there will always be at least one iteration pass, even if there are no matches in the subject.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>sep</term>
+ <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem>
+ </varlistentry> <!-- function_arg_footer: sep -->
+ <varlistentry><term>cf (optional)</term>
+ <listitem><para> compilation flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: cf (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>The iterator function is called by Lua. On every iteration, it returns
+ a subject section (can be an empty string), followed by all captures in the order they
+ appear in the sep pattern (or the entire match if the sep pattern specified no captures).
+ If there is no match (this can occur only in the last iteration), then nothing is
+ returned after the subject section. The iteration will continue till the end of the subject.
+ </para>
+ </section> <!-- function_returns_footer: GRegex.split -->
+ </section> <!-- function_footer: -->
+ <section id='lua_fn_GRegex_version__'>
+ <title>GRegex.version()</title>
+ <para>Returns a string containing the version of the library in use.</para>
+ <section><title>Returns</title>
+ <para>The version string</para>
+ </section> <!-- function_returns_footer: GRegex.version -->
+ </section> <!-- function_footer: GRegex.version -->
+ <section id='lua_fn_gregex_match_subject___init____ef__'>
+ <title>gregex:match(subject, [init], [ef])</title>
+ <para>Searches for the first match of the regexp pattern in the string subject, starting
+ from offset init, subject to flags ef.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns all substring matches ("captures"), in the order they appear in the pattern.
+ false is returned for sub-patterns that did not participate in the match. If
+ the pattern specified no captures then the whole matched substring is
+ returned. nil is returned if the pattern did not match.
+ </para>
+ </section> <!-- function_returns_footer: gregex:match -->
+ </section> <!-- function_footer: gregex:match -->
+ <section id='lua_fn_gregex_find_subject___init____ef__'>
+ <title>gregex:find(subject, [init], [ef])</title>
+ <para>Searches for the first match of the regexp pattern in the string subject, starting
+ from offset init, subject to flags ef.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns the start point of the match (a number), the
+ end point of the match (a number), and all substring matches ("captures"), in
+ the order they appear in the pattern. false is returned for sub-patterns that did
+ not participate in the match. On failure, returns nil.
+ </para>
+ </section> <!-- function_returns_footer: gregex:find -->
+ </section> <!-- function_footer: -->
+ <section id='lua_fn_gregex_exec_subject___init____ef__'>
+ <title>gregex:exec(subject, [init], [ef])</title>
+ <para>Searches for the first match of the compiled GRegex object in the string subject, starting
+ from offset init, subject to the execution match flags ef.
+ </para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns the start point of the first match (a number), the
+ end point of the first match (a number), and, as a third result, a table holding the offsets of
+ the substring matches ("captures" in Lua terminology). This table contains false
+ in the positions where the corresponding sub-pattern did not participate in the match.
+ On failure, returns nil.
+ Example:
+ If the whole match is at offsets 10,20 and substring matches are at offsets 12,14 and 16,19
+ then the function returns the following: 10, 20, { 12,14,16,19 }.
+ </para>
+ </section> <!-- function_returns_footer: gregex:exec -->
+ </section> <!-- function_footer: gregex:exec -->
+ <section id='lua_fn_gregex_dfa_exec_subject___init____ef__'>
+ <title>gregex:dfa_exec(subject, [init], [ef])</title>
+ <para>Matches a compiled regular expression GRegex object against the given subject string, using a DFA matching algorithm.</para>
+ <section><title>Arguments</title>
+ <variablelist>
+ <varlistentry><term>subject</term>
+ <listitem><para> Subject string to search </para></listitem>
+ </varlistentry> <!-- function_arg_footer: subject -->
+ <varlistentry><term>init (optional)</term>
+ <listitem><para> start offset in the subject (can be negative) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: init (optional) -->
+ <varlistentry><term>ef (optional)</term>
+ <listitem><para> match execution flags (bitwise OR) </para></listitem>
+ </varlistentry> <!-- function_arg_footer: ef (optional) -->
+ </variablelist>
+ </section>
+ <section><title>Returns</title>
+ <para>On success, returns the start point of the matches found (a number), a
+ table containing the end points of the matches found, the longer matches first, and the
+ number of matches found as the third return value.
+ On failure, returns nil.
+ Example:
+ If there are 3 matches found starting at offset 10 and ending at offsets 15, 20 and 25
+ then the function returns the following: 10, { 25,20,15 }, 3
+ </para>
+ </section> <!-- function_returns_footer: gregex:dfa_exec -->
+ </section> <!-- function_footer: gregex:dfa_exec -->
+ <section id='lua_fn_gregex___tostring__'>
+ <title>gregex:__tostring()</title>
+ <para>Returns a string containing debug information about the GRegex object.</para>
+ <section><title>Returns</title>
+ <para>The debug string</para>
+ </section> <!-- function_returns_footer: gregex:__tostring -->
+ </section> <!-- function_footer: -->
+ </section> <!-- class_footer: GRegex -->
+ </section>
+
+
</chapter>
diff --git a/epan/wslua/CMakeLists.txt b/epan/wslua/CMakeLists.txt
index e65daeccd9..7d29798665 100644
--- a/epan/wslua/CMakeLists.txt
+++ b/epan/wslua/CMakeLists.txt
@@ -25,6 +25,9 @@
set(WSLUA_MODULES
${CMAKE_CURRENT_SOURCE_DIR}/wslua/lua_bitop.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lrexlib.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lrexlib_glib.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lrexlib_glib_f.c
${CMAKE_CURRENT_SOURCE_DIR}/wslua/wslua_int64.c
${CMAKE_CURRENT_SOURCE_DIR}/wslua/wslua_tvb.c
${CMAKE_CURRENT_SOURCE_DIR}/wslua/wslua_proto.c
diff --git a/epan/wslua/Makefile.am b/epan/wslua/Makefile.am
index ec4afd2cd9..0a721bd4fe 100644
--- a/epan/wslua/Makefile.am
+++ b/epan/wslua/Makefile.am
@@ -31,6 +31,9 @@ noinst_LTLIBRARIES = libwslua.la
wslua_modules = \
$(srcdir)/lua_bitop.c \
+ $(srcdir)/lrexlib.c \
+ $(srcdir)/lrexlib_glib.c \
+ $(srcdir)/lrexlib_glib_f.c \
$(srcdir)/wslua_tvb.c \
$(srcdir)/wslua_proto.c \
$(srcdir)/wslua_int64.c \
@@ -83,6 +86,8 @@ MAINTAINERCLEANFILES = \
EXTRA_DIST = \
declare_wslua.h \
lua_bitop.h \
+ lrexlib.h \
+ lrexlib_algo.h \
register_wslua.c \
taps \
make-reg.pl \
@@ -125,6 +130,8 @@ checkapi:
init_wslua.c \
register_wslua.c \
taps_wslua.c \
+ lrexlib_glib.c \
+ lrexlib_glib_f.c \
wslua_dumper.c \
wslua_field.c \
wslua_gui.c \
diff --git a/epan/wslua/Makefile.nmake b/epan/wslua/Makefile.nmake
index 80bb18a64d..aa3e07a038 100644
--- a/epan/wslua/Makefile.nmake
+++ b/epan/wslua/Makefile.nmake
@@ -16,6 +16,9 @@ CFLAGS=$(WARNINGS_ARE_ERRORS) $(STANDARD_CFLAGS) \
MODULES = \
lua_bitop.c \
+ lrexlib.c \
+ lrexlib_glib.c \
+ lrexlib_glib_f.c \
wslua_tvb.c \
wslua_proto.c \
wslua_int64.c \
@@ -34,6 +37,9 @@ OBJECTS= \
register_wslua.obj \
taps_wslua.obj \
lua_bitop.obj \
+ lrexlib.obj \
+ lrexlib_glib.obj \
+ lrexlib_glib_f.obj \
wslua_tvb.obj \
wslua_proto.obj \
wslua_int64.obj \
diff --git a/epan/wslua/lrexlib.c b/epan/wslua/lrexlib.c
new file mode 100644
index 0000000000..21038fa1e3
--- /dev/null
+++ b/epan/wslua/lrexlib.c
@@ -0,0 +1,266 @@
+/* lrexlib.c - common helpers shared by the Lrexlib bindings (Lrexlib's common.c) */
+/* See Copyright Notice in the file lrexlib.h */
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "lua.h"
+#include "lauxlib.h"
+#include "lrexlib.h"
+
+#define N_ALIGN sizeof(int)
+
+/* Reads the integer stored under `field` in the table at the Lua stack top.
+ * The value is converted with lua_tointeger; the stack is left as it was. */
+int get_int_field (lua_State *L, const char* field)
+{
+  int result;
+
+  lua_getfield (L, -1, field);            /* push table[field] */
+  result = (int) lua_tointeger (L, -1);
+  lua_settop (L, -2);                     /* drop the fetched value again */
+  return result;
+}
+
+/* Stores integer `val` under key `field` in the table at the Lua stack top. */
+void set_int_field (lua_State *L, const char* field, int val)
+{
+  lua_pushinteger (L, (lua_Integer) val);
+  lua_setfield (L, -2, field);   /* the table now sits just below the pushed value */
+}
+
+/* Allocates `size` bytes through the allocator registered with Lua state L
+ * and returns whatever that allocator returns (buffer_init treats NULL as
+ * failure). */
+void *Lmalloc(lua_State *L, size_t size) {
+  void *alloc_ud;
+  lua_Alloc alloc_fn = lua_getallocf(L, &alloc_ud);
+  void *block = alloc_fn(alloc_ud, NULL, 0, size);
+  return block;
+}
+
+/* Resizes block `p` from `osize` to `nsize` bytes through the allocator
+ * registered with Lua state L; returns the allocator's result. */
+void *Lrealloc(lua_State *L, void *p, size_t osize, size_t nsize) {
+  void *alloc_ud;
+  lua_Alloc alloc_fn = lua_getallocf(L, &alloc_ud);
+  void *resized = alloc_fn(alloc_ud, p, osize, nsize);
+  return resized;
+}
+
+/* Releases block `p` (of `osize` bytes) by asking the allocator registered
+ * with Lua state L for a new size of zero. */
+void Lfree(lua_State *L, void *p, size_t osize) {
+  void *alloc_ud;
+  lua_Alloc alloc_fn = lua_getallocf(L, &alloc_ud);
+  (void) alloc_fn(alloc_ud, p, osize, 0);
+}
+
+/* Fills a table with the string->integer pairs found in `arrs`, a
+ * NULL-terminated array of flag_pair arrays (each array ends with an entry
+ * whose key is NULL).
+ * When the Lua function received no arguments a new table is created;
+ * otherwise argument 1 must be a table and is the one that gets filled.
+ * The filled table is left on the stack top and 1 is returned.
+ */
+int get_flags (lua_State *L, const flag_pair **arrs) {
+  const flag_pair **group;
+  const int nargs = lua_gettop(L);
+
+  if (nargs == 0) {
+    lua_newtable(L);
+  }
+  else if (!lua_istable(L, 1)) {
+    luaL_argerror(L, 1, "not a table");
+  }
+  else if (nargs > 1) {
+    lua_pushvalue(L, 1);   /* bring the target table to the stack top */
+  }
+
+  for (group = arrs; *group != NULL; ++group) {
+    const flag_pair *entry = *group;
+    while (entry->key != NULL) {
+      lua_pushstring(L, entry->key);
+      lua_pushinteger(L, entry->val);
+      lua_rawset(L, -3);
+      ++entry;
+    }
+  }
+  return 1;
+}
+
+/* Returns the key of the first entry of `fp` (an array ended by an entry
+ * whose key is NULL) whose value equals `val`, or NULL when none matches. */
+const char *get_flag_key (const flag_pair *fp, int val) {
+  const flag_pair *entry = fp;
+  while (entry->key != NULL) {
+    if (entry->val == val)
+      return entry->key;
+    ++entry;
+  }
+  return NULL;
+}
+
+/* Classes */
+
+/*
+ * class TFreeList
+ * ***************
+ * Simple array of pointers to TBuffer's.
+ * The array has fixed capacity (not expanded automatically).
+ */
+
+/* Marks the free list as empty; the slots themselves are not touched. */
+void freelist_init (TFreeList *fl) {
+ fl->top = 0;
+}
+
+/* Appends `buf` to the free list.
+ * NOTE(review): there is no capacity check here; the list is a fixed-size
+ * array, so callers must not register more buffers than it can hold. */
+void freelist_add (TFreeList *fl, TBuffer *buf) {
+ fl->list[fl->top++] = buf;
+}
+
+/* Frees every registered buffer, last added first, leaving the list empty. */
+void freelist_free (TFreeList *fl) {
+  while (fl->top > 0) {
+    fl->top -= 1;
+    buffer_free (fl->list[fl->top]);
+  }
+}
+
+/*
+ * class TBuffer
+ * *************
+ * Auto-extensible array of characters for building long strings incrementally.
+ * * Differs from luaL_Buffer in that:
+ * * its operations do not change Lua stack top position
+ * * buffer_addvalue does not extract the value from Lua stack
+ * * buffer_pushresult does not have to be the last operation
+ * * Uses TFreeList class:
+ * * for inserting itself into a TFreeList instance for future clean-up
+ * * calls freelist_free prior to calling luaL_error.
+ * * Has specialized "Z-operations" for maintaining mixed string/integer
+ * array: bufferZ_addlstring, bufferZ_addnum and bufferZ_next.
+ * * if the array is intended to be "mixed", then the methods
+ * buffer_addlstring and buffer_addvalue must not be used
+ * (the application will crash on bufferZ_next).
+ * * conversely, if the array is not intended to be "mixed",
+ * then the method bufferZ_next must not be used.
+ */
+
+/* Tags written as the first header word of each entry of a "mixed" buffer
+ * (see bufferZ_addnum and bufferZ_addlstring). */
+enum { ID_NUMBER, ID_STRING };
+
+/* Sets up `buf` with a freshly allocated array of `sz` bytes and registers
+ * it in `fl` for later clean-up. If the allocation fails, every buffer
+ * already in `fl` is released and a Lua error is raised.
+ */
+void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl) {
+  buf->L = L;
+  buf->freelist = fl;
+  buf->size = sz;
+  buf->top = 0;
+  buf->arr = Lmalloc(L, sz);
+  if (buf->arr == NULL) {
+    freelist_free (fl);
+    luaL_error (L, "malloc failed");
+  }
+  freelist_add (fl, buf);
+}
+
+/* Returns the buffer's array to the Lua allocator; the TBuffer struct itself
+ * is not freed and its fields are left unchanged. */
+void buffer_free (TBuffer *buf) {
+ Lfree(buf->L, buf->arr, buf->size);
+}
+
+/* Discards the buffer's contents while keeping its allocated array. */
+void buffer_clear (TBuffer *buf) {
+ buf->top = 0;
+}
+
+/* Pushes the buffer's current contents onto the Lua stack as a string;
+ * the buffer itself is not modified. */
+void buffer_pushresult (TBuffer *buf) {
+ lua_pushlstring (buf->L, buf->arr, buf->top);
+}
+
+/* Appends the current contents of `src` to `trg`. */
+void buffer_addbuffer (TBuffer *trg, TBuffer *src) {
+ buffer_addlstring (trg, src->arr, src->top);
+}
+
+/* Appends `sz` bytes to `buf`, growing the array to twice the required size
+ * when it is too small. When `src` is NULL the space is reserved but nothing
+ * is copied into it. If growing fails, all buffers in the associated free
+ * list are released and a Lua error is raised.
+ */
+void buffer_addlstring (TBuffer *buf, const void *src, size_t sz) {
+  const size_t needed = buf->top + sz;
+
+  if (needed > buf->size) {
+    const size_t grown = 2 * needed;   /* 2x expansion */
+    char *bigger = (char*) Lrealloc (buf->L, buf->arr, buf->size, grown);
+    if (bigger == NULL) {
+      freelist_free (buf->freelist);
+      luaL_error (buf->L, "realloc failed");
+    }
+    buf->size = grown;
+    buf->arr = bigger;
+  }
+  if (src != NULL)
+    memcpy (buf->arr + buf->top, src, sz);
+  buf->top = needed;
+}
+
+/* Appends the string form of the Lua value at `stackpos` to `buf`.
+ * The value stays on the Lua stack. */
+void buffer_addvalue (TBuffer *buf, int stackpos) {
+  size_t nbytes;
+  const char *bytes = lua_tolstring (buf->L, stackpos, &nbytes);
+  buffer_addlstring (buf, bytes, nbytes);
+}
+
+/* Appends a string entry to a "mixed" buffer: a two-word header
+ * { ID_STRING, len }, the `len` bytes of `src`, then padding up to the next
+ * multiple of N_ALIGN (the padding bytes are reserved, not written).
+ * NOTE(review): N_ALIGN is sizeof(int) while the headers are size_t words;
+ * confirm that this padding is sufficient where size_t is wider than int.
+ */
+void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) {
+  size_t header[2];
+  const size_t remainder = len % N_ALIGN;
+
+  header[0] = ID_STRING;
+  header[1] = len;
+  buffer_addlstring (buf, header, sizeof (header));
+  buffer_addlstring (buf, src, len);
+  if (remainder != 0)
+    buffer_addlstring (buf, NULL, N_ALIGN - remainder);
+}
+
+/* Appends a number entry to a "mixed" buffer: a two-word header
+ * { ID_NUMBER, num } with no payload. */
+void bufferZ_addnum (TBuffer *buf, size_t num) {
+  size_t header[2];
+
+  header[0] = ID_NUMBER;
+  header[1] = num;
+  buffer_addlstring (buf, header, sizeof (header));
+}
+
+/* Parses the replacement string found at Lua stack index `reppos` into
+ * BufRep as a "mixed" sequence:
+ *   - runs of ordinary characters become string entries;
+ *   - "%<digit>" becomes a number entry holding the capture index;
+ *   - "%<any other char>" becomes a one-character string entry;
+ *   - a lone '%' at the very end of the string is ignored.
+ * `nsub` is the number of captures of the pattern: "%1" is stored as 0 when
+ * nsub is 0, and any other index greater than nsub raises a Lua error (after
+ * releasing the buffers of BufRep's free list).
+ *
+ * 1. When called repeatedly on the same TBuffer, its existing data
+ *    is discarded and overwritten by the new data.
+ * 2. The TBuffer's array is never shrunk by this function.
+ */
+void bufferZ_putrepstring (TBuffer *BufRep, int reppos, int nsub) {
+  size_t replen;
+  const char *p = lua_tolstring (BufRep->L, reppos, &replen);
+  const char *end = p + replen;
+  BufRep->top = 0;
+  while (p < end) {
+    const char *q = p;
+    while (q < end && *q != '%')
+      ++q;
+    if (q != p)
+      bufferZ_addlstring (BufRep, p, q - p);
+    /* stop when no '%' was found, or when '%' is the last character;
+       this also avoids forming a pointer beyond one-past-the-end */
+    if (q >= end || ++q >= end)
+      break;
+    /* the cast matters: passing a negative char value to isdigit is
+       undefined behaviour */
+    if (isdigit ((unsigned char) *q)) {
+      int num = *q - '0';
+      if (num == 1 && nsub == 0)
+        num = 0;
+      else if (num > nsub) {
+        freelist_free (BufRep->freelist);
+        luaL_error (BufRep->L, "invalid capture index");
+      }
+      bufferZ_addnum (BufRep, num);
+    }
+    else
+      bufferZ_addlstring (BufRep, q, 1);
+    p = q + 1;
+  }
+}
+
+/******************************************************************************
+ Steps through the entries of a "mixed" buffer. `*iter` is the byte offset of
+ the next entry and is advanced past it. For a string entry `*str` points at
+ the bytes and `*num` is their length; for a number entry `*str` is NULL and
+ `*num` is the number. Returns 1 when an entry was read, 0 at the end.
+
+ The intended use of this function is as follows:
+ size_t iter = 0;
+ while (bufferZ_next (buf, &iter, &num, &str)) {
+ if (str) do_something_with_string (str, num);
+ else do_something_with_number (num);
+ }
+*******************************************************************************
+*/
+int bufferZ_next (TBuffer *buf, size_t *iter, size_t *num, const char **str) {
+  size_t *header;
+  size_t rem;
+
+  if (*iter >= buf->top)
+    return 0;
+
+  header = (size_t*)(buf->arr + *iter);
+  *num = header[1];
+  *iter += 2 * sizeof (size_t);
+  if (header[0] != ID_STRING) {
+    *str = NULL;
+    return 1;
+  }
+
+  *str = buf->arr + *iter;
+  *iter += *num;
+  rem = *iter % N_ALIGN;        /* skip the padding added after the payload */
+  if (rem != 0)
+    *iter += N_ALIGN - rem;
+  return 1;
+}
+
+#if LUA_VERSION_NUM > 501
+/* Raises an argument error for argument `narg` saying that `tname` was
+ * expected and naming the type actually found. Only compiled for Lua
+ * versions newer than 5.1 (presumably because lauxlib no longer provides
+ * it there). */
+int luaL_typerror (lua_State *L, int narg, const char *tname) {
+  const char *actual = luaL_typename(L, narg);
+  const char *msg = lua_pushfstring(L, "%s expected, got %s", tname, actual);
+  return luaL_argerror(L, narg, msg);
+}
+#endif
diff --git a/epan/wslua/lrexlib.h b/epan/wslua/lrexlib.h
new file mode 100644
index 0000000000..ae578034d7
--- /dev/null
+++ b/epan/wslua/lrexlib.h
@@ -0,0 +1,130 @@
+/* lrexlib.h - common declarations shared by the Lrexlib bindings (Lrexlib's common.h) */
+/*
+License of Lrexlib release
+--------------------------
+
+Copyright (C) Reuben Thomas 2000-2012
+Copyright (C) Shmuel Zeigerman 2004-2012
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute,
+sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall
+be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef _LREXLIB_H
+#define _LREXLIB_H
+
+#include "lua.h"
+
+#define VERSION "2.7.2"
+
+#define LREXLIB_WIRESHARK
+
+#if LUA_VERSION_NUM > 501
+ int luaL_typerror (lua_State *L, int narg, const char *tname);
+#endif
+
+/* REX_API can be overridden from the command line or Makefile */
+#ifndef REX_API
+# define REX_API LUALIB_API
+#endif
+
+/* Special values for maxmatch in gsub. They all must be negative. */
+#define GSUB_UNLIMITED -1
+#define GSUB_CONDITIONAL -2
+
+/* Common structs and functions */
+
+/* One name/value pair of a flag table. Arrays of these end with an entry
+ * whose key is NULL (see get_flags and get_flag_key). */
+typedef struct {
+ const char* key;
+ int val;
+} flag_pair;
+
+/* Arguments gathered for compiling a pattern. The trailing library names
+ * mark fields that only some regex back-ends use. */
+typedef struct { /* compile arguments */
+ const char * pattern; /* pattern text (set when the pattern argument is a string) */
+ size_t patlen; /* length of pattern in bytes */
+ void * ud; /* already-compiled regex userdata, or NULL when a pattern string was given */
+ int cflags; /* compile flags */
+ const char * locale; /* PCRE, Oniguruma */
+ const unsigned char * tables; /* PCRE */
+ int tablespos; /* PCRE */
+ void * syntax; /* Oniguruma */
+ const unsigned char * translate; /* GNU */
+ int gnusyn; /* GNU */
+} TArgComp;
+
+/* Arguments gathered for executing a compiled pattern against a subject. */
+typedef struct { /* exec arguments */
+ const char * text; /* subject data */
+ size_t textlen; /* subject length */
+ int startoffset; /* 0-based offset at which matching starts */
+ int eflags; /* execution flags */
+ int funcpos; /* Lua stack index of the gsub replacement argument */
+ int maxmatch; /* match limit, or one of the negative GSUB_* values */
+ int funcpos2; /* used with gsub */
+ int reptype; /* used with gsub */
+ size_t ovecsize; /* PCRE: dfa_exec */
+ size_t wscount; /* PCRE: dfa_exec */
+} TArgExec;
+
+struct tagFreeList; /* forward declaration */
+
+/* Growable byte array used to build strings incrementally (see the
+ * buffer_* and bufferZ_* functions). */
+struct tagBuffer {
+ size_t size; /* allocated size of arr, in bytes */
+ size_t top; /* number of bytes currently in use */
+ char * arr; /* storage obtained from the Lua allocator */
+ lua_State * L; /* state used for allocation and error reporting */
+ struct tagFreeList * freelist; /* list this buffer is registered in */
+};
+
+/* Fixed-capacity list of buffers to release together (see freelist_free). */
+struct tagFreeList {
+ struct tagBuffer * list[16]; /* registered buffers; capacity is not checked by freelist_add */
+ int top; /* number of slots in use */
+};
+
+typedef struct tagBuffer TBuffer;
+typedef struct tagFreeList TFreeList;
+
+void freelist_init (TFreeList *fl);
+void freelist_add (TFreeList *fl, TBuffer *buf);
+void freelist_free (TFreeList *fl);
+
+void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl);
+void buffer_free (TBuffer *buf);
+void buffer_clear (TBuffer *buf);
+void buffer_addbuffer (TBuffer *trg, TBuffer *src);
+void buffer_addlstring (TBuffer *buf, const void *src, size_t sz);
+void buffer_addvalue (TBuffer *buf, int stackpos);
+void buffer_pushresult (TBuffer *buf);
+
+void bufferZ_putrepstring (TBuffer *buf, int reppos, int nsub);
+int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str);
+void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len);
+void bufferZ_addnum (TBuffer *buf, size_t num);
+
+int get_int_field (lua_State *L, const char* field);
+void set_int_field (lua_State *L, const char* field, int val);
+int get_flags (lua_State *L, const flag_pair **arr);
+const char *get_flag_key (const flag_pair *fp, int val);
+void *Lmalloc (lua_State *L, size_t size);
+void *Lrealloc (lua_State *L, void *p, size_t osize, size_t nsize);
+void Lfree (lua_State *L, void *p, size_t size);
+
+#endif
diff --git a/epan/wslua/lrexlib_algo.h b/epan/wslua/lrexlib_algo.h
new file mode 100644
index 0000000000..4e97c2b1b9
--- /dev/null
+++ b/epan/wslua/lrexlib_algo.h
@@ -0,0 +1,755 @@
+/* lrexlib_algo.h */
+/* See Copyright Notice in the file lrexlib.h */
+
+#include "lrexlib.h"
+
+#ifdef LREXLIB_WIRESHARK
+# define WSLUA_TYPEOF_FIELD "__typeof"
+# define REX_CREATEGLOBALVAR
+#endif
+
+#define REX_VERSION "Lrexlib " VERSION
+
+/* Forward declarations */
+static void gmatch_pushsubject (lua_State *L, TArgExec *argE);
+static int findmatch_exec (TUserdata *ud, TArgExec *argE);
+static int split_exec (TUserdata *ud, TArgExec *argE, int offset);
+static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud);
+static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
+
+#if LUA_VERSION_NUM == 501
+# define ALG_ENVIRONINDEX LUA_ENVIRONINDEX
+#else
+# define ALG_ENVIRONINDEX lua_upvalueindex(1)
+#endif
+
+#ifndef ALG_CHARSIZE
+# define ALG_CHARSIZE 1
+#endif
+
+#ifndef BUFFERZ_PUTREPSTRING
+# define BUFFERZ_PUTREPSTRING bufferZ_putrepstring
+#endif
+
+#ifndef ALG_GETCARGS
+# define ALG_GETCARGS(a,b,c)
+#endif
+
+#ifndef ALG_GETEFLAGS
+# define ALG_GETEFLAGS(L,idx) luaL_optint (L, idx, ALG_EFLAGS_DFLT)
+#endif
+
+#ifndef DO_NAMED_SUBPATTERNS
+#define DO_NAMED_SUBPATTERNS(a,b,c)
+#endif
+
+/* When doing an iterative search, there can occur a situation of a zero-length
+ * match at the current position, that prevents further advance on the subject
+ * string.
+ * There are two ways to handle that (AFAIK):
+ * a) Advance by one character (continue the search from the next position),
+ * or
+ * b) Search for a non-zero-length match that begins from the current
+ * position ("retry" the search). If the match is not found then advance
+ * by one character.
+ * The "b)" seems more correct, but most regex libraries expose no API for that.
+ * The known exception is PCRE that has flags PCRE_NOTEMPTY and PCRE_ANCHORED.
+ */
+#ifdef ALG_USERETRY
+ #define SET_RETRY(a,b) (a=b)
+ static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset, int retry);
+ static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry);
+ #define GSUB_EXEC gsub_exec
+ #define GMATCH_EXEC gmatch_exec
+#else
+ #define SET_RETRY(a,b) ((void)a)
+ static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
+ static int gmatch_exec (TUserdata *ud, TArgExec *argE);
+ #define GSUB_EXEC(a,b,c,d) gsub_exec(a,b,c)
+ #define GMATCH_EXEC(a,b,c) gmatch_exec(a,b)
+#endif
+
+
+#define METHOD_FIND 0
+#define METHOD_MATCH 1
+#define METHOD_EXEC 2
+#define METHOD_TFIND 3
+
+
+/* Interprets the optional match-limit argument at stack index `pos`:
+ * absent or nil means unlimited, a function means "ask per match", a number
+ * is the limit itself (negative values become 0). Anything else raises a
+ * type error. */
+static int OptLimit (lua_State *L, int pos) {
+  int limit;
+
+  if (lua_isnoneornil (L, pos))
+    return GSUB_UNLIMITED;
+  if (lua_isfunction (L, pos))
+    return GSUB_CONDITIONAL;
+  if (!lua_isnumber (L, pos))
+    return luaL_typerror (L, pos, "number or function");
+
+  limit = (int) lua_tointeger (L, pos);
+  if (limit < 0)
+    limit = 0;
+  return limit;
+}
+
+
+/* Converts the optional 1-based start position at `stackpos` (default 1)
+ * into a 0-based offset scaled by ALG_CHARSIZE. Negative positions count
+ * back from the end of a subject of `len` bytes and are clamped to 0. */
+static int get_startoffset(lua_State *L, int stackpos, size_t len) {
+  int off = luaL_optint(L, stackpos, 1);
+
+  if (off > 0) {
+    --off;
+  }
+  else if (off < 0) {
+    off += len/ALG_CHARSIZE;
+    if (off < 0)
+      off = 0;
+  }
+  return off*ALG_CHARSIZE;
+}
+
+
+/* Returns the regex userdata at stack index `pos` when the value has a
+ * metatable that is raw-equal to this library's method table
+ * (ALG_ENVIRONINDEX); returns NULL otherwise.
+ * The stack is left balanced on every path: the metatable pushed by
+ * lua_getmetatable is popped on failure as well as on success.
+ */
+static TUserdata* test_ud (lua_State *L, int pos)
+{
+  TUserdata *ud = NULL;
+  if (lua_getmetatable(L, pos)) {
+    if (lua_rawequal(L, -1, ALG_ENVIRONINDEX))
+      ud = (TUserdata *)lua_touserdata(L, pos);
+    lua_pop(L, 1);   /* drop the metatable whether or not it matched */
+  }
+  return ud;
+}
+
+
+static TUserdata* check_ud (lua_State *L)
+{
+ TUserdata *ud = test_ud(L, 1);
+ if (ud == NULL) luaL_typerror(L, 1, REX_TYPENAME);
+ return ud;
+}
+
+
+/* Validates the subject at stack index `pos` and stores its data pointer and
+ * length in argE->text / argE->textlen.
+ * A string (or a number, which lua_tolstring converts in place) is used
+ * directly. For a table or userdata the pointer is obtained by calling the
+ * subject's "topointer" field with the subject as argument, which must
+ * return a light userdata, and the length is the subject's length.
+ * Any other type, or a failed check, raises a Lua error.
+ */
+static void check_subject (lua_State *L, int pos, TArgExec *argE)
+{
+  int stype;
+  argE->text = lua_tolstring (L, pos, &argE->textlen);
+  stype = lua_type (L, pos);   /* read after the possible in-place conversion */
+  if (stype != LUA_TSTRING && stype != LUA_TTABLE && stype != LUA_TUSERDATA) {
+    luaL_typerror (L, pos, "string, table or userdata");
+  } else if (argE->text == NULL) {
+    int type;
+    lua_getfield (L, pos, "topointer");
+    if (lua_type (L, -1) != LUA_TFUNCTION)
+      luaL_error (L, "subject has no topointer method");
+    lua_pushvalue (L, pos);
+    lua_call (L, 1, 1);
+    type = lua_type (L, -1);
+    if (type != LUA_TLIGHTUSERDATA)
+      luaL_error (L, "subject's topointer method returned %s (expected lightuserdata)",
+                  lua_typename (L, type));
+    argE->text = lua_touserdata (L, -1);
+    lua_pop (L, 1);
+#if LUA_VERSION_NUM == 501
+    /* In Lua 5.1 lua_objlen returns the length and pushes nothing, so the
+       stack must not be inspected or popped after calling it. Honour a
+       __len metamethod first, then fall back to the raw length. */
+    if (luaL_callmeta (L, pos, "__len")) {
+      type = lua_type (L, -1);
+      if (type != LUA_TNUMBER)
+        luaL_error (L, "subject's length is %s (expected number)",
+                    lua_typename (L, type));
+      argE->textlen = lua_tointeger (L, -1);
+      lua_pop (L, 1);
+    }
+    else
+      argE->textlen = lua_objlen (L, pos);
+#else
+    lua_len (L, pos);            /* pushes the length, honouring __len */
+    type = lua_type (L, -1);
+    if (type != LUA_TNUMBER)
+      luaL_error (L, "subject's length is %s (expected number)",
+                  lua_typename (L, type));
+    argE->textlen = lua_tointeger (L, -1);
+    lua_pop (L, 1);
+#endif
+  }
+}
+
+/* Accepts either a pattern string or an already-compiled regex at stack
+ * index `pos`. For a string, argC->pattern/patlen are filled and argC->ud is
+ * NULL; for a regex object only argC->ud is set. Anything else raises a type
+ * error. */
+static void check_pattern (lua_State *L, int pos, TArgComp *argC)
+{
+  if (!lua_isstring (L, pos)) {
+    argC->ud = test_ud (L, pos);
+    if (argC->ud == NULL)
+      luaL_typerror(L, pos, "string or "REX_TYPENAME);
+    return;
+  }
+  argC->pattern = lua_tolstring (L, pos, &argC->patlen);
+  argC->ud = NULL;
+}
+
+/* Reads the arguments used by algf_new: the pattern string at index 1 and
+ * the compile flags at index 2. Index 3 onwards is handed to ALG_GETCARGS,
+ * which expands to nothing unless the including file defines it. */
+static void checkarg_new (lua_State *L, TArgComp *argC) {
+ argC->pattern = luaL_checklstring (L, 1, &argC->patlen);
+ argC->cflags = ALG_GETCFLAGS (L, 2);
+ ALG_GETCARGS (L, 3, argC);
+}
+
+
+/* function gsub (s, patt, f, [n], [cf], [ef], [larg...]) */
+/* Validates the gsub arguments and stores them in argC / argE. The
+ * replacement `f` may be a string (numbers are converted), a table, a
+ * function, nil or false; any other value raises a type error. */
+static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) {
+  int rep_ok;
+
+  check_subject (L, 1, argE);
+  check_pattern (L, 2, argC);
+
+  lua_tostring (L, 3); /* converts number (if any) to string */
+  argE->reptype = lua_type (L, 3);
+  rep_ok = argE->reptype == LUA_TSTRING
+        || argE->reptype == LUA_TTABLE
+        || argE->reptype == LUA_TFUNCTION
+        || argE->reptype == LUA_TNIL
+        || (argE->reptype == LUA_TBOOLEAN && !lua_toboolean (L, 3));
+  if (!rep_ok)
+    luaL_typerror (L, 3, "string, table, function, false or nil");
+
+  argE->funcpos = 3;
+  argE->funcpos2 = 4;
+  argE->maxmatch = OptLimit (L, 4);
+  argC->cflags = ALG_GETCFLAGS (L, 5);
+  argE->eflags = ALG_GETEFLAGS (L, 6);
+  ALG_GETCARGS (L, 7, argC);
+}
+
+
+/* function find (s, patt, [st], [cf], [ef], [larg...]) */
+/* function match (s, patt, [st], [cf], [ef], [larg...]) */
+/* Validates the arguments listed above and stores them in argC / argE.
+ * The start position is converted to a 0-based offset by get_startoffset. */
+static void checkarg_find_func (lua_State *L, TArgComp *argC, TArgExec *argE) {
+ check_subject (L, 1, argE);
+ check_pattern (L, 2, argC);
+ argE->startoffset = get_startoffset (L, 3, argE->textlen);
+ argC->cflags = ALG_GETCFLAGS (L, 4);
+ argE->eflags = ALG_GETEFLAGS (L, 5);
+ ALG_GETCARGS (L, 6, argC);
+}
+
+
+/* function gmatch (s, patt, [cf], [ef], [larg...]) */
+/* function split (s, patt, [cf], [ef], [larg...]) */
+/* Validates the arguments listed above and stores them in argC / argE.
+ * argE->startoffset is not set here. */
+static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) {
+ check_subject (L, 1, argE);
+ check_pattern (L, 2, argC);
+ argC->cflags = ALG_GETCFLAGS (L, 3);
+ argE->eflags = ALG_GETEFLAGS (L, 4);
+ ALG_GETCARGS (L, 5, argC);
+}
+
+
+/* method r:tfind (s, [st], [ef]) */
+/* method r:exec (s, [st], [ef]) */
+/* method r:find (s, [st], [ef]) */
+/* method r:match (s, [st], [ef]) */
+/* Validates the method arguments listed above: the regex object itself is
+ * argument 1 and is returned through *ud; the rest is stored in argE. */
+static void checkarg_find_method (lua_State *L, TArgExec *argE, TUserdata **ud) {
+ *ud = check_ud (L);
+ check_subject (L, 2, argE);
+ argE->startoffset = get_startoffset (L, 3, argE->textlen);
+ argE->eflags = ALG_GETEFLAGS (L, 4);
+}
+
+
+/* Lua-callable constructor: validates the arguments with checkarg_new and
+ * returns whatever compile_regex returns for them. */
+static int algf_new (lua_State *L) {
+  TArgComp comp_args;
+  int nresults;
+
+  checkarg_new (L, &comp_args);
+  nresults = compile_regex (L, &comp_args, NULL);
+  return nresults;
+}
+
+/* Pushes one value per capture (1..ALG_NSUB) using ALG_PUSHSUB_OR_FALSE.
+ * If the Lua stack cannot be grown by that many slots, the buffers in
+ * `freelist` (when not NULL) are released and a Lua error is raised. */
+static void push_substrings (lua_State *L, TUserdata *ud, const char *text,
+                             TFreeList *freelist) {
+  int idx = 1;
+
+  if (!lua_checkstack (L, ALG_NSUB(ud))) {
+    if (freelist != NULL)
+      freelist_free (freelist);
+    luaL_error (L, "cannot add %d stack slots", ALG_NSUB(ud));
+  }
+  while (idx <= ALG_NSUB(ud)) {
+    ALG_PUSHSUB_OR_FALSE (L, ud, text, idx);
+    ++idx;
+  }
+}
+
+/* Lua-callable gsub (s, patt, f, [n], [cf], [ef], [larg...]).
+ * Walks the subject match by match, appending unmatched text and the chosen
+ * replacement to BufOut. Returns three values: the resulting string, the
+ * number of matches and the number of substitutions actually made.
+ * All TBuffers are registered in `freelist`, which is released before every
+ * error exit and before the normal return.
+ */
+static int algf_gsub (lua_State *L) {
+ TUserdata *ud;
+ TArgComp argC;
+ TArgExec argE;
+ int n_match = 0, n_subst = 0, st = 0, retry;
+ TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
+ TFreeList freelist;
+ /*------------------------------------------------------------------*/
+ checkarg_gsub (L, &argC, &argE);
+ /* use the regex object passed as argument 2, or compile the pattern string */
+ if (argC.ud) {
+ ud = (TUserdata*) argC.ud;
+ lua_pushvalue (L, 2);
+ }
+ else compile_regex (L, &argC, &ud);
+ freelist_init (&freelist);
+ /*------------------------------------------------------------------*/
+ /* string replacement: pre-parse it once into a "mixed" buffer */
+ if (argE.reptype == LUA_TSTRING) {
+ buffer_init (&BufRep, 256, L, &freelist);
+ BUFFERZ_PUTREPSTRING (&BufRep, argE.funcpos, ALG_NSUB(ud));
+ }
+ /*------------------------------------------------------------------*/
+ /* conditional mode: build each replacement in BufTemp so the callback can accept or reject it */
+ if (argE.maxmatch == GSUB_CONDITIONAL) {
+ buffer_init (&BufTemp, 1024, L, &freelist);
+ pBuf = &BufTemp;
+ }
+ /*------------------------------------------------------------------*/
+ buffer_init (&BufOut, 1024, L, &freelist);
+ SET_RETRY (retry, 0);
+ /* negative maxmatch values (GSUB_UNLIMITED / GSUB_CONDITIONAL) impose no count limit here */
+ while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) {
+ int from, to, res;
+ int curr_subst = 0;
+ res = GSUB_EXEC (ud, &argE, st, retry);
+ if (ALG_NOMATCH (res)) {
+#ifdef ALG_USERETRY
+ if (retry) {
+ if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
+ buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
+ st += ALG_CHARSIZE;
+ retry = 0;
+ continue;
+ }
+ }
+#endif
+ break;
+ }
+ else if (!ALG_ISMATCH (res)) {
+ freelist_free (&freelist);
+ return generate_error (L, ud, res);
+ }
+ ++n_match;
+ /* [from, to) is the span of the whole match within the subject */
+ from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
+ to = ALG_BASE(st) + ALG_SUBEND(ud,0);
+ /* copy the unmatched text that precedes the match */
+ if (st < from) {
+ buffer_addlstring (&BufOut, argE.text + st, from - st);
+#ifdef ALG_PULL
+ st = from;
+#endif
+ }
+ /*----------------------------------------------------------------*/
+ /* string replacement: replay the pre-parsed entries, expanding capture references */
+ if (argE.reptype == LUA_TSTRING) {
+ size_t iter = 0, num;
+ const char *str;
+ while (bufferZ_next (&BufRep, &iter, &num, &str)) {
+ if (str)
+ buffer_addlstring (pBuf, str, num);
+ else if (num == 0 || ALG_SUBVALID (ud,(int)num))
+ buffer_addlstring (pBuf, argE.text + ALG_BASE(st) + ALG_SUBBEG(ud,(int)num), ALG_SUBLEN(ud,(int)num));
+ }
+ curr_subst = 1;
+ }
+ /*----------------------------------------------------------------*/
+ /* table replacement: look up the first capture (or the whole match when there are no captures) */
+ else if (argE.reptype == LUA_TTABLE) {
+ if (ALG_NSUB(ud) > 0)
+ ALG_PUSHSUB_OR_FALSE (L, ud, argE.text + ALG_BASE(st), 1);
+ else
+ lua_pushlstring (L, argE.text + from, to - from);
+ lua_gettable (L, argE.funcpos);
+ }
+ /*----------------------------------------------------------------*/
+ /* function replacement: call it with all captures (or the whole match) */
+ else if (argE.reptype == LUA_TFUNCTION) {
+ int narg;
+ lua_pushvalue (L, argE.funcpos);
+ if (ALG_NSUB(ud) > 0) {
+ push_substrings (L, ud, argE.text + ALG_BASE(st), &freelist);
+ narg = ALG_NSUB(ud);
+ }
+ else {
+ lua_pushlstring (L, argE.text + from, to - from);
+ narg = 1;
+ }
+ if (0 != lua_pcall (L, narg, 1, 0)) {
+ freelist_free (&freelist);
+ return lua_error (L); /* re-raise the error */
+ }
+ }
+ /*----------------------------------------------------------------*/
+ /* nil / false replacement: keep the matched text as it is */
+ else if (argE.reptype == LUA_TNIL || argE.reptype == LUA_TBOOLEAN) {
+ buffer_addlstring (pBuf, argE.text + from, to - from);
+ }
+ /*----------------------------------------------------------------*/
+ /* the table lookup / function call left its result on the stack top */
+ if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) {
+ if (lua_tostring (L, -1)) {
+ buffer_addvalue (pBuf, -1);
+ curr_subst = 1;
+ }
+ else if (!lua_toboolean (L, -1))
+ buffer_addlstring (pBuf, argE.text + from, to - from);
+ else {
+ freelist_free (&freelist);
+ luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, -1));
+ }
+ /* in conditional mode the value stays on the stack for the callback below */
+ if (argE.maxmatch != GSUB_CONDITIONAL)
+ lua_pop (L, 1);
+ }
+ /*----------------------------------------------------------------*/
+ if (argE.maxmatch == GSUB_CONDITIONAL) {
+ /* Call the function */
+ lua_pushvalue (L, argE.funcpos2);
+ lua_pushinteger (L, from/ALG_CHARSIZE + 1);
+ lua_pushinteger (L, to/ALG_CHARSIZE);
+ if (argE.reptype == LUA_TSTRING)
+ buffer_pushresult (&BufTemp);
+ else {
+ /* move the replacement value kept on the stack above to the 3rd argument slot */
+ /* NOTE(review): for a nil/false replacement nothing was left on the stack by the
+ code above, so index -4 then refers to whatever lies below the pushed
+ function - confirm this combination is handled as intended. */
+ lua_pushvalue (L, -4);
+ lua_remove (L, -5);
+ }
+ if (0 != lua_pcall (L, 3, 2, 0)) {
+ freelist_free (&freelist);
+ lua_error (L); /* re-raise the error */
+ }
+ /* Handle the 1-st return value */
+ if (lua_isstring (L, -2)) { /* coercion is allowed here */
+ buffer_addvalue (&BufOut, -2); /* rep2 */
+ curr_subst = 1;
+ }
+ else if (lua_toboolean (L, -2))
+ buffer_addbuffer (&BufOut, &BufTemp); /* rep1 */
+ else {
+ buffer_addlstring (&BufOut, argE.text + from, to - from); /* "no" */
+ curr_subst = 0;
+ }
+ /* Handle the 2-nd return value */
+ if (lua_type (L, -1) == LUA_TNUMBER) { /* no coercion is allowed here */
+ int n = (int) lua_tointeger (L, -1);
+ if (n < 0) /* n */
+ n = 0;
+ argE.maxmatch = n_match + n;
+ }
+ else if (lua_toboolean (L, -1)) /* "yes to all" */
+ argE.maxmatch = GSUB_UNLIMITED;
+ else
+ buffer_clear (&BufTemp);
+
+ lua_pop (L, 2);
+ /* once conditional mode has ended, replacements go straight to BufOut */
+ if (argE.maxmatch != GSUB_CONDITIONAL)
+ pBuf = &BufOut;
+ }
+ /*----------------------------------------------------------------*/
+ n_subst += curr_subst;
+ if (st < to) {
+ st = to;
+ SET_RETRY (retry, 0);
+ }
+ else if (st < (int)argE.textlen) {
+ /* empty match at the current position: make sure the loop still advances */
+#ifdef ALG_USERETRY
+ retry = 1;
+#else
+ /* advance by 1 char (not replaced) */
+ buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
+ st += ALG_CHARSIZE;
+#endif
+ }
+ else break;
+ }
+ /*------------------------------------------------------------------*/
+ /* append the remainder of the subject and return the three results */
+ buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st);
+ buffer_pushresult (&BufOut);
+ lua_pushinteger (L, n_match);
+ lua_pushinteger (L, n_subst);
+ freelist_free (&freelist);
+ return 3;
+}
+
+
+/* Turns the outcome `res` of a match attempt into Lua return values.
+ * On a match: METHOD_FIND pushes the match offsets followed by the captures;
+ * every other method pushes the captures, or the whole match when the
+ * pattern has none. On no match a single nil is pushed. Any other outcome is
+ * reported through generate_error. Returns the number of values pushed.
+ */
+static int finish_generic_find (lua_State *L, TUserdata *ud, TArgExec *argE,
+                                int method, int res)
+{
+  const int is_find = (method == METHOD_FIND);
+
+  if (!ALG_ISMATCH (res)) {
+    if (ALG_NOMATCH (res)) {
+      lua_pushnil (L);
+      return 1;
+    }
+    return generate_error (L, ud, res);
+  }
+
+  if (is_find) {
+    ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE->startoffset), 0);
+  }
+  if (ALG_NSUB(ud)) { /* push captures */
+    push_substrings (L, ud, argE->text, NULL);
+  }
+  else if (!is_find) {
+    ALG_PUSHSUB (L, ud, argE->text, 0);
+    return 1;
+  }
+  if (is_find)
+    return ALG_NSUB(ud) + 2;
+  return ALG_NSUB(ud);
+}
+
+
+/* Shared body of the find/match library functions: validates the arguments,
+ * obtains the regex (argument 2 itself, or a freshly compiled pattern),
+ * runs one match attempt and lets finish_generic_find push the results.
+ * A start offset beyond the end of the subject yields a single nil.
+ */
+static int generic_find_func (lua_State *L, int method) {
+  TUserdata *ud;
+  TArgComp comp_args;
+  TArgExec exec_args;
+  int outcome;
+
+  checkarg_find_func (L, &comp_args, &exec_args);
+  if (exec_args.startoffset > (int)exec_args.textlen) {
+    lua_pushnil (L);
+    return 1;
+  }
+
+  if (comp_args.ud == NULL) {
+    compile_regex (L, &comp_args, &ud);
+  }
+  else {
+    ud = (TUserdata*) comp_args.ud;
+    lua_pushvalue (L, 2);
+  }
+  outcome = findmatch_exec (ud, &exec_args);
+  return finish_generic_find (L, ud, &exec_args, method, outcome);
+}
+
+
+/* Library function find (s, patt, [st], [cf], [ef], [larg...]). */
+static int algf_find (lua_State *L) {
+ return generic_find_func (L, METHOD_FIND);
+}
+
+
+/* Library function match (s, patt, [st], [cf], [ef], [larg...]). */
+static int algf_match (lua_State *L) {
+ return generic_find_func (L, METHOD_MATCH);
+}
+
+
+/* Iterator closure created by algf_gmatch. Its upvalues are:
+ * 1 = regex userdata, 2 = subject, 3 = exec flags, 4 = next start offset,
+ * and, with ALG_USERETRY, 5 = retry flag.
+ * Each call returns the captures of the next match (or the whole match when
+ * the pattern has no captures) and nothing once the subject is exhausted.
+ */
+static int gmatch_iter (lua_State *L) {
+ int retry;
+ TArgExec argE;
+ TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
+ argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
+ argE.eflags = (int) lua_tointeger (L, lua_upvalueindex (3));
+ argE.startoffset = (int) lua_tointeger (L, lua_upvalueindex (4));
+#ifdef ALG_USERETRY
+ retry = (int) lua_tointeger (L, lua_upvalueindex (5));
+#endif
+
+ if (argE.startoffset > (int)argE.textlen)
+ return 0;
+
+ while (1) {
+ /* without ALG_USERETRY the GMATCH_EXEC macro drops its third argument, so `retry` is never read */
+ int res = GMATCH_EXEC (ud, &argE, retry);
+ if (ALG_ISMATCH (res)) {
+ int incr = 0;
+ if (ALG_SUBLEN(ud,0)) {
+ SET_RETRY (retry, 0);
+ }
+ else { /* no progress: prevent endless loop */
+#ifdef ALG_USERETRY
+ SET_RETRY (retry, 1);
+#else
+ incr = ALG_CHARSIZE;
+#endif
+ }
+ lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
+ lua_replace (L, lua_upvalueindex (4));
+#ifdef ALG_USERETRY
+ lua_pushinteger (L, retry);
+ lua_replace (L, lua_upvalueindex (5)); /* update retry */
+#endif
+ /* push either captures or entire match */
+ if (ALG_NSUB(ud)) {
+ push_substrings (L, ud, argE.text, NULL);
+ return ALG_NSUB(ud);
+ }
+ else {
+ ALG_PUSHSUB (L, ud, argE.text, 0);
+ return 1;
+ }
+ }
+ else if (ALG_NOMATCH (res)) {
+#ifdef ALG_USERETRY
+ if (retry) {
+ if (argE.startoffset < (int)argE.textlen) {
+ /* NOTE(review): this steps by 1 while the other advance paths in this
+ file step by ALG_CHARSIZE; the two agree only when ALG_CHARSIZE is 1 */
+ ++argE.startoffset; /* advance by 1 char */
+ SET_RETRY (retry, 0);
+ continue;
+ }
+ }
+#endif
+ return 0;
+ }
+ else
+ return generate_error (L, ud, res);
+ }
+}
+
+
+/* Iterator closure created by algf_split. Its upvalues are:
+ * 1 = regex userdata, 2 = subject, 3 = exec flags, 4 = start offset of the
+ * next section, 5 = extra increment applied to the search position after an
+ * empty separator match.
+ * Each call returns the text preceding the next separator followed by the
+ * separator's captures (or the whole separator when the pattern has no
+ * captures). When no further separator is found the remaining text is
+ * returned alone, and the call after that returns nothing.
+ */
+static int split_iter (lua_State *L) {
+ int incr, newoffset, res;
+ TArgExec argE;
+ TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
+ argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
+ argE.eflags = (int) lua_tointeger (L, lua_upvalueindex (3));
+ argE.startoffset = (int) lua_tointeger (L, lua_upvalueindex (4));
+ incr = (int) lua_tointeger (L, lua_upvalueindex (5));
+
+ /* an offset past the end was stored by the final ("nomatch") iteration */
+ if (argE.startoffset > (int)argE.textlen)
+ return 0;
+
+ /* the search starts `incr` beyond the section start, so an empty match is not found twice */
+ if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
+ goto nomatch;
+
+ res = split_exec (ud, &argE, newoffset);
+ if (ALG_ISMATCH (res)) {
+ lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */
+ lua_replace (L, lua_upvalueindex (4));
+ lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
+ lua_replace (L, lua_upvalueindex (5));
+ /* push text preceding the match */
+ lua_pushlstring (L, argE.text + argE.startoffset,
+ ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
+ /* push either captures or entire match */
+ if (ALG_NSUB(ud)) {
+ push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
+ return 1 + ALG_NSUB(ud);
+ }
+ else {
+ ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
+ return 2;
+ }
+ }
+ else if (ALG_NOMATCH (res))
+ goto nomatch;
+ else
+ return generate_error (L, ud, res);
+
+nomatch:
+ lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */
+ lua_replace (L, lua_upvalueindex (4)); /* update start offset */
+ lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset);
+ return 1;
+}
+
+
+/* Library function gmatch (s, patt, [cf], [ef], [larg...]).
+ * Returns a closure over gmatch_iter whose upvalues are, in order: the
+ * regex, the subject, the exec flags, the start offset (initially 0) and,
+ * with ALG_USERETRY, the retry flag (initially 0).
+ */
+static int algf_gmatch (lua_State *L)
+{
+  TArgComp comp_args;
+  TArgExec exec_args;
+  TUserdata *ud;
+  int n_upvalues = 4;
+
+  checkarg_gmatch_split (L, &comp_args, &exec_args);
+
+  /* 1-st upvalue: ud */
+  if (comp_args.ud == NULL) {
+    compile_regex (L, &comp_args, &ud);
+  }
+  else {
+    ud = (TUserdata*) comp_args.ud;
+    lua_pushvalue (L, 2);
+  }
+  gmatch_pushsubject (L, &exec_args);      /* 2-nd upvalue: s */
+  lua_pushinteger (L, exec_args.eflags);   /* 3-rd upvalue: ef */
+  lua_pushinteger (L, 0);                  /* 4-th upvalue: startoffset */
+#ifdef ALG_USERETRY
+  lua_pushinteger (L, 0);                  /* 5-th upvalue: retry */
+  n_upvalues = 5;
+#endif
+  lua_pushcclosure (L, gmatch_iter, n_upvalues);
+  return 1;
+}
+
+/* Library function split (s, patt, [cf], [ef], [larg...]).
+ * Returns a closure over split_iter whose upvalues are, in order: the regex,
+ * the subject, the exec flags, the start offset and the increment (both
+ * initially 0).
+ */
+static int algf_split (lua_State *L)
+{
+  TArgComp comp_args;
+  TArgExec exec_args;
+  TUserdata *ud;
+
+  checkarg_gmatch_split (L, &comp_args, &exec_args);
+
+  /* 1-st upvalue: ud */
+  if (comp_args.ud == NULL) {
+    compile_regex (L, &comp_args, &ud);
+  }
+  else {
+    ud = (TUserdata*) comp_args.ud;
+    lua_pushvalue (L, 2);
+  }
+  gmatch_pushsubject (L, &exec_args);      /* 2-nd upvalue: s */
+  lua_pushinteger (L, exec_args.eflags);   /* 3-rd upvalue: ef */
+  lua_pushinteger (L, 0);                  /* 4-th upvalue: startoffset */
+  lua_pushinteger (L, 0);                  /* 5-th upvalue: incr */
+  lua_pushcclosure (L, split_iter, 5);
+  return 1;
+}
+
+
+/* Pushes a new table whose entry i (1..ALG_NSUB) is the value produced by
+ * ALG_PUSHSUB_OR_FALSE for capture i. */
+static void push_substring_table (lua_State *L, TUserdata *ud, const char *text) {
+  int sub = 1;
+
+  lua_newtable (L);
+  while (sub <= ALG_NSUB(ud)) {
+    ALG_PUSHSUB_OR_FALSE (L, ud, text, sub);
+    lua_rawseti (L, -2, sub);
+    ++sub;
+  }
+}
+
+
+/* Pushes a new table holding two consecutive entries per capture: the values
+ * produced by ALG_PUSHSTART and ALG_PUSHEND for a valid capture, or two
+ * `false` values for a capture that did not take part in the match. */
+static void push_offset_table (lua_State *L, TUserdata *ud, int startoffset) {
+  int sub;
+  int slot = 0;   /* last table index written */
+
+  lua_newtable (L);
+  for (sub = 1; sub <= ALG_NSUB(ud); ++sub) {
+    if (!ALG_SUBVALID (ud,sub)) {
+      lua_pushboolean (L, 0);
+      lua_rawseti (L, -2, ++slot);
+      lua_pushboolean (L, 0);
+      lua_rawseti (L, -2, ++slot);
+      continue;
+    }
+    ALG_PUSHSTART (L, ud, startoffset, sub);
+    lua_rawseti (L, -2, ++slot);
+    ALG_PUSHEND (L, ud, startoffset, sub);
+    lua_rawseti (L, -2, ++slot);
+  }
+}
+
+
+/* Shared body of the regex methods r:find, r:match, r:tfind and r:exec.
+ * Runs one match attempt on the subject. For exec and tfind a match yields
+ * three return slots: the match offsets plus a table (capture offsets for
+ * exec, capture values for tfind). find and match are finished by
+ * finish_generic_find. No match, or a start offset beyond the subject,
+ * yields nil; any other outcome is reported through generate_error.
+ */
+static int generic_find_method (lua_State *L, int method) {
+  TUserdata *ud;
+  TArgExec argE;
+  int res;
+
+  checkarg_find_method (L, &argE, &ud);
+  if (argE.startoffset > (int)argE.textlen) {
+    lua_pushnil(L);
+    return 1;
+  }
+
+  res = findmatch_exec (ud, &argE);
+  if (!ALG_ISMATCH (res)) {
+    if (ALG_NOMATCH (res)) {
+      lua_pushnil (L);
+      return 1;
+    }
+    return generate_error(L, ud, res);
+  }
+
+  if (method == METHOD_EXEC || method == METHOD_TFIND) {
+    ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0);
+    if (method == METHOD_EXEC)
+      push_offset_table (L, ud, ALG_BASE(argE.startoffset));
+    else
+      push_substring_table (L, ud, argE.text);
+    DO_NAMED_SUBPATTERNS (L, ud, argE.text);
+    return 3;
+  }
+  if (method == METHOD_MATCH || method == METHOD_FIND)
+    return finish_generic_find (L, ud, &argE, method, res);
+  return 0;
+}
+
+
+/* Regex methods r:find, r:match, r:tfind and r:exec (s, [st], [ef]): thin
+ * wrappers that select the result format handled by generic_find_method. */
+static int algm_find (lua_State *L) {
+ return generic_find_method (L, METHOD_FIND);
+}
+static int algm_match (lua_State *L) {
+ return generic_find_method (L, METHOD_MATCH);
+}
+static int algm_tfind (lua_State *L) {
+ return generic_find_method (L, METHOD_TFIND);
+}
+static int algm_exec (lua_State *L) {
+ return generic_find_method (L, METHOD_EXEC);
+}
+
+/* Builds the library: a metatable holding the regex methods `r_methods`
+ * (with __index pointing at itself) and a library table holding the
+ * functions `r_functions` plus a _VERSION string that mentions `name`.
+ * On return the library table is on the stack top with the metatable just
+ * below it. With REX_CREATEGLOBALVAR the library table is also stored in the
+ * global named REX_LIBNAME; with LREXLIB_WIRESHARK both tables get a
+ * WSLUA_TYPEOF_FIELD entry set to REX_LIBNAME.
+ */
+static void alg_register (lua_State *L, const luaL_Reg *r_methods,
+ const luaL_Reg *r_functions, const char *name) {
+ /* Create a new function environment to serve as a metatable for methods. */
+#if LUA_VERSION_NUM == 501
+ lua_newtable (L);
+ lua_pushvalue (L, -1);
+ lua_replace (L, LUA_ENVIRONINDEX);
+ luaL_register (L, NULL, r_methods);
+#else
+ /* the metatable is passed to every method as upvalue 1, which is what
+ ALG_ENVIRONINDEX refers to for these Lua versions */
+ luaL_newmetatable(L, REX_TYPENAME);
+ lua_pushvalue(L, -1);
+ luaL_setfuncs (L, r_methods, 1);
+#endif
+#ifdef LREXLIB_WIRESHARK
+ lua_pushstring(L, REX_LIBNAME);
+ lua_setfield(L, -2, WSLUA_TYPEOF_FIELD);
+#endif
+ lua_pushvalue(L, -1); /* mt.__index = mt */
+ lua_setfield(L, -2, "__index");
+
+ /* Register functions. */
+ lua_createtable(L, 0, 8);
+#if LUA_VERSION_NUM == 501
+ luaL_register (L, NULL, r_functions);
+#else
+ /* library functions also receive the metatable as upvalue 1 */
+ lua_pushvalue(L, -2);
+ luaL_setfuncs (L, r_functions, 1);
+#endif
+#ifdef REX_CREATEGLOBALVAR
+ lua_pushvalue(L, -1);
+ lua_setglobal(L, REX_LIBNAME);
+#endif
+#ifdef LREXLIB_WIRESHARK
+ lua_pushstring(L, REX_LIBNAME);
+ lua_setfield(L, -2, WSLUA_TYPEOF_FIELD);
+#endif
+ lua_pushfstring (L, REX_VERSION" (for %s)", name);
+ lua_setfield (L, -2, "_VERSION");
+}
diff --git a/epan/wslua/lrexlib_glib.c b/epan/wslua/lrexlib_glib.c
new file mode 100644
index 0000000000..01b60de133
--- /dev/null
+++ b/epan/wslua/lrexlib_glib.c
@@ -0,0 +1,414 @@
+/* lrexlib_glib.c - Lua binding of GLib Regex library */
+/* See Copyright Notice in the file lrexlib.h */
+
+/* This is similar to Lrexlib's PCRE implementation, but has been changed
+ * for GLib's pcre implementation, which is different.
+ *
+ * The changes made by me, Hadriel Kaplan, are in the Public Domain, or
+ * under the MIT license if your country does not allow Public Domain.
+ *
+ * Changes relative to Lrexlib-PCRE:
+ * - No chartables or locale handling
+ * - dfa_exec doesn't take 'ovecsize' nor 'wscount' args
+ * - dfa_exec returns boolean true for partial match, without subcapture info
+ * - named subgroups do not return a table of name-keyed entries, because
+ * GLib doesn't provide a way to learn that information
+ * - there is no 'config()' function, since GLib doesn't offer such info
+ * - the 'flags()' function still works, returning all flags, but two new
+ * functions 'compile_flags()' and 'match_flags()' return just their respective
+ * flags, since GLib has a different and smaller set of such flags, for
+ * regex compile vs. match functions
+ * - Using POSIX character classes against strings with non-ASCII characters
+ * might match high-order characters, because glib always sets PCRE_UCP
+ * even if G_REGEX_RAW is set. For example, '[:alpha:]' and '\w' match certain
+ * non-ASCII bytes.
+ * - obviously quite a bit else is changed to interface to GLib's regex instead
+ * of PCRE, but hopefully those changes aren't visible to user/caller
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <ctype.h>
+#include <glib.h>
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lrexlib.h"
+
+extern int Gregex_get_flags (lua_State *L);
+extern int Gregex_get_compile_flags (lua_State *L);
+extern int Gregex_get_match_flags (lua_State *L);
+extern flag_pair gregex_error_flags[];
+
+/* These 2 settings may be redefined from the command-line or the makefile.
+ * They should be kept in sync between themselves and with the target name.
+ */
+#ifndef REX_LIBNAME
+# ifdef LREXLIB_WIRESHARK
+# define REX_LIBNAME "GRegex"
+# else
+# define REX_LIBNAME "rex_glib"
+# endif
+#endif
+#ifndef REX_OPENLIB
+# define REX_OPENLIB luaopen_rex_glib
+#endif
+
+#define REX_TYPENAME REX_LIBNAME"_regex"
+
+#define ALG_CFLAGS_DFLT G_REGEX_RAW
+#define ALG_EFLAGS_DFLT 0
+
+static int getcflags (lua_State *L, int pos);
+#define ALG_GETCFLAGS(L,pos) getcflags(L, pos)
+
+#define ALG_NOMATCH(res) ((res) == FALSE)
+#define ALG_ISMATCH(res) ((res) == TRUE)
+#define ALG_SUBBEG(ud,n) getSubStartPos(ud,n)
+#define ALG_SUBEND(ud,n) getSubEndPos(ud,n)
+#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n))
+#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0)
+#define ALG_NSUB(ud) ((int) g_regex_get_capture_count(ud->pr))
+
+#define ALG_PUSHSUB(L,ud,text,n) \
+ lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n))
+
+#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \
+ (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0))
+
+#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1)
+#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n))
+#define ALG_PUSHOFFSETS(L,ud,offs,n) \
+ (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n))
+
+#define ALG_BASE(st) 0
+#define ALG_PULL
+/* we define ALG_USERETRY because GLib does expose PCRE's NOTEMPTY and ANCHORED flags */
+#define ALG_USERETRY
+
+#define VERSION_GLIB (GLIB_MAJOR_VERSION*100 + GLIB_MINOR_VERSION)
+/* unfortunately GLib doesn't expose certain macros it would be nice to have */
+#if VERSION_GLIB >= 234
+# define G_REGEX_COMPILE_MASK_234 (G_REGEX_FIRSTLINE | \
+ G_REGEX_NEWLINE_ANYCRLF | \
+ G_REGEX_BSR_ANYCRLF | \
+ G_REGEX_JAVASCRIPT_COMPAT)
+#else
+# define G_REGEX_COMPILE_MASK_234 0
+#endif
+
+/* Mask of all the possible values for GRegexCompileFlags. */
+#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \
+ G_REGEX_MULTILINE | \
+ G_REGEX_DOTALL | \
+ G_REGEX_EXTENDED | \
+ G_REGEX_ANCHORED | \
+ G_REGEX_DOLLAR_ENDONLY | \
+ G_REGEX_UNGREEDY | \
+ G_REGEX_RAW | \
+ G_REGEX_NO_AUTO_CAPTURE | \
+ G_REGEX_OPTIMIZE | \
+ G_REGEX_DUPNAMES | \
+ G_REGEX_NEWLINE_CR | \
+ G_REGEX_NEWLINE_LF | \
+ G_REGEX_NEWLINE_CRLF | \
+ G_REGEX_COMPILE_MASK_234)
+
+#if VERSION_GLIB >= 234
+# define G_REGEX_MATCH_MASK_234 (G_REGEX_MATCH_NEWLINE_ANYCRLF | \
+ G_REGEX_MATCH_BSR_ANYCRLF | \
+ G_REGEX_MATCH_BSR_ANY | \
+ G_REGEX_MATCH_PARTIAL_SOFT | \
+ G_REGEX_MATCH_PARTIAL_HARD | \
+ G_REGEX_MATCH_NOTEMPTY_ATSTART)
+#else
+# define G_REGEX_MATCH_MASK_234 0
+#endif
+
+/* Mask of all the possible values for GRegexMatchFlags.
+ * The version-dependent flags (G_REGEX_MATCH_MASK_234, which is 0 for GLib
+ * older than 2.34) have to be part of the mask, mirroring what
+ * G_REGEX_COMPILE_MASK does with G_REGEX_COMPILE_MASK_234; otherwise
+ * check_eflags() rejects flags such as BSR_ANY, PARTIAL_HARD or
+ * NOTEMPTY_ATSTART even though the match flag table exposes them. */
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
+ G_REGEX_MATCH_NOTBOL | \
+ G_REGEX_MATCH_NOTEOL | \
+ G_REGEX_MATCH_NOTEMPTY | \
+ G_REGEX_MATCH_PARTIAL | \
+ G_REGEX_MATCH_NEWLINE_CR | \
+ G_REGEX_MATCH_NEWLINE_LF | \
+ G_REGEX_MATCH_NEWLINE_CRLF | \
+ G_REGEX_MATCH_NEWLINE_ANY | \
+ G_REGEX_MATCH_MASK_234)
+
+
+static int check_eflags(lua_State *L, const int idx, const int def);
+#define ALG_GETEFLAGS(L,idx) check_eflags(L, idx, ALG_EFLAGS_DFLT)
+
+/* Per-regex userdata: the compiled pattern plus the state left behind by the
+ * most recent match call. */
+typedef struct {
+ GRegex * pr; /* compiled regex, created by g_regex_new() in compile_regex() */
+ GMatchInfo * match_info; /* result of the latest match call, NULL when released */
+ GError * error; /* didn't want to put this here, but can't free it otherwise */
+ int freed; /* set to 1 by Gregex_gc() once the members were released */
+} TGrgx;
+
+/* Release the stored match result, if there is one, and reset the pointer. */
+static void minfo_free(TGrgx* ud) {
+  if (ud->match_info != NULL) {
+    g_match_info_free (ud->match_info);
+  }
+  ud->match_info = NULL;
+}
+
+/* Release the stored GError, if there is one, and reset the pointer. */
+static void gerror_free(TGrgx* ud) {
+  if (ud->error != NULL) {
+    g_error_free (ud->error);
+  }
+  ud->error = NULL;
+}
+
+/* Return the start offset of capture n in the stored match result.
+ * The value stays at its initial -1 when g_match_info_fetch_pos() does not
+ * write a position; ALG_SUBVALID treats a negative start as "no capture". */
+static int getSubStartPos(TGrgx* ud, int n) {
+ int start_pos = -1;
+ g_match_info_fetch_pos (ud->match_info, n, &start_pos, NULL);
+ return start_pos;
+}
+
+/* Return the end offset of capture n in the stored match result.
+ * The value stays at its initial -1 when g_match_info_fetch_pos() does not
+ * write a position. */
+static int getSubEndPos(TGrgx* ud, int n) {
+ int end_pos = -1;
+ g_match_info_fetch_pos (ud->match_info, n, NULL, &end_pos);
+ return end_pos;
+}
+
+#define TUserdata TGrgx
+
+/* TODO: handle named subpatterns somehow */
+#if 0
+static void do_named_subpatterns (lua_State *L, TGrgx *ud, const char *text);
+# define DO_NAMED_SUBPATTERNS do_named_subpatterns
+#endif
+
+#include "lrexlib_algo.h"
+
+/* Functions
+ ******************************************************************************
+ */
+
+/* Read the compile-flags argument at stack index pos.
+ *   absent / nil -> ALG_CFLAGS_DFLT
+ *   number       -> used as is, after checking it against G_REGEX_COMPILE_MASK
+ *   string       -> each of the letters i, m, s, x, U switches on one flag;
+ *                   any other letter is ignored
+ * Any other type raises a Lua type error. */
+static int getcflags (lua_State *L, int pos) {
+  const int argtype = lua_type (L, pos);
+
+  if (argtype == LUA_TNONE || argtype == LUA_TNIL)
+    return ALG_CFLAGS_DFLT;
+
+  if (argtype == LUA_TNUMBER) {
+    const int flags = (int) lua_tointeger (L, pos);
+    if (flags & ~G_REGEX_COMPILE_MASK)
+      return luaL_error (L, "GLib Regex compile flag is invalid");
+    return flags;
+  }
+
+  if (argtype == LUA_TSTRING) {
+    const char *p;
+    int flags = 0;
+    for (p = lua_tostring (L, pos); *p != '\0'; p++) {
+      switch (*p) {
+        case 'i': flags |= G_REGEX_CASELESS;  break;
+        case 'm': flags |= G_REGEX_MULTILINE; break;
+        case 's': flags |= G_REGEX_DOTALL;    break;
+        case 'x': flags |= G_REGEX_EXTENDED;  break;
+        case 'U': flags |= G_REGEX_UNGREEDY;  break;
+        default:  break; /* unrecognised letters are skipped */
+      }
+    }
+    return flags;
+  }
+
+  return luaL_typerror (L, pos, "number or string");
+}
+
+/* Read the optional match-flags integer at stack index idx, using def when
+ * the argument is absent.  Raises a Lua error if any bit outside
+ * G_REGEX_MATCH_MASK is set; otherwise returns the flags. */
+static int check_eflags(lua_State *L, const int idx, const int def) {
+ int eflags = luaL_optint (L, idx, def);
+ if ((eflags & ~G_REGEX_MATCH_MASK) != 0) {
+ return luaL_error (L, "GLib Regex match flag is invalid");
+ }
+ return eflags;
+}
+
+/* this function is used in algo.h as well */
+/* Raise a Lua error describing the GError stored in ud->error.
+ *   errcode is part of the signature shared with algo.h but is not used:
+ *   GLib reports failures through the GError, not through a return code.
+ * When the error code has a name in gregex_error_flags the message carries
+ * that name, otherwise the numeric code.  If no GError was recorded at all
+ * a generic message is raised instead of dereferencing a NULL pointer.
+ * Never returns normally (luaL_error does not return). */
+static int generate_error (lua_State *L, const TGrgx *ud, int errcode) {
+  const char *key;
+  (void) errcode;
+  if (ud->error == NULL)
+    return luaL_error (L, "GLib Regex error: unknown error");
+  key = get_flag_key (gregex_error_flags, ud->error->code);
+  if (key)
+    return luaL_error (L, "error G_REGEX_%s (%s)", key, ud->error->message);
+  else
+    return luaL_error (L, "GLib Regex error: %s (code %d)", ud->error->message, ud->error->code);
+}
+
+
+/* Create a new regex userdata on the Lua stack and compile argC->pattern
+ * into it.
+ *   argC - compile arguments (pattern and compile flags)
+ *   pud  - if not NULL, receives the pointer to the new userdata
+ * Returns 1 (the userdata is left on the stack).  Raises a Lua error with the
+ * GError message and code when compilation fails. */
+static int compile_regex (lua_State *L, const TArgComp *argC, TGrgx **pud) {
+ TGrgx *ud;
+
+ ud = (TGrgx*)lua_newuserdata (L, sizeof (TGrgx));
+ memset (ud, 0, sizeof (TGrgx)); /* initialize all members to 0 */
+ /* attach the metatable before compiling, so the userdata already has it
+ * if the error below is raised */
+ lua_pushvalue (L, ALG_ENVIRONINDEX);
+ lua_setmetatable (L, -2);
+
+ /* G_REGEX_RAW is always OR'ed in, whatever flags the caller passed */
+ ud->pr = g_regex_new (argC->pattern, argC->cflags | G_REGEX_RAW, 0, &ud->error);
+ if (!ud->pr)
+ return luaL_error (L, "%s (code: %d)", ud->error->message, ud->error->code);
+
+ if (pud) *pud = ud;
+ return 1;
+}
+
+/* method r:dfa_exec (s, [st], [ef]) */
+/* Collect the arguments of r:dfa_exec: the regex userdata (self), the
+ * subject string (arg 2), the start offset (arg 3) and the match flags
+ * (arg 4, validated by ALG_GETEFLAGS). */
+static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TGrgx **ud) {
+ *ud = check_ud (L);
+ argE->text = luaL_checklstring (L, 2, &argE->textlen);
+ argE->startoffset = get_startoffset (L, 3, argE->textlen);
+ argE->eflags = ALG_GETEFLAGS (L, 4);
+}
+
+/* unlike PCRE, partial matching won't return the actual substrings/matches */
+/* method r:dfa_exec (s, [st], [ef])
+ * Runs g_regex_match_all_full() on the subject.
+ * Returns to Lua:
+ *   on a match         - start offset (1-based), a table with the end offset
+ *                        of every match GLib reported, and the match count
+ *   on a partial match - boolean true
+ *   on no match        - nil
+ * Raises a Lua error if GLib recorded a GError for the match attempt. */
+static int Gregex_dfa_exec (lua_State *L)
+{
+  TArgExec argE;
+  TGrgx *ud;
+  gboolean res;
+
+  checkarg_dfa_exec (L, &argE, &ud);
+
+  /* a previous find/match/exec on this object leaves its GMatchInfo in ud;
+   * release it first, otherwise the pointer is overwritten and leaked */
+  minfo_free (ud);
+  gerror_free (ud);
+
+  res = g_regex_match_all_full (ud->pr, argE.text, (int)argE.textlen,
+            argE.startoffset, argE.eflags, &ud->match_info, &ud->error);
+
+  if (ALG_ISMATCH (res)) {
+    int i, start_pos, end_pos;
+    int max = g_match_info_get_match_count (ud->match_info);
+    g_match_info_fetch_pos (ud->match_info, 0, &start_pos, NULL);
+    lua_pushinteger (L, start_pos + 1); /* 1-st return value */
+    lua_newtable (L); /* 2-nd return value */
+    for (i=0; i<max; i++) {
+      g_match_info_fetch_pos (ud->match_info, i, NULL, &end_pos);
+      /* end_pos is pushed unchanged while the start gets +1: the 0-based
+       * offset just past the match is the 1-based index of its last char */
+      lua_pushinteger (L, end_pos);
+      lua_rawseti (L, -2, i+1);
+    }
+    lua_pushinteger (L, max); /* 3-rd return value */
+    minfo_free (ud);
+    return 3;
+  }
+  else if (ud->match_info != NULL && g_match_info_is_partial_match(ud->match_info)) {
+    lua_pushboolean(L,1);
+    minfo_free (ud);
+    return 1;
+  }
+  else {
+    minfo_free (ud);
+    /* a failed match also returns FALSE, so "no match" and "error" can only
+     * be told apart by looking at the GError */
+    if (ud->error != NULL)
+      return generate_error (L, ud, 0);
+    return lua_pushnil (L), 1;
+  }
+}
+
+#ifdef ALG_USERETRY
+  /* Run one gmatch step with g_regex_match_full() from argE->startoffset.
+   * When retry is non-zero the attempt is repeated with NOTEMPTY|ANCHORED
+   * added to the caller's match flags.  The previous match result and error
+   * are released first.  Returns GLib's TRUE/FALSE result. */
+  static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) {
+    /* declared before any statement: C90 compilers reject mixed
+     * declarations and code */
+    int eflags = retry ? (argE->eflags|G_REGEX_MATCH_NOTEMPTY|G_REGEX_MATCH_ANCHORED) : argE->eflags;
+    minfo_free (ud);
+    gerror_free (ud);
+    return g_regex_match_full (ud->pr, argE->text, argE->textlen,
+             argE->startoffset, eflags, &ud->match_info, &ud->error);
+  }
+#else
+  /* Run one gmatch step with g_regex_match_full() from argE->startoffset,
+   * releasing the previous match result and error first. */
+  static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
+    minfo_free (ud);
+    gerror_free (ud);
+    return g_regex_match_full (ud->pr, argE->text, argE->textlen,
+             argE->startoffset, argE->eflags, &ud->match_info, &ud->error);
+  }
+#endif
+
+/* Push the whole subject string (argE->text, argE->textlen bytes) onto the
+ * Lua stack. */
+static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
+ lua_pushlstring (L, argE->text, argE->textlen);
+}
+
+/* Run a single g_regex_match_full() from argE->startoffset with the caller's
+ * match flags, after releasing the previous match result and error.  The new
+ * result stays in ud->match_info for the caller to read.  Returns GLib's
+ * TRUE/FALSE result. */
+static int findmatch_exec (TGrgx *ud, TArgExec *argE) {
+ minfo_free (ud);
+ gerror_free (ud);
+ return g_regex_match_full (ud->pr, argE->text, argE->textlen,
+ argE->startoffset, argE->eflags, &ud->match_info, &ud->error);
+}
+
+#ifdef ALG_USERETRY
+  /* Run one gsub step with g_regex_match_full() starting at offset st.
+   * When retry is non-zero the attempt is repeated with NOTEMPTY|ANCHORED
+   * added to the caller's match flags.  The previous match result and error
+   * are released first.  Returns GLib's TRUE/FALSE result. */
+  static int gsub_exec (TGrgx *ud, TArgExec *argE, int st, int retry) {
+    /* declared before any statement: C90 compilers reject mixed
+     * declarations and code */
+    int eflags = retry ? (argE->eflags|G_REGEX_MATCH_NOTEMPTY|G_REGEX_MATCH_ANCHORED) : argE->eflags;
+    minfo_free (ud);
+    gerror_free (ud);
+    return g_regex_match_full (ud->pr, argE->text, argE->textlen,
+             st, eflags, &ud->match_info, &ud->error);
+  }
+#else
+  /* Run one gsub step with g_regex_match_full() starting at offset st,
+   * releasing the previous match result and error first. */
+  static int gsub_exec (TGrgx *ud, TArgExec *argE, int st) {
+    minfo_free (ud);
+    gerror_free (ud);
+    return g_regex_match_full (ud->pr, argE->text, argE->textlen,
+             st, argE->eflags, &ud->match_info, &ud->error);
+  }
+#endif
+
+/* Run one split step: g_regex_match_full() starting at the given offset with
+ * the caller's match flags, after releasing the previous match result and
+ * error.  Returns GLib's TRUE/FALSE result. */
+static int split_exec (TGrgx *ud, TArgExec *argE, int offset) {
+ minfo_free (ud);
+ gerror_free (ud);
+ return g_regex_match_full (ud->pr, argE->text, argE->textlen, offset,
+ argE->eflags, &ud->match_info, &ud->error);
+}
+
+/* __gc metamethod: drop the compiled regex together with any stored match
+ * result and error.  The 'freed' marker makes a second (manual) invocation
+ * a no-op.  Returns nothing to Lua. */
+static int Gregex_gc (lua_State *L) {
+  TGrgx *ud = check_ud (L);
+
+  if (ud->freed != 0)
+    return 0; /* already collected, e.g. __gc was called by hand before */
+
+  ud->freed = 1;
+  if (ud->pr != NULL)
+    g_regex_unref (ud->pr);
+  minfo_free (ud);
+  gerror_free (ud);
+  return 0;
+}
+
+/* __tostring metamethod: pushes "<REX_TYPENAME> (<address>)" for a live
+ * object, or "<REX_TYPENAME> (deleted)" once Gregex_gc has run on it. */
+static int Gregex_tostring (lua_State *L) {
+ TGrgx *ud = check_ud (L);
+ if (ud->freed == 0)
+ lua_pushfstring (L, "%s (%p)", REX_TYPENAME, (void*)ud);
+ else
+ lua_pushfstring (L, "%s (deleted)", REX_TYPENAME);
+ return 1;
+}
+
+/* function version(): pushes "major.minor.micro" built from the
+ * GLIB_*_VERSION macros, i.e. the GLib headers this file was compiled
+ * against. */
+static int Gregex_version (lua_State *L) {
+ lua_pushfstring (L, "%d.%d.%d", GLIB_MAJOR_VERSION, GLIB_MINOR_VERSION, GLIB_MICRO_VERSION);
+ return 1;
+}
+
+
+/* Methods and metamethods of a compiled regex object; passed to
+ * alg_register() as r_methods. */
+static const luaL_Reg r_methods[] = {
+ { "exec", algm_exec },
+ { "tfind", algm_tfind }, /* old name: match */
+ { "find", algm_find },
+ { "match", algm_match },
+ { "dfa_exec", Gregex_dfa_exec },
+ { "__gc", Gregex_gc },
+ { "__tostring", Gregex_tostring },
+ { NULL, NULL }
+};
+
+/* Functions of the library table; passed to alg_register() as r_functions. */
+static const luaL_Reg r_functions[] = {
+ { "match", algf_match },
+ { "find", algf_find },
+ { "gmatch", algf_gmatch },
+ { "gsub", algf_gsub },
+ { "split", algf_split },
+ { "new", algf_new },
+ { "flags", Gregex_get_flags },
+ { "compile_flags", Gregex_get_compile_flags },
+ { "match_flags", Gregex_get_match_flags },
+ { "version", Gregex_version },
+ { NULL, NULL }
+};
+
+/* Open the library */
+/* Registers the method and function tables through alg_register() and
+ * returns 1, so the value on top of the stack is handed back to the
+ * caller. */
+REX_API int REX_OPENLIB (lua_State *L) {
+
+ alg_register(L, r_methods, r_functions, "GLib Regex");
+
+ return 1;
+}
diff --git a/epan/wslua/lrexlib_glib_f.c b/epan/wslua/lrexlib_glib_f.c
new file mode 100644
index 0000000000..ed3299081f
--- /dev/null
+++ b/epan/wslua/lrexlib_glib_f.c
@@ -0,0 +1,138 @@
+/* lrexlib_glib_f.c - GLib regular expression library */
+/* See Copyright Notice in the file lrexlib.h */
+
+#include <glib.h>
+#include "lua.h"
+#include "lauxlib.h"
+#include "lrexlib.h"
+
+#define VERSION_GLIB (GLIB_MAJOR_VERSION*100 + GLIB_MINOR_VERSION)
+
+/* Name/value pairs returned by compile_flags() and flags().  Besides the
+ * GRegexCompileFlags values the table starts with three entries carrying the
+ * GLib version numbers.  Terminated by a { NULL, 0 } entry. */
+static flag_pair gregex_compile_flags[] = {
+ { "MAJOR", GLIB_MAJOR_VERSION },
+ { "MINOR", GLIB_MINOR_VERSION },
+ { "MICRO", GLIB_MICRO_VERSION },
+/*----------------------- Compile flags -----------------------------------*/
+ { "CASELESS", G_REGEX_CASELESS },
+ { "MULTILINE", G_REGEX_MULTILINE },
+ { "DOTALL", G_REGEX_DOTALL },
+ { "EXTENDED", G_REGEX_EXTENDED },
+ { "ANCHORED", G_REGEX_ANCHORED },
+ { "DOLLAR_ENDONLY", G_REGEX_DOLLAR_ENDONLY },
+ { "UNGREEDY", G_REGEX_UNGREEDY },
+ { "NO_AUTO_CAPTURE", G_REGEX_NO_AUTO_CAPTURE },
+ { "OPTIMIZE", G_REGEX_OPTIMIZE },
+ { "DUPNAMES", G_REGEX_DUPNAMES },
+ { "NEWLINE_CR", G_REGEX_NEWLINE_CR },
+ { "NEWLINE_LF", G_REGEX_NEWLINE_LF },
+ { "NEWLINE_CRLF", G_REGEX_NEWLINE_CRLF },
+#if VERSION_GLIB >= 234
+ /* only compiled in when building against GLib 2.34 or newer */
+ { "FIRSTLINE", G_REGEX_FIRSTLINE },
+ { "NEWLINE_ANYCRLF", G_REGEX_NEWLINE_ANYCRLF },
+ { "BSR_ANYCRLF", G_REGEX_BSR_ANYCRLF },
+ { "JAVASCRIPT_COMPAT", G_REGEX_JAVASCRIPT_COMPAT },
+#endif
+/*---------------------------------------------------------------------------*/
+ { NULL, 0 }
+};
+
+/*----------------------- Match flags -------------------------------------*/
+/* Name/value pairs of the GRegexMatchFlags values, returned by match_flags()
+ * and flags().  Terminated by a { NULL, 0 } entry. */
+static flag_pair gregex_match_flags[] = {
+ { "ANCHORED", G_REGEX_MATCH_ANCHORED },
+ { "NOTBOL", G_REGEX_MATCH_NOTBOL },
+ { "NOTEOL", G_REGEX_MATCH_NOTEOL },
+ { "NOTEMPTY", G_REGEX_MATCH_NOTEMPTY },
+ { "PARTIAL", G_REGEX_MATCH_PARTIAL },
+ { "NEWLINE_CR", G_REGEX_MATCH_NEWLINE_CR },
+ { "NEWLINE_LF", G_REGEX_MATCH_NEWLINE_LF },
+ { "NEWLINE_CRLF", G_REGEX_MATCH_NEWLINE_CRLF },
+ { "NEWLINE_ANY", G_REGEX_MATCH_NEWLINE_ANY },
+#if VERSION_GLIB >= 234
+ /* only compiled in when building against GLib 2.34 or newer */
+ { "NEWLINE_ANYCRLF", G_REGEX_MATCH_NEWLINE_ANYCRLF },
+ { "BSR_ANYCRLF", G_REGEX_MATCH_BSR_ANYCRLF },
+ { "BSR_ANY", G_REGEX_MATCH_BSR_ANY },
+ { "PARTIAL_SOFT", G_REGEX_MATCH_PARTIAL_SOFT },
+ { "PARTIAL_HARD", G_REGEX_MATCH_PARTIAL_HARD },
+ { "NOTEMPTY_ATSTART", G_REGEX_MATCH_NOTEMPTY_ATSTART },
+#endif
+/*---------------------------------------------------------------------------*/
+ { NULL, 0 }
+};
+
+/* Name/value pairs of the G_REGEX_ERROR_* codes.  Not static: it is declared
+ * extern in lrexlib_glib.c, where generate_error() uses it to turn a GError
+ * code into a name; it is also part of what flags() returns.  Terminated by a
+ * { NULL, 0 } entry. */
+flag_pair gregex_error_flags[] = {
+ { "COMPILE", G_REGEX_ERROR_COMPILE },
+ { "OPTIMIZE", G_REGEX_ERROR_OPTIMIZE },
+ { "REPLACE", G_REGEX_ERROR_REPLACE },
+ { "MATCH", G_REGEX_ERROR_MATCH },
+ { "INTERNAL", G_REGEX_ERROR_INTERNAL },
+ { "STRAY_BACKSLASH", G_REGEX_ERROR_STRAY_BACKSLASH },
+ { "MISSING_CONTROL_CHAR", G_REGEX_ERROR_MISSING_CONTROL_CHAR },
+ { "UNRECOGNIZED_ESCAPE", G_REGEX_ERROR_UNRECOGNIZED_ESCAPE },
+ { "QUANTIFIERS_OUT_OF_ORDER", G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER },
+ { "QUANTIFIER_TOO_BIG", G_REGEX_ERROR_QUANTIFIER_TOO_BIG },
+ { "UNTERMINATED_CHARACTER_CLASS", G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS },
+ { "INVALID_ESCAPE_IN_CHARACTER_CLASS", G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS },
+ { "RANGE_OUT_OF_ORDER", G_REGEX_ERROR_RANGE_OUT_OF_ORDER },
+ { "NOTHING_TO_REPEAT", G_REGEX_ERROR_NOTHING_TO_REPEAT },
+ { "UNRECOGNIZED_CHARACTER", G_REGEX_ERROR_UNRECOGNIZED_CHARACTER },
+ { "POSIX_NAMED_CLASS_OUTSIDE_CLASS", G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS },
+ { "UNMATCHED_PARENTHESIS", G_REGEX_ERROR_UNMATCHED_PARENTHESIS },
+ { "INEXISTENT_SUBPATTERN_REFERENCE", G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE },
+ { "UNTERMINATED_COMMENT", G_REGEX_ERROR_UNTERMINATED_COMMENT },
+ { "EXPRESSION_TOO_LARGE", G_REGEX_ERROR_EXPRESSION_TOO_LARGE },
+ { "MEMORY_ERROR", G_REGEX_ERROR_MEMORY_ERROR },
+ { "VARIABLE_LENGTH_LOOKBEHIND", G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND },
+ { "MALFORMED_CONDITION", G_REGEX_ERROR_MALFORMED_CONDITION },
+ { "TOO_MANY_CONDITIONAL_BRANCHES", G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES },
+ { "ASSERTION_EXPECTED", G_REGEX_ERROR_ASSERTION_EXPECTED },
+ { "UNKNOWN_POSIX_CLASS_NAME", G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME },
+ { "POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED", G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED },
+ { "HEX_CODE_TOO_LARGE", G_REGEX_ERROR_HEX_CODE_TOO_LARGE },
+ { "INVALID_CONDITION", G_REGEX_ERROR_INVALID_CONDITION },
+ { "SINGLE_BYTE_MATCH_IN_LOOKBEHIND", G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND },
+ { "INFINITE_LOOP", G_REGEX_ERROR_INFINITE_LOOP },
+ { "MISSING_SUBPATTERN_NAME_TERMINATOR", G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR },
+ { "DUPLICATE_SUBPATTERN_NAME", G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME },
+ { "MALFORMED_PROPERTY", G_REGEX_ERROR_MALFORMED_PROPERTY },
+ { "UNKNOWN_PROPERTY", G_REGEX_ERROR_UNKNOWN_PROPERTY },
+ { "SUBPATTERN_NAME_TOO_LONG", G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG },
+ { "TOO_MANY_SUBPATTERNS", G_REGEX_ERROR_TOO_MANY_SUBPATTERNS },
+ { "INVALID_OCTAL_VALUE", G_REGEX_ERROR_INVALID_OCTAL_VALUE },
+ { "TOO_MANY_BRANCHES_IN_DEFINE", G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE },
+ { "INCONSISTENT_NEWLINE_OPTIONS", G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS },
+ { "MISSING_BACK_REFERENCE", G_REGEX_ERROR_MISSING_BACK_REFERENCE },
+#if VERSION_GLIB >= 234
+ /* only compiled in when building against GLib 2.34 or newer */
+ { "INVALID_RELATIVE_REFERENCE", G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE },
+ { "BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN",G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN },
+ { "UNKNOWN_BACKTRACKING_CONTROL_VERB", G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB },
+ { "NUMBER_TOO_BIG", G_REGEX_ERROR_NUMBER_TOO_BIG },
+ { "MISSING_SUBPATTERN_NAME", G_REGEX_ERROR_MISSING_SUBPATTERN_NAME },
+ { "MISSING_DIGIT", G_REGEX_ERROR_MISSING_DIGIT },
+ { "INVALID_DATA_CHARACTER", G_REGEX_ERROR_INVALID_DATA_CHARACTER },
+ { "EXTRA_SUBPATTERN_NAME", G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME },
+ { "BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED",G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED },
+ { "INVALID_CONTROL_CHAR", G_REGEX_ERROR_INVALID_CONTROL_CHAR },
+ { "MISSING_NAME", G_REGEX_ERROR_MISSING_NAME },
+ { "NOT_SUPPORTED_IN_CLASS", G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS },
+ { "TOO_MANY_FORWARD_REFERENCES", G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES },
+ { "NAME_TOO_LONG", G_REGEX_ERROR_NAME_TOO_LONG },
+ { "CHARACTER_VALUE_TOO_LARGE", G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE },
+#endif
+/*---------------------------------------------------------------------------*/
+ { NULL, 0 }
+};
+
+/* function compile_flags(): hands only the compile flag table to
+ * get_flags() and returns its result count. */
+int Gregex_get_compile_flags (lua_State *L) {
+ const flag_pair* fps[] = { gregex_compile_flags, NULL };
+ return get_flags (L, fps);
+}
+
+/* function match_flags(): hands only the match flag table to get_flags()
+ * and returns its result count. */
+int Gregex_get_match_flags (lua_State *L) {
+ const flag_pair* fps[] = { gregex_match_flags, NULL };
+ return get_flags (L, fps);
+}
+
+/* function flags(): hands the compile, match and error tables together to
+ * get_flags() and returns its result count. */
+int Gregex_get_flags (lua_State *L) {
+ const flag_pair* fps[] = { gregex_compile_flags, gregex_match_flags, gregex_error_flags, NULL };
+ return get_flags (L, fps);
+}
diff --git a/epan/wslua/make-reg.pl b/epan/wslua/make-reg.pl
index 7f7408dc99..1eeb167e9a 100755
--- a/epan/wslua/make-reg.pl
+++ b/epan/wslua/make-reg.pl
@@ -76,6 +76,7 @@ for (@classes) {
print C "\twslua_reg_module(L, \"bit\", luaopen_bit);\n";
# the bitops library returns a value on the stack - get rid of it
print C "\tlua_pop(L,1);\n";
+print C "\twslua_reg_module(L, \"GRegex\", luaopen_rex_glib);\n";
print C "}\n\n";
diff --git a/epan/wslua/wslua.h b/epan/wslua/wslua.h
index e73e7be931..2ec7456766 100644
--- a/epan/wslua/wslua.h
+++ b/epan/wslua/wslua.h
@@ -608,5 +608,6 @@ extern int wslua_is_field_available(lua_State* L, const char* field_abbr);
extern int wslua_bin2hex(lua_State* L, const guint8* data, const guint len, const gboolean lowercase, const gchar* sep);
extern int wslua_hex2bin(lua_State* L, const char* data, const guint len, const gchar* sep);
+extern int luaopen_rex_glib(lua_State *L);
#endif
diff --git a/test/lua/common_sets.lua b/test/lua/common_sets.lua
new file mode 100755
index 0000000000..e71251fea8
--- /dev/null
+++ b/test/lua/common_sets.lua
@@ -0,0 +1,319 @@
+-- See Copyright Notice in the file LICENSE
+
+-- This file should contain only test sets that behave identically
+-- when being run with pcre or posix regex libraries.
+
+local luatest = require "luatest"
+local N = luatest.NT
+
+-- Return N (luatest.NT) in place of nil so that a missing value can still be
+-- stored in a result table; any other value is returned unchanged.
+local function norm(a) return a==nil and N or a end
+
+-- Return the library's gsub function. When the library has no function
+-- form, return a wrapper that compiles the pattern with lib.new and calls
+-- the gsub method on the resulting object.
+local function get_gsub (lib)
+ return lib.gsub or
+ function (subj, pattern, repl, n)
+ return lib.new (pattern) : gsub (subj, repl, n)
+ end
+end
+
+-- Build the test set for the library function gmatch.
+-- lib: the regex library under test. flg is accepted like in the other set
+-- builders but is not used here.
+-- Each row is { {subj, patt}, {expected results} }.
+local function set_f_gmatch (lib, flg)
+  -- `unpack` is a global only in Lua 5.1; later versions provide
+  -- table.unpack instead
+  local unpack = unpack or table.unpack
+  -- gmatch (s, p, [cf], [ef])
+  -- collects up to 10 iterations as {a, b} pairs and returns them as
+  -- separate values
+  local function test_gmatch (subj, patt)
+    local out, guard = {}, 10
+    for a, b in lib.gmatch (subj, patt) do
+      table.insert (out, { norm(a), norm(b) })
+      guard = guard - 1
+      if guard == 0 then break end
+    end
+    return unpack (out)
+  end
+  return {
+    Name = "Function gmatch",
+    Func = test_gmatch,
+    --{ subj patt results }
+    { {"ab", lib.new"."}, {{"a",N}, {"b",N} } },
+    { {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} },
+    { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match
+    { {"abc", "^." }, {{"a",N}} },--anchored pattern
+  }
+end
+
+-- Build the test set for the library function split.
+-- lib: the regex library under test. flg is accepted like in the other set
+-- builders but is not used here.
+-- Each row is { {subj, patt}, {expected results} }.
+local function set_f_split (lib, flg)
+  -- `unpack` is a global only in Lua 5.1; later versions provide
+  -- table.unpack instead
+  local unpack = unpack or table.unpack
+  -- split (s, p, [cf], [ef])
+  -- collects up to 10 iterations as {a, b, c} triples and returns them as
+  -- separate values
+  local function test_split (subj, patt)
+    local out, guard = {}, 10
+    for a, b, c in lib.split (subj, patt) do
+      table.insert (out, { norm(a), norm(b), norm(c) })
+      guard = guard - 1
+      if guard == 0 then break end
+    end
+    return unpack (out)
+  end
+  return {
+    Name = "Function split",
+    Func = test_split,
+    --{ subj patt results }
+    { {"ab", lib.new","}, {{"ab",N,N}, } },
+    { {"ab", ","}, {{"ab",N,N}, } },
+    { {",", ","}, {{"",",",N}, {"", N, N}, } },
+    { {",,", ","}, {{"",",",N}, {"",",",N}, {"",N,N} } },
+    { {"a,b", ","}, {{"a",",",N}, {"b",N,N}, } },
+    { {",a,b", ","}, {{"",",",N}, {"a",",",N}, {"b",N,N}} },
+    { {"a,b,", ","}, {{"a",",",N}, {"b",",",N}, {"",N,N} } },
+    { {"a,,b", ","}, {{"a",",",N}, {"",",",N}, {"b",N,N}} },
+    { {"ab<78>c", "<(.)(.)>"}, {{"ab","7","8"}, {"c",N,N}, } },
+    { {"abc", "^."}, {{"", "a",N}, {"bc",N,N}, } },--anchored pattern
+    { {"abc", "^"}, {{"", "", N}, {"abc",N,N}, } },
+--  { {"abc", "$"}, {{"abc","",N}, {"",N,N}, } },
+--  { {"abc", "^|$"}, {{"", "", N}, {"abc","",N},{"",N,N},} },
+  }
+end
+
+-- Build the test set for the library function find (lib.find is called
+-- directly). flg is not used here.
+-- Each row is { {subj, patt, st}, {expected results} }.
+local function set_f_find (lib, flg)
+ return {
+ Name = "Function find",
+ Func = lib.find,
+ -- {subj, patt, st}, { results }
+ { {"abcd", lib.new".+"}, { 1,4 } }, -- [none]
+ { {"abcd", ".+"}, { 1,4 } }, -- [none]
+ { {"abcd", ".+", 2}, { 2,4 } }, -- positive st
+ { {"abcd", ".+", -2}, { 3,4 } }, -- negative st
+ { {"abcd", ".*"}, { 1,4 } }, -- [none]
+ { {"abc", "bc"}, { 2,3 } }, -- [none]
+ { {"abcd", "(.)b.(d)"}, { 1,4,"a","d" }}, -- [captures]
+ }
+end
+
+-- Build the test set for the library function match (lib.match is called
+-- directly). flg is not used here.
+-- Each row is { {subj, patt, st}, {expected results} }.
+local function set_f_match (lib, flg)
+ return {
+ Name = "Function match",
+ Func = lib.match,
+ -- {subj, patt, st}, { results }
+ { {"abcd", lib.new".+"}, {"abcd"} }, -- [none]
+ { {"abcd", ".+"}, {"abcd"} }, -- [none]
+ { {"abcd", ".+", 2}, {"bcd"} }, -- positive st
+ { {"abcd", ".+", -2}, {"cd"} }, -- negative st
+ { {"abcd", ".*"}, {"abcd"} }, -- [none]
+ { {"abc", "bc"}, {"bc"} }, -- [none]
+ { {"abcd", "(.)b.(d)"}, {"a","d"} }, -- [captures]
+ }
+end
+
+-- Build the test set for the regex object method "exec". lib and flg are
+-- not used here.
+-- Each row is { {patt}, {subj, st}, {expected results} }; the third expected
+-- value is a table of capture offsets.
+local function set_m_exec (lib, flg)
+ return {
+ Name = "Method exec",
+ Method = "exec",
+ --{patt}, {subj, st} { results }
+ { {".+"}, {"abcd"}, {1,4,{}} }, -- [none]
+ { {".+"}, {"abcd",2}, {2,4,{}} }, -- positive st
+ { {".+"}, {"abcd",-2}, {3,4,{}} }, -- negative st
+ { {".*"}, {"abcd"}, {1,4,{}} }, -- [none]
+ { {"bc"}, {"abc"}, {2,3,{}} }, -- [none]
+ { { "(.)b.(d)"}, {"abcd"}, {1,4,{1,1,4,4}}},--[captures]
+ { {"(a+)6+(b+)"}, {"Taa66bbT",2}, {2,7,{2,3,6,7}}},--[st+captures]
+ }
+end
+
+-- Build the test set for the regex object method "tfind". lib and flg are
+-- not used here.
+-- Each row is { {patt}, {subj, st}, {expected results} }; the third expected
+-- value is a table of captured substrings.
+local function set_m_tfind (lib, flg)
+ return {
+ Name = "Method tfind",
+ Method = "tfind",
+ --{patt}, {subj, st} { results }
+ { {".+"}, {"abcd"}, {1,4,{}} }, -- [none]
+ { {".+"}, {"abcd",2}, {2,4,{}} }, -- positive st
+ { {".+"}, {"abcd",-2}, {3,4,{}} }, -- negative st
+ { {".*"}, {"abcd"}, {1,4,{}} }, -- [none]
+ { {"bc"}, {"abc"}, {2,3,{}} }, -- [none]
+ { {"(.)b.(d)"}, {"abcd"}, {1,4,{"a","d"}}},--[captures]
+ }
+end
+
+-- Build the test set for the regex object method "find". lib and flg are
+-- not used here.
+-- Each row is { {patt}, {subj, st}, {expected results} }.
+local function set_m_find (lib, flg)
+ return {
+ Name = "Method find",
+ Method = "find",
+ --{patt}, {subj, st} { results }
+ { {".+"}, {"abcd"}, {1,4} }, -- [none]
+ { {".+"}, {"abcd",2}, {2,4} }, -- positive st
+ { {".+"}, {"abcd",-2}, {3,4} }, -- negative st
+ { {".*"}, {"abcd"}, {1,4} }, -- [none]
+ { {"bc"}, {"abc"}, {2,3} }, -- [none]
+ { {"(.)b.(d)"}, {"abcd"}, {1,4,"a","d"}},--[captures]
+ }
+end
+
+-- Build the test set for the regex object method "match". lib and flg are
+-- not used here.
+-- Each row is { {patt}, {subj, st}, {expected results} }.
+local function set_m_match (lib, flg)
+ return {
+ Name = "Method match",
+ Method = "match",
+ --{patt}, {subj, st} { results }
+ { {".+"}, {"abcd"}, {"abcd"} }, -- [none]
+ { {".+"}, {"abcd",2}, {"bcd" } }, -- positive st
+ { {".+"}, {"abcd",-2}, {"cd" } }, -- negative st
+ { {".*"}, {"abcd"}, {"abcd"} }, -- [none]
+ { {"bc"}, {"abc"}, {"bc" } }, -- [none]
+ {{ "(.)b.(d)"}, {"abcd"}, {"a","d"} }, --[captures]
+ }
+end
+
+-- Build gsub test set 1: the effect of the "n" (maximum replacements)
+-- argument with empty and non-empty replacement strings. The first row
+-- passes a precompiled pattern object (cpat), the others the pattern string.
+-- flg is not used here.
+local function set_f_gsub1 (lib, flg)
+ local subj, pat = "abcdef", "[abef]+"
+ local cpat = lib.new(pat)
+ return {
+ Name = "Function gsub, set1",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, cpat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace
+ { {subj, pat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace
+ { {subj, pat, "", -1}, {subj, 0, 0} }, -- test "n" + empty_replace
+ { {subj, pat, "", 1}, {"cdef", 1, 1} },
+ { {subj, pat, "", 2}, {"cd", 2, 2} },
+ { {subj, pat, "", 3}, {"cd", 2, 2} },
+ { {subj, pat, "" }, {"cd", 2, 2} },
+ { {subj, pat, "#", 0}, {subj, 0, 0} }, -- test "n" + non-empty_replace
+ { {subj, pat, "#", 1}, {"#cdef", 1, 1} },
+ { {subj, pat, "#", 2}, {"#cd#", 2, 2} },
+ { {subj, pat, "#", 3}, {"#cd#", 2, 2} },
+ { {subj, pat, "#" }, {"#cd#", 2, 2} },
+ { {"abc", "^.", "#" }, {"#bc", 1, 1} }, -- anchored pattern
+ }
+end
+
+-- Build gsub test set 2: handling of '%' escapes and %1 capture references
+-- in the replacement string. flg is not used here.
+local function set_f_gsub2 (lib, flg)
+ local subj, pat = "abc", "([ac])"
+ return {
+ Name = "Function gsub, set2",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, pat, "<%1>" }, {"<a>b<c>", 2, 2} }, -- test non-escaped chars in f
+ { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2, 2} }, -- test escaped chars in f
+ { {subj, pat, "" }, {"b", 2, 2} }, -- test empty replace
+ { {subj, pat, "1" }, {"1b1", 2, 2} }, -- test odd and even %'s in f
+ { {subj, pat, "%1" }, {"abc", 2, 2} },
+ { {subj, pat, "%%1" }, {"%1b%1", 2, 2} },
+ { {subj, pat, "%%%1" }, {"%ab%c", 2, 2} },
+ { {subj, pat, "%%%%1" }, {"%%1b%%1", 2, 2} },
+ { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2, 2} },
+ }
+end
+
+-- Build gsub test set 3: valid and invalid capture indexes in the
+-- replacement string. The last row gives a string instead of a result table
+-- (presumably the expected error text; confirm against luatest).
+-- flg is not used here.
+local function set_f_gsub3 (lib, flg)
+ return {
+ Name = "Function gsub, set3",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1,res2,res3 },
+ { {"abc", "a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index
+ { {"abc", "a", "%1" }, {"abc", 1, 1} },
+ { {"abc", "[ac]", "%1" }, {"abc", 2, 2} },
+ { {"abc", "(a)", "%1" }, {"abc", 1, 1} },
+ { {"abc", "(a)", "%2" }, "invalid capture index" },
+ }
+end
+
+-- Build gsub test set 4: '.', '.+', '.*' and bracket character classes that
+-- stand in for Lua's %d, %D, %s and %S. flg is not used here.
+local function set_f_gsub4 (lib, flg)
+ return {
+ Name = "Function gsub, set4",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test .
+ { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+
+ { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .*
+ { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} },
+ { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d
+ { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D
+ { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s
+ { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S
+ }
+end
+
+-- Build gsub test set 5: the replacement argument is a function. The frep*
+-- helpers cover the different kinds of return values. One row gives a string
+-- instead of a result table (presumably the expected error text; confirm
+-- against luatest). flg is not used here.
+local function set_f_gsub5 (lib, flg)
+ local function frep1 () end -- returns nothing
+ local function frep2 () return "#" end -- ignores arguments
+ local function frep3 (...) return table.concat({...}, ",") end -- "normal"
+ local function frep4 () return {} end -- invalid return type
+ local function frep5 () return "7", "a" end -- 2-nd return is "a"
+ local function frep6 () return "7", "break" end -- 2-nd return is "break"
+ local subj = "a2c3"
+ return {
+ Name = "Function gsub, set5",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, "a(.)c(.)", frep1 }, {subj, 1, 0} },
+ { {subj, "a(.)c(.)", frep2 }, {"#", 1, 1} },
+ { {subj, "a(.)c(.)", frep3 }, {"2,3", 1, 1} },
+ { {subj, "a.c.", frep3 }, {subj, 1, 1} },
+ { {subj, "z*", frep1 }, {subj, 5, 0} },
+ { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} },
+ { {subj, "z*", frep3 }, {subj, 5, 5} },
+ { {subj, subj, frep4 }, "invalid return type" },
+ { {"abc",".", frep5 }, {"777", 3, 3} },
+ { {"abc",".", frep6 }, {"777", 3, 3} },
+ }
+end
+
+-- Build gsub test set 6: the replacement argument is a table. tab1 is empty,
+-- tab2 maps "2" to a number and tab3 maps "2" to a table. One row gives a
+-- string instead of a result table (presumably the expected error text;
+-- confirm against luatest). flg is not used here.
+local function set_f_gsub6 (lib, flg)
+ local tab1, tab2, tab3 = {}, { ["2"] = 56 }, { ["2"] = {} }
+ local subj = "a2c3"
+ return {
+ Name = "Function gsub, set6",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1,res2,res3 },
+ { {subj, "a(.)c(.)", tab1 }, {subj, 1, 0} },
+ { {subj, "a(.)c(.)", tab2 }, {"56", 1, 1} },
+ { {subj, "a(.)c(.)", tab3 }, "invalid replacement type" },
+ { {subj, "a.c.", tab1 }, {subj, 1, 0} },
+ { {subj, "a.c.", tab2 }, {subj, 1, 0} },
+ { {subj, "a.c.", tab3 }, {subj, 1, 0} },
+ }
+end
+
+-- Build gsub test set 8: the 4th argument ("n") is a function instead of a
+-- number. The rows cover the different first and second return values of
+-- that function. flg is not used here.
+local function set_f_gsub8 (lib, flg)
+ local subj, patt, repl = "abcdef", "..", "*"
+ return {
+ Name = "Function gsub, set8",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {subj, patt, repl, function() end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return true end }, {"***", 3, 3} },
+ { {subj, patt, repl, function() return {} end }, {"***", 3, 3} },
+ { {subj, patt, repl, function() return "#" end }, {"###", 3, 3} },
+ { {subj, patt, repl, function() return 57 end }, {"575757", 3, 3} },
+ { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} },
+ { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} },
+ { {subj, patt, repl, function (from,to,rep) return rep end },
+ {"***", 3, 3} },
+ { {subj, patt, repl, function (from, to, rep) return rep..to..from end },
+ {"*21*43*65", 3, 3} },
+ { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} },
+ { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} },
+ { {subj, patt, repl, function() return true, true end }, {"***", 3, 3} },
+ { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} },
+ { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} },
+ { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} },
+ { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} },
+ }
+end
+
+-- Module value: a function that builds every common test set for one
+-- library.
+--   libname  - name of the regex library
+--   isglobal - when truthy the library is taken from _G[libname]; if that
+--              global is nil or false the code falls back to require
+-- Returns the list of test sets. The builders are called with lib only, so
+-- their flg parameter is nil.
+return function (libname, isglobal)
+ local lib = isglobal and _G[libname] or require (libname)
+ return {
+ set_f_gmatch (lib),
+ set_f_split (lib),
+ set_f_find (lib),
+ set_f_match (lib),
+ set_m_exec (lib),
+ set_m_tfind (lib),
+ set_m_find (lib),
+ set_m_match (lib),
+ set_f_gsub1 (lib),
+ set_f_gsub2 (lib),
+ set_f_gsub3 (lib),
+ set_f_gsub4 (lib),
+ set_f_gsub5 (lib),
+ set_f_gsub6 (lib),
+ set_f_gsub8 (lib),
+ }
+end
diff --git a/test/lua/glib_sets.lua b/test/lua/glib_sets.lua
new file mode 100644
index 0000000000..0c3f38a483
--- /dev/null
+++ b/test/lua/glib_sets.lua
@@ -0,0 +1,204 @@
+-- See Copyright Notice in the file LICENSE
+
+local pat2pcre = require "pat2pcre"
+local luatest = require "luatest"
+local N = luatest.NT
+
+-- Replace nil by the luatest placeholder N; any other value passes through.
+local function norm(a)
+  if a == nil then
+    return N
+  end
+  return a
+end
+
+-- Build the descending sequence n, n-1, ..., m as an array
+-- (empty when n is smaller than m).
+local function fill (n, m)
+  local seq = {}
+  local value = n
+  while value >= m do
+    seq[#seq + 1] = value
+    value = value - 1
+  end
+  return seq
+end
+
+
+-- glib doesn't do partial matching return of matches, nor
+-- does it support ovecsize being set through the API
+-- Test set for the "dfa_exec" method, glib flavour.
+-- Every entry is { constructor args, method args, expected results };
+-- N is the luatest placeholder that stands in for nil.
+-- @param lib  the regex library under test (not referenced by the table)
+-- @param flg  the library's flags table (CASELESS, ANCHORED, PARTIAL are read)
+local function set_m_dfa_exec (lib, flg)
+ return {
+ Name = "Method dfa_exec for glib",
+ Method = "dfa_exec",
+--{patt,cf,lo}, {subj,st,ef,os,ws} { results }
+ { {".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none]
+ { {".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st
+ { {".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st
+ { {".+"}, {"abcd",5}, {N } }, -- failing st
+ { {".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none]
+ { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy
+ { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf
+ { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf
+ { {"bc"}, {"abc"}, {2,{3},1} }, -- [none]
+ { {"bc",flg.ANCHORED}, {"abc"}, {N } }, -- cf
+ { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef
+ { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures]
+ { {"abc"}, {"ab"}, {N } },
+ -- with the PARTIAL match flag a partial match is reported as a bare `true`
+ { {"abc"}, {"abc",N,flg.PARTIAL}, {1,{3},1} },
+ { {"abc*"}, {"abcc",N,flg.PARTIAL}, {1,{4,3,2},3} },
+ { {"abc"}, {"ab",N,flg.PARTIAL}, {true} },
+ { {"bc"}, {"ab",N,flg.PARTIAL}, {true} },
+}
+end
+
+-- Return the library's function-style gsub.  When the library only offers
+-- gsub as a method of compiled patterns, return an adapter with the same
+-- (subj, pattern, repl, n) signature.
+local function get_gsub (lib)
+  local direct = lib.gsub
+  if direct then
+    return direct
+  end
+  return function (subj, pattern, repl, n)
+    local compiled = lib.new (pattern)
+    return compiled:gsub (subj, repl, n)
+  end
+end
+
+-- Sadly, glib *always* sets the PCRE_UCP compilation flag, regardless
+-- of REGEX_RAW being set - this is, frankly, a bug in my opinion.
+-- Anyway, it means things like '[:alpha:]' and '\w' match things that Lua's
+-- '%a' does not match.  The entries for the affected classes are therefore
+-- either written as explicit ranges or left commented out below.
+--
+-- Builds gsub test set 7 for glib: each pattern is applied to a subject that
+-- holds all 256 byte values.  Expected results are produced by Lua's own
+-- string.gsub, after which every Lua pattern is converted with pat2pcre.
+-- @param lib  the regex library under test
+-- @param flg  the library's flags table (not used by this set)
+-- @return the populated test set
+local function set_f_gsub7 (lib, flg)
+  -- unpack became table.unpack in Lua 5.2; accept whichever exists
+  local unpack = table.unpack or unpack
+
+  -- subject: bytes 0..255 in order (joined once instead of 256 concatenations)
+  local bytes = {}
+  for i = 0, 255 do
+    bytes[i + 1] = string.char (i)
+  end
+  local subj = table.concat (bytes)
+
+  -- This set requires calling prepare_set before calling gsub_test
+  local set = {
+    Name = "Function gsub, set7 for glib",
+    Func = get_gsub (lib),
+    --{ s, p, f, n, },
+    { {subj, "[a-zA-Z]", "" }, },
+    { {subj, "[^a-zA-Z]", "" }, },
+    { {subj, "%c", "" }, },
+    { {subj, "%C", "" }, },
+    { {subj, "[a-z]", "" }, },
+    { {subj, "[^a-z]", "" }, },
+    { {subj, "%d", "" }, },
+    { {subj, "%D", "" }, },
+    { {subj, "%p", "" }, },
+    { {subj, "%P", "" }, },
+--  { {subj, "%s", "" }, },
+--  { {subj, "%S", "" }, },
+    { {subj, "[A-Z]", "" }, },
+    { {subj, "[^A-Z]", "" }, }, -- 10
+    { {subj, "[a-zA-Z0-9]", "" }, },
+    { {subj, "[^a-zA-Z0-9]", "" }, },
+    { {subj, "%x", "" }, },
+    { {subj, "%X", "" }, },
+    { {subj, "%z", "" }, },
+    { {subj, "%Z", "" }, },
+
+--  { {subj, "[%a]", "" }, },
+--  { {subj, "[%A]", "" }, },
+    { {subj, "[%c]", "" }, },
+    { {subj, "[%C]", "" }, },
+    { {subj, "[%d]", "" }, },
+    { {subj, "[%D]", "" }, },
+--  { {subj, "[%l]", "" }, },
+--  { {subj, "[%L]", "" }, },
+    { {subj, "[%p]", "" }, },
+    { {subj, "[%P]", "" }, },
+--  { {subj, "[%u]", "" }, },
+--  { {subj, "[%U]", "" }, },
+--  { {subj, "[%w]", "" }, },
+--  { {subj, "[%W]", "" }, },
+    { {subj, "[%x]", "" }, },
+    { {subj, "[%X]", "" }, },
+    { {subj, "[%z]", "" }, },
+    { {subj, "[%Z]", "" }, },
+
+--  { {subj, "[%a_]", "" }, },
+--  { {subj, "[%A_]", "" }, },
+    { {subj, "[%c_]", "" }, },
+    { {subj, "[%C_]", "" }, },
+--  { {subj, "[%l_]", "" }, },
+--  { {subj, "[%L_]", "" }, },
+    { {subj, "[%p_]", "" }, },
+    { {subj, "[%P_]", "" }, },
+--  { {subj, "[%u_]", "" }, },
+--  { {subj, "[%U_]", "" }, },
+--  { {subj, "[%w_]", "" }, },
+--  { {subj, "[%W_]", "" }, },
+    { {subj, "[%x_]", "" }, },
+    { {subj, "[%X_]", "" }, },
+    { {subj, "[%z_]", "" }, },
+    { {subj, "[%Z_]", "" }, },
+
+--  { {subj, "[%a%d]", "" }, },
+--  { {subj, "[%A%d]", "" }, },
+    { {subj, "[%c%d]", "" }, },
+    { {subj, "[%C%d]", "" }, },
+--  { {subj, "[%l%d]", "" }, },
+--  { {subj, "[%L%d]", "" }, },
+    { {subj, "[%p%d]", "" }, },
+    { {subj, "[%P%d]", "" }, },
+--  { {subj, "[%u%d]", "" }, },
+--  { {subj, "[%U%d]", "" }, },
+--  { {subj, "[%w%d]", "" }, },
+--  { {subj, "[%W%d]", "" }, },
+    { {subj, "[%x%d]", "" }, },
+    { {subj, "[%X%d]", "" }, },
+    { {subj, "[%z%d]", "" }, },
+    { {subj, "[%Z%d]", "" }, },
+
+--  { {subj, "[^%a%d]", "" }, },
+--  { {subj, "[^%A%d]", "" }, },
+    { {subj, "[^%c%d]", "" }, },
+    { {subj, "[^%C%d]", "" }, },
+--  { {subj, "[^%l%d]", "" }, },
+--  { {subj, "[^%L%d]", "" }, },
+    { {subj, "[^%p%d]", "" }, },
+    { {subj, "[^%P%d]", "" }, },
+--  { {subj, "[^%u%d]", "" }, },
+--  { {subj, "[^%U%d]", "" }, },
+--  { {subj, "[^%w%d]", "" }, },
+--  { {subj, "[^%W%d]", "" }, },
+    { {subj, "[^%x%d]", "" }, },
+    { {subj, "[^%X%d]", "" }, },
+    { {subj, "[^%z%d]", "" }, },
+    { {subj, "[^%Z%d]", "" }, },
+
+--  { {subj, "[^%a_]", "" }, },
+--  { {subj, "[^%A_]", "" }, },
+    { {subj, "[^%c_]", "" }, },
+    { {subj, "[^%C_]", "" }, },
+--  { {subj, "[^%l_]", "" }, },
+--  { {subj, "[^%L_]", "" }, },
+    { {subj, "[^%p_]", "" }, },
+    { {subj, "[^%P_]", "" }, },
+--  { {subj, "[^%u_]", "" }, },
+--  { {subj, "[^%U_]", "" }, },
+--  { {subj, "[^%w_]", "" }, },
+--  { {subj, "[^%W_]", "" }, },
+    { {subj, "[^%x_]", "" }, },
+    { {subj, "[^%X_]", "" }, },
+    { {subj, "[^%z_]", "" }, },
+    { {subj, "[^%Z_]", "" }, },
+
+    { {subj, "\100", "" }, },
+    { {subj, "[\100]", "" }, },
+    { {subj, "[^\100]", "" }, },
+    { {subj, "[\100-\200]", "" }, },
+    { {subj, "[^\100-\200]", "" }, },
+    { {subj, "\100a", "" }, },
+    { {subj, "[\100a]", "" }, },
+    { {subj, "[^\100a]", "" }, },
+    { {subj, "[\100-\200a]", "" }, },
+    { {subj, "[^\100-\200a]", "" }, },
+  }
+  -- fill in reference results (must run while the patterns are still in
+  -- Lua syntax)
+  for _,v in ipairs(set) do
+    local r0, r1, r2 = pcall (string.gsub, unpack (v[1]))
+    v[2] = r0 and { r1, r2, r2 } or { r0, r1 }
+  end
+  -- convert patterns: lua -> pcre
+  for _, test in ipairs (set) do
+    test[1][2] = pat2pcre (test[1][2])
+  end
+  return set
+end
+
+-- Module entry point: resolve the library under test and return the
+-- glib-specific test sets.
+return function (libname, isglobal)
+  local lib = isglobal and _G[libname]
+  if not lib then
+    lib = require (libname)
+  end
+  local flags = lib.flags and lib.flags ()
+  return {
+    set_m_dfa_exec (lib, flags),
+    set_f_gsub7 (lib, flags)
+  }
+end
diff --git a/test/lua/gregex.lua b/test/lua/gregex.lua
new file mode 100644
index 0000000000..2ad04ba6dc
--- /dev/null
+++ b/test/lua/gregex.lua
@@ -0,0 +1,285 @@
+
+-- Tests for GLib Regex functions
+-- written by Hadriel Kaplan, based on Lrexlib's test suite
+-- This is a test script for tshark/wireshark.
+-- This script runs inside tshark/wireshark, so to run it do:
+-- tshark -r empty.cap -X lua_script:<path_to_testdir>/lua/gregex.lua -X lua_script1:glib
+--
+-- if you have to give additional paths to find the dependent lua files,
+-- use the '-X lua_script1:' syntax to add more arguments
+--
+-- available arguments:
+-- -d<dir> provides path directory for lua include files
+-- -v verbose mode
+-- -V very verbose mode
+
+
+-- save args before we do anything else
+local args = {...}
+for i,v in ipairs(args) do
+ print(i.." = "..v)
+end
+
+-- Print a banner naming the group of checks that follows.
+local function testing(...)
+  local label = tostring(...)
+  print("---- Testing " .. label .. " ----")
+end
+
+-- running number of checks executed so far (shown in each report line)
+local count = 0
+
+-- Report one check.  The check passes only when its first extra argument is
+-- exactly `true`; any other value prints "failed!" and raises an error.
+local function test(name, ...)
+  count = count + 1
+  io.write("test "..name.."-"..count.."...")
+  local passed = (...) == true
+  io.write(passed and "passed\n" or "failed!\n")
+  io.flush()
+  if not passed then
+    error(name.." test failed!")
+  end
+end
+
+------------- First test some basic stuff to make sure we're sane -----------
+
+print("Lua version: ".._VERSION)
+
+testing("Lrexlib GLib Regex library")
+
+local lib = GRegex
+test("global",_G.GRegex == lib)
+
+for name, val in pairs(lib) do
+ print("\t"..name.." = "..type(val))
+end
+
+test("class",type(lib) == 'table')
+test("class",type(lib._VERSION) == 'string')
+test("class",type(lib.find) == 'function')
+test("class",type(lib.compile_flags) == 'function')
+test("class",type(lib.match_flags) == 'function')
+test("class",type(lib.flags) == 'function')
+test("class",type(lib.gsub) == 'function')
+test("class",type(lib.gmatch) == 'function')
+test("class",type(lib.new) == 'function')
+test("class",type(lib.match) == 'function')
+test("class",type(lib.split) == 'function')
+test("class",type(lib.version) == 'function')
+
+testing("info and flags")
+
+test("typeof",typeof(lib) == 'GRegex')
+
+print(lib._VERSION)
+print("Glib version = "..lib.version())
+
+-- Count all entries of a table, array part and hash part alike.
+local function getTSize(t)
+  local size = 0
+  for _ in pairs(t) do
+    size = size + 1
+  end
+  return size
+end
+
+local flags = lib.flags()
+
+-- print("size = "..c)
+-- it's 84 for newer GLib, 61 for older
+test("flags", getTSize(flags) > 60)
+test("cflags", getTSize(lib.compile_flags()) > 15)
+test("eflags", getTSize(lib.match_flags()) > 8)
+
+testing("new")
+
+-- holds { ok, return values or error message... } of the last checkFunc call
+local results
+
+-- Call objname[funcname](...) in protected mode, keep everything pcall
+-- returned in `results`, and tell the caller whether the call succeeded.
+local function checkFunc(objname,funcname,...)
+  local func = objname[funcname]
+  results = { pcall(func,...) }
+  -- print("Got this error: '"..tostring(results[2]).."'")
+  return results[1] and true or false
+end
+
+test("new", checkFunc(lib,"new",".*"))
+test("new", checkFunc(lib,"new",""))
+test("new", checkFunc(lib,"new","(hello|world)"))
+
+test("new_err", not checkFunc(lib,"new","*"))
+test("new_err", not checkFunc(lib,"new"))
+test("new_err", not checkFunc(lib,"new","(hello|world"))
+test("new_err", not checkFunc(lib,"new","[0-9"))
+-- invalid compile flag
+test("new_err", not checkFunc(lib,"new","[0-9]",flags.PARTIAL))
+
+
+local val1 = "hello world foo bar"
+local val2 = "hello wORld FOO bar"
+local patt = "hello (world) (.*) bar"
+local rgx = lib.new(patt)
+local rgx2 = lib.new(patt,flags.CASELESS)
+
+testing("typeof")
+test("typeof",typeof(rgx) == 'GRegex')
+test("typeof",typeof(rgx2) == 'GRegex')
+
+testing("match")
+test("match", checkFunc(lib,"match", val1,patt, 1, flags.CASELESS) and results[2] == "world" and results[3] == "foo")
+test("match", checkFunc(lib,"match", val2,patt, 1, flags.CASELESS) and results[2] == "wORld" and results[3] == "FOO")
+test("match", checkFunc(lib,"match", val1,rgx) and results[2] == "world" and results[3] == "foo")
+test("match", checkFunc(rgx,"match", rgx,val1) and results[2] == "world" and results[3] == "foo")
+test("match", checkFunc(rgx2,"match", rgx2,val2, 1) and results[2] == "wORld" and results[3] == "FOO")
+
+-- different offset won't match this pattern
+test("match_err", checkFunc(rgx2,"match", rgx2,val2, 4) and results[2] == nil)
+
+-- invalid compile flag
+test("match_err", not checkFunc(lib,"match", val1,patt, 1, flags.PARTIAL))
+-- invalid match flag
+test("match_err", not checkFunc(rgx,"match", rgx,val1, 1, flags.CASELESS))
+
+testing("find")
+
+test("find", checkFunc(lib,"find", val1,patt) and results[2] == 1 and results[3] == val1:len()
+ and results[4] == "world" and results[5] == "foo")
+test("find", checkFunc(lib,"find", val1,rgx) and results[2] == 1 and results[3] == val1:len()
+ and results[4] == "world" and results[5] == "foo")
+test("find", checkFunc(rgx,"find", rgx,val1) and results[2] == 1 and results[3] == val1:len()
+ and results[4] == "world" and results[5] == "foo")
+
+-- banner fixed: this section exercises the exec method, not match
+testing("exec")
+
+--checkFunc(rgx,"exec", rgx,val1)
+--print(results[4][3],results[4][4])
+-- exec is expected to yield the start and end offsets of the whole match,
+-- followed by a table of start/end offsets for each capture
+test("exec", checkFunc(rgx,"exec", rgx,val1) and results[2] == 1 and results[3] == val1:len()
+ and results[4][1] == 7 and results[4][2] == 11 and results[4][3] == 13 and results[4][4] == 15)
+
+print("\n----------------------------------------------------------\n")
+
+------- OK, we're sane, so run all the library's real tests ---------
+
+testing("Lrexlib-provided tests")
+
+-- we're not using the "real" lib name
+local GLIBNAME = "GRegex"
+local isglobal = true
+
+-- Put the directory given by the "-d<dir>" script argument at the front of
+-- package.path so that require can find the helper modules.  When no -d
+-- argument is present, package.path is left untouched.
+do
+  local dir
+  for i = 1, select ("#", ...) do
+    local arg = select (i, ...)
+    --print(arg)
+    if arg:sub(1,2) == "-d" then
+      dir = arg:sub(3)
+    end
+  end
+  -- dir is nil without a -d argument; indexing it would abort the script
+  -- before a single library test had run
+  if dir then
+    -- strip trailing path separators
+    dir = dir:gsub("[/\\]+$", "")
+    local path = dir .. "/?.lua;"
+    -- only prepend once
+    if package.path:sub(1, #path) ~= path then
+      package.path = path .. package.path
+    end
+  end
+end
+
+local luatest = require "luatest"
+
+-- Run every test set produced by one set file against one library.
+-- With `verbose`, the set names and the details of each failed test are
+-- printed; `really_verbose` is passed on to luatest.test_set.
+-- returns: number of failures
+local function test_library (libname, setfile, verbose, really_verbose)
+  if verbose then
+    print (("[lib: %s; file: %s]"):format (libname, setfile))
+  end
+  local lib = isglobal and _G[libname] or require (libname)
+  local make_sets = require (setfile)
+
+  local failures = 0
+  for _, set in ipairs (make_sets (libname, isglobal)) do
+    if verbose then
+      print (set.Name or "Unnamed set")
+    end
+    local failed = luatest.test_set (set, lib, really_verbose)
+    if verbose then
+      for _, entry in ipairs (failed) do
+        print ("\nTest " .. entry.i)
+        print (" Expected result:\n "..tostring(entry))
+        luatest.print_results (entry[1], " ")
+        -- drop the expected value so that only the obtained ones remain
+        table.remove(entry,1)
+        print ("\n Got:")
+        luatest.print_results (entry, " ")
+      end
+    end
+    failures = failures + #failed
+  end
+  if verbose then
+    print ""
+  end
+
+  return failures
+end
+
+-- Libraries selectable from the script arguments.  For each key, `lib` is
+-- the library name handed to test_library and the array part lists the set
+-- files to run for it, in order.
+local avail_tests = {
+ posix = { lib = "rex_posix", "common_sets", "posix_sets" },
+ gnu = { lib = "rex_gnu", "common_sets", "emacs_sets", "gnu_sets" },
+ oniguruma = { lib = "rex_onig", "common_sets", "oniguruma_sets", },
+ pcre = { lib = "rex_pcre", "common_sets", "pcre_sets", "pcre_sets2", },
+ glib = { lib = GLIBNAME, "common_sets", "pcre_sets", "pcre_sets2", "glib_sets" },
+ spencer = { lib = "rex_spencer", "common_sets", "posix_sets", "spencer_sets" },
+ tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets", --[["tre_sets"]] },
+}
+
+-- Script driver: parse the script arguments, run all set files of every
+-- requested library and abort with an error if any test failed.
+do
+  local verbose, really_verbose = false, false
+  local tests, nerr = {}, 0
+  local dir
+
+  -- check arguments
+  for i = 1, select ("#", ...) do
+    local arg = select (i, ...)
+    --print(arg)
+    if arg:sub(1,1) ~= "-" then
+      -- anything that is not an option must name an entry of avail_tests
+      local wanted = avail_tests[arg]
+      if not wanted then
+        error ("invalid argument: [" .. arg .. "]")
+      end
+      tests[#tests+1] = wanted
+    elseif arg == "-v" then
+      verbose = true
+    elseif arg == "-V" then
+      verbose, really_verbose = true, true
+    elseif arg:sub(1,2) == "-d" then
+      dir = arg:sub(3)
+    end
+  end
+  assert (#tests > 0, "no library specified")
+  -- give priority to libraries located in the specified directory
+  if dir and not isglobal then
+    dir = dir:gsub("[/\\]+$", "")
+    for _, ext in ipairs {"dll", "so", "dylib"} do
+      if package.cpath:match ("%?%." .. ext) then
+        local cpath = dir .. "/?." .. ext .. ";"
+        if package.cpath:sub(1, #cpath) ~= cpath then
+          package.cpath = cpath .. package.cpath
+        end
+        break
+      end
+    end
+  end
+  -- do tests
+  for _, test in ipairs (tests) do
+    package.loaded[test.lib] = nil -- to force-reload the tested library
+    for _, setfile in ipairs (test) do
+      nerr = nerr + test_library (test.lib, setfile, verbose, really_verbose)
+    end
+  end
+  print ("Total number of failures: " .. nerr)
+
+  assert(nerr == 0, "Test failed!")
+end
+
+
+
+
+print("\n-----------------------------\n")
+
+print("All tests passed!\n\n")
diff --git a/test/lua/luatest.lua b/test/lua/luatest.lua
new file mode 100755
index 0000000000..617329c9c6
--- /dev/null
+++ b/test/lua/luatest.lua
@@ -0,0 +1,174 @@
+-- See Copyright Notice in the file LICENSE
+
+-- arrays: deep comparison
+-- Two values are equal when they are == or when both are tables of the same
+-- length whose array elements are pairwise equal.  `lut` remembers the table
+-- pairs already being compared, so self-referencing arrays terminate.
+local function eq (t1, t2, lut)
+  if t1 == t2 then return true end
+  local both_tables = type(t1) == "table" and type(t2) == "table"
+  if not both_tables or #t1 ~= #t2 then
+    return false
+  end
+
+  lut = lut or {}
+  local seen1 = lut[t1]
+  if not seen1 then
+    seen1 = {}
+    lut[t1] = seen1
+  end
+  if seen1[t2] then return true end -- this pair is already under comparison
+  local seen2 = lut[t2]
+  if not seen2 then
+    seen2 = {}
+    lut[t2] = seen2
+  end
+  seen1[t2], seen2[t1] = true, true
+
+  for idx, item in ipairs (t1) do
+    if not eq (t2[idx], item, lut) then return false end
+  end
+  return true
+end
+
+-- a "nil GUID", to be used instead of nils in datasets
+local NT = "b5f74fe5-46f4-483a-8321-e58ba2fa0e17"
+
+-- Collect the arguments into an array, storing the NT placeholder wherever
+-- an argument is nil so that the array has no holes.
+local function packNT (...)
+  local count = select ("#", ...)
+  local packed = { ... }
+  for i = 1, count do
+    if packed[i] == nil then
+      packed[i] = NT
+    end
+  end
+  return packed
+end
+
+-- unpack table into vararg, replacing "NT" items with nils
+-- Returns nothing for an empty table.  The recursion is what allows nils
+-- to appear among the returned values.
+local function unpackNT (t)
+ local len = #t
+ -- return t[i], t[i+1], ..., t[len] with NT turned back into nil
+ local function unpack_from (i)
+ local v = t[i]
+ if v == NT then v = nil end
+ if i == len then return v end
+ return v, unpack_from (i+1)
+ end
+ if len > 0 then return unpack_from (1) end
+end
+
+-- Print a value on its own line, prefixed by `indent`.  Arrays are printed
+-- as their tostring() form followed by their elements, one level deeper;
+-- a table that was already printed is not expanded again.  The NT
+-- placeholder is shown as "nil".
+local function print_results (val, indent, lut)
+  indent = indent or ""
+  lut = lut or {} -- tables already expanded
+  local str = tostring (val)
+  if type (val) ~= "table" then
+    io.write (indent, val == NT and "nil" or str, "\n")
+    return
+  end
+  io.write (indent, str, "\n")
+  if lut[val] then
+    return
+  end
+  lut[val] = true
+  for _, item in ipairs (val) do
+    print_results (item, " " .. indent, lut) -- recursion
+  end
+end
+
+-- Run one function-style test: call func with the arguments in test[1] and
+-- compare what it returns with the expected results in test[2].
+-- returns:
+-- 1) true, if success; false, if failure
+-- 2) test results table or error_message
+local function test_function (test, func)
+ local res
+ -- t[1] is pcall's status; the rest are the results (nils stored as NT)
+ local t = packNT (pcall (func, unpackNT (test[1])))
+ if t[1] then
+ table.remove (t, 1)
+ res = t
+ -- when a global `alien` exists, repeat the call with the subject copied
+ -- into an alien buffer (presumably the "alien" FFI module - confirm);
+ -- note that this overwrites test[1][1] with the buffer
+ if alien then
+ local subject = test[1][1]
+ local buf = alien.buffer (#subject)
+ if #subject > 0 then
+ alien.memmove (buf:topointer (), subject, #subject)
+ end
+ test[1][1] = buf
+ local t = packNT (pcall (func, unpackNT (test[1])))
+ if t[1] then
+ table.remove (t, 1)
+ res = t
+ else
+ print "alien test failed"
+ res = t[2] --> error_message
+ end
+ end
+ else
+ res = t[2] --> error_message
+ end
+ -- success needs matching types; two strings (an expected and an actual
+ -- error message) always count as a match, tables are compared with eq
+ local how = (type (res) == type (test[2])) and
+ (type (res) == "string" or eq (res, test[2])) -- allow error messages to differ
+ return how, res
+end
+
+-- Run one method-style test: build an object with constructor(test[1]...),
+-- call its method `name` with the arguments in test[2] and compare the
+-- outcome with test[3].
+-- returns:
+-- 1) true, if success; false, if failure
+-- 2) test results table, or 1 (constructor failed) / 2 (method call failed)
+-- 3) nil, or the error_message when 2) is 1 or 2
+local function test_method (test, constructor, name)
+  local ok, r = pcall (constructor, unpackNT (test[1]))
+  if not ok then
+    return eq (1, test[3]), 1, r --> 1, error_message
+  end
+  -- t[1] is pcall's status; the rest are the results (nils stored as NT)
+  local t = packNT (pcall (r[name], r, unpackNT (test[2])))
+  if not t[1] then
+    return eq (2, test[3]), 2, t[2] --> 2, error_message
+  end
+  table.remove (t, 1)
+  return eq (t, test[3]), t, nil
+end
+
+-- Run all tests of a set.  A set with a function in `Func` is run through
+-- test_function; a set with a string in `Method` is run through test_method
+-- using lib.new as constructor; anything else is an error.
+-- returns: a list of failed tests (field i = test index, then the expected
+-- value followed by what was obtained)
+local function test_set (set, lib, verbose)
+  local failures = {}
+  local func = set.Func
+  local by_function = type (func) == "function"
+
+  if not by_function and type (set.Method) ~= "string" then
+    error ("neither set.Func nor set.Method is valid")
+  end
+
+  for i, test in ipairs (set) do
+    if verbose then
+      if by_function then
+        io.write (" running function test "..i.."...")
+      else
+        io.write (" running method test "..i.."...")
+      end
+      io.flush ()
+    end
+
+    local ok, failure
+    if by_function then
+      local res
+      ok, res = test_function (test, func)
+      failure = {i=i, test[2], res}
+    else
+      local res1, res2
+      ok, res1, res2 = test_method (test, lib.new, set.Method)
+      failure = {i=i, test[3], res1, res2}
+    end
+
+    if not ok then
+      if verbose then io.stdout:write("failed!\n") end
+      table.insert (failures, failure)
+    elseif verbose then
+      io.write ("passed\n")
+      io.flush ()
+    end
+  end
+
+  return failures
+end
+
+return {
+ eq = eq,
+ NT = NT,
+ print_results = print_results,
+ test_function = test_function,
+ test_method = test_method,
+ test_set = test_set,
+}
diff --git a/test/lua/pat2pcre.lua b/test/lua/pat2pcre.lua
new file mode 100755
index 0000000000..2d60a443b2
--- /dev/null
+++ b/test/lua/pat2pcre.lua
@@ -0,0 +1,87 @@
+-- See Copyright Notice in the file lrexlib.h
+
+-- Convert Lua regex pattern to its PCRE equivalent.
+
+-- Lua pattern class letter (the character after "%") -> text to emit.
+-- rep_normal wraps the value in "[...]"; rep_charclass inserts it as is
+-- into a bracket expression that is already open.
+local t_esc = {
+ a = "[:alpha:]",
+ A = "[:^alpha:]",
+ c = "[:cntrl:]",
+ C = "[:^cntrl:]",
+ d = "[:digit:]",
+ D = "[:^digit:]",
+ l = "[:lower:]",
+ L = "[:^lower:]",
+ p = "[:punct:]",
+ P = "[:^punct:]",
+ s = "[:space:]",
+ S = "[:^space:]",
+ u = "[:upper:]",
+ U = "[:^upper:]",
+ w = "[:alnum:]",
+ W = "[:^alnum:]",
+ x = "[:xdigit:]",
+ X = "[:^xdigit:]",
+ z = "\\x00",
+ Z = "\\x01-\\xFF",
+}
+
+-- Translate the character following "%" outside a bracket expression:
+-- a known class letter becomes "[<class>]", anything else is
+-- backslash-escaped.  "%b" and "%0" are rejected.
+local function rep_normal (ch)
+  assert (ch ~= "b", "\"%b\" subpattern is not supported")
+  assert (ch ~= "0", "invalid capture index")
+  local class = t_esc[ch]
+  if class then
+    return "[" .. class .. "]"
+  end
+  return "\\" .. ch
+end
+
+-- Translate the character following "%" inside a bracket expression:
+-- a known class letter becomes its class text, anything else is
+-- backslash-escaped.
+local function rep_charclass (ch)
+  local class = t_esc[ch]
+  if class then
+    return class
+  end
+  return "\\" .. ch
+end
+
+-- Convert the Lua pattern `s` into a PCRE pattern string.
+-- The pattern is scanned one character at a time in one of two states:
+-- "normal", or "charclass" while inside a "[...]" bracket expression.
+-- NOTE: this is declared without `local`, so it also sets the global
+-- `pat2pcre` in addition to being the module's return value.
+function pat2pcre (s)
+ local ind = 0
+
+ -- advance to the next character and return it ("" past the end)
+ local function getc ()
+ ind = ind + 1
+ return string.sub (s, ind, ind)
+ end
+
+ -- if a backslash followed by 1-3 decimal digits starts at the current
+ -- position, consume it and return the value as a "\xHH" escape;
+ -- otherwise return nothing
+ local function getnum ()
+ local num = string.match (s, "^\\(%d%d?%d?)", ind)
+ if num then
+ ind = ind + #num
+ return string.format ("\\x%02X", num)
+ end
+ end
+
+ local out, state = "", "normal"
+ while ind < #s do
+ local ch = getc ()
+ if state == "normal" then
+ if ch == "%" then
+ out = out .. rep_normal (getc ())
+ elseif ch == "-" then
+ -- Lua's "-" repetition is emitted as "*?"
+ out = out .. "*?"
+ elseif ch == "." then
+ out = out .. "\\C"
+ elseif ch == "[" then
+ out = out .. ch
+ state = "charclass"
+ else
+ local num = getnum ()
+ out = num and (out .. num) or (out .. ch)
+ end
+ elseif state == "charclass" then
+ if ch == "%" then
+ out = out .. rep_charclass (getc ())
+ elseif ch == "]" then
+ out = out .. ch
+ state = "normal"
+ else
+ local num = getnum ()
+ out = num and (out .. num) or (out .. ch)
+ end
+ end
+ end
+ return out
+end
+
+return pat2pcre
diff --git a/test/lua/pcre_sets.lua b/test/lua/pcre_sets.lua
new file mode 100755
index 0000000000..d1e50390cc
--- /dev/null
+++ b/test/lua/pcre_sets.lua
@@ -0,0 +1,179 @@
+-- See Copyright Notice in the file lrexlib.h
+
+local luatest = require "luatest"
+local N = luatest.NT
+
+-- Replace nil by the luatest placeholder N; any other value passes through.
+local function norm(a)
+  if a == nil then
+    return N
+  end
+  return a
+end
+
+-- Build the descending sequence n, n-1, ..., m as an array
+-- (empty when n is smaller than m).
+local function fill (n, m)
+  local seq = {}
+  local value = n
+  while value >= m do
+    seq[#seq + 1] = value
+    value = value - 1
+  end
+  return seq
+end
+
+-- Test set for named subpatterns.  The test function compiles the pattern,
+-- calls the given method on the subject and returns the two captures looked
+-- up by name in the method's third return value.
+local function set_named_subpatterns (lib, flg)
+ return {
+ Name = "Named Subpatterns",
+ Func = function (subj, methodname, patt, name1, name2)
+ local r = lib.new (patt)
+ -- only the third result (the captures table) is of interest
+ local _,_,caps = r[methodname] (r, subj)
+ return norm(caps[name1]), norm(caps[name2])
+ end,
+ --{} N.B. subject is always first element
+ { {"abcd", "tfind", "(?P<dog>.)b.(?P<cat>d)", "dog", "cat"}, {"a","d"} },
+ { {"abcd", "exec", "(?P<dog>.)b.(?P<cat>d)", "dog", "cat"}, {"a","d"} },
+ }
+end
+
+-- Test set for the library's find function.  Each entry is
+-- { arguments, expected results }; N stands in for nil.
+local function set_f_find (lib, flg)
+ -- cp1251 and loc are referenced only by the commented-out locale tests below
+ local cp1251 =
+ "ÀÁÂÃÄŨÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÜÛÚÝÞßàáâãäå¸æçèéêëìíîïðñòóôõö÷øùüûúýþÿ"
+ local loc = "Russian_Russia.1251"
+ return {
+ Name = "Function find",
+ Func = lib.find,
+ --{subj, patt, st,cf,ef,lo}, { results }
+ { {"abcd", ".+", 5}, { N } }, -- failing st
+ { {"abcd", ".*?"}, { 1,0 } }, -- non-greedy
+ { {"abc", "aBC", N,flg.CASELESS}, { 1,3 } }, -- cf
+ { {"abc", "aBC", N,"i" }, { 1,3 } }, -- cf
+ { {"abc", "bc", N,flg.ANCHORED}, { N } }, -- cf
+ { {"abc", "bc", N,N,flg.ANCHORED}, { N } }, -- ef
+--{ {cp1251, "[[:upper:]]+", N,N,N, loc}, { 1,33} }, -- locale
+--{ {cp1251, "[[:lower:]]+", N,N,N, loc}, {34,66} }, -- locale
+}
+end
+
+-- Test set for the library's match function.  Each entry is
+-- { arguments, expected results }; N stands in for nil.
+local function set_f_match (lib, flg)
+ return {
+ Name = "Function match",
+ Func = lib.match,
+ --{subj, patt, st,cf,ef,lo}, { results }
+ { {"abcd", ".+", 5}, { N }}, -- failing st
+ { {"abcd", ".*?"}, { "" }}, -- non-greedy
+ { {"abc", "aBC", N,flg.CASELESS}, {"abc" }}, -- cf
+ { {"abc", "aBC", N,"i" }, {"abc" }}, -- cf
+ { {"abc", "bc", N,flg.ANCHORED}, { N }}, -- cf
+ { {"abc", "bc", N,N,flg.ANCHORED}, { N }}, -- ef
+}
+end
+
+-- Test set for the library's gmatch iterator.  The test function collects
+-- the first two values of every iteration (nil stored as N) and returns one
+-- { a, b } pair per match.
+-- @param lib  the regex library under test
+-- @param flg  the library's flags table (not used by this set)
+local function set_f_gmatch (lib, flg)
+  -- gmatch (s, p, [cf], [ef])
+  -- unpack became table.unpack in Lua 5.2; accept whichever exists
+  local unpack = table.unpack or unpack
+  local pCSV = "(^[^,]*)|,([^,]*)"
+  local F = false
+  local function test_gmatch (subj, patt)
+    -- guard caps the number of iterations at 10
+    local out, guard = {}, 10
+    for a, b in lib.gmatch (subj, patt) do
+      table.insert (out, { norm(a), norm(b) })
+      guard = guard - 1
+      if guard == 0 then break end
+    end
+    return unpack (out)
+  end
+  return {
+    Name = "Function gmatch",
+    Func = test_gmatch,
+    --{ subj patt results }
+    { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
+    { {"", pCSV}, {{"",F}} },
+    { {"12", pCSV}, {{"12",F}} },
+    { {",", pCSV}, {{"", F},{F,""}} },
+    { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
+    { {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
+  }
+end
+
+-- Test set for the library's split iterator.  The test function collects
+-- the first three values of every iteration (nil stored as N) and returns
+-- one { a, b, c } triple per piece.
+-- @param lib  the regex library under test
+-- @param flg  the library's flags table (not used by this set)
+local function set_f_split (lib, flg)
+  -- split (s, p, [cf], [ef])
+  -- unpack became table.unpack in Lua 5.2; accept whichever exists
+  local unpack = table.unpack or unpack
+  local function test_split (subj, patt)
+    -- guard caps the number of iterations at 10
+    local out, guard = {}, 10
+    for a, b, c in lib.split (subj, patt) do
+      table.insert (out, { norm(a), norm(b), norm(c) })
+      guard = guard - 1
+      if guard == 0 then break end
+    end
+    return unpack (out)
+  end
+  return {
+    Name = "Function split",
+    Func = test_split,
+    --{ subj patt results }
+    { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
+    { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
+    { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
+    { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
+    { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
+  }
+end
+
+-- Test set for the "exec" method.  Each entry is
+-- { constructor args, method args, expected results }; N stands in for nil.
+local function set_m_exec (lib, flg)
+ return {
+ Name = "Method exec",
+ Method = "exec",
+--{patt,cf,lo}, {subj,st,ef} { results }
+ { {".+"}, {"abcd",5}, { N } }, -- failing st
+ { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy
+ { {"aBC",flg.CASELESS}, {"abc"}, {1,3,{}} }, -- cf
+ { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf
+ { {"bc",flg.ANCHORED}, {"abc"}, { N } }, -- cf
+ { {"bc"}, {"abc",N, flg.ANCHORED}, { N } }, -- ef
+}
+end
+
+-- Test set for the "tfind" method.  Each entry is
+-- { constructor args, method args, expected results }; N stands in for nil.
+local function set_m_tfind (lib, flg)
+ return {
+ Name = "Method tfind",
+ Method = "tfind",
+--{patt,cf,lo}, {subj,st,ef} { results }
+ { {".+"}, {"abcd",5}, { N } }, -- failing st
+ { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy
+ { {"aBC",flg.CASELESS}, {"abc"}, {1,3,{}} }, -- cf
+ { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf
+ { {"bc",flg.ANCHORED}, {"abc"}, { N } }, -- cf
+ { {"bc"}, {"abc",N, flg.ANCHORED}, { N } }, -- ef
+}
+end
+
+-- Test set for the "dfa_exec" method.  Each entry is
+-- { constructor args, method args, expected results }; N stands in for nil.
+local function set_m_dfa_exec (lib, flg)
+ return {
+ Name = "Method dfa_exec",
+ Method = "dfa_exec",
+--{patt,cf,lo}, {subj,st,ef,os,ws} { results }
+ { {".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none]
+ { {".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st
+ { {".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st
+ { {".+"}, {"abcd",5}, {N } }, -- failing st
+ { {".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none]
+ { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy
+ { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf
+ { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf
+ { {"bc"}, {"abc"}, {2,{3},1} }, -- [none]
+ { {"bc",flg.ANCHORED}, {"abc"}, {N } }, -- cf
+ { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef
+ { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures]
+ { {"abc"}, {"ab"}, {N } },
+ { {"abc"}, {"ab",N,flg.PARTIAL}, {1,{2},flg.ERROR_PARTIAL} },
+ -- fill(50,26) is the expected list 50, 49, ..., 26
+ { {".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize
+}
+end
+
+-- Module entry point: resolve the library under test and return the PCRE
+-- test sets.  Two sets are only included from a minimum flags.MAJOR on.
+return function (libname, isglobal)
+  local lib = isglobal and _G[libname]
+  if not lib then
+    lib = require (libname)
+  end
+  local flags = lib.flags ()
+  local sets = {
+    set_f_match (lib, flags),
+    set_f_find (lib, flags),
+    set_f_gmatch (lib, flags),
+    set_f_split (lib, flags),
+    set_m_exec (lib, flags),
+    set_m_tfind (lib, flags),
+  }
+  if flags.MAJOR >= 4 then
+    sets[#sets + 1] = set_named_subpatterns (lib, flags)
+  end
+  if flags.MAJOR >= 6 then
+    sets[#sets + 1] = set_m_dfa_exec (lib, flags)
+  end
+  return sets
+end
diff --git a/test/lua/pcre_sets2.lua b/test/lua/pcre_sets2.lua
new file mode 100755
index 0000000000..c0c8d7a5b9
--- /dev/null
+++ b/test/lua/pcre_sets2.lua
@@ -0,0 +1,198 @@
+-- See Copyright Notice in the file LICENSE
+
+local pat2pcre = require "pat2pcre"
+
+-- Return the library's function-style gsub.  When the library only offers
+-- gsub as a method of compiled patterns, return an adapter with the same
+-- (subj, pattern, repl, n) signature.
+local function get_gsub (lib)
+  local direct = lib.gsub
+  if direct then
+    return direct
+  end
+  return function (subj, pattern, repl, n)
+    local compiled = lib.new (pattern)
+    return compiled:gsub (subj, repl, n)
+  end
+end
+
+-- gsub test set 1: a single check that gsub copes with NUL bytes in the
+-- subject.  Each entry is { arguments, expected results }.
+-- @param lib  the regex library under test
+-- @param flg  the library's flags table (not used by this set)
+local function set_f_gsub1 (lib, flg)
+  -- (the unused locals `subj` and `pat` of the original were dropped)
+  return {
+    Name = "Function gsub, set1",
+    Func = get_gsub (lib),
+    --{ s, p, f, n, res1, res2, res3 },
+    { {"a\0c", ".", "#" }, {"###", 3, 3} }, -- subj contains nuls
+  }
+end
+
+-- gsub test set 4.  The patterns are written as Lua patterns and converted
+-- with pat2pcre before the set is returned; each entry is
+-- { arguments, expected results }.
+local function set_f_gsub4 (lib, flg)
+ local pCSV = "(^[^,]*)|,([^,]*)"
+ -- replacement function: wrap whichever of the two captures matched
+ local fCSV = function (a,b) return "["..(a or b).."]" end
+ local set = {
+ Name = "Function gsub, set4",
+ Func = get_gsub (lib),
+ --{ s, p, f, n, res1, res2, res3 },
+ { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} },
+ { {"a2c3", ".-", "#" }, {"#########", 9, 9} }, -- test .-
+ { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} },
+ { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} },
+ { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d
+ { {"a2c3", "%D", "#" }, {"#2#3", 2, 2} }, -- test %D
+ { {"a \t\nb", "%s", "#" }, {"a###b", 3, 3} }, -- test %s
+ { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2, 2} }, -- test %S
+ { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} },
+ { {"", pCSV,fCSV}, {"[]", 1, 1} },
+ { {"123", pCSV,fCSV}, {"[123]", 1, 1} },
+ { {",", pCSV,fCSV}, {"[][]", 2, 2} },
+ { {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}},
+ { {",,123,456,,abc,789,", pCSV,fCSV}, {"[][][123][456][][abc][789][]", 8, 8}},
+ }
+ -- convert patterns: lua -> pcre
+ for _, test in ipairs (set) do
+ test[1][2] = pat2pcre (test[1][2])
+ end
+ return set
+end
+
+-- Builds gsub test set 7: each pattern is applied to a subject that holds
+-- all 256 byte values.  Expected results are produced by Lua's own
+-- string.gsub, after which every Lua pattern is converted with pat2pcre.
+-- @param lib  the regex library under test
+-- @param flg  the library's flags table (not used by this set)
+-- @return the populated test set
+local function set_f_gsub7 (lib, flg)
+  -- unpack became table.unpack in Lua 5.2; accept whichever exists
+  local unpack = table.unpack or unpack
+
+  -- subject: bytes 0..255 in order (joined once instead of 256 concatenations)
+  local bytes = {}
+  for i = 0, 255 do
+    bytes[i + 1] = string.char (i)
+  end
+  local subj = table.concat (bytes)
+
+  -- This set requires calling prepare_set before calling gsub_test
+  local set = {
+    Name = "Function gsub, set7",
+    Func = get_gsub (lib),
+    --{ s, p, f, n, },
+    { {subj, "%a", "" }, },
+    { {subj, "%A", "" }, },
+    { {subj, "%c", "" }, },
+    { {subj, "%C", "" }, },
+    { {subj, "%l", "" }, },
+    { {subj, "%L", "" }, },
+    { {subj, "%p", "" }, },
+    { {subj, "%P", "" }, },
+    { {subj, "%u", "" }, },
+    { {subj, "%U", "" }, },
+    { {subj, "%w", "" }, },
+    { {subj, "%W", "" }, },
+    { {subj, "%x", "" }, },
+    { {subj, "%X", "" }, },
+    { {subj, "%z", "" }, },
+    { {subj, "%Z", "" }, },
+
+    { {subj, "[%a]", "" }, },
+    { {subj, "[%A]", "" }, },
+    { {subj, "[%c]", "" }, },
+    { {subj, "[%C]", "" }, },
+    { {subj, "[%l]", "" }, },
+    { {subj, "[%L]", "" }, },
+    { {subj, "[%p]", "" }, },
+    { {subj, "[%P]", "" }, },
+    { {subj, "[%u]", "" }, },
+    { {subj, "[%U]", "" }, },
+    { {subj, "[%w]", "" }, },
+    { {subj, "[%W]", "" }, },
+    { {subj, "[%x]", "" }, },
+    { {subj, "[%X]", "" }, },
+    { {subj, "[%z]", "" }, },
+    { {subj, "[%Z]", "" }, },
+
+    { {subj, "[%a_]", "" }, },
+    { {subj, "[%A_]", "" }, },
+    { {subj, "[%c_]", "" }, },
+    { {subj, "[%C_]", "" }, },
+    { {subj, "[%l_]", "" }, },
+    { {subj, "[%L_]", "" }, },
+    { {subj, "[%p_]", "" }, },
+    { {subj, "[%P_]", "" }, },
+    { {subj, "[%u_]", "" }, },
+    { {subj, "[%U_]", "" }, },
+    { {subj, "[%w_]", "" }, },
+    { {subj, "[%W_]", "" }, },
+    { {subj, "[%x_]", "" }, },
+    { {subj, "[%X_]", "" }, },
+    { {subj, "[%z_]", "" }, },
+    { {subj, "[%Z_]", "" }, },
+
+    { {subj, "[%a%d]", "" }, },
+    { {subj, "[%A%d]", "" }, },
+    { {subj, "[%c%d]", "" }, },
+    { {subj, "[%C%d]", "" }, },
+    { {subj, "[%l%d]", "" }, },
+    { {subj, "[%L%d]", "" }, },
+    { {subj, "[%p%d]", "" }, },
+    { {subj, "[%P%d]", "" }, },
+    { {subj, "[%u%d]", "" }, },
+    { {subj, "[%U%d]", "" }, },
+    { {subj, "[%w%d]", "" }, },
+    { {subj, "[%W%d]", "" }, },
+    { {subj, "[%x%d]", "" }, },
+    { {subj, "[%X%d]", "" }, },
+    { {subj, "[%z%d]", "" }, },
+    { {subj, "[%Z%d]", "" }, },
+
+    { {subj, "[^%a%d]", "" }, },
+    { {subj, "[^%A%d]", "" }, },
+    { {subj, "[^%c%d]", "" }, },
+    { {subj, "[^%C%d]", "" }, },
+    { {subj, "[^%l%d]", "" }, },
+    { {subj, "[^%L%d]", "" }, },
+    { {subj, "[^%p%d]", "" }, },
+    { {subj, "[^%P%d]", "" }, },
+    { {subj, "[^%u%d]", "" }, },
+    { {subj, "[^%U%d]", "" }, },
+    { {subj, "[^%w%d]", "" }, },
+    { {subj, "[^%W%d]", "" }, },
+    { {subj, "[^%x%d]", "" }, },
+    { {subj, "[^%X%d]", "" }, },
+    { {subj, "[^%z%d]", "" }, },
+    { {subj, "[^%Z%d]", "" }, },
+
+    { {subj, "[^%a_]", "" }, },
+    { {subj, "[^%A_]", "" }, },
+    { {subj, "[^%c_]", "" }, },
+    { {subj, "[^%C_]", "" }, },
+    { {subj, "[^%l_]", "" }, },
+    { {subj, "[^%L_]", "" }, },
+    { {subj, "[^%p_]", "" }, },
+    { {subj, "[^%P_]", "" }, },
+    { {subj, "[^%u_]", "" }, },
+    { {subj, "[^%U_]", "" }, },
+    { {subj, "[^%w_]", "" }, },
+    { {subj, "[^%W_]", "" }, },
+    { {subj, "[^%x_]", "" }, },
+    { {subj, "[^%X_]", "" }, },
+    { {subj, "[^%z_]", "" }, },
+    { {subj, "[^%Z_]", "" }, },
+
+    { {subj, "\100", "" }, },
+    { {subj, "[\100]", "" }, },
+    { {subj, "[^\100]", "" }, },
+    { {subj, "[\100-\200]", "" }, },
+    { {subj, "[^\100-\200]", "" }, },
+    { {subj, "\100a", "" }, },
+    { {subj, "[\100a]", "" }, },
+    { {subj, "[^\100a]", "" }, },
+    { {subj, "[\100-\200a]", "" }, },
+    { {subj, "[^\100-\200a]", "" }, },
+  }
+  -- fill in reference results (must run while the patterns are still in
+  -- Lua syntax)
+  for _,v in ipairs(set) do
+    local r0, r1, r2 = pcall (string.gsub, unpack (v[1]))
+    v[2] = r0 and { r1, r2, r2 } or { r0, r1 }
+  end
+  -- convert patterns: lua -> pcre
+  for _, test in ipairs (set) do
+    test[1][2] = pat2pcre (test[1][2])
+  end
+  return set
+end
+
+-- Module entry point: resolve the library under test and return the gsub
+-- test sets.  Set 7 is only added when the library reports a version
+-- (flags.MAJOR*100 + flags.MINOR) above 405.
+return function (libname, isglobal)
+  local lib = isglobal and _G[libname] or require (libname)
+  local flags = lib.flags and lib.flags ()
+  local sets = {
+    set_f_gsub1 (lib, flags),
+    set_f_gsub4 (lib, flags),
+  }
+  -- flags is nil for a library without flags(); skip the version-gated set
+  -- in that case (or when no version is reported) instead of raising on
+  -- the nil index
+  if flags and flags.MAJOR and flags.MINOR
+      and flags.MAJOR*100 + flags.MINOR > 405 then
+    table.insert (sets, set_f_gsub7 (lib, flags))
+  end
+  return sets
+end
diff --git a/test/suite-wslua.sh b/test/suite-wslua.sh
index 20b97f96ab..f734f41aea 100755
--- a/test/suite-wslua.sh
+++ b/test/suite-wslua.sh
@@ -196,7 +196,7 @@ wslua_step_args_test() {
test_step_ok
}
-unittests_step_globals_test() {
+wslua_step_globals_test() {
if [ $HAVE_LUA -ne 0 ]; then
test_step_skipped
return
@@ -218,6 +218,22 @@ unittests_step_globals_test() {
test_step_ok
}
+# Run test/lua/gregex.lua under tshark and pass the step only when the
+# script's "All tests passed!" marker shows up in the captured output.
+# The step is skipped when HAVE_LUA is non-zero.
+wslua_step_gregex_test() {
+ if [ $HAVE_LUA -ne 0 ]; then
+ test_step_skipped
+ return
+ fi
+
+ # Tshark catches lua script failures, so we have to parse the output.
+ # Script arguments: -d<dir> (lua include dir), "glib" (library to test),
+ # -V (very verbose).
+ $TSHARK -r $CAPTURE_DIR/empty.pcap -X lua_script:$TESTS_DIR/lua/gregex.lua -X lua_script1:-d$TESTS_DIR/lua/ -X lua_script1:glib -X lua_script1:-V > testout.txt 2>&1
+ if grep -q "All tests passed!" testout.txt; then
+ test_step_ok
+ else
+ cat testout.txt
+ test_step_failed "didn't find pass marker"
+ fi
+}
+
+
wslua_step_struct_test() {
if [ $HAVE_LUA -ne 0 ]; then
test_step_skipped
@@ -244,7 +260,8 @@ wslua_suite() {
test_step_set_post wslua_cleanup_step
test_step_add "wslua dissector" wslua_step_dissector_test
test_step_add "wslua field/fieldinfo" wslua_step_field_test
- test_step_add "wslua globals" unittests_step_globals_test
+ test_step_add "wslua globals" wslua_step_globals_test
+ test_step_add "wslua gregex" wslua_step_gregex_test
test_step_add "wslua int64" wslua_step_int64_test
test_step_add "wslua listener" wslua_step_listener_test
test_step_add "wslua nstime" wslua_step_nstime_test