diff options
-rw-r--r-- | docbook/CMakeLists.txt | 1 | ||||
-rw-r--r-- | docbook/wsluarm.xml | 487 | ||||
-rw-r--r-- | epan/wslua/CMakeLists.txt | 3 | ||||
-rw-r--r-- | epan/wslua/Makefile.am | 7 | ||||
-rw-r--r-- | epan/wslua/Makefile.nmake | 6 | ||||
-rw-r--r-- | epan/wslua/lrexlib.c | 266 | ||||
-rw-r--r-- | epan/wslua/lrexlib.h | 130 | ||||
-rw-r--r-- | epan/wslua/lrexlib_algo.h | 755 | ||||
-rw-r--r-- | epan/wslua/lrexlib_glib.c | 414 | ||||
-rw-r--r-- | epan/wslua/lrexlib_glib_f.c | 138 | ||||
-rwxr-xr-x | epan/wslua/make-reg.pl | 1 | ||||
-rw-r--r-- | epan/wslua/wslua.h | 1 | ||||
-rwxr-xr-x | test/lua/common_sets.lua | 319 | ||||
-rw-r--r-- | test/lua/glib_sets.lua | 204 | ||||
-rw-r--r-- | test/lua/gregex.lua | 285 | ||||
-rwxr-xr-x | test/lua/luatest.lua | 174 | ||||
-rwxr-xr-x | test/lua/pat2pcre.lua | 87 | ||||
-rwxr-xr-x | test/lua/pcre_sets.lua | 179 | ||||
-rwxr-xr-x | test/lua/pcre_sets2.lua | 198 | ||||
-rwxr-xr-x | test/suite-wslua.sh | 21 |
20 files changed, 3673 insertions, 3 deletions
diff --git a/docbook/CMakeLists.txt b/docbook/CMakeLists.txt index bbcf284b2d..0373da5f76 100644 --- a/docbook/CMakeLists.txt +++ b/docbook/CMakeLists.txt @@ -310,7 +310,6 @@ set(WSLUA_MODULES ${CMAKE_SOURCE_DIR}/epan/wslua/wslua_tree.c ${CMAKE_SOURCE_DIR}/epan/wslua/wslua_tvb.c ${CMAKE_SOURCE_DIR}/epan/wslua/wslua_util.c - ${CMAKE_SOURCE_DIR}/epan/wslua/wslua_int64.c ${CMAKE_SOURCE_DIR}/epan/wslua/wslua_struct.c ) diff --git a/docbook/wsluarm.xml b/docbook/wsluarm.xml index 261c65fe30..1a01b568d4 100644 --- a/docbook/wsluarm.xml +++ b/docbook/wsluarm.xml @@ -179,4 +179,491 @@ end &WsLuaUtility; &WsLuaInt64; &WsLuaStruct; + + <section id='lua_module_GRegex'> + <title> GLib Regular Expressions </title> + <para> + Lua has its own native 'pattern' syntax in the string library, but sometimes a real + regex engine is more useful. Wireshark comes with GLib's Regex implementation, which + itself is based on Perl Compatible Regular Expressions (PCRE). This engine is exposed + into Wireshark's Lua engine through the well-known Lrexlib library, following the + same syntax and semantics as the Lrexlib PCRE implementation, with a few differences as follows: + <itemizedlist> + <listitem> + <para> There is no support for using custom locale/chartables </para> + </listitem> + <listitem> + <para> dfa_exec() doesn't take 'ovecsize' nor 'wscount' arguments </para> + </listitem> + <listitem> + <para> dfa_exec() returns boolean true for partial match, without subcapture info </para> + </listitem> + <listitem> + <para> Named subgroups do not return name-keyed entries in the return + table (i.e., in match/tfind/exec) + </para> + </listitem> + <listitem> + <para> The 'flags()' function still works, returning all flags, but two new + functions 'compile_flags()' and 'match_flags()' return just their respective + flags, since GLib has a different and smaller set of such flags, for + regex compile vs. 
match functions + </para> + </listitem> + <listitem> + <para> + Using some assertions and POSIX character classes against strings with non-ASCII characters + might match high-order characters, because glib always sets PCRE_UCP + even if G_REGEX_RAW is set. For example, '[:alpha:]' matches certain + non-ASCII bytes. The following assertions have this issue: '\b', '\B', '\s', '\S', '\w', '\W'. + The following character classes have this issue: [:alpha:], [:alnum:], [:lower:], [:upper:], + [:space:], [:word:], and [:graph:]. + </para> + </listitem> + <listitem> + <para> + The compile flag G_REGEX_RAW is always set/used, even if you didn't specify it. This is because + GLib runs PCRE in UTF-8 mode by default, whereas Lua strings are not UTF-aware. + </para> + </listitem> + </itemizedlist> + </para> + <para> + This page is based on the full documentation for Lrexlib at + <ulink url="http://rrthomas.github.io/lrexlib/manual.html">http://rrthomas.github.io/lrexlib/manual.html</ulink> + </para> + <para> + The GLib Regular expression syntax (which is essentially PCRE syntax) can be found at + <ulink url="https://developer.gnome.org/glib/2.38/glib-regex-syntax.html">https://developer.gnome.org/glib/2.38/glib-regex-syntax.html</ulink> + </para> + <section id='lua_class_GRegex'><title>GRegex</title> + <para> + GLib Regular Expressions based on PCRE. + </para> + <section id='lua_class_GRegex_notes'><title>Notes</title> + <para> + All functions that take a regular expression pattern as an argument will + generate an error if that pattern is found invalid by the regex library. + </para> + <para> + All functions that take a string-type regex argument accept a compiled regex + too. In this case, the compile flags argument is ignored (should be either supplied as nil or omitted). + </para> + <para> + The compilation flags argument 'cf' may also be supplied as a string, whose characters stand for compilation flags.
+ Combinations of the following characters (case sensitive) are supported: + <itemizedlist> + <listitem> + <para> '<command>i</command>' = G_REGEX_CASELESS - Letters in the pattern match both upper- and lowercase letters. + This option can be changed within a pattern by a "(?i)" option setting. </para> + </listitem> + <listitem> + <para> '<command>m</command>' = G_REGEX_MULTILINE - By default, GRegex treats the strings as consisting of a single + line of characters (even if it actually contains newlines). The "start of line" + metacharacter ("^") matches only at the start of the string, while the "end of line" + metacharacter ("$") matches only at the end of the string, or before a terminating newline + (unless G_REGEX_DOLLAR_ENDONLY is set). When G_REGEX_MULTILINE is set, the "start of line" + and "end of line" constructs match immediately following or immediately before any newline + in the string, respectively, as well as at the very start and end. This can be changed + within a pattern by a "(?m)" option setting.</para> + </listitem> + <listitem> + <para> '<command>s</command>' = G_REGEX_DOTALL - A dot metacharacter (".") in the pattern matches all characters, + including newlines. Without it, newlines are excluded. This option can be changed within + a pattern by a "(?s)" option setting. </para> + </listitem> + <listitem> + <para> '<command>x</command>' = G_REGEX_EXTENDED - Whitespace data characters in the pattern are totally ignored + except when escaped or inside a character class. Whitespace does not include the VT + character (code 11). In addition, characters between an unescaped "#" outside a character + class and the next newline character, inclusive, are also ignored. This can be changed + within a pattern by a "(?x)" option setting. </para> + </listitem> + <listitem> + <para> '<command>U</command>' = G_REGEX_UNGREEDY - Inverts the "greediness" of the quantifiers so that they are not + greedy by default, but become greedy if followed by "?".
It can also be set by a "(?U)" + option setting within the pattern. </para> + </listitem> + </itemizedlist> + </para> + </section><!-- end of notes --> + <section id='lua_fn_GRegex_new_pattern_'> + <title>GRegex.new(pattern)</title> + <para>Compiles regular expression pattern into a regular expression object whose + internal representation is corresponding to the library used. The returned + result then can be used by the methods, e.g. match, exec, etc. Regular + expression objects are automatically garbage collected. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>pattern</term> + <listitem><para> A Perl-compatible regular expression pattern string </para></listitem> + </varlistentry> <!-- function_arg_footer: pattern --> + </variablelist> + </section> + <section><title>Returns</title> + <para> The compiled regular expression (a userdata object) </para> + </section> <!-- function_returns_footer: GRegex.new --> + <section><title>Errors</title> + <itemizedlist> + <listitem><para>A malformed pattern generates a Lua error </para></listitem> + </itemizedlist> + </section> <!-- function_error_footer: GRegex.new --> + </section> <!-- function_footer: GRegex.new --> + <section id='lua_fn_GRegex_flags__table__'> + <title>GRegex.flags([table])</title> + <para>Returns a table containing the numeric values of the constants defined by + the regex library, with the keys being the (string) names of the + constants. If the table argument is supplied then it is used as the + output table, otherwise a new table is created. The constants contained + in the returned table can then be used in most functions and methods where + compilation flags or execution flags can be specified. They can also be + used for comparing with return codes of some functions and methods for + determining the reason of failure. 
+ </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>table (optional)</term> + <listitem><para> A table for placing results into </para></listitem> + </varlistentry> <!-- function_arg_footer: table (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>A table filled with the results.</para> + </section> <!-- function_returns_footer: GRegex.flags --> + </section> <!-- function_footer: GRegex.flags --> + <section id='lua_fn_GRegex_compile_flags__table__'> + <title>GRegex.compile_flags([table])</title> + <para>Returns a table containing the numeric values of the constants defined by + the regex library for compile flags, with the keys being the (string) names of the + constants. If the table argument is supplied then it is used as the + output table, otherwise a new table is created. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>table (optional)</term> + <listitem><para> A table for placing results into </para></listitem> + </varlistentry> <!-- function_arg_footer: table (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>A table filled with the results.</para> + </section> <!-- function_returns_footer: GRegex.compile_flags --> + </section> <!-- function_footer: GRegex.compile_flags --> + <section id='lua_fn_GRegex_match_flags__table__'> + <title>GRegex.match_flags([table])</title> + <para>Returns a table containing the numeric values of the constants defined by + the regex library for match flags, with the keys being the (string) names of the + constants. If the table argument is supplied then it is used as the + output table, otherwise a new table is created. 
+ </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>table (optional)</term> + <listitem><para> A table for placing results into </para></listitem> + </varlistentry> <!-- function_arg_footer: table (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>A table filled with the results.</para> + </section> <!-- function_returns_footer: GRegex.match_flags --> + </section> <!-- function_footer: GRegex.match_flags --> + <section id='lua_fn_GRegex_match_subject__pattern___init____cf____ef__'> + <title>GRegex.match(subject, pattern, [init], [cf], [ef])</title> + <para>Searches for the first match of the regexp pattern in the string subject, starting + from offset init, subject to flags cf and ef. The pattern is compiled each time this is + called, unlike the class method 'match' function. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>pattern</term> + <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem> + </varlistentry> <!-- function_arg_footer: pattern --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>cf (optional)</term> + <listitem><para> compilation flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: cf (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns all substring matches ("captures"), in the order they appear in the pattern. 
+ false is returned for sub-patterns that did not participate in the match. If + the pattern specified no captures then the whole matched substring is + returned. On failure, returns nil. + </para> + </section> <!-- function_returns_footer: GRegex.match --> + </section> <!-- function_footer: GRegex.match --> + <section id='lua_fn_GRegex_find_subject__pattern___init____cf____ef__'> + <title>GRegex.find(subject, pattern, [init], [cf], [ef])</title> + <para>Searches for the first match of the regexp pattern in the string subject, starting + from offset init, subject to flags cf and ef. The pattern is compiled each time this is + called, unlike the class method 'find' function. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>pattern</term> + <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem> + </varlistentry> <!-- function_arg_footer: pattern --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>cf (optional)</term> + <listitem><para> compilation flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: cf (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns the start point of the match (a number), the + end point of the match (a number), and all substring matches ("captures"), in + the order they appear in the pattern. false is returned for sub-patterns that did + not participate in the match. On failure, returns nil.
+ </para> + </section> <!-- function_returns_footer: GRegex.find --> + </section> <!-- function_footer: GRegex.find --> + <section id='lua_fn_GRegex_gmatch_subject__pattern___init____cf____ef__'> + <title>GRegex.gmatch(subject, pattern, [init], [cf], [ef])</title> + <para>Returns an iterator for repeated matching of the pattern in the string subject, subject + to flags cf and ef. The function is intended for use in the generic for Lua construct. + The pattern can be a string or a GRegex object previously compiled with GRegex.new(). + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>pattern</term> + <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem> + </varlistentry> <!-- function_arg_footer: pattern --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>cf (optional)</term> + <listitem><para> compilation flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: cf (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>The iterator function is called by Lua. On every iteration (that is, on every + match), it returns all captures in the order they appear in the pattern (or + the entire match if the pattern specified no captures). The iteration will + continue till the subject fails to match.
+ </para> + </section> <!-- function_returns_footer: GRegex.gmatch --> + </section> <!-- function_footer: GRegex.gmatch --> + <section id='lua_fn_GRegex_gsub_subject__pattern___repl____max____cf____ef__'> + <title>GRegex.gsub(subject, pattern, [repl], [max], [cf], [ef])</title> + <para>Searches for all matches of the pattern in the string subject and replaces them according + to the parameters repl and max. + The pattern can be a string or a GRegex object previously compiled with GRegex.new(). + </para> + <para> For details see: + <ulink url="http://rrthomas.github.io/lrexlib/manual.html#gsub">http://rrthomas.github.io/lrexlib/manual.html#gsub</ulink> + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>pattern</term> + <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem> + </varlistentry> <!-- function_arg_footer: pattern --> + <varlistentry><term>repl (optional)</term> + <listitem><para> Substitution source string, function, table, false or nil </para></listitem> + </varlistentry> <!-- function_arg_footer: repl (optional) --> + <varlistentry><term>max (optional)</term> + <listitem><para> Maximum number of matches to search for, or control function, or nil </para></listitem> + </varlistentry> <!-- function_arg_footer: max (optional) --> + <varlistentry><term>cf (optional)</term> + <listitem><para> Compilation flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: cf (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> Match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns the subject string with the substitutions made, + the 
number of matches found, and the number of substitutions made. + </para> + </section> <!-- function_returns_footer: GRegex.gsub --> + </section> <!-- function_footer: GRegex.gsub --> + <section id='lua_fn_GRegex_split_subject__sep___cf____ef__'> + <title>GRegex.split(subject, sep, [cf], [ef])</title> + <para>Splits the subject string into parts (sections). The sep parameter + is a regular expression pattern representing separators between the sections. + The function is intended for use in the generic for Lua construct. + The function returns an iterator for repeated matching of the pattern sep in + the subject string, subject to flags cf and ef. + The sep pattern can be a string or a GRegex object previously compiled with GRegex.new(). + Unlike gmatch, there will always be at least one iteration pass, even if there are no matches in the subject. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>sep</term> + <listitem><para> A Perl-compatible regular expression pattern string or GRegex object </para></listitem> + </varlistentry> <!-- function_arg_footer: sep --> + <varlistentry><term>cf (optional)</term> + <listitem><para> compilation flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: cf (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>The iterator function is called by Lua. On every iteration, it returns + a subject section (can be an empty string), followed by all captures in the order they + appear in the sep pattern (or the entire match if the sep pattern specified no captures).
+ If there is no match (this can occur only in the last iteration), then nothing is + returned after the subject section. The iteration will continue till the end of the subject. + </para> + </section> <!-- function_returns_footer: GRegex.split --> + </section> <!-- function_footer: --> + <section id='lua_fn_GRegex_version__'> + <title>GRegex.version()</title> + <para>Returns a string containing the version of the used library.</para> + <section><title>Returns</title> + <para>The version string</para> + </section> <!-- function_returns_footer: GRegex.version --> + </section> <!-- function_footer: GRegex.version --> + <section id='lua_fn_gregex_match_subject___init____ef__'> + <title>gregex:match(subject, [init], [ef])</title> + <para>Searches for the first match of the regexp pattern in the string subject, starting + from offset init, subject to flags ef. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns all substring matches ("captures"), in the order they appear in the pattern. + false is returned for sub-patterns that did not participate in the match. If + the pattern specified no captures then the whole matched substring is + returned. nil is returned if the pattern did not match.
+ </para> + </section> <!-- function_returns_footer: gregex:match --> + </section> <!-- function_footer: gregex:match --> + <section id='lua_fn_gregex_find_subject___init____ef__'> + <title>gregex:find(subject, [init], [ef])</title> + <para>Searches for the first match of the regexp pattern in the string subject, starting + from offset init, subject to flags ef. + </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns the start point of the match (a number), the + end point of the match (a number), and all substring matches ("captures"), in + the order they appear in the pattern. false is returned for sub-patterns that did + not participate in the match. On failure, returns nil. + </para> + </section> <!-- function_returns_footer: gregex:find --> + </section> <!-- function_footer: --> + <section id='lua_fn_gregex_exec_subject___init____ef__'> + <title>gregex:exec(subject, [init], [ef])</title> + <para>Searches for the first match of the compiled GRegex object in the string subject, starting + from offset init, subject to the execution match flags ef. 
+ </para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns the start point of the first match (a number), the + end point of the first match (a number), and the offsets of substring matches ("captures" + in Lua terminology) are returned as a third result, in a table. This table contains false + in the positions where the corresponding sub-pattern did not participate in the match. + On failure, returns nil. + Example: + If the whole match is at offsets 10,20 and substring matches are at offsets 12,14 and 16,19 + then the function returns the following: 10, 20, { 12,14,16,19 }. 
+ </para> + </section> <!-- function_returns_footer: gregex:exec --> + </section> <!-- function_footer: gregex:exec --> + <section id='lua_fn_gregex_dfa_exec_subject___init____ef__'> + <title>gregex:dfa_exec(subject, [init], [ef])</title> + <para>Matches a compiled regular expression GRegex object against a given subject string, using a DFA matching algorithm.</para> + <section><title>Arguments</title> + <variablelist> + <varlistentry><term>subject</term> + <listitem><para> Subject string to search </para></listitem> + </varlistentry> <!-- function_arg_footer: subject --> + <varlistentry><term>init (optional)</term> + <listitem><para> start offset in the subject (can be negative) </para></listitem> + </varlistentry> <!-- function_arg_footer: init (optional) --> + <varlistentry><term>ef (optional)</term> + <listitem><para> match execution flags (bitwise OR) </para></listitem> + </varlistentry> <!-- function_arg_footer: ef (optional) --> + </variablelist> + </section> + <section><title>Returns</title> + <para>On success, returns the start point of the matches found (a number), a + table containing the end points of the matches found, the longer matches first, and the + number of matches found as the third return value. + On failure, returns nil.
+ Example: + If there are 3 matches found starting at offset 10 and ending at offsets 15, 20 and 25 + then the function returns the following: 10, { 25,20,15 }, 3 + </para> + </section> <!-- function_returns_footer: gregex:dfa_exec --> + </section> <!-- function_footer: gregex:dfa_exec --> + <section id='lua_fn_gregex___tostring__'> + <title>gregex:__tostring()</title> + <para>Returns a string containing debug information about the GRegex object.</para> + <section><title>Returns</title> + <para>The debug string</para> + </section> <!-- function_returns_footer: gregex:__tostring --> + </section> <!-- function_footer: --> + </section> <!-- class_footer: GRegex --> + </section> + + </chapter> diff --git a/epan/wslua/CMakeLists.txt b/epan/wslua/CMakeLists.txt index e65daeccd9..7d29798665 100644 --- a/epan/wslua/CMakeLists.txt +++ b/epan/wslua/CMakeLists.txt @@ -25,6 +25,9 @@ set(WSLUA_MODULES ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lua_bitop.c + ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lrexlib.c + ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lrexlib_glib.c + ${CMAKE_CURRENT_SOURCE_DIR}/wslua/lrexlib_glib_f.c ${CMAKE_CURRENT_SOURCE_DIR}/wslua/wslua_int64.c ${CMAKE_CURRENT_SOURCE_DIR}/wslua/wslua_tvb.c ${CMAKE_CURRENT_SOURCE_DIR}/wslua/wslua_proto.c diff --git a/epan/wslua/Makefile.am b/epan/wslua/Makefile.am index ec4afd2cd9..0a721bd4fe 100644 --- a/epan/wslua/Makefile.am +++ b/epan/wslua/Makefile.am @@ -31,6 +31,9 @@ noinst_LTLIBRARIES = libwslua.la wslua_modules = \ $(srcdir)/lua_bitop.c \ + $(srcdir)/lrexlib.c \ + $(srcdir)/lrexlib_glib.c \ + $(srcdir)/lrexlib_glib_f.c \ $(srcdir)/wslua_tvb.c \ $(srcdir)/wslua_proto.c \ $(srcdir)/wslua_int64.c \ @@ -83,6 +86,8 @@ MAINTAINERCLEANFILES = \ EXTRA_DIST = \ declare_wslua.h \ lua_bitop.h \ + lrexlib.h \ + lrexlib_algo.h \ register_wslua.c \ taps \ make-reg.pl \ @@ -125,6 +130,8 @@ checkapi: init_wslua.c \ register_wslua.c \ taps_wslua.c \ + lrexlib_glib.c \ + lrexlib_glib_f.c \ wslua_dumper.c \ wslua_field.c \ wslua_gui.c \ diff --git 
a/epan/wslua/Makefile.nmake b/epan/wslua/Makefile.nmake index 80bb18a64d..aa3e07a038 100644 --- a/epan/wslua/Makefile.nmake +++ b/epan/wslua/Makefile.nmake @@ -16,6 +16,9 @@ CFLAGS=$(WARNINGS_ARE_ERRORS) $(STANDARD_CFLAGS) \ MODULES = \ lua_bitop.c \ + lrexlib.c \ + lrexlib_glib.c \ + lrexlib_glib_f.c \ wslua_tvb.c \ wslua_proto.c \ wslua_int64.c \ @@ -34,6 +37,9 @@ OBJECTS= \ register_wslua.obj \ taps_wslua.obj \ lua_bitop.obj \ + lrexlib.obj \ + lrexlib_glib.obj \ + lrexlib_glib_f.obj \ wslua_tvb.obj \ wslua_proto.obj \ wslua_int64.obj \ diff --git a/epan/wslua/lrexlib.c b/epan/wslua/lrexlib.c new file mode 100644 index 0000000000..21038fa1e3 --- /dev/null +++ b/epan/wslua/lrexlib.c @@ -0,0 +1,266 @@ +/* common.c */ +/* See Copyright Notice in the file LICENSE */ + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include "lua.h" +#include "lauxlib.h" +#include "lrexlib.h" + +#define N_ALIGN sizeof(int) + +/* the table must be on Lua stack top */ +int get_int_field (lua_State *L, const char* field) +{ + int val; + lua_getfield (L, -1, field); + val = (int) lua_tointeger (L, -1); + lua_pop (L, 1); + return val; +} + +/* the table must be on Lua stack top */ +void set_int_field (lua_State *L, const char* field, int val) +{ + lua_pushinteger (L, val); + lua_setfield (L, -2, field); +} + +void *Lmalloc(lua_State *L, size_t size) { + void *ud; + lua_Alloc lalloc = lua_getallocf(L, &ud); + return lalloc(ud, NULL, 0, size); +} + +void *Lrealloc(lua_State *L, void *p, size_t osize, size_t nsize) { + void *ud; + lua_Alloc lalloc = lua_getallocf(L, &ud); + return lalloc(ud, p, osize, nsize); +} + +void Lfree(lua_State *L, void *p, size_t osize) { + void *ud; + lua_Alloc lalloc = lua_getallocf(L, &ud); + lalloc(ud, p, osize, 0); +} + +/* This function fills a table with string-number pairs. + The table can be passed as the 1-st lua-function parameter, + otherwise it is created. The return value is the filled table. 
+*/ +int get_flags (lua_State *L, const flag_pair **arrs) { + const flag_pair *p; + const flag_pair **pp; + int nparams = lua_gettop(L); + + if(nparams == 0) + lua_newtable(L); + else { + if(!lua_istable(L, 1)) + luaL_argerror(L, 1, "not a table"); + if(nparams > 1) + lua_pushvalue(L, 1); + } + + for(pp=arrs; *pp; ++pp) { + for(p=*pp; p->key; ++p) { + lua_pushstring(L, p->key); + lua_pushinteger(L, p->val); + lua_rawset(L, -3); + } + } + return 1; +} + +const char *get_flag_key (const flag_pair *fp, int val) { + for (; fp->key; ++fp) { + if (fp->val == val) + return fp->key; + } + return NULL; +} + +/* Classes */ + +/* + * class TFreeList + * *************** + * Simple array of pointers to TBuffer's. + * The array has fixed capacity (not expanded automatically). + */ + +void freelist_init (TFreeList *fl) { + fl->top = 0; +} + +void freelist_add (TFreeList *fl, TBuffer *buf) { + fl->list[fl->top++] = buf; +} + +void freelist_free (TFreeList *fl) { + while (fl->top > 0) + buffer_free (fl->list[--fl->top]); +} + +/* + * class TBuffer + * ************* + * Auto-extensible array of characters for building long strings incrementally. + * * Differs from luaL_Buffer in that: + * * its operations do not change Lua stack top position + * * buffer_addvalue does not extract the value from Lua stack + * * buffer_pushresult does not have to be the last operation + * * Uses TFreeList class: + * * for inserting itself into a TFreeList instance for future clean-up + * * calls freelist_free prior to calling luaL_error. + * * Has specialized "Z-operations" for maintaining mixed string/integer + * array: bufferZ_addlstring, bufferZ_addnum and bufferZ_next. + * * if the array is intended to be "mixed", then the methods + * buffer_addlstring and buffer_addvalue must not be used + * (the application will crash on bufferZ_next). + * * conversely, if the array is not intended to be "mixed", + * then the method bufferZ_next must not be used. 
+ */ + +enum { ID_NUMBER, ID_STRING }; + +void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl) { + buf->arr = Lmalloc(L, sz); + if (!buf->arr) { + freelist_free (fl); + luaL_error (L, "malloc failed"); + } + buf->size = sz; + buf->top = 0; + buf->L = L; + buf->freelist = fl; + freelist_add (fl, buf); +} + +void buffer_free (TBuffer *buf) { + Lfree(buf->L, buf->arr, buf->size); +} + +void buffer_clear (TBuffer *buf) { + buf->top = 0; +} + +void buffer_pushresult (TBuffer *buf) { + lua_pushlstring (buf->L, buf->arr, buf->top); +} + +void buffer_addbuffer (TBuffer *trg, TBuffer *src) { + buffer_addlstring (trg, src->arr, src->top); +} + +void buffer_addlstring (TBuffer *buf, const void *src, size_t sz) { + size_t newtop = buf->top + sz; + if (newtop > buf->size) { + char *p = (char*) Lrealloc (buf->L, buf->arr, buf->size, 2 * newtop); /* 2x expansion */ + if (!p) { + freelist_free (buf->freelist); + luaL_error (buf->L, "realloc failed"); + } + buf->arr = p; + buf->size = 2 * newtop; + } + if (src) + memcpy (buf->arr + buf->top, src, sz); + buf->top = newtop; +} + +void buffer_addvalue (TBuffer *buf, int stackpos) { + size_t len; + const char *p = lua_tolstring (buf->L, stackpos, &len); + buffer_addlstring (buf, p, len); +} + +void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len) { + int n; + size_t header[2] = { ID_STRING }; + header[1] = len; + buffer_addlstring (buf, header, sizeof (header)); + buffer_addlstring (buf, src, len); + n = len % N_ALIGN; + if (n) buffer_addlstring (buf, NULL, N_ALIGN - n); +} + +void bufferZ_addnum (TBuffer *buf, size_t num) { + size_t header[2] = { ID_NUMBER }; + header[1] = num; + buffer_addlstring (buf, header, sizeof (header)); +} + +/* 1. When called repeatedly on the same TBuffer, its existing data + is discarded and overwritten by the new data. + 2. The TBuffer's array is never shrunk by this function. 
+*/ +void bufferZ_putrepstring (TBuffer *BufRep, int reppos, int nsub) { + char dbuf[] = { 0, 0 }; + size_t replen; + const char *p = lua_tolstring (BufRep->L, reppos, &replen); + const char *end = p + replen; + BufRep->top = 0; + while (p < end) { + const char *q; + for (q = p; q < end && *q != '%'; ++q) + {} + if (q != p) + bufferZ_addlstring (BufRep, p, q - p); + if (q < end) { + if (++q < end) { /* skip % */ + if (isdigit (*q)) { + int num; + *dbuf = *q; + num = (int) strtol (dbuf, NULL, 10); + if (num == 1 && nsub == 0) + num = 0; + else if (num > nsub) { + freelist_free (BufRep->freelist); + luaL_error (BufRep->L, "invalid capture index"); + } + bufferZ_addnum (BufRep, num); + } + else bufferZ_addlstring (BufRep, q, 1); + } + p = q + 1; + } + else break; + } +} + +/****************************************************************************** + The intended use of this function is as follows: + size_t iter = 0; + while (bufferZ_next (buf, &iter, &num, &str)) { + if (str) do_something_with_string (str, num); + else do_something_with_number (num); + } +******************************************************************************* +*/ +int bufferZ_next (TBuffer *buf, size_t *iter, size_t *num, const char **str) { + if (*iter < buf->top) { + size_t *ptr_header = (size_t*)(buf->arr + *iter); + *num = ptr_header[1]; + *iter += 2 * sizeof (size_t); + *str = NULL; + if (*ptr_header == ID_STRING) { + int n; + *str = buf->arr + *iter; + *iter += *num; + n = *iter % N_ALIGN; + if (n) *iter += (N_ALIGN - n); + } + return 1; + } + return 0; +} + +#if LUA_VERSION_NUM > 501 +int luaL_typerror (lua_State *L, int narg, const char *tname) { + const char *msg = lua_pushfstring(L, "%s expected, got %s", + tname, luaL_typename(L, narg)); + return luaL_argerror(L, narg, msg); +} +#endif diff --git a/epan/wslua/lrexlib.h b/epan/wslua/lrexlib.h new file mode 100644 index 0000000000..ae578034d7 --- /dev/null +++ b/epan/wslua/lrexlib.h @@ -0,0 +1,130 @@ +/* common.h */ +/* +License 
of Lrexlib release +-------------------------- + +Copyright (C) Reuben Thomas 2000-2012 +Copyright (C) Shmuel Zeigerman 2004-2012 + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, +sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall +be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef _LREXLIB_H +#define _LREXLIB_H + +#include "lua.h" + +#define VERSION "2.7.2" + +#define LREXLIB_WIRESHARK + +#if LUA_VERSION_NUM > 501 + int luaL_typerror (lua_State *L, int narg, const char *tname); +#endif + +/* REX_API can be overridden from the command line or Makefile */ +#ifndef REX_API +# define REX_API LUALIB_API +#endif + +/* Special values for maxmatch in gsub. They all must be negative. 
*/ +#define GSUB_UNLIMITED -1 +#define GSUB_CONDITIONAL -2 + +/* Common structs and functions */ + +typedef struct { + const char* key; + int val; +} flag_pair; + +typedef struct { /* compile arguments */ + const char * pattern; + size_t patlen; + void * ud; + int cflags; + const char * locale; /* PCRE, Oniguruma */ + const unsigned char * tables; /* PCRE */ + int tablespos; /* PCRE */ + void * syntax; /* Oniguruma */ + const unsigned char * translate; /* GNU */ + int gnusyn; /* GNU */ +} TArgComp; + +typedef struct { /* exec arguments */ + const char * text; + size_t textlen; + int startoffset; + int eflags; + int funcpos; + int maxmatch; + int funcpos2; /* used with gsub */ + int reptype; /* used with gsub */ + size_t ovecsize; /* PCRE: dfa_exec */ + size_t wscount; /* PCRE: dfa_exec */ +} TArgExec; + +struct tagFreeList; /* forward declaration */ + +struct tagBuffer { + size_t size; + size_t top; + char * arr; + lua_State * L; + struct tagFreeList * freelist; +}; + +struct tagFreeList { + struct tagBuffer * list[16]; + int top; +}; + +typedef struct tagBuffer TBuffer; +typedef struct tagFreeList TFreeList; + +void freelist_init (TFreeList *fl); +void freelist_add (TFreeList *fl, TBuffer *buf); +void freelist_free (TFreeList *fl); + +void buffer_init (TBuffer *buf, size_t sz, lua_State *L, TFreeList *fl); +void buffer_free (TBuffer *buf); +void buffer_clear (TBuffer *buf); +void buffer_addbuffer (TBuffer *trg, TBuffer *src); +void buffer_addlstring (TBuffer *buf, const void *src, size_t sz); +void buffer_addvalue (TBuffer *buf, int stackpos); +void buffer_pushresult (TBuffer *buf); + +void bufferZ_putrepstring (TBuffer *buf, int reppos, int nsub); +int bufferZ_next (TBuffer *buf, size_t *iter, size_t *len, const char **str); +void bufferZ_addlstring (TBuffer *buf, const void *src, size_t len); +void bufferZ_addnum (TBuffer *buf, size_t num); + +int get_int_field (lua_State *L, const char* field); +void set_int_field (lua_State *L, const char* field, int val); 
+int get_flags (lua_State *L, const flag_pair **arr); +const char *get_flag_key (const flag_pair *fp, int val); +void *Lmalloc (lua_State *L, size_t size); +void *Lrealloc (lua_State *L, void *p, size_t osize, size_t nsize); +void Lfree (lua_State *L, void *p, size_t size); + +#endif diff --git a/epan/wslua/lrexlib_algo.h b/epan/wslua/lrexlib_algo.h new file mode 100644 index 0000000000..4e97c2b1b9 --- /dev/null +++ b/epan/wslua/lrexlib_algo.h @@ -0,0 +1,755 @@ +/* lrexlib_algo.h */ +/* See Copyright Notice in the file lrexlib.h */ + +#include "lrexlib.h" + +#ifdef LREXLIB_WIRESHARK +# define WSLUA_TYPEOF_FIELD "__typeof" +# define REX_CREATEGLOBALVAR +#endif + +#define REX_VERSION "Lrexlib " VERSION + +/* Forward declarations */ +static void gmatch_pushsubject (lua_State *L, TArgExec *argE); +static int findmatch_exec (TUserdata *ud, TArgExec *argE); +static int split_exec (TUserdata *ud, TArgExec *argE, int offset); +static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud); +static int generate_error (lua_State *L, const TUserdata *ud, int errcode); + +#if LUA_VERSION_NUM == 501 +# define ALG_ENVIRONINDEX LUA_ENVIRONINDEX +#else +# define ALG_ENVIRONINDEX lua_upvalueindex(1) +#endif + +#ifndef ALG_CHARSIZE +# define ALG_CHARSIZE 1 +#endif + +#ifndef BUFFERZ_PUTREPSTRING +# define BUFFERZ_PUTREPSTRING bufferZ_putrepstring +#endif + +#ifndef ALG_GETCARGS +# define ALG_GETCARGS(a,b,c) +#endif + +#ifndef ALG_GETEFLAGS +# define ALG_GETEFLAGS(L,idx) luaL_optint (L, idx, ALG_EFLAGS_DFLT) +#endif + +#ifndef DO_NAMED_SUBPATTERNS +#define DO_NAMED_SUBPATTERNS(a,b,c) +#endif + +/* When doing an iterative search, there can occur a situation of a zero-length + * match at the current position, that prevents further advance on the subject + * string. 
+ * There are two ways to handle that (AFAIK): + * a) Advance by one character (continue the search from the next position), + * or + * b) Search for a non-zero-length match that begins from the current + * position ("retry" the search). If the match is not found then advance + * by one character. + * The "b)" seems more correct, but most regex libraries expose no API for that. + * The known exception is PCRE that has flags PCRE_NOTEMPTY and PCRE_ANCHORED. + */ +#ifdef ALG_USERETRY + #define SET_RETRY(a,b) (a=b) + static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset, int retry); + static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry); + #define GSUB_EXEC gsub_exec + #define GMATCH_EXEC gmatch_exec +#else + #define SET_RETRY(a,b) ((void)a) + static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset); + static int gmatch_exec (TUserdata *ud, TArgExec *argE); + #define GSUB_EXEC(a,b,c,d) gsub_exec(a,b,c) + #define GMATCH_EXEC(a,b,c) gmatch_exec(a,b) +#endif + + +#define METHOD_FIND 0 +#define METHOD_MATCH 1 +#define METHOD_EXEC 2 +#define METHOD_TFIND 3 + + +static int OptLimit (lua_State *L, int pos) { + if (lua_isnoneornil (L, pos)) + return GSUB_UNLIMITED; + if (lua_isfunction (L, pos)) + return GSUB_CONDITIONAL; + if (lua_isnumber (L, pos)) { + int a = (int) lua_tointeger (L, pos); + return a < 0 ? 
0 : a; + } + return luaL_typerror (L, pos, "number or function"); +} + + +static int get_startoffset(lua_State *L, int stackpos, size_t len) { + int startoffset = luaL_optint(L, stackpos, 1); + if(startoffset > 0) + startoffset--; + else if(startoffset < 0) { + startoffset += len/ALG_CHARSIZE; + if(startoffset < 0) + startoffset = 0; + } + return startoffset*ALG_CHARSIZE; +} + + +static TUserdata* test_ud (lua_State *L, int pos) +{ + TUserdata *ud; + if (lua_getmetatable(L, pos) && + lua_rawequal(L, -1, ALG_ENVIRONINDEX) && + (ud = (TUserdata *)lua_touserdata(L, pos)) != NULL) { + lua_pop(L, 1); + return ud; + } + return NULL; +} + + +static TUserdata* check_ud (lua_State *L) +{ + TUserdata *ud = test_ud(L, 1); + if (ud == NULL) luaL_typerror(L, 1, REX_TYPENAME); + return ud; +} + + +static void check_subject (lua_State *L, int pos, TArgExec *argE) +{ + int stype; + argE->text = lua_tolstring (L, pos, &argE->textlen); + stype = lua_type (L, pos); + if (stype != LUA_TSTRING && stype != LUA_TTABLE && stype != LUA_TUSERDATA) { + luaL_typerror (L, pos, "string, table or userdata"); + } else if (argE->text == NULL) { + int type; + lua_getfield (L, pos, "topointer"); + if (lua_type (L, -1) != LUA_TFUNCTION) + luaL_error (L, "subject has no topointer method"); + lua_pushvalue (L, pos); + lua_call (L, 1, 1); + type = lua_type (L, -1); + if (type != LUA_TLIGHTUSERDATA) + luaL_error (L, "subject's topointer method returned %s (expected lightuserdata)", + lua_typename (L, type)); + argE->text = lua_touserdata (L, -1); + lua_pop (L, 1); +#if LUA_VERSION_NUM == 501 + lua_objlen (L, pos); +#else + lua_len (L, pos); +#endif + type = lua_type (L, -1); + if (type != LUA_TNUMBER) + luaL_error (L, "subject's length is %s (expected number)", + lua_typename (L, type)); + argE->textlen = lua_tointeger (L, -1); + lua_pop (L, 1); + } +} + +static void check_pattern (lua_State *L, int pos, TArgComp *argC) +{ + if (lua_isstring (L, pos)) { + argC->pattern = lua_tolstring (L, pos, 
&argC->patlen); + argC->ud = NULL; + } + else if ((argC->ud = test_ud (L, pos)) == NULL) + luaL_typerror(L, pos, "string or "REX_TYPENAME); +} + +static void checkarg_new (lua_State *L, TArgComp *argC) { + argC->pattern = luaL_checklstring (L, 1, &argC->patlen); + argC->cflags = ALG_GETCFLAGS (L, 2); + ALG_GETCARGS (L, 3, argC); +} + + +/* function gsub (s, patt, f, [n], [cf], [ef], [larg...]) */ +static void checkarg_gsub (lua_State *L, TArgComp *argC, TArgExec *argE) { + check_subject (L, 1, argE); + check_pattern (L, 2, argC); + lua_tostring (L, 3); /* converts number (if any) to string */ + argE->reptype = lua_type (L, 3); + if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE && + argE->reptype != LUA_TFUNCTION && argE->reptype != LUA_TNIL && + (argE->reptype != LUA_TBOOLEAN || + (argE->reptype == LUA_TBOOLEAN && lua_toboolean (L, 3)))) { + luaL_typerror (L, 3, "string, table, function, false or nil"); + } + argE->funcpos = 3; + argE->funcpos2 = 4; + argE->maxmatch = OptLimit (L, 4); + argC->cflags = ALG_GETCFLAGS (L, 5); + argE->eflags = ALG_GETEFLAGS (L, 6); + ALG_GETCARGS (L, 7, argC); +} + + +/* function find (s, patt, [st], [cf], [ef], [larg...]) */ +/* function match (s, patt, [st], [cf], [ef], [larg...]) */ +static void checkarg_find_func (lua_State *L, TArgComp *argC, TArgExec *argE) { + check_subject (L, 1, argE); + check_pattern (L, 2, argC); + argE->startoffset = get_startoffset (L, 3, argE->textlen); + argC->cflags = ALG_GETCFLAGS (L, 4); + argE->eflags = ALG_GETEFLAGS (L, 5); + ALG_GETCARGS (L, 6, argC); +} + + +/* function gmatch (s, patt, [cf], [ef], [larg...]) */ +/* function split (s, patt, [cf], [ef], [larg...]) */ +static void checkarg_gmatch_split (lua_State *L, TArgComp *argC, TArgExec *argE) { + check_subject (L, 1, argE); + check_pattern (L, 2, argC); + argC->cflags = ALG_GETCFLAGS (L, 3); + argE->eflags = ALG_GETEFLAGS (L, 4); + ALG_GETCARGS (L, 5, argC); +} + + +/* method r:tfind (s, [st], [ef]) */ +/* method r:exec (s, 
[st], [ef]) */ +/* method r:find (s, [st], [ef]) */ +/* method r:match (s, [st], [ef]) */ +static void checkarg_find_method (lua_State *L, TArgExec *argE, TUserdata **ud) { + *ud = check_ud (L); + check_subject (L, 2, argE); + argE->startoffset = get_startoffset (L, 3, argE->textlen); + argE->eflags = ALG_GETEFLAGS (L, 4); +} + + +static int algf_new (lua_State *L) { + TArgComp argC; + checkarg_new (L, &argC); + return compile_regex (L, &argC, NULL); +} + +static void push_substrings (lua_State *L, TUserdata *ud, const char *text, + TFreeList *freelist) { + int i; + if (lua_checkstack (L, ALG_NSUB(ud)) == 0) { + if (freelist) + freelist_free (freelist); + luaL_error (L, "cannot add %d stack slots", ALG_NSUB(ud)); + } + for (i = 1; i <= ALG_NSUB(ud); i++) { + ALG_PUSHSUB_OR_FALSE (L, ud, text, i); + } +} + +static int algf_gsub (lua_State *L) { + TUserdata *ud; + TArgComp argC; + TArgExec argE; + int n_match = 0, n_subst = 0, st = 0, retry; + TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut; + TFreeList freelist; + /*------------------------------------------------------------------*/ + checkarg_gsub (L, &argC, &argE); + if (argC.ud) { + ud = (TUserdata*) argC.ud; + lua_pushvalue (L, 2); + } + else compile_regex (L, &argC, &ud); + freelist_init (&freelist); + /*------------------------------------------------------------------*/ + if (argE.reptype == LUA_TSTRING) { + buffer_init (&BufRep, 256, L, &freelist); + BUFFERZ_PUTREPSTRING (&BufRep, argE.funcpos, ALG_NSUB(ud)); + } + /*------------------------------------------------------------------*/ + if (argE.maxmatch == GSUB_CONDITIONAL) { + buffer_init (&BufTemp, 1024, L, &freelist); + pBuf = &BufTemp; + } + /*------------------------------------------------------------------*/ + buffer_init (&BufOut, 1024, L, &freelist); + SET_RETRY (retry, 0); + while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) { + int from, to, res; + int curr_subst = 0; + res = GSUB_EXEC (ud, &argE, st, retry); + if 
(ALG_NOMATCH (res)) { +#ifdef ALG_USERETRY + if (retry) { + if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */ + buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); + st += ALG_CHARSIZE; + retry = 0; + continue; + } + } +#endif + break; + } + else if (!ALG_ISMATCH (res)) { + freelist_free (&freelist); + return generate_error (L, ud, res); + } + ++n_match; + from = ALG_BASE(st) + ALG_SUBBEG(ud,0); + to = ALG_BASE(st) + ALG_SUBEND(ud,0); + if (st < from) { + buffer_addlstring (&BufOut, argE.text + st, from - st); +#ifdef ALG_PULL + st = from; +#endif + } + /*----------------------------------------------------------------*/ + if (argE.reptype == LUA_TSTRING) { + size_t iter = 0, num; + const char *str; + while (bufferZ_next (&BufRep, &iter, &num, &str)) { + if (str) + buffer_addlstring (pBuf, str, num); + else if (num == 0 || ALG_SUBVALID (ud,(int)num)) + buffer_addlstring (pBuf, argE.text + ALG_BASE(st) + ALG_SUBBEG(ud,(int)num), ALG_SUBLEN(ud,(int)num)); + } + curr_subst = 1; + } + /*----------------------------------------------------------------*/ + else if (argE.reptype == LUA_TTABLE) { + if (ALG_NSUB(ud) > 0) + ALG_PUSHSUB_OR_FALSE (L, ud, argE.text + ALG_BASE(st), 1); + else + lua_pushlstring (L, argE.text + from, to - from); + lua_gettable (L, argE.funcpos); + } + /*----------------------------------------------------------------*/ + else if (argE.reptype == LUA_TFUNCTION) { + int narg; + lua_pushvalue (L, argE.funcpos); + if (ALG_NSUB(ud) > 0) { + push_substrings (L, ud, argE.text + ALG_BASE(st), &freelist); + narg = ALG_NSUB(ud); + } + else { + lua_pushlstring (L, argE.text + from, to - from); + narg = 1; + } + if (0 != lua_pcall (L, narg, 1, 0)) { + freelist_free (&freelist); + return lua_error (L); /* re-raise the error */ + } + } + /*----------------------------------------------------------------*/ + else if (argE.reptype == LUA_TNIL || argE.reptype == LUA_TBOOLEAN) { + buffer_addlstring (pBuf, argE.text + from, to - from); + } 
+ /*----------------------------------------------------------------*/ + if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) { + if (lua_tostring (L, -1)) { + buffer_addvalue (pBuf, -1); + curr_subst = 1; + } + else if (!lua_toboolean (L, -1)) + buffer_addlstring (pBuf, argE.text + from, to - from); + else { + freelist_free (&freelist); + luaL_error (L, "invalid replacement value (a %s)", luaL_typename (L, -1)); + } + if (argE.maxmatch != GSUB_CONDITIONAL) + lua_pop (L, 1); + } + /*----------------------------------------------------------------*/ + if (argE.maxmatch == GSUB_CONDITIONAL) { + /* Call the function */ + lua_pushvalue (L, argE.funcpos2); + lua_pushinteger (L, from/ALG_CHARSIZE + 1); + lua_pushinteger (L, to/ALG_CHARSIZE); + if (argE.reptype == LUA_TSTRING) + buffer_pushresult (&BufTemp); + else { + lua_pushvalue (L, -4); + lua_remove (L, -5); + } + if (0 != lua_pcall (L, 3, 2, 0)) { + freelist_free (&freelist); + lua_error (L); /* re-raise the error */ + } + /* Handle the 1-st return value */ + if (lua_isstring (L, -2)) { /* coercion is allowed here */ + buffer_addvalue (&BufOut, -2); /* rep2 */ + curr_subst = 1; + } + else if (lua_toboolean (L, -2)) + buffer_addbuffer (&BufOut, &BufTemp); /* rep1 */ + else { + buffer_addlstring (&BufOut, argE.text + from, to - from); /* "no" */ + curr_subst = 0; + } + /* Handle the 2-nd return value */ + if (lua_type (L, -1) == LUA_TNUMBER) { /* no coercion is allowed here */ + int n = (int) lua_tointeger (L, -1); + if (n < 0) /* n */ + n = 0; + argE.maxmatch = n_match + n; + } + else if (lua_toboolean (L, -1)) /* "yes to all" */ + argE.maxmatch = GSUB_UNLIMITED; + else + buffer_clear (&BufTemp); + + lua_pop (L, 2); + if (argE.maxmatch != GSUB_CONDITIONAL) + pBuf = &BufOut; + } + /*----------------------------------------------------------------*/ + n_subst += curr_subst; + if (st < to) { + st = to; + SET_RETRY (retry, 0); + } + else if (st < (int)argE.textlen) { +#ifdef ALG_USERETRY + retry = 1; +#else + 
/* advance by 1 char (not replaced) */ + buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE); + st += ALG_CHARSIZE; +#endif + } + else break; + } + /*------------------------------------------------------------------*/ + buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st); + buffer_pushresult (&BufOut); + lua_pushinteger (L, n_match); + lua_pushinteger (L, n_subst); + freelist_free (&freelist); + return 3; +} + + +static int finish_generic_find (lua_State *L, TUserdata *ud, TArgExec *argE, + int method, int res) +{ + if (ALG_ISMATCH (res)) { + if (method == METHOD_FIND) + ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE->startoffset), 0); + if (ALG_NSUB(ud)) /* push captures */ + push_substrings (L, ud, argE->text, NULL); + else if (method != METHOD_FIND) { + ALG_PUSHSUB (L, ud, argE->text, 0); + return 1; + } + return (method == METHOD_FIND) ? ALG_NSUB(ud) + 2 : ALG_NSUB(ud); + } + else if (ALG_NOMATCH (res)) + return lua_pushnil (L), 1; + else + return generate_error (L, ud, res); +} + + +static int generic_find_func (lua_State *L, int method) { + TUserdata *ud; + TArgComp argC; + TArgExec argE; + int res; + + checkarg_find_func (L, &argC, &argE); + if (argE.startoffset > (int)argE.textlen) + return lua_pushnil (L), 1; + + if (argC.ud) { + ud = (TUserdata*) argC.ud; + lua_pushvalue (L, 2); + } + else compile_regex (L, &argC, &ud); + res = findmatch_exec (ud, &argE); + return finish_generic_find (L, ud, &argE, method, res); +} + + +static int algf_find (lua_State *L) { + return generic_find_func (L, METHOD_FIND); +} + + +static int algf_match (lua_State *L) { + return generic_find_func (L, METHOD_MATCH); +} + + +static int gmatch_iter (lua_State *L) { + int retry; + TArgExec argE; + TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); + argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); + argE.eflags = (int) lua_tointeger (L, lua_upvalueindex (3)); + argE.startoffset = (int) lua_tointeger (L, lua_upvalueindex (4)); +#ifdef 
ALG_USERETRY + retry = (int) lua_tointeger (L, lua_upvalueindex (5)); +#endif + + if (argE.startoffset > (int)argE.textlen) + return 0; + + while (1) { + int res = GMATCH_EXEC (ud, &argE, retry); + if (ALG_ISMATCH (res)) { + int incr = 0; + if (ALG_SUBLEN(ud,0)) { + SET_RETRY (retry, 0); + } + else { /* no progress: prevent endless loop */ +#ifdef ALG_USERETRY + SET_RETRY (retry, 1); +#else + incr = ALG_CHARSIZE; +#endif + } + lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */ + lua_replace (L, lua_upvalueindex (4)); +#ifdef ALG_USERETRY + lua_pushinteger (L, retry); + lua_replace (L, lua_upvalueindex (5)); /* update retry */ +#endif + /* push either captures or entire match */ + if (ALG_NSUB(ud)) { + push_substrings (L, ud, argE.text, NULL); + return ALG_NSUB(ud); + } + else { + ALG_PUSHSUB (L, ud, argE.text, 0); + return 1; + } + } + else if (ALG_NOMATCH (res)) { +#ifdef ALG_USERETRY + if (retry) { + if (argE.startoffset < (int)argE.textlen) { + ++argE.startoffset; /* advance by 1 char */ + SET_RETRY (retry, 0); + continue; + } + } +#endif + return 0; + } + else + return generate_error (L, ud, res); + } +} + + +static int split_iter (lua_State *L) { + int incr, newoffset, res; + TArgExec argE; + TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1)); + argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen); + argE.eflags = (int) lua_tointeger (L, lua_upvalueindex (3)); + argE.startoffset = (int) lua_tointeger (L, lua_upvalueindex (4)); + incr = (int) lua_tointeger (L, lua_upvalueindex (5)); + + if (argE.startoffset > (int)argE.textlen) + return 0; + + if ((newoffset = argE.startoffset + incr) > (int)argE.textlen) + goto nomatch; + + res = split_exec (ud, &argE, newoffset); + if (ALG_ISMATCH (res)) { + lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */ + lua_replace (L, lua_upvalueindex (4)); + lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 
0 : ALG_CHARSIZE); /* update incr */ + lua_replace (L, lua_upvalueindex (5)); + /* push text preceding the match */ + lua_pushlstring (L, argE.text + argE.startoffset, + ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset); + /* push either captures or entire match */ + if (ALG_NSUB(ud)) { + push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL); + return 1 + ALG_NSUB(ud); + } + else { + ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0); + return 2; + } + } + else if (ALG_NOMATCH (res)) + goto nomatch; + else + return generate_error (L, ud, res); + +nomatch: + lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */ + lua_replace (L, lua_upvalueindex (4)); /* update start offset */ + lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset); + return 1; +} + + +static int algf_gmatch (lua_State *L) +{ + TArgComp argC; + TArgExec argE; + TUserdata *ud; + checkarg_gmatch_split (L, &argC, &argE); + if (argC.ud) { + ud = (TUserdata*) argC.ud; + lua_pushvalue (L, 2); + } + else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */ + gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ + lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ + lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ +#ifdef ALG_USERETRY + lua_pushinteger (L, 0); /* 5-th upvalue: retry */ + lua_pushcclosure (L, gmatch_iter, 5); +#else + lua_pushcclosure (L, gmatch_iter, 4); +#endif + return 1; +} + +static int algf_split (lua_State *L) +{ + TArgComp argC; + TArgExec argE; + TUserdata *ud; + checkarg_gmatch_split (L, &argC, &argE); + if (argC.ud) { + ud = (TUserdata*) argC.ud; + lua_pushvalue (L, 2); + } + else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */ + gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */ + lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */ + lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */ + lua_pushinteger (L, 0); /* 5-th upvalue: incr */ + lua_pushcclosure (L, split_iter, 5); + return 1; +} + 
+ +static void push_substring_table (lua_State *L, TUserdata *ud, const char *text) { + int i; + lua_newtable (L); + for (i = 1; i <= ALG_NSUB(ud); i++) { + ALG_PUSHSUB_OR_FALSE (L, ud, text, i); + lua_rawseti (L, -2, i); + } +} + + +static void push_offset_table (lua_State *L, TUserdata *ud, int startoffset) { + int i, j; + lua_newtable (L); + for (i=1, j=1; i <= ALG_NSUB(ud); i++) { + if (ALG_SUBVALID (ud,i)) { + ALG_PUSHSTART (L, ud, startoffset, i); + lua_rawseti (L, -2, j++); + ALG_PUSHEND (L, ud, startoffset, i); + lua_rawseti (L, -2, j++); + } + else { + lua_pushboolean (L, 0); + lua_rawseti (L, -2, j++); + lua_pushboolean (L, 0); + lua_rawseti (L, -2, j++); + } + } +} + + +static int generic_find_method (lua_State *L, int method) { + TUserdata *ud; + TArgExec argE; + int res; + + checkarg_find_method (L, &argE, &ud); + if (argE.startoffset > (int)argE.textlen) + return lua_pushnil(L), 1; + + res = findmatch_exec (ud, &argE); + if (ALG_ISMATCH (res)) { + switch (method) { + case METHOD_EXEC: + ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0); + push_offset_table (L, ud, ALG_BASE(argE.startoffset)); + DO_NAMED_SUBPATTERNS (L, ud, argE.text); + return 3; + case METHOD_TFIND: + ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0); + push_substring_table (L, ud, argE.text); + DO_NAMED_SUBPATTERNS (L, ud, argE.text); + return 3; + case METHOD_MATCH: + case METHOD_FIND: + return finish_generic_find (L, ud, &argE, method, res); + } + return 0; + } + else if (ALG_NOMATCH (res)) + return lua_pushnil (L), 1; + else + return generate_error(L, ud, res); +} + + +static int algm_find (lua_State *L) { + return generic_find_method (L, METHOD_FIND); +} +static int algm_match (lua_State *L) { + return generic_find_method (L, METHOD_MATCH); +} +static int algm_tfind (lua_State *L) { + return generic_find_method (L, METHOD_TFIND); +} +static int algm_exec (lua_State *L) { + return generic_find_method (L, METHOD_EXEC); +} + +static void alg_register (lua_State *L, const 
luaL_Reg *r_methods, + const luaL_Reg *r_functions, const char *name) { + /* Create a new function environment to serve as a metatable for methods. */ +#if LUA_VERSION_NUM == 501 + lua_newtable (L); + lua_pushvalue (L, -1); + lua_replace (L, LUA_ENVIRONINDEX); + luaL_register (L, NULL, r_methods); +#else + luaL_newmetatable(L, REX_TYPENAME); + lua_pushvalue(L, -1); + luaL_setfuncs (L, r_methods, 1); +#endif +#ifdef LREXLIB_WIRESHARK + lua_pushstring(L, REX_LIBNAME); + lua_setfield(L, -2, WSLUA_TYPEOF_FIELD); +#endif + lua_pushvalue(L, -1); /* mt.__index = mt */ + lua_setfield(L, -2, "__index"); + + /* Register functions. */ + lua_createtable(L, 0, 8); +#if LUA_VERSION_NUM == 501 + luaL_register (L, NULL, r_functions); +#else + lua_pushvalue(L, -2); + luaL_setfuncs (L, r_functions, 1); +#endif +#ifdef REX_CREATEGLOBALVAR + lua_pushvalue(L, -1); + lua_setglobal(L, REX_LIBNAME); +#endif +#ifdef LREXLIB_WIRESHARK + lua_pushstring(L, REX_LIBNAME); + lua_setfield(L, -2, WSLUA_TYPEOF_FIELD); +#endif + lua_pushfstring (L, REX_VERSION" (for %s)", name); + lua_setfield (L, -2, "_VERSION"); +} diff --git a/epan/wslua/lrexlib_glib.c b/epan/wslua/lrexlib_glib.c new file mode 100644 index 0000000000..01b60de133 --- /dev/null +++ b/epan/wslua/lrexlib_glib.c @@ -0,0 +1,414 @@ +/* lrexlib_glib.c - Lua binding of GLib Regex library */ +/* See Copyright Notice in the file lrexlib.h */ + +/* This is similar to Lrexlib's PCRE implementation, but has been changed + * for GLib's pcre implementation, which is different. + * + * The changes made by me, Hadriel Kaplan, are in the Public Domain, or + * under the MIT license if your country does not allow Public Domain. 
+ * + * Changes relative to Lrelxib-PCRE: + * - No chartables or locale handling + * - dfa_exec doesn't take 'ovecsize' nor 'wscount' args + * - dfa_exec returns boolean true for partial match, without subcapture info + * - named subgroups do not return a table of name-keyed entries, because + * GLib doesn't provide a way to learn that information + * - there is no 'config()' function, since GLib doesn't offer such info + * - the 'flags()' function still works, returning all flags, but two new + * functions 'compile_flags()' and 'match_flags()' return just their respective + * flags, since GLib has a different and smaller set of such flags, for + * regex compile vs. match functions + * - Using POSIX character classes against strings with non-ASCII characters + * might match high-order characters, because glib always sets PCRE_UCP + * even if G_REGEX_RAW is set. For example, '[:alpha;]' and '\w' match certain + * non-ASCII bytes. + * - obviously quite a bit else is changed to interface to GLib's regex instead + * of PCRE, but hopefully those changes aren't visible to user/caller + */ + +#include <stdlib.h> +#include <string.h> +#include <locale.h> +#include <ctype.h> +#include <glib.h> + +#include "lua.h" +#include "lauxlib.h" +#include "lrexlib.h" + +extern int Gregex_get_flags (lua_State *L); +extern int Gregex_get_compile_flags (lua_State *L); +extern int Gregex_get_match_flags (lua_State *L); +extern flag_pair gregex_error_flags[]; + +/* These 2 settings may be redefined from the command-line or the makefile. + * They should be kept in sync between themselves and with the target name. 
+ */ +#ifndef REX_LIBNAME +# ifdef LREXLIB_WIRESHARK +# define REX_LIBNAME "GRegex" +# else +# define REX_LIBNAME "rex_glib" +# endif +#endif +#ifndef REX_OPENLIB +# define REX_OPENLIB luaopen_rex_glib +#endif + +#define REX_TYPENAME REX_LIBNAME"_regex" + +#define ALG_CFLAGS_DFLT G_REGEX_RAW +#define ALG_EFLAGS_DFLT 0 + +static int getcflags (lua_State *L, int pos); +#define ALG_GETCFLAGS(L,pos) getcflags(L, pos) + +#define ALG_NOMATCH(res) ((res) == FALSE) +#define ALG_ISMATCH(res) ((res) == TRUE) +#define ALG_SUBBEG(ud,n) getSubStartPos(ud,n) +#define ALG_SUBEND(ud,n) getSubEndPos(ud,n) +#define ALG_SUBLEN(ud,n) (ALG_SUBEND(ud,n) - ALG_SUBBEG(ud,n)) +#define ALG_SUBVALID(ud,n) (ALG_SUBBEG(ud,n) >= 0) +#define ALG_NSUB(ud) ((int) g_regex_get_capture_count(ud->pr)) + +#define ALG_PUSHSUB(L,ud,text,n) \ + lua_pushlstring (L, (text) + ALG_SUBBEG(ud,n), ALG_SUBLEN(ud,n)) + +#define ALG_PUSHSUB_OR_FALSE(L,ud,text,n) \ + (ALG_SUBVALID(ud,n) ? ALG_PUSHSUB (L,ud,text,n) : lua_pushboolean (L,0)) + +#define ALG_PUSHSTART(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBBEG(ud,n) + 1) +#define ALG_PUSHEND(L,ud,offs,n) lua_pushinteger(L, (offs) + ALG_SUBEND(ud,n)) +#define ALG_PUSHOFFSETS(L,ud,offs,n) \ + (ALG_PUSHSTART(L,ud,offs,n), ALG_PUSHEND(L,ud,offs,n)) + +#define ALG_BASE(st) 0 +#define ALG_PULL +/* we define ALG_USERETRY because GLib does expose PCRE's NOTEMPTY and ANCHORED flags */ +#define ALG_USERETRY + +#define VERSION_GLIB (GLIB_MAJOR_VERSION*100 + GLIB_MINOR_VERSION) +/* unfortunately GLib doesn't expose cerrtain macros it would be nice to have */ +#if VERSION_GLIB >= 234 +# define G_REGEX_COMPILE_MASK_234 (G_REGEX_FIRSTLINE | \ + G_REGEX_NEWLINE_ANYCRLF | \ + G_REGEX_BSR_ANYCRLF | \ + G_REGEX_JAVASCRIPT_COMPAT) +#else +# define G_REGEX_COMPILE_MASK_234 0 +#endif + +/* Mask of all the possible values for GRegexCompileFlags. 
*/ +#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \ + G_REGEX_MULTILINE | \ + G_REGEX_DOTALL | \ + G_REGEX_EXTENDED | \ + G_REGEX_ANCHORED | \ + G_REGEX_DOLLAR_ENDONLY | \ + G_REGEX_UNGREEDY | \ + G_REGEX_RAW | \ + G_REGEX_NO_AUTO_CAPTURE | \ + G_REGEX_OPTIMIZE | \ + G_REGEX_DUPNAMES | \ + G_REGEX_NEWLINE_CR | \ + G_REGEX_NEWLINE_LF | \ + G_REGEX_NEWLINE_CRLF | \ + G_REGEX_COMPILE_MASK_234) + +#if VERSION_GLIB >= 234 +# define G_REGEX_MATCH_MASK_234 (G_REGEX_MATCH_NEWLINE_ANYCRLF | \ + G_REGEX_MATCH_BSR_ANYCRLF | \ + G_REGEX_MATCH_BSR_ANY | \ + G_REGEX_MATCH_PARTIAL_SOFT | \ + G_REGEX_MATCH_PARTIAL_HARD | \ + G_REGEX_MATCH_NOTEMPTY_ATSTART) +#else +# define G_REGEX_MATCH_MASK_234 0 +#endif + +/* Mask of all the possible values for GRegexMatchFlags. */ +#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \ + G_REGEX_MATCH_NOTBOL | \ + G_REGEX_MATCH_NOTEOL | \ + G_REGEX_MATCH_NOTEMPTY | \ + G_REGEX_MATCH_PARTIAL | \ + G_REGEX_MATCH_NEWLINE_CR | \ + G_REGEX_MATCH_NEWLINE_LF | \ + G_REGEX_MATCH_NEWLINE_CRLF | \ + G_REGEX_MATCH_NEWLINE_ANY) + + +static int check_eflags(lua_State *L, const int idx, const int def); +#define ALG_GETEFLAGS(L,idx) check_eflags(L, idx, ALG_EFLAGS_DFLT) + +typedef struct { + GRegex * pr; + GMatchInfo * match_info; + GError * error; /* didn't want to put this here, but can't free it otherwise */ + int freed; +} TGrgx; + +static void minfo_free(TGrgx* ud) { + if (ud->match_info) + g_match_info_free (ud->match_info); + ud->match_info = NULL; +} + +static void gerror_free(TGrgx* ud) { + if (ud->error) + g_error_free (ud->error); + ud->error = NULL; +} + +static int getSubStartPos(TGrgx* ud, int n) { + int start_pos = -1; + g_match_info_fetch_pos (ud->match_info, n, &start_pos, NULL); + return start_pos; +} + +static int getSubEndPos(TGrgx* ud, int n) { + int end_pos = -1; + g_match_info_fetch_pos (ud->match_info, n, NULL, &end_pos); + return end_pos; +} + +#define TUserdata TGrgx + +/* TODO: handle named subpatterns somehow */ +#if 0 +static 
void do_named_subpatterns (lua_State *L, TGrgx *ud, const char *text); +# define DO_NAMED_SUBPATTERNS do_named_subpatterns +#endif + +#include "lrexlib_algo.h" + +/* Functions + ****************************************************************************** + */ + +static int getcflags (lua_State *L, int pos) { + switch (lua_type (L, pos)) { + case LUA_TNONE: + case LUA_TNIL: + return ALG_CFLAGS_DFLT; + case LUA_TNUMBER: { + int res = (int) lua_tointeger (L, pos); + if ((res & ~G_REGEX_COMPILE_MASK) != 0) { + return luaL_error (L, "GLib Regex compile flag is invalid"); + } + return res; + } + case LUA_TSTRING: { + const char *s = lua_tostring (L, pos); + int res = 0, ch; + while ((ch = *s++) != '\0') { + if (ch == 'i') res |= G_REGEX_CASELESS; + else if (ch == 'm') res |= G_REGEX_MULTILINE; + else if (ch == 's') res |= G_REGEX_DOTALL; + else if (ch == 'x') res |= G_REGEX_EXTENDED; + else if (ch == 'U') res |= G_REGEX_UNGREEDY; + } + return (int)res; + } + default: + return luaL_typerror (L, pos, "number or string"); + } +} + +static int check_eflags(lua_State *L, const int idx, const int def) { + int eflags = luaL_optint (L, idx, def); + if ((eflags & ~G_REGEX_MATCH_MASK) != 0) { + return luaL_error (L, "GLib Regex match flag is invalid"); + } + return eflags; +} + +/* this function is used in algo.h as well */ +static int generate_error (lua_State *L, const TGrgx *ud, int errcode) { + const char *key = get_flag_key (gregex_error_flags, ud->error->code); + (void) errcode; + if (key) + return luaL_error (L, "error G_REGEX_%s (%s)", key, ud->error->message); + else + return luaL_error (L, "GLib Regex error: %s (code %d)", ud->error->message, ud->error->code); +} + + +static int compile_regex (lua_State *L, const TArgComp *argC, TGrgx **pud) { + TGrgx *ud; + + ud = (TGrgx*)lua_newuserdata (L, sizeof (TGrgx)); + memset (ud, 0, sizeof (TGrgx)); /* initialize all members to 0 */ + lua_pushvalue (L, ALG_ENVIRONINDEX); + lua_setmetatable (L, -2); + + ud->pr = g_regex_new 
(argC->pattern, argC->cflags | G_REGEX_RAW, 0, &ud->error); + if (!ud->pr) + return luaL_error (L, "%s (code: %d)", ud->error->message, ud->error->code); + + if (pud) *pud = ud; + return 1; +} + +/* method r:dfa_exec (s, [st], [ef]) */ +static void checkarg_dfa_exec (lua_State *L, TArgExec *argE, TGrgx **ud) { + *ud = check_ud (L); + argE->text = luaL_checklstring (L, 2, &argE->textlen); + argE->startoffset = get_startoffset (L, 3, argE->textlen); + argE->eflags = ALG_GETEFLAGS (L, 4); +} + +/* unlike PCRE, partial matching won't return the actual substrings/matches */ +static int Gregex_dfa_exec (lua_State *L) +{ + TArgExec argE; + TGrgx *ud; + gboolean res; + + checkarg_dfa_exec (L, &argE, &ud); + + gerror_free (ud); + + res = g_regex_match_all_full (ud->pr, argE.text, (int)argE.textlen, + argE.startoffset, argE.eflags, &ud->match_info, &ud->error); + + if (ALG_ISMATCH (res)) { + int i, start_pos, end_pos; + int max = g_match_info_get_match_count (ud->match_info); + g_match_info_fetch_pos (ud->match_info, 0, &start_pos, NULL); + lua_pushinteger (L, start_pos + 1); /* 1-st return value */ + lua_newtable (L); /* 2-nd return value */ + for (i=0; i<max; i++) { + g_match_info_fetch_pos (ud->match_info, i, NULL, &end_pos); + /* I don't know why these offsets aren't incremented by 1 to match Lua indexing? */ + lua_pushinteger (L, end_pos); + lua_rawseti (L, -2, i+1); + } + lua_pushinteger (L, max); /* 3-rd return value */ + minfo_free (ud); + return 3; + } + else if (g_match_info_is_partial_match(ud->match_info)) { + lua_pushboolean(L,1); + minfo_free (ud); + return 1; + } + else { + minfo_free (ud); + if (ALG_NOMATCH (res)) + return lua_pushnil (L), 1; + else + return generate_error (L, ud, 0); + } +} + +#ifdef ALG_USERETRY + static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) { + minfo_free (ud); + gerror_free (ud); + int eflags = retry ? 
(argE->eflags|G_REGEX_MATCH_NOTEMPTY|G_REGEX_MATCH_ANCHORED) : argE->eflags; + return g_regex_match_full (ud->pr, argE->text, argE->textlen, + argE->startoffset, eflags, &ud->match_info, &ud->error); + } +#else + static int gmatch_exec (TUserdata *ud, TArgExec *argE) { + minfo_free (ud); + gerror_free (ud); + return g_regex_match_full (ud->pr, argE->text, argE->textlen, + argE->startoffset, argE->eflags, &ud->match_info, &ud->error); + } +#endif + +static void gmatch_pushsubject (lua_State *L, TArgExec *argE) { + lua_pushlstring (L, argE->text, argE->textlen); +} + +static int findmatch_exec (TGrgx *ud, TArgExec *argE) { + minfo_free (ud); + gerror_free (ud); + return g_regex_match_full (ud->pr, argE->text, argE->textlen, + argE->startoffset, argE->eflags, &ud->match_info, &ud->error); +} + +#ifdef ALG_USERETRY + static int gsub_exec (TGrgx *ud, TArgExec *argE, int st, int retry) { + minfo_free (ud); + gerror_free (ud); + int eflags = retry ? (argE->eflags|G_REGEX_MATCH_NOTEMPTY|G_REGEX_MATCH_ANCHORED) : argE->eflags; + return g_regex_match_full (ud->pr, argE->text, argE->textlen, + st, eflags, &ud->match_info, &ud->error); + } +#else + static int gsub_exec (TGrgx *ud, TArgExec *argE, int st) { + minfo_free (ud); + gerror_free (ud); + return g_regex_match_full (ud->pr, argE->text, argE->textlen, + st, argE->eflags, &ud->match_info, &ud->error); + } +#endif + +static int split_exec (TGrgx *ud, TArgExec *argE, int offset) { + minfo_free (ud); + gerror_free (ud); + return g_regex_match_full (ud->pr, argE->text, argE->textlen, offset, + argE->eflags, &ud->match_info, &ud->error); +} + +static int Gregex_gc (lua_State *L) { + TGrgx *ud = check_ud (L); + if (ud->freed == 0) { /* precaution against "manual" __gc calling */ + ud->freed = 1; + if (ud->pr) g_regex_unref (ud->pr); + minfo_free (ud); + gerror_free (ud); + } + return 0; +} + +static int Gregex_tostring (lua_State *L) { + TGrgx *ud = check_ud (L); + if (ud->freed == 0) + lua_pushfstring (L, "%s (%p)", 
REX_TYPENAME, (void*)ud); + else + lua_pushfstring (L, "%s (deleted)", REX_TYPENAME); + return 1; +} + +static int Gregex_version (lua_State *L) { + lua_pushfstring (L, "%d.%d.%d", GLIB_MAJOR_VERSION, GLIB_MINOR_VERSION, GLIB_MICRO_VERSION); + return 1; +} + + +static const luaL_Reg r_methods[] = { + { "exec", algm_exec }, + { "tfind", algm_tfind }, /* old name: match */ + { "find", algm_find }, + { "match", algm_match }, + { "dfa_exec", Gregex_dfa_exec }, + { "__gc", Gregex_gc }, + { "__tostring", Gregex_tostring }, + { NULL, NULL } +}; + +static const luaL_Reg r_functions[] = { + { "match", algf_match }, + { "find", algf_find }, + { "gmatch", algf_gmatch }, + { "gsub", algf_gsub }, + { "split", algf_split }, + { "new", algf_new }, + { "flags", Gregex_get_flags }, + { "compile_flags", Gregex_get_compile_flags }, + { "match_flags", Gregex_get_match_flags }, + { "version", Gregex_version }, + { NULL, NULL } +}; + +/* Open the library */ +REX_API int REX_OPENLIB (lua_State *L) { + + alg_register(L, r_methods, r_functions, "GLib Regex"); + + return 1; +} diff --git a/epan/wslua/lrexlib_glib_f.c b/epan/wslua/lrexlib_glib_f.c new file mode 100644 index 0000000000..ed3299081f --- /dev/null +++ b/epan/wslua/lrexlib_glib_f.c @@ -0,0 +1,138 @@ +/* lrexlib_gregex_f.c - GLib regular expression library */ +/* See Copyright Notice in the file lrexlib.h */ + +#include <glib.h> +#include "lua.h" +#include "lauxlib.h" +#include "lrexlib.h" + +#define VERSION_GLIB (GLIB_MAJOR_VERSION*100 + GLIB_MINOR_VERSION) + +static flag_pair gregex_compile_flags[] = { + { "MAJOR", GLIB_MAJOR_VERSION }, + { "MINOR", GLIB_MINOR_VERSION }, + { "MICRO", GLIB_MICRO_VERSION }, +/*----------------------- Compile flags -----------------------------------*/ + { "CASELESS", G_REGEX_CASELESS }, + { "MULTILINE", G_REGEX_MULTILINE }, + { "DOTALL", G_REGEX_DOTALL }, + { "EXTENDED", G_REGEX_EXTENDED }, + { "ANCHORED", G_REGEX_ANCHORED }, + { "DOLLAR_ENDONLY", G_REGEX_DOLLAR_ENDONLY }, + { "UNGREEDY", 
G_REGEX_UNGREEDY }, + { "NO_AUTO_CAPTURE", G_REGEX_NO_AUTO_CAPTURE }, + { "OPTIMIZE", G_REGEX_OPTIMIZE }, + { "DUPNAMES", G_REGEX_DUPNAMES }, + { "NEWLINE_CR", G_REGEX_NEWLINE_CR }, + { "NEWLINE_LF", G_REGEX_NEWLINE_LF }, + { "NEWLINE_CRLF", G_REGEX_NEWLINE_CRLF }, +#if VERSION_GLIB >= 234 + { "FIRSTLINE", G_REGEX_FIRSTLINE }, + { "NEWLINE_ANYCRLF", G_REGEX_NEWLINE_ANYCRLF }, + { "BSR_ANYCRLF", G_REGEX_BSR_ANYCRLF }, + { "JAVASCRIPT_COMPAT", G_REGEX_JAVASCRIPT_COMPAT }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +/*----------------------- Match flags -------------------------------------*/ +static flag_pair gregex_match_flags[] = { + { "ANCHORED", G_REGEX_MATCH_ANCHORED }, + { "NOTBOL", G_REGEX_MATCH_NOTBOL }, + { "NOTEOL", G_REGEX_MATCH_NOTEOL }, + { "NOTEMPTY", G_REGEX_MATCH_NOTEMPTY }, + { "PARTIAL", G_REGEX_MATCH_PARTIAL }, + { "NEWLINE_CR", G_REGEX_MATCH_NEWLINE_CR }, + { "NEWLINE_LF", G_REGEX_MATCH_NEWLINE_LF }, + { "NEWLINE_CRLF", G_REGEX_MATCH_NEWLINE_CRLF }, + { "NEWLINE_ANY", G_REGEX_MATCH_NEWLINE_ANY }, +#if VERSION_GLIB >= 234 + { "NEWLINE_ANYCRLF", G_REGEX_MATCH_NEWLINE_ANYCRLF }, + { "BSR_ANYCRLF", G_REGEX_MATCH_BSR_ANYCRLF }, + { "BSR_ANY", G_REGEX_MATCH_BSR_ANY }, + { "PARTIAL_SOFT", G_REGEX_MATCH_PARTIAL_SOFT }, + { "PARTIAL_HARD", G_REGEX_MATCH_PARTIAL_HARD }, + { "NOTEMPTY_ATSTART", G_REGEX_MATCH_NOTEMPTY_ATSTART }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +flag_pair gregex_error_flags[] = { + { "COMPILE", G_REGEX_ERROR_COMPILE }, + { "OPTIMIZE", G_REGEX_ERROR_OPTIMIZE }, + { "REPLACE", G_REGEX_ERROR_REPLACE }, + { "MATCH", G_REGEX_ERROR_MATCH }, + { "INTERNAL", G_REGEX_ERROR_INTERNAL }, + { "STRAY_BACKSLASH", G_REGEX_ERROR_STRAY_BACKSLASH }, + { "MISSING_CONTROL_CHAR", G_REGEX_ERROR_MISSING_CONTROL_CHAR }, + { "UNRECOGNIZED_ESCAPE", G_REGEX_ERROR_UNRECOGNIZED_ESCAPE }, + { "QUANTIFIERS_OUT_OF_ORDER", 
G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER }, + { "QUANTIFIER_TOO_BIG", G_REGEX_ERROR_QUANTIFIER_TOO_BIG }, + { "UNTERMINATED_CHARACTER_CLASS", G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS }, + { "INVALID_ESCAPE_IN_CHARACTER_CLASS", G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS }, + { "RANGE_OUT_OF_ORDER", G_REGEX_ERROR_RANGE_OUT_OF_ORDER }, + { "NOTHING_TO_REPEAT", G_REGEX_ERROR_NOTHING_TO_REPEAT }, + { "UNRECOGNIZED_CHARACTER", G_REGEX_ERROR_UNRECOGNIZED_CHARACTER }, + { "POSIX_NAMED_CLASS_OUTSIDE_CLASS", G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS }, + { "UNMATCHED_PARENTHESIS", G_REGEX_ERROR_UNMATCHED_PARENTHESIS }, + { "INEXISTENT_SUBPATTERN_REFERENCE", G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE }, + { "UNTERMINATED_COMMENT", G_REGEX_ERROR_UNTERMINATED_COMMENT }, + { "EXPRESSION_TOO_LARGE", G_REGEX_ERROR_EXPRESSION_TOO_LARGE }, + { "MEMORY_ERROR", G_REGEX_ERROR_MEMORY_ERROR }, + { "VARIABLE_LENGTH_LOOKBEHIND", G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND }, + { "MALFORMED_CONDITION", G_REGEX_ERROR_MALFORMED_CONDITION }, + { "TOO_MANY_CONDITIONAL_BRANCHES", G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES }, + { "ASSERTION_EXPECTED", G_REGEX_ERROR_ASSERTION_EXPECTED }, + { "UNKNOWN_POSIX_CLASS_NAME", G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME }, + { "POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED", G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED }, + { "HEX_CODE_TOO_LARGE", G_REGEX_ERROR_HEX_CODE_TOO_LARGE }, + { "INVALID_CONDITION", G_REGEX_ERROR_INVALID_CONDITION }, + { "SINGLE_BYTE_MATCH_IN_LOOKBEHIND", G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND }, + { "INFINITE_LOOP", G_REGEX_ERROR_INFINITE_LOOP }, + { "MISSING_SUBPATTERN_NAME_TERMINATOR", G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR }, + { "DUPLICATE_SUBPATTERN_NAME", G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME }, + { "MALFORMED_PROPERTY", G_REGEX_ERROR_MALFORMED_PROPERTY }, + { "UNKNOWN_PROPERTY", G_REGEX_ERROR_UNKNOWN_PROPERTY }, + { "SUBPATTERN_NAME_TOO_LONG", G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG }, + { 
"TOO_MANY_SUBPATTERNS", G_REGEX_ERROR_TOO_MANY_SUBPATTERNS }, + { "INVALID_OCTAL_VALUE", G_REGEX_ERROR_INVALID_OCTAL_VALUE }, + { "TOO_MANY_BRANCHES_IN_DEFINE", G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE }, + { "INCONSISTENT_NEWLINE_OPTIONS", G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS }, + { "MISSING_BACK_REFERENCE", G_REGEX_ERROR_MISSING_BACK_REFERENCE }, +#if VERSION_GLIB >= 234 + { "INVALID_RELATIVE_REFERENCE", G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE }, + { "BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN",G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN }, + { "UNKNOWN_BACKTRACKING_CONTROL_VERB", G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB }, + { "NUMBER_TOO_BIG", G_REGEX_ERROR_NUMBER_TOO_BIG }, + { "MISSING_SUBPATTERN_NAME", G_REGEX_ERROR_MISSING_SUBPATTERN_NAME }, + { "MISSING_DIGIT", G_REGEX_ERROR_MISSING_DIGIT }, + { "INVALID_DATA_CHARACTER", G_REGEX_ERROR_INVALID_DATA_CHARACTER }, + { "EXTRA_SUBPATTERN_NAME", G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME }, + { "BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED",G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED }, + { "INVALID_CONTROL_CHAR", G_REGEX_ERROR_INVALID_CONTROL_CHAR }, + { "MISSING_NAME", G_REGEX_ERROR_MISSING_NAME }, + { "NOT_SUPPORTED_IN_CLASS", G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS }, + { "TOO_MANY_FORWARD_REFERENCES", G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES }, + { "NAME_TOO_LONG", G_REGEX_ERROR_NAME_TOO_LONG }, + { "CHARACTER_VALUE_TOO_LARGE", G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE }, +#endif +/*---------------------------------------------------------------------------*/ + { NULL, 0 } +}; + +int Gregex_get_compile_flags (lua_State *L) { + const flag_pair* fps[] = { gregex_compile_flags, NULL }; + return get_flags (L, fps); +} + +int Gregex_get_match_flags (lua_State *L) { + const flag_pair* fps[] = { gregex_match_flags, NULL }; + return get_flags (L, fps); +} + +int Gregex_get_flags (lua_State *L) { + const flag_pair* fps[] = { gregex_compile_flags, gregex_match_flags, 
gregex_error_flags, NULL }; + return get_flags (L, fps); +} diff --git a/epan/wslua/make-reg.pl b/epan/wslua/make-reg.pl index 7f7408dc99..1eeb167e9a 100755 --- a/epan/wslua/make-reg.pl +++ b/epan/wslua/make-reg.pl @@ -76,6 +76,7 @@ for (@classes) { print C "\twslua_reg_module(L, \"bit\", luaopen_bit);\n"; # the bitops library returns a value on the stack - get rid of it print C "\tlua_pop(L,1);\n"; +print C "\twslua_reg_module(L, \"GRegex\", luaopen_rex_glib);\n"; print C "}\n\n"; diff --git a/epan/wslua/wslua.h b/epan/wslua/wslua.h index e73e7be931..2ec7456766 100644 --- a/epan/wslua/wslua.h +++ b/epan/wslua/wslua.h @@ -608,5 +608,6 @@ extern int wslua_is_field_available(lua_State* L, const char* field_abbr); extern int wslua_bin2hex(lua_State* L, const guint8* data, const guint len, const gboolean lowercase, const gchar* sep); extern int wslua_hex2bin(lua_State* L, const char* data, const guint len, const gchar* sep); +extern int luaopen_rex_glib(lua_State *L); #endif diff --git a/test/lua/common_sets.lua b/test/lua/common_sets.lua new file mode 100755 index 0000000000..e71251fea8 --- /dev/null +++ b/test/lua/common_sets.lua @@ -0,0 +1,319 @@ +-- See Copyright Notice in the file LICENSE + +-- This file should contain only test sets that behave identically +-- when being run with pcre or posix regex libraries. 
+ +local luatest = require "luatest" +local N = luatest.NT + +local function norm(a) return a==nil and N or a end + +local function get_gsub (lib) + return lib.gsub or + function (subj, pattern, repl, n) + return lib.new (pattern) : gsub (subj, repl, n) + end +end + +local function set_f_gmatch (lib, flg) + -- gmatch (s, p, [cf], [ef]) + local function test_gmatch (subj, patt) + local out, guard = {}, 10 + for a, b in lib.gmatch (subj, patt) do + table.insert (out, { norm(a), norm(b) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function gmatch", + Func = test_gmatch, + --{ subj patt results } + { {"ab", lib.new"."}, {{"a",N}, {"b",N} } }, + { {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} }, + { {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match + { {"abc", "^." }, {{"a",N}} },--anchored pattern + } +end + +local function set_f_split (lib, flg) + -- split (s, p, [cf], [ef]) + local function test_split (subj, patt) + local out, guard = {}, 10 + for a, b, c in lib.split (subj, patt) do + table.insert (out, { norm(a), norm(b), norm(c) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function split", + Func = test_split, + --{ subj patt results } + { {"ab", lib.new","}, {{"ab",N,N}, } }, + { {"ab", ","}, {{"ab",N,N}, } }, + { {",", ","}, {{"",",",N}, {"", N, N}, } }, + { {",,", ","}, {{"",",",N}, {"",",",N}, {"",N,N} } }, + { {"a,b", ","}, {{"a",",",N}, {"b",N,N}, } }, + { {",a,b", ","}, {{"",",",N}, {"a",",",N}, {"b",N,N}} }, + { {"a,b,", ","}, {{"a",",",N}, {"b",",",N}, {"",N,N} } }, + { {"a,,b", ","}, {{"a",",",N}, {"",",",N}, {"b",N,N}} }, + { {"ab<78>c", "<(.)(.)>"}, {{"ab","7","8"}, {"c",N,N}, } }, + { {"abc", "^."}, {{"", "a",N}, {"bc",N,N}, } },--anchored pattern + { {"abc", "^"}, {{"", "", N}, {"abc",N,N}, } }, +-- { {"abc", "$"}, {{"abc","",N}, {"",N,N}, } }, +-- { {"abc", "^|$"}, {{"", "", N}, 
{"abc","",N},{"",N,N},} }, + } +end + +local function set_f_find (lib, flg) + return { + Name = "Function find", + Func = lib.find, + -- {subj, patt, st}, { results } + { {"abcd", lib.new".+"}, { 1,4 } }, -- [none] + { {"abcd", ".+"}, { 1,4 } }, -- [none] + { {"abcd", ".+", 2}, { 2,4 } }, -- positive st + { {"abcd", ".+", -2}, { 3,4 } }, -- negative st + { {"abcd", ".*"}, { 1,4 } }, -- [none] + { {"abc", "bc"}, { 2,3 } }, -- [none] + { {"abcd", "(.)b.(d)"}, { 1,4,"a","d" }}, -- [captures] + } +end + +local function set_f_match (lib, flg) + return { + Name = "Function match", + Func = lib.match, + -- {subj, patt, st}, { results } + { {"abcd", lib.new".+"}, {"abcd"} }, -- [none] + { {"abcd", ".+"}, {"abcd"} }, -- [none] + { {"abcd", ".+", 2}, {"bcd"} }, -- positive st + { {"abcd", ".+", -2}, {"cd"} }, -- negative st + { {"abcd", ".*"}, {"abcd"} }, -- [none] + { {"abc", "bc"}, {"bc"} }, -- [none] + { {"abcd", "(.)b.(d)"}, {"a","d"} }, -- [captures] + } +end + +local function set_m_exec (lib, flg) + return { + Name = "Method exec", + Method = "exec", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {1,4,{}} }, -- [none] + { {".+"}, {"abcd",2}, {2,4,{}} }, -- positive st + { {".+"}, {"abcd",-2}, {3,4,{}} }, -- negative st + { {".*"}, {"abcd"}, {1,4,{}} }, -- [none] + { {"bc"}, {"abc"}, {2,3,{}} }, -- [none] + { { "(.)b.(d)"}, {"abcd"}, {1,4,{1,1,4,4}}},--[captures] + { {"(a+)6+(b+)"}, {"Taa66bbT",2}, {2,7,{2,3,6,7}}},--[st+captures] + } +end + +local function set_m_tfind (lib, flg) + return { + Name = "Method tfind", + Method = "tfind", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {1,4,{}} }, -- [none] + { {".+"}, {"abcd",2}, {2,4,{}} }, -- positive st + { {".+"}, {"abcd",-2}, {3,4,{}} }, -- negative st + { {".*"}, {"abcd"}, {1,4,{}} }, -- [none] + { {"bc"}, {"abc"}, {2,3,{}} }, -- [none] + { {"(.)b.(d)"}, {"abcd"}, {1,4,{"a","d"}}},--[captures] + } +end + +local function set_m_find (lib, flg) + return { + Name = "Method find", + Method = "find", + 
--{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {1,4} }, -- [none] + { {".+"}, {"abcd",2}, {2,4} }, -- positive st + { {".+"}, {"abcd",-2}, {3,4} }, -- negative st + { {".*"}, {"abcd"}, {1,4} }, -- [none] + { {"bc"}, {"abc"}, {2,3} }, -- [none] + { {"(.)b.(d)"}, {"abcd"}, {1,4,"a","d"}},--[captures] + } +end + +local function set_m_match (lib, flg) + return { + Name = "Method match", + Method = "match", + --{patt}, {subj, st} { results } + { {".+"}, {"abcd"}, {"abcd"} }, -- [none] + { {".+"}, {"abcd",2}, {"bcd" } }, -- positive st + { {".+"}, {"abcd",-2}, {"cd" } }, -- negative st + { {".*"}, {"abcd"}, {"abcd"} }, -- [none] + { {"bc"}, {"abc"}, {"bc" } }, -- [none] + {{ "(.)b.(d)"}, {"abcd"}, {"a","d"} }, --[captures] + } +end + +local function set_f_gsub1 (lib, flg) + local subj, pat = "abcdef", "[abef]+" + local cpat = lib.new(pat) + return { + Name = "Function gsub, set1", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, cpat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", 0}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", -1}, {subj, 0, 0} }, -- test "n" + empty_replace + { {subj, pat, "", 1}, {"cdef", 1, 1} }, + { {subj, pat, "", 2}, {"cd", 2, 2} }, + { {subj, pat, "", 3}, {"cd", 2, 2} }, + { {subj, pat, "" }, {"cd", 2, 2} }, + { {subj, pat, "#", 0}, {subj, 0, 0} }, -- test "n" + non-empty_replace + { {subj, pat, "#", 1}, {"#cdef", 1, 1} }, + { {subj, pat, "#", 2}, {"#cd#", 2, 2} }, + { {subj, pat, "#", 3}, {"#cd#", 2, 2} }, + { {subj, pat, "#" }, {"#cd#", 2, 2} }, + { {"abc", "^.", "#" }, {"#bc", 1, 1} }, -- anchored pattern + } +end + +local function set_f_gsub2 (lib, flg) + local subj, pat = "abc", "([ac])" + return { + Name = "Function gsub, set2", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, pat, "<%1>" }, {"<a>b<c>", 2, 2} }, -- test non-escaped chars in f + { {subj, pat, "%<%1%>" }, {"<a>b<c>", 2, 2} }, -- test escaped chars in f + { {subj, pat, "" }, 
{"b", 2, 2} }, -- test empty replace + { {subj, pat, "1" }, {"1b1", 2, 2} }, -- test odd and even %'s in f + { {subj, pat, "%1" }, {"abc", 2, 2} }, + { {subj, pat, "%%1" }, {"%1b%1", 2, 2} }, + { {subj, pat, "%%%1" }, {"%ab%c", 2, 2} }, + { {subj, pat, "%%%%1" }, {"%%1b%%1", 2, 2} }, + { {subj, pat, "%%%%%1" }, {"%%ab%%c", 2, 2} }, + } +end + +local function set_f_gsub3 (lib, flg) + return { + Name = "Function gsub, set3", + Func = get_gsub (lib), + --{ s, p, f, n, res1,res2,res3 }, + { {"abc", "a", "%0" }, {"abc", 1, 1} }, -- test (in)valid capture index + { {"abc", "a", "%1" }, {"abc", 1, 1} }, + { {"abc", "[ac]", "%1" }, {"abc", 2, 2} }, + { {"abc", "(a)", "%1" }, {"abc", 1, 1} }, + { {"abc", "(a)", "%2" }, "invalid capture index" }, + } +end + +local function set_f_gsub4 (lib, flg) + return { + Name = "Function gsub, set4", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test . + { {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+ + { {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .* + { {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} }, + { {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d + { {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D + { {"a \t\nb", "[ \t\n]", "#" }, {"a###b", 3, 3} }, -- test %s + { {"a \t\nb", "[^ \t\n]", "#" }, {"# \t\n#", 2, 2} }, -- test %S + } +end + +local function set_f_gsub5 (lib, flg) + local function frep1 () end -- returns nothing + local function frep2 () return "#" end -- ignores arguments + local function frep3 (...) 
return table.concat({...}, ",") end -- "normal" + local function frep4 () return {} end -- invalid return type + local function frep5 () return "7", "a" end -- 2-nd return is "a" + local function frep6 () return "7", "break" end -- 2-nd return is "break" + local subj = "a2c3" + return { + Name = "Function gsub, set5", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, "a(.)c(.)", frep1 }, {subj, 1, 0} }, + { {subj, "a(.)c(.)", frep2 }, {"#", 1, 1} }, + { {subj, "a(.)c(.)", frep3 }, {"2,3", 1, 1} }, + { {subj, "a.c.", frep3 }, {subj, 1, 1} }, + { {subj, "z*", frep1 }, {subj, 5, 0} }, + { {subj, "z*", frep2 }, {"#a#2#c#3#", 5, 5} }, + { {subj, "z*", frep3 }, {subj, 5, 5} }, + { {subj, subj, frep4 }, "invalid return type" }, + { {"abc",".", frep5 }, {"777", 3, 3} }, + { {"abc",".", frep6 }, {"777", 3, 3} }, + } +end + +local function set_f_gsub6 (lib, flg) + local tab1, tab2, tab3 = {}, { ["2"] = 56 }, { ["2"] = {} } + local subj = "a2c3" + return { + Name = "Function gsub, set6", + Func = get_gsub (lib), + --{ s, p, f, n, res1,res2,res3 }, + { {subj, "a(.)c(.)", tab1 }, {subj, 1, 0} }, + { {subj, "a(.)c(.)", tab2 }, {"56", 1, 1} }, + { {subj, "a(.)c(.)", tab3 }, "invalid replacement type" }, + { {subj, "a.c.", tab1 }, {subj, 1, 0} }, + { {subj, "a.c.", tab2 }, {subj, 1, 0} }, + { {subj, "a.c.", tab3 }, {subj, 1, 0} }, + } +end + +local function set_f_gsub8 (lib, flg) + local subj, patt, repl = "abcdef", "..", "*" + return { + Name = "Function gsub, set8", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {subj, patt, repl, function() end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return {} end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return "#" end }, {"###", 3, 3} }, + { {subj, patt, 
repl, function() return 57 end }, {"575757", 3, 3} }, + { {subj, patt, repl, function (from) return from end }, {"135", 3, 3} }, + { {subj, patt, repl, function (from, to) return to end }, {"246", 3, 3} }, + { {subj, patt, repl, function (from,to,rep) return rep end }, + {"***", 3, 3} }, + { {subj, patt, repl, function (from, to, rep) return rep..to..from end }, + {"*21*43*65", 3, 3} }, + { {subj, patt, repl, function() return nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, nil end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, false end }, {"abcdef", 3, 0} }, + { {subj, patt, repl, function() return nil, true end }, {"ab**", 3, 2} }, + { {subj, patt, repl, function() return true, true end }, {"***", 3, 3} }, + { {subj, patt, repl, function() return nil, 0 end }, {"abcdef", 1, 0} }, + { {subj, patt, repl, function() return true, 0 end }, {"*cdef", 1, 1} }, + { {subj, patt, repl, function() return nil, 1 end }, {"ab*ef", 2, 1} }, + { {subj, patt, repl, function() return true, 1 end }, {"**ef", 2, 2} }, + } +end + +return function (libname, isglobal) + local lib = isglobal and _G[libname] or require (libname) + return { + set_f_gmatch (lib), + set_f_split (lib), + set_f_find (lib), + set_f_match (lib), + set_m_exec (lib), + set_m_tfind (lib), + set_m_find (lib), + set_m_match (lib), + set_f_gsub1 (lib), + set_f_gsub2 (lib), + set_f_gsub3 (lib), + set_f_gsub4 (lib), + set_f_gsub5 (lib), + set_f_gsub6 (lib), + set_f_gsub8 (lib), + } +end diff --git a/test/lua/glib_sets.lua b/test/lua/glib_sets.lua new file mode 100644 index 0000000000..0c3f38a483 --- /dev/null +++ b/test/lua/glib_sets.lua @@ -0,0 +1,204 @@ +-- See Copyright Notice in the file LICENSE + +local pat2pcre = require "pat2pcre" +local luatest = require "luatest" +local N = luatest.NT + +local function norm(a) return a==nil and N or a end + +local function fill (n, m) + local t = {} + for i = n, m, -1 do table.insert (t, i) end + return t +end + + +-- glib 
doesn't do partial matching return of matches, nor +-- does it support ovecsize being set through the API +local function set_m_dfa_exec (lib, flg) + return { + Name = "Method dfa_exec for glib", + Method = "dfa_exec", +--{patt,cf,lo}, {subj,st,ef,os,ws} { results } + { {".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none] + { {".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st + { {".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st + { {".+"}, {"abcd",5}, {N } }, -- failing st + { {".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none] + { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy + { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf + { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf + { {"bc"}, {"abc"}, {2,{3},1} }, -- [none] + { {"bc",flg.ANCHORED}, {"abc"}, {N } }, -- cf + { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef + { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures] + { {"abc"}, {"ab"}, {N } }, + { {"abc"}, {"abc",N,flg.PARTIAL}, {1,{3},1} }, + { {"abc*"}, {"abcc",N,flg.PARTIAL}, {1,{4,3,2},3} }, + { {"abc"}, {"ab",N,flg.PARTIAL}, {true} }, + { {"bc"}, {"ab",N,flg.PARTIAL}, {true} }, +} +end + +local function get_gsub (lib) + return lib.gsub or + function (subj, pattern, repl, n) + return lib.new (pattern) : gsub (subj, repl, n) + end +end + +-- sadly, glib *always* sets the PCRE_UCP compilation flag, regardless +-- of REGEX_RAW being set - this is, frankly, a bug in my opinion +-- but anyway, it means things like '[:alpha:]' and '\w' match things that Lua's +-- '%a' does not match +local function set_f_gsub7 (lib, flg) + local subj = "" + for i = 0, 255 do + subj = subj .. 
string.char (i) + end + + -- This set requires calling prepare_set before calling gsub_test + local set = { + Name = "Function gsub, set7 for glib", + Func = get_gsub (lib), + --{ s, p, f, n, }, + { {subj, "[a-zA-Z]", "" }, }, + { {subj, "[^a-zA-Z]", "" }, }, + { {subj, "%c", "" }, }, + { {subj, "%C", "" }, }, + { {subj, "[a-z]", "" }, }, + { {subj, "[^a-z]", "" }, }, + { {subj, "%d", "" }, }, + { {subj, "%D", "" }, }, + { {subj, "%p", "" }, }, + { {subj, "%P", "" }, }, +-- { {subj, "%s", "" }, }, +-- { {subj, "%S", "" }, }, + { {subj, "[A-Z]", "" }, }, + { {subj, "[^A-Z]", "" }, }, -- 10 + { {subj, "[a-zA-Z0-9]", "" }, }, + { {subj, "[^a-zA-Z0-9]", "" }, }, + { {subj, "%x", "" }, }, + { {subj, "%X", "" }, }, + { {subj, "%z", "" }, }, + { {subj, "%Z", "" }, }, + +-- { {subj, "[%a]", "" }, }, +-- { {subj, "[%A]", "" }, }, + { {subj, "[%c]", "" }, }, + { {subj, "[%C]", "" }, }, + { {subj, "[%d]", "" }, }, + { {subj, "[%D]", "" }, }, +-- { {subj, "[%l]", "" }, }, +-- { {subj, "[%L]", "" }, }, + { {subj, "[%p]", "" }, }, + { {subj, "[%P]", "" }, }, +-- { {subj, "[%u]", "" }, }, +-- { {subj, "[%U]", "" }, }, +-- { {subj, "[%w]", "" }, }, +-- { {subj, "[%W]", "" }, }, + { {subj, "[%x]", "" }, }, + { {subj, "[%X]", "" }, }, + { {subj, "[%z]", "" }, }, + { {subj, "[%Z]", "" }, }, + +-- { {subj, "[%a_]", "" }, }, +-- { {subj, "[%A_]", "" }, }, + { {subj, "[%c_]", "" }, }, + { {subj, "[%C_]", "" }, }, +-- { {subj, "[%l_]", "" }, }, +-- { {subj, "[%L_]", "" }, }, + { {subj, "[%p_]", "" }, }, + { {subj, "[%P_]", "" }, }, +-- { {subj, "[%u_]", "" }, }, +-- { {subj, "[%U_]", "" }, }, +-- { {subj, "[%w_]", "" }, }, +-- { {subj, "[%W_]", "" }, }, + { {subj, "[%x_]", "" }, }, + { {subj, "[%X_]", "" }, }, + { {subj, "[%z_]", "" }, }, + { {subj, "[%Z_]", "" }, }, + +-- { {subj, "[%a%d]", "" }, }, +-- { {subj, "[%A%d]", "" }, }, + { {subj, "[%c%d]", "" }, }, + { {subj, "[%C%d]", "" }, }, +-- { {subj, "[%l%d]", "" }, }, +-- { {subj, "[%L%d]", "" }, }, + { {subj, "[%p%d]", "" }, }, + { 
{subj, "[%P%d]", "" }, }, +-- { {subj, "[%u%d]", "" }, }, +-- { {subj, "[%U%d]", "" }, }, +-- { {subj, "[%w%d]", "" }, }, +-- { {subj, "[%W%d]", "" }, }, + { {subj, "[%x%d]", "" }, }, + { {subj, "[%X%d]", "" }, }, + { {subj, "[%z%d]", "" }, }, + { {subj, "[%Z%d]", "" }, }, + +-- { {subj, "[^%a%d]", "" }, }, +-- { {subj, "[^%A%d]", "" }, }, + { {subj, "[^%c%d]", "" }, }, + { {subj, "[^%C%d]", "" }, }, +-- { {subj, "[^%l%d]", "" }, }, +-- { {subj, "[^%L%d]", "" }, }, + { {subj, "[^%p%d]", "" }, }, + { {subj, "[^%P%d]", "" }, }, +-- { {subj, "[^%u%d]", "" }, }, +-- { {subj, "[^%U%d]", "" }, }, +-- { {subj, "[^%w%d]", "" }, }, +-- { {subj, "[^%W%d]", "" }, }, + { {subj, "[^%x%d]", "" }, }, + { {subj, "[^%X%d]", "" }, }, + { {subj, "[^%z%d]", "" }, }, + { {subj, "[^%Z%d]", "" }, }, + +-- { {subj, "[^%a_]", "" }, }, +-- { {subj, "[^%A_]", "" }, }, + { {subj, "[^%c_]", "" }, }, + { {subj, "[^%C_]", "" }, }, +-- { {subj, "[^%l_]", "" }, }, +-- { {subj, "[^%L_]", "" }, }, + { {subj, "[^%p_]", "" }, }, + { {subj, "[^%P_]", "" }, }, +-- { {subj, "[^%u_]", "" }, }, +-- { {subj, "[^%U_]", "" }, }, +-- { {subj, "[^%w_]", "" }, }, +-- { {subj, "[^%W_]", "" }, }, + { {subj, "[^%x_]", "" }, }, + { {subj, "[^%X_]", "" }, }, + { {subj, "[^%z_]", "" }, }, + { {subj, "[^%Z_]", "" }, }, + + { {subj, "\100", "" }, }, + { {subj, "[\100]", "" }, }, + { {subj, "[^\100]", "" }, }, + { {subj, "[\100-\200]", "" }, }, + { {subj, "[^\100-\200]", "" }, }, + { {subj, "\100a", "" }, }, + { {subj, "[\100a]", "" }, }, + { {subj, "[^\100a]", "" }, }, + { {subj, "[\100-\200a]", "" }, }, + { {subj, "[^\100-\200a]", "" }, }, + } + -- fill in reference results + for _,v in ipairs(set) do + local r0, r1, r2 = pcall (string.gsub, unpack (v[1])) + v[2] = r0 and { r1, r2, r2 } or { r0, r1 } + end + -- convert patterns: lua -> pcre + for _, test in ipairs (set) do + test[1][2] = pat2pcre (test[1][2]) + end + return set +end + +return function (libname, isglobal) + local lib = isglobal and _G[libname] or 
require (libname) + local flags = lib.flags and lib.flags () + local sets = { + set_m_dfa_exec (lib, flags), + set_f_gsub7 (lib, flags) + } + return sets +end diff --git a/test/lua/gregex.lua b/test/lua/gregex.lua new file mode 100644 index 0000000000..2ad04ba6dc --- /dev/null +++ b/test/lua/gregex.lua @@ -0,0 +1,285 @@ + +-- Tests for GLib Regex functions +-- written by Hadriel Kaplan, based on Lrexlib's test suite +-- This is a test script for tshark/wireshark. +-- This script runs inside tshark/wireshark, so to run it do: +-- tshark -r empty.cap -X lua_script:<path_to_testdir>/lua/gregex.lua -X lua_script1:glib +-- +-- if you have to give addtional paths to find the dependent lua files, +-- use the '-X lua_script1:' syntax to add more arguments +-- +-- available arguments: +-- -d<dir> provides path directory for lua include files +-- -v verbose mode +-- -V very verbose mode + + +-- save args before we do anything else +local args = {...} +for i,v in ipairs(args) do + print(i.." = "..v) +end + +local function testing(...) + print("---- Testing "..tostring(...).." ----") +end + +local count = 0 + +local function test(name, ...) + count = count + 1 + io.write("test "..name.."-"..count.."...") + if (...) == true then + io.write("passed\n") + io.flush() + else + io.write("failed!\n") + io.flush() + error(name.." test failed!") + end +end + +------------- First test some basic stuff to make sure we're sane ----------- + +print("Lua version: ".._VERSION) + +testing("Lrexlib GLib Regex library") + +local lib = GRegex +test("global",_G.GRegex == lib) + +for name, val in pairs(lib) do + print("\t"..name.." 
= "..type(val)) +end + +test("class",type(lib) == 'table') +test("class",type(lib._VERSION) == 'string') +test("class",type(lib.find) == 'function') +test("class",type(lib.compile_flags) == 'function') +test("class",type(lib.match_flags) == 'function') +test("class",type(lib.flags) == 'function') +test("class",type(lib.gsub) == 'function') +test("class",type(lib.gmatch) == 'function') +test("class",type(lib.new) == 'function') +test("class",type(lib.match) == 'function') +test("class",type(lib.split) == 'function') +test("class",type(lib.version) == 'function') + +testing("info and flags") + +test("typeof",typeof(lib) == 'GRegex') + +print(lib._VERSION) +print("Glib version = "..lib.version()) + +local function getTSize(t) + local c = 0 + for k,v in pairs(t) do + -- print(k.." = "..v) + c = c + 1 + end + return c +end + +local flags = lib.flags() + +-- print("size = "..c) +-- it's 84 for newer GLib, 61 for older +test("flags", getTSize(flags) > 60) +test("cflags", getTSize(lib.compile_flags()) > 15) +test("eflags", getTSize(lib.match_flags()) > 8) + +testing("new") + +local results +local function checkFunc(objname,funcname,...) + results = { pcall(objname[funcname],...) 
} + if results[1] then + return true + end + -- print("Got this error: '"..tostring(results[2]).."'") + return false +end + +test("new", checkFunc(lib,"new",".*")) +test("new", checkFunc(lib,"new","")) +test("new", checkFunc(lib,"new","(hello|world)")) + +test("new_err", not checkFunc(lib,"new","*")) +test("new_err", not checkFunc(lib,"new")) +test("new_err", not checkFunc(lib,"new","(hello|world")) +test("new_err", not checkFunc(lib,"new","[0-9")) +-- invalid compile flag +test("new_err", not checkFunc(lib,"new","[0-9]",flags.PARTIAL)) + + +local val1 = "hello world foo bar" +local val2 = "hello wORld FOO bar" +local patt = "hello (world) (.*) bar" +local rgx = lib.new(patt) +local rgx2 = lib.new(patt,flags.CASELESS) + +testing("typeof") +test("typeof",typeof(rgx) == 'GRegex') +test("typeof",typeof(rgx2) == 'GRegex') + +testing("match") +test("match", checkFunc(lib,"match", val1,patt, 1, flags.CASELESS) and results[2] == "world" and results[3] == "foo") +test("match", checkFunc(lib,"match", val2,patt, 1, flags.CASELESS) and results[2] == "wORld" and results[3] == "FOO") +test("match", checkFunc(lib,"match", val1,rgx) and results[2] == "world" and results[3] == "foo") +test("match", checkFunc(rgx,"match", rgx,val1) and results[2] == "world" and results[3] == "foo") +test("match", checkFunc(rgx2,"match", rgx2,val2, 1) and results[2] == "wORld" and results[3] == "FOO") + +-- different offset won't match this pattern +test("match_err", checkFunc(rgx2,"match", rgx2,val2, 4) and results[2] == nil) + +-- invalid compile flag +test("match_err", not checkFunc(lib,"match", val1,patt, 1, flags.PARTIAL)) +-- invalid match flag +test("match_err", not checkFunc(rgx,"match", rgx,val1, 1, flags.CASELESS)) + +testing("find") + +test("find", checkFunc(lib,"find", val1,patt) and results[2] == 1 and results[3] == val1:len() + and results[4] == "world" and results[5] == "foo") +test("find", checkFunc(lib,"find", val1,rgx) and results[2] == 1 and results[3] == val1:len() + and 
results[4] == "world" and results[5] == "foo") +test("find", checkFunc(rgx,"find", rgx,val1) and results[2] == 1 and results[3] == val1:len() + and results[4] == "world" and results[5] == "foo") + +testing("match") + +--checkFunc(rgx,"exec", rgx,val1) +--print(results[4][3],results[4][4]) +test("exec", checkFunc(rgx,"exec", rgx,val1) and results[2] == 1 and results[3] == val1:len() + and results[4][1] == 7 and results[4][2] == 11 and results[4][3] == 13 and results[4][4] == 15) + +print("\n----------------------------------------------------------\n") + +------- OK, we're sane, so run all the library's real tests --------- + +testing("Lrexlib-provided tests") + +-- we're not using the "real" lib name +local GLIBNAME = "GRegex" +local isglobal = true + +do + local dir + for i = 1, select ("#", ...) do + local arg = select (i, ...) + --print(arg) + if arg:sub(1,2) == "-d" then + dir = arg:sub(3) + end + end + dir = dir:gsub("[/\\]+$", "") + local path = dir .. "/?.lua;" + if package.path:sub(1, #path) ~= path then + package.path = path .. package.path + end +end + +local luatest = require "luatest" + +-- returns: number of failures +local function test_library (libname, setfile, verbose, really_verbose) + if verbose then + print (("[lib: %s; file: %s]"):format (libname, setfile)) + end + local lib = isglobal and _G[libname] or require (libname) + local f = require (setfile) + local sets = f (libname, isglobal) + + local n = 0 -- number of failures + for _, set in ipairs (sets) do + if verbose then + print (set.Name or "Unnamed set") + end + local err = luatest.test_set (set, lib, really_verbose) + if verbose then + for _,v in ipairs (err) do + print ("\nTest " .. 
v.i) + print (" Expected result:\n "..tostring(v)) + luatest.print_results (v[1], " ") + table.remove(v,1) + print ("\n Got:") + luatest.print_results (v, " ") + end + end + n = n + #err + end + if verbose then + print "" + end + + return n +end + +local avail_tests = { + posix = { lib = "rex_posix", "common_sets", "posix_sets" }, + gnu = { lib = "rex_gnu", "common_sets", "emacs_sets", "gnu_sets" }, + oniguruma = { lib = "rex_onig", "common_sets", "oniguruma_sets", }, + pcre = { lib = "rex_pcre", "common_sets", "pcre_sets", "pcre_sets2", }, + glib = { lib = GLIBNAME, "common_sets", "pcre_sets", "pcre_sets2", "glib_sets" }, + spencer = { lib = "rex_spencer", "common_sets", "posix_sets", "spencer_sets" }, + tre = { lib = "rex_tre", "common_sets", "posix_sets", "spencer_sets", --[["tre_sets"]] }, +} + +do + local verbose, really_verbose, tests, nerr = false, false, {}, 0 + local dir + + -- check arguments + for i = 1, select ("#", ...) do + local arg = select (i, ...) + --print(arg) + if arg:sub(1,1) == "-" then + if arg == "-v" then + verbose = true + elseif arg == "-V" then + verbose = true + really_verbose = true + elseif arg:sub(1,2) == "-d" then + dir = arg:sub(3) + end + else + if avail_tests[arg] then + tests[#tests+1] = avail_tests[arg] + else + error ("invalid argument: [" .. arg .. "]") + end + end + end + assert (#tests > 0, "no library specified") + -- give priority to libraries located in the specified directory + if dir and not isglobal then + dir = dir:gsub("[/\\]+$", "") + for _, ext in ipairs {"dll", "so", "dylib"} do + if package.cpath:match ("%?%." .. ext) then + local cpath = dir .. "/?." .. ext .. ";" + if package.cpath:sub(1, #cpath) ~= cpath then + package.cpath = cpath .. 
package.cpath + end + break + end + end + end + -- do tests + for _, test in ipairs (tests) do + package.loaded[test.lib] = nil -- to force-reload the tested library + for _, setfile in ipairs (test) do + nerr = nerr + test_library (test.lib, setfile, verbose, really_verbose) + end + end + print ("Total number of failures: " .. nerr) + + assert(nerr == 0, "Test failed!") +end + + + + +print("\n-----------------------------\n") + +print("All tests passed!\n\n") diff --git a/test/lua/luatest.lua b/test/lua/luatest.lua new file mode 100755 index 0000000000..617329c9c6 --- /dev/null +++ b/test/lua/luatest.lua @@ -0,0 +1,174 @@ +-- See Copyright Notice in the file LICENSE + +-- arrays: deep comparison +local function eq (t1, t2, lut) + if t1 == t2 then return true end + if type(t1) ~= "table" or type(t2) ~= "table" or #t1 ~= #t2 then + return false + end + + lut = lut or {} -- look-up table: are these 2 arrays already compared? + lut[t1] = lut[t1] or {} + if lut[t1][t2] then return true end + lut[t2] = lut[t2] or {} + lut[t1][t2], lut[t2][t1] = true, true + + for k,v in ipairs (t1) do + if not eq (t2[k], v, lut) then return false end -- recursion + end + return true +end + +-- a "nil GUID", to be used instead of nils in datasets +local NT = "b5f74fe5-46f4-483a-8321-e58ba2fa0e17" + +-- pack vararg in table, replacing nils with "NT" items +local function packNT (...) + local t = {} + for i=1, select ("#", ...) do + local v = select (i, ...) 
+ t[i] = (v == nil) and NT or v + end + return t +end + +-- unpack table into vararg, replacing "NT" items with nils +local function unpackNT (t) + local len = #t + local function unpack_from (i) + local v = t[i] + if v == NT then v = nil end + if i == len then return v end + return v, unpack_from (i+1) + end + if len > 0 then return unpack_from (1) end +end + +-- print results (deep into arrays) +local function print_results (val, indent, lut) + indent = indent or "" + lut = lut or {} -- look-up table + local str = tostring (val) + if type (val) == "table" then + if lut[val] then + io.write (indent, str, "\n") + else + lut[val] = true + io.write (indent, str, "\n") + for i,v in ipairs (val) do + print_results (v, " " .. indent, lut) -- recursion + end + end + else + io.write (indent, val == NT and "nil" or str, "\n") + end +end + +-- returns: +-- 1) true, if success; false, if failure +-- 2) test results table or error_message +local function test_function (test, func) + local res + local t = packNT (pcall (func, unpackNT (test[1]))) + if t[1] then + table.remove (t, 1) + res = t + if alien then + local subject = test[1][1] + local buf = alien.buffer (#subject) + if #subject > 0 then + alien.memmove (buf:topointer (), subject, #subject) + end + test[1][1] = buf + local t = packNT (pcall (func, unpackNT (test[1]))) + if t[1] then + table.remove (t, 1) + res = t + else + print "alien test failed" + res = t[2] --> error_message + end + end + else + res = t[2] --> error_message + end + local how = (type (res) == type (test[2])) and + (type (res) == "string" or eq (res, test[2])) -- allow error messages to differ + return how, res +end + +-- returns: +-- 1) true, if success; false, if failure +-- 2) test results table or error_message +-- 3) test results table or error_message +local function test_method (test, constructor, name) + local res1, res2 + local subject = test[2][1] + local ok, r = pcall (constructor, unpackNT (test[1])) + if ok then + local t = packNT 
(pcall (r[name], r, unpackNT (test[2]))) + if t[1] then + table.remove (t, 1) + res1, res2 = t + else + res1, res2 = 2, t[2] --> 2, error_message + end + else + res1, res2 = 1, r --> 1, error_message + end + return eq (res1, test[3]), res1, res2 +end + +-- returns: a list of failed tests +local function test_set (set, lib, verbose) + local list = {} + + if type (set.Func) == "function" then + local func = set.Func + + for i,test in ipairs (set) do + if verbose then + io.write (" running function test "..i.."...") + io.flush () + end + local ok, res = test_function (test, func) + if not ok then + if verbose then io.stdout:write("failed!\n") end + table.insert (list, {i=i, test[2], res}) + elseif verbose then + io.write ("passed\n") + io.flush () + end + end + + elseif type (set.Method) == "string" then + for i,test in ipairs (set) do + if verbose then + io.write (" running method test "..i.."...") + io.flush () + end + local ok, res1, res2 = test_method (test, lib.new, set.Method) + if not ok then + if verbose then io.stdout:write("failed!\n") end + table.insert (list, {i=i, test[3], res1, res2}) + elseif verbose then + io.write ("passed\n") + io.flush () + end + end + + else + error ("neither set.Func nor set.Method is valid") + end + + return list +end + +return { + eq = eq, + NT = NT, + print_results = print_results, + test_function = test_function, + test_method = test_method, + test_set = test_set, +} diff --git a/test/lua/pat2pcre.lua b/test/lua/pat2pcre.lua new file mode 100755 index 0000000000..2d60a443b2 --- /dev/null +++ b/test/lua/pat2pcre.lua @@ -0,0 +1,87 @@ +-- See Copyright Notice in the file lrexlib.h + +-- Convert Lua regex pattern to its PCRE equivalent. 
+ +local t_esc = { + a = "[:alpha:]", + A = "[:^alpha:]", + c = "[:cntrl:]", + C = "[:^cntrl:]", + d = "[:digit:]", + D = "[:^digit:]", + l = "[:lower:]", + L = "[:^lower:]", + p = "[:punct:]", + P = "[:^punct:]", + s = "[:space:]", + S = "[:^space:]", + u = "[:upper:]", + U = "[:^upper:]", + w = "[:alnum:]", + W = "[:^alnum:]", + x = "[:xdigit:]", + X = "[:^xdigit:]", + z = "\\x00", + Z = "\\x01-\\xFF", +} + +local function rep_normal (ch) + assert (ch ~= "b", "\"%b\" subpattern is not supported") + assert (ch ~= "0", "invalid capture index") + local v = t_esc[ch] + return v and ("[" .. v .. "]") or ("\\" .. ch) +end + +local function rep_charclass (ch) + return t_esc[ch] or ("\\" .. ch) +end + +function pat2pcre (s) + local ind = 0 + + local function getc () + ind = ind + 1 + return string.sub (s, ind, ind) + end + + local function getnum () + local num = string.match (s, "^\\(%d%d?%d?)", ind) + if num then + ind = ind + #num + return string.format ("\\x%02X", num) + end + end + + local out, state = "", "normal" + while ind < #s do + local ch = getc () + if state == "normal" then + if ch == "%" then + out = out .. rep_normal (getc ()) + elseif ch == "-" then + out = out .. "*?" + elseif ch == "." then + out = out .. "\\C" + elseif ch == "[" then + out = out .. ch + state = "charclass" + else + local num = getnum () + out = num and (out .. num) or (out .. ch) + end + elseif state == "charclass" then + if ch == "%" then + out = out .. rep_charclass (getc ()) + elseif ch == "]" then + out = out .. ch + state = "normal" + else + local num = getnum () + out = num and (out .. num) or (out .. 
ch) + end + end + end + return out +end + +return pat2pcre diff --git a/test/lua/pcre_sets.lua b/test/lua/pcre_sets.lua new file mode 100755 index 0000000000..d1e50390cc --- /dev/null +++ b/test/lua/pcre_sets.lua @@ -0,0 +1,179 @@ +-- See Copyright Notice in the file lrexlib.h + +local luatest = require "luatest" +local N = luatest.NT + +local function norm(a) return a==nil and N or a end + +local function fill (n, m) + local t = {} + for i = n, m, -1 do table.insert (t, i) end + return t +end + +local function set_named_subpatterns (lib, flg) + return { + Name = "Named Subpatterns", + Func = function (subj, methodname, patt, name1, name2) + local r = lib.new (patt) + local _,_,caps = r[methodname] (r, subj) + return norm(caps[name1]), norm(caps[name2]) + end, + --{} N.B. subject is always first element + { {"abcd", "tfind", "(?P<dog>.)b.(?P<cat>d)", "dog", "cat"}, {"a","d"} }, + { {"abcd", "exec", "(?P<dog>.)b.(?P<cat>d)", "dog", "cat"}, {"a","d"} }, + } +end + +local function set_f_find (lib, flg) + local cp1251 = + "ÀÁÂÃÄŨÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÜÛÚÝÞßàáâãäå¸æçèéêëìíîïðñòóôõö÷øùüûúýþÿ" + local loc = "Russian_Russia.1251" + return { + Name = "Function find", + Func = lib.find, + --{subj, patt, st,cf,ef,lo}, { results } + { {"abcd", ".+", 5}, { N } }, -- failing st + { {"abcd", ".*?"}, { 1,0 } }, -- non-greedy + { {"abc", "aBC", N,flg.CASELESS}, { 1,3 } }, -- cf + { {"abc", "aBC", N,"i" }, { 1,3 } }, -- cf + { {"abc", "bc", N,flg.ANCHORED}, { N } }, -- cf + { {"abc", "bc", N,N,flg.ANCHORED}, { N } }, -- ef +--{ {cp1251, "[[:upper:]]+", N,N,N, loc}, { 1,33} }, -- locale +--{ {cp1251, "[[:lower:]]+", N,N,N, loc}, {34,66} }, -- locale +} +end + +local function set_f_match (lib, flg) + return { + Name = "Function match", + Func = lib.match, + --{subj, patt, st,cf,ef,lo}, { results } + { {"abcd", ".+", 5}, { N }}, -- failing st + { {"abcd", ".*?"}, { "" }}, -- non-greedy + { {"abc", "aBC", N,flg.CASELESS}, {"abc" }}, -- cf + { {"abc", "aBC", N,"i" }, {"abc" }}, -- cf + { 
{"abc", "bc", N,flg.ANCHORED}, { N }}, -- cf + { {"abc", "bc", N,N,flg.ANCHORED}, { N }}, -- ef +} +end + +local function set_f_gmatch (lib, flg) + -- gmatch (s, p, [cf], [ef]) + local pCSV = "(^[^,]*)|,([^,]*)" + local F = false + local function test_gmatch (subj, patt) + local out, guard = {}, 10 + for a, b in lib.gmatch (subj, patt) do + table.insert (out, { norm(a), norm(b) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function gmatch", + Func = test_gmatch, + --{ subj patt results } + { {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj + { {"", pCSV}, {{"",F}} }, + { {"12", pCSV}, {{"12",F}} }, + { {",", pCSV}, {{"", F},{F,""}} }, + { {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} }, + { {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} }, + } +end + +local function set_f_split (lib, flg) + -- split (s, p, [cf], [ef]) + local function test_split (subj, patt) + local out, guard = {}, 10 + for a, b, c in lib.split (subj, patt) do + table.insert (out, { norm(a), norm(b), norm(c) }) + guard = guard - 1 + if guard == 0 then break end + end + return unpack (out) + end + return { + Name = "Function split", + Func = test_split, + --{ subj patt results } + { {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj + { {"ab", "$"}, {{"ab","",N}, {"",N,N}, } }, + { {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, + { {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } }, + { {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } }, + } +end + +local function set_m_exec (lib, flg) + return { + Name = "Method exec", + Method = "exec", +--{patt,cf,lo}, {subj,st,ef} { results } + { {".+"}, {"abcd",5}, { N } }, -- failing st + { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy + { {"aBC",flg.CASELESS}, {"abc"}, {1,3,{}} }, -- cf + { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf + { {"bc",flg.ANCHORED}, {"abc"}, { N } }, -- cf + { {"bc"}, {"abc",N, 
flg.ANCHORED}, { N } }, -- ef +} +end + +local function set_m_tfind (lib, flg) + return { + Name = "Method tfind", + Method = "tfind", +--{patt,cf,lo}, {subj,st,ef} { results } + { {".+"}, {"abcd",5}, { N } }, -- failing st + { {".*?"}, {"abcd"}, {1,0,{}} }, -- non-greedy + { {"aBC",flg.CASELESS}, {"abc"}, {1,3,{}} }, -- cf + { {"aBC","i" }, {"abc"}, {1,3,{}} }, -- cf + { {"bc",flg.ANCHORED}, {"abc"}, { N } }, -- cf + { {"bc"}, {"abc",N, flg.ANCHORED}, { N } }, -- ef +} +end + +local function set_m_dfa_exec (lib, flg) + return { + Name = "Method dfa_exec", + Method = "dfa_exec", +--{patt,cf,lo}, {subj,st,ef,os,ws} { results } + { {".+"}, {"abcd"}, {1,{4,3,2,1},4} }, -- [none] + { {".+"}, {"abcd",2}, {2,{4,3,2}, 3} }, -- positive st + { {".+"}, {"abcd",-2}, {3,{4,3}, 2} }, -- negative st + { {".+"}, {"abcd",5}, {N } }, -- failing st + { {".*"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- [none] + { {".*?"}, {"abcd"}, {1,{4,3,2,1,0},5}}, -- non-greedy + { {"aBC",flg.CASELESS}, {"abc"}, {1,{3},1} }, -- cf + { {"aBC","i" }, {"abc"}, {1,{3},1} }, -- cf + { {"bc"}, {"abc"}, {2,{3},1} }, -- [none] + { {"bc",flg.ANCHORED}, {"abc"}, {N } }, -- cf + { {"bc"}, {"abc",N, flg.ANCHORED}, {N } }, -- ef + { { "(.)b.(d)"}, {"abcd"}, {1,{4},1} }, --[captures] + { {"abc"}, {"ab"}, {N } }, + { {"abc"}, {"ab",N,flg.PARTIAL}, {1,{2},flg.ERROR_PARTIAL} }, + { {".+"}, {string.rep("a",50),N,N,50,50}, {1, fill(50,26), 0}},-- small ovecsize +} +end + +return function (libname, isglobal) + local lib = isglobal and _G[libname] or require (libname) + local flags = lib.flags () + local sets = { + set_f_match (lib, flags), + set_f_find (lib, flags), + set_f_gmatch (lib, flags), + set_f_split (lib, flags), + set_m_exec (lib, flags), + set_m_tfind (lib, flags), + } + if flags.MAJOR >= 4 then + table.insert (sets, set_named_subpatterns (lib, flags)) + end + if flags.MAJOR >= 6 then + table.insert (sets, set_m_dfa_exec (lib, flags)) + end + return sets +end diff --git a/test/lua/pcre_sets2.lua 
b/test/lua/pcre_sets2.lua new file mode 100755 index 0000000000..c0c8d7a5b9 --- /dev/null +++ b/test/lua/pcre_sets2.lua @@ -0,0 +1,198 @@ +-- See Copyright Notice in the file LICENSE + +local pat2pcre = require "pat2pcre" + +local function get_gsub (lib) + return lib.gsub or + function (subj, pattern, repl, n) + return lib.new (pattern) : gsub (subj, repl, n) + end +end + +local function set_f_gsub1 (lib, flg) + local subj, pat = "abcdef", "[abef]+" + return { + Name = "Function gsub, set1", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {"a\0c", ".", "#" }, {"###", 3, 3} }, -- subj contains nuls + } +end + +local function set_f_gsub4 (lib, flg) + local pCSV = "(^[^,]*)|,([^,]*)" + local fCSV = function (a,b) return "["..(a or b).."]" end + local set = { + Name = "Function gsub, set4", + Func = get_gsub (lib), + --{ s, p, f, n, res1, res2, res3 }, + { {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} }, + { {"a2c3", ".-", "#" }, {"#########", 9, 9} }, -- test .- + { {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} }, + { {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} }, + { {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d + { {"a2c3", "%D", "#" }, {"#2#3", 2, 2} }, -- test %D + { {"a \t\nb", "%s", "#" }, {"a###b", 3, 3} }, -- test %s + { {"a \t\nb", "%S", "#" }, {"# \t\n#", 2, 2} }, -- test %S + { {"abcd", "\\b", "%1"}, {"abcd", 2, 2} }, + { {"", pCSV,fCSV}, {"[]", 1, 1} }, + { {"123", pCSV,fCSV}, {"[123]", 1, 1} }, + { {",", pCSV,fCSV}, {"[][]", 2, 2} }, + { {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}}, + { {",,123,456,,abc,789,", pCSV,fCSV}, {"[][][123][456][][abc][789][]", 8, 8}}, + } + -- convert patterns: lua -> pcre + for _, test in ipairs (set) do + test[1][2] = pat2pcre (test[1][2]) + end + return set +end + +local function set_f_gsub7 (lib, flg) + local subj = "" + for i = 0, 255 do + subj = subj .. 
string.char (i) + end + + -- This set requires calling prepare_set before calling gsub_test + local set = { + Name = "Function gsub, set7", + Func = get_gsub (lib), + --{ s, p, f, n, }, + { {subj, "%a", "" }, }, + { {subj, "%A", "" }, }, + { {subj, "%c", "" }, }, + { {subj, "%C", "" }, }, + { {subj, "%l", "" }, }, + { {subj, "%L", "" }, }, + { {subj, "%p", "" }, }, + { {subj, "%P", "" }, }, + { {subj, "%u", "" }, }, + { {subj, "%U", "" }, }, + { {subj, "%w", "" }, }, + { {subj, "%W", "" }, }, + { {subj, "%x", "" }, }, + { {subj, "%X", "" }, }, + { {subj, "%z", "" }, }, + { {subj, "%Z", "" }, }, + + { {subj, "[%a]", "" }, }, + { {subj, "[%A]", "" }, }, + { {subj, "[%c]", "" }, }, + { {subj, "[%C]", "" }, }, + { {subj, "[%l]", "" }, }, + { {subj, "[%L]", "" }, }, + { {subj, "[%p]", "" }, }, + { {subj, "[%P]", "" }, }, + { {subj, "[%u]", "" }, }, + { {subj, "[%U]", "" }, }, + { {subj, "[%w]", "" }, }, + { {subj, "[%W]", "" }, }, + { {subj, "[%x]", "" }, }, + { {subj, "[%X]", "" }, }, + { {subj, "[%z]", "" }, }, + { {subj, "[%Z]", "" }, }, + + { {subj, "[%a_]", "" }, }, + { {subj, "[%A_]", "" }, }, + { {subj, "[%c_]", "" }, }, + { {subj, "[%C_]", "" }, }, + { {subj, "[%l_]", "" }, }, + { {subj, "[%L_]", "" }, }, + { {subj, "[%p_]", "" }, }, + { {subj, "[%P_]", "" }, }, + { {subj, "[%u_]", "" }, }, + { {subj, "[%U_]", "" }, }, + { {subj, "[%w_]", "" }, }, + { {subj, "[%W_]", "" }, }, + { {subj, "[%x_]", "" }, }, + { {subj, "[%X_]", "" }, }, + { {subj, "[%z_]", "" }, }, + { {subj, "[%Z_]", "" }, }, + + { {subj, "[%a%d]", "" }, }, + { {subj, "[%A%d]", "" }, }, + { {subj, "[%c%d]", "" }, }, + { {subj, "[%C%d]", "" }, }, + { {subj, "[%l%d]", "" }, }, + { {subj, "[%L%d]", "" }, }, + { {subj, "[%p%d]", "" }, }, + { {subj, "[%P%d]", "" }, }, + { {subj, "[%u%d]", "" }, }, + { {subj, "[%U%d]", "" }, }, + { {subj, "[%w%d]", "" }, }, + { {subj, "[%W%d]", "" }, }, + { {subj, "[%x%d]", "" }, }, + { {subj, "[%X%d]", "" }, }, + { {subj, "[%z%d]", "" }, }, + { {subj, "[%Z%d]", "" }, }, 
+ + { {subj, "[^%a%d]", "" }, }, + { {subj, "[^%A%d]", "" }, }, + { {subj, "[^%c%d]", "" }, }, + { {subj, "[^%C%d]", "" }, }, + { {subj, "[^%l%d]", "" }, }, + { {subj, "[^%L%d]", "" }, }, + { {subj, "[^%p%d]", "" }, }, + { {subj, "[^%P%d]", "" }, }, + { {subj, "[^%u%d]", "" }, }, + { {subj, "[^%U%d]", "" }, }, + { {subj, "[^%w%d]", "" }, }, + { {subj, "[^%W%d]", "" }, }, + { {subj, "[^%x%d]", "" }, }, + { {subj, "[^%X%d]", "" }, }, + { {subj, "[^%z%d]", "" }, }, + { {subj, "[^%Z%d]", "" }, }, + + { {subj, "[^%a_]", "" }, }, + { {subj, "[^%A_]", "" }, }, + { {subj, "[^%c_]", "" }, }, + { {subj, "[^%C_]", "" }, }, + { {subj, "[^%l_]", "" }, }, + { {subj, "[^%L_]", "" }, }, + { {subj, "[^%p_]", "" }, }, + { {subj, "[^%P_]", "" }, }, + { {subj, "[^%u_]", "" }, }, + { {subj, "[^%U_]", "" }, }, + { {subj, "[^%w_]", "" }, }, + { {subj, "[^%W_]", "" }, }, + { {subj, "[^%x_]", "" }, }, + { {subj, "[^%X_]", "" }, }, + { {subj, "[^%z_]", "" }, }, + { {subj, "[^%Z_]", "" }, }, + + { {subj, "\100", "" }, }, + { {subj, "[\100]", "" }, }, + { {subj, "[^\100]", "" }, }, + { {subj, "[\100-\200]", "" }, }, + { {subj, "[^\100-\200]", "" }, }, + { {subj, "\100a", "" }, }, + { {subj, "[\100a]", "" }, }, + { {subj, "[^\100a]", "" }, }, + { {subj, "[\100-\200a]", "" }, }, + { {subj, "[^\100-\200a]", "" }, }, + } + -- fill in reference results + for _,v in ipairs(set) do + local r0, r1, r2 = pcall (string.gsub, unpack (v[1])) + v[2] = r0 and { r1, r2, r2 } or { r0, r1 } + end + -- convert patterns: lua -> pcre + for _, test in ipairs (set) do + test[1][2] = pat2pcre (test[1][2]) + end + return set +end + +return function (libname, isglobal) + local lib = isglobal and _G[libname] or require (libname) + local flags = lib.flags and lib.flags () + local sets = { + set_f_gsub1 (lib, flags), + set_f_gsub4 (lib, flags), + } + if flags.MAJOR*100 + flags.MINOR > 405 then + table.insert (sets, set_f_gsub7 (lib, flags)) + end + return sets +end diff --git a/test/suite-wslua.sh b/test/suite-wslua.sh 
index 20b97f96ab..f734f41aea 100755 --- a/test/suite-wslua.sh +++ b/test/suite-wslua.sh @@ -196,7 +196,7 @@ wslua_step_args_test() { test_step_ok } -unittests_step_globals_test() { +wslua_step_globals_test() { if [ $HAVE_LUA -ne 0 ]; then test_step_skipped return @@ -218,6 +218,22 @@ unittests_step_globals_test() { test_step_ok } +wslua_step_gregex_test() { + if [ $HAVE_LUA -ne 0 ]; then + test_step_skipped + return + fi + + # Tshark catches lua script failures, so we have to parse the output. + $TSHARK -r $CAPTURE_DIR/empty.pcap -X lua_script:$TESTS_DIR/lua/gregex.lua -X lua_script1:-d$TESTS_DIR/lua/ -X lua_script1:glib -X lua_script1:-V > testout.txt 2>&1 + if grep -q "All tests passed!" testout.txt; then + test_step_ok + else + cat testout.txt + test_step_failed "didn't find pass marker" + fi +} + wslua_step_struct_test() { if [ $HAVE_LUA -ne 0 ]; then test_step_skipped @@ -244,7 +260,8 @@ wslua_suite() { test_step_set_post wslua_cleanup_step test_step_add "wslua dissector" wslua_step_dissector_test test_step_add "wslua field/fieldinfo" wslua_step_field_test - test_step_add "wslua globals" unittests_step_globals_test + test_step_add "wslua globals" wslua_step_globals_test + test_step_add "wslua gregex" wslua_step_gregex_test test_step_add "wslua int64" wslua_step_int64_test test_step_add "wslua listener" wslua_step_listener_test test_step_add "wslua nstime" wslua_step_nstime_test |