diff --git a/doc/format-spec.md b/doc/format-spec.md
index ea2ba6e..c2c5607 100644
--- a/doc/format-spec.md
+++ b/doc/format-spec.md
@@ -125,6 +125,7 @@ The following commands are available:
Match the extended regular expression 'str', which must be of
string type. Matching is performed greedily and '.' (a dot)
matches a newline, use [^\n] to work around this.
+ More details about the supported regex syntax can be found [here](regex-spec.md).
Optionally assign the matched string to variable 'name'. Note that
since some string characters have to be escaped already, you might
need to double escape them in a regex string. For example, to
diff --git a/doc/regex-spec.md b/doc/regex-spec.md
new file mode 100644
index 0000000..a4707cf
--- /dev/null
+++ b/doc/regex-spec.md
@@ -0,0 +1,63 @@
+Checktestdata Regex specification
+=================================
+
+A **reg**ular **ex**pression (or regex) can be used to match strings.
+Formally, it describes a set of strings and a string is matched if it is contained in the set.
+
+Regular expressions can contain both literal and special characters.
+Most literal characters, like `A`, `a`, or `0`, are the simplest regular expressions and they simply match themselves.
+Additionally, more complex regular expressions can be expressed by concatenating simpler regular expressions.
+If *A* and *B* are both regular expressions, then *AB* is also a regular expression.
+In general, if a string *x* matches *A* and another string *y* matches *B*, then the string *xy* matches *AB*.
+
+Besides the literal characters, there are also the following special characters: `'('`, `')'`, `'{'`, `'}'`, `'['`, `']'`, `'*'`, `'+'`, `'?'`, `'|'`, `'\'`, `'^'`, `'.'`, `'-'`.
+Their meaning is as follows:
+
+* `.`: this matches any character, including newlines. If you need to match anything except the newline character use `[^\n]` instead.
+* `[]`: indicates a set of characters.
+Inside a set definition:
+ * Literal characters can be listed and all of them will be matched, i.e., `[abc]` will match `'a'`, `'b'` as well as `'c'` but not `'abc'`.
+ * Ranges can be specified with `-`, for example `[a-z]` will match any lowercase ASCII letter and `[0-9]` will match any digit.
+ If `-` is escaped (e.g. `[a\-z]`) or if the character preceding it belongs to another range (e.g. `[a-a-z]`), or if it is the first or last character (e.g. `[-a]` or `[a-]`), it will match a literal `'-'`.
+ It is an error if the first character of the range has a higher code point than the last (e.g., `[z-a]`).
+ * The complement of a character set is formed if the first character of the set is `^`.
+ For example `[^a]` will match anything except `'a'`.
+ If `^` is escaped (e.g. `[\^]`) or if it is not the first character (e.g. `[a^]`) it will match a literal `'^'`.
+ * `\` can be used to escape special characters.
+ However, most special characters do not need to be escaped.
+ Only `'['` and `']'` must be escaped and `'^'` or `'-'` might need to be escaped depending on the position.
+ For example both `[\-]` and `[-]` will match a literal `'-'`.
+ If `\` is not followed by a special character, it matches a literal `'\'`.
+ * It is an error if the character set does not specify any characters (e.g. `[]` or `[^]`).
+* `{m,n}`: causes the resulting regular expression to match from `m` to `n` repetitions of the preceding regular expression.
+Matching is done greedily, i.e., as many repetitions as possible are matched.
+Omitting *m* specifies a lower bound of zero, and omitting *n* specifies an infinite upper bound.
+It is an error if *m* is larger than *n*.
+Both *m* and *n* must be integers without sign and without leading zeros.
+It is an error if the preceding regular expression is empty or ends with another repetition (e.g. `a{1,2}{1,2}`). If you want to repeat a repetition, wrap it in `()` (e.g. `(a{1,2}){1,2}`).
+* `{m}`: is a shorthand for `{m,m}`.
+It is an error to omit `m`.
+* `*`: is a shorthand for `{0,}`.
+* `+`: is a shorthand for `{1,}`.
+* `?`: is a shorthand for `{0,1}`.
+* `|`: can be used to form the union of two regular expressions.
+If *A* and *B* are both regular expressions, then *A|B* is also a regular expression.
+In general, if a string *x* matches *A* or it matches *B*, then it also matches *A|B*.
+Matching is done in *leftmost-first* fashion.
+This means that any match of *A* is preferred over all matches for *B*.
+For example, the checktestdata command `REGEX("p|ps")` will only extract `p` even if the input is `ps`.
+* `(...)`: if *A* is a regular expression then *(A)* is also a regular expression.
+* `\`: escapes the subsequent special character.
+If `\` is not followed by a special character it will match a literal `\` (e.g. `\d` will match `'\d'`).
+Note that checktestdata strings also use `\` to escape characters.
+Therefore, `REGEX("\\*")` becomes the regular expression `\*` and matches a literal `'*'`, not a variable amount of `\`.
+
+## Notes
+
+The regular expression syntax and behaviour are carefully chosen to match a common subset of many modern regular expression definitions and implementations like Perl, Python, JavaScript, Ruby, PHP, Java, C++, Rust, Go, ...
+Advanced features like non-greedy quantifiers, capturing groups, backreferences, lookahead, lookbehind, etc. are not supported.
+Shorthands like `\d` or `[:digit:]` are also not supported, use `[0-9]` instead.
+
+> [!WARNING]
+> Earlier versions of checktestdata used POSIX-like regular expressions with *leftmost-longest* matching and support for `[:digit:]`.
+> This is no longer supported and matching is done *leftmost-first* instead.
diff --git a/libchecktestdata.cc b/libchecktestdata.cc
index 4e28c24..82f6fe8 100644
--- a/libchecktestdata.cc
+++ b/libchecktestdata.cc
@@ -41,8 +41,8 @@ class doesnt_match_exception {};
class eof_found_exception {};
class generate_exception {};
-const int display_before_error = 65;
-const int display_after_error = 50;
+constexpr int display_before_error = 65;
+constexpr int display_after_error = 50;
size_t prognr;
const command *currcmd;
@@ -63,8 +63,8 @@ vector program;
// This stores array-type variables like x[i,j] as string "x" and
// vector of the indices. Plain variables are stored using an index
// vector of zero length.
-typedef map,value_t> indexmap;
-typedef map>> valuemap;
+using indexmap = map,value_t>;
+using valuemap = map>>;
map variable, preset;
map rev_variable, rev_preset;
@@ -214,6 +214,190 @@ long string2int(const string &s)
return res;
}
+// Cache for compiled regular expressions
+map regex_cache;
+
+// Validate a checktestdata regex and restrict/adjust c++ standard regex behaviour:
+// '.' matches everything, including newline
+// '[...]' character set (non empty). '^' forms the complement of the charset
+// '{m,n}' repeat m to n times (m and n are optional)
+// '{m}' repeat exactly m times (m is mandatory)
+// '*', '+', '?' shorthand repeat notation
+// '|' union of two regex
+// '(...)' parenthesis
+// '\' escape special characters
+class RegexParser {
+	// What precedes the current position; used to reject misplaced repeats.
+	static constexpr int STATE_EMPTY = 1;
+	static constexpr int STATE_NONEMPTY = 2;
+	static constexpr int STATE_REPEAT = 3;
+
+	static constexpr char ANY_CHAR = '\0'; // consume(): accept any token
+	static constexpr string_view SPECIAL = "(){}[]*+?|\\^.-"; // can be escaped with '\'
+	static constexpr string_view UNSAFE = "(){}[]*+?|\\^.-$~"; // escaped when emitted as literal
+	static constexpr string_view CHARSET_UNSAFE = "[]|\\^-&~"; // escaped when emitted in a charset
+
+	static bool is_special(char c) { return SPECIAL.find(&c, 0, 1) != string_view::npos; }
+	static bool is_charset_unsafe(char c) { return CHARSET_UNSAFE.find(&c, 0, 1) != string_view::npos; }
+	static bool is_unsafe(char c) { return UNSAFE.find(&c, 0, 1) != string_view::npos; }
+
+	string raw; // owns the input that todo refers to
+	string_view todo; // part of raw that has not been consumed yet
+	string out; // translated pattern built so far
+
+	// Removes and returns the next token: a backslash followed by a special
+	// character is one two-character token, anything else a single character.
+	// Returns an empty string if the input is exhausted.
+	string pop() {
+		size_t len = todo.size() >= 2 && todo[0] == '\\' && is_special(todo[1]) ? 2 : 1;
+		string token = string(todo.substr(0, len));
+		todo.remove_prefix(token.size());
+		return token;
+	}
+
+	// Copies the next token to the output. It is an error if the input is
+	// exhausted or if the token does not start with 'expected' (unless that is
+	// ANY_CHAR). 'literal' escapes unsafe characters; otherwise '.' is rewritten.
+	void consume(char expected = ANY_CHAR, bool literal = false) {
+		string token = pop();
+		// An unterminated '(', '[' or '{' gets here with no input left.
+		if ( token.empty() ) error("invalid regex: unexpected end");
+		if ( expected!=ANY_CHAR && token[0]!=expected ) error("invalid regex: unexpected char");
+		assert(!token.empty());
+		if ( literal && token.size()==1 && is_unsafe(token[0]) ) out += '\\';
+		if ( !literal && token=="." ) token = "[\\s\\S]";
+		out += token;
+	}
+
+	// Parses a character set '[...]', optionally complemented by a leading '^'.
+	// Tokens are buffered in tmp and written with charset-unsafe characters
+	// escaped; a range 'x-y' is checked for order and written with a bare '-'.
+	void parse_charset() {
+		consume('[');
+		if ( !todo.empty() && todo[0]=='^' ) consume();
+		vector<string> tmp;
+		auto flush_tmp = [&](){
+			for ( string& token : tmp ) {
+				if ( token.size()==1 && is_charset_unsafe(token[0]) ) out += '\\';
+				out += token;
+			}
+			tmp.clear();
+		};
+		bool empty = true;
+		while ( !todo.empty() && todo[0]!=']' ) {
+			if ( todo[0]=='[' ) error("invalid regex: nested charset?");
+			tmp.push_back(pop());
+			empty = false;
+			// An unescaped '-' between two buffered tokens forms a range.
+			if ( tmp.size() >= 3 && tmp[tmp.size()-2]=="-" ) {
+				string lhs = tmp[tmp.size()-3];
+				string rhs = tmp[tmp.size()-1];
+				if ( lhs.back()>rhs.back() ) error("invalid regex: invalid character range");
+				tmp.pop_back();
+				tmp.pop_back();
+				flush_tmp();
+				out += '-';
+				tmp.push_back(rhs);
+				flush_tmp();
+			}
+		}
+		flush_tmp();
+		if ( empty ) error("empty character set");
+		consume(']');
+	}
+
+	// Copies a run of decimal digits to the output and returns its value, or
+	// -1 if there are no digits. More than one digit with a leading zero is an error.
+	int parse_non_negative_int() {
+		string digits = "";
+		while ( !todo.empty() && todo[0]>='0' && todo[0]<='9' ) {
+			out += todo[0];
+			digits += todo[0];
+			todo.remove_prefix(1);
+		}
+		if ( digits.size()>1 && digits[0]=='0' ) error("invalid regex: range bound has leading zeros");
+		return digits.empty() ? -1 : string2int(digits);
+	}
+
+	// Parses '{m}', '{m,n}', '{m,}', '{,n}' or '{,}'. A missing lower bound is
+	// written to the output as an explicit 0; '{}' without any bound is an error.
+	void parse_repeat() {
+		consume('{');
+		int lower = parse_non_negative_int();
+		if ( !todo.empty() && todo[0]==',' ) {
+			if ( lower < 0 ) out += '0';
+			consume();
+			int upper = parse_non_negative_int();
+			if ( lower>=0 && upper>=0 && lower>upper ) error("invalid regex: invalid range");
+		} else if ( lower<0 ) {
+			error("invalid regex: missing range length");
+		}
+		consume('}');
+	}
+
+	// Parses up to the end of the input or up to a ')', which is left in todo
+	// for the caller. Recurses for every '(' group.
+	void parse() {
+		int state = STATE_EMPTY;
+		auto transition = [&](int next) {
+			if ( next==STATE_REPEAT ) {
+				if ( state==STATE_EMPTY ) error("invalid regex: nothing to repeat");
+				if ( state==STATE_REPEAT ) error("invalid regex: multiple repeats");
+			}
+			state = next;
+		};
+		while ( !todo.empty() && todo[0]!=')' ) {
+			switch ( todo[0] ) {
+			case '[':
+				transition(STATE_NONEMPTY);
+				parse_charset();
+				break;
+			case '(':
+				transition(STATE_NONEMPTY);
+				consume();
+				parse();
+				consume(')');
+				break;
+			case '{':
+				transition(STATE_REPEAT);
+				parse_repeat();
+				break;
+			case '*':
+			case '+':
+			case '?':
+				transition(STATE_REPEAT);
+				consume();
+				break;
+			case '|':
+				transition(STATE_EMPTY);
+				consume();
+				break;
+			case '.':
+				transition(STATE_NONEMPTY);
+				consume();
+				break;
+			default:
+				transition(STATE_NONEMPTY);
+				consume(ANY_CHAR, true);
+			}
+		}
+	}
+
+public:
+	RegexParser(const string& raw_) : raw(raw_), todo(raw) {}
+
+	// Translates the whole input and returns the compiled regex. Input left over
+	// after parsing means a ')' without a matching '(' and is reported as an error.
+	regex compile() {
+		if ( !todo.empty() ) parse();
+		if ( !todo.empty() ) {
+			assert(todo[0]==')');
+			error("invalid regex: unmatched parenthesis");
+		}
+		return regex(out, regex::optimize | regex::nosubs);
+	}
+};
+
// forward declarations
value_t eval(const expr&);
bigint eval_as_int(const expr& e);
@@ -322,11 +506,11 @@ value_t value(const expr& x)
template
struct arith_result {
- typedef typename conditional<
+ using type = typename conditional<
is_same::value && is_same::value,
bigint,
mpf_class
- >::type type;
+ >::type;
};
template struct arith_compatible {
@@ -352,7 +536,7 @@ struct arithmetic_##name : public boost::static_visitor {\
}\
template::value,int>::type = 0,\
class C = typename arith_result::type>\
- value_t operator()(const A& a, const B& b)const {\
+ value_t operator()(const A& a, const B& b)const {\
return value_t(C(a op b));\
}\
};\
@@ -364,12 +548,12 @@ value_t operator op(const value_t &x, const value_t &y) \
#define DECL_VALUE_CMPOP(op,name) \
struct arithmetic_##name : public boost::static_visitor {\
template::value,int>::type = 0>\
- bool operator()(const A& a, const B& b)const {\
+ bool operator()(const A& a, const B& b)const {\
cerr << "cannot compute " << a << " " #op " " << b << endl; \
exit(exit_failure);\
}\
template::value,int>::type = 0>\
- bool operator()(const A& a, const B& b)const {\
+ bool operator()(const A& a, const B& b)const {\
return a op b;\
}\
};\
@@ -952,7 +1136,7 @@ void gentoken(command cmd, ostream &datastream)
else if ( cmd.name()=="REGEX" ) {
string regexstr = eval(cmd.args[0]).getstr();
-// regex e1(regex, regex::extended); // this is only to check the expression
+// RegexParser(regexstr).compile(); // this is only to check the expression
string str = genregex(regexstr);
datastream << str;
if ( cmd.nargs()>=2 ) setvar(cmd.args[1],value_t(str));
@@ -1114,7 +1298,11 @@ void checktoken(const command& cmd)
else if ( cmd.name()=="REGEX" ) {
string str = eval(cmd.args[0]).getstr();
- regex regexstr(str,regex::extended|regex::nosubs|regex::optimize);
+ auto cache_it = regex_cache.find(str);
+ if ( cache_it == regex_cache.end() ) {
+ cache_it = regex_cache.emplace(str, RegexParser(str).compile()).first;
+ }
+ regex regexstr = cache_it->second;
smatch res;
string matchstr;
diff --git a/libchecktestdata.hpp b/libchecktestdata.hpp
index bd1ed25..cf27dc9 100644
--- a/libchecktestdata.hpp
+++ b/libchecktestdata.hpp
@@ -9,13 +9,13 @@
namespace checktestdata {
-const int exit_failure = 2;
+constexpr int exit_failure = 2;
-const int opt_whitespace_ok = 1; // ignore additional whitespace
-const int opt_quiet = 2; // quiet execution: only return status
-const int opt_debugging = 4; // print additional debugging statements
+constexpr int opt_whitespace_ok = 1; // ignore additional whitespace
+constexpr int opt_quiet = 2; // quiet execution: only return status
+constexpr int opt_debugging = 4; // print additional debugging statements
-const int float_precision = 15; // output precision (digits) of floats
+constexpr int float_precision = 15; // output precision (digits) of floats
void init_checktestdata(std::istream &progstream, int opt_mask = 0, long seed = -1);
/* Initialize libchecktestdata by loading syntax from progstream and
diff --git a/tests/test_23_prog.in b/tests/test_23_prog.in
index 9da11ef..35ebc07 100644
--- a/tests/test_23_prog.in
+++ b/tests/test_23_prog.in
@@ -1,3 +1,3 @@
SET(foo="bar.*")
STRING(foo) NEWLINE
-REGEX(foo) # Note that '.' also matches newlines and ERE is greedy.
+REGEX(foo) # Note that '.' also matches newlines and is greedy.
diff --git a/tests/test_regex1_data.in b/tests/test_regex1_data.in
new file mode 100644
index 0000000..1f67c6a
--- /dev/null
+++ b/tests/test_regex1_data.in
@@ -0,0 +1 @@
+a afJ7bayb
diff --git a/tests/test_regex1_prog.in b/tests/test_regex1_prog.in
new file mode 100644
index 0000000..f32f169
--- /dev/null
+++ b/tests/test_regex1_prog.in
@@ -0,0 +1,12 @@
+# IGNORE GENERATE TESTING
+REGEX("a a") # contains space
+REGEX("[e-h]") # character class
+REGEX("[I-M]") # character class
+REGEX("[5-8]") # character class
+REGEX("[^a]") # character class
+STRING("a")
+REGEX("x?") # optional
+REGEX("y?") # optional
+REGEX("z?") # optional
+STRING("b")
+REGEX(".+") # any including newline
diff --git a/tests/test_regex2_data.err1 b/tests/test_regex2_data.err1
new file mode 100644
index 0000000..ace2750
--- /dev/null
+++ b/tests/test_regex2_data.err1
@@ -0,0 +1,2 @@
+1
+"[]"
diff --git a/tests/test_regex2_data.err10 b/tests/test_regex2_data.err10
new file mode 100644
index 0000000..4be5494
--- /dev/null
+++ b/tests/test_regex2_data.err10
@@ -0,0 +1,2 @@
+1
+"*" *
diff --git a/tests/test_regex2_data.err11 b/tests/test_regex2_data.err11
new file mode 100644
index 0000000..bcfbc97
--- /dev/null
+++ b/tests/test_regex2_data.err11
@@ -0,0 +1,2 @@
+1
+"+" +
diff --git a/tests/test_regex2_data.err12 b/tests/test_regex2_data.err12
new file mode 100644
index 0000000..5e6f61f
--- /dev/null
+++ b/tests/test_regex2_data.err12
@@ -0,0 +1,2 @@
+1
+"?" ?
diff --git a/tests/test_regex2_data.err13 b/tests/test_regex2_data.err13
new file mode 100644
index 0000000..acda165
--- /dev/null
+++ b/tests/test_regex2_data.err13
@@ -0,0 +1,2 @@
+1
+"(" (
diff --git a/tests/test_regex2_data.err14 b/tests/test_regex2_data.err14
new file mode 100644
index 0000000..5cb72f3
--- /dev/null
+++ b/tests/test_regex2_data.err14
@@ -0,0 +1,2 @@
+1
+"a{+1,2}" a
diff --git a/tests/test_regex2_data.err15 b/tests/test_regex2_data.err15
new file mode 100644
index 0000000..14d6939
--- /dev/null
+++ b/tests/test_regex2_data.err15
@@ -0,0 +1,2 @@
+1
+"[[]" [
diff --git a/tests/test_regex2_data.err16 b/tests/test_regex2_data.err16
new file mode 100644
index 0000000..809acf2
--- /dev/null
+++ b/tests/test_regex2_data.err16
@@ -0,0 +1,2 @@
+1
+"[]]" ]
diff --git a/tests/test_regex2_data.err17 b/tests/test_regex2_data.err17
new file mode 100644
index 0000000..c8f309a
--- /dev/null
+++ b/tests/test_regex2_data.err17
@@ -0,0 +1,2 @@
+1
+"a{2}?" aa
diff --git a/tests/test_regex2_data.err18 b/tests/test_regex2_data.err18
new file mode 100644
index 0000000..94bb12e
--- /dev/null
+++ b/tests/test_regex2_data.err18
@@ -0,0 +1,2 @@
+1
+"a{2}{1,3}" aa
diff --git a/tests/test_regex2_data.err19 b/tests/test_regex2_data.err19
new file mode 100644
index 0000000..c5505e3
--- /dev/null
+++ b/tests/test_regex2_data.err19
@@ -0,0 +1,2 @@
+1
+"a{2}{2}" aaaa
diff --git a/tests/test_regex2_data.err2 b/tests/test_regex2_data.err2
new file mode 100644
index 0000000..8e78fb5
--- /dev/null
+++ b/tests/test_regex2_data.err2
@@ -0,0 +1,2 @@
+1
+"\d" 5
diff --git a/tests/test_regex2_data.err20 b/tests/test_regex2_data.err20
new file mode 100644
index 0000000..39d246d
--- /dev/null
+++ b/tests/test_regex2_data.err20
@@ -0,0 +1,2 @@
+1
+"a{2}*" aa
diff --git a/tests/test_regex2_data.err21 b/tests/test_regex2_data.err21
new file mode 100644
index 0000000..123ca3f
--- /dev/null
+++ b/tests/test_regex2_data.err21
@@ -0,0 +1,2 @@
+1
+"a{2}+" aa
diff --git a/tests/test_regex2_data.err22 b/tests/test_regex2_data.err22
new file mode 100644
index 0000000..6fea297
--- /dev/null
+++ b/tests/test_regex2_data.err22
@@ -0,0 +1,2 @@
+1
+"[a{1,2}]" aa
diff --git a/tests/test_regex2_data.err3 b/tests/test_regex2_data.err3
new file mode 100644
index 0000000..9a47840
--- /dev/null
+++ b/tests/test_regex2_data.err3
@@ -0,0 +1,2 @@
+1
+"[c-a]" b
diff --git a/tests/test_regex2_data.err4 b/tests/test_regex2_data.err4
new file mode 100644
index 0000000..1539b46
--- /dev/null
+++ b/tests/test_regex2_data.err4
@@ -0,0 +1,2 @@
+1
+"[^]" x
diff --git a/tests/test_regex2_data.err5 b/tests/test_regex2_data.err5
new file mode 100644
index 0000000..5319085
--- /dev/null
+++ b/tests/test_regex2_data.err5
@@ -0,0 +1,2 @@
+1
+"a{}"
diff --git a/tests/test_regex2_data.err6 b/tests/test_regex2_data.err6
new file mode 100644
index 0000000..c0f7f77
--- /dev/null
+++ b/tests/test_regex2_data.err6
@@ -0,0 +1,2 @@
+1
+"a{}" a
diff --git a/tests/test_regex2_data.err7 b/tests/test_regex2_data.err7
new file mode 100644
index 0000000..99ec7fc
--- /dev/null
+++ b/tests/test_regex2_data.err7
@@ -0,0 +1,2 @@
+1
+"a{-1,2}" a
diff --git a/tests/test_regex2_data.err8 b/tests/test_regex2_data.err8
new file mode 100644
index 0000000..16b2876
--- /dev/null
+++ b/tests/test_regex2_data.err8
@@ -0,0 +1,2 @@
+1
+"a{00,2}" a
diff --git a/tests/test_regex2_data.err9 b/tests/test_regex2_data.err9
new file mode 100644
index 0000000..9edb50a
--- /dev/null
+++ b/tests/test_regex2_data.err9
@@ -0,0 +1,2 @@
+1
+"^a*$" aaaa
diff --git a/tests/test_regex2_data.in b/tests/test_regex2_data.in
new file mode 100644
index 0000000..5704073
--- /dev/null
+++ b/tests/test_regex2_data.in
@@ -0,0 +1,47 @@
+45
+"1" 1
+"a" a
+""
+"."
+
+"[a-z]" x
+"[a-z]*"
+"[a-z]*" abcdefghxyz
+"[*+?]*" +?*
+"[-]" -
+"[--]" -
+"[---]" -
+"[-a-c-]" b
+"[^a]{9}" +-?*][|/\
+"[\]]" ]
+"[\^]" ^
+"[ ]"
+"[a^b]" ^
+"[a^b]" b
+"[a\-z]" -
+"a{3}" aaa
+"a{3,4}" aaa
+"a{3,4}" aaaa
+"a{,2}"
+"a{1,}" aaaaaaa
+"a{,}" aaa
+"a*"
+"a*" a
+"a*" aaaaaaaaaaaaaaaaaaaa
+"a+" a
+"a+" aaaaaaa
+"a?"
+"a?" a
+"a|b" a
+"a|b" b
+"a|b|c" c
+"\d" \d
+"[:digit:]" d
+"[\[:digits:\]]" i
+"()?"
+"(a{2}){2}" aaaa
+"a{3,4}a{3}b" aaaaaaab
+"^abc$" ^abc$
+"\?\*\+\]\[\)\(\}\{\|\.\-\^\\" ?*+][)(}{|.-^\
+"\a\&\$\,\" \a\&\$\,\
+"[a{1,2}]" ,
diff --git a/tests/test_regex2_data.in2 b/tests/test_regex2_data.in2
new file mode 100644
index 0000000..8ebfd75
--- /dev/null
+++ b/tests/test_regex2_data.in2
@@ -0,0 +1,4 @@
+3
+"[abc-]" -
+"[a-bd-e]+" bd
+"[a-cd-f]+" be
diff --git a/tests/test_regex2_prog.in b/tests/test_regex2_prog.in
new file mode 100644
index 0000000..d6b65a1
--- /dev/null
+++ b/tests/test_regex2_prog.in
@@ -0,0 +1,9 @@
+# IGNORE GENERATE TESTING
+INT(0, 100, n) NEWLINE
+REP(n)
+ STRING("\"") REGEX("[^\"]*", regex) STRING("\"")
+ SPACE
+ REGEX(regex)
+ NEWLINE
+END
+EOF
diff --git a/tests/test_regex3_data.in b/tests/test_regex3_data.in
new file mode 100644
index 0000000..5a5134c
--- /dev/null
+++ b/tests/test_regex3_data.in
@@ -0,0 +1 @@
+ps
diff --git a/tests/test_regex3_prog.err b/tests/test_regex3_prog.err
new file mode 100644
index 0000000..1643cdf
--- /dev/null
+++ b/tests/test_regex3_prog.err
@@ -0,0 +1 @@
+REGEX("ps|p") STRING("s") NEWLINE
diff --git a/tests/test_regex3_prog.in b/tests/test_regex3_prog.in
new file mode 100644
index 0000000..4441fed
--- /dev/null
+++ b/tests/test_regex3_prog.in
@@ -0,0 +1,2 @@
+# IGNORE GENERATE TESTING
+REGEX("p|ps") STRING("s") NEWLINE
diff --git a/tests/test_regex4_data.in b/tests/test_regex4_data.in
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_regex4_data.in2 b/tests/test_regex4_data.in2
new file mode 100644
index 0000000..4d6d8c8
--- /dev/null
+++ b/tests/test_regex4_data.in2
@@ -0,0 +1,6 @@
+
+ @#ä
+
+ a
+ 90ß
+
\ No newline at end of file
diff --git a/tests/test_regex4_prog.in b/tests/test_regex4_prog.in
new file mode 100644
index 0000000..4db0ace
--- /dev/null
+++ b/tests/test_regex4_prog.in
@@ -0,0 +1,2 @@
+# IGNORE GENERATE TESTING
+REGEX(".*") EOF