From 8847029b0c8fd8ead79d3c294f2fdc1bc641aa6f Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Tue, 4 Mar 2025 13:15:20 +0200 Subject: [PATCH 01/12] Use automate-lib to implement DFA related functions Implement functions to: * transform a regex to a minimized DFA * check if a dfa has multiple accepting states --- pyproject.toml | 1 + src/zkregex_fuzzer/dfa.py | 42 +++++++++++++++++++++++++++++++++++++++ tests/test_dfa.py | 38 +++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 src/zkregex_fuzzer/dfa.py create mode 100644 tests/test_dfa.py diff --git a/pyproject.toml b/pyproject.toml index bad7dee..24a5a95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "rstr", "exrex", "joblib", + "automata-lib", ] [project.optional-dependencies] diff --git a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py new file mode 100644 index 0000000..2fd01b9 --- /dev/null +++ b/src/zkregex_fuzzer/dfa.py @@ -0,0 +1,42 @@ +""" +dfa + +A number of functions for working with DFAs. +""" + +from automata.fa.nfa import NFA +from automata.fa.dfa import DFA + +def regex_to_dfa(regex: str) -> DFA: + """ + Convert a regex to a DFA. + """ + try: + nfa = NFA.from_regex(regex) + except Exception as e: + raise ValueError(f"Failed to parse '{regex}' into an automaton: {e}") + try: + return DFA.from_nfa(nfa, minify=True) + except Exception as e: + raise ValueError(f"Failed to convert NFA to DFA: {e}") + +def has_multiple_accepting_states_regex(regex: str) -> bool: + """ + Returns True if converting the given regex to a DFA yields + multiple accepting (final) states. Returns False otherwise. + + NOTE: + - Only handles a subset of regex syntax recognized by automata-lib. + - For advanced Python regex features, a custom NFA builder is needed. + """ + dfa = regex_to_dfa(regex) + num_final_states = len(dfa.final_states) + + return num_final_states > 1 + +def has_multiple_accepting_states_dfa(dfa: DFA) -> bool: + """ + Returns True if the given DFA has multiple accepting (final) states. + Returns False otherwise. 
+ """ + return len(dfa.final_states) > 1 diff --git a/tests/test_dfa.py b/tests/test_dfa.py new file mode 100644 index 0000000..9ff7603 --- /dev/null +++ b/tests/test_dfa.py @@ -0,0 +1,38 @@ +from zkregex_fuzzer.dfa import has_multiple_accepting_states_regex + + +def test_has_multiple_accepting_states_regex_without_multiple(): + regex_without_multiple_accepting_states = [ + r"(a|b)*", + r"abc", + r"(abc|def|ghi)", + r"(abc)*", + r"(hello)", + r"(ab)*", + r"(a|b|c)*", + r"((a|b|c)*abc)", + r"[a-zA-Z]+", + r"[0-9]+", + r"(abc|abcd|abcde)f", + r"(hello|helloo|hellooo)(foo|foob|fooba)?bar", + r"(foo|foob|fooba)?bar", + r"(abc|def)(gh|jk)(lm|nop)", + ] + + for regex in regex_without_multiple_accepting_states: + assert not has_multiple_accepting_states_regex(regex) + +def test_has_multiple_accepting_states_regex_with_multiple(): + regex_with_multiple_accepting_states = [ + r"(ab|aba)", + r"(ab|aba)*", + r"(hello|hell)", + r"b(aa|aaa)", + r"(cat|cats)", + r"(xy|xyx)", + r"(a|ab|abc)", + r"(1|12)", + ] + + for regex in regex_with_multiple_accepting_states: + assert has_multiple_accepting_states_regex(regex) From 34f2f59b20c2b303487847cef68b18fb3761a04c Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Tue, 4 Mar 2025 14:40:05 +0200 Subject: [PATCH 02/12] Implement dfa_to_regex and transform_dfa_to_single_accepting_state --- src/zkregex_fuzzer/dfa.py | 188 +++++++++++++++++++++++++++++++++++++- tests/test_dfa.py | 53 +++++++++-- 2 files changed, 231 insertions(+), 10 deletions(-) diff --git a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py index 2fd01b9..6261a3b 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -4,8 +4,12 @@ A number of functions for working with DFAs. """ -from automata.fa.nfa import NFA +import random + from automata.fa.dfa import DFA +from automata.fa.gnfa import GNFA +from automata.fa.nfa import NFA + def regex_to_dfa(regex: str) -> DFA: """ @@ -20,6 +24,7 @@ def regex_to_dfa(regex: str) -> DFA: except Exception as e: raise ValueError(f"Failed to convert NFA to DFA: {e}") + def has_multiple_accepting_states_regex(regex: str) -> bool: """ Returns True if converting the given regex to a DFA yields @@ -34,9 +39,190 @@ def has_multiple_accepting_states_regex(regex: str) -> bool: return num_final_states > 1 + def has_multiple_accepting_states_dfa(dfa: DFA) -> bool: """ Returns True if the given DFA has multiple accepting (final) states. Returns False otherwise. """ return len(dfa.final_states) > 1 + + +def transform_dfa_to_regex(dfa: DFA) -> str: + """ + Convert a DFA to a regular expression. + """ + # Convert the DFA to an equivalent GNFA + gnfa = GNFA.from_dfa(dfa) + # Use state elimination to get a regular expression + regex = gnfa.to_regex() + return regex + + +def _pick_one_strategy( + states: set, alphabet: set, transitions: dict, initial: str, original_finals: set +) -> DFA: + """ + Choose one of the accepting states as the sole final state. 
+ """ + chosen_final = random.choice(list(original_finals)) + new_final_states = {chosen_final} + # Redirect transitions that pointed to any other final state + for state in states: + for symbol in alphabet: + if ( + transitions[state].get(symbol) in original_finals + and transitions[state][symbol] != chosen_final + ): + transitions[state][symbol] = chosen_final + # Remove other final states if they are no longer needed (unreachable and not initial) + for f in list(original_finals): + if f != chosen_final and f != initial: + states.discard(f) + transitions.pop(f, None) + # Construct the new DFA + return DFA( + states=states, + input_symbols=alphabet, + transitions=transitions, + initial_state=initial, + final_states=new_final_states, + allow_partial=True, + ) + + +def _new_dummy_strategy( + states: set, alphabet: set, transitions: dict, initial: str, original_finals: set +) -> DFA: + """ + Introduce a new dummy accepting state. + """ + new_final_name = "DummyFinal" + # Ensure the new state name is unique + while new_final_name in states: + new_final_name += "_X" + # Add the new state + states.add(new_final_name) + # Redirect all transitions that lead into any original final state to the new dummy final + for state in states: + if state == new_final_name: + continue + for symbol in alphabet: + if transitions[state].get(symbol) in original_finals: + transitions[state][symbol] = new_final_name + # Define the new final state's transitions. We can leave it partial (no outgoing transitions) + # or make it a trap for completeness. Here we leave it with no outgoing transitions (partial DFA). + transitions[new_final_name] = {} + # Remove final status from original finals and drop those states if unreachable (except initial) + for f in original_finals: + if f != initial: + states.discard(f) + transitions.pop(f, None) + # New final state set contains only the dummy state + return DFA( + states=states, + input_symbols=alphabet, + transitions=transitions, + initial_state=initial, + final_states={new_final_name}, + allow_partial=True, + ) + + +def _merge_strategy( + states: set, alphabet: set, transitions: dict, initial: str, original_finals: set +) -> DFA: + """ + Merge all accepting states into one unified state. + """ + merged_name = "MergedFinal" + while merged_name in states: + merged_name += "_X" + # If the initial state is one of the finals, handle carefully by keeping it (to preserve empty-string acceptance) + if initial in original_finals: + merged_name = ( + initial # use initial as the merged final to preserve its identity + ) + # Build the merged state's transition function by combining outgoing transitions of all original finals + merged_transitions = {} + for symbol in alphabet: + destinations = set() + for f in original_finals: + if f not in transitions: + continue + dest = transitions[f].get(symbol) + # If the destination is one of the original finals, treat it as a self-loop in the merged state + if dest in original_finals: + destinations.add(merged_name) + elif dest is not None: + destinations.add(dest) + if len(destinations) == 1: + # Exactly one possible destination for this symbol + merged_transitions[symbol] = destinations.pop() + elif len(destinations) > 1: + # Conflict: multiple different destinations for the same symbol. + # To keep the DFA deterministic, choose one arbitrarily (e.g., the first in the set). + merged_transitions[symbol] = next(iter(destinations)) + # If destinations is empty, no transition defined (partial DFA for that symbol from merged state). 
+ # Remove all old final states (except if one is initial, which we are reusing as merged_name) + for f in list(original_finals): + if f == initial: # if initial is being used as merged_name, skip removal + continue + states.discard(f) + transitions.pop(f, None) + # Add the merged state to the state set + states.add(merged_name) + # Update transitions: redirect any transition pointing to an old final to point to the merged state + for state in list(states): + if state == merged_name: + continue + for symbol in alphabet: + if transitions[state].get(symbol) in original_finals: + transitions[state][symbol] = merged_name + # Set the merged state's transitions as computed + transitions[merged_name] = merged_transitions + # Define the single new final state + return DFA( + states=states, + input_symbols=alphabet, + transitions=transitions, + initial_state=initial, + final_states={merged_name}, + allow_partial=True, + ) + + +def transform_dfa_to_single_accepting_state(dfa: DFA, strategy: str = "random") -> DFA: + """ + Transform a DFA to a single accepting state. + """ + # If there's already one or zero accepting states, no change needed + if len(dfa.final_states) <= 1: + return dfa + + assert strategy in ["pick_one", "new_dummy", "merge", "random"] + + # Copy components of the DFA for modification + states = set(dfa.states) + alphabet = set(dfa.input_symbols) + transitions = { + state: dict(dest_dict) # copy of inner dict + for state, dest_dict in dfa.transitions.items() + } + initial = dfa.initial_state + original_finals = set(dfa.final_states) + + # Randomly choose one of the transformation strategies + if strategy == "random": + strategy = random.choice(["pick_one", "new_dummy", "merge"]) + + if strategy == "pick_one": + return _pick_one_strategy( + states, alphabet, transitions, initial, original_finals + ) + elif strategy == "new_dummy": + return _new_dummy_strategy( + states, alphabet, transitions, initial, original_finals + ) + else: + return _merge_strategy(states, alphabet, transitions, initial, original_finals) diff --git a/tests/test_dfa.py b/tests/test_dfa.py index 9ff7603..aea5bbd 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -1,4 +1,23 @@ -from zkregex_fuzzer.dfa import has_multiple_accepting_states_regex +import re + +from automata.regex.regex import isequal +from zkregex_fuzzer.dfa import ( + has_multiple_accepting_states_regex, + regex_to_dfa, + transform_dfa_to_regex, + transform_dfa_to_single_accepting_state, +) + +regex_with_multiple_accepting_states = [ + r"(ab|aba)", + r"(ab|aba)*", + r"(hello|hell)", + r"b(aa|aaa)", + r"(cat|cats)", + r"(xy|xyx)", + r"(a|ab|abc)", + r"(1|12)", +] def test_has_multiple_accepting_states_regex_without_multiple(): @@ -22,17 +41,33 @@ def test_has_multiple_accepting_states_regex_without_multiple(): for regex in regex_without_multiple_accepting_states: assert not has_multiple_accepting_states_regex(regex) + def test_has_multiple_accepting_states_regex_with_multiple(): - regex_with_multiple_accepting_states = [ + for regex in regex_with_multiple_accepting_states: + assert has_multiple_accepting_states_regex(regex) + + +def test_transform_dfa_to_regex(): + regexes = [ r"(ab|aba)", r"(ab|aba)*", r"(hello|hell)", - r"b(aa|aaa)", - r"(cat|cats)", - r"(xy|xyx)", - r"(a|ab|abc)", - r"(1|12)", ] + for regex in regexes: + dfa = regex_to_dfa(regex) + transformed_regex = transform_dfa_to_regex(dfa) + assert isequal(regex, transformed_regex) - for regex in regex_with_multiple_accepting_states: - assert has_multiple_accepting_states_regex(regex) 
+ +def test_transform_dfa_to_regex_with_multiple_accepting_states(): + strategies = ["pick_one", "new_dummy", "merge"] + for strategy in strategies: + for regex in regex_with_multiple_accepting_states: + dfa = regex_to_dfa(regex) + transformed_dfa = transform_dfa_to_single_accepting_state( + dfa, strategy=strategy + ) + assert len(transformed_dfa.final_states) == 1 + transformed_regex = transform_dfa_to_regex(transformed_dfa) + new_dfa = regex_to_dfa(transformed_regex) + assert len(new_dfa.final_states) == 1 From 712302288ae64e0db56562f502c4a8d49ede2182 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Tue, 4 Mar 2025 14:55:33 +0200 Subject: [PATCH 03/12] Minor fix in the lint script --- pyproject.toml | 3 +-- scripts/lint_and_tests.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 24a5a95..10e4eae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,7 @@ dev = [ [tool.ruff] line-length = 88 target-version = "py38" -lint.select = ["E", "F", "W"] -lint.extend-select = ["I"] +lint.select = ["E", "F", "W", "I"] lint.ignore = ["F401", "E501"] [tool.ruff.format] diff --git a/scripts/lint_and_tests.py b/scripts/lint_and_tests.py index 7154586..6e86cf9 100644 --- a/scripts/lint_and_tests.py +++ b/scripts/lint_and_tests.py @@ -23,7 +23,7 @@ def run_formatter() -> int: def run_tests() -> int: """Placeholder for future test execution.""" - print("Running Tests (Placeholder)...") + print("Running Tests...") return run_command("pytest") From 624ee9d714b7e21a4772011d3c0d8520f054eef8 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Tue, 4 Mar 2025 15:24:17 +0200 Subject: [PATCH 04/12] Remove unused import --- tests/test_dfa.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_dfa.py b/tests/test_dfa.py index aea5bbd..e367151 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -1,5 +1,3 @@ -import re - from automata.regex.regex import isequal from zkregex_fuzzer.dfa import ( has_multiple_accepting_states_regex, From 63cd762fa45734d8d7152438a8c23c2e2d2a54c1 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Wed, 5 Mar 2025 07:37:26 +0200 Subject: [PATCH 05/12] Add DFA generator --- src/zkregex_fuzzer/cli.py | 10 ++- src/zkregex_fuzzer/configs.py | 7 +- src/zkregex_fuzzer/dfa.py | 159 +++++++++++++++++++++++++++++++++ src/zkregex_fuzzer/fuzzer.py | 25 +++++- src/zkregex_fuzzer/regexgen.py | 34 +++++++ tests/test_dfa.py | 29 ++++++ 6 files changed, 261 insertions(+), 3 deletions(-) diff --git a/src/zkregex_fuzzer/cli.py b/src/zkregex_fuzzer/cli.py index 6cdc66c..966e4c6 100644 --- a/src/zkregex_fuzzer/cli.py +++ b/src/zkregex_fuzzer/cli.py @@ -9,7 +9,7 @@ from pathlib import Path from zkregex_fuzzer.configs import GENERATORS, TARGETS, VALID_INPUT_GENERATORS -from zkregex_fuzzer.fuzzer import fuzz_with_database, fuzz_with_grammar +from zkregex_fuzzer.fuzzer import fuzz_with_database, fuzz_with_dfa, fuzz_with_grammar from zkregex_fuzzer.grammar import REGEX_GRAMMAR from zkregex_fuzzer.harness import HarnessStatus from zkregex_fuzzer.logger import logger @@ -240,6 +240,14 @@ def do_fuzz(args): inputs_num=args.inputs_num, kwargs=kwargs, ) + elif args.fuzzer == "dfa": + fuzz_with_dfa( + target_implementation=args.target, + oracle_params=(args.oracle == "valid", args.valid_input_generator), + regex_num=args.regex_num, + inputs_num=args.inputs_num, + kwargs=kwargs, + ) def do_reproduce(args): diff --git a/src/zkregex_fuzzer/configs.py b/src/zkregex_fuzzer/configs.py index e38adb6..9a462fd 100644 --- 
a/src/zkregex_fuzzer/configs.py +++ b/src/zkregex_fuzzer/configs.py @@ -1,5 +1,9 @@ from zkregex_fuzzer.grammar import REGEX_GRAMMAR -from zkregex_fuzzer.regexgen import DatabaseRegexGenerator, GrammarRegexGenerator +from zkregex_fuzzer.regexgen import ( + DatabaseRegexGenerator, + DFARegexGenerator, + GrammarRegexGenerator, +) from zkregex_fuzzer.runner import CircomRunner, NoirRunner, PythonReRunner from zkregex_fuzzer.vinpgen import ExrexGenerator, GrammarBasedGenerator, RstrGenerator @@ -22,4 +26,5 @@ GENERATORS = { "grammar": GrammarRegexGenerator, "database": DatabaseRegexGenerator, + "dfa": DFARegexGenerator, } diff --git a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py index 6261a3b..8f087fd 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -5,6 +5,8 @@ """ import random +import string +from typing import Dict, Optional, Set from automata.fa.dfa import DFA from automata.fa.gnfa import GNFA @@ -226,3 +228,160 @@ def transform_dfa_to_single_accepting_state(dfa: DFA, strategy: str = "random") ) else: return _merge_strategy(states, alphabet, transitions, initial, original_finals) + + +def _get_alphabet( + use_unicode: bool, num_states: int, min_size: int = 2, max_size: int = 10 +) -> Set[str]: + """ + Generate a random alphabet for a DFA. + """ + alphabet_size = random.randint(min_size, max_size) + if use_unicode: + alphabet = set() + while len(alphabet) < alphabet_size: + codepoint = random.randint(0, 0x10FFFF) + try: + char = chr(codepoint) + except ValueError: + continue # skip invalid code points (if any) + alphabet.add(char) + else: + # Restricted character set: letters, digits, punctuation, whitespace + allowed_pool = ( + string.ascii_letters + + string.digits + + string.punctuation + + string.whitespace + ) + alphabet = set(random.sample(allowed_pool, alphabet_size)) + return alphabet + + +def generate_random_dfa( + max_depth: int = 5, + use_unicode: bool = False, + single_final_state: bool = False, + seed: Optional[int] = None, +) -> DFA: + """ + Generate a random DFA with a given seed for reproducibility. 
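+ max_depth bounds the number of states, use_unicode widens the alphabet to arbitrary Unicode code points, and single_final_state forces exactly one accepting state. The generated DFA is minimized before it is returned.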
+ """ + # Seed the random number generator for reproducibility (if seed is given) + if seed is not None: + random.seed(seed) + else: + seed = random.randrange(0, 2**32) + random.seed(seed) + + num_states = random.randint(1, max_depth) + + # Define state names (q0, q1, ..., qN) and the initial state + states = {f"q{i}" for i in range(num_states)} + initial_state = "q0" + + # Determine final state(s) + if single_final_state: + final_state = random.choice(list(states)) + final_states = {final_state} + else: + # One or more final states (randomly chosen subset of states) + num_finals = random.randint(1, num_states) # at least one final + final_states = set(random.sample(list(states), num_finals)) + + alphabet = _get_alphabet(use_unicode, num_states) + + # Construct transitions: for each state and each symbol, choose a random next state + transitions: Dict[str, Dict[str, str]] = {} + for state in states: + transitions[state] = {} + for sym in alphabet: + transitions[state][sym] = random.choice(list(states)) + + # Ensure at least one self-loop (cycle) + loop_exists = any( + state == dest for state in states for dest in transitions[state].values() + ) + if not loop_exists: + # Add a self-loop on a random state with a random symbol + some_state = random.choice(list(states)) + some_symbol = random.choice(list(alphabet)) + transitions[some_state][some_symbol] = some_state + + # Ensure at least one branching point (one state with two different outgoing targets) + if len(alphabet) >= 2: + branching_exists = any(len(set(transitions[s].values())) >= 2 for s in states) + if not branching_exists: + # Force branching on the initial state (as an example) + sym_list = list(alphabet) + # Make sure we have at least two symbols to create a branch + if len(sym_list) >= 2: + sym1, sym2 = sym_list[0], sym_list[1] + # Assign different targets for sym1 and sym2 from the initial state + if transitions[initial_state][sym1] == transitions[initial_state][sym2]: + # Pick a different state for sym2 if both symbols currently go to the same target + possible_targets = list(states - {transitions[initial_state][sym1]}) + if possible_targets: + transitions[initial_state][sym2] = random.choice( + possible_targets + ) + # (If no possible_targets, it means only one state exists, handled by loop above) + + # Introduce an "optional" path (allow skipping or taking a symbol): + # We do this by creating an alternate route to a final state. 
+ if single_final_state and len(states) > 1: + # For a single final state, ensure multiple paths (direct & indirect) to it + final_state = next(iter(final_states)) # the only final state + # If initial state doesn't already have a direct transition to final, add one + if final_state not in transitions[initial_state].values(): + sym = random.choice(list(alphabet)) + transitions[initial_state][sym] = final_state + # Also ensure an indirect path: find a symbol from initial that goes to an intermediate state + intermediate_symbols = [ + sym + for sym, dest in transitions[initial_state].items() + if dest != final_state + ] + if intermediate_symbols: + sym = intermediate_symbols[0] + intermediate_state = transitions[initial_state][sym] + # Link the intermediate state to the final state on some symbol (if not already final) + if intermediate_state != final_state: + sym2 = random.choice(list(alphabet)) + transitions[intermediate_state][sym2] = final_state + elif not single_final_state: + # If multiple finals are allowed, we can treat the start state as an optional accepting state + # (Accept empty string or early termination) + if initial_state not in final_states: + final_states.add(initial_state) + + # Construct the DFA with the generated components + dfa = DFA( + states=states, + input_symbols=alphabet, + transitions=transitions, + initial_state=initial_state, + final_states=final_states, + ) + + # Minimize the DFA for a simpler equivalent automaton + try: + # If automata-lib provides a direct minification method + dfa = dfa.minify() + except AttributeError: + # Fallback: convert to NFA and use DFA.from_nfa for minimization + nfa_transitions: Dict[str, Dict[str, Set[str]]] = {} + for state, trans in transitions.items(): + # Each DFA transition becomes a singleton set in the NFA transition + nfa_transitions[state] = {sym: {dest} for sym, dest in trans.items()} + nfa = NFA( + states=states, + input_symbols=alphabet, + transitions=nfa_transitions, + initial_state=initial_state, + final_states=final_states, + ) + # Convert NFA to DFA with minimization + dfa = DFA.from_nfa(nfa, minify=True) + + return dfa diff --git a/src/zkregex_fuzzer/fuzzer.py b/src/zkregex_fuzzer/fuzzer.py index cc0d4a4..ba442d4 100644 --- a/src/zkregex_fuzzer/fuzzer.py +++ b/src/zkregex_fuzzer/fuzzer.py @@ -8,7 +8,11 @@ from zkregex_fuzzer.configs import GRAMMARS, TARGETS, VALID_INPUT_GENERATORS from zkregex_fuzzer.harness import HarnessResult, HarnessStatus, harness from zkregex_fuzzer.logger import dynamic_filter, logger -from zkregex_fuzzer.regexgen import DatabaseRegexGenerator, GrammarRegexGenerator +from zkregex_fuzzer.regexgen import ( + DatabaseRegexGenerator, + DFARegexGenerator, + GrammarRegexGenerator, +) from zkregex_fuzzer.runner import PythonReRunner from zkregex_fuzzer.runner.base_runner import Runner from zkregex_fuzzer.transformers import regex_to_grammar @@ -56,6 +60,25 @@ def fuzz_with_database( fuzz_with_regexes(regexes, inputs_num, target_runner, oracle_params, kwargs) +def fuzz_with_dfa( + target_implementation: str, + oracle_params: tuple[bool, str], + regex_num: int, + inputs_num: int, + kwargs: dict, +): + """ + Fuzz test with DFA. 
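+ Regexes are produced by DFARegexGenerator (a random DFA converted to a regex via GNFA state elimination) and are then fuzzed in the same way as grammar- or database-generated regexes.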
+ """ + target_runner = TARGETS[target_implementation] + + regex_generator = DFARegexGenerator() + regexes = regex_generator.generate_many(regex_num) + logger.info(f"Generated {len(regexes)} regexes.") + + fuzz_with_regexes(regexes, inputs_num, target_runner, oracle_params, kwargs) + + def fuzz_with_regexes( regexes: list[str], inputs_num: int, diff --git a/src/zkregex_fuzzer/regexgen.py b/src/zkregex_fuzzer/regexgen.py index f4c8394..390d750 100644 --- a/src/zkregex_fuzzer/regexgen.py +++ b/src/zkregex_fuzzer/regexgen.py @@ -22,6 +22,10 @@ from fuzzingbook.Grammars import Grammar +from zkregex_fuzzer.dfa import ( + generate_random_dfa, + transform_dfa_to_regex, +) from zkregex_fuzzer.logger import logger from zkregex_fuzzer.utils import ( check_zkregex_rules_basic, @@ -144,3 +148,33 @@ def generate_many(self, num): break return result + + +class DFARegexGenerator(RegexGenerator): + """ + Generate regexes using a DFA. + """ + + def __init__( + self, + max_depth: int = 5, + use_unicode: bool = False, + single_final_state: bool = True, + ): + self.max_depth = max_depth + self.use_unicode = use_unicode + self.single_final_state = single_final_state + + def generate_unsafe(self) -> str: + """ + Generate a regex using a DFA. + """ + while True: + try: + dfa = generate_random_dfa( + self.max_depth, self.use_unicode, self.single_final_state + ) + return transform_dfa_to_regex(dfa) + except Exception as e: + logger.debug(f"Error generating DFA: {e}") + continue diff --git a/tests/test_dfa.py b/tests/test_dfa.py index e367151..bc2a5b9 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -1,5 +1,6 @@ from automata.regex.regex import isequal from zkregex_fuzzer.dfa import ( + generate_random_dfa, has_multiple_accepting_states_regex, regex_to_dfa, transform_dfa_to_regex, @@ -69,3 +70,31 @@ def test_transform_dfa_to_regex_with_multiple_accepting_states(): transformed_regex = transform_dfa_to_regex(transformed_dfa) new_dfa = regex_to_dfa(transformed_regex) assert len(new_dfa.final_states) == 1 + + +def test_generate_dfa(): + while True: + try: + dfa_with_final = generate_random_dfa( + max_depth=10, use_unicode=False, single_final_state=True + ) + regex_with_final = transform_dfa_to_regex(dfa_with_final) + break + except Exception: + continue + dfa_from_regex_with_final = regex_to_dfa(regex_with_final) + assert len(dfa_with_final.final_states) == 1 + assert len(dfa_from_regex_with_final.final_states) == 1 + + while True: + try: + dfa_without_final = generate_random_dfa( + max_depth=10, use_unicode=False, single_final_state=False + ) + regex_without_final = transform_dfa_to_regex(dfa_without_final) + break + except Exception: + continue + dfa_from_regex_without_final = regex_to_dfa(regex_without_final) + assert len(dfa_without_final.final_states) >= 1 + assert len(dfa_from_regex_without_final.final_states) >= 1 From f72d87359cdad33189c342136ec060fe07f23806 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Wed, 5 Mar 2025 08:51:46 +0200 Subject: [PATCH 06/12] Add dfa input generator --- pyproject.toml | 2 +- src/zkregex_fuzzer/dfa.py | 99 +++++++++++++++++++++++++++++++++++++++ tests/test_dfa.py | 54 +++++++++++++-------- 3 files changed, 135 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 10e4eae..dee51e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dev = [ [tool.ruff] line-length = 88 -target-version = "py38" +target-version = "py312" lint.select = ["E", "F", "W", "I"] lint.ignore = ["F401", "E501"] diff --git a/src/zkregex_fuzzer/dfa.py 
b/src/zkregex_fuzzer/dfa.py index 8f087fd..0c1cc10 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -385,3 +385,102 @@ def generate_random_dfa( dfa = DFA.from_nfa(nfa, minify=True) return dfa + + +def dfa_string_matching( + regex: str, + max_length: int = 10, +) -> str: + """ + Convert `regex` to a DFA using automata-lib, then randomly generate a string + that the DFA accepts. Returns a string that the DFA accepts. + """ + + # Step 1: Convert to NFA or directly to DFA + dfa = regex_to_dfa(regex) + + # Step 2: Determine for each state if acceptance is possible from that state + # We'll do a BFS backward from each final state to mark reachable states. + can_reach_accept = _compute_accept_reachability(dfa) + + # Step 3: Do a random walk + s = _random_walk_dfa(dfa, can_reach_accept, max_length) + if s is None: + raise ValueError("Failed to generate a string that the DFA accepts.") + return s + + +def _compute_accept_reachability(dfa: DFA) -> dict: + """ + For each state, store whether it's possible to reach a final state. + Returns a dict: state -> bool + """ + # Start from final states and do BFS/DFS backwards: + # We'll create a graph reversed: from each state, we see where it can come from. + reverse_graph = {s: [] for s in dfa.states} + for s in dfa.states: + for sym, t in dfa.transitions[s].items(): + reverse_graph[t].append((s, sym)) + + can_reach = {s: False for s in dfa.states} + # Mark final states as can_reach = True + queue = list(dfa.final_states) + for f in queue: + can_reach[f] = True + + # BFS + idx = 0 + while idx < len(queue): + current = queue[idx] + idx += 1 + for prev_state, _symbol in reverse_graph[current]: + if not can_reach[prev_state]: + can_reach[prev_state] = True + queue.append(prev_state) + + return can_reach + + +def _random_walk_dfa( + dfa: DFA, can_reach_accept: dict, max_length: int +) -> Optional[str]: + """ + Start at dfa.initial_state, randomly choose transitions that lead to states + from which a final state is reachable, until we reach a final or exceed max_length. + Note that max_length is not a hard limit, but rather a wanted length. + Return the accepted string or None if we can't produce one. + """ + hard_limit = 100 + current_state = dfa.initial_state + out = [] + # We'll limit the number of steps to avoid infinite loops + for length_counter in range(hard_limit): + # If current_state is final, maybe stop or continue? + # We'll do a random 50% chance to stop if final, producing a short string. + if current_state in dfa.final_states: + if length_counter >= max_length or random.random() < 0.5: + # 50% chance to end early if final + return "".join(out) + # gather possible transitions that lead to can_reach_accept state + next_options = [ + (symbol, dest) + for symbol, dest in dfa.transitions[current_state].items() + if can_reach_accept[dest] + ] + + if not next_options: + # no valid transitions, so if we are final we can stop; else give up + if current_state in dfa.final_states: + return "".join(out) + else: + return None + + # choose a random transition + symbol, dest = random.choice(next_options) + out.append(symbol) + current_state = dest + + # If we are here, we've reached max_length. 
Accept if the state is final + if current_state in dfa.final_states: + return "".join(out) + return None diff --git a/tests/test_dfa.py b/tests/test_dfa.py index bc2a5b9..b72b8f3 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -1,5 +1,6 @@ from automata.regex.regex import isequal from zkregex_fuzzer.dfa import ( + dfa_string_matching, generate_random_dfa, has_multiple_accepting_states_regex, regex_to_dfa, @@ -17,26 +18,29 @@ r"(a|ab|abc)", r"(1|12)", ] +regex_without_multiple_accepting_states = [ + r"(a|b)*", + r"abc", + r"(abc|def|ghi)", + r"(abc)*", + r"(hello)", + r"(ab)*", + r"(a|b|c)*", + r"((a|b|c)*abc)", # This is somewhat comples, do we want to support this? + r"[a-zA-Z]+", + r"[0-9]+", + r"(abc|abcd|abcde)f", + r"(hello|helloo|hellooo)(foo|foob|fooba)?bar", + r"(foo|foob|fooba)?bar", + r"(abc|def)(gh|jk)(lm|nop)", +] +single_solution_regexes = [ + r"abc", + r"(hello)", +] def test_has_multiple_accepting_states_regex_without_multiple(): - regex_without_multiple_accepting_states = [ - r"(a|b)*", - r"abc", - r"(abc|def|ghi)", - r"(abc)*", - r"(hello)", - r"(ab)*", - r"(a|b|c)*", - r"((a|b|c)*abc)", - r"[a-zA-Z]+", - r"[0-9]+", - r"(abc|abcd|abcde)f", - r"(hello|helloo|hellooo)(foo|foob|fooba)?bar", - r"(foo|foob|fooba)?bar", - r"(abc|def)(gh|jk)(lm|nop)", - ] - for regex in regex_without_multiple_accepting_states: assert not has_multiple_accepting_states_regex(regex) @@ -79,10 +83,10 @@ def test_generate_dfa(): max_depth=10, use_unicode=False, single_final_state=True ) regex_with_final = transform_dfa_to_regex(dfa_with_final) + dfa_from_regex_with_final = regex_to_dfa(regex_with_final) break except Exception: continue - dfa_from_regex_with_final = regex_to_dfa(regex_with_final) assert len(dfa_with_final.final_states) == 1 assert len(dfa_from_regex_with_final.final_states) == 1 @@ -92,9 +96,21 @@ def test_generate_dfa(): max_depth=10, use_unicode=False, single_final_state=False ) regex_without_final = transform_dfa_to_regex(dfa_without_final) + dfa_from_regex_without_final = regex_to_dfa(regex_without_final) break except Exception: continue - dfa_from_regex_without_final = regex_to_dfa(regex_without_final) assert len(dfa_without_final.final_states) >= 1 assert len(dfa_from_regex_without_final.final_states) >= 1 + + +def test_dfa_string_matching(): + for regex in regex_without_multiple_accepting_states: + string = dfa_string_matching(regex) + assert string is not None + for _ in range(5): + string2 = dfa_string_matching(regex) + if string != string2: + break + if regex not in single_solution_regexes: + assert string != string2 From 8ce52cd5c3bccbd817af08b44a06cfb61e6e02c7 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Wed, 5 Mar 2025 09:00:48 +0200 Subject: [PATCH 07/12] Try to surpass linting issue --- tests/test_dfa.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_dfa.py b/tests/test_dfa.py index b72b8f3..89df890 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -1,3 +1,4 @@ +# ruff: noqa: I001 from automata.regex.regex import isequal from zkregex_fuzzer.dfa import ( dfa_string_matching, From b75efa2a848495f15f652c724a40e86e53a0cbe9 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Wed, 5 Mar 2025 14:44:41 +0200 Subject: [PATCH 08/12] Update check_zkregex_rules_basic --- src/zkregex_fuzzer/dfa.py | 32 ++++++++ src/zkregex_fuzzer/regexgen.py | 4 +- src/zkregex_fuzzer/utils.py | 126 ++++++++++++++++++-------------- tests/test_utils.py | 130 ++++++++++++++++++++++++++++++++- 4 files changed, 237 insertions(+), 55 deletions(-) diff --git 
a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py index 0c1cc10..9ed31eb 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -6,6 +6,7 @@ import random import string +import re from typing import Dict, Optional, Set from automata.fa.dfa import DFA @@ -42,6 +43,37 @@ def has_multiple_accepting_states_regex(regex: str) -> bool: return num_final_states > 1 +def has_one_accepting_state_regex(regex: str) -> bool: + """ + Returns True if converting the given regex to a DFA yields + exactly one accepting (final) state. Returns False otherwise. + """ + dfa = regex_to_dfa(regex) + return len(dfa.final_states) == 1 + + +def wrapped_has_one_accepting_state_regex(regex: str) -> bool: + """ + Returns True if converting the given regex to a DFA yields + exactly one accepting (final) state. Returns False otherwise. + + NOTE: + - As the automata-lib does not support starting with '^' and ending with '$', + we just remove them from the regex and check if the rest of the regex has one accepting state. + """ + if regex.startswith("^"): + regex = regex[1:] + # There are also some more cases with "starting" "^" + if regex.startswith("(|^)"): + regex = regex[4:] + # Cases like '(\r\n|^)...', '(\r|^)...', '(\n|^)...' + if bool(re.match(r'^\([\\r\\n]*|\s*\|\^\).*', regex)): + regex = regex[regex.find("^")+2:] + if regex.endswith("$"): + regex = regex[:-1] + return has_one_accepting_state_regex(regex) + + def has_multiple_accepting_states_dfa(dfa: DFA) -> bool: """ Returns True if the given DFA has multiple accepting (final) states. diff --git a/src/zkregex_fuzzer/regexgen.py b/src/zkregex_fuzzer/regexgen.py index 390d750..014ca6a 100644 --- a/src/zkregex_fuzzer/regexgen.py +++ b/src/zkregex_fuzzer/regexgen.py @@ -54,7 +54,9 @@ def generate(self) -> str: regex = self.generate_unsafe() if not is_valid_regex(regex): continue - if not check_zkregex_rules_basic(regex): + correct, accepting_state_check = check_zkregex_rules_basic(regex) + if not correct: + # TODO: We should try to fix the regex if it has multiple accepting states continue logger.debug(f"Generated regex: {regex}") return regex diff --git a/src/zkregex_fuzzer/utils.py b/src/zkregex_fuzzer/utils.py index 90b56c0..f5301eb 100644 --- a/src/zkregex_fuzzer/utils.py +++ b/src/zkregex_fuzzer/utils.py @@ -7,7 +7,7 @@ import string from fuzzingbook.Grammars import Grammar, simple_grammar_fuzzer - +from zkregex_fuzzer.dfa import wrapped_has_one_accepting_state_regex def is_valid_regex(regex: str) -> bool: """ @@ -20,65 +20,85 @@ def is_valid_regex(regex: str) -> bool: return False -def check_zkregex_rules_basic(regex: str) -> bool: +def has_lazy_quantifier(pattern: str) -> bool: """ - Check partial zk-regex constraints with a text-based approach: - 1) Must end with '$' - 2) If '^' is present, it is either at index 0 or in substring '(|^)' - 3) No lazy quantifiers like '*?' or '+?' or '??' or '{m,n}?' - Returns True if all checks pass, False otherwise. - - TODO: DFA Checks -- code that actually compiles the regex to an automaton and verifies: - - No loop from initial state back to itself (i.e. no .*-like or equivalent) - - Only one accepting state + Returns True if `pattern` contains any lazy quantifiers (i.e., *?, +?, ??, or {m,n}?), + False otherwise. + + This is a naive textual check and doesn't handle escaping inside character classes or + more advanced regex syntax. For most simple usage, however, it suffices. """ + # Regex to search for the typical lazy quantifier patterns: + # *? +? ?? {m,n}? 
+ # We'll assume m,n are simple digit sets, e.g. {2,5} + lazy_check = re.compile(r'(\*\?)|(\+\?)|(\?\?)|\{\d+(,\d+)?\}\?') + + match = lazy_check.search(pattern) + return bool(match) - # 1) Must end with '$' (if it present) - if "$" in regex and not regex.endswith("$"): - return False - # 2) '^' must be at start or in '(|^)' - # We'll allow no '^' at all. If it appears, check positions. - # We'll define a function to find all occurrences of '^'. - allowed_positions = set() - # If the string starts with '^', that’s allowed - if len(regex) > 0 and regex[0] == "^": - allowed_positions.add(0) - - # If the string contains '|^', that means '^' is at position (idx+1) - idx = 0 - while True: - idx = regex.find("|^", idx) - if idx == -1: - break - # '^' occurs at (idx + 1) - allowed_positions.add(idx + 1) - idx += 2 # skip past - - # If the string contains '[^]', that means '^' is at position (idx+1) - idx = 0 - while True: - idx = regex.find("[^", idx) - if idx == -1: - break - # '^' occurs at (idx + 1) - allowed_positions.add(idx + 1) - idx += 2 # skip past - - # Now see if there's any '^' outside those allowed positions - for match in re.finditer(r"\^", regex): - pos = match.start() - if pos not in allowed_positions: +def correct_carret_position(regex: str) -> bool: + """ + Correct positions are: + - At the start of the regex + - In a capturing group that is at the start of the regex + - In a negated character class + Returns True if the '^' is in the correct position, False otherwise. + + This is a naive textual check and doesn't handle escaping inside character classes or + more advanced regex syntax. For most simple usage, however, it suffices. + """ + # Find all occurrences of '^' that are not escaped + caret_positions = [match.start() for match in re.finditer(r'(? 1: + continue + # Let's check if the '^' is in a group that is at the start of the regex + # and before '^' there is a '|' and before '|' there is either nothing or \r or \n until + # the beginning of the group + if regex[pos-1] == '|' and regex[pos+1] == ')' and regex[0] == '(' and bool(re.match(r'^\s*', regex[1:pos-1])): + status = True + continue + # Let's check if the '^' is in a negated character class + if regex[pos-1] == '[': + status = True + continue + if status is False: return False + return status + - # 3) Check no lazy quantifiers like *?, +?, ??, or {m,n}? - # We do a simple regex search for them: - # Patterns we search for: (*?), (+?), (??), ({\d+(,\d+)?}\?) - lazy_pattern = re.compile(r"(\*\?|\+\?|\?\?|\{\d+(,\d+)?\}\?)") - if lazy_pattern.search(regex): - return False +def check_zkregex_rules_basic(regex: str) -> tuple[bool, bool]: + """ + Check partial zk-regex constraints with a text-based approach: + 1) If '^' is present, it is either at index 0 or in substring '(|^)' or in (\r\n|^) or in substring '[^...]' + 2) No lazy quantifiers like '*?' or '+?' or '??' or '{m,n}?' + 3) Check that the regex has exactly one accepting state + Returns True if all checks pass, False otherwise. Also return the status of the accepting state check. + Returns (True, True) if all checks pass, (False, True) if the regex is invalid, (False, False) if the regex has multiple accepting states. + """ + # 1) If '^' is present, it is either at index 0 or in substring '(|^)' or in (\r\n|^) or in substring '[^...]' + if not correct_carret_position(regex): + return False, True # we return True as we haven't performed the DFA check + + # 2) Check no lazy quantifiers like *?, +?, ??, or {m,n}? 
+ if has_lazy_quantifier(regex): + return False, True # we return True as we haven't performed the DFA check + + # 3) Check that the regex has exactly one accepting state + if not wrapped_has_one_accepting_state_regex(regex): + return False, False - return True + return True, True def check_if_string_is_valid(regex: str, string: str) -> bool: diff --git a/tests/test_utils.py b/tests/test_utils.py index 9933b4c..f20ae2b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,4 @@ -from zkregex_fuzzer.utils import is_valid_regex +from zkregex_fuzzer.utils import is_valid_regex, has_lazy_quantifier, correct_carret_position, check_zkregex_rules_basic def test_valid_regex(): @@ -26,3 +26,131 @@ def test_invalid_regex(): ] for pattern in invalid_patterns: assert not is_valid_regex(pattern), f"Expected {pattern} to be invalid" + + +def test_has_lazy_quantifier(): + """Test that has_lazy_quantifier returns True for patterns with lazy quantifiers.""" + patterns = [ + (r"ab*c", False), + (r"a+?", True), + (r"(abc){2,5}?", True), + (r"xyz", False), + (r"[a-z]*", False), + (r".+?", True), + ] + for pattern, expected in patterns: + assert has_lazy_quantifier(pattern) == expected, f"Expected {pattern} to have lazy quantifier: {expected}" + + +def test_correct_carret_position(): + """ + Test the correct_carret_position function with various corner cases. + """ + # Test cases with expected results + test_cases = [ + # Basic cases + (r"^abc", True), # Start of regex + (r"abc", True), # No caret + (r"abc^", False), # Invalid position at end + + # Capturing group cases + (r"(^abc)", False), # Start of capturing group + (r"(|^)", True), # Alternative with caret + (r"(abc|^def)", False), # Caret in middle of alternative + (r"(|^)", True), # Simple alternative with caret + (r"(\n|^)", True), # Newline alternative + (r"abc(\n|^)", False), # Not at start of regex + (r"(\r|^)", True), # Carriage return alternative + (r"(\r\n|^)", True), # CRLF alternative + (r"(\n\r|^)", True), # CRLF alternative + (r"( |^)", True), # Spaces before alternative + + # Character class cases + (r"[^abc]", True), # Simple negated character class + (r"abc[^xyz]def", True), # Negated character class in middle + (r"[abc^]", False), # Caret not at start of character class + (r"[[^]]", True), # Nested character class + (r"[^]", True), # Empty negated character class + + # Multiple caret cases + (r"^abc[^xyz]", True), # Valid multiple carets + (r"^abc^", False), # Invalid multiple carets + (r"[^abc][^xyz]", True), # Multiple negated character classes + + # Edge cases + (r"", True), # Empty string + (r"^", True), # Just caret + (r"[]^]", False), # Invalid character class + (r"(^)|^", False), # Multiple start anchors + (r"(^abc|^def)", False), # Multiple start anchors in group + + # Complex cases + (r"(|^)abc[^xyz]123", True), # Combination of valid cases + (r"^abc[^xyz](|^)def", False), # Invalid multiple start anchors + (r"[^abc]^[^xyz]", False), # Invalid caret between character classes + (r"( \r\n |^)abc", True), # Complex whitespace before alternative + + # Escaped caret cases + (r"abc\^", True), + (r"abc\^def", True), + ] + for regex, expected in test_cases: + assert correct_carret_position(regex) == expected, f"Expected {regex} to have correct caret position: {expected}" + + +def test_check_zkregex_rules_basic(): + """ + Test the check_zkregex_rules_basic function with various test cases. + """ + # Test cases with expected results + test_cases = [ + # 1. 
Dollar sign tests + (r"abc$", (True, True)), # Valid dollar sign at end, + (r"abc$def", (True, True)), # Valid dollar sign in middle + (r"abc", (True, True)), # No dollar sign + (r"$abc", (True, True)), # Dollar sign at start + + # 2. Caret position tests + (r"^abc", (True, True)), # Valid caret at start + (r"(|^)abc", (True, True)), # Valid caret in alternative + (r"(\r\n|^)abc", (True, True)), # Valid caret with CRLF alternative + (r"[^abc]", (True, True)), # Valid caret in character class + (r"abc^", (False, True)), # Invalid caret at end + (r"abc^def", (False, True)), # Invalid caret in middle + + # 3. Lazy quantifier tests + (r"abc*", (True, True)), # Valid greedy quantifier + (r"abc*?", (False, True)), # Invalid lazy star quantifier + (r"abc+?", (False, True)), # Invalid lazy plus quantifier + (r"abc??", (False, True)), # Invalid lazy question mark quantifier + (r"abc{1,2}?", (False, True)), # Invalid lazy range quantifier + + # 4. Combined valid cases + (r"^abc$", (True, True)), # Valid start and end anchors + (r"(|^)abc$", (True, True)), # Valid alternative and end anchor + (r"[^abc].*$", (True, True)), # Valid character class and end anchor + + # 5. Combined invalid cases + (r"^abc$def", (True, True)), # Valid dollar position with caret + (r"abc^def$", (False, True)), # Invalid caret with dollar + (r"[^abc]*?$", (False, True)), # Invalid lazy quantifier with valid anchors + + # 6. Complex cases + (r"(|^)abc[^xyz]*$", (True, True)), # Complex valid regex + (r"^abc[^xyz]+def$", (True, True)), # Complex valid regex with quantifiers + (r"(|^)abc*?[^xyz]$", (False, True)), # Complex invalid regex with lazy quantifier + (r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", (True, True)), + + # 7. The common regexes from zkemail + (r">[^<>]+<.*", (True, True)), + (r"(\r\n|^)to:[^\r\n]+\r\n", (True, True)), + (r"(\r\n|^)subject:[^\r\n]+\r\n", (True, True)), + #(r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)), + #(r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)), + (r"(\r\n|^)from:[^\r\n]+\r\n", (True, True)), + (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+bh=[a-zA-Z0-9+/=]+;", (True, True)), + (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+t=[0-9]+;", (True, True)), + (r"(\r\n|^)message-id:<[A-Za-z0-9=@\.\+_-]+>\r\n", (True, True)), + ] + for regex, expected in test_cases: + assert check_zkregex_rules_basic(regex) == expected, f"Expected {regex} to have correct zk-regex rules: {expected}" From 95ceba05ff4b46a48bc984b573816f9aa670807b Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Wed, 5 Mar 2025 16:54:46 +0200 Subject: [PATCH 09/12] Fix an error in matchning entry pattern and linting --- src/zkregex_fuzzer/dfa.py | 8 +-- src/zkregex_fuzzer/regexgen.py | 1 + src/zkregex_fuzzer/utils.py | 23 ++++--- tests/test_utils.py | 106 +++++++++++++++++---------------- 4 files changed, 74 insertions(+), 64 deletions(-) diff --git a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py index 9ed31eb..a5ebe59 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -5,8 +5,8 @@ """ import random -import string import re +import string from typing import Dict, Optional, Set from automata.fa.dfa import DFA @@ -64,11 +64,11 @@ def wrapped_has_one_accepting_state_regex(regex: str) -> bool: if regex.startswith("^"): regex = regex[1:] # There are also some more cases with "starting" "^" - if regex.startswith("(|^)"): + elif regex.startswith("(|^)"): regex = regex[4:] # Cases like '(\r\n|^)...', '(\r|^)...', '(\n|^)...' 
- if bool(re.match(r'^\([\\r\\n]*|\s*\|\^\).*', regex)): - regex = regex[regex.find("^")+2:] + elif bool(re.match(r"^\([\\r\\n]*\|\^\).*", regex)): + regex = regex[regex.find("^") + 2 :] if regex.endswith("$"): regex = regex[:-1] return has_one_accepting_state_regex(regex) diff --git a/src/zkregex_fuzzer/regexgen.py b/src/zkregex_fuzzer/regexgen.py index 014ca6a..b5c1161 100644 --- a/src/zkregex_fuzzer/regexgen.py +++ b/src/zkregex_fuzzer/regexgen.py @@ -24,6 +24,7 @@ from zkregex_fuzzer.dfa import ( generate_random_dfa, + regex_to_dfa, transform_dfa_to_regex, ) from zkregex_fuzzer.logger import logger diff --git a/src/zkregex_fuzzer/utils.py b/src/zkregex_fuzzer/utils.py index f5301eb..01fbdb1 100644 --- a/src/zkregex_fuzzer/utils.py +++ b/src/zkregex_fuzzer/utils.py @@ -7,8 +7,10 @@ import string from fuzzingbook.Grammars import Grammar, simple_grammar_fuzzer + from zkregex_fuzzer.dfa import wrapped_has_one_accepting_state_regex + def is_valid_regex(regex: str) -> bool: """ Check if a regex is valid. @@ -31,8 +33,8 @@ def has_lazy_quantifier(pattern: str) -> bool: # Regex to search for the typical lazy quantifier patterns: # *? +? ?? {m,n}? # We'll assume m,n are simple digit sets, e.g. {2,5} - lazy_check = re.compile(r'(\*\?)|(\+\?)|(\?\?)|\{\d+(,\d+)?\}\?') - + lazy_check = re.compile(r"(\*\?)|(\+\?)|(\?\?)|\{\d+(,\d+)?\}\?") + match = lazy_check.search(pattern) return bool(match) @@ -49,7 +51,7 @@ def correct_carret_position(regex: str) -> bool: more advanced regex syntax. For most simple usage, however, it suffices. """ # Find all occurrences of '^' that are not escaped - caret_positions = [match.start() for match in re.finditer(r'(? bool: status = True continue # We have '^' at the end of the regex - if pos+1 == len(regex) and len(regex) > 1: + if pos + 1 == len(regex) and len(regex) > 1: continue # Let's check if the '^' is in a group that is at the start of the regex # and before '^' there is a '|' and before '|' there is either nothing or \r or \n until # the beginning of the group - if regex[pos-1] == '|' and regex[pos+1] == ')' and regex[0] == '(' and bool(re.match(r'^\s*', regex[1:pos-1])): + if ( + regex[pos - 1] == "|" + and regex[pos + 1] == ")" + and regex[0] == "(" + and bool(re.match(r"^\s*", regex[1 : pos - 1])) + ): status = True continue # Let's check if the '^' is in a negated character class - if regex[pos-1] == '[': + if regex[pos - 1] == "[": status = True continue if status is False: return False return status - + def check_zkregex_rules_basic(regex: str) -> tuple[bool, bool]: """ @@ -96,7 +103,7 @@ def check_zkregex_rules_basic(regex: str) -> tuple[bool, bool]: # 3) Check that the regex has exactly one accepting state if not wrapped_has_one_accepting_state_regex(regex): - return False, False + return False, False return True, True diff --git a/tests/test_utils.py b/tests/test_utils.py index f20ae2b..22c9673 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,9 @@ -from zkregex_fuzzer.utils import is_valid_regex, has_lazy_quantifier, correct_carret_position, check_zkregex_rules_basic +from zkregex_fuzzer.utils import ( + check_zkregex_rules_basic, + correct_carret_position, + has_lazy_quantifier, + is_valid_regex, +) def test_valid_regex(): @@ -31,15 +36,17 @@ def test_invalid_regex(): def test_has_lazy_quantifier(): """Test that has_lazy_quantifier returns True for patterns with lazy quantifiers.""" patterns = [ - (r"ab*c", False), - (r"a+?", True), - (r"(abc){2,5}?", True), - (r"xyz", False), - (r"[a-z]*", False), - (r".+?", True), + (r"ab*c", 
False), + (r"a+?", True), + (r"(abc){2,5}?", True), + (r"xyz", False), + (r"[a-z]*", False), + (r".+?", True), ] for pattern, expected in patterns: - assert has_lazy_quantifier(pattern) == expected, f"Expected {pattern} to have lazy quantifier: {expected}" + assert has_lazy_quantifier(pattern) == expected, ( + f"Expected {pattern} to have lazy quantifier: {expected}" + ) def test_correct_carret_position(): @@ -49,53 +56,49 @@ def test_correct_carret_position(): # Test cases with expected results test_cases = [ # Basic cases - (r"^abc", True), # Start of regex - (r"abc", True), # No caret - (r"abc^", False), # Invalid position at end - + (r"^abc", True), # Start of regex + (r"abc", True), # No caret + (r"abc^", False), # Invalid position at end # Capturing group cases - (r"(^abc)", False), # Start of capturing group - (r"(|^)", True), # Alternative with caret - (r"(abc|^def)", False), # Caret in middle of alternative - (r"(|^)", True), # Simple alternative with caret - (r"(\n|^)", True), # Newline alternative - (r"abc(\n|^)", False), # Not at start of regex - (r"(\r|^)", True), # Carriage return alternative - (r"(\r\n|^)", True), # CRLF alternative - (r"(\n\r|^)", True), # CRLF alternative - (r"( |^)", True), # Spaces before alternative - + (r"(^abc)", False), # Start of capturing group + (r"(|^)", True), # Alternative with caret + (r"(abc|^def)", False), # Caret in middle of alternative + (r"(|^)", True), # Simple alternative with caret + (r"(\n|^)", True), # Newline alternative + (r"abc(\n|^)", False), # Not at start of regex + (r"(\r|^)", True), # Carriage return alternative + (r"(\r\n|^)", True), # CRLF alternative + (r"(\n\r|^)", True), # CRLF alternative + (r"( |^)", True), # Spaces before alternative # Character class cases - (r"[^abc]", True), # Simple negated character class + (r"[^abc]", True), # Simple negated character class (r"abc[^xyz]def", True), # Negated character class in middle - (r"[abc^]", False), # Caret not at start of character class - (r"[[^]]", True), # Nested character class - (r"[^]", True), # Empty negated character class - + (r"[abc^]", False), # Caret not at start of character class + (r"[[^]]", True), # Nested character class + (r"[^]", True), # Empty negated character class # Multiple caret cases - (r"^abc[^xyz]", True), # Valid multiple carets - (r"^abc^", False), # Invalid multiple carets + (r"^abc[^xyz]", True), # Valid multiple carets + (r"^abc^", False), # Invalid multiple carets (r"[^abc][^xyz]", True), # Multiple negated character classes - # Edge cases - (r"", True), # Empty string - (r"^", True), # Just caret - (r"[]^]", False), # Invalid character class - (r"(^)|^", False), # Multiple start anchors + (r"", True), # Empty string + (r"^", True), # Just caret + (r"[]^]", False), # Invalid character class + (r"(^)|^", False), # Multiple start anchors (r"(^abc|^def)", False), # Multiple start anchors in group - # Complex cases - (r"(|^)abc[^xyz]123", True), # Combination of valid cases - (r"^abc[^xyz](|^)def", False), # Invalid multiple start anchors - (r"[^abc]^[^xyz]", False), # Invalid caret between character classes - (r"( \r\n |^)abc", True), # Complex whitespace before alternative - + (r"(|^)abc[^xyz]123", True), # Combination of valid cases + (r"^abc[^xyz](|^)def", False), # Invalid multiple start anchors + (r"[^abc]^[^xyz]", False), # Invalid caret between character classes + (r"( \r\n |^)abc", True), # Complex whitespace before alternative # Escaped caret cases (r"abc\^", True), (r"abc\^def", True), ] for regex, expected in test_cases: - assert 
correct_carret_position(regex) == expected, f"Expected {regex} to have correct caret position: {expected}" + assert correct_carret_position(regex) == expected, ( + f"Expected {regex} to have correct caret position: {expected}" + ) def test_check_zkregex_rules_basic(): @@ -109,7 +112,6 @@ def test_check_zkregex_rules_basic(): (r"abc$def", (True, True)), # Valid dollar sign in middle (r"abc", (True, True)), # No dollar sign (r"$abc", (True, True)), # Dollar sign at start - # 2. Caret position tests (r"^abc", (True, True)), # Valid caret at start (r"(|^)abc", (True, True)), # Valid caret in alternative @@ -117,40 +119,40 @@ def test_check_zkregex_rules_basic(): (r"[^abc]", (True, True)), # Valid caret in character class (r"abc^", (False, True)), # Invalid caret at end (r"abc^def", (False, True)), # Invalid caret in middle - # 3. Lazy quantifier tests (r"abc*", (True, True)), # Valid greedy quantifier (r"abc*?", (False, True)), # Invalid lazy star quantifier (r"abc+?", (False, True)), # Invalid lazy plus quantifier (r"abc??", (False, True)), # Invalid lazy question mark quantifier (r"abc{1,2}?", (False, True)), # Invalid lazy range quantifier - # 4. Combined valid cases (r"^abc$", (True, True)), # Valid start and end anchors (r"(|^)abc$", (True, True)), # Valid alternative and end anchor (r"[^abc].*$", (True, True)), # Valid character class and end anchor - # 5. Combined invalid cases (r"^abc$def", (True, True)), # Valid dollar position with caret (r"abc^def$", (False, True)), # Invalid caret with dollar (r"[^abc]*?$", (False, True)), # Invalid lazy quantifier with valid anchors - # 6. Complex cases (r"(|^)abc[^xyz]*$", (True, True)), # Complex valid regex (r"^abc[^xyz]+def$", (True, True)), # Complex valid regex with quantifiers - (r"(|^)abc*?[^xyz]$", (False, True)), # Complex invalid regex with lazy quantifier + ( + r"(|^)abc*?[^xyz]$", + (False, True), + ), # Complex invalid regex with lazy quantifier (r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", (True, True)), - # 7. 
The common regexes from zkemail (r">[^<>]+<.*", (True, True)), (r"(\r\n|^)to:[^\r\n]+\r\n", (True, True)), (r"(\r\n|^)subject:[^\r\n]+\r\n", (True, True)), - #(r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)), - #(r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)), + # (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)), + # (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)), (r"(\r\n|^)from:[^\r\n]+\r\n", (True, True)), (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+bh=[a-zA-Z0-9+/=]+;", (True, True)), (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+t=[0-9]+;", (True, True)), (r"(\r\n|^)message-id:<[A-Za-z0-9=@\.\+_-]+>\r\n", (True, True)), ] for regex, expected in test_cases: - assert check_zkregex_rules_basic(regex) == expected, f"Expected {regex} to have correct zk-regex rules: {expected}" + assert check_zkregex_rules_basic(regex) == expected, ( + f"Expected {regex} to have correct zk-regex rules: {expected}" + ) From 042ff397dc0ea0f17ac3225ad308aa4a52cd00e1 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Thu, 6 Mar 2025 15:09:32 +0200 Subject: [PATCH 10/12] Fix DFA issue (wip) --- pyproject.toml | 2 +- src/zkregex_fuzzer/dfa.py | 11 +++++++++-- src/zkregex_fuzzer/utils.py | 13 +++++++++++++ tests/test_dfa.py | 29 +++++++++++++++++++++++++++-- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dee51e1..e39167e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "rstr", "exrex", "joblib", - "automata-lib", + #"automata-lib", ] [project.optional-dependencies] diff --git a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py index a5ebe59..899da44 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -11,15 +11,22 @@ from automata.fa.dfa import DFA from automata.fa.gnfa import GNFA -from automata.fa.nfa import NFA +from automata.fa.nfa import NFA, RESERVED_CHARACTERS + + +from zkregex_fuzzer.utils import ASCII_CHARS def regex_to_dfa(regex: str) -> DFA: """ Convert a regex to a DFA. """ + # Symbols should include at least all ASCII characters + symbols = ASCII_CHARS + symbols = symbols - RESERVED_CHARACTERS + try: - nfa = NFA.from_regex(regex) + nfa = NFA.from_regex(regex, input_symbols=symbols) except Exception as e: raise ValueError(f"Failed to parse '{regex}' into an automaton: {e}") try: diff --git a/src/zkregex_fuzzer/utils.py b/src/zkregex_fuzzer/utils.py index 01fbdb1..0b97826 100644 --- a/src/zkregex_fuzzer/utils.py +++ b/src/zkregex_fuzzer/utils.py @@ -11,6 +11,19 @@ from zkregex_fuzzer.dfa import wrapped_has_one_accepting_state_regex +def create_range(start_char: str, end_char: str) -> set[str]: + """ + Create a set of characters from start_char to end_char. + """ + return {chr(i) for i in range(ord(start_char), ord(end_char) + 1)} + + +LATIN_EXT_CHARS = create_range("¡", "ƿ") +GREEK_CHARS = create_range("Ͱ", "Ͽ") +CYRILLIC_CHARS = create_range("Ѐ", "ӿ") +ASCII_CHARS = set(string.printable) + + def is_valid_regex(regex: str) -> bool: """ Check if a regex is valid. diff --git a/tests/test_dfa.py b/tests/test_dfa.py index 89df890..68665e5 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -8,7 +8,7 @@ transform_dfa_to_regex, transform_dfa_to_single_accepting_state, ) - +import re regex_with_multiple_accepting_states = [ r"(ab|aba)", r"(ab|aba)*", @@ -27,7 +27,7 @@ r"(hello)", r"(ab)*", r"(a|b|c)*", - r"((a|b|c)*abc)", # This is somewhat comples, do we want to support this? 
+ r"((a|b|c)*abc)", # This is somewhat complex, do we want to support this? r"[a-zA-Z]+", r"[0-9]+", r"(abc|abcd|abcde)f", @@ -39,6 +39,17 @@ r"abc", r"(hello)", ] +zkemail_regexes = [ + ">[^<>]+<.*", + r"to:[^\r\n]+\r\n", + r")subject:[^\r\n]+\r\n", + r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~./]+@[A-Za-z0-9.\-@]+", + r"dkim-signature:([a-z]+=[^;]+; )+bh=[a-zA-Z0-9+/=]+;", + r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~./@]+@[A-Za-z0-9.\-]+", + r"from:[^\r\n]+\r\n", + r"dkim-signature:([a-z]+=[^;]+; )+t=[0-9]+;", + r"message-id:<[A-Za-z0-9=@\.\+_-]+>\r\n", +] def test_has_multiple_accepting_states_regex_without_multiple(): @@ -115,3 +126,17 @@ def test_dfa_string_matching(): break if regex not in single_solution_regexes: assert string != string2 + + +def test_dfa_string_matching_zkemail(): + for regex in zkemail_regexes: + string = dfa_string_matching(regex) + print() + print("--------------------------------") + print(f"Testing regex: {regex}" ) + print(f"String: {string}") + print("--------------------------------") + print() + assert string is not None + # we also need to check against re module + assert re.match(regex, string) is not None From d3b2d6a0f9ff1c9c94c1d8e3f5a5e2daa1a7ba69 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Fri, 7 Mar 2025 13:08:56 +0200 Subject: [PATCH 11/12] Various fixes and clean --- src/zkregex_fuzzer/chars.py | 15 + src/zkregex_fuzzer/configs.py | 8 +- src/zkregex_fuzzer/dfa.py | 520 ++++++++++++++++------------------ src/zkregex_fuzzer/utils.py | 13 - src/zkregex_fuzzer/vinpgen.py | 14 + tests/test_dfa.py | 33 +-- tests/test_utils.py | 5 +- 7 files changed, 297 insertions(+), 311 deletions(-) create mode 100644 src/zkregex_fuzzer/chars.py diff --git a/src/zkregex_fuzzer/chars.py b/src/zkregex_fuzzer/chars.py new file mode 100644 index 0000000..fe097a8 --- /dev/null +++ b/src/zkregex_fuzzer/chars.py @@ -0,0 +1,15 @@ +import string + + +def create_range(start_char: str, end_char: str) -> set[str]: + """ + Create a set of characters from start_char to end_char. 
+ """ + return {chr(i) for i in range(ord(start_char), ord(end_char) + 1)} + + +LATIN_EXT_CHARS = create_range("¡", "ƿ") +GREEK_CHARS = create_range("Ͱ", "Ͽ") +CYRILLIC_CHARS = create_range("Ѐ", "ӿ") +ASCII_CHARS = set(string.printable) +ALL_CHARS = ASCII_CHARS.union(LATIN_EXT_CHARS).union(GREEK_CHARS).union(CYRILLIC_CHARS) diff --git a/src/zkregex_fuzzer/configs.py b/src/zkregex_fuzzer/configs.py index 9a462fd..bf9e27a 100644 --- a/src/zkregex_fuzzer/configs.py +++ b/src/zkregex_fuzzer/configs.py @@ -5,7 +5,12 @@ GrammarRegexGenerator, ) from zkregex_fuzzer.runner import CircomRunner, NoirRunner, PythonReRunner -from zkregex_fuzzer.vinpgen import ExrexGenerator, GrammarBasedGenerator, RstrGenerator +from zkregex_fuzzer.vinpgen import ( + DFAWalkerGenerator, + ExrexGenerator, + GrammarBasedGenerator, + RstrGenerator, +) TARGETS = { "circom": CircomRunner, @@ -21,6 +26,7 @@ "grammar": GrammarBasedGenerator, "rstr": RstrGenerator, "exrex": ExrexGenerator, + "dfa": DFAWalkerGenerator, } GENERATORS = { diff --git a/src/zkregex_fuzzer/dfa.py b/src/zkregex_fuzzer/dfa.py index 899da44..be7559d 100644 --- a/src/zkregex_fuzzer/dfa.py +++ b/src/zkregex_fuzzer/dfa.py @@ -11,19 +11,26 @@ from automata.fa.dfa import DFA from automata.fa.gnfa import GNFA -from automata.fa.nfa import NFA, RESERVED_CHARACTERS +from automata.fa.nfa import NFA +from zkregex_fuzzer.chars import ASCII_CHARS -from zkregex_fuzzer.utils import ASCII_CHARS + +def get_supported_symbols() -> set[str]: + """ + Get the set of symbols that are supported by the regex engine. + """ + # TODO make this configurable + # Symbols should include at least all ASCII characters + return ASCII_CHARS def regex_to_dfa(regex: str) -> DFA: """ Convert a regex to a DFA. """ - # Symbols should include at least all ASCII characters - symbols = ASCII_CHARS - symbols = symbols - RESERVED_CHARACTERS + symbols = get_supported_symbols() + regex = unwrap_regex(regex) try: nfa = NFA.from_regex(regex, input_symbols=symbols) @@ -59,14 +66,9 @@ def has_one_accepting_state_regex(regex: str) -> bool: return len(dfa.final_states) == 1 -def wrapped_has_one_accepting_state_regex(regex: str) -> bool: +def unwrap_regex(regex: str) -> str: """ - Returns True if converting the given regex to a DFA yields - exactly one accepting (final) state. Returns False otherwise. - - NOTE: - - As the automata-lib does not support starting with '^' and ending with '$', - we just remove them from the regex and check if the rest of the regex has one accepting state. + Unwrap a regex by removing the start and end anchors. """ if regex.startswith("^"): regex = regex[1:] @@ -74,11 +76,25 @@ def wrapped_has_one_accepting_state_regex(regex: str) -> bool: elif regex.startswith("(|^)"): regex = regex[4:] # Cases like '(\r\n|^)...', '(\r|^)...', '(\n|^)...' + elif bool(re.match(r"^\([\r\n]*\|\^\).*", regex)): + regex = regex[regex.find("^") + 2 :] elif bool(re.match(r"^\([\\r\\n]*\|\^\).*", regex)): regex = regex[regex.find("^") + 2 :] if regex.endswith("$"): regex = regex[:-1] - return has_one_accepting_state_regex(regex) + return regex.replace("\n", r"\n").replace("\r", r"\r") + + +def wrapped_has_one_accepting_state_regex(regex: str) -> bool: + """ + Returns True if converting the given regex to a DFA yields + exactly one accepting (final) state. Returns False otherwise. + + NOTE: + - As the automata-lib does not support starting with '^' and ending with '$', + we just remove them from the regex and check if the rest of the regex has one accepting state. 
+ """ + return has_one_accepting_state_regex(unwrap_regex(regex)) def has_multiple_accepting_states_dfa(dfa: DFA) -> bool: @@ -100,175 +116,6 @@ def transform_dfa_to_regex(dfa: DFA) -> str: return regex -def _pick_one_strategy( - states: set, alphabet: set, transitions: dict, initial: str, original_finals: set -) -> DFA: - """ - Choose one of the accepting states as the sole final state. - """ - chosen_final = random.choice(list(original_finals)) - new_final_states = {chosen_final} - # Redirect transitions that pointed to any other final state - for state in states: - for symbol in alphabet: - if ( - transitions[state].get(symbol) in original_finals - and transitions[state][symbol] != chosen_final - ): - transitions[state][symbol] = chosen_final - # Remove other final states if they are no longer needed (unreachable and not initial) - for f in list(original_finals): - if f != chosen_final and f != initial: - states.discard(f) - transitions.pop(f, None) - # Construct the new DFA - return DFA( - states=states, - input_symbols=alphabet, - transitions=transitions, - initial_state=initial, - final_states=new_final_states, - allow_partial=True, - ) - - -def _new_dummy_strategy( - states: set, alphabet: set, transitions: dict, initial: str, original_finals: set -) -> DFA: - """ - Introduce a new dummy accepting state. - """ - new_final_name = "DummyFinal" - # Ensure the new state name is unique - while new_final_name in states: - new_final_name += "_X" - # Add the new state - states.add(new_final_name) - # Redirect all transitions that lead into any original final state to the new dummy final - for state in states: - if state == new_final_name: - continue - for symbol in alphabet: - if transitions[state].get(symbol) in original_finals: - transitions[state][symbol] = new_final_name - # Define the new final state's transitions. We can leave it partial (no outgoing transitions) - # or make it a trap for completeness. Here we leave it with no outgoing transitions (partial DFA). - transitions[new_final_name] = {} - # Remove final status from original finals and drop those states if unreachable (except initial) - for f in original_finals: - if f != initial: - states.discard(f) - transitions.pop(f, None) - # New final state set contains only the dummy state - return DFA( - states=states, - input_symbols=alphabet, - transitions=transitions, - initial_state=initial, - final_states={new_final_name}, - allow_partial=True, - ) - - -def _merge_strategy( - states: set, alphabet: set, transitions: dict, initial: str, original_finals: set -) -> DFA: - """ - Merge all accepting states into one unified state. 
- """ - merged_name = "MergedFinal" - while merged_name in states: - merged_name += "_X" - # If the initial state is one of the finals, handle carefully by keeping it (to preserve empty-string acceptance) - if initial in original_finals: - merged_name = ( - initial # use initial as the merged final to preserve its identity - ) - # Build the merged state's transition function by combining outgoing transitions of all original finals - merged_transitions = {} - for symbol in alphabet: - destinations = set() - for f in original_finals: - if f not in transitions: - continue - dest = transitions[f].get(symbol) - # If the destination is one of the original finals, treat it as a self-loop in the merged state - if dest in original_finals: - destinations.add(merged_name) - elif dest is not None: - destinations.add(dest) - if len(destinations) == 1: - # Exactly one possible destination for this symbol - merged_transitions[symbol] = destinations.pop() - elif len(destinations) > 1: - # Conflict: multiple different destinations for the same symbol. - # To keep the DFA deterministic, choose one arbitrarily (e.g., the first in the set). - merged_transitions[symbol] = next(iter(destinations)) - # If destinations is empty, no transition defined (partial DFA for that symbol from merged state). - # Remove all old final states (except if one is initial, which we are reusing as merged_name) - for f in list(original_finals): - if f == initial: # if initial is being used as merged_name, skip removal - continue - states.discard(f) - transitions.pop(f, None) - # Add the merged state to the state set - states.add(merged_name) - # Update transitions: redirect any transition pointing to an old final to point to the merged state - for state in list(states): - if state == merged_name: - continue - for symbol in alphabet: - if transitions[state].get(symbol) in original_finals: - transitions[state][symbol] = merged_name - # Set the merged state's transitions as computed - transitions[merged_name] = merged_transitions - # Define the single new final state - return DFA( - states=states, - input_symbols=alphabet, - transitions=transitions, - initial_state=initial, - final_states={merged_name}, - allow_partial=True, - ) - - -def transform_dfa_to_single_accepting_state(dfa: DFA, strategy: str = "random") -> DFA: - """ - Transform a DFA to a single accepting state. 
- """ - # If there's already one or zero accepting states, no change needed - if len(dfa.final_states) <= 1: - return dfa - - assert strategy in ["pick_one", "new_dummy", "merge", "random"] - - # Copy components of the DFA for modification - states = set(dfa.states) - alphabet = set(dfa.input_symbols) - transitions = { - state: dict(dest_dict) # copy of inner dict - for state, dest_dict in dfa.transitions.items() - } - initial = dfa.initial_state - original_finals = set(dfa.final_states) - - # Randomly choose one of the transformation strategies - if strategy == "random": - strategy = random.choice(["pick_one", "new_dummy", "merge"]) - - if strategy == "pick_one": - return _pick_one_strategy( - states, alphabet, transitions, initial, original_finals - ) - elif strategy == "new_dummy": - return _new_dummy_strategy( - states, alphabet, transitions, initial, original_finals - ) - else: - return _merge_strategy(states, alphabet, transitions, initial, original_finals) - - def _get_alphabet( use_unicode: bool, num_states: int, min_size: int = 2, max_size: int = 10 ) -> Set[str]: @@ -301,18 +148,24 @@ def generate_random_dfa( max_depth: int = 5, use_unicode: bool = False, single_final_state: bool = False, - seed: Optional[int] = None, ) -> DFA: """ Generate a random DFA with a given seed for reproducibility. - """ - # Seed the random number generator for reproducibility (if seed is given) - if seed is not None: - random.seed(seed) - else: - seed = random.randrange(0, 2**32) - random.seed(seed) + Randomly incorporates regex features like character classes, repetition, + and fixed string prefixes/suffixes. + + Parameters: + max_depth: Maximum number of states in the DFA + use_unicode: Whether to use Unicode characters in the alphabet + single_final_state: Whether to generate a DFA with exactly one final state + + TODO: + - Add regex features + - Add support for more complex regex features + - Add support for more complex DFA structures + """ + # Original implementation for generating a DFA directly num_states = random.randint(1, max_depth) # Define state names (q0, q1, ..., qN) and the initial state @@ -426,100 +279,219 @@ def generate_random_dfa( return dfa +def transform_dfa_to_single_final_state(dfa: DFA) -> DFA: + """ + Convert a DFA with multiple final states to one with a single final state. + + This implementation follows a principled automata theory approach: + 1. Add a new final state + 2. Redirect transitions from original final states to this new state + 3. Make the new final state the only accepting state + 4. 
Ensure the DFA is complete + + Returns: + A new DFA with exactly one final state + """ + # If the DFA already has a single final state, return it as-is + if len(dfa.final_states) == 1: + return dfa + + # Create mutable copies of the DFA's components + states = set(dfa.states) + alphabet = set(dfa.input_symbols) + transitions = {} + for state in states: + transitions[state] = {} + for symbol in alphabet: + if state in dfa.transitions and symbol in dfa.transitions[state]: + transitions[state][symbol] = dfa.transitions[state][symbol] + + initial_state = dfa.initial_state + original_finals = set(dfa.final_states) + + # Step 1: Add a new single final state + new_final = max(states) + 1 + states.add(new_final) + transitions[new_final] = {} + + # Step 2: Redirect transitions from all existing final states to the new final state + for final_state in original_finals: + for symbol in alphabet: + if symbol in transitions[final_state]: + transitions[final_state][symbol] = new_final + if len(transitions[final_state]) == 0: + transitions[final_state][list(alphabet)[0]] = new_final + + # Step 4: Create the transformed DFA with single final state + new_dfa = DFA( + states=states, + input_symbols=alphabet, + transitions=transitions, + initial_state=initial_state, + final_states={new_final}, + allow_partial=True, + ) + # Step 5: Minimize the DFA to merge equivalent states + # The automata-lib library has a built-in minify method + try: + minimized_dfa = new_dfa.minify() + # check if we can transform the minimized dfa to a regex + regex = transform_dfa_to_regex(minimized_dfa) + if not regex: + raise Exception("Failed to transform minimized DFA to regex") + return minimized_dfa + except Exception as e: + raise Exception(f"DFA minimization failed: {e}") + + def dfa_string_matching( regex: str, - max_length: int = 10, + wanted_length: int = 50, + direct_match: bool = True, ) -> str: """ Convert `regex` to a DFA using automata-lib, then randomly generate a string that the DFA accepts. Returns a string that the DFA accepts. - """ - # Step 1: Convert to NFA or directly to DFA - dfa = regex_to_dfa(regex) - - # Step 2: Determine for each state if acceptance is possible from that state - # We'll do a BFS backward from each final state to mark reachable states. 
- can_reach_accept = _compute_accept_reachability(dfa) + Parameters: + regex: The regular expression to match + wanted_length: The desired length of the generated string + direct_match: If True, only follow paths that lead to accepting states + """ + regex = unwrap_regex(regex) + # Some hard limited length that we can't exceed + # TODO make this configurable + max_length = 500 + # Convert regex to NFA + nfa = NFA.from_regex(regex, input_symbols=get_supported_symbols()) + + # Start with the initial state and an empty string + current_states = nfa._get_lambda_closures()[nfa.initial_state] + result = "" + + # If we start in a final state and regex allows empty string, we might return empty + if not current_states.isdisjoint(nfa.final_states) and random.random() < 0.2: + return "" + + # If direct_match is True, precompute which states can reach a final state + reachable_to_final = None + if direct_match: + # Compute states that can reach a final state (reverse BFS) + reachable_to_final = set() + queue = list(nfa.final_states) + visited = set(queue) + + # Build reverse transition graph + reverse_transitions = {} + for state in nfa.states: + reverse_transitions[state] = [] + + for state in nfa.states: + if state in nfa.transitions: + for symbol, next_states in nfa.transitions[state].items(): + for next_state in next_states: + reverse_transitions[next_state].append((state, symbol)) + + # Do BFS from final states + while queue: + state = queue.pop(0) + reachable_to_final.add(state) + + for prev_state, _ in reverse_transitions[state]: + if prev_state not in visited: + visited.add(prev_state) + queue.append(prev_state) + + # Maximum number of attempts to find an accepting path + max_attempts = 5 + for attempt in range(max_attempts): + current_states = nfa._get_lambda_closures()[nfa.initial_state] + result = "" + + # Try to build a matching string by traversing the NFA + for _ in range(max_length): + # Get all possible transitions from current states + possible_moves = [] + for state in current_states: + if state in nfa.transitions: + for symbol, next_states in nfa.transitions[state].items(): + if symbol: # Skip lambda transitions + for next_state in next_states: + # If direct_match is True, only consider moves that can reach a final state + if not direct_match or next_state in reachable_to_final: + possible_moves.append((symbol, next_state)) + + # No more possible moves + if not possible_moves: + break + + # Choose moves with a bias toward making progress + # For longer patterns, we want to avoid getting stuck in loops + if len(possible_moves) > 1 and len(result) > wanted_length * 0.7: + # In later stages, prioritize moves that might lead to acceptance faster + # We'll do this by favoring transitions to states closer to final states + + # Group possible moves by their target state + moves_by_state = {} + for symbol, next_state in possible_moves: + if next_state not in moves_by_state: + moves_by_state[next_state] = [] + moves_by_state[next_state].append(symbol) + + # If we're in a state we've seen before, try to avoid it + # Convert states to string representation for hashing + current_state_str = "".join(str(s) for s in sorted(current_states)) + if hasattr(dfa_string_matching, "seen_states"): + if current_state_str in dfa_string_matching.seen_states: + # Try to choose a different path than before + dfa_string_matching.seen_states[current_state_str] += 1 + else: + dfa_string_matching.seen_states[current_state_str] = 1 + else: + dfa_string_matching.seen_states = {current_state_str: 1} + + # Bias 
towards less-visited transitions + weights = [] + for symbol, next_state in possible_moves: + next_state_str = "".join( + str(s) for s in sorted(nfa._get_lambda_closures()[next_state]) + ) + visits = dfa_string_matching.seen_states.get(next_state_str, 0) + # Weight inversely to number of visits (add 1 to avoid division by zero) + weights.append(1.0 / (visits + 1)) + + # Normalize weights + total = sum(weights) + if total > 0: + weights = [w / total for w in weights] + symbol, next_state = random.choices( + possible_moves, weights=weights, k=1 + )[0] + else: + symbol, next_state = random.choice(possible_moves) + else: + # Standard random choice for early parts of the pattern + symbol, next_state = random.choice(possible_moves) - # Step 3: Do a random walk - s = _random_walk_dfa(dfa, can_reach_accept, max_length) - if s is None: - raise ValueError("Failed to generate a string that the DFA accepts.") - return s + result += symbol + # Update current states with the chosen move and its lambda closure + current_states = nfa._get_lambda_closures()[next_state] -def _compute_accept_reachability(dfa: DFA) -> dict: - """ - For each state, store whether it's possible to reach a final state. - Returns a dict: state -> bool - """ - # Start from final states and do BFS/DFS backwards: - # We'll create a graph reversed: from each state, we see where it can come from. - reverse_graph = {s: [] for s in dfa.states} - for s in dfa.states: - for sym, t in dfa.transitions[s].items(): - reverse_graph[t].append((s, sym)) - - can_reach = {s: False for s in dfa.states} - # Mark final states as can_reach = True - queue = list(dfa.final_states) - for f in queue: - can_reach[f] = True - - # BFS - idx = 0 - while idx < len(queue): - current = queue[idx] - idx += 1 - for prev_state, _symbol in reverse_graph[current]: - if not can_reach[prev_state]: - can_reach[prev_state] = True - queue.append(prev_state) - - return can_reach - - -def _random_walk_dfa( - dfa: DFA, can_reach_accept: dict, max_length: int -) -> Optional[str]: - """ - Start at dfa.initial_state, randomly choose transitions that lead to states - from which a final state is reachable, until we reach a final or exceed max_length. - Note that max_length is not a hard limit, but rather a wanted length. - Return the accepted string or None if we can't produce one. - """ - hard_limit = 100 - current_state = dfa.initial_state - out = [] - # We'll limit the number of steps to avoid infinite loops - for length_counter in range(hard_limit): - # If current_state is final, maybe stop or continue? - # We'll do a random 50% chance to stop if final, producing a short string. 
- if current_state in dfa.final_states: - if length_counter >= max_length or random.random() < 0.5: - # 50% chance to end early if final - return "".join(out) - # gather possible transitions that lead to can_reach_accept state - next_options = [ - (symbol, dest) - for symbol, dest in dfa.transitions[current_state].items() - if can_reach_accept[dest] - ] + # If we're in a final state, we might choose to stop + if not current_states.isdisjoint(nfa.final_states): + if random.random() < 0.3: + break + # If we have reached the wanted length, we're more likely to stop + if len(result) >= wanted_length and random.random() < 0.9: + break - if not next_options: - # no valid transitions, so if we are final we can stop; else give up - if current_state in dfa.final_states: - return "".join(out) - else: - return None + # Check if our string is accepted by the NFA + if nfa.accepts_input(result): + return result - # choose a random transition - symbol, dest = random.choice(next_options) - out.append(symbol) - current_state = dest + # If we failed, we'll try again with a clean slate + if hasattr(dfa_string_matching, "seen_states"): + delattr(dfa_string_matching, "seen_states") - # If we are here, we've reached max_length. Accept if the state is final - if current_state in dfa.final_states: - return "".join(out) - return None + raise ValueError(f"Failed to generate a string that the NFA accepts: {regex}") diff --git a/src/zkregex_fuzzer/utils.py b/src/zkregex_fuzzer/utils.py index 0b97826..01fbdb1 100644 --- a/src/zkregex_fuzzer/utils.py +++ b/src/zkregex_fuzzer/utils.py @@ -11,19 +11,6 @@ from zkregex_fuzzer.dfa import wrapped_has_one_accepting_state_regex -def create_range(start_char: str, end_char: str) -> set[str]: - """ - Create a set of characters from start_char to end_char. - """ - return {chr(i) for i in range(ord(start_char), ord(end_char) + 1)} - - -LATIN_EXT_CHARS = create_range("¡", "ƿ") -GREEK_CHARS = create_range("Ͱ", "Ͽ") -CYRILLIC_CHARS = create_range("Ѐ", "ӿ") -ASCII_CHARS = set(string.printable) - - def is_valid_regex(regex: str) -> bool: """ Check if a regex is valid. diff --git a/src/zkregex_fuzzer/vinpgen.py b/src/zkregex_fuzzer/vinpgen.py index 296e9b3..f9ff826 100644 --- a/src/zkregex_fuzzer/vinpgen.py +++ b/src/zkregex_fuzzer/vinpgen.py @@ -14,6 +14,7 @@ import exrex import rstr +from zkregex_fuzzer.dfa import dfa_string_matching from zkregex_fuzzer.logger import logger from zkregex_fuzzer.transformers import regex_to_grammar from zkregex_fuzzer.utils import check_if_string_is_valid, grammar_fuzzer, pretty_regex @@ -130,3 +131,16 @@ def __init__(self, regex: str): def generate_unsafe(self) -> str: return exrex.getone(self.regex) + + +class DFAWalkerGenerator(ValidInputGenerator): + """ + Generate valid inputs for a regex using a DFA walker. + """ + + def __init__(self, regex: str): + super().__init__(regex) + + def generate_unsafe(self) -> str: + inp = dfa_string_matching(self.regex) + return inp diff --git a/tests/test_dfa.py b/tests/test_dfa.py index 68665e5..39c3809 100644 --- a/tests/test_dfa.py +++ b/tests/test_dfa.py @@ -6,9 +6,10 @@ has_multiple_accepting_states_regex, regex_to_dfa, transform_dfa_to_regex, - transform_dfa_to_single_accepting_state, + transform_dfa_to_single_final_state, ) import re + regex_with_multiple_accepting_states = [ r"(ab|aba)", r"(ab|aba)*", @@ -27,7 +28,7 @@ r"(hello)", r"(ab)*", r"(a|b|c)*", - r"((a|b|c)*abc)", # This is somewhat complex, do we want to support this? 
+ r"((a|b|c)*abc)", r"[a-zA-Z]+", r"[0-9]+", r"(abc|abcd|abcde)f", @@ -40,9 +41,9 @@ r"(hello)", ] zkemail_regexes = [ - ">[^<>]+<.*", + r">[^<>]+<.*", r"to:[^\r\n]+\r\n", - r")subject:[^\r\n]+\r\n", + r"subject:[^\r\n]+\r\n", r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~./]+@[A-Za-z0-9.\-@]+", r"dkim-signature:([a-z]+=[^;]+; )+bh=[a-zA-Z0-9+/=]+;", r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~./@]+@[A-Za-z0-9.\-]+", @@ -75,17 +76,13 @@ def test_transform_dfa_to_regex(): def test_transform_dfa_to_regex_with_multiple_accepting_states(): - strategies = ["pick_one", "new_dummy", "merge"] - for strategy in strategies: - for regex in regex_with_multiple_accepting_states: - dfa = regex_to_dfa(regex) - transformed_dfa = transform_dfa_to_single_accepting_state( - dfa, strategy=strategy - ) - assert len(transformed_dfa.final_states) == 1 - transformed_regex = transform_dfa_to_regex(transformed_dfa) - new_dfa = regex_to_dfa(transformed_regex) - assert len(new_dfa.final_states) == 1 + for regex in regex_with_multiple_accepting_states: + dfa = regex_to_dfa(regex) + transformed_dfa = transform_dfa_to_single_final_state(dfa) + assert len(transformed_dfa.final_states) == 1 + transformed_regex = transform_dfa_to_regex(transformed_dfa) + new_dfa = regex_to_dfa(transformed_regex) + assert len(new_dfa.final_states) == 1 def test_generate_dfa(): @@ -131,12 +128,6 @@ def test_dfa_string_matching(): def test_dfa_string_matching_zkemail(): for regex in zkemail_regexes: string = dfa_string_matching(regex) - print() - print("--------------------------------") - print(f"Testing regex: {regex}" ) - print(f"String: {string}") - print("--------------------------------") - print() assert string is not None # we also need to check against re module assert re.match(regex, string) is not None diff --git a/tests/test_utils.py b/tests/test_utils.py index 22c9673..29ffff3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -140,13 +140,14 @@ def test_check_zkregex_rules_basic(): r"(|^)abc*?[^xyz]$", (False, True), ), # Complex invalid regex with lazy quantifier + (r"[a-zA-Z0-9._%+-]+", (True, True)), (r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", (True, True)), # 7. The common regexes from zkemail (r">[^<>]+<.*", (True, True)), (r"(\r\n|^)to:[^\r\n]+\r\n", (True, True)), (r"(\r\n|^)subject:[^\r\n]+\r\n", (True, True)), - # (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)), - # (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)), + (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)), + (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)), (r"(\r\n|^)from:[^\r\n]+\r\n", (True, True)), (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+bh=[a-zA-Z0-9+/=]+;", (True, True)), (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+t=[0-9]+;", (True, True)), From 00f4126b8d713f92c099b816a6946875e5a80d73 Mon Sep 17 00:00:00 2001 From: Stefanos Chaliasos Date: Fri, 7 Mar 2025 13:11:55 +0200 Subject: [PATCH 12/12] Temporarily use a fork of automata-lib --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e39167e..8783671 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "exrex", "joblib", #"automata-lib", + "automata-lib @ git+https://github.com/StefanosChaliasos/automata.git@add-support-for-charclass" ] [project.optional-dependencies]