-
| Hello, I am a regex noob, but I’d like to have a function that returns all matches from a DFA regex (because I have lookahead in some regexes). From the documentation about the 
use regex_automata::{
    Input, MatchKind,
    hybrid::dfa::{DFA, OverlappingState},
}; // 0.4.9
// First pattern handed to `build_many` in `find_all_matches`, so `main` expects
// it to be reported as pattern 0. It describes an e-mail-like token; the
// trailing `(?-u:\b)` is a word-boundary assertion with Unicode mode disabled.
const EMAIL_REGEX: &str = r"[a-zA-Z0-9-_.]*[a-zA-Z0-9_](@[a-zA-Z0-9_]+)(\.[a-zA-Z0-9_]+(\.[a-zA-Z0-9_]+)?[a-zA-Z])(?-u:\b)";
// Second pattern handed to `build_many` (pattern 1): a run of 7 to 28
// hexadecimal digits with a word-boundary assertion (Unicode mode disabled)
// on each side.
const SHA_REGEX: &str = r"(?-u:\b)[0-9a-fA-F]{7,28}(?-u:\b)";
/// A full match: the pattern that matched plus the haystack span it covers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct Match {
    /// Identifier of the matching pattern (`main` expects 0 for the e-mail
    /// pattern and 1 for the hash pattern).
    pattern_id: usize,
    /// Offset in the haystack at which the match begins.
    offset_start: usize,
    /// Offset in the haystack at which the match ends; `main` expects 18 for
    /// an 18-byte haystack, which implies the end is exclusive.
    offset_end: usize,
}
/// Checks `find_all_matches` against a sample address: one e-mail match
/// covering the whole haystack (0..18) and one hash match for the `deadbeef`
/// portion (6..14).
fn main() {
    let expected = [
        Match { pattern_id: 0, offset_start: 0, offset_end: 18 },
        Match { pattern_id: 1, offset_start: 6, offset_end: 14 },
    ];
    let found = find_all_matches("email@deadbeef.com");
    assert_eq!(&found, &expected);
}
/// Work-in-progress search for all matches of `EMAIL_REGEX` and `SHA_REGEX`
/// in `haystack` using a forward overlapping search on a lazy DFA.
///
/// As written this is incomplete: the first time the forward search reports a
/// half match, the `todo!` below panics. When the search errors or reports no
/// match, the loop ends and the (still empty) `matches` vector is returned.
///
/// # Panics
/// Panics if the patterns fail to compile (`unwrap`) and whenever a half match
/// is found (`todo!`).
fn find_all_matches(haystack: &str) -> Vec<Match> {
    // One DFA for both patterns, configured with `MatchKind::All`.
    let dfa = DFA::builder()
        .configure(DFA::config().match_kind(MatchKind::All))
        .build_many(&[EMAIL_REGEX, SHA_REGEX])
        .unwrap();
    let mut cache = dfa.create_cache();
    // The overlapping search keeps its progress in `state`; each call in the
    // loop below is handed the same state again.
    let mut state = OverlappingState::start();
    let mut matches = Vec::<Match>::new();
    loop {
        // A search error is not surfaced to the caller; it just ends the loop.
        if dfa
            .try_search_overlapping_fwd(&mut cache, &Input::new(haystack), &mut state)
            .is_err()
        {
            break;
        }
        // No pending match in the state means the search is exhausted.
        let Some(half_match) = state.get_match() else {
            break;
        };
        // half_match reports the end of the first match
        todo!("Combine this half match with a reverse search to complete the match")
    }
    matches
}But now that I successfully have a  
 It is a bit hand-holding I know, but I really can’t figure out how to close the loop there. Thanks for the library anyways, Gerry | 
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 5 replies
-
| Does this code snippet and comment help? I think it's what you're trying to do: regex/regex-automata/tests/dfa/suite.rs Lines 393 to 443 in 1a069b9 | 
Beta Was this translation helpful? Give feedback.
-
| So, yes! That was exactly what I was looking for! But! https://play.rust-lang.org/?version=stable&mode=debug&edition=2024&gist=621360e40dfdea8605f610ea4d7b06c4 
use regex_automata::{
    Anchored, Input, MatchKind,
    hybrid::dfa::{DFA, OverlappingState},
    hybrid::regex::{Cache, Regex},
}; // 0.4.9
// First pattern handed to `Regex::new_many` in `find_all_matches`, so `main`
// expects it to be reported as pattern 0. It describes an e-mail-like token;
// the trailing `(?-u:\b)` is a word-boundary assertion with Unicode mode
// disabled.
const EMAIL_REGEX: &str = r"[a-zA-Z0-9-_.]*[a-zA-Z0-9_](@[a-zA-Z0-9_]+)(\.[a-zA-Z0-9_]+(\.[a-zA-Z0-9_]+)?[a-zA-Z])(?-u:\b)";
// Second pattern handed to `Regex::new_many` (pattern 1): a run of 7 to 28
// hexadecimal digits with a word-boundary assertion (Unicode mode disabled)
// on each side.
const SHA_REGEX: &str = r"(?-u:\b)[0-9a-fA-F]{7,28}(?-u:\b)";
/// A full match assembled from a forward end position and a reverse start
/// position for the same pattern.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct Match {
    /// Index of the matching pattern, taken from the half match's pattern id.
    pattern_id: usize,
    /// Offset reported by the reverse search, i.e. where the match begins.
    offset_start: usize,
    /// Offset reported by the forward search, i.e. where the match ends.
    offset_end: usize,
}
/// Checks `find_all_matches` against a sample address: one e-mail match
/// covering the whole haystack (0..18) and one hash match for the `deadbeef`
/// portion (6..14).
fn main() {
    let expected = [
        Match { pattern_id: 0, offset_start: 0, offset_end: 18 },
        Match { pattern_id: 1, offset_start: 6, offset_end: 14 },
    ];
    let found = find_all_matches("email@deadbeef.com");
    assert_eq!(&found, &expected);
}
fn find_all_matches(haystack: &str) -> Vec<Match> {
    let re = Regex::new_many(&[EMAIL_REGEX, SHA_REGEX]).unwrap();
    let mut cache = re.create_cache();
    try_search_overlapping(&re, &mut cache, &Input::new(haystack))
}
/// Collects full matches by pairing every end position reported by a forward
/// overlapping search with every start position reported by a reverse
/// overlapping search for the same pattern.
///
/// `re` supplies the forward and reverse lazy DFAs, `cache` their respective
/// caches, and `input` the haystack plus search bounds. Matches are pushed in
/// discovery order; nothing here sorts or de-duplicates them.
///
/// # Panics
/// Panics if either overlapping search returns an error (both results are
/// `unwrap`ped).
/// NOTE(review): the reverse search uses `Anchored::Pattern`, which presumably
/// requires the DFAs to be built with `starts_for_each_pattern` enabled;
/// confirm against how `re` is constructed.
fn try_search_overlapping(re: &Regex, cache: &mut Cache, input: &Input<'_>) -> Vec<Match> {
    let mut matches = vec![];
    // Forward search progress lives in this state and is carried across
    // iterations of the outer loop.
    let mut fwd_state = OverlappingState::start();
    let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse());
    let (fwd_cache, rev_cache) = cache.as_parts_mut();
    // Outer loop: advance the forward search and take the next end position,
    // stopping once the state holds no match.
    while let Some(end) = {
        fwd_dfa.try_search_overlapping_fwd(fwd_cache, input, &mut fwd_state).unwrap();
        fwd_state.get_match()
    } {
        // Restrict the reverse search to the text before this end position and
        // to the pattern that produced it.
        let revsearch = input
            .clone()
            .range(input.start()..end.offset())
            .anchored(Anchored::Pattern(end.pattern()))
            .earliest(false);
        // A fresh reverse state for every end position.
        let mut rev_state = OverlappingState::start();
        // Inner loop: every start position found for this end yields a match.
        while let Some(start) = {
            rev_dfa.try_search_overlapping_rev(rev_cache, &revsearch, &mut rev_state).unwrap();
            rev_state.get_match()
        } {
            let mat = Match {
                pattern_id: end.pattern().as_usize(),
                offset_start: start.offset(),
                offset_end: end.offset(),
            };
            matches.push(mat);
        }
    }
    matches
}leads to  At least I’m a lot closer now, so anyway I can try to debug myself out a bit | 
Beta Was this translation helpful? Give feedback.
Yeah I just figured that out!! Thanks
Final code to mark the answer: