From 8cb417808933ca73efdba846b276e27fe5dabe56 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 11 Nov 2025 13:08:58 +0400 Subject: [PATCH 1/9] Don't use old method in tests --- src/blocker.rs | 8 ++++++ src/engine.rs | 5 ++++ src/filters/network.rs | 36 +++++++++----------------- src/filters/network_matchers.rs | 35 ------------------------- tests/legacy_harness.rs | 35 +++++++++++++------------ tests/matching.rs | 5 ++-- tests/unit/filters/network_matchers.rs | 33 +++++++++++++++++++++-- tests/unit/optimizer.rs | 12 +++------ 8 files changed, 80 insertions(+), 89 deletions(-) diff --git a/src/blocker.rs b/src/blocker.rs index 4e062119..de023e3c 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -155,6 +155,14 @@ impl Blocker { .is_some() } + #[doc(hidden)] + pub(crate) fn check_exceptions(&self, request: &Request) -> bool { + let mut regex_manager = self.borrow_regex_manager(); + self.exceptions() + .check(request, &HashSet::new(), &mut regex_manager) + .is_some() + } + pub fn check_parameterised( &self, request: &Request, diff --git a/src/engine.rs b/src/engine.rs index 47962030..ff25cd42 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -139,6 +139,11 @@ impl Engine { self.blocker.check(request, &self.resources) } + #[doc(hidden)] + pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool { + self.blocker.check_exceptions(request) + } + pub fn check_network_request_subset( &self, request: &Request, diff --git a/src/filters/network.rs b/src/filters/network.rs index 043b7825..7f087655 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -969,6 +969,18 @@ impl NetworkFilter { FilterTokens::Other(t) } } + + #[doc(hidden)] + pub fn matches_test(&self, request: &request::Request) -> bool { + let filter_set = crate::FilterSet::new_with_rules(vec![self.clone()], vec![], true); + let engine = crate::Engine::from_filter_set(filter_set, true); + + if self.is_exception() { + engine.check_network_request_exceptions(request) + } else { + engine.check_network_request(request).matched + } + } } impl NetworkFilterMaskHelper for NetworkFilter { @@ -993,30 +1005,6 @@ pub trait NetworkMatchable { fn matches_test(&self, request: &request::Request) -> bool; } -impl NetworkMatchable for NetworkFilter { - fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool { - use crate::filters::network_matchers::{ - check_excluded_domains, check_included_domains, check_options, check_pattern, - }; - check_options(self.mask, request) - && check_included_domains(self.opt_domains.as_deref(), request) - && check_excluded_domains(self.opt_not_domains.as_deref(), request) - && check_pattern( - self.mask, - self.filter.iter(), - self.hostname.as_deref(), - (self as *const NetworkFilter) as u64, - request, - regex_manager, - ) - } - - #[cfg(test)] - fn matches_test(&self, request: &request::Request) -> bool { - self.matches(request, &mut RegexManager::default()) - } -} - // --------------------------------------------------------------------------- // Filter parsing // --------------------------------------------------------------------------- diff --git a/src/filters/network_matchers.rs b/src/filters/network_matchers.rs index 476cc0df..01eefa47 100644 --- a/src/filters/network_matchers.rs +++ b/src/filters/network_matchers.rs @@ -414,22 +414,6 @@ pub fn check_options(mask: NetworkFilterMask, request: &request::Request) -> boo true } -#[inline] -pub fn check_included_domains(opt_domains: Option<&[Hash]>, request: &request::Request) -> bool { - // Source URL must be among these domains to match - if let Some(included_domains) = opt_domains.as_ref() { - if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - if source_hashes - .iter() - .all(|h| !utils::bin_lookup(included_domains, *h)) - { - return false; - } - } - } - true -} - #[inline] pub fn check_included_domains_mapped( opt_domains: Option<&[u32]>, @@ -451,25 +435,6 @@ pub fn check_included_domains_mapped( true } -#[inline] -pub fn check_excluded_domains( - opt_not_domains: Option<&[Hash]>, - request: &request::Request, -) -> bool { - if let Some(excluded_domains) = opt_not_domains.as_ref() { - if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - if source_hashes - .iter() - .any(|h| utils::bin_lookup(excluded_domains, *h)) - { - return false; - } - } - } - - true -} - #[inline] pub fn check_excluded_domains_mapped( opt_not_domains: Option<&[u32]>, diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index dc5d8a4e..c8b959bc 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -1,9 +1,8 @@ mod legacy_test_filters { use adblock::filters::network::NetworkFilter; use adblock::filters::network::NetworkFilterMask; - use adblock::filters::network::NetworkMatchable; - use adblock::regex_manager::RegexManager; use adblock::request::Request; + use adblock::Engine; fn test_filter<'a>( raw_filter: &str, @@ -35,12 +34,15 @@ mod legacy_test_filters { filter.filter ); + let engine = Engine::from_rules_debug(&[raw_filter], Default::default()); + for to_block in blocked { assert!( - filter.matches( - &Request::new(to_block, "https://example.com", "other").unwrap(), - &mut RegexManager::default() - ), + engine + .check_network_request( + &Request::new(to_block, "https://example.com", "other").unwrap(), + ) + .matched, "Expected filter {} to match {}", raw_filter, &to_block @@ -49,10 +51,11 @@ mod legacy_test_filters { for to_pass in not_blocked { assert!( - !filter.matches( - &Request::new(to_pass, "https://example.com", "other").unwrap(), - &mut RegexManager::default() - ), + !engine + .check_network_request( + &Request::new(to_pass, "https://example.com", "other").unwrap(), + ) + .matched, "Expected filter {} to pass {}", raw_filter, &to_pass @@ -302,14 +305,12 @@ mod legacy_test_filters { ); // explicit, separate testcase construction of the "script" option as it is not the deafult - let filter = NetworkFilter::parse( - "||googlesyndication.com/safeframe/$third-party,script", - true, - Default::default(), - ) - .unwrap(); let request = Request::new("http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html#xpc=sf-gdn-exp-2&p=http%3A//slashdot.org;", "https://this-is-always-third-party.com", "script").unwrap(); - assert!(filter.matches(&request, &mut RegexManager::default())); + let engine = Engine::from_rules_debug( + &["||googlesyndication.com/safeframe/$third-party,script"], + Default::default(), + ); + assert!(engine.check_network_request(&request).matched); } } diff --git a/tests/matching.rs b/tests/matching.rs index a1a478d7..fc4dc9d9 100644 --- a/tests/matching.rs +++ b/tests/matching.rs @@ -1,5 +1,4 @@ -use adblock::filters::network::{NetworkFilter, NetworkFilterMaskHelper, NetworkMatchable}; -use adblock::regex_manager::RegexManager; +use adblock::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; use adblock::request::Request; use adblock::resources::{MimeType, Resource, ResourceType}; use adblock::Engine; @@ -75,7 +74,7 @@ fn check_filter_matching() { // The dataset has cases where URL is set to just "http://" or "https://", which we do not support if let Ok(request) = request_res { assert!( - network_filter.matches(&request, &mut RegexManager::default()), + network_filter.matches_test(&request), "Expected {} to match {} at {}, typed {}", filter, req.url, diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index f35cb25a..5b5bfb68 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -350,9 +350,38 @@ mod match_tests { } fn check_options(filter: &NetworkFilter, request: &request::Request) -> bool { + let mut mapping = HashMap::new(); + let opt_domains = filter.opt_domains.clone().map(|domains| { + domains + .iter() + .map(|domain| { + mapping.insert(*domain, *domain as u32); + *domain as u32 + }) + .collect::>() + }); + + let opt_not_domains = filter.opt_not_domains.clone().map(|domains| { + domains + .iter() + .map(|domain| { + mapping.insert(*domain, *domain as u32); + *domain as u32 + }) + .collect::>() + }); + super::super::check_options(filter.mask, request) - && super::super::check_included_domains(filter.opt_domains.as_deref(), request) - && super::super::check_excluded_domains(filter.opt_not_domains.as_deref(), request) + && super::super::check_included_domains_mapped( + opt_domains.as_deref(), + request, + &mapping, + ) + && super::super::check_excluded_domains_mapped( + opt_not_domains.as_deref(), + request, + &mapping, + ) } #[test] diff --git a/tests/unit/optimizer.rs b/tests/unit/optimizer.rs index a7ad2c45..35c1e7a3 100644 --- a/tests/unit/optimizer.rs +++ b/tests/unit/optimizer.rs @@ -3,7 +3,6 @@ mod optimization_tests_pattern_group { #[cfg(test)] mod optimization_tests_pattern_group_tests { use super::*; - use crate::filters::network::NetworkMatchable; use crate::lists; use crate::regex_manager::CompiledRegex; use crate::regex_manager::RegexManager; @@ -19,19 +18,18 @@ mod optimization_tests_pattern_group { } fn check_match( - regex_manager: &mut RegexManager, + _regex_manager: &mut RegexManager, filter: &NetworkFilter, url_path: &str, matches: bool, ) { - let is_match = filter.matches( + let is_match = filter.matches_test( &Request::new( ("https://example.com/".to_string() + url_path).as_str(), "https://google.com", "", ) .unwrap(), - regex_manager, ); assert!( is_match == matches, @@ -325,7 +323,6 @@ mod optimization_tests_pattern_group { } */ use super::super::*; - use crate::filters::network::NetworkMatchable; use crate::lists; use crate::regex_manager::CompiledRegex; use crate::regex_manager::RegexManager; @@ -341,19 +338,18 @@ mod optimization_tests_pattern_group { } fn check_match( - regex_manager: &mut RegexManager, + _regex_manager: &mut RegexManager, filter: &NetworkFilter, url_path: &str, matches: bool, ) { - let is_match = filter.matches( + let is_match = filter.matches_test( &Request::new( ("https://example.com/".to_string() + url_path).as_str(), "https://google.com", "", ) .unwrap(), - regex_manager, ); assert!( is_match == matches, From a16321573b7e75df9c0257f7ad49b6701b581b3b Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 11 Nov 2025 19:53:32 +0400 Subject: [PATCH 2/9] Remove _regex_manager --- src/blocker.rs | 4 +- src/engine.rs | 7 ++ tests/unit/filters/network_matchers.rs | 30 +++++--- tests/unit/optimizer.rs | 100 ++++++++----------------- tests/unit/regex_manager.rs | 76 +++++++++++-------- 5 files changed, 108 insertions(+), 109 deletions(-) diff --git a/src/blocker.rs b/src/blocker.rs index de023e3c..8b2640a1 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -131,7 +131,7 @@ impl Blocker { } #[cfg(feature = "single-thread")] - fn borrow_regex_manager(&self) -> std::cell::RefMut<'_, RegexManager> { + pub(crate) fn borrow_regex_manager(&self) -> std::cell::RefMut<'_, RegexManager> { #[allow(unused_mut)] let mut manager = self.regex_manager.borrow_mut(); @@ -142,7 +142,7 @@ impl Blocker { } #[cfg(not(feature = "single-thread"))] - fn borrow_regex_manager(&self) -> std::sync::MutexGuard<'_, RegexManager> { + pub(crate) fn borrow_regex_manager(&self) -> std::sync::MutexGuard<'_, RegexManager> { let mut manager = self.regex_manager.lock().unwrap(); manager.update_time(); manager diff --git a/src/engine.rs b/src/engine.rs index ff25cd42..093cc091 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -271,6 +271,13 @@ impl Engine { self.blocker.set_regex_discard_policy(new_discard_policy); } + #[cfg(test)] + pub fn borrow_regex_manager( + &self, + ) -> std::cell::RefMut<'_, crate::regex_manager::RegexManager> { + self.blocker.borrow_regex_manager() + } + #[cfg(feature = "debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.blocker.discard_regex(regex_id); diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index 5b5bfb68..7701bcf2 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -688,22 +688,34 @@ mod match_tests { } #[test] - #[ignore] // Not going to handle lookaround regexes #[cfg(feature = "debug-info")] fn check_lookaround_regex_handled() { { + use crate::Engine; let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; - let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; - let source = "http://123movies.com"; + let engine = Engine::from_rules(vec![filter], Default::default()); + let url = "https://9anime.to/watch/episode-1"; + let source = "https://9anime.to"; let request = request::Request::new(url, source, "script").unwrap(); - let mut regex_manager = RegexManager::default(); - assert!(regex_manager.get_compiled_regex_count() == 0); + assert_eq!( + engine + .get_debug_info() + .regex_debug_info + .compiled_regex_count, + 0 + ); + // Regex can't be compiled, so no match. assert!( - network_filter.matches(&request, &mut regex_manager), - "Expected match for {filter} on {url}" + !engine.check_network_request(&request).matched, + "Expected no match for {filter} on {url}" + ); + assert_eq!( + engine + .get_debug_info() + .regex_debug_info + .compiled_regex_count, + 1 ); - assert!(regex_manager.get_compiled_regex_count() == 1); } } diff --git a/tests/unit/optimizer.rs b/tests/unit/optimizer.rs index 35c1e7a3..8b35eec8 100644 --- a/tests/unit/optimizer.rs +++ b/tests/unit/optimizer.rs @@ -5,7 +5,6 @@ mod optimization_tests_pattern_group { use super::*; use crate::lists; use crate::regex_manager::CompiledRegex; - use crate::regex_manager::RegexManager; use crate::request::Request; use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder; @@ -18,7 +17,6 @@ mod optimization_tests_pattern_group { } fn check_match( - _regex_manager: &mut RegexManager, filter: &NetworkFilter, url_path: &str, matches: bool, @@ -93,38 +91,22 @@ mod optimization_tests_pattern_group { fused.to_string(), "/static/ad- <+> /static/ad. <+> /static/ad/* <+> /static/ads/* <+> /static/adv/*" ); - let mut regex_manager = RegexManager::default(); - check_match(&mut regex_manager, &fused, "/static/ad-", true); - check_match(&mut regex_manager, &fused, "/static/ad.", true); - check_match(&mut regex_manager, &fused, "/static/ad%", false); - check_match(&mut regex_manager, &fused, "/static/ads-", false); - check_match(&mut regex_manager, &fused, "/static/ad/", true); - check_match(&mut regex_manager, &fused, "/static/ad", false); - check_match(&mut regex_manager, &fused, "/static/ad/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/ad/foobar/asd?q=1", - true, - ); - check_match(&mut regex_manager, &fused, "/static/ads/", true); - check_match(&mut regex_manager, &fused, "/static/ads", false); - check_match(&mut regex_manager, &fused, "/static/ads/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/ads/foobar/asd?q=1", - true, - ); - check_match(&mut regex_manager, &fused, "/static/adv/", true); - check_match(&mut regex_manager, &fused, "/static/adv", false); - check_match(&mut regex_manager, &fused, "/static/adv/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/adv/foobar/asd?q=1", - true, - ); + check_match(&fused, "/static/ad-", true); + check_match(&fused, "/static/ad.", true); + check_match(&fused, "/static/ad%", false); + check_match(&fused, "/static/ads-", false); + check_match(&fused, "/static/ad/", true); + check_match(&fused, "/static/ad", false); + check_match(&fused, "/static/ad/foobar", true); + check_match(&fused, "/static/ad/foobar/asd?q=1", true); + check_match(&fused, "/static/ads/", true); + check_match(&fused, "/static/ads", false); + check_match(&fused, "/static/ads/foobar", true); + check_match(&fused, "/static/ads/foobar/asd?q=1", true); + check_match(&fused, "/static/adv/", true); + check_match(&fused, "/static/adv", false); + check_match(&fused, "/static/adv/foobar", true); + check_match(&fused, "/static/adv/foobar/asd?q=1", true); } #[test] @@ -325,7 +307,6 @@ mod optimization_tests_pattern_group { use super::super::*; use crate::lists; use crate::regex_manager::CompiledRegex; - use crate::regex_manager::RegexManager; use crate::request::Request; use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder; @@ -338,7 +319,6 @@ mod optimization_tests_pattern_group { } fn check_match( - _regex_manager: &mut RegexManager, filter: &NetworkFilter, url_path: &str, matches: bool, @@ -413,38 +393,22 @@ mod optimization_tests_pattern_group { fused.to_string(), "/static/ad- <+> /static/ad. <+> /static/ad/* <+> /static/ads/* <+> /static/adv/*" ); - let mut regex_manager = RegexManager::default(); - check_match(&mut regex_manager, &fused, "/static/ad-", true); - check_match(&mut regex_manager, &fused, "/static/ad.", true); - check_match(&mut regex_manager, &fused, "/static/ad%", false); - check_match(&mut regex_manager, &fused, "/static/ads-", false); - check_match(&mut regex_manager, &fused, "/static/ad/", true); - check_match(&mut regex_manager, &fused, "/static/ad", false); - check_match(&mut regex_manager, &fused, "/static/ad/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/ad/foobar/asd?q=1", - true, - ); - check_match(&mut regex_manager, &fused, "/static/ads/", true); - check_match(&mut regex_manager, &fused, "/static/ads", false); - check_match(&mut regex_manager, &fused, "/static/ads/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/ads/foobar/asd?q=1", - true, - ); - check_match(&mut regex_manager, &fused, "/static/adv/", true); - check_match(&mut regex_manager, &fused, "/static/adv", false); - check_match(&mut regex_manager, &fused, "/static/adv/foobar", true); - check_match( - &mut regex_manager, - &fused, - "/static/adv/foobar/asd?q=1", - true, - ); + check_match(&fused, "/static/ad-", true); + check_match(&fused, "/static/ad.", true); + check_match(&fused, "/static/ad%", false); + check_match(&fused, "/static/ads-", false); + check_match(&fused, "/static/ad/", true); + check_match(&fused, "/static/ad", false); + check_match(&fused, "/static/ad/foobar", true); + check_match(&fused, "/static/ad/foobar/asd?q=1", true); + check_match(&fused, "/static/ads/", true); + check_match(&fused, "/static/ads", false); + check_match(&fused, "/static/ads/foobar", true); + check_match(&fused, "/static/ads/foobar/asd?q=1", true); + check_match(&fused, "/static/adv/", true); + check_match(&fused, "/static/adv", false); + check_match(&fused, "/static/adv/foobar", true); + check_match(&fused, "/static/adv/foobar/asd?q=1", true); } #[test] diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index fdd9d107..00df2c5e 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -2,13 +2,12 @@ mod tests { use super::super::*; - use crate::filters::network::{NetworkFilter, NetworkMatchable}; - use crate::request; + use crate::{request, Engine}; use mock_instant::global::MockClock; - fn make_filter(line: &str) -> NetworkFilter { - NetworkFilter::parse(line, true, Default::default()).unwrap() + fn make_engine(line: &str) -> Engine { + Engine::from_rules(vec![line], Default::default()) } fn make_request(url: &str) -> request::Request { @@ -25,45 +24,62 @@ mod tests { #[test] fn simple_match() { - let mut regex_manager = RegexManager::default(); - regex_manager.update_time(); + let engine = make_engine("||geo*.hltv.org^"); + assert!( + engine + .check_network_request(&make_request("https://geo2.hltv.org/")) + .matched + ); - let filter = make_filter("||geo*.hltv.org^"); - assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); + let regex_manager = engine.borrow_regex_manager(); assert_eq!(get_active_regex_count(®ex_manager), 1); assert_eq!(regex_manager.get_debug_regex_data().len(), 1); } #[test] fn discard_and_recreate() { - let mut regex_manager = RegexManager::default(); - regex_manager.update_time(); + let engine = make_engine("||geo*.hltv.org^"); - let filter = make_filter("||geo*.hltv.org^"); - assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); - assert_eq!(regex_manager.get_compiled_regex_count(), 1); - assert_eq!(get_active_regex_count(®ex_manager), 1); + assert!( + engine + .check_network_request(&make_request("https://geo2.hltv.org/")) + .matched + ); - MockClock::advance(DEFAULT_DISCARD_UNUSED_TIME - Duration::from_secs(1)); - regex_manager.update_time(); - // The entry shouldn't be discarded because was used during - // last REGEX_MANAGER_DISCARD_TIME. - assert_eq!(get_active_regex_count(®ex_manager), 1); + { + let regex_manager = engine.borrow_regex_manager(); + assert_eq!(regex_manager.get_compiled_regex_count(), 1); + assert_eq!(get_active_regex_count(®ex_manager), 1); + } - // The entry is entry is outdated, but should be discarded only - // in the next cleanup() call. The call was 2 sec ago and is throttled - // now. - MockClock::advance(DEFAULT_CLEAN_UP_INTERVAL - Duration::from_secs(1)); - regex_manager.update_time(); - assert_eq!(get_active_regex_count(®ex_manager), 1); + { + let regex_manager = engine.borrow_regex_manager(); + MockClock::advance(DEFAULT_DISCARD_UNUSED_TIME - Duration::from_secs(1)); + // The entry shouldn't be discarded because was used during + // last REGEX_MANAGER_DISCARD_TIME. + assert_eq!(get_active_regex_count(®ex_manager), 1); + + // The entry is entry is outdated, but should be discarded only + // in the next cleanup() call. The call was 2 sec ago and is throttled + // now. + MockClock::advance(DEFAULT_CLEAN_UP_INTERVAL - Duration::from_secs(1)); + assert_eq!(get_active_regex_count(®ex_manager), 1); + } - MockClock::advance(Duration::from_secs(2)); - regex_manager.update_time(); - // The entry is now outdated & cleanup() should be called => discard. - assert_eq!(get_active_regex_count(®ex_manager), 0); + { + MockClock::advance(Duration::from_secs(2)); + let regex_manager = engine.borrow_regex_manager(); + // The entry is now outdated & cleanup() should be called => discard. + assert_eq!(get_active_regex_count(®ex_manager), 0); + } // The entry is recreated, get_compiled_regex_count() increased +1. - assert!(filter.matches(&make_request("https://geo2.hltv.org/"), &mut regex_manager)); + assert!( + engine + .check_network_request(&make_request("https://geo2.hltv.org/")) + .matched + ); + let regex_manager = engine.borrow_regex_manager(); assert_eq!(regex_manager.get_compiled_regex_count(), 2); assert_eq!(get_active_regex_count(®ex_manager), 1); } From 8967aea38be30cd7b0427f82a9de873aabe12a6c Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 11 Nov 2025 19:57:29 +0400 Subject: [PATCH 3/9] cargo fmt --- tests/unit/optimizer.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/unit/optimizer.rs b/tests/unit/optimizer.rs index 8b35eec8..d4779d20 100644 --- a/tests/unit/optimizer.rs +++ b/tests/unit/optimizer.rs @@ -16,11 +16,7 @@ mod optimization_tests_pattern_group { ); } - fn check_match( - filter: &NetworkFilter, - url_path: &str, - matches: bool, - ) { + fn check_match(filter: &NetworkFilter, url_path: &str, matches: bool) { let is_match = filter.matches_test( &Request::new( ("https://example.com/".to_string() + url_path).as_str(), @@ -318,11 +314,7 @@ mod optimization_tests_pattern_group { ); } - fn check_match( - filter: &NetworkFilter, - url_path: &str, - matches: bool, - ) { + fn check_match(filter: &NetworkFilter, url_path: &str, matches: bool) { let is_match = filter.matches_test( &Request::new( ("https://example.com/".to_string() + url_path).as_str(), From c167d3ed5fd3bf3d8a23c6e22405ea523a138baa Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 11 Nov 2025 20:09:52 +0400 Subject: [PATCH 4/9] Fix compilation --- src/blocker.rs | 19 +++++++++---------- src/engine.rs | 4 +--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/blocker.rs b/src/blocker.rs index 8b2640a1..e476c81c 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -79,6 +79,11 @@ pub struct Blocker { pub(crate) filter_data_context: FilterDataContextRef, } +#[cfg(feature = "single-thread")] +pub(crate) type RegexManagerRef<'a> = std::cell::RefMut<'a, RegexManager>; +#[cfg(not(feature = "single-thread"))] +pub(crate) type RegexManagerRef<'a> = std::sync::MutexGuard<'a, RegexManager>; + impl Blocker { /// Decide if a network request (usually from WebRequest API) should be /// blocked, redirected or allowed. @@ -130,10 +135,11 @@ impl Blocker { self.get_list(NetworkFilterListId::TaggedFiltersAll) } - #[cfg(feature = "single-thread")] - pub(crate) fn borrow_regex_manager(&self) -> std::cell::RefMut<'_, RegexManager> { - #[allow(unused_mut)] + pub(crate) fn borrow_regex_manager(&self) -> RegexManagerRef<'_> { + #[cfg(feature = "single-thread")] let mut manager = self.regex_manager.borrow_mut(); + #[cfg(not(feature = "single-thread"))] + let mut manager = self.regex_manager.lock().unwrap(); #[cfg(not(target_arch = "wasm32"))] manager.update_time(); @@ -141,13 +147,6 @@ impl Blocker { manager } - #[cfg(not(feature = "single-thread"))] - pub(crate) fn borrow_regex_manager(&self) -> std::sync::MutexGuard<'_, RegexManager> { - let mut manager = self.regex_manager.lock().unwrap(); - manager.update_time(); - manager - } - pub fn check_generic_hide(&self, hostname_request: &Request) -> bool { let mut regex_manager = self.borrow_regex_manager(); self.generic_hide() diff --git a/src/engine.rs b/src/engine.rs index 093cc091..06307fee 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -272,9 +272,7 @@ impl Engine { } #[cfg(test)] - pub fn borrow_regex_manager( - &self, - ) -> std::cell::RefMut<'_, crate::regex_manager::RegexManager> { + pub fn borrow_regex_manager(&self) -> crate::blocker::RegexManagerRef<'_> { self.blocker.borrow_regex_manager() } From fd26238b6f48cbc79af7e0e012e2d367319ce2bb Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 11 Nov 2025 20:10:51 +0400 Subject: [PATCH 5/9] Fix clippy --- tests/legacy_harness.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/legacy_harness.rs b/tests/legacy_harness.rs index c8b959bc..1cb133d6 100644 --- a/tests/legacy_harness.rs +++ b/tests/legacy_harness.rs @@ -34,7 +34,7 @@ mod legacy_test_filters { filter.filter ); - let engine = Engine::from_rules_debug(&[raw_filter], Default::default()); + let engine = Engine::from_rules_debug([raw_filter], Default::default()); for to_block in blocked { assert!( @@ -307,7 +307,7 @@ mod legacy_test_filters { // explicit, separate testcase construction of the "script" option as it is not the deafult let request = Request::new("http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html#xpc=sf-gdn-exp-2&p=http%3A//slashdot.org;", "https://this-is-always-third-party.com", "script").unwrap(); let engine = Engine::from_rules_debug( - &["||googlesyndication.com/safeframe/$third-party,script"], + ["||googlesyndication.com/safeframe/$third-party,script"], Default::default(), ); assert!(engine.check_network_request(&request).matched); From 7e6fe72ae63364583001c6b68c439a1a4c868d20 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 11 Nov 2025 20:16:41 +0400 Subject: [PATCH 6/9] Fix wasm compilation --- src/blocker.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/blocker.rs b/src/blocker.rs index e476c81c..e0337268 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -137,6 +137,7 @@ impl Blocker { pub(crate) fn borrow_regex_manager(&self) -> RegexManagerRef<'_> { #[cfg(feature = "single-thread")] + #[allow(unused_mut)] let mut manager = self.regex_manager.borrow_mut(); #[cfg(not(feature = "single-thread"))] let mut manager = self.regex_manager.lock().unwrap(); From 5035c1042c0f3dfd155ef17c9fdbd3a45179a30c Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 12 Nov 2025 00:07:10 +0400 Subject: [PATCH 7/9] fix tests --- tests/unit/regex_manager.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index 00df2c5e..fff53d93 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -25,6 +25,8 @@ mod tests { #[test] fn simple_match() { let engine = make_engine("||geo*.hltv.org^"); + engine.borrow_regex_manager(); + assert!( engine .check_network_request(&make_request("https://geo2.hltv.org/")) @@ -39,6 +41,7 @@ mod tests { #[test] fn discard_and_recreate() { let engine = make_engine("||geo*.hltv.org^"); + engine.borrow_regex_manager(); assert!( engine From 9102886c8f23fad32fee82ac6cd1f23a3845b013 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 12 Nov 2025 11:57:04 +0400 Subject: [PATCH 8/9] use mock_instant::thread_local::MockClock --- src/regex_manager.rs | 2 +- tests/unit/regex_manager.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/regex_manager.rs b/src/regex_manager.rs index 5713b620..2d22b7f6 100644 --- a/src/regex_manager.rs +++ b/src/regex_manager.rs @@ -15,7 +15,7 @@ use std::time::Duration; #[cfg(test)] #[cfg(not(target_arch = "wasm32"))] -use mock_instant::global::Instant; +use mock_instant::thread_local::Instant; #[cfg(not(test))] #[cfg(not(target_arch = "wasm32"))] use std::time::Instant; diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index fff53d93..37adc553 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -4,7 +4,7 @@ mod tests { use crate::{request, Engine}; - use mock_instant::global::MockClock; + use mock_instant::thread_local::MockClock; fn make_engine(line: &str) -> Engine { Engine::from_rules(vec![line], Default::default()) From 82d4cceaa5c5497347e03ea1facea0f71258bec3 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 18 Nov 2025 01:01:51 +0400 Subject: [PATCH 9/9] Fix review issues --- src/blocker.rs | 4 +++- src/engine.rs | 2 +- src/filters/network.rs | 4 ++-- tests/matching.rs | 38 ++++++++++++++++++++++++++++--------- tests/unit/regex_manager.rs | 2 -- 5 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/blocker.rs b/src/blocker.rs index e0337268..b87bd45e 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -135,6 +135,8 @@ impl Blocker { self.get_list(NetworkFilterListId::TaggedFiltersAll) } + /// Borrow mutable reference to the regex manager for the ['Blocker`]. + /// Only one caller can borrow the regex manager at a time. pub(crate) fn borrow_regex_manager(&self) -> RegexManagerRef<'_> { #[cfg(feature = "single-thread")] #[allow(unused_mut)] @@ -155,7 +157,7 @@ impl Blocker { .is_some() } - #[doc(hidden)] + #[cfg(test)] pub(crate) fn check_exceptions(&self, request: &Request) -> bool { let mut regex_manager = self.borrow_regex_manager(); self.exceptions() diff --git a/src/engine.rs b/src/engine.rs index 06307fee..7867e0cd 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -139,7 +139,7 @@ impl Engine { self.blocker.check(request, &self.resources) } - #[doc(hidden)] + #[cfg(test)] pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool { self.blocker.check_exceptions(request) } diff --git a/src/filters/network.rs b/src/filters/network.rs index 7f087655..a5d736d0 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -970,8 +970,8 @@ impl NetworkFilter { } } - #[doc(hidden)] - pub fn matches_test(&self, request: &request::Request) -> bool { + #[cfg(test)] + pub(crate) fn matches_test(&self, request: &request::Request) -> bool { let filter_set = crate::FilterSet::new_with_rules(vec![self.clone()], vec![], true); let engine = crate::Engine::from_filter_set(filter_set, true); diff --git a/tests/matching.rs b/tests/matching.rs index fc4dc9d9..53ec06a8 100644 --- a/tests/matching.rs +++ b/tests/matching.rs @@ -1,4 +1,4 @@ -use adblock::filters::network::{NetworkFilter, NetworkFilterMaskHelper}; +use adblock::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper}; use adblock::request::Request; use adblock::resources::{MimeType, Resource, ResourceType}; use adblock::Engine; @@ -69,18 +69,38 @@ fn check_filter_matching() { "Could not parse filter {filter}" ); let network_filter = network_filter_res.unwrap(); + let mut filters = vec![network_filter.clone()]; + if network_filter.is_exception() { + let mut original_filter = network_filter.clone(); + original_filter + .mask + .set(NetworkFilterMask::IS_EXCEPTION, false); + filters.push(original_filter); + } + let filter_set = adblock::FilterSet::new_with_rules(filters, vec![], true); + let engine = adblock::Engine::from_filter_set(filter_set, true); let request_res = Request::new(&req.url, &req.sourceUrl, &req.r#type); // The dataset has cases where URL is set to just "http://" or "https://", which we do not support if let Ok(request) = request_res { - assert!( - network_filter.matches_test(&request), - "Expected {} to match {} at {}, typed {}", - filter, - req.url, - req.sourceUrl, - req.r#type - ); + let result = engine.check_network_request(&request); + if !network_filter.is_exception() { + assert!( + result.matched, + "Expected {} to match {} at {}, typed {}", + filter, req.url, req.sourceUrl, req.r#type + ); + } else { + assert!( + !result.matched && result.exception.is_some(), + "Expected {} exception to match {} at {}, typed {}", + filter, + req.url, + req.sourceUrl, + req.r#type + ); + } + requests_checked += 1; } } diff --git a/tests/unit/regex_manager.rs b/tests/unit/regex_manager.rs index 37adc553..739781dd 100644 --- a/tests/unit/regex_manager.rs +++ b/tests/unit/regex_manager.rs @@ -25,7 +25,6 @@ mod tests { #[test] fn simple_match() { let engine = make_engine("||geo*.hltv.org^"); - engine.borrow_regex_manager(); assert!( engine @@ -41,7 +40,6 @@ mod tests { #[test] fn discard_and_recreate() { let engine = make_engine("||geo*.hltv.org^"); - engine.borrow_regex_manager(); assert!( engine