Skip to content

Commit d863116

Browse files
committed
also detect #id
1 parent dc6239d commit d863116

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

src/anonymizer/detect.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ impl Default for DetectionConfig {
1919

2020
#[derive(Default, Debug)]
2121
pub(crate) struct DetectionResult {
22+
id: Option<String>,
2223
name: Option<String>,
2324
address_line1: Option<String>,
2425
address_line2: Option<String>,
@@ -28,7 +29,8 @@ pub(crate) struct DetectionResult {
2829

2930
impl DetectionResult {
3031
fn all_found(&self) -> bool {
31-
self.name.is_some()
32+
self.id.is_some()
33+
&& self.name.is_some()
3234
&& self.address_line1.is_some()
3335
&& self.address_line2.is_some()
3436
&& self.account_spaced.is_some()
@@ -97,6 +99,11 @@ pub fn detect_pii(input_path: &str) -> Result<(), Box<dyn std::error::Error>> {
9799
// Build final ordered list: id, name, addr1, addr2, account_spaced, account_ms
98100
let mut final_texts: Vec<String> = Vec::new();
99101
let mut inserted = std::collections::HashSet::new();
102+
if let Some(id) = result.id.as_ref() {
103+
if inserted.insert(id.clone()) {
104+
final_texts.push(id.clone());
105+
}
106+
}
100107
if let Some(n) = result.name.as_ref() {
101108
if inserted.insert(n.clone()) {
102109
final_texts.push(n.clone());
@@ -248,6 +255,15 @@ fn handle_for_and_extract(
248255
}
249256
}
250257

258+
// If we found a later occurrence, check for ID immediately before it.
259+
if anchor_index > i + 1 {
260+
let id_candidate = &extracted_texts[anchor_index - 1];
261+
if !id_candidate.is_empty() {
262+
info!("Found ID before name anchor: {}", id_candidate);
263+
result.id = Some(id_candidate.clone());
264+
}
265+
}
266+
251267
let mut collected = 0;
252268
let mut look = 1; // start looking after the anchor name
253269
while collected < 2 && anchor_index + look < extracted_texts.len() {

0 commit comments

Comments
 (0)