Skip to content

Commit d333a38

Browse files
committed
Addressing requested changes(#14085)
Removing all accidentally changed files and only committing related-work files.
1 parent ff2e898 commit d333a38

25 files changed

+2021
-0
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package org.jabref.logic.importer;
2+
3+
import java.util.Optional;
4+
5+
import org.jabref.model.entry.BibEntry;
6+
import org.jabref.model.entry.field.Field;
7+
import org.jabref.model.entry.field.FieldFactory;
8+
9+
public class RelatedWorkAnnotator {
10+
11+
public static void appendSummaryToEntry(
12+
BibEntry entry,
13+
String username,
14+
String citingPaperKey,
15+
String summarySentence
16+
) {
17+
String fieldName = "comment-" + username;
18+
Field commentField = FieldFactory.parseField(fieldName);
19+
20+
String cleaned = summarySentence.strip();
21+
if (!cleaned.endsWith(".")) {
22+
cleaned = cleaned + ".";
23+
}
24+
String formattedBlock = "[" + citingPaperKey + "]: " + cleaned;
25+
26+
Optional<String> existing = entry.getField(commentField);
27+
String newValue = existing
28+
.map(old -> old.strip() + "\n\n" + formattedBlock)
29+
.orElse(formattedBlock);
30+
31+
entry.setField(commentField, newValue);
32+
}
33+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package org.jabref.logic.importer.relatedwork;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashMap;
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
import org.jabref.model.entry.BibEntry;
9+
10+
/**
11+
* Adapts HeuristicRelatedWorkExtractor (citationKey -> snippet) to the
12+
* RelatedWorkEvaluationRunner.Extractor interface (BibEntry -> snippets).
13+
*/
14+
public final class HeuristicExtractorAdapter implements RelatedWorkEvaluationRunner.Extractor {
15+
16+
private final HeuristicRelatedWorkExtractor delegate;
17+
18+
public HeuristicExtractorAdapter(HeuristicRelatedWorkExtractor delegate) {
19+
this.delegate = delegate;
20+
}
21+
22+
@Override
23+
public Map<BibEntry, List<String>> apply(String relatedWorkText, List<BibEntry> candidates) {
24+
Map<String, String> byKey = delegate.extract(relatedWorkText, candidates);
25+
26+
Map<String, BibEntry> entryByKey = new HashMap<>();
27+
for (BibEntry be : candidates) {
28+
be.getCitationKey().ifPresent(k -> entryByKey.put(k, be));
29+
}
30+
31+
Map<BibEntry, List<String>> out = new HashMap<>();
32+
for (Map.Entry<String, String> e : byKey.entrySet()) {
33+
BibEntry be = entryByKey.get(e.getKey());
34+
if (be == null) {
35+
continue; // no match for that citation key among candidates
36+
}
37+
out.computeIfAbsent(be, k -> new ArrayList<>()).add(e.getValue());
38+
}
39+
return out;
40+
}
41+
}
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
package org.jabref.logic.importer.relatedwork;
2+
3+
import java.text.Normalizer;
4+
import java.util.HashMap;
5+
import java.util.LinkedHashMap;
6+
import java.util.List;
7+
import java.util.Locale;
8+
import java.util.Map;
9+
import java.util.Optional;
10+
import java.util.regex.Matcher;
11+
import java.util.regex.Pattern;
12+
13+
import org.jabref.model.entry.BibEntry;
14+
import org.jabref.model.entry.field.StandardField;
15+
16+
/**
17+
* Deterministic extractor for author–year style citations in "Related Work" sections.
18+
* Handles single and multi-citation parentheticals, including diacritics and all-caps acronyms (e.g., CIA, Šimić).
19+
*/
20+
public class HeuristicRelatedWorkExtractor implements RelatedWorkExtractor {
21+
22+
// Headings like "1.4 Related work", "RELATED WORK", etc. (case-insensitive)
23+
private static final Pattern RELATED_WORK_HEADING =
24+
Pattern.compile("(?im)^(\\d+(?:\\.\\d+)*)?\\s*related\\s+work[s]?\\s*[:\\-]?$");
25+
26+
// Any parenthetical block; author-year pairs are mined inside it.
27+
private static final Pattern PAREN_BLOCK = Pattern.compile("\\(([^)]+)\\)");
28+
29+
// Unicode-aware author–year inside a parenthetical.
30+
// Allows all-caps acronyms like "CIA" and Unicode surnames like "Šimić".
31+
// \p{Lu} = uppercase letter, \p{L} = any letter, \p{M} = combining mark.
32+
private static final Pattern AUTHOR_YEAR_INNER = Pattern.compile(
33+
"(?U)" // enable Unicode character classes
34+
+ "(\\p{Lu}[\\p{L}\\p{M}'\\-]*)" // 1: first author token (can be acronym or surname)
35+
+ "(?:\\s+(?:et\\s+al\\.)|\\s*(?:&|and)\\s+\\p{Lu}[\\p{L}\\p{M}'\\-]+)?"
36+
+ "\\s*,?\\s*"
37+
+ "(\\d{4})([a-z]?)" // 2: year, 3: optional trailing letter
38+
);
39+
40+
/**
41+
* Extract a mapping from cited entry key to a short contextual snippet.
42+
*
43+
* <p>The returned map uses the cited entry's citation key (for example, {@code Smith2021})
44+
* as the key, and a sentence-like snippet taken from around the in-text citation as the value.</p>
45+
*
46+
* @param fullText the full (plain) text of the paper or section to scan
47+
* @param bibliography candidate entries that may be cited; used to resolve author/year to a citation key
48+
* @return a {@code Map} from citation key to snippet; never {@code null}, possibly empty
49+
*/
50+
@Override
51+
public Map<String, String> extract(String fullText, List<BibEntry> bibliography) {
52+
String related = sliceRelatedWorkSection(fullText);
53+
Map<String, BibEntry> index = buildIndex(bibliography);
54+
Map<String, String> out = new LinkedHashMap<>();
55+
56+
Matcher paren = PAREN_BLOCK.matcher(related);
57+
while (paren.find()) {
58+
String inner = paren.group(1);
59+
Matcher cite = AUTHOR_YEAR_INNER.matcher(inner);
60+
61+
while (cite.find()) {
62+
String citedToken = normalizeSurname(cite.group(1)); // e.g., "cia" or "nash"
63+
String yearDigits = cite.group(2); // ignore group(3) letter
64+
String citedKey = findKeyFor(citedToken, yearDigits, index);
65+
if (citedKey == null || out.containsKey(citedKey)) {
66+
continue;
67+
}
68+
69+
String snippet = expandToSentenceLikeSpan(related, paren.start(), paren.end());
70+
snippet = pruneTrailingCitationTail(snippet).trim();
71+
72+
if (!snippet.endsWith(".")) {
73+
snippet = snippet + ".";
74+
}
75+
if (snippet.length() > 300) {
76+
snippet = snippet.substring(0, 300) + "...";
77+
}
78+
79+
out.put(citedKey, snippet);
80+
}
81+
}
82+
83+
return out;
84+
}
85+
86+
/**
87+
* Try to isolate the "Related work" section; fallback to full text.
88+
*/
89+
private String sliceRelatedWorkSection(String text) {
90+
Matcher start = RELATED_WORK_HEADING.matcher(text);
91+
int begin = -1;
92+
while (start.find()) {
93+
begin = start.end();
94+
break;
95+
}
96+
if (begin < 0) {
97+
return text; // fallback: whole text
98+
}
99+
100+
// Next likely section heading AFTER begin (numbered or ALL-CAPS)
101+
Pattern nextSection = Pattern.compile(
102+
"(?m)^(?:\\d+(?:\\.\\d+)*)\\s+[A-Z][A-Z\\s\\-]{3,}$|^[A-Z][A-Z\\s\\-]{3,}$");
103+
Matcher end = nextSection.matcher(text);
104+
int stop = text.length();
105+
while (end.find()) {
106+
if (end.start() > begin) {
107+
stop = end.start();
108+
break;
109+
}
110+
}
111+
return text.substring(begin, stop);
112+
}
113+
114+
private Map<String, BibEntry> buildIndex(List<BibEntry> bibs) {
115+
Map<String, BibEntry> idx = new HashMap<>();
116+
for (BibEntry b : bibs) {
117+
Optional<String> y = b.getField(StandardField.YEAR);
118+
if (y.isEmpty()) {
119+
Optional<String> date = b.getField(StandardField.DATE);
120+
if (date.isPresent()) {
121+
Matcher m = Pattern.compile("(\\d{4})").matcher(date.get());
122+
if (m.find()) {
123+
y = Optional.of(m.group(1));
124+
}
125+
}
126+
}
127+
Optional<String> a = b.getField(StandardField.AUTHOR);
128+
if (y.isEmpty() || a.isEmpty()) {
129+
continue;
130+
}
131+
String yearDigits = y.get().replaceAll("[^0-9]", "");
132+
if (yearDigits.isEmpty()) {
133+
continue;
134+
}
135+
136+
String firstAuthor = firstAuthorRaw(a.get());
137+
String firstSurname = extractFirstSurnameFromRaw(firstAuthor);
138+
if (!firstSurname.isEmpty()) {
139+
idx.put(normalizeSurname(firstSurname) + yearDigits, b);
140+
}
141+
142+
// Also index acronym for corporate/multi-word first author without comma.
143+
String acronym = maybeAcronym(firstAuthor);
144+
if (!acronym.isEmpty()) {
145+
idx.put(acronym + yearDigits, b);
146+
}
147+
}
148+
return idx;
149+
}
150+
151+
/**
152+
* Get the raw first author string (before surname extraction).
153+
*/
154+
private String firstAuthorRaw(String authorField) {
155+
return authorField.split("\\s+and\\s+")[0].trim();
156+
}
157+
158+
/**
159+
* Extract the first author surname from a raw first-author token.
160+
*/
161+
private String extractFirstSurnameFromRaw(String firstAuthor) {
162+
if (firstAuthor.contains(",")) {
163+
return firstAuthor.substring(0, firstAuthor.indexOf(',')).trim();
164+
}
165+
if (firstAuthor.startsWith("{") && firstAuthor.endsWith("}")) {
166+
String inner = firstAuthor.substring(1, firstAuthor.length() - 1).trim();
167+
String[] parts = inner.split("\\s+");
168+
return parts.length == 0 ? "" : parts[parts.length - 1];
169+
}
170+
String[] parts = firstAuthor.split("\\s+");
171+
return parts.length == 0 ? "" : parts[parts.length - 1];
172+
}
173+
174+
private String maybeAcronym(String firstAuthor) {
175+
if (firstAuthor.contains(",")) {
176+
return ""; // likely "Surname, Given" → skip acronym
177+
}
178+
String unbraced = firstAuthor;
179+
if (unbraced.startsWith("{") && unbraced.endsWith("}")) {
180+
unbraced = unbraced.substring(1, unbraced.length() - 1);
181+
}
182+
String[] parts = unbraced.trim().split("\\s+");
183+
if (parts.length < 2) {
184+
return ""; // single token → not helpful
185+
}
186+
StringBuilder sb = new StringBuilder();
187+
for (String p : parts) {
188+
if (p.isEmpty()) {
189+
continue;
190+
}
191+
char c = p.charAt(0);
192+
if (Character.isLetter(c)) {
193+
sb.append(Character.toLowerCase(c));
194+
}
195+
}
196+
return sb.toString();
197+
}
198+
199+
/**
200+
* Normalize token: remove braces, strip diacritics, lowercase.
201+
*/
202+
private String normalizeSurname(String s) {
203+
String noBraces = s.replace("{", "").replace("}", "");
204+
String normalized = Normalizer.normalize(noBraces, Normalizer.Form.NFD)
205+
.replaceAll("\\p{M}+", "");
206+
return normalized.toLowerCase(Locale.ROOT);
207+
}
208+
209+
/**
210+
* Lookup by normalized token (surname or acronym) + 4-digit year.
211+
*/
212+
private String findKeyFor(String lowerToken, String yearDigits, Map<String, BibEntry> index) {
213+
BibEntry entry = index.get(lowerToken + yearDigits);
214+
return (entry != null) ? entry.getCitationKey().orElse(null) : null; // null signals "not found"
215+
}
216+
217+
/**
218+
* Expand to a sentence-like span around the parenthetical match.
219+
*/
220+
private String expandToSentenceLikeSpan(String text, int matchStart, int matchEnd) {
221+
int left = matchStart;
222+
while (left > 0) {
223+
char c = text.charAt(left - 1);
224+
if (c == '.' || c == '!' || c == '?' || c == '\n') {
225+
break;
226+
}
227+
left--;
228+
}
229+
int right = matchEnd;
230+
int len = text.length();
231+
while (right < len) {
232+
char c = text.charAt(right);
233+
if (c == '.' || c == '!' || c == '?' || c == '\n') {
234+
right++; // include the boundary char
235+
break;
236+
}
237+
right++;
238+
}
239+
if (right > len) {
240+
right = len;
241+
}
242+
return text.substring(left, right);
243+
}
244+
245+
/**
246+
* Heuristically remove trailing citation trains at the end of a snippet
247+
*/
248+
private String pruneTrailingCitationTail(String s) {
249+
int lastParen = s.lastIndexOf(')');
250+
if (lastParen > -1 && lastParen >= s.length() - 3) {
251+
String head = s.substring(0, lastParen + 1).trim();
252+
if (head.endsWith(").")) {
253+
return head;
254+
}
255+
if (head.endsWith(")")) {
256+
return head + ".";
257+
}
258+
return head;
259+
}
260+
return s;
261+
}
262+
}

0 commit comments

Comments
 (0)