Skip to content

Commit 5316e68

Browse files
Add commit collection support to Apache Tomcat V2 importer
- Extract GitHub and GitBox commit URLs from security advisories
- Create PackageCommitPatchData for each commit hash
- Populate fixed_by_commit_patches on AffectedPackageV2
- Add ReferenceV2 for all fix-related URLs
- Add tests for commit URL extraction, GitBox support, and pipeline integration

Signed-off-by: Dhirenderchoudhary <dhirenderchoudhary0001@gmail.com>
1 parent 65d7a58 commit 5316e68

File tree

2 files changed

+156
-18
lines changed

2 files changed

+156
-18
lines changed

vulnerabilities/pipelines/v2_importers/apache_tomcat_importer.py

Lines changed: 66 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from collections import defaultdict
1414
from collections import namedtuple
1515
from typing import Iterable
16+
from typing import List
1617

1718
import requests
1819
from bs4 import BeautifulSoup
@@ -25,8 +26,18 @@
2526

2627
from vulnerabilities.importer import AdvisoryDataV2
2728
from vulnerabilities.importer import AffectedPackageV2
29+
from vulnerabilities.importer import PackageCommitPatchData
30+
from vulnerabilities.importer import ReferenceV2
2831
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
2932

33+
# Matches a commit page of the apache/tomcat repository on GitHub and captures
# the (possibly abbreviated) lowercase hexadecimal commit hash.  The trailing
# ``\b`` forces the hash to end on a token boundary: without it ``.match`` would
# accept a hex *prefix* of a longer token (``/commit/abcdefghij`` -> ``abcdef``)
# or silently truncate an over-long hex run to 40 characters.
GITHUB_COMMIT_URL_RE = re.compile(
    r"https?://github\.com/apache/tomcat/commit/(?P<commit_hash>[0-9a-f]{5,40})\b"
)
# Same contract for the ``a=commit;h=<hash>`` URLs served by gitbox.apache.org.
GITBOX_COMMIT_URL_RE = re.compile(
    r"https?://gitbox\.apache\.org/repos/asf\?p=tomcat\.git;a=commit;h=(?P<commit_hash>[0-9a-f]{5,40})\b"
)
# Repository URL used as ``vcs_url`` for the commit patches built in this module.
TOMCAT_VCS_URL = "https://github.com/apache/tomcat"
40+
3041

3142
class ApacheTomcatImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
3243
"""
@@ -75,6 +86,9 @@ def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
7586
grouped[advisory.cve].append(advisory)
7687
for cve, advisory_list in grouped.items():
7788
affected_packages = []
89+
references = []
90+
all_commit_patches = []
91+
7892
for advisory in advisory_list:
7993
self.log(f"Processing advisory {advisory.cve}")
8094
apache_range = to_version_ranges_apache(
@@ -88,10 +102,19 @@ def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
88102
advisory.fixed_in,
89103
)
90104

105+
commit_patches = get_commit_patches(
106+
advisory.commit_urls,
107+
)
108+
all_commit_patches.extend(commit_patches)
109+
110+
for ref_url in advisory.reference_urls:
111+
references.append(ReferenceV2(url=ref_url))
112+
91113
affected_packages.append(
92114
AffectedPackageV2(
93115
package=PackageURL(type="apache", name="tomcat"),
94116
affected_version_range=apache_range,
117+
fixed_by_commit_patches=commit_patches,
95118
)
96119
)
97120

@@ -103,13 +126,15 @@ def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
103126
name="tomcat",
104127
),
105128
affected_version_range=maven_range,
129+
fixed_by_commit_patches=commit_patches,
106130
)
107131
)
108132
page_id = page_url.split("/")[-1].replace(".html", "")
109133
yield AdvisoryDataV2(
110134
advisory_id=f"{page_id}/{cve}",
111135
summary=advisory_list[0].summary,
112136
affected_packages=affected_packages,
137+
references=references,
113138
url=page_url,
114139
)
115140

@@ -260,6 +285,8 @@ class TomcatAdvisoryData:
260285
summary: str
261286
fixed_in: str
262287
affected_versions: str
288+
commit_urls: List[str] = dataclasses.field(default_factory=list)
289+
reference_urls: List[str] = dataclasses.field(default_factory=list)
263290

264291

265292
def parse_tomcat_security(html_content):
@@ -285,36 +312,60 @@ def parse_tomcat_security(html_content):
285312

286313
if strong and cve_link:
287314
if current:
288-
results.append(current)
315+
results.append(_finalize_advisory(current))
289316

290317
current = {
291318
"cve": cve_link.get_text(strip=True),
292319
"summary": strong.get_text(" ", strip=True),
293320
"affected_versions": None,
294321
"fixed_in": fixed_in,
322+
"commit_urls": [],
323+
"reference_urls": [],
295324
}
296325
continue
297326

298327
if current:
299328
text = p.get_text(" ", strip=True)
329+
330+
if "was fixed" in text.lower():
331+
for link in p.find_all("a", href=True):
332+
href = link["href"]
333+
if GITHUB_COMMIT_URL_RE.match(href) or GITBOX_COMMIT_URL_RE.match(href):
334+
current["commit_urls"].append(href)
335+
current["reference_urls"].append(href)
336+
300337
if text.startswith("Affects:"):
301338
current["affected_versions"] = text.replace("Affects:", "").strip()
302-
current = TomcatAdvisoryData(
303-
cve=current["cve"],
304-
summary=current["summary"],
305-
affected_versions=current["affected_versions"],
306-
fixed_in=current["fixed_in"],
307-
)
308-
results.append(current)
339+
results.append(_finalize_advisory(current))
309340
current = None
310341

311342
if current:
312-
current = TomcatAdvisoryData(
313-
cve=current["cve"],
314-
summary=current["summary"],
315-
affected_versions=current["affected_versions"],
316-
fixed_in=current["fixed_in"],
317-
)
318-
results.append(current)
343+
results.append(_finalize_advisory(current))
319344

320345
return results
346+
347+
348+
def _finalize_advisory(current):
    """
    Return a TomcatAdvisoryData built from the ``current`` parser state dict.

    ``cve``, ``summary``, ``affected_versions`` and ``fixed_in`` are required
    keys; ``commit_urls`` and ``reference_urls`` are optional and fall back to
    an empty list when absent.
    """
    commit_urls = current.get("commit_urls", [])
    reference_urls = current.get("reference_urls", [])
    advisory = TomcatAdvisoryData(
        cve=current["cve"],
        summary=current["summary"],
        affected_versions=current["affected_versions"],
        fixed_in=current["fixed_in"],
        commit_urls=commit_urls,
        reference_urls=reference_urls,
    )
    return advisory
357+
358+
359+
def get_commit_patches(commit_urls):
    """
    Return a list of PackageCommitPatchData, one per URL in ``commit_urls``
    that matches the GitHub or GitBox Tomcat commit URL pattern.

    URLs matching neither pattern are skipped. Every patch uses
    TOMCAT_VCS_URL as its ``vcs_url`` and the hash captured from the URL as
    its ``commit_hash``. Input order is preserved.
    """
    # Lazily pair each URL with its match object (or None when unrecognized).
    url_matches = (
        GITHUB_COMMIT_URL_RE.match(url) or GITBOX_COMMIT_URL_RE.match(url)
        for url in commit_urls
    )
    return [
        PackageCommitPatchData(
            vcs_url=TOMCAT_VCS_URL,
            commit_hash=found.group("commit_hash"),
        )
        for found in url_matches
        if found
    ]

vulnerabilities/tests/pipelines/v2_importers/test_apache_tomcat_importer_pipeline.py

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@
1616

1717
from vulnerabilities.importer import AdvisoryDataV2
1818
from vulnerabilities.importer import AffectedPackageV2
19+
from vulnerabilities.importer import PackageCommitPatchData
20+
from vulnerabilities.importer import ReferenceV2
1921
from vulnerabilities.pipelines.v2_importers.apache_tomcat_importer import (
2022
ApacheTomcatImporterPipeline,
23+
TomcatAdvisoryData,
24+
get_commit_patches,
25+
parse_tomcat_security,
2126
)
22-
from vulnerabilities.pipelines.v2_importers.apache_tomcat_importer import TomcatAdvisoryData
23-
from vulnerabilities.pipelines.v2_importers.apache_tomcat_importer import parse_tomcat_security
2427

2528
TOMCAT_SECURITY_HTML = """
2629
<html>
@@ -52,6 +55,26 @@
5255
</html>
5356
"""
5457

58+
TOMCAT_SECURITY_HTML_WITH_COMMITS = """
59+
<html>
60+
<body>
61+
<h3 id="Fixed_in_Apache_Tomcat_10.1.40">Fixed in Apache Tomcat 10.1.40</h3>
62+
<div class="text">
63+
<p>
64+
<strong>Important: Denial of Service</strong>
65+
<a href="https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2025-31650">CVE-2025-31650</a>
66+
</p>
67+
<p>This was fixed with commits
68+
<a href="https://github.com/apache/tomcat/commit/cba1a0fe1289ee7f5dd46c61c38d1e1ac5437bff">cba1a0fe</a>,
69+
<a href="https://github.com/apache/tomcat/commit/1eef1dc459c45f1e421d8bd25ef340fc1cc34edc">1eef1dc4</a> and
70+
<a href="https://github.com/apache/tomcat/commit/8cc3b8fb3f2d8d4d6a757e014f19d1fafa948a60">8cc3b8fb</a>.
71+
</p>
72+
<p>Affects: 10.1.10 to 10.1.39</p>
73+
</div>
74+
</body>
75+
</html>
76+
"""
77+
5578

5679
def test_parse_tomcat_security_multiple_fixed_sections_same_cve():
5780
advisories = parse_tomcat_security(TOMCAT_SECURITY_HTML)
@@ -108,7 +131,6 @@ def test_affected_packages_structure():
108131
url="https://tomcat.apache.org/security-10.html",
109132
)
110133

111-
# Validate package structure expectations
112134
for pkg in advisory.affected_packages:
113135
assert isinstance(pkg, AffectedPackageV2)
114136
assert isinstance(pkg.package, PackageURL)
@@ -143,3 +165,68 @@ def test_apache_and_maven_version_ranges_created(mock_get):
143165

144166
for r in maven_ranges:
145167
assert isinstance(r, MavenVersionRange)
168+
169+
170+
def test_parse_tomcat_security_extracts_commit_urls():
    """The three GitHub commit links of the fixture are collected, in order."""
    expected_hashes = [
        "cba1a0fe1289ee7f5dd46c61c38d1e1ac5437bff",
        "1eef1dc459c45f1e421d8bd25ef340fc1cc34edc",
        "8cc3b8fb3f2d8d4d6a757e014f19d1fafa948a60",
    ]

    parsed = parse_tomcat_security(TOMCAT_SECURITY_HTML_WITH_COMMITS)
    assert len(parsed) == 1

    advisory = parsed[0]
    assert advisory.cve == "CVE-2025-31650"
    assert len(advisory.commit_urls) == 3
    # Each URL must carry the hash expected at the same position.
    for commit_hash, commit_url in zip(expected_hashes, advisory.commit_urls):
        assert commit_hash in commit_url
    assert len(advisory.reference_urls) == 3
180+
181+
182+
def test_parse_tomcat_security_extracts_gitbox_commits():
    """
    A gitbox.apache.org commit link in a fix paragraph is collected as a
    commit URL.

    The parser only inspects paragraphs whose text contains "was fixed", so
    the fixture must use that phrasing; with "Fixed with commit" no link is
    collected and indexing ``commit_urls[0]`` raises IndexError.
    """
    html = """
    <html><body>
    <h3 id="Fixed">Fixed 1.0</h3>
    <div class="text">
    <p><strong>Bug</strong><a href="CVE-2021-25329">CVE-2021-25329</a></p>
    <p>This was fixed with commit <a href="https://gitbox.apache.org/repos/asf?p=tomcat.git;a=commit;h=7b5269715a77">7b52697</a></p>
    <p>Affects: 1.0</p>
    </div>
    </body></html>
    """
    advisories = parse_tomcat_security(html)
    assert len(advisories) == 1
    # Check the length first so a regression fails on an assertion, not an IndexError.
    assert len(advisories[0].commit_urls) == 1
    assert "7b5269715a77" in advisories[0].commit_urls[0]
196+
197+
198+
def test_get_commit_patches_creates_patch_data():
    """Each GitHub commit URL yields one patch carrying its hash, in order."""
    first_hash = "b59099e4ca501a039510334ebe1024971cd6f959"
    second_hash = "cba1a0fe1289ee7f5dd46c61c38d1e1ac5437bff"
    commit_urls = [
        "https://github.com/apache/tomcat/commit/b59099e4ca501a039510334ebe1024971cd6f959",
        "https://github.com/apache/tomcat/commit/cba1a0fe1289ee7f5dd46c61c38d1e1ac5437bff",
    ]

    patches = get_commit_patches(commit_urls)

    assert len(patches) == 2
    first_patch, second_patch = patches
    assert first_patch.commit_hash == first_hash
    assert first_patch.vcs_url == "https://github.com/apache/tomcat"
    assert second_patch.commit_hash == second_hash
208+
209+
210+
@patch("vulnerabilities.pipelines.v2_importers.apache_tomcat_importer.requests.get")
def test_pipeline_populates_commit_patches_and_references(mock_get):
    """
    Run the pipeline end to end on the commit fixture: both affected packages
    carry the three commit patches and the advisory has three references.
    """
    mock_get.return_value.content = TOMCAT_SECURITY_HTML_WITH_COMMITS.encode("utf-8")

    def fake_fetch_advisory_links(self):
        # Restrict the pipeline to a single, mocked security page.
        return ["https://tomcat.apache.org/security-10.html"]

    pipeline = ApacheTomcatImporterPipeline()
    pipeline.fetch_advisory_links = types.MethodType(fake_fetch_advisory_links, pipeline)

    advisory = list(pipeline.collect_advisories())[0]

    assert len(advisory.affected_packages) == 2

    for affected_package in advisory.affected_packages:
        assert len(affected_package.fixed_by_commit_patches) == 3
        # Named ``commit_patch`` so the ``patch`` decorator is not shadowed.
        for commit_patch in affected_package.fixed_by_commit_patches:
            assert isinstance(commit_patch, PackageCommitPatchData)
            assert commit_patch.vcs_url == "https://github.com/apache/tomcat"

    assert len(advisory.references) == 3
    for reference in advisory.references:
        assert isinstance(reference, ReferenceV2)

0 commit comments

Comments
 (0)