Skip to content

Commit 857b080

Browse files
committed
Improve CollectRepoFixCommitPipeline to use input and ensure it collects fixed_by_commit_patches correctly.
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 9704ea5 commit 857b080

File tree

5 files changed

+89
-43
lines changed

5 files changed

+89
-43
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,6 @@
4646
from vulnerabilities.pipelines.v2_importers import apache_kafka_importer as apache_kafka_importer_v2
4747
from vulnerabilities.pipelines.v2_importers import apache_tomcat_importer as apache_tomcat_v2
4848
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
49-
from vulnerabilities.pipelines.v2_importers import (
50-
collect_repo_fix_commits as collect_repo_fix_commits,
51-
)
5249
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
5350
from vulnerabilities.pipelines.v2_importers import debian_importer as debian_importer_v2
5451
from vulnerabilities.pipelines.v2_importers import (
@@ -148,6 +145,5 @@
148145
ubuntu_usn.UbuntuUSNImporter,
149146
fireeye.FireyeImporter,
150147
oss_fuzz.OSSFuzzImporter,
151-
collect_repo_fix_commits.CollectRepoFixCommitPipeline,
152148
]
153149
)

vulnerabilities/pipelines/v2_importers/collect_repo_fix_commits.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from git import Repo
77

88
from vulnerabilities.importer import AdvisoryData
9-
from vulnerabilities.importer import ReferenceV2
9+
from vulnerabilities.importer import AffectedPackageV2
10+
from vulnerabilities.importer import PackageCommitPatchData
1011
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
1112

1213
SECURITY_PATTERNS = [
@@ -22,7 +23,7 @@ class CollectRepoFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
2223
Pipeline to collect fix commits from any git repository.
2324
"""
2425

25-
pipeline_id = "repo_fix_commit"
26+
pipeline_id = "collect_fix_commit"
2627

2728
@classmethod
2829
def steps(cls):
@@ -34,23 +35,26 @@ def steps(cls):
3435

3536
def clone(self):
3637
"""Clone the repository."""
37-
self.repo_url = "https://github.com/torvalds/linux"
38-
repo_path = tempfile.mkdtemp()
38+
self.repo_url = self.inputs["repo_url"]
39+
if not self.repo_url:
40+
raise ValueError("Repo is required for CollectRepoFixCommitPipeline")
41+
42+
self.purl = self.inputs["purl"]
3943
self.repo = Repo.clone_from(
4044
url=self.repo_url,
41-
to_path=repo_path,
45+
to_path=tempfile.mkdtemp(),
4246
bare=True,
4347
no_checkout=True,
4448
multi_options=["--filter=blob:none"],
4549
)
4650

4751
def advisories_count(self) -> int:
48-
return int(self.repo.git.rev_list("--count", "HEAD"))
52+
return 0
4953

50-
def classify_commit_type(self, commit) -> list[str]:
54+
def extract_vulnerability_id(self, commit) -> list[str]:
5155
"""
52-
Extract vulnerability identifiers from a commit message.
53-
Returns a list of matched vulnerability IDs (normalized to uppercase).
56+
Extract vulnerability IDs from a commit message.
57+
Returns a list of matched vulnerability IDs
5458
"""
5559
matches = []
5660
for pattern in SECURITY_PATTERNS:
@@ -67,7 +71,7 @@ def collect_fix_commits(self):
6771

6872
grouped_commits = defaultdict(list)
6973
for commit in self.repo.iter_commits("--all"):
70-
matched_ids = self.classify_commit_type(commit)
74+
matched_ids = self.extract_vulnerability_id(commit)
7175
if not matched_ids:
7276
continue
7377

@@ -87,16 +91,30 @@ def collect_advisories(self):
8791
"""
8892
self.log("Generating AdvisoryData objects from grouped commits.")
8993
grouped_commits = self.collect_fix_commits()
90-
for vuln_id, commits in grouped_commits.items():
91-
references = [ReferenceV2(url=f"{self.repo_url}/commit/{cid}") for cid, _ in commits]
94+
for vuln_id, commits_data in grouped_commits.items():
95+
if not commits_data or not vuln_id:
96+
continue
9297

93-
summary_lines = [f"- {cid}: {msg}" for cid, msg in commits]
98+
summary_lines = []
99+
for c_hash, msg in commits_data:
100+
summary_lines.append(f"{c_hash}: {msg}")
94101
summary = f"Commits fixing {vuln_id}:\n" + "\n".join(summary_lines)
102+
103+
commit_hash_set = {commit_hash for commit_hash, _ in commits_data}
104+
affected_packages = [
105+
AffectedPackageV2(
106+
package=self.purl,
107+
fixed_by_commit_patches=[
108+
PackageCommitPatchData(vcs_url=self.repo_url, commit_hash=commit_hash)
109+
for commit_hash in commit_hash_set
110+
],
111+
)
112+
]
113+
95114
yield AdvisoryData(
96115
advisory_id=vuln_id,
97-
aliases=[vuln_id],
98116
summary=summary,
99-
references_v2=references,
117+
affected_packages=affected_packages,
100118
url=self.repo_url,
101119
)
102120

vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from unittest.mock import patch
1515

1616
import pytest
17+
from packageurl import PackageURL
1718

1819
from vulnerabilities.pipelines.v2_importers.collect_repo_fix_commits import (
1920
CollectRepoFixCommitPipeline,
@@ -33,7 +34,7 @@ def test_classify_commit_type_extracts_ids(pipeline):
3334
class DummyCommit:
3435
message = "Fix for CVE-2023-1234 and GHSA-2479-qvv7-47qq"
3536

36-
result = pipeline.classify_commit_type(DummyCommit)
37+
result = pipeline.extract_vulnerability_id(DummyCommit)
3738
assert result == ["CVE-2023-1234", "GHSA-2479-qvv7-47qq"]
3839

3940

@@ -78,12 +79,13 @@ def test_collect_advisories_from_json(self):
7879

7980
pipeline = CollectRepoFixCommitPipeline()
8081
pipeline.repo_url = "https://github.com/test/repo"
82+
pipeline.purl = PackageURL.from_string("pkg:generic/test")
8183
pipeline.log = MagicMock()
8284
pipeline.collect_fix_commits = MagicMock(return_value=grouped_commits)
8385

8486
result = [adv.to_dict() for adv in pipeline.collect_advisories()]
8587

86-
util_tests.check_results_against_json(result, expected_file)
88+
util_tests.check_results_against_json(result, expected_file, True)
8789

8890

8991
@pytest.mark.parametrize(
@@ -108,7 +110,7 @@ def __init__(self, message):
108110
self.message = message
109111

110112
commit = DummyCommit(commit_message)
111-
result = pipeline.classify_commit_type(commit)
113+
result = pipeline.extract_vulnerability_id(commit)
112114

113115
assert result == expected_ids, f"Unexpected result for message: {commit_message}"
114116

@@ -119,6 +121,6 @@ def test_classify_commit_type_case_insensitive(pipeline):
119121
class DummyCommit:
120122
message = "fix cVe-2022-9999 and ghSa-dead-beef-baad"
121123

122-
result = pipeline.classify_commit_type(DummyCommit)
124+
result = pipeline.extract_vulnerability_id(DummyCommit)
123125
assert any("CVE-2022-9999" in r.upper() for r in result)
124126
assert any("GHSA-DEAD-BEEF-BAAD" in r.upper() for r in result)

vulnerabilities/tests/test_data/fix_commits/expected_linux_advisory_output.json

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,67 @@
11
[
22
{
33
"advisory_id": "CVE-2021-0001",
4-
"aliases": [
5-
"CVE-2021-0001"
6-
],
7-
"summary": "Commits fixing CVE-2021-0001:\n- abc123: Fix CVE-2021-0001",
8-
"affected_packages": [],
9-
"references_v2": [
4+
"aliases": [],
5+
"summary": "Commits fixing CVE-2021-0001:\n41b43c74bda19753c757036673ea9db74acf494a: Fixed CVE-2025-59681 -- Protected QuerySet.annotate(), alias(), aggregate(), and extra() against SQL injection in column aliases on MySQL/MariaDB.",
6+
"affected_packages": [
107
{
11-
"reference_id": "",
12-
"reference_type": "",
13-
"url": "https://github.com/test/repo/commit/abc123"
8+
"package": {
9+
"type": "generic",
10+
"namespace": "",
11+
"name": "test",
12+
"version": "",
13+
"qualifiers": "",
14+
"subpath": ""
15+
},
16+
"affected_version_range": null,
17+
"fixed_version_range": null,
18+
"introduced_by_commit_patches": [],
19+
"fixed_by_commit_patches": [
20+
{
21+
"vcs_url": "https://github.com/test/repo",
22+
"commit_hash": "41b43c74bda19753c757036673ea9db74acf494a",
23+
"patch_text": null,
24+
"patch_checksum": null
25+
}
26+
]
1427
}
1528
],
29+
"references_v2": [],
30+
"patches": [],
1631
"severities": [],
1732
"date_published": null,
1833
"weaknesses": [],
1934
"url": "https://github.com/test/repo"
2035
},
2136
{
2237
"advisory_id": "GHSA-dead-beef-baad",
23-
"aliases": [
24-
"GHSA-dead-beef-baad"
25-
],
26-
"summary": "Commits fixing GHSA-dead-beef-baad:\n- def456: Patch GHSA-dead-beef-baad",
27-
"affected_packages": [],
28-
"references_v2": [
38+
"aliases": [],
39+
"summary": "Commits fixing GHSA-dead-beef-baad:\n49ff1042aa66bb25eda87e9a8ef82f3b0ad4eeba: Fixed CVE-2024-53907 -- Mitigated potential DoS in strip_tags().",
40+
"affected_packages": [
2941
{
30-
"reference_id": "",
31-
"reference_type": "",
32-
"url": "https://github.com/test/repo/commit/def456"
42+
"package": {
43+
"type": "generic",
44+
"namespace": "",
45+
"name": "test",
46+
"version": "",
47+
"qualifiers": "",
48+
"subpath": ""
49+
},
50+
"affected_version_range": null,
51+
"fixed_version_range": null,
52+
"introduced_by_commit_patches": [],
53+
"fixed_by_commit_patches": [
54+
{
55+
"vcs_url": "https://github.com/test/repo",
56+
"commit_hash": "49ff1042aa66bb25eda87e9a8ef82f3b0ad4eeba",
57+
"patch_text": null,
58+
"patch_checksum": null
59+
}
60+
]
3361
}
3462
],
63+
"references_v2": [],
64+
"patches": [],
3565
"severities": [],
3666
"date_published": null,
3767
"weaknesses": [],
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"CVE-2021-0001": [
3-
["abc123", "Fix CVE-2021-0001"]
3+
["41b43c74bda19753c757036673ea9db74acf494a", "Fixed CVE-2025-59681 -- Protected QuerySet.annotate(), alias(), aggregate(), and extra() against SQL injection in column aliases on MySQL/MariaDB."]
44
],
55
"GHSA-dead-beef-baad": [
6-
["def456", "Patch GHSA-dead-beef-baad"]
6+
["49ff1042aa66bb25eda87e9a8ef82f3b0ad4eeba", "Fixed CVE-2024-53907 -- Mitigated potential DoS in strip_tags()."]
77
]
88
}

0 commit comments

Comments
 (0)