Skip to content

Commit 28d65d9

Browse files
committed
Move the CollectVCSFixCommitPipeline base pipeline to pipes
Remove the CollectOpensslFixCommitsPipeline pipeline
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent ad451b4 commit 28d65d9

File tree

6 files changed

+175
-162
lines changed

6 files changed

+175
-162
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@
163163
collect_fix_commits_v2.CollectRustFixCommitsPipeline,
164164
collect_fix_commits_v2.CollectOpenjdkFixCommitsPipeline,
165165
collect_fix_commits_v2.CollectSwiftFixCommitsPipeline,
166-
collect_fix_commits_v2.CollectOpensslFixCommitsPipeline,
167166
collect_fix_commits_v2.CollectDjangoFixCommitsPipeline,
168167
collect_fix_commits_v2.CollectRailsFixCommitsPipeline,
169168
collect_fix_commits_v2.CollectLaravelFixCommitsPipeline,

vulnerabilities/pipelines/__init__.py

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@
88
#
99

1010
import logging
11-
import re
12-
import shutil
13-
import tempfile
1411
import traceback
15-
from collections import defaultdict
1612
from datetime import datetime
1713
from datetime import timezone
1814
from timeit import default_timer as timer
@@ -23,12 +19,8 @@
2319
from aboutcode.pipeline import LoopProgress
2420
from aboutcode.pipeline import PipelineDefinition
2521
from aboutcode.pipeline import humanize_time
26-
from git import Repo
27-
from packageurl.contrib.url2purl import url2purl
2822

2923
from vulnerabilities.importer import AdvisoryData
30-
from vulnerabilities.importer import AffectedPackageV2
31-
from vulnerabilities.importer import PackageCommitPatchData
3224
from vulnerabilities.improver import MAX_CONFIDENCE
3325
from vulnerabilities.models import Advisory
3426
from vulnerabilities.models import PipelineRun
@@ -336,109 +328,3 @@ def collect_and_store_advisories(self):
336328
continue
337329

338330
self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
339-
340-
341-
class CollectVCSFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
342-
"""
343-
Pipeline to collect fix commits from any git repository.
344-
"""
345-
346-
repo_url: str
347-
patterns: list[str] = [
348-
r"\bCVE-\d{4}-\d{4,19}\b",
349-
r"GHSA-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}",
350-
]
351-
352-
@classmethod
353-
def steps(cls):
354-
return (
355-
cls.clone,
356-
cls.collect_and_store_advisories,
357-
cls.clean_downloads,
358-
)
359-
360-
def clone(self):
361-
"""Clone the repository."""
362-
self.repo = Repo.clone_from(
363-
url=self.repo_url,
364-
to_path=tempfile.mkdtemp(),
365-
bare=True,
366-
no_checkout=True,
367-
multi_options=["--filter=blob:none"],
368-
)
369-
370-
def advisories_count(self) -> int:
371-
return 0
372-
373-
def extract_vulnerability_id(self, commit) -> list[str]:
374-
"""
375-
Extract vulnerability id from a commit message.
376-
Returns a list of matched vulnerability IDs
377-
"""
378-
matches = []
379-
for pattern in self.patterns:
380-
found = re.findall(pattern, commit.message, flags=re.IGNORECASE)
381-
matches.extend(found)
382-
return matches
383-
384-
def collect_fix_commits(self):
385-
"""
386-
Iterate through repository commits and group them by vulnerability identifiers.
387-
return a list with (vuln_id, [(commit_id, commit_message)]).
388-
"""
389-
self.log("Processing git repository fix commits (grouped by vulnerability IDs).")
390-
391-
grouped_commits = defaultdict(list)
392-
for commit in self.repo.iter_commits("--all"):
393-
matched_ids = self.extract_vulnerability_id(commit)
394-
if not matched_ids:
395-
continue
396-
397-
commit_id = commit.hexsha
398-
commit_message = commit.message.strip()
399-
400-
for vuln_id in matched_ids:
401-
grouped_commits[vuln_id].append((commit_id, commit_message))
402-
403-
self.log(f"Found {len(grouped_commits)} vulnerabilities with related commits.")
404-
self.log("Finished processing all commits.")
405-
return grouped_commits
406-
407-
def collect_advisories(self):
408-
"""
409-
Generate AdvisoryData objects for each vulnerability ID grouped with its related commits.
410-
"""
411-
self.log("Generating AdvisoryData objects from grouped commits.")
412-
grouped_commits = self.collect_fix_commits()
413-
purl = url2purl(self.repo_url)
414-
415-
for vuln_id, commits_data in grouped_commits.items():
416-
if not commits_data or not vuln_id:
417-
continue
418-
419-
commit_hash_set = {commit_hash for commit_hash, _ in commits_data}
420-
affected_packages = [
421-
AffectedPackageV2(
422-
package=purl,
423-
fixed_by_commit_patches=[
424-
PackageCommitPatchData(vcs_url=self.repo_url, commit_hash=commit_hash)
425-
for commit_hash in commit_hash_set
426-
],
427-
)
428-
]
429-
430-
yield AdvisoryData(
431-
advisory_id=vuln_id,
432-
affected_packages=affected_packages,
433-
url=self.repo_url,
434-
)
435-
436-
def clean_downloads(self):
437-
"""Cleanup any temporary repository data."""
438-
self.log("Cleaning up local repository resources.")
439-
if hasattr(self, "repo") and self.repo.working_dir:
440-
shutil.rmtree(path=self.repo.working_dir)
441-
442-
def on_failure(self):
443-
"""Ensure cleanup is always performed on failure."""
444-
self.clean_downloads()
Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,191 +1,186 @@
1-
from vulnerabilities.pipelines import CollectVCSFixCommitPipeline
1+
from vulnerabilities.pipes.vcs_collector_utils import CollectVCSFixCommitPipeline
22

33

44
class CollectLinuxFixCommitsPipeline(CollectVCSFixCommitPipeline):
55
pipeline_id = "collect_linux_fix_commits"
6-
repo_url = "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"
6+
repo_url = "https://github.com/torvalds/linux"
77

88

99
class CollectBusyBoxFixCommitsPipeline(CollectVCSFixCommitPipeline):
1010
pipeline_id = "collect_busybox_fix_commits"
11-
repo_url = "https://github.com/mirror/busybox.git"
11+
repo_url = "https://github.com/mirror/busybox"
1212

1313

1414
class CollectNginxFixCommitsPipeline(CollectVCSFixCommitPipeline):
1515
pipeline_id = "collect_nginx_fix_commits"
16-
repo_url = "https://github.com/nginx/nginx.git"
16+
repo_url = "https://github.com/nginx/nginx"
1717

1818

1919
class CollectApacheTomcatFixCommitsPipeline(CollectVCSFixCommitPipeline):
2020
pipeline_id = "collect_apache_tomcat_fix_commits"
21-
repo_url = "https://github.com/apache/tomcat.git"
21+
repo_url = "https://github.com/apache/tomcat"
2222

2323

2424
class CollectMysqlServerFixCommitsPipeline(CollectVCSFixCommitPipeline):
2525
pipeline_id = "collect_mysql_server_fix_commits"
26-
repo_url = "https://github.com/mysql/mysql-server.git"
26+
repo_url = "https://github.com/mysql/mysql-server"
2727

2828

2929
class CollectPostgresqlFixCommitsPipeline(CollectVCSFixCommitPipeline):
3030
pipeline_id = "collect_postgresql_fix_commits"
31-
repo_url = "https://github.com/postgres/postgres.git"
31+
repo_url = "https://github.com/postgres/postgres"
3232

3333

3434
class CollectMongodbFixCommitsPipeline(CollectVCSFixCommitPipeline):
3535
pipeline_id = "collect_mongodb_fix_commits"
36-
repo_url = "https://github.com/mongodb/mongo.git"
36+
repo_url = "https://github.com/mongodb/mongo"
3737

3838

3939
class CollectRedisFixCommitsPipeline(CollectVCSFixCommitPipeline):
4040
pipeline_id = "collect_redis_fix_commits"
41-
repo_url = "https://github.com/redis/redis.git"
41+
repo_url = "https://github.com/redis/redis"
4242

4343

4444
class CollectSqliteFixCommitsPipeline(CollectVCSFixCommitPipeline):
4545
pipeline_id = "collect_sqlite_fix_commits"
46-
repo_url = "https://github.com/sqlite/sqlite.git"
46+
repo_url = "https://github.com/sqlite/sqlite"
4747

4848

4949
class CollectPhpFixCommitsPipeline(CollectVCSFixCommitPipeline):
5050
pipeline_id = "collect_php_fix_commits"
51-
repo_url = "https://github.com/php/php-src.git"
51+
repo_url = "https://github.com/php/php-src"
5252

5353

5454
class CollectPythonCpythonFixCommitsPipeline(CollectVCSFixCommitPipeline):
5555
pipeline_id = "collect_python_cpython_fix_commits"
56-
repo_url = "https://github.com/python/cpython.git"
56+
repo_url = "https://github.com/python/cpython"
5757

5858

5959
class CollectRubyFixCommitsPipeline(CollectVCSFixCommitPipeline):
6060
pipeline_id = "collect_ruby_fix_commits"
61-
repo_url = "https://github.com/ruby/ruby.git"
61+
repo_url = "https://github.com/ruby/ruby"
6262

6363

6464
class CollectGoFixCommitsPipeline(CollectVCSFixCommitPipeline):
6565
pipeline_id = "collect_go_fix_commits"
66-
repo_url = "https://github.com/golang/go.git"
66+
repo_url = "https://github.com/golang/go"
6767

6868

6969
class CollectNodeJsFixCommitsPipeline(CollectVCSFixCommitPipeline):
7070
pipeline_id = "collect_node_js_fix_commits"
71-
repo_url = "https://github.com/nodejs/node.git"
71+
repo_url = "https://github.com/nodejs/node"
7272

7373

7474
class CollectRustFixCommitsPipeline(CollectVCSFixCommitPipeline):
7575
pipeline_id = "collect_rust_fix_commits"
76-
repo_url = "https://github.com/rust-lang/rust.git"
76+
repo_url = "https://github.com/rust-lang/rust"
7777

7878

7979
class CollectOpenjdkFixCommitsPipeline(CollectVCSFixCommitPipeline):
8080
pipeline_id = "collect_openjdk_fix_commits"
81-
repo_url = "https://github.com/openjdk/jdk.git"
81+
repo_url = "https://github.com/openjdk/jdk"
8282

8383

8484
class CollectSwiftFixCommitsPipeline(CollectVCSFixCommitPipeline):
8585
pipeline_id = "collect_swift_fix_commits"
86-
repo_url = "https://github.com/swiftlang/swift.git"
87-
88-
89-
class CollectOpensslFixCommitsPipeline(CollectVCSFixCommitPipeline):
90-
pipeline_id = "collect_openssl_fix_commits"
91-
repo_url = "https://github.com/openssl/openssl.git"
86+
repo_url = "https://github.com/swiftlang/swift"
9287

9388

9489
class CollectDjangoFixCommitsPipeline(CollectVCSFixCommitPipeline):
9590
pipeline_id = "collect_django_fix_commits"
96-
repo_url = "https://github.com/django/django.git"
91+
repo_url = "https://github.com/django/django"
9792

9893

9994
class CollectRailsFixCommitsPipeline(CollectVCSFixCommitPipeline):
10095
pipeline_id = "collect_rails_fix_commits"
101-
repo_url = "https://github.com/rails/rails.git"
96+
repo_url = "https://github.com/rails/rails"
10297

10398

10499
class CollectLaravelFixCommitsPipeline(CollectVCSFixCommitPipeline):
105100
pipeline_id = "collect_laravel_fix_commits"
106-
repo_url = "https://github.com/laravel/framework.git"
101+
repo_url = "https://github.com/laravel/framework"
107102

108103

109104
class CollectSpringFrameworkFixCommitsPipeline(CollectVCSFixCommitPipeline):
110105
pipeline_id = "collect_spring_framework_fix_commits"
111-
repo_url = "https://github.com/spring-projects/spring-framework.git"
106+
repo_url = "https://github.com/spring-projects/spring-framework"
112107

113108

114109
class CollectReactFixCommitsPipeline(CollectVCSFixCommitPipeline):
115110
pipeline_id = "collect_react_fix_commits"
116-
repo_url = "https://github.com/facebook/react.git"
111+
repo_url = "https://github.com/facebook/react"
117112

118113

119114
class CollectAngularFixCommitsPipeline(CollectVCSFixCommitPipeline):
120115
pipeline_id = "collect_angular_fix_commits"
121-
repo_url = "https://github.com/angular/angular.git"
116+
repo_url = "https://github.com/angular/angular"
122117

123118

124119
class CollectWordpressFixCommitsPipeline(CollectVCSFixCommitPipeline):
125120
pipeline_id = "collect_wordpress_fix_commits"
126-
repo_url = "https://github.com/WordPress/WordPress.git"
121+
repo_url = "https://github.com/WordPress/WordPress"
127122

128123

129124
class CollectDockerMobyFixCommitsPipeline(CollectVCSFixCommitPipeline):
130125
pipeline_id = "collect_docker_moby_fix_commits"
131-
repo_url = "https://github.com/moby/moby.git"
126+
repo_url = "https://github.com/moby/moby"
132127

133128

134129
class CollectKubernetesFixCommitsPipeline(CollectVCSFixCommitPipeline):
135130
pipeline_id = "collect_kubernetes_fix_commits"
136-
repo_url = "https://github.com/kubernetes/kubernetes.git"
131+
repo_url = "https://github.com/kubernetes/kubernetes"
137132

138133

139134
class CollectQemuFixCommitsPipeline(CollectVCSFixCommitPipeline):
140135
pipeline_id = "collect_qemu_fix_commits"
141-
repo_url = "https://gitlab.com/qemu-project/qemu.git"
136+
repo_url = "https://gitlab.com/qemu-project/qemu"
142137

143138

144139
class CollectXenProjectFixCommitsPipeline(CollectVCSFixCommitPipeline):
145140
pipeline_id = "collect_xen_project_fix_commits"
146-
repo_url = "https://github.com/xen-project/xen.git"
141+
repo_url = "https://github.com/xen-project/xen"
147142

148143

149144
class CollectVirtualboxFixCommitsPipeline(CollectVCSFixCommitPipeline):
150145
pipeline_id = "collect_virtualbox_fix_commits"
151-
repo_url = "https://github.com/mirror/vbox.git"
146+
repo_url = "https://github.com/mirror/vbox"
152147

153148

154149
class CollectContainerdFixCommitsPipeline(CollectVCSFixCommitPipeline):
155150
pipeline_id = "collect_containerd_fix_commits"
156-
repo_url = "https://github.com/containerd/containerd.git"
151+
repo_url = "https://github.com/containerd/containerd"
157152

158153

159154
class CollectAnsibleFixCommitsPipeline(CollectVCSFixCommitPipeline):
160155
pipeline_id = "collect_ansible_fix_commits"
161-
repo_url = "https://github.com/ansible/ansible.git"
156+
repo_url = "https://github.com/ansible/ansible"
162157

163158

164159
class CollectTerraformFixCommitsPipeline(CollectVCSFixCommitPipeline):
165160
pipeline_id = "collect_terraform_fix_commits"
166-
repo_url = "https://github.com/hashicorp/terraform.git"
161+
repo_url = "https://github.com/hashicorp/terraform"
167162

168163

169164
class CollectWiresharkFixCommitsPipeline(CollectVCSFixCommitPipeline):
170165
pipeline_id = "collect_wireshark_fix_commits"
171-
repo_url = "https://gitlab.com/wireshark/wireshark.git"
166+
repo_url = "https://gitlab.com/wireshark/wireshark"
172167

173168

174169
class CollectTcpdumpFixCommitsPipeline(CollectVCSFixCommitPipeline):
175170
pipeline_id = "collect_tcpdump_fix_commits"
176-
repo_url = "https://github.com/the-tcpdump-group/tcpdump.git"
171+
repo_url = "https://github.com/the-tcpdump-group/tcpdump"
177172

178173

179174
class CollectGitFixCommitsPipeline(CollectVCSFixCommitPipeline):
180175
pipeline_id = "collect_git_fix_commits"
181-
repo_url = "https://github.com/git/git.git"
176+
repo_url = "https://github.com/git/git"
182177

183178

184179
class CollectJenkinsFixCommitsPipeline(CollectVCSFixCommitPipeline):
185180
pipeline_id = "collect_jenkins_fix_commits"
186-
repo_url = "https://github.com/jenkinsci/jenkins.git"
181+
repo_url = "https://github.com/jenkinsci/jenkins"
187182

188183

189184
class CollectGitlabFixCommitsPipeline(CollectVCSFixCommitPipeline):
190185
pipeline_id = "collect_gitlab_fix_commits"
191-
repo_url = "https://gitlab.com/gitlab-org/gitlab-foss.git"
186+
repo_url = "https://gitlab.com/gitlab-org/gitlab-foss"

0 commit comments

Comments
 (0)