Skip to content

Commit 91fbbe8

Browse files
committed
Add pipeline to federate package vulnerabilities
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 376a4d5 commit 91fbbe8

File tree

4 files changed

+446
-0
lines changed

4 files changed

+446
-0
lines changed

vulnerabilities/pipelines/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ def log(self, message, level=logging.INFO):
141141
class VulnerableCodePipeline(PipelineDefinition, BasePipelineRun):
142142
pipeline_id = None # Unique Pipeline ID
143143

144+
# When set to true pipeline is run only once.
145+
# To rerun onetime pipeline reset is_active field to True via migration.
146+
run_once = False
147+
144148
def on_failure(self):
145149
"""
146150
Tasks to run in the event that pipeline execution fails.
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# Copyright (c) nexB Inc. and others. All rights reserved.
2+
# VulnerableCode is a trademark of nexB Inc.
3+
# SPDX-License-Identifier: Apache-2.0
4+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
5+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
6+
# See https://aboutcode.org for more information about nexB OSS projects.
7+
#
8+
9+
10+
import itertools
11+
import shutil
12+
from operator import attrgetter
13+
from pathlib import Path
14+
15+
import saneyaml
16+
from aboutcode.pipeline import LoopProgress
17+
from django.conf import settings
18+
19+
from aboutcode.federated import DataFederation
20+
from vulnerabilities.models import PackageV2
21+
from vulnerabilities.pipelines import VulnerableCodePipeline
22+
from vulnerabilities.pipes import federatedcode
23+
24+
25+
class FederatePackageVulnerabilities(VulnerableCodePipeline):
26+
"""Export package vulnerabilities and advisory to FederatedCode."""
27+
28+
pipeline_id = "federate_package_vulnerabilities_v2"
29+
30+
@classmethod
31+
def steps(cls):
32+
return (
33+
cls.check_federatedcode_eligibility,
34+
cls.create_federatedcode_working_dir,
35+
cls.fetch_federation_config,
36+
cls.clone_vulnerabilities_repo,
37+
cls.publish_vulnerabilities,
38+
cls.delete_working_dir,
39+
)
40+
41+
def check_federatedcode_eligibility(self):
42+
"""Check if FederatedCode is configured."""
43+
federatedcode.check_federatedcode_configured_and_available(self.log)
44+
45+
def create_federatedcode_working_dir(self):
46+
"""Create temporary working dir."""
47+
self.working_path = federatedcode.create_federatedcode_working_dir()
48+
49+
def fetch_federation_config(self):
50+
"""Fetch config for PackageURL Federation."""
51+
data_federation = DataFederation.from_url(
52+
name="aboutcode-data",
53+
remote_root_url="https://github.com/aboutcode-data",
54+
)
55+
self.data_cluster = data_federation.get_cluster("purls")
56+
57+
def clone_vulnerabilities_repo(self):
58+
self.repo = federatedcode.clone_repository(
59+
repo_url=settings.FEDERATEDCODE_VULNERABILITIES_REPO,
60+
clone_path=self.working_path / "vulnerabilities-data",
61+
logger=self.log,
62+
)
63+
64+
def publish_vulnerabilities(self):
65+
"""Publish package vulnerabilities and advisory to FederatedCode"""
66+
repo_path = Path(self.repo.working_dir)
67+
commit_count = 1
68+
batch_size = 2000
69+
files_to_commit = set()
70+
exported_avids = set()
71+
72+
distinct_packages_count = (
73+
PackageV2.objects.values("type", "namespace", "name")
74+
.distinct("type", "namespace", "name")
75+
.count()
76+
)
77+
package_qs = package_prefetched_qs()
78+
grouped_packages = itertools.groupby(
79+
package_qs.iterator(chunk_size=2000),
80+
key=attrgetter("type", "namespace", "name"),
81+
)
82+
83+
self.log(f"Exporting vulnerabilities for {distinct_packages_count} packages.")
84+
progress = LoopProgress(
85+
total_iterations=distinct_packages_count,
86+
progress_step=1,
87+
logger=self.log,
88+
)
89+
for _, packages in progress.iter(grouped_packages):
90+
package_urls = []
91+
package_vulnerabilities = []
92+
for package in packages:
93+
purl = package.package_url
94+
package_urls.append(purl)
95+
package_vulnerabilities.append(serialize_package_vulnerability(package))
96+
97+
impacts = itertools.chain(
98+
package.affected_in_impacts.all(),
99+
package.fixed_in_impacts.all(),
100+
)
101+
for impact in impacts:
102+
adv = impact.advisory
103+
avid = adv.avid
104+
if avid in exported_avids:
105+
continue
106+
107+
exported_avids.add(avid)
108+
advisory = serialize_advisory(adv)
109+
adv_file = f"vulnerabilities/{avid}.yml"
110+
write_file(
111+
repo_path=repo_path,
112+
file_path=adv_file,
113+
data=advisory,
114+
)
115+
files_to_commit.add(adv_file)
116+
117+
package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(purl=purl)
118+
package_vulnerability_path = datafile_path.replace("/purls.yml", "/vulnerabilities.yml")
119+
package_vulnerability_path = f"packages/{package_repo}/{package_vulnerability_path}"
120+
package_path = f"packages/{package_repo}/{datafile_path}"
121+
122+
write_file(
123+
repo_path=repo_path,
124+
file_path=package_path,
125+
data=package_urls,
126+
)
127+
files_to_commit.add(package_path)
128+
129+
write_file(
130+
repo_path=repo_path,
131+
file_path=package_vulnerability_path,
132+
data=package_vulnerabilities,
133+
)
134+
files_to_commit.add(package_vulnerability_path)
135+
136+
if len(files_to_commit) > batch_size:
137+
if federatedcode.commit_and_push_changes(
138+
commit_message=self.commit_message(commit_count),
139+
repo=self.repo,
140+
files_to_commit=files_to_commit,
141+
logger=self.log,
142+
):
143+
commit_count += 1
144+
files_to_commit.clear()
145+
146+
if files_to_commit:
147+
federatedcode.commit_and_push_changes(
148+
commit_message=self.commit_message(commit_count, commit_count),
149+
repo=self.repo,
150+
files_to_commit=files_to_commit,
151+
logger=self.log,
152+
)
153+
154+
self.log(
155+
f"Federated {distinct_packages_count} package and {len(exported_avids)} vulnerabilities."
156+
)
157+
158+
def delete_working_dir(self):
159+
"""Remove temporary working dir."""
160+
if hasattr(self, "working_path") and self.working_path:
161+
shutil.rmtree(self.working_path)
162+
163+
def on_failure(self):
164+
self.delete_working_dir()
165+
166+
def commit_message(self, commit_count, total_commit_count="many"):
167+
"""Commit message for pushing Package vulnerability."""
168+
return federatedcode.commit_message(
169+
commit_count=commit_count,
170+
total_commit_count=total_commit_count,
171+
)
172+
173+
174+
def package_prefetched_qs():
175+
return PackageV2.objects.order_by("type", "namespace", "name", "version").prefetch_related(
176+
"affected_in_impacts",
177+
"affected_in_impacts__advisory",
178+
"affected_in_impacts__advisory__impacted_packages",
179+
"affected_in_impacts__advisory__aliases",
180+
"affected_in_impacts__advisory__references",
181+
"affected_in_impacts__advisory__severities",
182+
"affected_in_impacts__advisory__weaknesses",
183+
"fixed_in_impacts",
184+
"fixed_in_impacts__advisory",
185+
"fixed_in_impacts__advisory__impacted_packages",
186+
"fixed_in_impacts__advisory__aliases",
187+
"fixed_in_impacts__advisory__references",
188+
"fixed_in_impacts__advisory__severities",
189+
"fixed_in_impacts__advisory__weaknesses",
190+
)
191+
192+
193+
def serialize_package_vulnerability(package):
194+
affected_by_vulnerabilities = [
195+
impact.advisory.avid for impact in package.affected_in_impacts.all()
196+
]
197+
fixing_vulnerabilities = [impact.advisory.avid for impact in package.fixed_in_impacts.all()]
198+
199+
return {
200+
"purl": package.package_url,
201+
"affected_by_vulnerabilities": affected_by_vulnerabilities,
202+
"fixing_vulnerabilities": fixing_vulnerabilities,
203+
}
204+
205+
206+
def serialize_severity(sev):
207+
return {
208+
"score": sev.value,
209+
"scoring_system": sev.scoring_system,
210+
"scoring_elements": sev.scoring_elements,
211+
"published_at": str(sev.published_at),
212+
"url": sev.url,
213+
}
214+
215+
216+
def serialize_references(reference):
217+
return {
218+
"url": reference.url,
219+
"reference_type": reference.reference_type,
220+
"reference_id": reference.reference_id,
221+
}
222+
223+
224+
def serialize_advisory(advisory):
225+
"""Return a plain data mapping serialized from advisory object."""
226+
aliases = [a.alias for a in advisory.aliases.all()]
227+
severities = [serialize_severity(sev) for sev in advisory.severities.all()]
228+
weaknesses = [wkns.cwe for wkns in advisory.weaknesses.all()]
229+
references = [serialize_references(ref) for ref in advisory.references.all()]
230+
impacts = [
231+
{
232+
"purl": impact.base_purl,
233+
"affected_versions": impact.affecting_vers,
234+
"fixed_versions": impact.fixed_vers,
235+
}
236+
for impact in advisory.impacted_packages.all()
237+
]
238+
239+
return {
240+
"advisory_id": advisory.advisory_id,
241+
"datasource_id": advisory.avid,
242+
"datasource_url": advisory.url,
243+
"aliases": aliases,
244+
"summary": advisory.summary,
245+
"impacted_packages": impacts,
246+
"severities": severities,
247+
"weaknesses": weaknesses,
248+
"references": references,
249+
}
250+
251+
252+
def write_file(repo_path, file_path, data):
253+
"""Write ``data`` as YAML to ``repo_path``."""
254+
write_to = repo_path / file_path
255+
write_to.parent.mkdir(parents=True, exist_ok=True)
256+
with open(write_to, encoding="utf-8", mode="w") as f:
257+
f.write(saneyaml.dump(data))

0 commit comments

Comments
 (0)