Skip to content

Commit ab1a60b

Browse files
committed
Make advisorydata V2
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 66c914b commit ab1a60b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+350
-337
lines changed

vulnerabilities/importer.py

Lines changed: 81 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -565,14 +565,68 @@ class AdvisoryData:
565565
date_published must be aware datetime
566566
"""
567567

568-
advisory_id: str = ""
569568
aliases: List[str] = dataclasses.field(default_factory=list)
570569
summary: Optional[str] = ""
571-
affected_packages: Union[List[AffectedPackage], List[AffectedPackageV2]] = dataclasses.field(
572-
default_factory=list
573-
)
570+
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
574571
references: List[Reference] = dataclasses.field(default_factory=list)
575-
references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list)
572+
date_published: Optional[datetime.datetime] = None
573+
weaknesses: List[int] = dataclasses.field(default_factory=list)
574+
url: Optional[str] = None
575+
576+
def __post_init__(self):
    """
    Normalise the summary once the dataclass fields have been assigned.

    An empty or missing summary is left untouched; any other value is
    replaced by the result of ``clean_summary()``.
    """
    if not self.summary:
        return
    self.summary = clean_summary(self.summary)
579+
580+
def to_dict(self):
    """
    Return this advisory as a plain mapping.

    Affected packages and references are converted through their own
    ``to_dict()`` methods, ``date_published`` is rendered with
    ``isoformat()`` (or None when unset) and a falsy ``url`` is emitted as
    an empty string.
    """
    published = None
    if self.date_published:
        published = self.date_published.isoformat()

    serialized = {}
    serialized["aliases"] = self.aliases
    serialized["summary"] = self.summary
    serialized["affected_packages"] = [
        package.to_dict() for package in self.affected_packages
    ]
    serialized["references"] = [reference.to_dict() for reference in self.references]
    serialized["date_published"] = published
    serialized["weaknesses"] = self.weaknesses
    serialized["url"] = self.url or ""
    return serialized
590+
591+
@classmethod
def from_dict(cls, advisory_data):
    """
    Build an instance from a mapping shaped like the output of ``to_dict()``.

    ``aliases``, ``summary``, ``affected_packages``, ``references``,
    ``date_published`` and ``weaknesses`` are required keys; ``url`` is
    optional and a falsy value becomes None. ``None`` entries in
    ``affected_packages`` are skipped.
    """
    raw_date = advisory_data["date_published"]

    aliases = advisory_data["aliases"]
    summary = advisory_data["summary"]
    packages = []
    for entry in advisory_data["affected_packages"]:
        if entry is None:
            continue
        packages.append(AffectedPackage.from_dict(entry))
    references = [Reference.from_dict(entry) for entry in advisory_data["references"]]
    published = datetime.datetime.fromisoformat(raw_date) if raw_date else None
    weaknesses = advisory_data["weaknesses"]
    url = advisory_data.get("url") or None

    return cls(
        aliases=aliases,
        summary=summary,
        affected_packages=packages,
        references=references,
        date_published=published,
        weaknesses=weaknesses,
        url=url,
    )
610+
611+
612+
@dataclasses.dataclass(order=True)
613+
class AdvisoryDataV2:
614+
"""
615+
This data class expresses the contract between data sources and the import runner.
616+
617+
If a vulnerability_id is present then:
618+
summary or affected_packages or references must be present
619+
otherwise
620+
either affected_package or references should be present
621+
622+
date_published must be aware datetime
623+
"""
624+
625+
advisory_id: str = ""
626+
aliases: List[str] = dataclasses.field(default_factory=list)
627+
summary: Optional[str] = ""
628+
affected_packages: List[AffectedPackageV2] = dataclasses.field(default_factory=list)
629+
references: List[ReferenceV2] = dataclasses.field(default_factory=list)
576630
patches: List[PatchData] = dataclasses.field(default_factory=list)
577631
date_published: Optional[datetime.datetime] = None
578632
weaknesses: List[int] = dataclasses.field(default_factory=list)
@@ -581,46 +635,24 @@ class AdvisoryData:
581635
original_advisory_text: Optional[str] = None
582636

583637
def __post_init__(self):
    """
    Validate the identifier and normalise the summary after field assignment.

    Raises ValueError when ``advisory_id`` is empty or when it also appears
    in ``aliases``. A non-empty summary is replaced by the result of
    ``clean_summary()``.
    """
    advisory_id = self.advisory_id
    if not advisory_id:
        raise ValueError("advisory_id is required for AdvisoryDataV2")

    # advisory_id is known to be non-empty here, so only membership is checked.
    if advisory_id in self.aliases:
        raise ValueError(
            f"advisory_id {self.advisory_id} should not be present in aliases {self.aliases}"
        )

    if not self.summary:
        return
    self.summary = clean_summary(self.summary)
598646

599647
def to_dict(self):
600-
is_adv_v2 = (
601-
self.advisory_id
602-
or self.severities
603-
or self.references_v2
604-
or (self.affected_packages and isinstance(self.affected_packages[0], AffectedPackageV2))
605-
)
606-
if is_adv_v2:
607-
return {
608-
"advisory_id": self.advisory_id,
609-
"aliases": self.aliases,
610-
"summary": self.summary,
611-
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
612-
"references_v2": [ref.to_dict() for ref in self.references_v2],
613-
"patches": [patch.to_dict() for patch in self.patches],
614-
"severities": [sev.to_dict() for sev in self.severities],
615-
"date_published": self.date_published.isoformat() if self.date_published else None,
616-
"weaknesses": self.weaknesses,
617-
"url": self.url if self.url else "",
618-
}
619648
return {
649+
"advisory_id": self.advisory_id,
620650
"aliases": self.aliases,
621651
"summary": self.summary,
622652
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
623653
"references": [ref.to_dict() for ref in self.references],
654+
"patches": [patch.to_dict() for patch in self.patches],
655+
"severities": [sev.to_dict() for sev in self.severities],
624656
"date_published": self.date_published.isoformat() if self.date_published else None,
625657
"weaknesses": self.weaknesses,
626658
"url": self.url if self.url else "",
@@ -629,31 +661,37 @@ def to_dict(self):
629661
@classmethod
def from_dict(cls, advisory_data):
    """
    Build an instance from a mapping shaped like the output of ``to_dict()``.

    ``aliases``, ``summary``, ``affected_packages``, ``references``,
    ``date_published`` and ``weaknesses`` are required keys. ``advisory_id``,
    ``patches``, ``severities`` and ``url`` are read with ``.get()``; a falsy
    ``url`` becomes None. ``None`` entries in ``affected_packages`` are
    skipped.

    Raises KeyError when a required key is missing and ValueError (from
    ``__post_init__``) when ``advisory_id`` is missing or empty.
    """
    date_published = advisory_data["date_published"]
    transformed = {
        # advisory_id is emitted by to_dict() and required by __post_init__;
        # without it every from_dict() call would fail validation.
        "advisory_id": advisory_data.get("advisory_id") or "",
        "aliases": advisory_data["aliases"],
        "summary": advisory_data["summary"],
        "affected_packages": [
            AffectedPackageV2.from_dict(pkg)
            for pkg in advisory_data["affected_packages"]
            if pkg is not None
        ],
        "patches": [PatchData.from_dict(patch) for patch in advisory_data.get("patches", [])],
        "references": [ReferenceV2.from_dict(ref) for ref in advisory_data["references"]],
        "date_published": datetime.datetime.fromisoformat(date_published)
        if date_published
        else None,
        "weaknesses": advisory_data["weaknesses"],
        "severities": [
            VulnerabilitySeverity.from_dict(sev) for sev in advisory_data.get("severities", [])
        ],
        "url": advisory_data.get("url") or None,
    }
    return cls(**transformed)
655684

656685

686+
def clean_summary(summary):
    """
    Return ``summary`` with surrounding whitespace stripped and every NUL
    character replaced by U+FFFD (the Unicode replacement character).

    A falsy ``summary`` (None or an empty string) is returned unchanged so
    that callers holding an ``Optional[str]`` do not need to guard the call.
    """
    if not summary:
        return summary
    # Background on why NUL characters are replaced (presumably they cannot be
    # stored downstream; see the links to confirm):
    # https://nvd.nist.gov/vuln/detail/CVE-2013-4314
    # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
    return summary.strip().replace("\x00", "\uFFFD")
693+
694+
657695
class NoLicenseError(Exception):
658696
pass
659697

vulnerabilities/importers/osv.py

Lines changed: 0 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -111,83 +111,6 @@ def parse_advisory_data(
111111
)
112112

113113

114-
def parse_advisory_data_v2(
115-
raw_data: dict, supported_ecosystems, advisory_url: str, advisory_text: str
116-
) -> Optional[AdvisoryData]:
117-
"""
118-
Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and
119-
a ``supported_ecosystem`` string.
120-
"""
121-
advisory_id = raw_data.get("id") or ""
122-
if not advisory_id:
123-
logger.error(f"Missing advisory id in OSV data: {raw_data}")
124-
return None
125-
summary = raw_data.get("summary") or ""
126-
details = raw_data.get("details") or ""
127-
summary = build_description(summary=summary, description=details)
128-
aliases = raw_data.get("aliases") or []
129-
130-
date_published = get_published_date(raw_data=raw_data)
131-
severities = list(get_severities(raw_data=raw_data))
132-
references = get_references_v2(raw_data=raw_data)
133-
134-
affected_packages = []
135-
136-
for affected_pkg in raw_data.get("affected") or []:
137-
purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id)
138-
139-
if not purl or purl.type not in supported_ecosystems:
140-
logger.error(f"Unsupported package type: {affected_pkg!r} in OSV: {advisory_id!r}")
141-
continue
142-
143-
affected_version_range = get_affected_version_range(
144-
affected_pkg=affected_pkg,
145-
raw_id=advisory_id,
146-
supported_ecosystem=purl.type,
147-
)
148-
149-
fixed_versions = []
150-
fixed_version_range = None
151-
for fixed_range in affected_pkg.get("ranges") or []:
152-
fixed_version = get_fixed_versions(
153-
fixed_range=fixed_range, raw_id=advisory_id, supported_ecosystem=purl.type
154-
)
155-
fixed_versions.extend([v.string for v in fixed_version])
156-
157-
fixed_version_range = (
158-
get_fixed_version_range(fixed_versions, purl.type) if fixed_versions else None
159-
)
160-
161-
if fixed_version_range or affected_version_range:
162-
affected_packages.append(
163-
AffectedPackageV2(
164-
package=purl,
165-
affected_version_range=affected_version_range,
166-
fixed_version_range=fixed_version_range,
167-
)
168-
)
169-
170-
database_specific = raw_data.get("database_specific") or {}
171-
cwe_ids = database_specific.get("cwe_ids") or []
172-
weaknesses = list(map(get_cwe_id, cwe_ids))
173-
174-
if advisory_id in aliases:
175-
aliases.remove(advisory_id)
176-
177-
return AdvisoryData(
178-
advisory_id=advisory_id,
179-
aliases=aliases,
180-
summary=summary,
181-
references_v2=references,
182-
severities=severities,
183-
affected_packages=affected_packages,
184-
date_published=date_published,
185-
weaknesses=weaknesses,
186-
url=advisory_url,
187-
original_advisory_text=advisory_text or json.dumps(raw_data, indent=2, ensure_ascii=False),
188-
)
189-
190-
191114
def extract_fixed_versions(fixed_range) -> Iterable[str]:
192115
"""
193116
Return a list of fixed version strings given a ``fixed_range`` mapping of

vulnerabilities/models.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565

6666
import vulnerablecode
6767
from vulnerabilities import utils
68+
from vulnerabilities.importer import AdvisoryDataV2
6869
from vulnerabilities.severity_systems import EPSS
6970
from vulnerabilities.severity_systems import SCORING_SYSTEMS
7071
from vulnerabilities.utils import compute_patch_checksum
@@ -2987,6 +2988,12 @@ class AdvisoryV2(models.Model):
29872988
help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.",
29882989
)
29892990

2991+
# precedence = models.IntegerField(
2992+
# null=True,
2993+
# blank=True,
2994+
# help_text="Precedence indicates the priority level of addressing a vulnerability based on its overall risk",
2995+
# )
2996+
29902997
@property
29912998
def risk_score(self):
29922999
"""
@@ -3026,17 +3033,17 @@ def get_absolute_url(self):
30263033
"""
30273034
return reverse("advisory_details", args=[self.avid])
30283035

3029-
def to_advisory_data(self) -> "AdvisoryData":
3030-
from vulnerabilities.importer import AdvisoryData
3036+
def to_advisory_data(self) -> "AdvisoryDataV2":
3037+
from vulnerabilities.importer import AdvisoryDataV2
30313038

3032-
return AdvisoryData(
3039+
return AdvisoryDataV2(
30333040
advisory_id=self.advisory_id,
30343041
aliases=[item.alias for item in self.aliases.all()],
30353042
summary=self.summary,
30363043
affected_packages=[
30373044
impacted.to_affected_package_data() for impacted in self.impacted_packages.all()
30383045
],
3039-
references_v2=[ref.to_reference_v2_data() for ref in self.references.all()],
3046+
references=[ref.to_reference_v2_data() for ref in self.references.all()],
30403047
patches=[patch.to_patch_data() for patch in self.patches.all()],
30413048
date_published=self.date_published,
30423049
weaknesses=[weak.cwe_id for weak in self.weaknesses.all()],

vulnerabilities/pipelines/v2_importers/aosp_importer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import dateparser
1616
from fetchcode.vcs import fetch_via_vcs
1717

18-
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.importer import AdvisoryDataV2
1919
from vulnerabilities.importer import VulnerabilitySeverity
2020
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
2121
from vulnerabilities.pipes.advisory import append_patch_classifications
@@ -100,13 +100,13 @@ def collect_advisories(self):
100100
f"{quote(file_path.name)}"
101101
)
102102

103-
yield AdvisoryData(
103+
yield AdvisoryDataV2(
104104
advisory_id=vulnerability_id,
105105
summary=summary,
106106
affected_packages=affected_packages,
107107
severities=severities,
108108
patches=patches,
109-
references_v2=references,
109+
references=references,
110110
date_published=date_published,
111111
url=url,
112112
)

vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from univers.version_range import ApacheVersionRange
2222
from univers.versions import SemverVersion
2323

24-
from vulnerabilities.importer import AdvisoryData
24+
from vulnerabilities.importer import AdvisoryDataV2
2525
from vulnerabilities.importer import AffectedPackageV2
2626
from vulnerabilities.importer import ReferenceV2
2727
from vulnerabilities.importer import VulnerabilitySeverity
@@ -223,7 +223,7 @@ class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
223223
def steps(cls):
224224
return (cls.collect_and_store_advisories,)
225225

226-
def collect_advisories(self) -> Iterable[AdvisoryData]:
226+
def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
227227
if not self.links:
228228
self.links = fetch_links(self.base_url)
229229
for link in self.links:
@@ -301,12 +301,12 @@ def to_advisory(self, data):
301301

302302
weaknesses = get_weaknesses(data)
303303

304-
return AdvisoryData(
304+
return AdvisoryDataV2(
305305
advisory_id=alias,
306306
aliases=[],
307307
summary=description or "",
308308
affected_packages=affected_packages,
309-
references_v2=[reference],
309+
references=[reference],
310310
weaknesses=weaknesses,
311311
url=reference.url,
312312
severities=severities,

vulnerabilities/pipelines/v2_importers/apache_kafka_importer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from packageurl import PackageURL
1919
from univers.version_range import ApacheVersionRange
2020

21-
from vulnerabilities.importer import AdvisoryData
21+
from vulnerabilities.importer import AdvisoryDataV2
2222
from vulnerabilities.importer import AffectedPackageV2
2323
from vulnerabilities.importer import ReferenceV2
2424
from vulnerabilities.models import AdvisoryReference
@@ -63,11 +63,11 @@ def fetch(self):
6363
def advisories_count(self):
6464
return sum(1 for _ in self.soup.find(class_="td-content").find_all("table"))
6565

66-
def collect_advisories(self) -> Iterable[AdvisoryData]:
66+
def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
6767
for table in self.soup.find(class_="td-content").find_all("table"):
6868
yield self.to_advisory_data(table)
6969

70-
def to_advisory_data(self, table) -> Iterable[AdvisoryData]:
70+
def to_advisory_data(self, table) -> Iterable[AdvisoryDataV2]:
7171
affected_constraints = None
7272
fixed_constraints = None
7373
affected_packages = []
@@ -124,13 +124,13 @@ def to_advisory_data(self, table) -> Iterable[AdvisoryData]:
124124
)
125125
)
126126

127-
return AdvisoryData(
127+
return AdvisoryDataV2(
128128
advisory_id=cve,
129129
aliases=[],
130130
summary=build_description(summary=title, description=description),
131131
date_published=date_published,
132132
affected_packages=affected_packages,
133-
references_v2=references,
133+
references=references,
134134
url=f"{self.url}#{cve}",
135135
original_advisory_text=original_advisory,
136136
)

0 commit comments

Comments
 (0)