Skip to content

Commit 49cf20b

Browse files
committed
Add LibreOffice security advisories importer
Fetches CVE IDs from the LibreOffice advisory listing page and retrieves structured data (CVSS, CWE, references, dates) from the CVE 5.0 JSON API at cveawg.mitre.org. Fixes: #1898 Signed-off-by: Anmol Vats <anmolvats2003@gmail.com>
1 parent 2dbbd38 commit 49cf20b

File tree

6 files changed

+484
-0
lines changed

6 files changed

+484
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
6060
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
6161
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
62+
from vulnerabilities.pipelines.v2_importers import libreoffice_importer as libreoffice_importer_v2
6263
from vulnerabilities.pipelines.v2_importers import mattermost_importer as mattermost_importer_v2
6364
from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
6465
from vulnerabilities.pipelines.v2_importers import nginx_importer as nginx_importer_v2
@@ -118,6 +119,7 @@
118119
retiredotnet_importer_v2.RetireDotnetImporterPipeline,
119120
ubuntu_osv_importer_v2.UbuntuOSVImporterPipeline,
120121
alpine_linux_importer_v2.AlpineLinuxImporterPipeline,
122+
libreoffice_importer_v2.LibreOfficeImporterPipeline,
121123
nvd_importer.NVDImporterPipeline,
122124
github_importer.GitHubAPIImporterPipeline,
123125
gitlab_importer.GitLabImporterPipeline,
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import logging
12+
import re
13+
from typing import Iterable
14+
15+
import dateparser
16+
import requests
17+
18+
from vulnerabilities.importer import AdvisoryDataV2
19+
from vulnerabilities.importer import ReferenceV2
20+
from vulnerabilities.importer import VulnerabilitySeverity
21+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
22+
from vulnerabilities.severity_systems import SCORING_SYSTEMS
23+
from vulnerabilities.utils import get_cwe_id
24+
25+
logger = logging.getLogger(__name__)
26+
27+
ADVISORIES_URL = "https://www.libreoffice.org/about-us/security/advisories/"
28+
CVE_API_URL = "https://cveawg.mitre.org/api/cve/{cve_id}"
29+
30+
# Map the CVSS keys used inside a CVE record's ``metrics`` entries to scoring systems.
# Insertion order is significant: consumers iterate this mapping in order, so the
# newest CVSS version is tried first.
CVSS_KEY_MAP = {
    cve_key: SCORING_SYSTEMS[system_id]
    for cve_key, system_id in (
        ("cvssV4_0", "cvssv4"),
        ("cvssV3_1", "cvssv3.1"),
        ("cvssV3_0", "cvssv3"),
        ("cvssV2_0", "cvssv2"),
    )
}
36+
37+
38+
class LibreOfficeImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Collect LibreOffice security advisories via the CVE API.

    CVE IDs are scraped from the LibreOffice advisories listing page
    (``ADVISORIES_URL``) and each ID is then fetched as a JSON record from
    ``CVE_API_URL`` and converted with ``parse_cve_advisory``.
    """

    pipeline_id = "libreoffice_importer"
    spdx_license_expression = "LicenseRef-scancode-proprietary-license"
    license_url = "https://www.libreoffice.org/about-us/security/"
    precedence = 200

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they are declared to run."""
        return (
            cls.fetch,
            cls.collect_and_store_advisories,
        )

    def fetch(self):
        """
        Download the advisories listing page and store its CVE IDs in ``self.cve_ids``.

        Raise the ``requests`` HTTP error if the listing page cannot be retrieved.
        """
        self.log(f"Fetch `{ADVISORIES_URL}`")
        resp = requests.get(ADVISORIES_URL, timeout=30)
        resp.raise_for_status()
        self.cve_ids = parse_cve_ids(resp.text)

    def advisories_count(self):
        """Return the number of CVE IDs stored by ``fetch``."""
        return len(self.cve_ids)

    def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
        """
        Yield an AdvisoryDataV2 for each stored CVE ID whose record can be fetched and parsed.

        A CVE whose request fails or whose response body is not valid JSON is logged
        at ERROR level and skipped, so one bad record does not stop the remaining ones.
        """
        for cve_id in self.cve_ids:
            url = CVE_API_URL.format(cve_id=cve_id)
            try:
                resp = requests.get(url, timeout=30)
                resp.raise_for_status()
                # Decode inside the guarded section: a non-JSON body must be treated
                # like any other failed fetch instead of aborting the whole run.
                data = resp.json()
            except Exception as e:
                logger.error("Failed to fetch CVE API for %s: %s", cve_id, e)
                continue
            advisory = parse_cve_advisory(data, cve_id)
            if advisory:
                yield advisory
74+
75+
76+
def parse_cve_ids(html: str) -> list:
    """Return the CVE IDs found in ``html``, each listed once, in first-seen order."""
    unique_ids = []
    already_seen = set()
    for match in re.finditer(r"CVE-\d{4}-\d+", html):
        found_id = match.group(0)
        if found_id in already_seen:
            continue
        already_seen.add(found_id)
        unique_ids.append(found_id)
    return unique_ids
79+
80+
81+
def parse_cve_advisory(data: dict, cve_id: str):
    """
    Parse a CVE 5.0 JSON record from cveawg.mitre.org into an AdvisoryDataV2.

    ``data`` is the decoded JSON record. ``cve_id`` is the ID that was requested and
    is used as a fallback when the record has no ``cveMetadata.cveId``.
    Summary, severities, weaknesses and references are read from the ``cna``
    container only. Return None if no CVE ID is available.
    """
    cve_metadata = data.get("cveMetadata") or {}
    advisory_id = cve_metadata.get("cveId") or cve_id
    if not advisory_id:
        return None

    date_published = None
    raw_date = cve_metadata.get("datePublished") or ""
    if raw_date:
        date_published = dateparser.parse(
            raw_date,
            settings={"TIMEZONE": "UTC", "RETURN_AS_TIMEZONE_AWARE": True, "TO_TIMEZONE": "UTC"},
        )
        if date_published is None:
            logger.warning("Could not parse date %r for %s", raw_date, advisory_id)

    cna = (data.get("containers") or {}).get("cna") or {}

    # Use the first English description. The language tag is compared
    # case-insensitively on its primary subtag so that variants such as
    # "en-us", "en-GB" or "en_US" are accepted along with "en" and "en-US".
    summary = ""
    for desc in cna.get("descriptions") or []:
        lang = (desc.get("lang") or "").lower().replace("_", "-")
        if lang == "en" or lang.startswith("en-"):
            summary = desc.get("value") or ""
            break

    # Keep at most one severity per metric entry: the first CVSS version, in
    # CVSS_KEY_MAP order, that has both a vector string and a base score.
    severities = []
    for metric in cna.get("metrics") or []:
        for key, system in CVSS_KEY_MAP.items():
            cvss = metric.get(key)
            if not cvss:
                continue
            vector = cvss.get("vectorString") or ""
            score = cvss.get("baseScore")
            if vector and score is not None:
                severities.append(
                    VulnerabilitySeverity(
                        system=system,
                        value=str(score),
                        scoring_elements=vector,
                    )
                )
                break

    weaknesses = []
    for problem_type in cna.get("problemTypes") or []:
        for desc in problem_type.get("descriptions") or []:
            cwe_str = desc.get("cweId") or ""
            if not cwe_str.upper().startswith("CWE-"):
                continue
            try:
                weaknesses.append(get_cwe_id(cwe_str))
            except Exception as e:
                # Best effort: a malformed CWE ID must not discard the advisory,
                # but it is reported rather than silently dropped.
                logger.warning(
                    "Skipping malformed CWE ID %r for %s: %s", cwe_str, advisory_id, e
                )

    advisory_url = (
        f"https://www.libreoffice.org/about-us/security/advisories/{advisory_id.lower()}/"
    )
    references = []
    for ref in cna.get("references") or []:
        url = ref.get("url") or ""
        if url:
            references.append(ReferenceV2(url=url))

    return AdvisoryDataV2(
        advisory_id=advisory_id,
        aliases=[],
        summary=summary,
        affected_packages=[],
        references=references,
        date_published=date_published,
        weaknesses=weaknesses,
        severities=severities,
        url=advisory_url,
        original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False),
    )
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os
12+
from unittest import TestCase
13+
from unittest.mock import MagicMock
14+
from unittest.mock import patch
15+
16+
from vulnerabilities.pipelines.v2_importers.libreoffice_importer import LibreOfficeImporterPipeline
17+
from vulnerabilities.pipelines.v2_importers.libreoffice_importer import parse_cve_advisory
18+
from vulnerabilities.pipelines.v2_importers.libreoffice_importer import parse_cve_ids
19+
20+
TEST_DATA = os.path.join(os.path.dirname(__file__), "..", "..", "test_data", "libreoffice")
21+
22+
23+
def load_json(filename):
    """Return the decoded JSON content of ``filename`` located under TEST_DATA."""
    fixture_path = os.path.join(TEST_DATA, filename)
    with open(fixture_path, encoding="utf-8") as fixture:
        raw_text = fixture.read()
    return json.loads(raw_text)
26+
27+
28+
def load_html(filename):
    """Return the text content of ``filename`` located under TEST_DATA."""
    fixture_path = os.path.join(TEST_DATA, filename)
    with open(fixture_path, encoding="utf-8") as fixture:
        content = fixture.read()
    return content
31+
32+
33+
class TestParseCveIds(TestCase):
    """Check that parse_cve_ids extracts and deduplicates CVE IDs from HTML text."""

    def test_extracts_cve_ids_from_html(self):
        found_ids = parse_cve_ids(load_html("advisories.html"))
        for expected_id in ("CVE-2025-1080", "CVE-2023-2255", "CVE-2023-4863"):
            self.assertIn(expected_id, found_ids)

    def test_deduplicates_repeated_ids(self):
        repeated = "<a>CVE-2025-1080</a> ... <a>CVE-2025-1080</a>"
        found_ids = parse_cve_ids(repeated)
        self.assertEqual(found_ids, ["CVE-2025-1080"])

    def test_empty_html_returns_empty_list(self):
        found_ids = parse_cve_ids("<html></html>")
        self.assertEqual(found_ids, [])
47+
48+
49+
class TestParseCveAdvisory(TestCase):
    """Check how parse_cve_advisory converts CVE JSON fixtures into advisory data."""

    def _parse_fixture(self, filename, cve_id):
        """Load the JSON fixture ``filename`` and parse it as the record for ``cve_id``."""
        return parse_cve_advisory(load_json(filename), cve_id)

    def test_cvss4_and_cwe(self):
        advisory = self._parse_fixture("cve_2025_1080.json", "CVE-2025-1080")
        self.assertIsNotNone(advisory)
        self.assertEqual(advisory.advisory_id, "CVE-2025-1080")
        self.assertEqual(advisory.aliases, [])
        self.assertIn("macro", advisory.summary.lower())
        self.assertEqual(len(advisory.severities), 1)
        severity = advisory.severities[0]
        self.assertEqual(severity.value, "7.2")
        self.assertIn("CVSS:4.0/", severity.scoring_elements)
        self.assertEqual(advisory.weaknesses, [20])
        self.assertIsNotNone(advisory.date_published)
        self.assertIn("cve-2025-1080", advisory.url)

    def test_no_cvss_has_empty_severities(self):
        advisory = self._parse_fixture("cve_2023_2255.json", "CVE-2023-2255")
        self.assertIsNotNone(advisory)
        self.assertEqual(advisory.severities, [])

    def test_cwe_264_extracted(self):
        advisory = self._parse_fixture("cve_2023_2255.json", "CVE-2023-2255")
        self.assertEqual(advisory.weaknesses, [264])

    def test_references_from_cna(self):
        advisory = self._parse_fixture("cve_2023_2255.json", "CVE-2023-2255")
        reference_urls = [reference.url for reference in advisory.references]
        self.assertIn("https://www.debian.org/security/2023/dsa-5415", reference_urls)
        self.assertIn("https://security.gentoo.org/glsa/202311-15", reference_urls)

    def test_missing_cve_id_returns_none(self):
        record_without_id = {"cveMetadata": {"cveId": ""}, "containers": {}}
        self.assertIsNone(parse_cve_advisory(record_without_id, ""))

    def test_original_advisory_text_is_json(self):
        advisory = self._parse_fixture("cve_2025_1080.json", "CVE-2025-1080")
        decoded = json.loads(advisory.original_advisory_text)
        self.assertEqual(decoded["cveMetadata"]["cveId"], "CVE-2025-1080")

    def test_malformed_cwe_skipped(self):
        # Work on a JSON round-tripped copy so the loaded fixture object is not mutated.
        record = json.loads(json.dumps(load_json("cve_2025_1080.json")))
        bad_cwe = {"cweId": "CWE-INVALID", "lang": "en", "type": "CWE"}
        record["containers"]["cna"]["problemTypes"] = [{"descriptions": [bad_cwe]}]
        advisory = parse_cve_advisory(record, "CVE-2025-1080")
        self.assertEqual(advisory.weaknesses, [])
100+
101+
102+
class TestLibreOfficeImporterPipeline(TestCase):
    """Check the pipeline's fetch, collect and count steps with ``requests.get`` mocked."""

    def _make_resp(self, data, status=200):
        """Return a mock HTTP response whose JSON payload and text are built from ``data``."""
        resp = MagicMock(text=json.dumps(data), status_code=status)
        resp.json.return_value = data
        resp.raise_for_status.return_value = None
        return resp

    @patch("vulnerabilities.pipelines.v2_importers.libreoffice_importer.requests.get")
    def test_fetch_stores_cve_ids(self, mock_get):
        listing_page = load_html("advisories.html")
        mock_get.return_value = MagicMock(text=listing_page, raise_for_status=MagicMock())
        pipeline = LibreOfficeImporterPipeline()
        pipeline.fetch()
        for expected_id in ("CVE-2025-1080", "CVE-2023-2255"):
            self.assertIn(expected_id, pipeline.cve_ids)

    @patch("vulnerabilities.pipelines.v2_importers.libreoffice_importer.requests.get")
    def test_collect_advisories_yields_advisory(self, mock_get):
        record = load_json("cve_2025_1080.json")
        pipeline = LibreOfficeImporterPipeline()
        pipeline.cve_ids = ["CVE-2025-1080"]
        mock_get.return_value = self._make_resp(record)
        collected = list(pipeline.collect_advisories())
        self.assertEqual(len(collected), 1)
        self.assertEqual(collected[0].advisory_id, "CVE-2025-1080")

    @patch("vulnerabilities.pipelines.v2_importers.libreoffice_importer.requests.get")
    def test_collect_advisories_skips_on_http_error(self, mock_get):
        pipeline = LibreOfficeImporterPipeline()
        pipeline.cve_ids = ["CVE-2025-1080"]
        mock_get.side_effect = Exception("timeout")
        logger_name = "vulnerabilities.pipelines.v2_importers.libreoffice_importer"
        with self.assertLogs(logger_name, level="ERROR") as captured:
            collected = list(pipeline.collect_advisories())
        self.assertEqual(collected, [])
        mentions_cve = any("CVE-2025-1080" in line for line in captured.output)
        self.assertTrue(mentions_cve)

    def test_advisories_count(self):
        pipeline = LibreOfficeImporterPipeline()
        pipeline.cve_ids = ["CVE-2025-1080", "CVE-2023-2255"]
        self.assertEqual(pipeline.advisories_count(), 2)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<body>
4+
<h3>Addressed in LibreOffice 24.8.5 and 25.2.1</h3>
5+
<ul>
6+
<li><a href="/about-us/security/advisories/cve-2025-1080/">CVE-2025-1080</a> Macro URL arbitrary script execution</li>
7+
</ul>
8+
<h3>Addressed in LibreOffice 7.4.7 and 7.5.3</h3>
9+
<ul>
10+
<li><a href="/about-us/security/advisories/cve-2023-2255/">CVE-2023-2255</a> Remote documents loaded without prompt via IFrame</li>
11+
</ul>
12+
<h3>Third Party Advisories</h3>
13+
<ul>
14+
<li><a href="/about-us/security/advisories/cve-2023-4863/">CVE-2023-4863</a> libwebp heap buffer overflow</li>
15+
</ul>
16+
</body>
17+
</html>

0 commit comments

Comments
 (0)