Skip to content

Commit 158890f

Browse files
committed
parser-json-sarif: separate module for SarifTreeDecoder
1 parent f98d916 commit 158890f

File tree

4 files changed

+412
-348
lines changed

4 files changed

+412
-348
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ add_library(cs STATIC
7777
parser-gcc.cc
7878
parser-json.cc
7979
parser-json-cov.cc
80+
parser-json-sarif.cc
8081
parser-json-simple.cc
8182
parser-xml.cc
8283
shared-string.cc

src/parser-json-sarif.cc

Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
/*
2+
* Copyright (C) 2012-2022 Red Hat, Inc.
3+
*
4+
* This file is part of csdiff.
5+
*
6+
* csdiff is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* any later version.
10+
*
11+
* csdiff is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with csdiff. If not, see <http://www.gnu.org/licenses/>.
18+
*/
19+
20+
#include "parser-json-sarif.hh"
21+
22+
#include "parser-common.hh" // for ImpliedAttrDigger
23+
#include "regex.hh"
24+
25+
#include <boost/algorithm/string/predicate.hpp>
26+
27+
// Private state of SarifTreeDecoder: the checker name used for defects of a
// single-tool report, the regexes used while decoding, and the rule-ID --> CWE
// lookup table filled by updateCweMap().
struct SarifTreeDecoder::Private {
    // maps a rule ID to the CWE number parsed from the rule's properties
    using TCweMap = std::map<std::string, int>;

    // checker name given to each decoded defect; readScanProps() replaces the
    // default once it recognizes the tool that produced the report
    std::string                     singleChecker = "UNKNOWN_SARIF_WARNING";

    // "CWE-<number>" with the number captured as group 1
    const RE                        reCwe = RE("^CWE-([0-9]+)$");

    // leading dotted numeric version captured as group 1, any suffix ignored
    const RE                        reVersion = RE("^([0-9][0-9.]+).*$");

    // "<checker>: <event>" rule ID (group 1 = checker, group 2 = event)
    const RE                        reRuleId =
        RE("(" RE_CHECKER_NAME "): (" RE_EVENT ")");

    TCweMap                         cweMap;

    ImpliedAttrDigger               digger;

    // fill cweMap from the "rules" list of the given tool driver node
    void updateCweMap(const pt::ptree *driverNode);
};
41+
42+
// Construct the decoder together with a freshly allocated Private object
// holding its default state.
SarifTreeDecoder::SarifTreeDecoder()
    : d(new Private)
{
}
46+
47+
// Defaulted destructor, defined in this file where Private is a complete type
// (presumably required by the owning pointer `d` -- confirm against the header).
SarifTreeDecoder::~SarifTreeDecoder() = default;
48+
49+
// Fill cweMap with rule ID --> CWE number pairs found in the "rules" list of
// the given tool driver node.
//
// For each rule, the first item of its "properties"/"cwe" list is matched
// against reCwe ("CWE-<number>").  Rules without an ID, without properties,
// without a non-empty cwe list, or with an unparsable cwe string are skipped.
//
// driverNode: the "driver" node of a SARIF run (must not be null)
void SarifTreeDecoder::Private::updateCweMap(const pt::ptree *driverNode)
{
    const pt::ptree *rules;
    if (!findChildOf(&rules, *driverNode, "rules"))
        // no rules provided by the driver
        return;

    for (const auto &item : *rules) {
        const pt::ptree &rule = item.second;
        const auto id = valueOf<std::string>(rule, "id", "");
        if (id.empty())
            // rule ID missing
            continue;

        const pt::ptree *props;
        if (!findChildOf(&props, rule, "properties"))
            // properties missing
            continue;

        const pt::ptree *cweList;
        if (!findChildOf(&cweList, *props, "cwe") || cweList->empty())
            // cwe list missing
            continue;

        // only the first item of the cwe list is taken into account
        const std::string &cweStr = cweList->begin()->second.data();
        boost::smatch sm;
        if (!boost::regex_match(cweStr, sm, this->reCwe))
            // unable to parse cwe
            continue;

        // reCwe accepts an arbitrary count of digits but std::stoi() throws
        // std::out_of_range if the number does not fit into int.  Any number
        // of up to 9 decimal digits fits into a 32-bit int, so longer digit
        // strings (no real CWE ID comes close) are treated as unparsable.
        if (9 < sm.length(/* cwe */ 1))
            continue;

        const int cwe = std::stoi(sm[/* cwe */ 1]);
        this->cweMap[id] = cwe;
    }
}
82+
83+
// Read scan properties from the root of a SARIF document into *pDst and
// detect which tool produced the report.
//
// Externalized properties are imported only if "inlineExternalProperties"
// has exactly one item.  The tool detection (which also updates the CWE map
// and the checker name used by readNode()) is performed only if "runs" has
// exactly one item providing "tool"/"driver".
void SarifTreeDecoder::readScanProps(
        TScanProps                 *pDst,
        const pt::ptree            *root)
{
    TScanProps &scanProps = *pDst;

    // import externalized properties if available
    const pt::ptree *extPropList;
    if (findChildOf(&extPropList, *root, "inlineExternalProperties")
            && (1U == extPropList->size()))
    {
        const pt::ptree &holder = extPropList->begin()->second;
        const pt::ptree *extProps;
        if (findChildOf(&extProps, holder, "externalizedProperties")) {
            for (const pt::ptree::value_type &kv : *extProps)
                scanProps[kv.first] = kv.second.data();
        }
    }

    // exactly one run is required to go any further
    const pt::ptree *runs;
    if (!findChildOf(&runs, *root, "runs"))
        return;
    if (1U != runs->size())
        return;

    // locate the driver of the tool used for the run
    const pt::ptree *toolNode;
    if (!findChildOf(&toolNode, runs->begin()->second, "tool"))
        return;
    const pt::ptree *driverNode;
    if (!findChildOf(&driverNode, *toolNode, "driver"))
        return;

    d->updateCweMap(driverNode);

    // "semanticVersion" is used only when "version" is missing or empty
    const auto name = valueOf<std::string>(*driverNode, "name", "");
    auto version = valueOf<std::string>(*driverNode, "version", "");
    if (version.empty())
        version = valueOf<std::string>(*driverNode, "semanticVersion", "");

    // scan property that receives the unmodified version string (if any)
    const char *versionKey = nullptr;

    if ("SnykCode" == name) {
        // Snyk Code
        d->singleChecker = "SNYK_CODE_WARNING";
        versionKey = "analyzer-version-snyk-code";
    }
    else if ("gitleaks" == name) {
        // gitleaks
        d->singleChecker = "GITLEAKS_WARNING";
        versionKey = "analyzer-version-gitleaks";
    }
    else if (boost::starts_with(name, "GNU C")) {
        // GCC --> record only the leading numeric part of the version
        d->singleChecker = "COMPILER_WARNING";

        boost::smatch sm;
        if (boost::regex_match(version, sm, d->reVersion))
            scanProps["analyzer-version-gcc"] = sm[/* version */ 1];
    }

    if (versionKey && !version.empty())
        scanProps[versionKey] = version;
}
142+
143+
// Set up the iteration over "results" of the only run in the given "runs"
// list.  If there is not exactly one run, or the run has no "results",
// defIter_ is left untouched.
void SarifTreeDecoder::readRoot(const pt::ptree *runs)
{
    // exactly one run expected
    const bool singleRun = (1U == runs->size());

    if (singleRun && findChildOf(&defList_, runs->begin()->second, "results"))
        // results found --> start at the first one
        defIter_ = defList_->begin();
}
156+
157+
// Read file name and line/column from the "physicalLocation" of the given
// SARIF location node into *pEvt.
//
// Nothing is written if "physicalLocation" is missing.  The file name is
// written only if a non-empty "uri" is found.  If "region" is present, line
// and column are always written (0 stands for a missing value).
static void sarifReadLocation(DefEvent *pEvt, const pt::ptree &loc)
{
    const pt::ptree *physLoc;
    if (!findChildOf(&physLoc, loc, "physicalLocation"))
        // unknown location info format
        return;

    // file name
    const pt::ptree *artifactLoc;
    if (findChildOf(&artifactLoc, *physLoc, "artifactLocation")) {
        const auto uri = valueOf<std::string>(*artifactLoc, "uri", "");
        if (!uri.empty())
            pEvt->fileName = uri;
    }

    // line/col
    const pt::ptree *region;
    if (!findChildOf(&region, *physLoc, "region"))
        return;

    pEvt->line   = valueOf<int>(*region, "startLine", 0);
    pEvt->column = valueOf<int>(*region, "startColumn", 0);
}
179+
180+
// Store the "text" of the "message" child of node into *pDst, using
// "<unknown>" if the message has no text.
//
// Returns false (leaving *pDst untouched) if node has no "message" child.
static bool sarifReadMsg(std::string *pDst, const pt::ptree &node)
{
    const pt::ptree *msgNode;
    const bool found = findChildOf(&msgNode, node, "message");
    if (found)
        *pDst = valueOf<std::string>(*msgNode, "text", "<unknown>");

    return found;
}
189+
190+
// Append a "#" event (verbosity level 1) to pDef->events for each item of
// relatedLocs that carries a message but no file name.  Items with a file
// name are not treated as csdiff-encoded comments and are skipped.
static void sarifReadComments(Defect *pDef, const pt::ptree &relatedLocs)
{
    for (const auto &item : relatedLocs) {
        const pt::ptree &relLoc = item.second;

        // probe the location info using a scratch event
        DefEvent probe;
        sarifReadLocation(&probe, relLoc);
        const bool hasLocation = !probe.fileName.empty();
        if (hasLocation)
            // location info available --> not a csdiff-encoded comment
            continue;

        DefEvent comment("#");
        if (sarifReadMsg(&comment.msg, relLoc)) {
            comment.verbosityLevel = 1;
            pDef->events.push_back(comment);
        }
    }
}
209+
210+
// Read events from the given "codeFlows" node into pDef.
//
// Only a single code flow with a single thread flow is supported.  Each item
// of its "locations" that specifies "kinds" becomes an event named by the
// kinds joined with "_"; its verbosity level is taken from "nestingLevel"
// (1 if missing).  The last event with verbosity level 0 is the key event.
//
// pDef is left untouched if the structure is not as expected, if any event
// lacks "location", or if at most one event was read.  If a key event was
// found, the events read here replace pDef->events and pDef->keyEventIdx is
// updated; otherwise they are appended to pDef->events.
static void sarifReadCodeFlow(Defect *pDef, const pt::ptree &cf)
{
    // exactly one code flow expected
    if (1U != cf.size())
        return;

    const pt::ptree *threadFlows;
    if (!findChildOf(&threadFlows, cf.begin()->second, "threadFlows"))
        return;

    // exactly one thread flow expected
    if (1U != threadFlows->size())
        return;

    const pt::ptree *locs;
    if (!findChildOf(&locs, threadFlows->begin()->second, "locations"))
        return;

    TEvtList flowEvents;
    int keyIdx = -1;

    // read the full list of events
    for (const auto &item : *locs) {
        const pt::ptree &tfLoc = item.second;

        const pt::ptree *kinds;
        if (!findChildOf(&kinds, tfLoc, "kinds") || kinds->empty())
            // kind of the event not specified
            continue;

        // join all the kinds into a single event name
        std::string evtName;
        for (const auto &kindItem : *kinds) {
            if (!evtName.empty())
                evtName += "_";

            evtName += kindItem.second.data();
        }

        // append a new event of the specified kind
        flowEvents.push_back(DefEvent(evtName));
        DefEvent &evt = flowEvents.back();

        evt.verbosityLevel = valueOf<int>(tfLoc, "nestingLevel", 1);
        if (!evt.verbosityLevel)
            // the event just appended becomes the key event
            keyIdx = flowEvents.size() - 1U;

        const pt::ptree *loc;
        if (!findChildOf(&loc, tfLoc, "location"))
            // location info missing --> give up on the whole code flow
            return;

        sarifReadLocation(&evt, *loc);
        sarifReadMsg(&evt.msg, *loc);
    }

    if (flowEvents.size() <= 1U)
        // we failed to read more than one event
        return;

    if (-1 != keyIdx) {
        // key event found --> use only the events from threadFlows
        flowEvents.swap(pDef->events);
        pDef->keyEventIdx = keyIdx;
        return;
    }

    // no key event in threadFlows --> keep the existing events and append
    pDef->events.insert(pDef->events.end(),
            flowEvents.begin(), flowEvents.end());
}
276+
277+
// Return the "id" (read as int) of the first item in the "taxa" list of
// defNode whose "toolComponent" is named "cwe".  Returns 0 if there is no
// such item (or if the matching item has no "id").
static int sarifCweFromDefNode(const pt::ptree &defNode)
{
    const pt::ptree *taxa;
    if (findChildOf(&taxa, defNode, "taxa")) {
        for (const auto &item : *taxa) {
            const pt::ptree &taxon = item.second;

            const pt::ptree *toolComp;
            if (findChildOf(&toolComp, taxon, "toolComponent")
                    && "cwe" == valueOf<std::string>(*toolComp, "name", ""))
                return valueOf<int>(taxon, "id", 0);
        }
    }

    // not found
    return 0;
}
297+
298+
// Decode the next SARIF result into *def.
//
// Returns false if nextNode() yields no node (failed initialization or EOF),
// true otherwise.  The defect starts with the checker name detected by
// readScanProps() and a key event named by the "level" of the result
// ("warning" if missing); "ruleId", CWE, location, message, code flow, and
// csdiff-encoded comments are applied on top of that when available.
bool SarifTreeDecoder::readNode(Defect *def)
{
    // move the iterator after we get the current position
    const pt::ptree *pNode = this->nextNode();
    if (!pNode)
        // failed initialization or EOF
        return false;

    const pt::ptree &defNode = *pNode;

    // start from a fresh defect with a single (key) event
    *def = Defect(d->singleChecker);
    const auto level = valueOf<std::string>(defNode, "level", "warning");
    def->events.push_back(DefEvent(level));
    DefEvent &keyEvent = def->events.back();

    // read "rule" that triggered the report
    const auto rule = valueOf<std::string>(defNode, "ruleId", "");
    boost::smatch sm;
    const bool csdiffFormat = !rule.empty()
        && boost::regex_match(rule, sm, d->reRuleId);

    if (csdiffFormat) {
        // "<checker>: <event>" as written by csdiff
        def->checker = sm[/* checker */ 1];
        keyEvent.event = sm[/* keyEvent */ 2];
    }
    else if (!rule.empty()) {
        // output of a single tool --> append the rule to the event name
        keyEvent.event += "[" + rule + "]";

        // distinguish GCC compiler/analyzer
        const bool isGcc = (def->checker == "COMPILER_WARNING");
        if (isGcc && boost::starts_with(rule, "-Wanalyzer-"))
            def->checker = "GCC_ANALYZER_WARNING";
    }

    // CWE given by the result takes precedence over CWE given by the rule
    def->cwe = sarifCweFromDefNode(defNode);
    if (!def->cwe) {
        const auto it = d->cweMap.find(rule);
        if (d->cweMap.end() != it)
            def->cwe = it->second;
    }

    // read location (only the first one is used) and diagnostic message
    keyEvent.fileName = "<unknown>";
    const pt::ptree *locs;
    if (findChildOf(&locs, defNode, "locations") && !locs->empty())
        sarifReadLocation(&keyEvent, locs->begin()->second);

    sarifReadMsg(&keyEvent.msg, defNode);

    // read code flow if available
    const pt::ptree *codeFlows;
    if (findChildOf(&codeFlows, defNode, "codeFlows"))
        sarifReadCodeFlow(def, *codeFlows);

    // read comments if available
    const pt::ptree *relatedLocs;
    if (findChildOf(&relatedLocs, defNode, "relatedLocations"))
        sarifReadComments(def, *relatedLocs);

    d->digger.inferLangFromChecker(def);
    d->digger.inferToolFromChecker(def);

    return true;
}

0 commit comments

Comments
 (0)