Skip to content

Commit bedb803

Browse files
authored
Merge pull request #20940 from asgerf/js/detect-minified-files
JS: Skip minified file if avg line length > 200
2 parents dc7ce3f + 06cc323 commit bedb803

File tree

9 files changed

+190
-7
lines changed

9 files changed

+190
-7
lines changed

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -408,8 +408,10 @@ private void setupFilters() {
408408
for (String extension : fileTypes.keySet()) patterns.add("**/*" + extension);
409409

410410
// exclude files whose name strongly suggests they are minified
411-
patterns.add("-**/*.min.js");
412-
patterns.add("-**/*-min.js");
411+
if (!EnvironmentVariables.allowMinifiedFiles()) {
412+
patterns.add("-**/*.min.js");
413+
patterns.add("-**/*-min.js");
414+
}
413415

414416
// exclude `node_modules` and `bower_components`
415417
patterns.add("-**/node_modules");
@@ -1074,6 +1076,7 @@ private ExtractorConfig mkExtractorConfig() {
10741076
config = config.withSourceType(getSourceType());
10751077
config = config.withVirtualSourceRoot(virtualSourceRoot);
10761078
if (defaultEncoding != null) config = config.withDefaultEncoding(defaultEncoding);
1079+
config = config.withAllowMinified(EnvironmentVariables.allowMinifiedFiles());
10771080
return config;
10781081
}
10791082

javascript/extractor/src/com/semmle/js/extractor/EnvironmentVariables.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,12 @@ public static String getWipDatabase() {
101101
public static boolean isActionsExtractor() {
102102
return Env.systemEnv().getNonEmpty(CODEQL_EXTRACTOR_ACTIONS_WIP_DATABASE_ENV_VAR) != null;
103103
}
104+
105+
public static boolean allowMinifiedFiles() {
106+
String env = Env.systemEnv().getNonEmpty("CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES");
107+
if (env == null) {
108+
return false; // default is to not allow minified files
109+
}
110+
return Boolean.parseBoolean(env);
111+
}
104112
}

javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ public Set<String> getPredefinedGlobals() {
205205
/** Should parse errors be reported as violations instead of aborting extraction? */
206206
private boolean tolerateParseErrors;
207207

208+
/** Should minified files be allowed? */
209+
private boolean allowMinified;
210+
208211
/** How should HTML files be extracted? */
209212
private HtmlPopulator.Config htmlHandling;
210213

@@ -236,6 +239,7 @@ public ExtractorConfig(boolean experimental) {
236239
this.sourceType = SourceType.AUTO;
237240
this.htmlHandling = HtmlPopulator.Config.ELEMENTS;
238241
this.tolerateParseErrors = true;
242+
this.allowMinified = false;
239243
if (experimental) {
240244
this.mozExtensions = true;
241245
this.jscript = true;
@@ -258,6 +262,7 @@ public ExtractorConfig(ExtractorConfig that) {
258262
this.v8Extensions = that.v8Extensions;
259263
this.e4x = that.e4x;
260264
this.tolerateParseErrors = that.tolerateParseErrors;
265+
this.allowMinified = that.allowMinified;
261266
this.fileType = that.fileType;
262267
this.sourceType = that.sourceType;
263268
this.htmlHandling = that.htmlHandling;
@@ -357,6 +362,16 @@ public ExtractorConfig withTolerateParseErrors(boolean tolerateParseErrors) {
357362
return res;
358363
}
359364

365+
public boolean isAllowMinified() {
366+
return allowMinified;
367+
}
368+
369+
public ExtractorConfig withAllowMinified(boolean allowMinified) {
370+
ExtractorConfig res = new ExtractorConfig(this);
371+
res.allowMinified = allowMinified;
372+
return res;
373+
}
374+
360375
public boolean hasFileType() {
361376
return fileType != null;
362377
}
@@ -467,6 +482,8 @@ public String toString() {
467482
+ e4x
468483
+ ", tolerateParseErrors="
469484
+ tolerateParseErrors
485+
+ ", allowMinified="
486+
+ allowMinified
470487
+ ", htmlHandling="
471488
+ htmlHandling
472489
+ ", fileType="

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -549,10 +549,15 @@ private ParseResultInfo extractContents(
549549
new TextualExtractor(
550550
trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile);
551551
ParseResultInfo loc = extractor.extract(textualExtractor);
552-
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
553-
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
554-
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
555-
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
552+
if (loc.getSkipReason() != null) {
553+
System.err.println("Skipping file " + extractedFile + ": " + loc.getSkipReason());
554+
System.err.flush();
555+
} else {
556+
int numLines = textualExtractor.isSnippet() ? 0 : textualExtractor.getNumLines();
557+
int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments();
558+
trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
559+
trapwriter.addTuple("filetype", fileLabel, fileType.toString());
560+
}
556561
metrics.stopPhase(ExtractionPhase.FileExtractor_extractContents);
557562
metrics.writeTimingsToTrap(trapwriter);
558563
successful = true;

javascript/extractor/src/com/semmle/js/extractor/ParseResultInfo.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,27 @@
1010
public class ParseResultInfo {
1111
private int linesOfCode, linesOfComments;
1212
private List<ParseError> parseErrors;
13+
private String skipReason;
1314

1415
public ParseResultInfo(int linesOfCode, int linesOfComments, List<ParseError> parseErrors) {
1516
this.linesOfCode = linesOfCode;
1617
this.linesOfComments = linesOfComments;
1718
this.parseErrors = new ArrayList<>(parseErrors);
1819
}
1920

21+
private ParseResultInfo() {
22+
this.linesOfCode = 0;
23+
this.linesOfComments = 0;
24+
this.parseErrors = new ArrayList<>();
25+
this.skipReason = null;
26+
}
27+
28+
public static final ParseResultInfo skipped(String reason) {
29+
ParseResultInfo info = new ParseResultInfo();
30+
info.skipReason = reason;
31+
return info;
32+
}
33+
2034
public void add(ParseResultInfo that) {
2135
this.linesOfCode += that.linesOfCode;
2236
this.linesOfComments += that.linesOfComments;
@@ -41,4 +55,11 @@ public int getLinesOfComments() {
4155
public List<ParseError> getParseErrors() {
4256
return parseErrors;
4357
}
58+
59+
/**
60+
* If extraction of this file was skipped, gets the reason for skipping it.
61+
*/
62+
public String getSkipReason() {
63+
return skipReason;
64+
}
4465
}

javascript/extractor/src/com/semmle/js/extractor/ScriptExtractor.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,34 @@ private boolean isAlwaysCommonJSModule(String extension, String packageType) {
3838
return extension.equals(".cjs") || (extension.equals(".js") && "commonjs".equals(packageType));
3939
}
4040

41+
private boolean isMinified(String source) {
42+
// If the average line length is over 200 characters, consider the file minified.
43+
int numberOfLineBreaks = 0;
44+
for (int i = 0; i < source.length(); i++) {
45+
char c = source.charAt(i);
46+
if (c == '\n') {
47+
numberOfLineBreaks++;
48+
} else if (c == '\r') {
49+
numberOfLineBreaks++;
50+
if (i + 1 < source.length() && source.charAt(i + 1) == '\n') {
51+
i++; // skip the next \n in case of \r\n
52+
}
53+
}
54+
}
55+
int averageLineLength =
56+
numberOfLineBreaks == 0 ? source.length() : source.length() / numberOfLineBreaks;
57+
return averageLineLength > 200;
58+
}
59+
4160
@Override
4261
public ParseResultInfo extract(TextualExtractor textualExtractor) {
4362
LocationManager locationManager = textualExtractor.getLocationManager();
4463
String source = textualExtractor.getSource();
64+
65+
if (!config.isAllowMinified() && isMinified(source)) {
66+
return ParseResultInfo.skipped("File appears to be minified.");
67+
}
68+
4569
String shebangLine = null, shebangLineTerm = null;
4670

4771
if (source.startsWith("#!")) {
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
category: majorAnalysis
3+
---
4+
* JavaScript files with an average line length greater than 200 are now considered minified and will no longer be analyzed.
5+
For use-cases where minified files should be analyzed, the original behavior can be restored by setting the environment variable
6+
`CODEQL_EXTRACTOR_JAVASCRIPT_ALLOW_MINIFIED_FILES=true`.

javascript/ql/test/query-tests/Security/CWE-400/ReDoS/regexplib/dates.js

Lines changed: 100 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

javascript/ql/test/query-tests/filters/ClassifyFiles/ClassifyFiles.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
| jquery-datatables.js:0:0:0:0 | jquery-datatables.js | library |
1818
| jquery-jstree.js:0:0:0:0 | jquery-jstree.js | library |
1919
| jquery-snippet.js:0:0:0:0 | jquery-snippet.js | library |
20-
| json-like.js:0:0:0:0 | json-like.js | generated |
2120
| jsx-old.js:0:0:0:0 | jsx-old.js | generated |
2221
| jsx.js:0:0:0:0 | jsx.js | generated |
2322
| multi-part-bundle.html:0:0:0:0 | multi-part-bundle.html | generated |

0 commit comments

Comments
 (0)