-
Notifications
You must be signed in to change notification settings - Fork 2
fix: replace url.parse with whatwg-url #175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,18 +5,34 @@ import * as path from "path"; | |
| import * as url from "url"; | ||
|
|
||
| export function sanitizeUrl(urlRemote: string) { | ||
| var urlParts = url.parse(urlRemote, false, true); | ||
| var origin = urlParts.host || ""; | ||
| try { | ||
| const urlObj = new URL(urlRemote); | ||
| var origin = urlObj.host || ""; | ||
|
|
||
| if ((urlParts.pathname || "").charAt(0) != "/") origin += "/"; | ||
| if ((urlObj.pathname || "").charAt(0) != "/") origin += "/"; | ||
|
|
||
| origin += urlParts.pathname; | ||
| return [ | ||
| urlParts.protocol || "http:", | ||
| "//", | ||
| url.resolve("", origin), | ||
| urlParts.search || "" | ||
| ].join(""); | ||
| origin += urlObj.pathname; | ||
| return [ | ||
| urlObj.protocol || "http:", | ||
| "//", | ||
| url.resolve("", origin), | ||
| urlObj.search || "" | ||
| ].join(""); | ||
| } catch (e) { | ||
| // If URL is invalid, try to construct it with a base | ||
| const urlObj = new URL(urlRemote, "http://dummy"); | ||
| var origin = urlObj.host || ""; | ||
|
|
||
| if ((urlObj.pathname || "").charAt(0) != "/") origin += "/"; | ||
|
|
||
| origin += urlObj.pathname; | ||
| return [ | ||
| urlObj.protocol || "http:", | ||
| "//", | ||
| url.resolve("", origin), | ||
| urlObj.search || "" | ||
| ].join(""); | ||
| } | ||
|
Comment on lines
+8
to
+35
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: find . -name "Address.ts" -o -name "address.ts" | head -20Repository: loanlink-nl/cget Length of output: 76 🏁 Script executed: cat -n src/Address.ts | head -50Repository: loanlink-nl/cget Length of output: 1741 🏁 Script executed: rg -t ts -t js "sanitizeUrl" --max-count=20 -B 2 -A 2Repository: loanlink-nl/cget Length of output: 1104 🏁 Script executed: fd -e test.ts -e test.js -e spec.ts -e spec.js | head -20Repository: loanlink-nl/cget Length of output: 97 🏁 Script executed: cat -n src/Address.test.ts | head -100Repository: loanlink-nl/cget Length of output: 3253 🏁 Script executed: cat -n src/Address.test.ts | tail -100Repository: loanlink-nl/cget Length of output: 3535 🏁 Script executed: rg "new URL" -B 3 -A 3 -t tsRepository: loanlink-nl/cget Length of output: 1430 🏁 Script executed: sed -n '38,150p' src/Address.tsRepository: loanlink-nl/cget Length of output: 2349 Fallback path injects Lines 23-35 use Suggested fix export function sanitizeUrl(urlRemote: string) {
+ const toSanitized = (urlObj: URL, stripDummyHost = false) => {
+ let origin =
+ stripDummyHost && urlObj.host === "dummy" ? "" : urlObj.host || "";
+
+ if ((urlObj.pathname || "").charAt(0) != "/") origin += "/";
+ origin += urlObj.pathname;
+
+ return [
+ urlObj.protocol || "http:",
+ "//",
+ url.resolve("", origin),
+ urlObj.search || ""
+ ].join("");
+ };
+
try {
- const urlObj = new URL(urlRemote);
- var origin = urlObj.host || "";
-
- if ((urlObj.pathname || "").charAt(0) != "/") origin += "/";
-
- origin += urlObj.pathname;
- return [
- urlObj.protocol || "http:",
- "//",
- url.resolve("", origin),
- urlObj.search || ""
- ].join("");
+ return toSanitized(new URL(urlRemote));
} catch (e) {
- // If URL is invalid, try to construct it with a base
- const urlObj = new URL(urlRemote, "http://dummy");
- var origin = urlObj.host || "";
-
- if ((urlObj.pathname || "").charAt(0) != "/") origin += "/";
-
- origin += urlObj.pathname;
- return [
- urlObj.protocol || "http:",
- "//",
- url.resolve("", origin),
- urlObj.search || ""
- ].join("");
+ // If URL is invalid, try to construct it with a base.
+ return toSanitized(new URL(urlRemote, "http://dummy"), true);
}
}🧰 Tools🪛 Biome (2.4.4)[error] 24-24: Shouldn't redeclare 'origin'. Consider to delete it or rename it. (lint/suspicious/noRedeclare) 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| /** Last line of defence to filter malicious paths. */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,6 @@ | |
| import * as fs from "fs"; | ||
| import * as path from "path"; | ||
|
|
||
| import * as url from "url"; | ||
| import * as http from "http"; | ||
| import * as stream from "stream"; | ||
| import got from "got"; | ||
|
|
@@ -565,27 +564,29 @@ export class Cache { | |
| private static forceRedirect(urlRemote: string, options: FetchOptions) { | ||
| if (!options.forceHost && !options.forcePort) return urlRemote; | ||
|
|
||
| var urlParts = url.parse(urlRemote); | ||
| const urlObj = new URL(urlRemote); | ||
| var changed = false; | ||
|
|
||
| if (!urlParts.hostname) return urlRemote; | ||
| if (!urlObj.hostname) return urlRemote; | ||
|
|
||
| if (options.forceHost && urlParts.hostname != options.forceHost) { | ||
| urlParts.hostname = options.forceHost; | ||
| if (options.forceHost && urlObj.hostname != options.forceHost) { | ||
| urlObj.hostname = options.forceHost; | ||
| changed = true; | ||
| } | ||
|
|
||
| if (options.forcePort && urlParts.port != "" + options.forcePort) { | ||
| urlParts.port = "" + options.forcePort; | ||
| if (options.forcePort && urlObj.port != "" + options.forcePort) { | ||
| urlObj.port = "" + options.forcePort; | ||
| changed = true; | ||
| } | ||
|
|
||
| if (!changed) return urlRemote; | ||
|
|
||
| urlParts.search = "?host=" + encodeURIComponent(urlParts.host || ""); | ||
| urlParts.host = null as any; | ||
| const originalHost = urlObj.host; | ||
|
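There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: `originalHost` is read from `urlObj.host` after `hostname`/`port` have already been overwritten, so it holds the forced host rather than the original one. Suggested Fix: Capture the original host into a variable immediately after the `new URL(urlRemote)` call, before `hostname` or `port` is modified. Prompt for AI Agent. Did we get this right? 👍 / 👎 to inform future reviews. |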
||
| urlObj.search = "?host=" + encodeURIComponent(originalHost || ""); | ||
|
Comment on lines
+584
to
+585
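There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔴 In `forceRedirect`, `originalHost` is captured after `hostname`/`port` have been overwritten, so the `?host=` parameter carries the forced host instead of the original.
Root Cause and Impact
With the legacy `url.parse` object, `host` was not updated when `hostname` or `port` was assigned, so the old code still read the original host:
urlParts.search = "?host=" + encodeURIComponent(urlParts.host || ""); // original host
urlParts.host = null;
In the new code, the WHATWG `URL` object keeps `host` in sync with `hostname` and `port`, so by the time it is read it already holds the forced value:
const originalHost = urlObj.host; // ALREADY 'localhost:12345', not 'example.com'
This breaks the test infrastructure's lookup of the original host from the `?host=` query parameter.
Impact: The redirected request no longer identifies the host that was originally requested.
Prompt for agents
Was this helpful? React with 👍 or 👎 to provide feedback. |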
||
| // Remove the host to use the modified hostname/port | ||
|
Comment on lines
+572
to
+586
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cat -n src/Cache.ts | sed -n '560,600p'Repository: loanlink-nl/cget Length of output: 1463 🏁 Script executed: cat > /tmp/test_url.js << 'EOF'
// Test WHATWG URL API behavior
const url = new URL("http://original.example.com:8080/path");
console.log("Original host:", url.host);
console.log("Original hostname:", url.hostname);
console.log("Original port:", url.port);
// Mutate hostname
url.hostname = "forced.example.com";
console.log("\nAfter hostname mutation:");
console.log("host:", url.host);
console.log("hostname:", url.hostname);
console.log("port:", url.port);
// Mutate port
url.port = "9090";
console.log("\nAfter port mutation:");
console.log("host:", url.host);
console.log("hostname:", url.hostname);
console.log("port:", url.port);
EOF
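node /tmp/test_url.js
Repository: loanlink-nl/cget Length of output: 348
Capture the original host before rewriting hostname/port. At line 584, `urlObj.host` is read after `hostname` and `port` have already been reassigned, so `originalHost` reflects the forced values instead of the original host. Additionally, line 587's manual host reconstruction is redundant—the WHATWG URL API automatically updates the `host` property whenever `hostname` or `port` is set. 🤖 Prompt for AI Agents |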
||
| urlObj.host = urlObj.hostname + (urlObj.port ? ":" + urlObj.port : ""); | ||
|
|
||
| return url.format(urlParts); | ||
| return urlObj.href; | ||
| } | ||
|
|
||
| /** Queue for limiting parallel downloads. */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -20,7 +20,6 @@ console.log( | |||||||||||||||||||||
| import { assignIn } from "lodash"; | ||||||||||||||||||||||
| import * as fs from "fs"; | ||||||||||||||||||||||
| import { fsa } from "../dist/mkdirp"; | ||||||||||||||||||||||
| import * as url from "url"; | ||||||||||||||||||||||
| import * as http from "http"; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| import { Address, Cache } from "../dist/cget"; | ||||||||||||||||||||||
|
|
@@ -86,8 +85,8 @@ function checkRemoteLink(cachePath: string) { | |||||||||||||||||||||
|
|
||||||||||||||||||||||
| var app = http.createServer( | ||||||||||||||||||||||
| (req: http.IncomingMessage, res: http.ServerResponse) => { | ||||||||||||||||||||||
| var urlParts = url.parse(req.url); | ||||||||||||||||||||||
| var args = parseArgs(urlParts.query); | ||||||||||||||||||||||
| const urlObj = new URL(req.url || "/", `http://${req.headers.host || "localhost"}`); | ||||||||||||||||||||||
| var args = parseArgs(urlObj.search.substring(1)); | ||||||||||||||||||||||
|
Comment on lines
+88
to
+89
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cat -n test/serve.ts | sed -n '80,120p'Repository: loanlink-nl/cget Length of output: 1486 🏁 Script executed: grep -n "reportError" test/serve.tsRepository: loanlink-nl/cget Length of output: 198 🏁 Script executed: grep -n "function.*reportError\|const reportError\|var reportError" test/serve.tsRepository: loanlink-nl/cget Length of output: 84 🏁 Script executed: cat -n test/serve.ts | sed -n '47,60p'Repository: loanlink-nl/cget Length of output: 454 Guard URL construction to prevent unhandled exceptions from crashing the test server. At line 88, Proposed fix- const urlObj = new URL(req.url || "/", `http://${req.headers.host || "localhost"}`);
- var args = parseArgs(urlObj.search.substring(1));
+ let urlObj: URL;
+ try {
+ urlObj = new URL(req.url || "/", `http://${req.headers.host || "localhost"}`);
+ } catch {
+ reportError(res, 400);
+ return;
+ }
+ var args = parseArgs(urlObj.search.substring(1));📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||
| var host = args["host"]; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // TODO does localhost not count as a host? | ||||||||||||||||||||||
|
|
@@ -105,13 +104,12 @@ var app = http.createServer( | |||||||||||||||||||||
| return; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| urlParts.protocol = "http"; | ||||||||||||||||||||||
| urlParts.search = null; | ||||||||||||||||||||||
| urlParts.query = null; | ||||||||||||||||||||||
| urlParts.host = host; | ||||||||||||||||||||||
| urlObj.protocol = "http"; | ||||||||||||||||||||||
| urlObj.search = ""; | ||||||||||||||||||||||
| urlObj.host = host; | ||||||||||||||||||||||
|
Comment on lines
+107
to
+109
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cat -n test/serve.ts | sed -n '100,120p'Repository: loanlink-nl/cget Length of output: 710 🏁 Script executed: cat -n test/serve.ts | sed -n '70,110p'Repository: loanlink-nl/cget Length of output: 1534 🌐 Web query:
💡 Result: Assigning
Assigning When urlObj.protocol = "http";
urlObj.search = "";
- urlObj.host = host;
+ urlObj.host = host || "";Use 📝 Committable suggestion
Suggested change
🤖 Prompt for AI AgentsThere was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 Setting In Root Cause and ImpactThe old code used With the WHATWG The resulting URL Impact: The test server will fail to serve any requests from localhost when no
Suggested change
Was this helpful? React with 👍 or 👎 to provide feedback. |
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| cache | ||||||||||||||||||||||
| .getCachePath(new Address(url.format(urlParts))) | ||||||||||||||||||||||
| .getCachePath(new Address(urlObj.href)) | ||||||||||||||||||||||
| .then((cachePath: string) => | ||||||||||||||||||||||
| checkRemoteLink(cachePath) | ||||||||||||||||||||||
| .then((urlRemote: string) => { | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔴
`sanitizeUrl` catch branch leaks dummy host into output for protocol-less URLs
When `sanitizeUrl` receives a URL without a protocol (e.g., `example.com/path`), `new URL()` throws on line 9, and the catch branch on line 23 uses `new URL(urlRemote, "http://dummy")`. This resolves the input relative to the dummy base, causing `urlObj.host` to be `"dummy"` and the pathname to be `/example.com/path`.
Root Cause and Impact
The old code used `url.parse(urlRemote, false, true)`, where the third argument (`true`) enabled `slashesDenoteHost` mode. For input `example.com/path`, this correctly parsed it as `host: null, pathname: 'example.com/path'`, producing `http:///example.com/path`.
The new catch branch with `new URL('example.com/path', 'http://dummy')` instead produces:
- `host: 'dummy'`
- `pathname: '/example.com/path'`
- resulting URL: `http://dummy/example.com/path`
This means any protocol-less URL passed to `sanitizeUrl` will have the fake host `dummy` injected into the output. While the test suite primarily tests full URLs with protocols (so existing tests pass), this is a behavioral regression for any real-world usage with bare hostnames or protocol-relative URLs like `//example.com/path` where the host is correctly handled but the protocol is inherited from the dummy base.
Impact: Protocol-less URLs (if any are used in practice) will be incorrectly resolved against a fake `dummy` host, producing invalid URLs.
Prompt for agents
Was this helpful? React with 👍 or 👎 to provide feedback.