Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,12 @@ def handle_tag(
self.quote = not self.quote

def link_url(self: HTML2Text, link: str, title: str = "") -> None:
    """Write the closing ``](url "title")`` part of an inline link.

    ``link`` is resolved against ``self.baseurl`` with ``urljoin``. When
    ``urljoin`` rejects the link as malformed (for example a stray ``]``
    in the host part), the link is written through as given instead of
    aborting the conversion.

    :param link: The href to resolve and emit.
    :param title: Optional link title; omitted when blank.
    """
    try:
        url = urlparse.urljoin(self.baseurl, link)
    except ValueError:
        # Malformed URL: urljoin must be the only call that can raise here,
        # so keep it inside the try and fall back to the raw link text.
        url = link
    title = ' "{}"'.format(title) if title.strip() else ""
    self.o("]({url}{title})".format(url=escape_md(url), title=title))

Expand Down Expand Up @@ -584,9 +589,12 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.o("![" + escape_md(alt) + "]")
if self.inline_links:
href = attrs.get("href") or ""
self.o(
"(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
)
try:
href = urlparse.urljoin(self.baseurl, href)
except ValueError:
# Ignore malformed URLs.
pass
self.o("(" + escape_md(href) + ")")
else:
i = self.previousIndex(attrs)
if i is not None:
Expand Down Expand Up @@ -820,12 +828,13 @@ def o(
newa = []
for link in self.a:
if self.outcount > link.outcount:
self.out(
" ["
+ str(link.count)
+ "]: "
+ urlparse.urljoin(self.baseurl, link.attrs["href"])
)
href = link.attrs["href"]
try:
href = urlparse.urljoin(self.baseurl, href)
except ValueError:
# Ignore malformed URLs, and also calm mypy.
assert href is not None
self.out(" [" + str(link.count) + "]: " + href)
if "title" in link.attrs:
assert link.attrs["title"] is not None
self.out(" (" + link.attrs["title"] + ")")
Expand Down
1 change: 1 addition & 0 deletions test/invalid_link.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<a href="http://example.com]path">The typo in the link here causes urljoin to raise.</a>
2 changes: 2 additions & 0 deletions test/invalid_link.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[The typo in the link here causes urljoin to raise.](http://example.com\]path)