diff --git a/html2text/__init__.py b/html2text/__init__.py
index 07a3dab..79f4311 100644
--- a/html2text/__init__.py
+++ b/html2text/__init__.py
@@ -201,24 +201,17 @@ def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]:
self.a list. If the set of attributes is not found, returns None
:rtype: int
"""
- if "href" not in attrs:
+ attrs_href = attrs.get("href")
+ if attrs_href is None:
return None
- match = False
+ attrs_title = attrs.get("title")
for i, a in enumerate(self.a):
- if "href" in a.attrs and a.attrs["href"] == attrs["href"]:
- if "title" in a.attrs or "title" in attrs:
- if (
- "title" in a.attrs
- and "title" in attrs
- and a.attrs["title"] == attrs["title"]
- ):
- match = True
- else:
- match = True
-
- if match:
+ if (attrs_href == a.attrs.get("href")) and (
+ attrs_title == a.attrs.get("title")
+ ):
return i
+
return None
def handle_emphasis(
@@ -435,10 +428,8 @@ def no_preceding_space(self: HTML2Text) -> bool:
if tag == "abbr":
if start:
- self.abbr_title = None
+ self.abbr_title = attrs.get("title")
self.abbr_data = ""
- if "title" in attrs:
- self.abbr_title = attrs["title"]
else:
if self.abbr_title is not None:
assert self.abbr_data is not None
@@ -460,18 +451,17 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
if tag == "a" and not self.ignore_links:
if start:
- if (
- "href" in attrs
- and attrs["href"] is not None
- and not (self.skip_internal_links and attrs["href"].startswith("#"))
+ attrs_href = attrs.get("href")
+ if attrs_href is None or (
+ self.skip_internal_links and attrs_href.startswith("#")
):
+ self.astack.append(None)
+ else:
self.astack.append(attrs)
- self.maybe_automatic_link = attrs["href"]
+ self.maybe_automatic_link = attrs_href
self.empty_link = True
if self.protect_links:
- attrs["href"] = "<" + attrs["href"] + ">"
- else:
- self.astack.append(None)
+ attrs["href"] = "<" + attrs_href + ">"
else:
if self.astack:
a = self.astack.pop()
@@ -484,8 +474,8 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.empty_link = False
self.maybe_automatic_link = None
if self.inline_links:
- title = a.get("title") or ""
- title = escape_md(title)
+ title = a.get("title")
+ title = "" if title is None else escape_md(title)
link_url(self, a["href"], title)
else:
i = self.previousIndex(a)
@@ -498,38 +488,41 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.o("][" + str(a_props.count) + "]")
if tag == "img" and start and not self.ignore_images:
- if "src" in attrs:
- assert attrs["src"] is not None
+ img_attrs_src = attrs.get("src")
+ if img_attrs_src is not None:
if not self.images_to_alt:
- attrs["href"] = attrs["src"]
- alt = attrs.get("alt") or self.default_image_alt
+ attrs["href"] = img_attrs_src
+ alt = attrs.get("alt", self.default_image_alt)
# If we have images_with_size, write raw html including width,
# height, and alt attributes
+ img_attrs_width = attrs.get("width")
+ img_attrs_height = attrs.get("height")
if self.images_as_html or (
- self.images_with_size and ("width" in attrs or "height" in attrs)
+ self.images_with_size
+ and not (not img_attrs_width and not img_attrs_height)
):
- self.o("
")
return
+ alt = "" if alt is None else escape_md(alt)
+
# If we have a link to create, output the start
if self.maybe_automatic_link is not None:
href = self.maybe_automatic_link
if (
self.images_to_alt
- and escape_md(alt) == href
+ and alt == href
and self.absolute_url_matcher.match(href)
):
- self.o("<" + escape_md(alt) + ">")
+ self.o("<" + alt + ">")
self.empty_link = False
return
else:
@@ -540,9 +533,9 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
# If we have images_to_alt, we discard the image itself,
# considering only the alt text.
if self.images_to_alt:
- self.o(escape_md(alt))
+ self.o(alt)
else:
- self.o("![" + escape_md(alt) + "]")
+ self.o("![" + alt + "]")
if self.inline_links:
href = attrs.get("href") or ""
self.o(
@@ -777,9 +770,9 @@ def o(
+ "]: "
+ urlparse.urljoin(self.baseurl, link.attrs["href"])
)
- if "title" in link.attrs:
- assert link.attrs["title"] is not None
- self.out(" (" + link.attrs["title"] + ")")
+ link_attrs_title = link.attrs.get("title")
+ if link_attrs_title is not None:
+ self.out(" (" + link_attrs_title + ")")
self.out("\n")
else:
newa.append(link)