diff --git a/html2text/__init__.py b/html2text/__init__.py index a4115d5..e8222ba 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -86,7 +86,7 @@ def __init__( self.tag_callback = None self.open_quote = config.OPEN_QUOTE # covered in cli self.close_quote = config.CLOSE_QUOTE # covered in cli - + if out is None: self.out = self.outtextf else: @@ -120,6 +120,8 @@ def __init__( self.tag_stack = ( [] ) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]] + self.emphasis_tag_stack = {} + self.remove_space = False self.emphasis = 0 self.drop_white_space = 0 self.inheader = False @@ -303,10 +305,19 @@ def handle_tag( ) -> None: self.current_tag = tag + if tag in ["b","em","i","u"]: + if start: + if tag in self.emphasis_tag_stack: + self.emphasis_tag_stack[tag] += 1 + else: + self.emphasis_tag_stack[tag] = 1 + elif list(self.emphasis_tag_stack.keys()): + self.emphasis_tag_stack.popitem() + if self.tag_callback is not None: if self.tag_callback(self, tag, attrs, start) is True: return - + # first thing inside the anchor tag is another tag # that produces some output if ( @@ -373,10 +384,24 @@ def handle_tag( self.p() if tag == "br" and start: + for key in list(self.emphasis_tag_stack.keys())[::-1]: + if(key == "b"): + self.o(self.strong_mark) + elif key in ["em","i","u"]: + self.o(self.emphasis_mark) + if self.blockquote > 0: self.o(" \n> ") else: self.o(" \n") + + for key in list(self.emphasis_tag_stack.keys()): + if(key == "b"): + self.o(self.strong_mark) + elif key in ["em","i","u"]: + self.o(self.emphasis_mark) + self.remove_space = True + self.drop_white_space = 1 if tag == "hr" and start: self.p() @@ -641,11 +666,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: # https://spec.commonmark.org/0.28/#motivation # TODO: line up