From aec1cabdc2a18f8f690e43dba73d0e0e2bebf73c Mon Sep 17 00:00:00 2001
From: NineBits
Date: Thu, 5 Jun 2025 13:14:03 +0300
Subject: [PATCH 1/5] Update ElementInterface.php

Added methods: getNode, getSelector
---
 src/ElementInterface.php | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/ElementInterface.php b/src/ElementInterface.php
index d8477cf..26fb0c4 100644
--- a/src/ElementInterface.php
+++ b/src/ElementInterface.php
@@ -12,6 +12,8 @@ public function isText(): bool;
 
     public function isWhitespace(): bool;
 
+    public function getNode(): ?\DOMNode;
+
     public function getTagName(): string;
 
     public function getValue(): string;
@@ -47,4 +49,6 @@ public function setFinalMarkdown(string $markdown): void;
     public function getListItemLevel(): int;
 
     public function getAttribute(string $name): string;
+
+    public function getSelector(): string;
 }

From db40fa2e3df02f119dbe176a2524d34f4f6331e0 Mon Sep 17 00:00:00 2001
From: NineBits
Date: Thu, 5 Jun 2025 13:15:47 +0300
Subject: [PATCH 2/5] Update Element.php

Added methods: getNode, getSelector
---
 src/Element.php | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/Element.php b/src/Element.php
index ef3ecfa..86e4483 100644
--- a/src/Element.php
+++ b/src/Element.php
@@ -56,6 +56,11 @@ public function isWhitespace(): bool
         return $this->getTagName() === '#text' && \trim($this->getValue()) === '';
     }
 
+    public function getNode(): ?\DOMNode
+    {
+        return $this->node;
+    }
+
     public function getTagName(): string
     {
         return $this->node->nodeName;
@@ -221,6 +226,24 @@ public function getAttribute(string $name): string
 
         return '';
     }
+
+    public function getSelector(): string {
+        $element = $this;
+        if (!empty($element->getAttribute('id'))) {
+            return '#' . $element->getAttribute('id');
+        }
+        $path = [];
+        while ($element && $element->getTagName() !== 'body') {
+            $part = $element->getTagName();
+            $index = $element->getSiblingPosition();
+            if ($index > 0) {
+                $part .= ':nth-child(' . $index . ')';
+            }
+            array_unshift($path, $part);
+            $element = $element->getParent();
+        }
+        return implode(' > ', $path);
+    }
 
     public function equals(ElementInterface $element): bool
     {

From 76a265a9353a96e58be219387ad681b66621d617 Mon Sep 17 00:00:00 2001
From: NineBits
Date: Thu, 5 Jun 2025 17:01:22 +0300
Subject: [PATCH 3/5] Update EmphasisConverter.php

Added more tags
---
 src/Converter/EmphasisConverter.php | 97 +++++++++++++++++++++++------
 1 file changed, 78 insertions(+), 19 deletions(-)

diff --git a/src/Converter/EmphasisConverter.php b/src/Converter/EmphasisConverter.php
index a122f40..8b9430a 100644
--- a/src/Converter/EmphasisConverter.php
+++ b/src/Converter/EmphasisConverter.php
@@ -17,15 +17,35 @@ protected function getNormTag(?ElementInterface $element): string
     {
         if ($element !== null && ! $element->isText()) {
             $tag = $element->getTagName();
-            if ($tag === 'i' || $tag === 'em') {
-                return 'em';
-            }
-
-            if ($tag === 'b' || $tag === 'strong') {
-                return 'strong';
+            switch($tag) {
+                case 'i':
+                case 'em':
+                case 'cite':
+                case 'dfn':
+                case 'var':
+                    return 'em';
+                case 'b':
+                case 'strong':
+                    return 'strong';
+                case 'strike':
+                case 's':
+                case 'del':
+                    return 'del';
+                case 'sub':
+                    return 'sub';
+                case 'sup':
+                    return 'sup';
+                case 'u':
+                case 'ins':
+                    return 'u';
+                case 'kbd': // must match the 'kbd' entry in getSupportedTags()
+                    return 'kbd';
+                case 'span':
+                case 'small':
+                case 'abbr':
+                    return $tag;
             }
         }
-
         return '';
     }
 
@@ -42,22 +62,38 @@ public function convert(ElementInterface $element): string
         if (! \trim($value)) {
             return $value;
         }
-
-        if ($tag === 'em') {
-            $style = $this->config->getOption('italic_style');
-        } else {
-            $style = $this->config->getOption('bold_style');
+        switch ($tag) {
+            case 'em':
+                $style = $this->config->getOption('italic_style');
+                break;
+            case 'del':
+                $style = $this->config->getOption('strikethrough_style');
+                break;
+            case 'sub':
+                $style = $this->config->getOption('subscript_style');
+                break;
+            case 'sup':
+                $style = $this->config->getOption('superscript_style');
+                break;
+            case 'strong':
+                $style = $this->config->getOption('bold_style');
+                break;
+            case 'u':
+                $style = $this->config->getOption('underline_style');
+                break;
+            case 'kbd': // same normalized name that getNormTag() returns
+                $style = $this->config->getOption('keyboard_style');
+                break;
+            default:
+                $style = $this->config->getOption('undefined_style');
+                break;
         }
 
         $prefix = \ltrim($value) !== $value ? ' ' : '';
         $suffix = \rtrim($value) !== $value ? ' ' : '';
 
-        /* If this node is immediately preceded or followed by one of the same type don't emit
-         * the start or end $style, respectively. This prevents <em>foo</em><em>bar</em> from
-         * being converted to *foo**bar* which is incorrect. We want *foobar* instead.
-         */
-        $preStyle = $this->getNormTag($element->getPreviousSibling()) === $tag ? '' : $style;
-        $postStyle = $this->getNormTag($element->getNextSibling()) === $tag ? '' : $style;
+        $preStyle = $this->makeDelimiter($element, $tag, $style);
+        $postStyle = $this->makeDelimiter($element, $tag, $style, false);
 
         return $prefix . $preStyle . \trim($value) . $postStyle . $suffix;
     }
@@ -67,6 +103,29 @@ public function convert(ElementInterface $element): string
      */
     public function getSupportedTags(): array
     {
-        return ['em', 'i', 'strong', 'b'];
+        return [
+            'em', 'i', 'cite', 'dfn', 'var',
+            'strong', 'b',
+            'del', 'strike', 's',
+            'sub', 'sup',
+            'u', 'ins',
+            'kbd',
+            'span', 'small', 'abbr'
+        ];
+    }
+
+    protected function makeDelimiter($element, string $tag, $style, bool $prev = true): string
+    {
+        /* If this node is immediately preceded or followed by one of the same type don't emit
+         * the start or end $style, respectively. This prevents <em>foo</em><em>bar</em> from
+         * being converted to *foo**bar* which is incorrect. We want *foobar* instead.
+         */
+        if($prev) {
+            $ignore = $this->getNormTag($element->getPreviousSibling()) === $tag;
+        } else {
+            $ignore = $this->getNormTag($element->getNextSibling()) === $tag;
+        }
+        if (!is_string($style ?? null) || $ignore) return ''; // non-string (e.g. null) style or same-type neighbour: emit nothing
+        return empty($style) ? "<" . ($prev ? "" : "/") ."{$tag}>" : $style; // empty style: fall back to the literal HTML tag
     }
 }

From 0ddbd434e6f9b5df2c803e9d99d18c170ab2732c Mon Sep 17 00:00:00 2001
From: NineBits
Date: Thu, 5 Jun 2025 17:03:04 +0300
Subject: [PATCH 4/5] Update HtmlConverter.php

Added config for more tags
---
 src/HtmlConverter.php | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/HtmlConverter.php b/src/HtmlConverter.php
index 944cb08..fcd3d0d 100644
--- a/src/HtmlConverter.php
+++ b/src/HtmlConverter.php
@@ -36,6 +36,12 @@ public function __construct($options = [])
             'strip_placeholder_links' => false, // Set to true to remove <a> that doesn't have href.
             'bold_style' => '**', // DEPRECATED: Set to '__' if you prefer the underlined style
             'italic_style' => '*', // DEPRECATED: Set to '_' if you prefer the underlined style
+            'strikethrough_style' => '~~',
+            'superscript_style' => '', // Set to '^' to use the superscript style
+            'subscript_style' => '', // Set to '~' to use the subscript style
+            'keyboard_style' => '\'',
+            'underline_style' => '', // Set to null to clear this style
+            'undefined_style' => '', // Set to null to clear this style
             'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
             'hard_break' => false, // Set to true to turn <br> into `\n` instead of `  \n`
             'list_item_style' => '-', // Set the default character for each <li> in a