From f40101e6c876e402923b5f4723e82e54325a7c8f Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Mon, 23 Dec 2024 10:10:45 +0300 Subject: [PATCH 1/3] Fix: Analysis of relative links (Node.php) 1. Cannot use the "Link" object to analyze relative links, because the "Link" for the Atom feed would be corrupted. Use the additionally created "LinkForAnalysis" object instead. 2. - Fix: preg_replace() error; add all other possible replacements for relative links. - Replaced links like href="#aaa/bbb.xxx" - Replaced links like href="aaa/bbb.xxx" Similar to #427 --- src/FeedIo/Feed/Node.php | 61 +++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/src/FeedIo/Feed/Node.php b/src/FeedIo/Feed/Node.php index e29ab316..234057f4 100644 --- a/src/FeedIo/Feed/Node.php +++ b/src/FeedIo/Feed/Node.php @@ -30,6 +30,8 @@ class Node implements NodeInterface, ElementsAwareInterface, ArrayableInterface protected ?string $host = null; + protected ?string $linkForAnalysis = null; + public function __construct() { $this->initElements(); @@ -135,10 +137,23 @@ public function getLink(): ?string return $this->link; } + public function getLinkForAnalysis(): ?string + { + return $this->linkForAnalysis; + } + public function setLink(string $link = null): NodeInterface { $this->link = $link; $this->setHost($link); + $this->setLinkForAnalysis($link); + + return $this; + } + + public function setLinkForAnalysis(string $link = null): NodeInterface + { + $this->linkForAnalysis = $link; return $this; } @@ -152,29 +167,43 @@ protected function setHost(string $link = null): void protected function setHostInContent(string $host = null): void { - if (property_exists($this, 'content')){ - if (!is_null($host) && !is_null($this->content)) { - $this->content = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->content ); - $this->content = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->content ); - } + if
(is_null($host)) { + return; } - if (property_exists($this, 'description')){ - if (!is_null($host) && !is_null($this->description)) { - $this->description = preg_replace('!(<*\s*[^>]*)(href=)(.?)(\/[^\/])!','\1 href=\3'.$host.'\4', $this->description ); - $this->description = preg_replace('!(<*\s*[^>]*)(src=)(.?)(\/[^\/])!','\1 src=\3'.$host.'\4', $this->description ); - } + // Replaced links like href="/aaa/bbb.xxx" + $pattern = '(<\s*[^>]*)(href=|src=)(.?)(\/[^\/])(?!(.(?!<code))*<\/code>)'; + $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$host.'\4'); + $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$host.'\4'); + + $itemFullLink = $this->getLinkForAnalysis(); + $itemLink = implode("/", array_slice(explode("/", $itemFullLink), 0, -1))."/"; + + // Replaced links like href="#aaa/bbb.xxx" + $pattern = '(<\s*[^>]*)(href=|src=)(.?)(#)(?!(.(?!<code))*<\/code>)'; + $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemFullLink.'\4'); + $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemFullLink.'\4'); + + // Replaced links like href="aaa/bbb.xxx" + $pattern = '(<\s*[^>]*)(href=|src=)(.?)(\w+\b)(?![:])(?!(.(?!<code))*<\/code>)'; + $this->pregReplaceInProperty('content', $pattern, '\1\2\3'.$itemLink.'\4'); + $this->pregReplaceInProperty('description', $pattern, '\1\2\3'.$itemLink.'\4'); + } + + public function pregReplaceInProperty(string $property, string $pattern, string $replacement): void + { + if (property_exists($this, $property) && !is_null($this->{$property})) { + $this->{$property} = preg_replace('~'.$pattern.'~', $replacement, $this->{$property}) ??
$this->{$property}; } } public function getHostFromLink(): ?string { - if (!is_null($this->getLink())) { - $partsUrl = parse_url($this->getLink()); - $result = $partsUrl['scheme']."://".$partsUrl['host']; - } else - $result = null; + if (is_null($this->getLinkForAnalysis())) { + return null; + } + $partsUrl = parse_url($this->getLinkForAnalysis()); - return $result; + return $partsUrl['scheme']."://".$partsUrl['host']; } public function getValue(string $name): ?string From f36e6cabc48aa9a6dfde3d7a1b4c9c78dac153b9 Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Mon, 23 Dec 2024 10:20:32 +0300 Subject: [PATCH 2/3] Use the new method "setLinkForAnalysis" instead of "setLink" (XmlParser.php) --- src/FeedIo/Parser/XmlParser.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/FeedIo/Parser/XmlParser.php b/src/FeedIo/Parser/XmlParser.php index 3156bc37..eb3696f3 100644 --- a/src/FeedIo/Parser/XmlParser.php +++ b/src/FeedIo/Parser/XmlParser.php @@ -77,7 +77,8 @@ protected function handleNode(NodeInterface $item, DOMElement $node, RuleSet $ru { if ($this->isItem($node->tagName) && $item instanceof FeedInterface) { $linkItem = $item->getLink(); - $newItem = $this->parseNode($item->newItem()->setLink($linkItem), $node, $this->getItemRuleSet()); + $newItem = $this->parseNode($item->newItem()->setLinkForAnalysis($linkItem), $node, $this->getItemRuleSet()); + $this->addValidItem($item, $newItem); } else { $rule = $ruleSet->get($node->tagName); From 8e1d646047fcd0bec6f5226ac5dcfda099f071fb Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Mon, 23 Dec 2024 10:22:51 +0300 Subject: [PATCH 3/3] Added analysis of relative links for the Atom feed (Link.php) --- src/FeedIo/Rule/Atom/Link.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/FeedIo/Rule/Atom/Link.php b/src/FeedIo/Rule/Atom/Link.php index 57ac43a6..85cf9e50 100644 --- a/src/FeedIo/Rule/Atom/Link.php +++ b/src/FeedIo/Rule/Atom/Link.php @@ -28,7 +28,11 @@ protected function 
selectAlternateLink(NodeInterface $node, \DOMElement $element ($element->hasAttribute('rel') && $element->getAttribute('rel') == 'alternate') || is_null($node->getLink()) ) { - $node->setLink($element->getAttribute('href')); + $href = $element->getAttribute('href'); + if (parse_url($href, PHP_URL_HOST) == null) { + $href = $node->getHostFromLink(). $href; + } + $node->setLink($href); } }