@@ -287,6 +287,7 @@ public function init(): bool
287287
288288 if (null === $ articleContent ) {
289289 $ this ->success = false ;
290+ /** @var JSLikeHTMLElement */
290291 $ articleContent = $ this ->dom ->createElement ('div ' );
291292 $ articleContent ->setAttribute ('class ' , 'readability-content ' );
292293 $ articleContent ->setInnerHtml ('<p>Sorry, Readability was unable to parse this page for content.</p> ' );
@@ -302,7 +303,9 @@ public function init(): bool
302303
303304 // without tidy the body can (sometimes) be wiped, so re-create it
304305 if (false === isset ($ this ->body ->childNodes )) {
305- $ this ->body = $ this ->dom ->createElement ('body ' );
306+ /** @var JSLikeHTMLElement */
307+ $ body = $ this ->dom ->createElement ('body ' );
308+ $ this ->body = $ body ;
306309 }
307310
308311 // Clear the old HTML, insert the new content.
@@ -335,19 +338,23 @@ public function postProcessContent(\DOMElement $articleContent): void
335338 */
336339 public function addFootnotes (\DOMElement $ articleContent ): void
337340 {
341+ /** @var JSLikeHTMLElement */
338342 $ footnotesWrapper = $ this ->dom ->createElement ('footer ' );
339343 $ footnotesWrapper ->setAttribute ('class ' , 'readability-footnotes ' );
340344 $ footnotesWrapper ->setInnerHtml ('<h3>References</h3> ' );
341345 $ articleFootnotes = $ this ->dom ->createElement ('ol ' );
342346 $ articleFootnotes ->setAttribute ('class ' , 'readability-footnotes-list ' );
343347 $ footnotesWrapper ->appendChild ($ articleFootnotes );
348+ /** @var \DOMNodeList<JSLikeHTMLElement> */
344349 $ articleLinks = $ articleContent ->getElementsByTagName ('a ' );
345350 $ linkCount = 0 ;
346351
347352 for ($ i = 0 ; $ i < $ articleLinks ->length ; ++$ i ) {
348353 $ articleLink = $ articleLinks ->item ($ i );
349354 $ footnoteLink = $ articleLink ->cloneNode (true );
355+ /** @var JSLikeHTMLElement */
350356 $ refLink = $ this ->dom ->createElement ('a ' );
357+ /** @var JSLikeHTMLElement */
351358 $ footnote = $ this ->dom ->createElement ('li ' );
352359 $ linkDomain = @parse_url ($ footnoteLink ->getAttribute ('href ' ), \PHP_URL_HOST );
353360 if (!$ linkDomain && isset ($ this ->url )) {
@@ -609,6 +616,7 @@ public function killBreaks(JSLikeHTMLElement $node): void
609616 */
610617 public function clean (JSLikeHTMLElement $ e , string $ tag ): void
611618 {
619+ /** @var \DOMNodeList<JSLikeHTMLElement> */
612620 $ targetList = $ e ->getElementsByTagName ($ tag );
613621 $ isEmbed = ('audio ' === $ tag || 'video ' === $ tag || 'iframe ' === $ tag || 'object ' === $ tag || 'embed ' === $ tag );
614622
@@ -645,6 +653,7 @@ public function cleanConditionally(JSLikeHTMLElement $e, string $tag): void
645653 return ;
646654 }
647655
656+ /** @var \DOMNodeList<JSLikeHTMLElement> */
648657 $ tagsList = $ e ->getElementsByTagName ($ tag );
649658 $ curTagsLength = $ tagsList ->length ;
650659
@@ -755,6 +764,7 @@ public function cleanConditionally(JSLikeHTMLElement $e, string $tag): void
755764 public function cleanHeaders (JSLikeHTMLElement $ e ): void
756765 {
757766 for ($ headerIndex = 1 ; $ headerIndex < 3 ; ++$ headerIndex ) {
767+ /** @var \DOMNodeList<JSLikeHTMLElement> */
758768 $ headers = $ e ->getElementsByTagName ('h ' . $ headerIndex );
759769
760770 for ($ i = $ headers ->length - 1 ; $ i >= 0 ; --$ i ) {
@@ -823,6 +833,7 @@ protected function getArticleTitle(): JSLikeHTMLElement
823833 $ curTitle = $ origTitle ;
824834 }
825835
836+ /** @var JSLikeHTMLElement */
826837 $ articleTitle = $ this ->dom ->createElement ('h1 ' );
827838 $ articleTitle ->setInnerHtml ($ curTitle );
828839
@@ -840,7 +851,9 @@ protected function prepDocument(): void
840851 * so we create a new body node and append it to the document.
841852 */
842853 if (null === $ this ->body ) {
843- $ this ->body = $ this ->dom ->createElement ('body ' );
854+ /** @var JSLikeHTMLElement */
855+ $ body = $ this ->dom ->createElement ('body ' );
856+ $ this ->body = $ body ;
844857 $ this ->dom ->documentElement ->appendChild ($ this ->body );
845858 }
846859
@@ -944,6 +957,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
944957 $ xpath = new \DOMXPath ($ page );
945958 }
946959
960+ /** @var \DOMNodeList<JSLikeHTMLElement> */
947961 $ allElements = $ page ->getElementsByTagName ('* ' );
948962
949963 for ($ nodeIndex = 0 ; $ allElements ->item ($ nodeIndex ); ++$ nodeIndex ) {
@@ -986,6 +1000,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
9861000 // (as in, where they contain no other block level elements).
9871001 if ('div ' === $ tagName ) {
9881002 if (!preg_match ($ this ->regexps ['divToPElements ' ], $ nodeContent )) {
1003+ /** @var JSLikeHTMLElement */
9891004 $ newNode = $ this ->dom ->createElement ('p ' );
9901005
9911006 try {
@@ -1156,7 +1171,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
11561171 }
11571172 }
11581173
1159- /** @var \DOMNodeList <JSLikeHTMLElement> */
1174+ /** @var non-empty-array <JSLikeHTMLElement|null > */
11601175 $ topCandidates = array_filter (
11611176 $ topCandidates ,
11621177 fn ($ v , $ idx ) => 0 === $ idx || null !== $ v ,
@@ -1169,18 +1184,21 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
11691184 * We also have to copy the body node so it is something we can modify.
11701185 */
11711186 if (null === $ topCandidate || 0 === strcasecmp ($ topCandidate ->tagName , 'body ' )) {
1187+ /** @var JSLikeHTMLElement */
11721188 $ topCandidate = $ this ->dom ->createElement ('div ' );
11731189
11741190 if ($ page instanceof \DOMDocument) {
1175- if (!isset ($ page ->documentElement )) {
1191+ /** @var ?JSLikeHTMLElement */
1192+ $ documentElement = $ page ->documentElement ;
1193+ if (null === $ documentElement ) {
11761194 // we don't have a body either? what a mess! :)
11771195 $ this ->logger ->debug ('The page has no body! ' );
11781196 } else {
11791197 $ this ->logger ->debug ('Setting body to a raw HTML of original page! ' );
1180- $ topCandidate ->setInnerHtml ($ page -> documentElement ->getInnerHTML ());
1181- $ page -> documentElement ->setInnerHtml ('' );
1198+ $ topCandidate ->setInnerHtml ($ documentElement ->getInnerHTML ());
1199+ $ documentElement ->setInnerHtml ('' );
11821200 $ this ->reinitBody ();
1183- $ page -> documentElement ->appendChild ($ topCandidate );
1201+ $ documentElement ->appendChild ($ topCandidate );
11841202 }
11851203 } else {
11861204 $ topCandidate ->setInnerHtml ($ page ->getInnerHTML ());
@@ -1189,7 +1207,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
11891207 }
11901208
11911209 $ this ->initializeNode ($ topCandidate );
1192- } elseif ($ topCandidate ) {
1210+ } elseif (null !== $ topCandidate ) {
11931211 $ alternativeCandidateAncestors = [];
11941212 foreach ($ topCandidates as $ candidate ) {
11951213 if ((int ) $ candidate ->getAttribute ('readability ' ) / (int ) $ topCandidate ->getAttribute ('readability ' ) >= 0.75 ) {
@@ -1200,7 +1218,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
12001218 }
12011219 if (\count ($ alternativeCandidateAncestors ) >= 3 ) {
12021220 $ parentOfTopCandidate = $ topCandidate ->parentNode ;
1203- while ('body ' !== $ parentOfTopCandidate ->nodeName ) {
1221+ while ('body ' !== $ parentOfTopCandidate ->nodeName && $ parentOfTopCandidate instanceof JSLikeHTMLElement ) {
12041222 $ listsContainingThisAncestor = 0 ;
12051223 for ($ ancestorIndex = 0 ; $ ancestorIndex < \count ($ alternativeCandidateAncestors ) && $ listsContainingThisAncestor < 3 ; ++$ ancestorIndex ) {
12061224 $ listsContainingThisAncestor += (int ) \in_array ($ parentOfTopCandidate , $ alternativeCandidateAncestors [$ ancestorIndex ], true );
@@ -1264,6 +1282,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
12641282 * Now that we have the top candidate, look through its siblings for content that might also be related.
12651283 * Things like preambles, content split by ads that we removed, etc.
12661284 */
1285+ /** @var JSLikeHTMLElement */
12671286 $ articleContent = $ this ->dom ->createElement ('div ' );
12681287 $ articleContent ->setAttribute ('class ' , 'readability-content ' );
12691288 $ siblingScoreThreshold = max (10 , ((int ) $ topCandidate ->getAttribute ('readability ' )) * 0.2 );
@@ -1311,6 +1330,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
13111330 if (0 !== strcasecmp ($ siblingNodeName , 'div ' ) && 0 !== strcasecmp ($ siblingNodeName , 'p ' )) {
13121331 // We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident.
13131332 $ this ->logger ->debug ('Altering siblingNode " ' . $ siblingNodeName . '" to "div". ' );
1333+ /** @var JSLikeHTMLElement */
13141334 $ nodeToAppend = $ this ->dom ->createElement ('div ' );
13151335
13161336 try {
@@ -1412,7 +1432,9 @@ protected function weightAttribute(JSLikeHTMLElement $element, string $attribute
14121432 protected function reinitBody (): void
14131433 {
14141434 if (!isset ($ this ->body ->childNodes )) {
1415- $ this ->body = $ this ->dom ->createElement ('body ' );
1435+ /** @var JSLikeHTMLElement */
1436+ $ body = $ this ->dom ->createElement ('body ' );
1437+ $ this ->body = $ body ;
14161438 $ this ->body ->setInnerHtml ($ this ->bodyCache );
14171439 }
14181440 }
@@ -1544,7 +1566,7 @@ private function isPhrasingContent($node): bool
15441566 private function getSingleTagInsideElement (JSLikeHTMLElement $ node , string $ tag ): ?JSLikeHTMLElement
15451567 {
15461568 $ childNodes = iterator_to_array ($ node ->childNodes );
1547- $ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof \DOMElement );
1569+ $ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof JSLikeHTMLElement );
15481570
15491571 // There should be exactly 1 element child with given tag
15501572 if (1 !== \count ($ children ) || $ children [0 ]->nodeName !== $ tag ) {
0 commit comments