Use PHP type hints instead of PHPDoc

jtojnar · jtojnar · commit c6af06473b34 · 2024-03-16T17:43:51.000+01:00
PHP 7.4 supports typed properties (native property type declarations).
diff --git a/src/JSLikeHTMLElement.php b/src/JSLikeHTMLElement.php
@@ -43,7 +43,7 @@ class JSLikeHTMLElement extends \DOMElement
      * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
      * ```
      */
-    public function __set($name, $value)
+    public function __set($name, $value): void
     {
         if ('innerHTML' !== $name) {
             $trace = debug_backtrace();
@@ -126,18 +126,18 @@ public function __get($name)
         trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], \E_USER_NOTICE);
     }
 
-    public function __toString()
+    public function __toString(): string
     {
         return '[' . $this->tagName . ']';
     }
 
-    public function getInnerHtml()
+    public function getInnerHtml(): string
     {
         return $this->__get('innerHTML');
     }
 
-    public function setInnerHtml($value)
+    public function setInnerHtml($value): void
     {
-        return $this->__set('innerHTML', $value);
+        $this->__set('innerHTML', $value);
     }
 }
diff --git a/src/Readability.php b/src/Readability.php
@@ -24,25 +24,34 @@ class Readability implements LoggerAwareInterface
     public const MIN_ARTICLE_LENGTH = 200;
     public const MIN_NODE_LENGTH = 80;
     public const MAX_LINK_DENSITY = 0.25;
-    public $convertLinksToFootnotes = false;
-    public $revertForcedParagraphElements = false;
-    public $articleTitle;
-    public $articleContent;
-    public $original_html;
+
+    public bool $convertLinksToFootnotes = false;
+    public bool $revertForcedParagraphElements = false;
+
+    public ?\DOMElement $articleTitle;
+
+    public ?\DOMElement $articleContent;
+
+    public ?string $original_html;
+
+    public ?\DOMDocument $dom;
+
     /**
-     * @var \DOMDocument
+     * @var ?string URL where HTML was retrieved
      */
-    public $dom;
-    // optional - URL where HTML was retrieved
-    public $url = null;
-    // preserves more content (experimental)
-    public $lightClean = true;
+    public ?string $url = null;
 
     /**
-     * All of the regular expressions in use within readability.
+     * @var bool preserves more content (experimental)
+     */
+    public bool $lightClean = true;
+
+    /**
+     * @var array<string, string> All of the regular expressions in use within readability.
+     *
      * Defined up here so we don't instantiate them repeatedly in loops.
      */
-    public $regexps = [
+    public array $regexps = [
         'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
         'okMaybeItsACandidate' => '/article\b|contain|\bcontent|column|general|detail|shadow|lightbox|blog|body|entry|main|page|footnote|element/i',
         'positive' => '/read|full|article|body|\bcontent|contain|entry|main|markdown|media|page|attach|pagination|post|text|blog|story/i',
@@ -54,18 +63,30 @@ class Readability implements LoggerAwareInterface
         'hasContent' => '/\S$/',
         'isNotVisible' => '/display\s*:\s*none/',
     ];
-    public $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre'];
-    // The commented out elements qualify as phrasing content but tend to be
-    // removed by readability when put into paragraphs, so we ignore them here.
-    public $phrasingElements = [
+
+    /**
+     * @var array<string>
+     */
+    public array $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre'];
+
+    /**
+     * @var array<string>
+     */
+    public array $phrasingElements = [
+        // The commented out elements qualify as phrasing content but tend to be
+        // removed by readability when put into paragraphs, so we ignore them here.
         // "CANVAS", "IFRAME", "SVG", "VIDEO",
         'ABBR', 'AUDIO', 'B', 'BDO', 'BR', 'BUTTON', 'CITE', 'CODE', 'DATA',
         'DATALIST', 'DFN', 'EM', 'EMBED', 'I', 'IMG', 'INPUT', 'KBD', 'LABEL',
         'MARK', 'MATH', 'METER', 'NOSCRIPT', 'OBJECT', 'OUTPUT', 'PROGRESS', 'Q',
         'RUBY', 'SAMP', 'SCRIPT', 'SELECT', 'SMALL', 'SPAN', 'STRONG', 'SUB',
         'SUP', 'TEXTAREA', 'TIME', 'VAR', 'WBR',
     ];
-    public $tidy_config = [
+
+    /**
+     * @var array<string, bool|string>
+     */
+    public array $tidy_config = [
         'tidy-mark' => false,
         'vertical-space' => false,
         'doctype' => 'omit',
@@ -89,21 +110,41 @@ class Readability implements LoggerAwareInterface
         'output-encoding' => 'utf8',
         'hide-comments' => true,
     ];
-    // article domain regexp for calibration
-    protected $domainRegExp = null;
-    protected $body = null;
-    // Cache the body HTML in case we need to re-use it later
-    protected $bodyCache = null;
-    // 1 | 2 | 4;   // Start with all processing flags set.
-    protected $flags = 7;
-    // indicates whether we were able to extract or not
-    protected $success = false;
-    protected $logger;
-    protected $parser;
-    protected $html;
-    protected $useTidy;
-    // raw HTML filters
-    protected $pre_filters = [
+
+    /**
+     * @var ?string article domain regexp for calibration
+     */
+    protected ?string $domainRegExp = null;
+
+    protected ?\DOMElement $body = null;
+
+    /**
+     * @var ?string Cache the body HTML in case we need to re-use it later
+     */
+    protected ?string $bodyCache = null;
+
+    /**
+     * @var int-mask-of<self::FLAG_*> start with all processing flags set
+     */
+    protected int $flags = self::FLAG_STRIP_UNLIKELYS | self::FLAG_WEIGHT_ATTRIBUTES | self::FLAG_CLEAN_CONDITIONALLY;
+
+    /**
+     * @var bool indicates whether we were able to extract or not
+     */
+    protected bool $success = false;
+
+    protected LoggerInterface $logger;
+
+    protected string $parser;
+
+    protected string $html;
+
+    protected bool $useTidy;
+
+    /**
+     * @var array<string, string> raw HTML filters
+     */
+    protected array $pre_filters = [
         // remove spans as we redefine styles and they're probably special-styled
         '!</?span[^>]*>!is' => '',
         // HACK: firewall-filtered content
@@ -115,8 +156,11 @@ class Readability implements LoggerAwareInterface
         // replace fonts to spans
         '!<(/?)font[^>]*>!is' => '<\\1span>',
     ];
-    // output HTML filters
-    protected $post_filters = [
+
+    /**
+     * @var array<string, string> output HTML filters
+     */
+    protected array $post_filters = [
         // replace excessive br's
         '/<br\s*\/?>\s*<p/i' => '<p',
         // replace empty tags that break layouts
@@ -156,20 +200,16 @@ public function setLogger(LoggerInterface $logger): void
 
     /**
      * Get article title element.
-     *
-     * @return \DOMElement
      */
-    public function getTitle()
+    public function getTitle(): \DOMElement
     {
         return $this->articleTitle;
     }
 
     /**
      * Get article content element.
-     *
-     * @return \DOMElement
      */
-    public function getContent()
+    public function getContent(): \DOMElement
     {
         return $this->articleContent;
     }
@@ -451,12 +491,8 @@ public function prepArticle(\DOMNode $articleContent): void
     /**
      * Get the inner text of a node.
      * This also strips out any excess whitespace to be found.
-     *
-     * @param \DOMElement $e
-     * @param bool        $normalizeSpaces (default: true)
-     * @param bool        $flattenLines    (default: false)
      */
-    public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string
+    public function getInnerText(\DOMElement $e, bool $normalizeSpaces = true, bool $flattenLines = false): string
     {
         if (null === $e || !isset($e->textContent) || '' === $e->textContent) {
             return '';
@@ -749,10 +785,8 @@ public function removeFlag(int $flag): void
 
     /**
      * Get the article title as an H1.
-     *
-     * @return \DOMElement
      */
-    protected function getArticleTitle()
+    protected function getArticleTitle(): \DOMElement
     {
         try {
             $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
@@ -892,7 +926,7 @@ protected function initializeNode(\DOMElement $node): void
      *
      * @return \DOMElement|false
      */
-    protected function grabArticle(?\DOMElement $page = null)
+    protected function grabArticle(?\DOMElement $page = null): \DOMElement|bool
     {
         if (!$page) {
             $page = $this->dom;
diff --git a/tests/ReadabilityTest.php b/tests/ReadabilityTest.php
@@ -10,10 +10,8 @@
 
 class ReadabilityTest extends \PHPUnit\Framework\TestCase
 {
-    /** @var TestHandler */
-    public $logHandler;
-    /** @var LoggerInterface */
-    public $logger;
+    public TestHandler $logHandler;
+    public LoggerInterface $logger;
 
     /**
      * @requires extension tidy
@@ -338,7 +336,7 @@ public function testAutoClosingIframeNotThrowingException(): void
         $oldErrorReporting = error_reporting(\E_ALL | \E_STRICT);
         $oldDisplayErrors = ini_set('display_errors', '1');
         // dummy function to be used to the next test
-        set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline, array $errcontext) {
+        set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline, array $errcontext): void {
             throw new \Exception($errstr, $errno);
         }, \E_ALL | \E_STRICT);
 

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ class JSLikeHTMLElement extends \DOMElement`
`43`	`43`	`* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';`
`44`	`44`	`` * ``` ``
`45`	`45`	`*/`
`46`		`- public function __set($name, $value)`
	`46`	`+ public function __set($name, $value): void`
`47`	`47`	`{`
`48`	`48`	`if ('innerHTML' !== $name) {`
`49`	`49`	`$trace = debug_backtrace();`
`@@ -126,18 +126,18 @@ public function __get($name)`
`126`	`126`	`trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], \E_USER_NOTICE);`
`127`	`127`	`}`
`128`	`128`
`129`		`- public function __toString()`
	`129`	`+ public function __toString(): string`
`130`	`130`	`{`
`131`	`131`	`return '[' . $this->tagName . ']';`
`132`	`132`	`}`
`133`	`133`
`134`		`- public function getInnerHtml()`
	`134`	`+ public function getInnerHtml(): string`
`135`	`135`	`{`
`136`	`136`	`return $this->__get('innerHTML');`
`137`	`137`	`}`
`138`	`138`
`139`		`- public function setInnerHtml($value)`
	`139`	`+ public function setInnerHtml($value): void`
`140`	`140`	`{`
`141`		`- return $this->__set('innerHTML', $value);`
	`141`	`+ $this->__set('innerHTML', $value);`
`142`	`142`	`}`
`143`	`143`	`}`