Skip to content

Commit c6af064

Browse files
committed
Use PHP type hints instead of PHPDoc
PHP 7.4 supports typed properties, so `@var` PHPDoc annotations can be replaced with native property type declarations.
1 parent 09ccc3a commit c6af064

File tree

3 files changed

+92
-60
lines changed

3 files changed

+92
-60
lines changed

src/JSLikeHTMLElement.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class JSLikeHTMLElement extends \DOMElement
4343
* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
4444
* ```
4545
*/
46-
public function __set($name, $value)
46+
public function __set($name, $value): void
4747
{
4848
if ('innerHTML' !== $name) {
4949
$trace = debug_backtrace();
@@ -126,18 +126,18 @@ public function __get($name)
126126
trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], \E_USER_NOTICE);
127127
}
128128

129-
public function __toString()
129+
public function __toString(): string
130130
{
131131
return '[' . $this->tagName . ']';
132132
}
133133

134-
public function getInnerHtml()
134+
public function getInnerHtml(): string
135135
{
136136
return $this->__get('innerHTML');
137137
}
138138

139-
public function setInnerHtml($value)
139+
public function setInnerHtml($value): void
140140
{
141-
return $this->__set('innerHTML', $value);
141+
$this->__set('innerHTML', $value);
142142
}
143143
}

src/Readability.php

Lines changed: 84 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,34 @@ class Readability implements LoggerAwareInterface
2424
public const MIN_ARTICLE_LENGTH = 200;
2525
public const MIN_NODE_LENGTH = 80;
2626
public const MAX_LINK_DENSITY = 0.25;
27-
public $convertLinksToFootnotes = false;
28-
public $revertForcedParagraphElements = false;
29-
public $articleTitle;
30-
public $articleContent;
31-
public $original_html;
27+
28+
public bool $convertLinksToFootnotes = false;
29+
public bool $revertForcedParagraphElements = false;
30+
31+
public ?\DOMElement $articleTitle;
32+
33+
public ?\DOMElement $articleContent;
34+
35+
public ?string $original_html;
36+
37+
public ?\DOMDocument $dom;
38+
3239
/**
33-
* @var \DOMDocument
40+
* @var ?string URL where HTML was retrieved
3441
*/
35-
public $dom;
36-
// optional - URL where HTML was retrieved
37-
public $url = null;
38-
// preserves more content (experimental)
39-
public $lightClean = true;
42+
public ?string $url = null;
4043

4144
/**
42-
* All of the regular expressions in use within readability.
45+
* @var bool preserves more content (experimental)
46+
*/
47+
public bool $lightClean = true;
48+
49+
/**
50+
* @var array<string, string> All of the regular expressions in use within readability.
51+
*
4352
* Defined up here so we don't instantiate them repeatedly in loops.
4453
*/
45-
public $regexps = [
54+
public array $regexps = [
4655
'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
4756
'okMaybeItsACandidate' => '/article\b|contain|\bcontent|column|general|detail|shadow|lightbox|blog|body|entry|main|page|footnote|element/i',
4857
'positive' => '/read|full|article|body|\bcontent|contain|entry|main|markdown|media|page|attach|pagination|post|text|blog|story/i',
@@ -54,18 +63,30 @@ class Readability implements LoggerAwareInterface
5463
'hasContent' => '/\S$/',
5564
'isNotVisible' => '/display\s*:\s*none/',
5665
];
57-
public $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre'];
58-
// The commented out elements qualify as phrasing content but tend to be
59-
// removed by readability when put into paragraphs, so we ignore them here.
60-
public $phrasingElements = [
66+
67+
/**
68+
* @var array<string>
69+
*/
70+
public array $defaultTagsToScore = ['section', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'td', 'pre'];
71+
72+
/**
73+
* @var array<string>
74+
*/
75+
public array $phrasingElements = [
76+
// The commented out elements qualify as phrasing content but tend to be
77+
// removed by readability when put into paragraphs, so we ignore them here.
6178
// "CANVAS", "IFRAME", "SVG", "VIDEO",
6279
'ABBR', 'AUDIO', 'B', 'BDO', 'BR', 'BUTTON', 'CITE', 'CODE', 'DATA',
6380
'DATALIST', 'DFN', 'EM', 'EMBED', 'I', 'IMG', 'INPUT', 'KBD', 'LABEL',
6481
'MARK', 'MATH', 'METER', 'NOSCRIPT', 'OBJECT', 'OUTPUT', 'PROGRESS', 'Q',
6582
'RUBY', 'SAMP', 'SCRIPT', 'SELECT', 'SMALL', 'SPAN', 'STRONG', 'SUB',
6683
'SUP', 'TEXTAREA', 'TIME', 'VAR', 'WBR',
6784
];
68-
public $tidy_config = [
85+
86+
/**
87+
* @var array<string, bool|string>
88+
*/
89+
public array $tidy_config = [
6990
'tidy-mark' => false,
7091
'vertical-space' => false,
7192
'doctype' => 'omit',
@@ -89,21 +110,41 @@ class Readability implements LoggerAwareInterface
89110
'output-encoding' => 'utf8',
90111
'hide-comments' => true,
91112
];
92-
// article domain regexp for calibration
93-
protected $domainRegExp = null;
94-
protected $body = null;
95-
// Cache the body HTML in case we need to re-use it later
96-
protected $bodyCache = null;
97-
// 1 | 2 | 4; // Start with all processing flags set.
98-
protected $flags = 7;
99-
// indicates whether we were able to extract or not
100-
protected $success = false;
101-
protected $logger;
102-
protected $parser;
103-
protected $html;
104-
protected $useTidy;
105-
// raw HTML filters
106-
protected $pre_filters = [
113+
114+
/**
115+
* @var ?string article domain regexp for calibration
116+
*/
117+
protected ?string $domainRegExp = null;
118+
119+
protected ?\DOMElement $body = null;
120+
121+
/**
122+
* @var ?string Cache the body HTML in case we need to re-use it later
123+
*/
124+
protected ?string $bodyCache = null;
125+
126+
/**
127+
* @var int-mask-of<self::FLAG_*> start with all processing flags set
128+
*/
129+
protected int $flags = self::FLAG_STRIP_UNLIKELYS | self::FLAG_WEIGHT_ATTRIBUTES | self::FLAG_CLEAN_CONDITIONALLY;
130+
131+
/**
132+
* @var bool indicates whether we were able to extract or not
133+
*/
134+
protected bool $success = false;
135+
136+
protected LoggerInterface $logger;
137+
138+
protected string $parser;
139+
140+
protected string $html;
141+
142+
protected bool $useTidy;
143+
144+
/**
145+
* @var array<string, string> raw HTML filters
146+
*/
147+
protected array $pre_filters = [
107148
// remove spans as we redefine styles and they're probably special-styled
108149
'!</?span[^>]*>!is' => '',
109150
// HACK: firewall-filtered content
@@ -115,8 +156,11 @@ class Readability implements LoggerAwareInterface
115156
// replace fonts to spans
116157
'!<(/?)font[^>]*>!is' => '<\\1span>',
117158
];
118-
// output HTML filters
119-
protected $post_filters = [
159+
160+
/**
161+
* @var array<string, string> output HTML filters
162+
*/
163+
protected array $post_filters = [
120164
// replace excessive br's
121165
'/<br\s*\/?>\s*<p/i' => '<p',
122166
// replace empty tags that break layouts
@@ -156,20 +200,16 @@ public function setLogger(LoggerInterface $logger): void
156200

157201
/**
158202
* Get article title element.
159-
*
160-
* @return \DOMElement
161203
*/
162-
public function getTitle()
204+
public function getTitle(): \DOMElement
163205
{
164206
return $this->articleTitle;
165207
}
166208

167209
/**
168210
* Get article content element.
169-
*
170-
* @return \DOMElement
171211
*/
172-
public function getContent()
212+
public function getContent(): \DOMElement
173213
{
174214
return $this->articleContent;
175215
}
@@ -451,12 +491,8 @@ public function prepArticle(\DOMNode $articleContent): void
451491
/**
452492
* Get the inner text of a node.
453493
* This also strips out any excess whitespace to be found.
454-
*
455-
* @param \DOMElement $e
456-
* @param bool $normalizeSpaces (default: true)
457-
* @param bool $flattenLines (default: false)
458494
*/
459-
public function getInnerText($e, bool $normalizeSpaces = true, bool $flattenLines = false): string
495+
public function getInnerText(\DOMElement $e, bool $normalizeSpaces = true, bool $flattenLines = false): string
460496
{
461497
if (null === $e || !isset($e->textContent) || '' === $e->textContent) {
462498
return '';
@@ -749,10 +785,8 @@ public function removeFlag(int $flag): void
749785

750786
/**
751787
* Get the article title as an H1.
752-
*
753-
* @return \DOMElement
754788
*/
755-
protected function getArticleTitle()
789+
protected function getArticleTitle(): \DOMElement
756790
{
757791
try {
758792
$curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
@@ -892,7 +926,7 @@ protected function initializeNode(\DOMElement $node): void
892926
*
893927
* @return \DOMElement|false
894928
*/
895-
protected function grabArticle(?\DOMElement $page = null)
929+
protected function grabArticle(?\DOMElement $page = null): \DOMElement|bool
896930
{
897931
if (!$page) {
898932
$page = $this->dom;

tests/ReadabilityTest.php

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@
1010

1111
class ReadabilityTest extends \PHPUnit\Framework\TestCase
1212
{
13-
/** @var TestHandler */
14-
public $logHandler;
15-
/** @var LoggerInterface */
16-
public $logger;
13+
public TestHandler $logHandler;
14+
public LoggerInterface $logger;
1715

1816
/**
1917
* @requires extension tidy
@@ -338,7 +336,7 @@ public function testAutoClosingIframeNotThrowingException(): void
338336
$oldErrorReporting = error_reporting(\E_ALL | \E_STRICT);
339337
$oldDisplayErrors = ini_set('display_errors', '1');
340338
// dummy function to be used for the next test
341-
set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline, array $errcontext) {
339+
set_error_handler(function (int $errno, string $errstr, string $errfile, int $errline, array $errcontext): void {
342340
throw new \Exception($errstr, $errno);
343341
}, \E_ALL | \E_STRICT);
344342

0 commit comments

Comments
 (0)