From 7e152ccaeac9800768af2737fc4fab7fc8518cd4 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sat, 2 Dec 2017 23:05:47 +0100 Subject: [PATCH 1/2] Fix file_get_contents(): stream does not support seeking Fixes an error in PHP 7.1 for the file_get_contents() function. --- src/simple_html_dom.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simple_html_dom.php b/src/simple_html_dom.php index 70abf50..417170b 100755 --- a/src/simple_html_dom.php +++ b/src/simple_html_dom.php @@ -68,7 +68,7 @@ // ----------------------------------------------------------------------------- // get html dom from file // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. -function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) +function file_get_html($url, $use_include_path = false, $context=null, $offset = 0, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) { // We DO force the tags to be terminated. $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); @@ -1719,4 +1719,4 @@ function getElementsByTagName($name, $idx=-1) {return $this->find($name, $idx);} function loadFile() {$args = func_get_args();$this->load_file($args);} } -?> \ No newline at end of file +?> From c2d3d884cc013390b400aa7480dd97265e00efb2 Mon Sep 17 00:00:00 2001 From: Gregory Duchatelet Date: Thu, 4 Aug 2022 10:36:13 +0200 Subject: [PATCH 2/2] fix regex --- src/simple_html_dom.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/simple_html_dom.php b/src/simple_html_dom.php index 417170b..997d5b5 100755 --- a/src/simple_html_dom.php +++ b/src/simple_html_dom.php @@ -681,7 +681,7 @@ protected function parse_selector($selector_string) { // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression. // farther study is required to determine of this should be documented or removed. // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; - $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; + $pattern = "/([a-zA-Z_0-9:\*-]*)(?:\#([a-zA-Z_0-9-]+)|\.([a-zA-Z_0-9-]+))?(?:\[@?(!?[a-zA-Z_0-9-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); if (is_object($debugObject)) {$debugObject->debugLog(2, "Matches Array: ", $matches);} @@ -703,7 +703,7 @@ protected function parse_selector($selector_string) { if (!empty($m[6])) {$val=$m[6];} // convert to lowercase - if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);} + if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key ?? '');} //elements that do NOT have the specified attribute if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;} @@ -1362,7 +1362,7 @@ protected function read_tag() return true; } - if (!preg_match("/^[\w-:]+$/", $tag)) { + if (!preg_match("/^[a-zA-Z_0-9:-]+$/", $tag)) { $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>'); if ($this->char==='<') { $this->link_nodes($node, false);