diff --git a/src/simple_html_dom.php b/src/simple_html_dom.php
index 70abf50..997d5b5 100755
--- a/src/simple_html_dom.php
+++ b/src/simple_html_dom.php
@@ -68,7 +68,7 @@
// -----------------------------------------------------------------------------
// get html dom from file
// $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1.
-function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
+function file_get_html($url, $use_include_path = false, $context=null, $offset = 0, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
{
// We DO force the tags to be terminated.
$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
@@ -681,7 +681,7 @@ protected function parse_selector($selector_string) {
// This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression.
// farther study is required to determine of this should be documented or removed.
// $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
- $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
+ $pattern = "/([a-zA-Z_0-9:\*-]*)(?:\#([a-zA-Z_0-9-]+)|\.([a-zA-Z_0-9-]+))?(?:\[@?(!?[a-zA-Z_0-9-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
if (is_object($debugObject)) {$debugObject->debugLog(2, "Matches Array: ", $matches);}
@@ -703,7 +703,7 @@ protected function parse_selector($selector_string) {
if (!empty($m[6])) {$val=$m[6];}
// convert to lowercase
- if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);}
+ if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key ?? '');}
//elements that do NOT have the specified attribute
if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;}
@@ -1362,7 +1362,7 @@ protected function read_tag()
return true;
}
- if (!preg_match("/^[\w-:]+$/", $tag)) {
+ if (!preg_match("/^[a-zA-Z_0-9:-]+$/", $tag)) {
$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
if ($this->char==='<') {
$this->link_nodes($node, false);
@@ -1719,4 +1719,4 @@ function getElementsByTagName($name, $idx=-1) {return $this->find($name, $idx);}
function loadFile() {$args = func_get_args();$this->load_file($args);}
}
-?>
\ No newline at end of file
+?>