1<?php
2/**
3 * Website: http://sourceforge.net/projects/simplehtmldom/
4 * Additional projects: http://sourceforge.net/projects/debugobject/
5 * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6 *
7 * Licensed under The MIT License
8 * See the LICENSE file in the project root for more information.
9 *
10 * Authors:
11 *   S.C. Chen
12 *   John Schlick
13 *   Rus Carroll
14 *   logmanoriginal
15 *
16 * Contributors:
17 *   Yousuke Kumakura
18 *   Vadim Voituk
19 *   Antcs
20 *   James Collins (nomadjimbob)
21 *
22 * Based on Version Rev. 1.9.1 (291)
23 * Version 1.9.1.1
24 */
25
// Node types, stored in simple_html_dom_node::$nodetype.
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);

// Quote styles recorded per attribute under HDOM_INFO_QUOTE.
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);

// Keys of the per-node info array simple_html_dom_node::$_.
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);

// Defaults. Each of these is only defined when the including script has not
// already defined it, so they can be overridden before this file is loaded.
if (!defined('DEFAULT_TARGET_CHARSET')) {
	define('DEFAULT_TARGET_CHARSET', 'UTF-8');
}
if (!defined('DEFAULT_BR_TEXT')) {
	define('DEFAULT_BR_TEXT', "\r\n");
}
if (!defined('DEFAULT_SPAN_TEXT')) {
	define('DEFAULT_SPAN_TEXT', ' ');
}
if (!defined('MAX_FILE_SIZE')) {
	define('MAX_FILE_SIZE', 600000);
}

// NOTE(review): not referenced by the code visible in this part of the file.
define('HDOM_SMARTY_AS_TEXT', 1);
49
/**
 * Read a file or URL with file_get_contents() and load it into a new
 * simple_html_dom object.
 *
 * @param string $url Location handed to file_get_contents().
 * @param bool $use_include_path Forwarded to file_get_contents().
 * @param resource|null $context Forwarded to file_get_contents().
 * @param int $offset Forwarded to file_get_contents().
 * @param int $maxLen Maximum number of bytes to read. Values <= 0 are
 * replaced by MAX_FILE_SIZE.
 * @param bool $lowercase Forwarded to the simple_html_dom constructor and to
 * load().
 * @param bool $forceTagsClosed Forwarded to the simple_html_dom constructor.
 * @param string $target_charset Forwarded to the simple_html_dom constructor.
 * @param bool $stripRN Forwarded to the simple_html_dom constructor and to
 * load().
 * @param string $defaultBRText Forwarded to the simple_html_dom constructor.
 * @param string $defaultSpanText Forwarded to the simple_html_dom constructor.
 * @return mixed The value returned by simple_html_dom::load(), or false when
 * the source could not be read or is empty.
 */
function file_get_html(
	$url,
	$use_include_path = false,
	$context = null,
	$offset = 0,
	$maxLen = -1,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT)
{
	if ($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }

	$dom = new simple_html_dom(
		null,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	$contents = file_get_contents(
		$url,
		$use_include_path,
		$context,
		$offset,
		$maxLen
	);

	// file_get_contents() returns false on failure. The checks are spelled
	// out instead of using empty(), because empty() also rejects the valid
	// one-character document "0".
	// The length check is purely defensive: the read above is already capped
	// at $maxLen bytes.
	if ($contents === false
		|| $contents === ''
		|| strlen($contents) > $maxLen) {
		$dom->clear();
		return false;
	}

	return $dom->load($contents, $lowercase, $stripRN);
}
95
/**
 * Load an HTML string into a new simple_html_dom object.
 *
 * @param string $str Markup to parse.
 * @param bool $lowercase Forwarded to the simple_html_dom constructor and to
 * load().
 * @param bool $forceTagsClosed Forwarded to the simple_html_dom constructor.
 * @param string $target_charset Forwarded to the simple_html_dom constructor.
 * @param bool $stripRN Forwarded to the simple_html_dom constructor and to
 * load().
 * @param string $defaultBRText Forwarded to the simple_html_dom constructor.
 * @param string $defaultSpanText Forwarded to the simple_html_dom constructor.
 * @return mixed The value returned by simple_html_dom::load(), or false when
 * $str is empty or longer than MAX_FILE_SIZE bytes.
 */
function str_get_html(
	$str,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT)
{
	$dom = new simple_html_dom(
		null,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	// empty() is true for the string "0", which is a perfectly valid
	// document; every other empty() value is still rejected.
	$is_blank = empty($str) && $str !== '0';

	if ($is_blank || strlen($str) > MAX_FILE_SIZE) {
		$dom->clear();
		return false;
	}

	return $dom->load($str, $lowercase, $stripRN);
}
122
/**
 * Print the tag tree below a node by delegating to the node's dump() method.
 *
 * @param object $node Node to dump; must provide dump($show_attr, $depth).
 * @param bool $show_attr Whether attributes are printed next to each tag.
 * @param int $deep Indentation depth of the first printed line.
 * @return void
 */
function dump_html_tree($node, $show_attr = true, $deep = 0)
{
	// The options are forwarded as given. Passing $node itself as the first
	// argument would make dump() treat the node as the $show_attr flag.
	$node->dump($show_attr, $deep);
}
127
128class simple_html_dom_node
129{
/** Node type, compared against the HDOM_TYPE_* constants. */
public $nodetype = HDOM_TYPE_TEXT;

/** Tag name; 'text' is the default and 'root' is special-cased by outertext(). */
public $tag = 'text';

/**
 * Attributes as name => value. makeup() prints the bare name for a value of
 * true and skips values that are null or false.
 */
public $attr = [];

/** Child nodes as used by children(), first_child() and the sibling helpers. */
public $children = [];

/** Nodes serialized by innertext()/outertext() and walked by dump(). */
public $nodes = [];

/** Parent node, or null. */
public $parent = null;

/** Per-node info array, keyed by the HDOM_INFO_* constants. */
public $_ = [];

/** NOTE(review): only printed by dump_node() in the visible code; presumably the tag's start offset - confirm. */
public $tag_start = 0;

/** Owning DOM object; set by the constructor and released by clear(). */
private $dom;
139
/**
 * Attach the node to its owning DOM.
 *
 * @param object $dom Owning DOM object; the new node appends itself to
 * $dom->nodes.
 */
function __construct($dom)
{
	// Remember the owner ...
	$this->dom = $dom;

	// ... and register this node in the owner's node list.
	$dom->nodes[] = $this;
}
145
/**
 * Drop all references held by this node when it is destroyed.
 */
function __destruct()
{
	// Same cleanup as an explicit clear() call.
	$this->clear();
}
150
/**
 * String conversion yields the node's outer text.
 *
 * @return string
 */
function __toString()
{
	$markup = $this->outertext();

	return $markup;
}
155
/**
 * Release the references to the owning DOM, the parent and all child nodes.
 *
 * Note that the node lists become null (not empty arrays) afterwards.
 *
 * @return void
 */
function clear()
{
	// A chained assignment is evaluated right to left, so the properties are
	// reset in the order: dom, nodes, parent, children.
	$this->children = $this->parent = $this->nodes = $this->dom = null;
}
163
/**
 * Print this node's tag, then recurse into $this->nodes, one line per node,
 * indented by one tab per level.
 *
 * @param bool $show_attr Whether to append the attributes to each line.
 * @param int $depth Indentation level of this node.
 * @return void
 */
function dump($show_attr = true, $depth = 0)
{
	$line = str_repeat("\t", $depth) . $this->tag;

	if ($show_attr && count($this->attr) > 0) {
		$line .= '(';
		foreach ($this->attr as $name => $value) {
			$line .= "[$name]=>\"$value\", ";
		}
		$line .= ')';
	}

	echo $line . "\n";

	if (!$this->nodes) {
		return;
	}

	foreach ($this->nodes as $child) {
		$child->dump($show_attr, $depth + 1);
	}
}
184
/**
 * Build a one-line debug description of this node: tag, attributes, the
 * internal info array, inner info and a few counters.
 *
 * @param bool $echo When true the description is printed and nothing is
 * returned; when false it is returned instead.
 * @return string|null The description when $echo is false, otherwise null.
 */
function dump_node($echo = true)
{
	$string = $this->tag;

	if (count($this->attr) > 0) {
		$string .= '(';
		foreach ($this->attr as $k => $v) {
			$string .= "[$k]=>\"$v\", ";
		}
		$string .= ')';
	}

	if (count($this->_) > 0) {
		$string .= ' $_ (';
		foreach ($this->_ as $k => $v) {
			if (is_array($v)) {
				$string .= "[$k]=>(";
				foreach ($v as $k2 => $v2) {
					// Entries such as HDOM_INFO_SPACE hold one array per
					// attribute; flatten them instead of interpolating an
					// array, which would only print "Array" and raise an
					// "Array to string conversion" warning.
					if (is_array($v2)) {
						$v2 = implode('|', $v2);
					}
					$string .= "[$k2]=>\"$v2\", ";
				}
				$string .= ')';
			} else {
				$string .= "[$k]=>\"$v\", ";
			}
		}
		$string .= ')';
	}

	if (isset($this->text)) {
		$string .= " text: ({$this->text})";
	}

	$string .= ' HDOM_INNER_INFO: ';

	// This must read the current node; the previous code looked at an
	// undefined $node variable and therefore always reported NULL.
	if (isset($this->_[HDOM_INFO_INNER])) {
		$string .= "'" . $this->_[HDOM_INFO_INNER] . "'";
	} else {
		$string .= ' NULL ';
	}

	// clear() sets both lists to null; the cast keeps count() usable then.
	$string .= ' children: ' . count((array)$this->children);
	$string .= ' nodes: ' . count((array)$this->nodes);
	$string .= ' tag_start: ' . $this->tag_start;
	$string .= "\n";

	if ($echo) {
		echo $string;
		return;
	} else {
		return $string;
	}
}
237
/**
 * Get the parent node, or move this node below a new parent.
 *
 * When $parent is given, the node is first removed from the `nodes` and
 * `children` lists of its current parent (if any) and then appended to both
 * lists of the new parent, so it is never listed under two parents or twice
 * under the same parent.
 *
 * NOTE(review): only the parent/child lists are updated here. The flat node
 * list of the owning DOM and the HDOM_INFO_BEGIN/HDOM_INFO_END positions used
 * by descendant searches are left untouched.
 *
 * @param simple_html_dom_node|null $parent New parent, or null to only read.
 * @return simple_html_dom_node|null The (possibly new) parent.
 */
function parent($parent = null)
{
	if ($parent !== null) {
		$previous = $this->parent;

		if ($previous !== null) {
			foreach (array('nodes', 'children') as $list) {
				// A cleared parent has null lists; nothing to detach then.
				if (!is_array($previous->$list)) { continue; }

				$position = array_search($this, $previous->$list, true);

				if ($position !== false) {
					// array_splice() re-indexes the list, which keeps the
					// $idx + 1 / $idx - 1 sibling lookups valid.
					array_splice($previous->$list, $position, 1);
				}
			}
		}

		$this->parent = $parent;
		$this->parent->nodes[] = $this;
		$this->parent->children[] = $this;
	}

	return $this->parent;
}
251
/**
 * Tell whether this node has at least one child.
 *
 * @return bool
 */
function has_child()
{
	return empty($this->children) ? false : true;
}
256
/**
 * Return all children, or a single child by index.
 *
 * @param int $idx Index of the wanted child; -1 (the default) returns the
 * whole list.
 * @return mixed The children list for -1, otherwise the child at $idx or
 * null when there is none.
 */
function children($idx = -1)
{
	if ($idx !== -1) {
		return isset($this->children[$idx]) ? $this->children[$idx] : null;
	}

	return $this->children;
}
269
/**
 * Return the first child of this node.
 *
 * @return simple_html_dom_node|null The child at index 0, or null when the
 * node has no children (including after clear(), which sets the list to
 * null).
 */
function first_child()
{
	// isset() copes with an empty list as well as with the null left behind
	// by clear(); count(null) would throw a TypeError on PHP 8.
	return isset($this->children[0]) ? $this->children[0] : null;
}
277
/**
 * Return the last child of this node.
 *
 * @return simple_html_dom_node|null The last entry of the children list, or
 * null when the node has no children (including after clear(), which sets
 * the list to null).
 */
function last_child()
{
	// empty() copes with an empty list as well as with the null left behind
	// by clear(); count(null) would throw a TypeError on PHP 8.
	if (empty($this->children)) {
		return null;
	}

	return end($this->children);
}
285
/**
 * Return the child that follows this node in its parent's children list.
 *
 * @return simple_html_dom_node|null Null when there is no parent, when the
 * node is not among the parent's children, or when it is the last child.
 */
function next_sibling()
{
	$parent = $this->parent;

	if ($parent === null) {
		return null;
	}

	$position = array_search($this, $parent->children, true);

	if ($position === false) {
		return null;
	}

	$next = $position + 1;

	return isset($parent->children[$next]) ? $parent->children[$next] : null;
}
300
/**
 * Return the child that precedes this node in its parent's children list.
 *
 * @return simple_html_dom_node|null Null when there is no parent, when the
 * node is not among the parent's children, or when it is the first child.
 */
function prev_sibling()
{
	$parent = $this->parent;

	if ($parent === null) {
		return null;
	}

	$position = array_search($this, $parent->children, true);

	if ($position === false || $position <= 0) {
		return null;
	}

	return $parent->children[$position - 1];
}
315
/**
 * Walk up the parent chain and return the closest ancestor with a given tag.
 *
 * @param string $tag Tag name, compared with === against each ancestor's tag.
 * @return simple_html_dom_node|null The matching ancestor, or null when the
 * chain ends without a match.
 */
function find_ancestor_tag($tag)
{
	global $debug_object;
	if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

	for ($candidate = $this->parent; $candidate !== null; $candidate = $candidate->parent) {
		if (is_object($debug_object)) {
			$debug_object->debug_log(2, 'Current tag is: ' . $candidate->tag);
		}

		if ($candidate->tag === $tag) {
			return $candidate;
		}
	}

	return null;
}
341
/**
 * Return the markup inside this node.
 *
 * Order of precedence: an explicitly stored inner text, then the node's own
 * text (with noise restored by the DOM), then the concatenated outer text of
 * all entries in $this->nodes.
 *
 * @return string
 */
function innertext()
{
	if (isset($this->_[HDOM_INFO_INNER])) {
		return $this->_[HDOM_INFO_INNER];
	}

	if (isset($this->_[HDOM_INFO_TEXT])) {
		return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
	}

	$pieces = array();

	foreach ($this->nodes as $child) {
		$pieces[] = $child->outertext();
	}

	return implode('', $pieces);
}
360
/**
 * Return the markup of this node including its own tag.
 *
 * Order of precedence: the root node returns its inner text; otherwise an
 * explicitly stored outer text, then the node's own text, and finally the
 * markup assembled from the opening tag, the content and the closing tag.
 * If the owning DOM has a callback set, it is invoked with this node before
 * any of the non-root cases are evaluated.
 *
 * @return string
 */
function outertext()
{
	global $debug_object;

	if (is_object($debug_object)) {
		$text = '';

		if ($this->tag === 'text') {
			if (!empty($this->text)) {
				$text = ' with text: ' . $this->text;
			}
		}

		// This is outertext(); the message used to say "Innertext".
		$debug_object->debug_log(1, 'Outertext of tag: ' . $this->tag . $text);
	}

	if ($this->tag === 'root') {
		return $this->innertext();
	}

	// todo: What is the use of this callback? Remove?
	if ($this->dom && $this->dom->callback !== null) {
		call_user_func_array($this->dom->callback, array($this));
	}

	if (isset($this->_[HDOM_INFO_OUTER])) {
		return $this->_[HDOM_INFO_OUTER];
	}

	if (isset($this->_[HDOM_INFO_TEXT])) {
		// Without an owning DOM (e.g. after clear()) the noise cannot be
		// restored; return the stored text as-is instead of failing on null.
		if (!$this->dom) {
			return $this->_[HDOM_INFO_TEXT];
		}

		return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
	}

	$ret = '';

	// Nodes without a recorded begin position must not trigger an
	// undefined-index lookup in the DOM's node list.
	if ($this->dom
		&& isset($this->_[HDOM_INFO_BEGIN])
		&& isset($this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])) {
		$ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
	}

	if (isset($this->_[HDOM_INFO_INNER])) {
		// todo: <br> should either never have HDOM_INFO_INNER or always
		if ($this->tag !== 'br') {
			$ret .= $this->_[HDOM_INFO_INNER];
		}
	} elseif ($this->nodes) {
		foreach ($this->nodes as $n) {
			$ret .= $this->convert_text($n->outertext());
		}
	}

	// A closing tag is only written when an end position is recorded.
	if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
		$ret .= '</' . $this->tag . '>';
	}

	return $ret;
}
417
/**
 * Return the plain text of this node and its descendants.
 *
 * An explicitly stored inner text wins. Text nodes return their own text,
 * comments and unknown nodes return '', and so do script and style elements.
 * For everything else the text of all entries in $this->nodes is joined:
 * a blank line is started before each <p>, and the DOM's default span text
 * is appended after each <span>.
 *
 * @return string
 */
function text()
{
	if (isset($this->_[HDOM_INFO_INNER])) {
		return $this->_[HDOM_INFO_INNER];
	}

	$type = $this->nodetype;

	if ($type == HDOM_TYPE_TEXT) {
		return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
	}

	if ($type == HDOM_TYPE_COMMENT || $type == HDOM_TYPE_UNKNOWN) {
		return '';
	}

	foreach (array('script', 'style') as $silent_tag) {
		if (strcasecmp($this->tag, $silent_tag) === 0) { return ''; }
	}

	$plain = '';

	// $this->nodes can be null, for example once clear() has run on this
	// node; there is nothing to collect in that case.
	if ($this->nodes === null) {
		return $plain;
	}

	foreach ($this->nodes as $child) {
		// A paragraph starts after a blank line.
		if ($child->tag === 'p') {
			$plain = trim($plain) . "\n\n";
		}

		$plain .= $this->convert_text($child->text());

		// Keep consecutive spans from running into each other.
		if ($child->tag === 'span') {
			$plain .= $this->dom->default_span_text;
		}
	}

	return $plain;
}
459
/**
 * Return the inner text with CDATA markers removed.
 *
 * The opening marker is removed case-insensitively, the closing marker
 * case-sensitively.
 *
 * @return string
 */
function xmltext()
{
	$without_open = str_ireplace('<![CDATA[', '', $this->innertext());

	return str_replace(']]>', '', $without_open);
}
467
/**
 * Build the opening tag of this node, including its attributes.
 *
 * Text-like nodes (those with HDOM_INFO_TEXT) return their restored text
 * instead. Spacing and quoting of each attribute are looked up by the
 * attribute's position in $this->attr.
 *
 * @return string
 */
function makeup()
{
	// text, comment, unknown
	if (isset($this->_[HDOM_INFO_TEXT])) {
		return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
	}

	$markup = '<' . $this->tag;
	$position = 0;

	foreach ($this->attr as $name => $value) {
		// Positions count every attribute, removed ones included.
		$i = $position++;

		// skip removed attribute
		if ($value === null || $value === false) { continue; }

		$markup .= $this->_[HDOM_INFO_SPACE][$i][0];

		// no value attr: nowrap, checked selected...
		if ($value === true) {
			$markup .= $name;
			continue;
		}

		$style = $this->_[HDOM_INFO_QUOTE][$i];

		if ($style == HDOM_QUOTE_DOUBLE) {
			$quote = '"';
		} elseif ($style == HDOM_QUOTE_SINGLE) {
			$quote = '\'';
		} else {
			$quote = '';
		}

		$markup .= $name
		. $this->_[HDOM_INFO_SPACE][$i][1]
		. '='
		. $this->_[HDOM_INFO_SPACE][$i][2]
		. $quote
		. $value
		. $quote;
	}

	return $this->dom->restore_noise($markup) . $this->_[HDOM_INFO_ENDSPACE] . '>';
}
510
/**
 * Find nodes below this node that match a CSS-like selector.
 *
 * The selector is parsed into a list of selector chains. Each chain is
 * evaluated step by step with seek(), without recursion; the results of all
 * chains are merged and ordered by their index in the DOM's node list.
 *
 * @param string $selector Selector string.
 * @param int|null $idx Null returns all matches; otherwise the match at that
 * index is returned, negative values counting from the end.
 * @param bool $lowercase Forwarded to seek().
 * @return mixed Array of nodes when $idx is null, otherwise a single node or
 * null when there is no match at that index.
 */
function find($selector, $idx = null, $lowercase = false)
{
	$selector_list = $this->parse_selector($selector);
	if (count($selector_list) === 0) { return array(); }

	$matched = array();

	foreach ($selector_list as $chain) {
		if (count($chain) === 0) { return array(); }
		if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); }

		// Every chain starts at this node with a descendant combinator.
		$frontier = array($this->_[HDOM_INFO_BEGIN] => 1);
		$combinator = ' ';

		foreach ($chain as $step) {
			$reached = array();

			foreach ($frontier as $key => $unused) {
				$start = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
				$start->seek($step, $reached, $combinator, $lowercase);
			}

			$frontier = $reached;
			$combinator = $step[4]; // Combinator for the next step
		}

		// Keys already present are kept, new ones are added.
		$matched += $frontier;
	}

	ksort($matched);

	$found = array();
	foreach ($matched as $key => $unused) {
		$found[] = $this->dom->nodes[$key];
	}

	// return nth-element or array
	if ($idx === null) { return $found; }
	if ($idx < 0) { $idx = count($found) + $idx; }

	return isset($found[$idx]) ? $found[$idx] : null;
}
562
/**
 * Collect the nodes that are related to this node through one combinator and
 * that match one parsed simple selector.
 *
 * @param array $selector Parsed selector as produced by parse_selector():
 * array(tag, id, classes, attributes, combinator).
 * @param array $ret Result set, passed by reference. Each match is recorded
 * as a key of this array (the node's position in the DOM's node list) with
 * the value 1.
 * @param string $parent_cmd Combinator that relates this node to the
 * candidates: ' ' (descendant), '>' (child), '+' (next sibling) or
 * '~' (subsequent siblings).
 * @param bool $lowercase When true, the node's class names are lower-cased
 * before they are compared, and attribute values are compared lower-cased.
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
	global $debug_object;
	if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

	list($tag, $id, $class, $attributes, $cmb) = $selector;
	$nodes = array();

	if ($parent_cmd === ' ') { // Descendant Combinator
		// Find parent closing tag if the current element doesn't have a closing
		// tag (i.e. void element)
		$end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
		if ($end == 0) {
			$parent = $this->parent;
			while ($parent !== null && !isset($parent->_[HDOM_INFO_END])) {
				$end -= 1;
				$parent = $parent->parent;
			}

			// The walk can run out of ancestors (detached node); there is
			// no end position to add in that case.
			if ($parent !== null) {
				$end += $parent->_[HDOM_INFO_END];
			}
		}

		// Get list of target nodes
		$nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
		$nodes_count = $end - $nodes_start;
		$nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
	} elseif ($parent_cmd === '>') { // Child Combinator
		$nodes = $this->children;
	} elseif (($parent_cmd === '+' || $parent_cmd === '~') && $this->parent) {
		// Locate this node by identity. A loose comparison of node objects
		// would compare their properties recursively, and the node graph is
		// cyclic (parent <-> children).
		$index = array_search($this, $this->parent->children, true);

		if ($index !== false) {
			if ($parent_cmd === '+') { // Next-Sibling Combinator
				if (isset($this->parent->children[$index + 1])) {
					$nodes[] = $this->parent->children[$index + 1];
				}
			} else { // Subsequent Sibling Combinator
				// Only siblings AFTER this node qualify; the node itself is
				// not one of its own siblings.
				$nodes = array_slice($this->parent->children, $index + 1);
			}
		}
	}

	// Go through each candidate node.
	foreach($nodes as $node) {
		$pass = true;

		// Skip root nodes
		if(!$node->parent) {
			$pass = false;
		}

		// Handle 'text' selector
		if($pass && $tag === 'text' && $node->tag === 'text') {
			$ret[array_search($node, $this->dom->nodes, true)] = 1;
			unset($node);
			continue;
		}

		// Skip if node isn't a child node (i.e. text nodes)
		if($pass && !in_array($node, $node->parent->children, true)) {
			$pass = false;
		}

		// Skip if tag doesn't match
		if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') {
			$pass = false;
		}

		// Skip if ID doesn't exist
		if ($pass && $id !== '' && !isset($node->attr['id'])) {
			$pass = false;
		}

		// Check if ID matches
		if ($pass && $id !== '' && isset($node->attr['id'])) {
			// Note: Only consider the first ID (as browsers do)
			$node_id = explode(' ', trim($node->attr['id']))[0];

			if($id !== $node_id) { $pass = false; }
		}

		// Check if all class(es) exist
		if ($pass && $class !== '' && is_array($class) && !empty($class)) {
			if (isset($node->attr['class'])) {
				$node_classes = explode(' ', $node->attr['class']);

				if ($lowercase) {
					$node_classes = array_map('strtolower', $node_classes);
				}

				foreach($class as $c) {
					// Strict comparison: a loose one treats numeric-looking
					// names such as "1" and "01" as equal.
					if(!in_array($c, $node_classes, true)) {
						$pass = false;
						break;
					}
				}
			} else {
				$pass = false;
			}
		}

		// Check attributes
		if ($pass
			&& $attributes !== ''
			&& is_array($attributes)
			&& !empty($attributes)) {
				foreach($attributes as $a) {
					list (
						$att_name,
						$att_expr,
						$att_val,
						$att_inv,
						$att_case_sensitivity
					) = $a;

					// Handle indexing attributes (i.e. "[2]")
					/**
					 * Note: This is not supported by the CSS Standard but adds
					 * the ability to select items compatible to XPath (i.e.
					 * the 3rd element within it's parent).
					 *
					 * Note: This doesn't conflict with the CSS Standard which
					 * doesn't work on numeric attributes anyway.
					 */
					if (is_numeric($att_name)
						&& $att_expr === ''
						&& $att_val === '') {
							$count = 0;

							// Find index of current element in parent
							foreach ($node->parent->children as $c) {
								if ($c->tag === $node->tag) ++$count;
								if ($c === $node) break;
							}

							// If this is the correct node, continue with next
							// attribute
							if ($count === (int)$att_name) continue;
					}

					// Check attribute availability
					if ($att_inv) { // Attribute should NOT be set
						if (isset($node->attr[$att_name])) {
							$pass = false;
							break;
						}
					} else { // Attribute should be set
						// todo: "plaintext" is not a valid CSS selector!
						if ($att_name !== 'plaintext'
							&& !isset($node->attr[$att_name])) {
								$pass = false;
								break;
						}
					}

					// Continue with next attribute if expression isn't defined
					if ($att_expr === '') continue;

					// If they have told us that this is a "plaintext"
					// search then we want the plaintext of the node - right?
					// todo "plaintext" is not a valid CSS selector!
					if ($att_name === 'plaintext') {
						$nodeKeyValue = $node->text();
					} else {
						$nodeKeyValue = $node->attr[$att_name];
					}

					if (is_object($debug_object)) {
						$debug_object->debug_log(2,
							'testing node: '
							. $node->tag
							. ' for attribute: '
							. $att_name
							. $att_expr
							. $att_val
							. ' where nodes value is: '
							. $nodeKeyValue
						);
					}

					// If lowercase is set, do a case insensitive test of
					// the value of the selector.
					if ($lowercase) {
						$check = $this->match(
							$att_expr,
							strtolower($att_val),
							strtolower($nodeKeyValue),
							$att_case_sensitivity
						);
					} else {
						$check = $this->match(
							$att_expr,
							$att_val,
							$nodeKeyValue,
							$att_case_sensitivity
						);
					}

					if (is_object($debug_object)) {
						$debug_object->debug_log(2,
							'after match: '
							. ($check ? 'true' : 'false')
						);
					}

					if (!$check) {
						$pass = false;
						break;
					}
				}
		}

		// Found a match. Add to list and clear node
		if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1;
		unset($node);
	}
	// It's passed by reference so this is actually what this function returns.
	if (is_object($debug_object)) {
		$debug_object->debug_log(1, 'EXIT - ret: ', $ret);
	}
}
784
/**
 * Test an attribute value against a selector value using a CSS attribute
 * operator.
 *
 * @param string $exp Operator: '=', '!=', '^=', '$=', '*=', '|=' or '~='.
 * @param string $pattern Value given in the selector.
 * @param string $value Value found on the node.
 * @param string $case_sensitivity 'i' compares both values lower-cased; any
 * other value compares them as given.
 * @return bool|int Truthy when the value matches. '^=', '$=' and '*=' return
 * the result of preg_match(); unknown operators return false.
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
	global $debug_object;
	if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

	if ($case_sensitivity === 'i') {
		$pattern = strtolower($pattern);
		$value = strtolower($value);
	}

	switch ($exp) {
		case '=':
			return ($value === $pattern);
		case '!=':
			return ($value !== $pattern);
		case '^=':
			return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
		case '$=':
			return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
		case '*=':
			return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
		case '|=':
			/**
			 * [att|=val]
			 *
			 * Represents an element with the att attribute, its value
			 * either being exactly "val" or beginning with "val"
			 * immediately followed by "-" (U+002D).
			 *
			 * A plain prefix test is not enough: [lang|=en] must match
			 * "en" and "en-US" but not "english".
			 */
			return $value === $pattern
				|| strpos($value, $pattern . '-') === 0;
		case '~=':
			/**
			 * [att~=val]
			 *
			 * Represents an element with the att attribute whose value is a
			 * whitespace-separated list of words, one of which is exactly
			 * "val". If "val" contains whitespace, it will never represent
			 * anything (since the words are separated by spaces). Also if
			 * "val" is the empty string, it will never represent anything.
			 *
			 * Splitting on any run of whitespace and dropping empty parts
			 * handles tabs, newlines and repeated spaces, and guarantees
			 * that an empty "val" cannot match an empty fragment.
			 */
			$words = preg_split('/\s+/', trim($value), -1, PREG_SPLIT_NO_EMPTY);
			return in_array($pattern, $words, true);
	}
	return false;
}
829
/**
 * Split a selector string into selector lists and their steps.
 *
 * The result is an array with one entry per comma-separated selector. Each
 * entry is an array of steps, and each step is
 * array(tag, id, classes, attributes, combinator) where
 *  - classes is '' or an array of class names,
 *  - attributes is '' or an array of
 *    array(name, expression, value, inverted, case-sensitivity),
 *  - combinator is the separator that follows the step (' ', '>', '+', '~'),
 *    or '' for the last step of a comma-separated selector.
 *
 * @param string $selector_string Selector as passed to find().
 * @return array
 */
protected function parse_selector($selector_string)
{
	global $debug_object;
	if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

	/**
	 * Pattern of CSS selectors, modified from mootools (https://mootools.net/)
	 *
	 * The attribute name may contain a colon so that markup such as
	 * <tag attr:ibute="something"> can be selected. Such attributes cannot be
	 * read with property syntax ($node->x:y is not valid PHP).
	 *
	 * An attribute may be prefixed with an "@" that is not captured.
	 *
	 * Capture groups:
	 *
	 * [0] - full match
	 * [1] - tag name: ([\w:\*-]*)
	 *       zero or more word characters, colons, asterisks and hyphens
	 * [2] - id name: (?:\#([\w-]+))?
	 *       optional "#" followed by word characters and hyphens
	 * [3] - class names: (?:|\.([\w\.-]+))?
	 *       optional "." followed by one or more class names chained with
	 *       dots (i.e. ".foo.bar.baz"); the leading dot is not captured
	 * [4] - attributes: one or more bracketed attribute expressions
	 * [5] - separator: ([\/, >+~]+)
	 */
	// phpcs:ignore Generic.Files.LineLength
	$pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";

	// A trailing ' ' is appended as pseudo separator for the last step.
	$subject = trim($selector_string) . ' ';
	preg_match_all($pattern, $subject, $matches, PREG_SET_ORDER);

	if (is_object($debug_object)) {
		$debug_object->debug_log(2, 'Matches Array: ', $matches);
	}

	$selectors = array();
	$steps = array();

	foreach ($matches as $match) {
		list($full, $tag, $id, $classes, $attribute_part, $separator) = $match;
		$full = trim($full);

		// Skip NoOps
		if ($full === '' || $full === '/' || $full === '//') { continue; }

		// Convert to lowercase
		if ($this->dom->lowercase) {
			$tag = strtolower($tag);
		}

		// Extract classes
		if ($classes !== '') {
			$classes = explode('.', $classes);
		}

		/* Extract attributes (pattern based on the pattern above!)
		 *
		 * [0] - full match
		 * [1] - attribute name
		 * [2] - attribute expression
		 * [3] - attribute value
		 * [4] - case sensitivity
		 *
		 * Note: Attributes can be negated with a "!" prefix to their name
		 */
		$attribute_list = $attribute_part;

		if ($attribute_part !== '') {
			preg_match_all(
				"/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
				trim($attribute_part),
				$attribute_matches,
				PREG_SET_ORDER
			);

			$attribute_list = array();

			foreach ($attribute_matches as $att) {
				// Skip empty matches
				if (trim($att[0]) === '') { continue; }

				$inverted = (isset($att[1][0]) && $att[1][0] === '!');

				$name = $inverted ? substr($att[1], 1) : $att[1];
				$expression = isset($att[2]) ? $att[2] : '';
				$value = isset($att[3]) ? $att[3] : '';
				$case_flag = isset($att[4]) ? strtolower($att[4]) : '';

				$attribute_list[] = array(
					$name,
					$expression,
					$value,
					$inverted,
					$case_flag,
				);
			}
		}

		// Sanitize separator: whitespace only means "descendant"
		$trimmed_separator = trim($separator);
		if ($separator !== '' && $trimmed_separator === '') {
			$combinator = ' ';
		} else {
			$combinator = $trimmed_separator;
		}

		// A comma ends the current selector; it is not a combinator
		$is_list = ($combinator === ',');
		if ($is_list) {
			$combinator = '';
		}

		$steps[] = array($tag, $id, $classes, $attribute_list, $combinator);

		if ($is_list) {
			$selectors[] = $steps;
			$steps = array();
		}
	}

	if (count($steps) > 0) {
		$selectors[] = $steps;
	}

	return $selectors;
}
968
/**
 * Magic read access: attributes first, then the virtual text properties.
 *
 * @param string $name Attribute name, or one of 'outertext', 'innertext',
 * 'plaintext', 'xmltext'.
 * @return mixed The charset-converted attribute value when the attribute is
 * set; the corresponding text for a virtual property; otherwise a boolean
 * telling whether the attribute key exists at all.
 */
function __get($name)
{
	if (isset($this->attr[$name])) {
		return $this->convert_text($this->attr[$name]);
	}

	if ($name == 'outertext') { return $this->outertext(); }
	if ($name == 'innertext') { return $this->innertext(); }
	if ($name == 'plaintext') { return $this->text(); }
	if ($name == 'xmltext') { return $this->xmltext(); }

	return array_key_exists($name, $this->attr);
}
982
/**
 * Magic write access.
 *
 * 'outertext' and 'innertext' replace the stored outer/inner text (for nodes
 * that carry their own text, 'innertext' replaces that text instead). Any
 * other name sets an attribute; an attribute that is not set yet also gets
 * default spacing and double quotes for serialization.
 *
 * @param string $name Property or attribute name.
 * @param mixed $value New value.
 * @return mixed
 */
function __set($name, $value)
{
	global $debug_object;
	if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

	if ($name == 'outertext') {
		return $this->_[HDOM_INFO_OUTER] = $value;
	}

	if ($name == 'innertext') {
		$slot = isset($this->_[HDOM_INFO_TEXT]) ? HDOM_INFO_TEXT : HDOM_INFO_INNER;
		return $this->_[$slot] = $value;
	}

	$is_new = !isset($this->attr[$name]);

	if ($is_new) {
		$this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
		$this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
	}

	$this->attr[$name] = $value;
}
1004
/**
 * Magic isset(): the virtual text properties always exist; anything else
 * exists when it is a key of the attribute list (which includes attributes
 * without a value such as nowrap, checked, selected).
 *
 * @param string $name Property or attribute name.
 * @return bool
 */
function __isset($name)
{
	if (in_array($name, array('outertext', 'innertext', 'plaintext'))) {
		return true;
	}

	// When the key does not exist there is nothing isset() could find
	// either, so the key test alone decides.
	return array_key_exists($name, $this->attr);
}
1015
/**
 * Magic unset(): remove an attribute from the node.
 *
 * makeup() looks up the spacing and quoting of an attribute by the
 * attribute's position in $this->attr. Removing an entry shifts the position
 * of every later attribute, so the matching entries of the spacing and quote
 * lists are removed as well; otherwise later attributes would be written
 * with the spacing and quotes of their predecessor.
 *
 * @param string $name Attribute name.
 * @return void
 */
function __unset($name)
{
	if (!isset($this->attr[$name])) { return; }

	// Position of the attribute before it is removed. Keys are compared as
	// strings because PHP turns numeric string keys into integers.
	$position = array_search(
		(string)$name,
		array_map('strval', array_keys($this->attr)),
		true
	);

	unset($this->attr[$name]);

	if ($position === false) { return; }

	foreach (array(HDOM_INFO_SPACE, HDOM_INFO_QUOTE) as $info) {
		if (isset($this->_[$info]) && is_array($this->_[$info])) {
			array_splice($this->_[$info], $position, 1);
		}
	}
}
1020
/**
 * Convert text from the DOM's source charset to its target charset and strip
 * a UTF-8 byte order mark from either end.
 *
 * No conversion happens when there is no owning DOM, when either charset is
 * empty or when both are equal. If the target is UTF-8 and the text already
 * is valid UTF-8 according to is_utf8(), it is left untouched as well.
 *
 * @param string $text Text to convert.
 * @return mixed The converted text; whatever iconv() returns when a
 * conversion was attempted.
 */
function convert_text($text)
{
	global $debug_object;
	if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

	$result = $text;
	$sourceCharset = '';
	$targetCharset = '';

	if ($this->dom) {
		$sourceCharset = strtoupper($this->dom->_charset);
		$targetCharset = strtoupper($this->dom->_target_charset);
	}

	if (is_object($debug_object)) {
		$debug_object->debug_log(3,
			'source charset: '
			. $sourceCharset
			. ' target charaset: '
			. $targetCharset
		);
	}

	$charsets_differ = !empty($sourceCharset)
		&& !empty($targetCharset)
		&& (strcasecmp($sourceCharset, $targetCharset) != 0);

	if ($charsets_differ) {
		// The reported encoding could have been incorrect and the text may
		// actually be UTF-8 already.
		$already_utf8 = (strcasecmp($targetCharset, 'UTF-8') == 0)
			&& ($this->is_utf8($text));

		if (!$already_utf8) {
			$result = iconv($sourceCharset, $targetCharset, $text);
		}
	}

	if ($targetCharset !== 'UTF-8') {
		return $result;
	}

	// Make sure no byte order mark ends up in UTF-8 output.
	if (substr($result, 0, 3) === "\xef\xbb\xbf") {
		$result = substr($result, 3);
	}

	if (substr($result, -3) === "\xef\xbb\xbf") {
		$result = substr($result, 0, -3);
	}

	return $result;
}
1070
/**
 * Check whether a byte string is structurally valid UTF-8.
 *
 * Every byte >= 0x80 must be a lead byte that is followed by the number of
 * continuation bytes (0x80-0xBF) it announces. The check is lenient: the
 * obsolete 5 and 6 byte forms are accepted and over-long encodings are not
 * detected.
 *
 * @param string $str Bytes to check.
 * @return bool
 */
static function is_utf8($str)
{
	$len = strlen($str);

	for($i = 0; $i < $len; $i++) {
		$c = ord($str[$i]);

		// 0x00-0x7F is plain ASCII. The boundary is 127, not 128: byte 0x80
		// is a continuation byte and must not pass as a character on its own.
		if($c <= 127) { continue; }

		if($c >= 254) { return false; }
		elseif($c >= 252) { $bits = 6; }
		elseif($c >= 248) { $bits = 5; }
		elseif($c >= 240) { $bits = 4; }
		elseif($c >= 224) { $bits = 3; }
		elseif($c >= 192) { $bits = 2; }
		else { return false; } // 0x80-0xBF: continuation byte without lead

		// The sequence must fit into the remaining bytes.
		if(($i + $bits) > $len) { return false; }

		while($bits > 1) {
			$i++;
			$b = ord($str[$i]);
			if($b < 128 || $b > 191) { return false; }
			$bits--;
		}
	}

	return true;
}
1097
/**
 * Determine the display size of an <img> element.
 *
 * The width and height attributes are used when present. A dimension that is
 * still unknown afterwards is taken from the inline style attribute, provided
 * the declaration is an integer number of pixels (e.g. "width: 120px").
 *
 * Possible future enhancements: look at classes or ids of this tag and of
 * its ancestors that specify a height or width, including rules from
 * separate css files.
 *
 * @return array|false array('height' => ..., 'width' => ...) where a
 * dimension is -1 when it could not be determined, or false when this node
 * is not an img element.
 */
function get_display_size()
{
	if ($this->tag !== 'img') {
		return false;
	}

	$size = array(
		'height' => -1,
		'width' => -1
	);

	// See if there is a height or width attribute in the tag itself.
	foreach (array('width', 'height') as $dimension) {
		if (isset($this->attr[$dimension])) {
			$size[$dimension] = $this->attr[$dimension];
		}
	}

	// Now look for an inline style.
	if (isset($this->attr['style'])) {
		// Thanks to user gnarf from stackoverflow for this regular expression.
		$declarations = array();

		preg_match_all(
			'/([\w-]+)\s*:\s*([^;]+)\s*;?/',
			$this->attr['style'],
			$matches,
			PREG_SET_ORDER
		);

		foreach ($matches as $match) {
			// Property names are case-insensitive in CSS, and the value
			// group also captures whitespace in front of the ';'.
			$declarations[strtolower($match[1])] = trim($match[2]);
		}

		foreach (array('width', 'height') as $dimension) {
			// Attributes take precedence over the inline style.
			if (!isset($declarations[$dimension]) || $size[$dimension] != -1) {
				continue;
			}

			$value = $declarations[$dimension];

			// Only pixel values are understood.
			if (strtolower(substr($value, -2)) !== 'px') {
				continue;
			}

			$pixels = substr($value, 0, -2);

			// filter_var() returns the integer on success, so the result
			// must be compared with false; a plain truth test rejects "0".
			if (filter_var($pixels, FILTER_VALIDATE_INT) !== false) {
				$size[$dimension] = $pixels;
			}
		}
	}

	return $size;
}
1182
1183	function save($filepath = '')
1184	{
1185		$ret = $this->outertext();
1186
1187		if ($filepath !== '') {
1188			file_put_contents($filepath, $ret, LOCK_EX);
1189		}
1190
1191		return $ret;
1192	}
1193
1194	function addClass($class)
1195	{
1196		if (is_string($class)) {
1197			$class = explode(' ', $class);
1198		}
1199
1200		if (is_array($class)) {
1201			foreach($class as $c) {
1202				if (isset($this->class)) {
1203					if ($this->hasClass($c)) {
1204						continue;
1205					} else {
1206						$this->class .= ' ' . $c;
1207					}
1208				} else {
1209					$this->class = $c;
1210				}
1211			}
1212		} else {
1213			if (is_object($debug_object)) {
1214				$debug_object->debug_log(2, 'Invalid type: ', gettype($class));
1215			}
1216		}
1217	}
1218
1219	function hasClass($class)
1220	{
1221		if (is_string($class)) {
1222			if (isset($this->class)) {
1223				return in_array($class, explode(' ', $this->class), true);
1224			}
1225		} else {
1226			if (is_object($debug_object)) {
1227				$debug_object->debug_log(2, 'Invalid type: ', gettype($class));
1228			}
1229		}
1230
1231		return false;
1232	}
1233
1234	function removeClass($class = null)
1235	{
1236		if (!isset($this->class)) {
1237			return;
1238		}
1239
1240		if (is_null($class)) {
1241			$this->removeAttribute('class');
1242			return;
1243		}
1244
1245		if (is_string($class)) {
1246			$class = explode(' ', $class);
1247		}
1248
1249		if (is_array($class)) {
1250			$class = array_diff(explode(' ', $this->class), $class);
1251			if (empty($class)) {
1252				$this->removeAttribute('class');
1253			} else {
1254				$this->class = implode(' ', $class);
1255			}
1256		}
1257	}
1258
	/**
	 * Return the attribute storage of this node.
	 *
	 * @return mixed The `attr` property as is; other code in this class
	 * accesses it as an array keyed by attribute name.
	 */
	function getAllAttributes()
	{
		return $this->attr;
	}
1263
	/**
	 * Read an attribute through the magic getter.
	 *
	 * @param string $name Name passed on to __get().
	 * @return mixed Whatever __get() returns for $name.
	 */
	function getAttribute($name)
	{
		return $this->__get($name);
	}
1268
	/**
	 * Write an attribute through the magic setter.
	 *
	 * @param string $name Name passed on to __set().
	 * @param mixed $value Value passed on to __set().
	 * @return void
	 */
	function setAttribute($name, $value)
	{
		$this->__set($name, $value);
	}
1273
	/**
	 * Check for an attribute through the magic isset handler.
	 *
	 * @param string $name Name passed on to __isset().
	 * @return mixed Whatever __isset() returns for $name (presumably a
	 * boolean; __isset() is defined outside of this section).
	 */
	function hasAttribute($name)
	{
		return $this->__isset($name);
	}
1278
	/**
	 * Remove an attribute by assigning null through the magic setter.
	 *
	 * NOTE(review): relies on __set() treating null as "remove"; __set() is
	 * defined outside of this section, confirm there.
	 *
	 * @param string $name Name passed on to __set().
	 * @return void
	 */
	function removeAttribute($name)
	{
		$this->__set($name, null);
	}
1283
1284	function remove()
1285	{
1286		if ($this->parent) {
1287			$this->parent->removeChild($this);
1288		}
1289	}
1290
	/**
	 * Remove a child element, including everything below it, from this node
	 * and from the node list of the owning DOM object.
	 *
	 * The removal only happens if $node is found (by identity) in all three
	 * lists: this node's `nodes`, this node's `children` and the DOM's
	 * `nodes`. Otherwise the call is a no-op.
	 *
	 * NOTE(review): entries are removed with unset(), so the affected arrays
	 * keep their old keys and are not re-indexed. Verify that callers which
	 * access children by numeric index can cope with the gaps.
	 *
	 * @param object $node The child node to remove.
	 * @return void
	 */
	function removeChild($node)
	{
		// Strict search: find this very object, not merely an equal one
		$nidx = array_search($node, $this->nodes, true);
		$cidx = array_search($node, $this->children, true);
		$didx = array_search($node, $this->dom->nodes, true);

		if ($nidx !== false && $cidx !== false && $didx !== false) {

			// Depth first: remove the element children of $node recursively
			foreach($node->children as $child) {
				$node->removeChild($child);
			}

			// Remove whatever is still listed in $node->nodes (entries that
			// were not handled as children above) from $node and the DOM
			foreach($node->nodes as $entity) {
				$enidx = array_search($entity, $node->nodes, true);
				$edidx = array_search($entity, $node->dom->nodes, true);

				if ($enidx !== false && $edidx !== false) {
					unset($node->nodes[$enidx]);
					unset($node->dom->nodes[$edidx]);
				}
			}

			// Finally unlink $node itself from this node and from the DOM
			unset($this->nodes[$nidx]);
			unset($this->children[$cidx]);
			unset($this->dom->nodes[$didx]);

			$node->clear();

		}
	}
1321
1322	function getElementById($id)
1323	{
1324		return $this->find("#$id", 0);
1325	}
1326
1327	function getElementsById($id, $idx = null)
1328	{
1329		return $this->find("#$id", $idx);
1330	}
1331
	/**
	 * Find the first match for a tag name below this node.
	 *
	 * @param string $name Passed to find() unchanged as the selector.
	 * @return mixed Result of find() for index 0.
	 */
	function getElementByTagName($name)
	{
		return $this->find($name, 0);
	}
1336
	/**
	 * Find matches for a tag name below this node.
	 *
	 * @param string $name Passed to find() unchanged as the selector.
	 * @param int|null $idx Index passed on to find().
	 * @return mixed Result of find().
	 */
	function getElementsByTagName($name, $idx = null)
	{
		return $this->find($name, $idx);
	}
1341
	/**
	 * DOM-style alias of parent().
	 *
	 * @return mixed Result of parent() called without arguments.
	 */
	function parentNode()
	{
		return $this->parent();
	}
1346
	/**
	 * DOM-style alias of children().
	 *
	 * @param int $idx Index passed on to children(); defaults to -1.
	 * @return mixed Result of children().
	 */
	function childNodes($idx = -1)
	{
		return $this->children($idx);
	}
1351
	/**
	 * DOM-style alias of first_child().
	 *
	 * @return mixed Result of first_child().
	 */
	function firstChild()
	{
		return $this->first_child();
	}
1356
	/**
	 * DOM-style alias of last_child().
	 *
	 * @return mixed Result of last_child().
	 */
	function lastChild()
	{
		return $this->last_child();
	}
1361
	/**
	 * DOM-style alias of next_sibling().
	 *
	 * @return mixed Result of next_sibling().
	 */
	function nextSibling()
	{
		return $this->next_sibling();
	}
1366
	/**
	 * DOM-style alias of prev_sibling().
	 *
	 * @return mixed Result of prev_sibling().
	 */
	function previousSibling()
	{
		return $this->prev_sibling();
	}
1371
	/**
	 * DOM-style alias of has_child().
	 *
	 * @return mixed Result of has_child().
	 */
	function hasChildNodes()
	{
		return $this->has_child();
	}
1376
	/**
	 * Return the tag name of this node.
	 *
	 * @return mixed The `tag` property as is.
	 */
	function nodeName()
	{
		return $this->tag;
	}
1381
	/**
	 * Make this node the parent of $node.
	 *
	 * All work is delegated to $node->parent($this).
	 * NOTE(review): how the child gets registered in this node's lists
	 * depends on parent(), which is defined outside of this section.
	 *
	 * @param object $node The node to attach.
	 * @return object The node that was passed in.
	 */
	function appendChild($node)
	{
		$node->parent($this);
		return $node;
	}
1387
1388}
1389
1390class simple_html_dom
1391{
	/** Root node of the parsed document, created by prepare(). */
	public $root = null;
	/** List of nodes belonging to this document; reset by prepare(). */
	public $nodes = array();
	/** Callback set by set_callback() and reset by remove_callback(). */
	public $callback = null;
	/** Whether tag and attribute names are converted to lowercase. */
	public $lowercase = false;
	/** Length of the trimmed input in bytes, before any noise removal. */
	public $original_size;
	/** Current length of $doc in bytes. */
    public $size;

	// When true, parse_attr() strips "\r" and "\n" from attribute values.
    public $stripRNAttrValues = true;       // added option to ignore RN in attr values - nomadjimbob

	/** Current read offset into $doc. */
	protected $pos;
	/** The document text that is being parsed. */
	protected $doc;
	/** Character at $pos, or null once the end of $doc has been reached. */
	protected $char;

	/** Counter that is incremented for every node the parser creates. */
	protected $cursor;
	/** Node that newly parsed nodes are currently attached to. */
	protected $parent;
	/** Map of placeholder key => original text taken out by remove_noise(). */
	protected $noise = array();
	/** Characters treated as whitespace. */
	protected $token_blank = " \t\r\n";
	/** Characters that terminate an attribute name. */
	protected $token_equal = ' =/>';
	/** Characters that terminate a tag name. */
	protected $token_slash = " />\r\n\t";
	/** Characters that terminate an unquoted attribute value. */
	protected $token_attr = ' >';

	/** Charset of the document as determined by parse_charset(). */
	public $_charset = '';
	/** Target charset handed to the constructor. */
	public $_target_charset = '';

	/** Text stored as the inner text of <br> elements by read_tag(). */
	protected $default_br_text = '';

	/** Default span text handed to prepare(). */
	public $default_span_text = '';

	// Elements that never become the parent of following nodes, see read_tag()
	protected $self_closing_tags = array(
		'area' => 1,
		'base' => 1,
		'br' => 1,
		'col' => 1,
		'embed' => 1,
		'hr' => 1,
		'img' => 1,
		'input' => 1,
		'link' => 1,
		'meta' => 1,
		'param' => 1,
		'source' => 1,
		'track' => 1,
		'wbr' => 1
	);
	// End tags of these elements may close ancestors other than the current
	// parent, see read_tag()
	protected $block_tags = array(
		'body' => 1,
		'div' => 1,
		'form' => 1,
		'root' => 1,
		'span' => 1,
		'table' => 1
	);
	// tag => set of open parent tags that are closed implicitly when a start
	// tag of that name is read, see read_tag()
	protected $optional_closing_tags = array(
		// Not optional, see
		// https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
		'b' => array('b' => 1),
		'dd' => array('dd' => 1, 'dt' => 1),
		// Not optional, see
		// https://www.w3.org/TR/html/grouping-content.html#the-dl-element
		'dl' => array('dd' => 1, 'dt' => 1),
		'dt' => array('dd' => 1, 'dt' => 1),
		'li' => array('li' => 1),
		'optgroup' => array('optgroup' => 1, 'option' => 1),
		'option' => array('optgroup' => 1, 'option' => 1),
		'p' => array('p' => 1),
		'rp' => array('rp' => 1, 'rt' => 1),
		'rt' => array('rp' => 1, 'rt' => 1),
		'td' => array('td' => 1, 'th' => 1),
		'th' => array('td' => 1, 'th' => 1),
		'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
	);
1463
1464	function __construct(
1465		$str = null,
1466		$lowercase = true,
1467		$forceTagsClosed = true,
1468		$target_charset = DEFAULT_TARGET_CHARSET,
1469		$stripRN = true,
1470		$defaultBRText = DEFAULT_BR_TEXT,
1471		$defaultSpanText = DEFAULT_SPAN_TEXT,
1472		$options = 0)
1473	{
1474		if ($str) {
1475			if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
1476				$this->load_file($str);
1477			} else {
1478				$this->load(
1479					$str,
1480					$lowercase,
1481					$stripRN,
1482					$defaultBRText,
1483					$defaultSpanText,
1484					$options
1485				);
1486			}
1487		}
1488		// Forcing tags to be closed implies that we don't trust the html, but
1489		// it can lead to parsing errors if we SHOULD trust the html.
1490		if (!$forceTagsClosed) {
1491			$this->optional_closing_array = array();
1492		}
1493
1494		$this->_target_charset = $target_charset;
1495	}
1496
	/**
	 * Release the node tree and the document text by calling clear() when
	 * the object is destroyed.
	 */
	function __destruct()
	{
		$this->clear();
	}
1501
1502	function load(
1503		$str,
1504		$lowercase = true,
1505		$stripRN = true,
1506		$defaultBRText = DEFAULT_BR_TEXT,
1507		$defaultSpanText = DEFAULT_SPAN_TEXT,
1508		$options = 0)
1509	{
1510		global $debug_object;
1511
1512		// prepare
1513		$this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
1514
1515		// Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1516		// Script tags removal now preceeds style tag removal.
1517		// strip out <script> tags
1518		$this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
1519		$this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
1520
1521		// strip out the \r \n's if we are told to.
1522		if ($stripRN) {
1523			$this->doc = str_replace("\r", ' ', $this->doc);
1524			$this->doc = str_replace("\n", ' ', $this->doc);
1525
1526			// set the length of content since we have changed it.
1527			$this->size = strlen($this->doc);
1528		}
1529
1530		// strip out cdata
1531		$this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1532		// strip out comments
1533		$this->remove_noise("'<!--(.*?)-->'is");
1534		// strip out <style> tags
1535		$this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
1536		$this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
1537		// strip out preformatted tags
1538		$this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
1539		// strip out server side scripts
1540		$this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
1541
1542		if($options & HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
1543			$this->remove_noise("'(\{\w)(.*?)(\})'s", true);
1544		}
1545
1546		// parsing
1547		$this->parse();
1548		// end
1549		$this->root->_[HDOM_INFO_END] = $this->cursor;
1550		$this->parse_charset();
1551
1552		// make load function chainable
1553		return $this;
1554	}
1555
1556	function load_file()
1557	{
1558		$args = func_get_args();
1559
1560		if(($doc = call_user_func_array('file_get_contents', $args)) !== false) {
1561			$this->load($doc, true);
1562		} else {
1563			return false;
1564		}
1565	}
1566
	/**
	 * Store a callback in the `callback` property.
	 *
	 * NOTE(review): where and with which arguments the callback is invoked
	 * is not visible in this section.
	 *
	 * @param mixed $function_name The callback to store.
	 * @return void
	 */
	function set_callback($function_name)
	{
		$this->callback = $function_name;
	}
1571
	/**
	 * Reset the `callback` property to null.
	 *
	 * @return void
	 */
	function remove_callback()
	{
		$this->callback = null;
	}
1576
1577	function save($filepath = '')
1578	{
1579		$ret = $this->root->innertext();
1580		if ($filepath !== '') { file_put_contents($filepath, $ret, LOCK_EX); }
1581		return $ret;
1582	}
1583
	/**
	 * Run a selector query on the root node.
	 *
	 * All arguments are passed to the find() method of the root node
	 * unchanged.
	 *
	 * @param string $selector Selector passed on to the root node.
	 * @param int|null $idx Index passed on to the root node.
	 * @param bool $lowercase Flag passed on to the root node.
	 * @return mixed Result of find() on the root node.
	 */
	function find($selector, $idx = null, $lowercase = false)
	{
		return $this->root->find($selector, $idx, $lowercase);
	}
1588
	/**
	 * Tear down the node tree and drop the document text and noise table.
	 *
	 * clear() is called on every node this object still references (the
	 * node list, the current parent and the root), afterwards the `parent`,
	 * `root`, `doc` and `noise` properties are unset. prepare() calls this
	 * method before it sets the object up for a new document.
	 *
	 * @return void
	 */
	function clear()
	{
		if (isset($this->nodes)) {
			foreach ($this->nodes as $n) {
				$n->clear();
				// Only resets the loop variable, the array entry is kept
				$n = null;
			}
		}

		// This add next line is documented in the sourceforge repository.
		// 2977248 as a fix for ongoing memory leaks that occur even with the
		// use of clear.
		// NOTE(review): no `children` property is declared in the visible
		// part of this class, so this branch may never run - confirm.
		if (isset($this->children)) {
			foreach ($this->children as $n) {
				$n->clear();
				$n = null;
			}
		}

		if (isset($this->parent)) {
			$this->parent->clear();
			unset($this->parent);
		}

		if (isset($this->root)) {
			$this->root->clear();
			unset($this->root);
		}

		unset($this->doc);
		unset($this->noise);
	}
1621
	/**
	 * Dump the document by calling dump() on the root node.
	 *
	 * @param bool $show_attr Flag passed on to the root node.
	 * @return void
	 */
	function dump($show_attr = true)
	{
		$this->root->dump($show_attr);
	}
1626
1627	protected function prepare(
1628		$str, $lowercase = true,
1629		$defaultBRText = DEFAULT_BR_TEXT,
1630		$defaultSpanText = DEFAULT_SPAN_TEXT)
1631	{
1632		$this->clear();
1633
1634		$this->doc = trim($str);
1635		$this->size = strlen($this->doc);
1636		$this->original_size = $this->size; // original size of the html
1637		$this->pos = 0;
1638		$this->cursor = 1;
1639		$this->noise = array();
1640		$this->nodes = array();
1641		$this->lowercase = $lowercase;
1642		$this->default_br_text = $defaultBRText;
1643		$this->default_span_text = $defaultSpanText;
1644		$this->root = new simple_html_dom_node($this);
1645		$this->root->tag = 'root';
1646		$this->root->_[HDOM_INFO_BEGIN] = -1;
1647		$this->root->nodetype = HDOM_TYPE_ROOT;
1648		$this->parent = $this->root;
1649		if ($this->size > 0) { $this->char = $this->doc[0]; }
1650	}
1651
1652	protected function parse()
1653	{
1654		while (true) {
1655			// Read next tag if there is no text between current position and the
1656			// next opening tag.
1657			if (($s = $this->copy_until_char('<')) === '') {
1658				if($this->read_tag()) {
1659					continue;
1660				} else {
1661					return true;
1662				}
1663			}
1664
1665			// Add a text node for text between tags
1666			$node = new simple_html_dom_node($this);
1667			++$this->cursor;
1668			$node->_[HDOM_INFO_TEXT] = $s;
1669			$this->link_nodes($node, false);
1670		}
1671	}
1672
1673	protected function parse_charset()
1674	{
1675		global $debug_object;
1676
1677		$charset = null;
1678
1679		if (function_exists('get_last_retrieve_url_contents_content_type')) {
1680			$contentTypeHeader = get_last_retrieve_url_contents_content_type();
1681			$success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
1682			if ($success) {
1683				$charset = $matches[1];
1684				if (is_object($debug_object)) {
1685					$debug_object->debug_log(2,
1686						'header content-type found charset of: '
1687						. $charset
1688					);
1689				}
1690			}
1691		}
1692
1693		if (empty($charset)) {
1694			// https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
1695			$el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);
1696
1697			if (!empty($el)) {
1698				$fullvalue = $el->content;
1699				if (is_object($debug_object)) {
1700					$debug_object->debug_log(2,
1701						'meta content-type tag found'
1702						. $fullvalue
1703					);
1704				}
1705
1706				if (!empty($fullvalue)) {
1707					$success = preg_match(
1708						'/charset=(.+)/i',
1709						$fullvalue,
1710						$matches
1711					);
1712
1713					if ($success) {
1714						$charset = $matches[1];
1715					} else {
1716						// If there is a meta tag, and they don't specify the
1717						// character set, research says that it's typically
1718						// ISO-8859-1
1719						if (is_object($debug_object)) {
1720							$debug_object->debug_log(2,
1721								'meta content-type tag couldn\'t be parsed. using iso-8859 default.'
1722							);
1723						}
1724
1725						$charset = 'ISO-8859-1';
1726					}
1727				}
1728			}
1729		}
1730
1731		if (empty($charset)) {
1732			// https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
1733			if ($meta = $this->root->find('meta[charset]', 0)) {
1734				$charset = $meta->charset;
1735				if (is_object($debug_object)) {
1736					$debug_object->debug_log(2, 'meta charset: ' . $charset);
1737				}
1738			}
1739		}
1740
1741		if (empty($charset)) {
1742			// Try to guess the charset based on the content
1743			// Requires Multibyte String (mbstring) support (optional)
1744			if (function_exists('mb_detect_encoding')) {
1745				/**
1746				 * mb_detect_encoding() is not intended to distinguish between
1747				 * charsets, especially single-byte charsets. Its primary
1748				 * purpose is to detect which multibyte encoding is in use,
1749				 * i.e. UTF-8, UTF-16, shift-JIS, etc.
1750				 *
1751				 * -- https://bugs.php.net/bug.php?id=38138
1752				 *
1753				 * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
1754				 * always result in CP1251/ISO-8859-5 and vice versa.
1755				 *
1756				 * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
1757				 * to stay compatible.
1758				 */
1759				$encoding = mb_detect_encoding(
1760					$this->doc,
1761					array( 'UTF-8', 'CP1252', 'ISO-8859-1' )
1762				);
1763
1764				if ($encoding === 'CP1252' || $encoding === 'ISO-8859-1') {
1765					// Due to a limitation of mb_detect_encoding
1766					// 'CP1251'/'ISO-8859-5' will be detected as
1767					// 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
1768					// which case we can simply assume it is the other charset.
1769					if (!@iconv('CP1252', 'UTF-8', $this->doc)) {
1770						$encoding = 'CP1251';
1771					}
1772				}
1773
1774				if ($encoding !== false) {
1775					$charset = $encoding;
1776					if (is_object($debug_object)) {
1777						$debug_object->debug_log(2, 'mb_detect: ' . $charset);
1778					}
1779				}
1780			}
1781		}
1782
1783		if (empty($charset)) {
1784			// Assume it's UTF-8 as it is the most likely charset to be used
1785			$charset = 'UTF-8';
1786			if (is_object($debug_object)) {
1787				$debug_object->debug_log(2, 'No match found, assume ' . $charset);
1788			}
1789		}
1790
1791		// Since CP1252 is a superset, if we get one of it's subsets, we want
1792		// it instead.
1793		if ((strtolower($charset) == 'iso-8859-1')
1794			|| (strtolower($charset) == 'latin1')
1795			|| (strtolower($charset) == 'latin-1')) {
1796			$charset = 'CP1252';
1797			if (is_object($debug_object)) {
1798				$debug_object->debug_log(2,
1799					'replacing ' . $charset . ' with CP1252 as its a superset'
1800				);
1801			}
1802		}
1803
1804		if (is_object($debug_object)) {
1805			$debug_object->debug_log(1, 'EXIT - ' . $charset);
1806		}
1807
1808		return $this->_charset = $charset;
1809	}
1810
	/**
	 * Read the tag at the current position and add it to the node tree.
	 *
	 * Handles, in this order: end tags (including end tags that do not match
	 * the current parent), "<!..." constructs (comments, doctype, CDATA),
	 * malformed start tags (which are kept as text) and regular start tags
	 * with their attributes.
	 *
	 * @return bool False if the current character is not '<', i.e. there is
	 * no further tag to read; true in every other case.
	 */
	protected function read_tag()
	{
		// Set end position if no further tags found
		if ($this->char !== '<') {
			$this->root->_[HDOM_INFO_END] = $this->cursor;
			return false;
		}

		$begin_tag_pos = $this->pos;
		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

		// end tag
		if ($this->char === '/') {
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

			// Skip whitespace in end tags (i.e. in "</   html>")
			$this->skip($this->token_blank);
			$tag = $this->copy_until_char('>');

			// Skip attributes in end tags
			if (($pos = strpos($tag, ' ')) !== false) {
				$tag = substr($tag, 0, $pos);
			}

			$parent_lower = strtolower($this->parent->tag);
			$tag_lower = strtolower($tag);

			// The end tag is supposed to close the parent tag. Handle situations
			// when it doesn't
			if ($parent_lower !== $tag_lower) {
				// Parent tag does not have to be closed necessarily (optional closing tag)
				// Current tag is a block tag, so it may close an ancestor
				if (isset($this->optional_closing_tags[$parent_lower])
					&& isset($this->block_tags[$tag_lower])) {

					$this->parent->_[HDOM_INFO_END] = 0;
					$org_parent = $this->parent;

					// Traverse ancestors to find a matching opening tag
					// Stop at root node
					while (($this->parent->parent)
						&& strtolower($this->parent->tag) !== $tag_lower
					){
						$this->parent = $this->parent->parent;
					}

					// If we don't have a match add current tag as text node
					if (strtolower($this->parent->tag) !== $tag_lower) {
						$this->parent = $org_parent; // restore original parent

						if ($this->parent->parent) {
							$this->parent = $this->parent->parent;
						}

						$this->parent->_[HDOM_INFO_END] = $this->cursor;
						return $this->as_text_node($tag);
					}
				} elseif (($this->parent->parent)
					&& isset($this->block_tags[$tag_lower])
				) {
					// Grandparent exists and current tag is a block tag, so our
					// parent doesn't have an end tag
					$this->parent->_[HDOM_INFO_END] = 0; // No end tag
					$org_parent = $this->parent;

					// Traverse ancestors to find a matching opening tag
					// Stop at root node
					while (($this->parent->parent)
						&& strtolower($this->parent->tag) !== $tag_lower
					) {
						$this->parent = $this->parent->parent;
					}

					// If we don't have a match add current tag as text node
					if (strtolower($this->parent->tag) !== $tag_lower) {
						$this->parent = $org_parent; // restore original parent
						$this->parent->_[HDOM_INFO_END] = $this->cursor;
						return $this->as_text_node($tag);
					}
				} elseif (($this->parent->parent)
					&& strtolower($this->parent->parent->tag) === $tag_lower
				) { // Grandparent exists and current tag closes it
					$this->parent->_[HDOM_INFO_END] = 0;
					$this->parent = $this->parent->parent;
				} else { // Random tag, add as text node
					return $this->as_text_node($tag);
				}
			}

			// Set end position of parent tag to current cursor position
			$this->parent->_[HDOM_INFO_END] = $this->cursor;

			// The element is closed, continue one level up
			if ($this->parent->parent) {
				$this->parent = $this->parent->parent;
			}

			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}

		// start tag
		$node = new simple_html_dom_node($this);
		$node->_[HDOM_INFO_BEGIN] = $this->cursor;
		++$this->cursor;
		$tag = $this->copy_until($this->token_slash); // Get tag name
		$node->tag_start = $begin_tag_pos;

		// doctype, cdata & comments...
		// <!DOCTYPE html>
		// <![CDATA[ ... ]]>
		// <!-- Comment -->
		if (isset($tag[0]) && $tag[0] === '!') {
			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

			if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
				$node->nodetype = HDOM_TYPE_COMMENT;
				$node->tag = 'comment';
			} else { // Could be doctype or CDATA but we don't care
				$node->nodetype = HDOM_TYPE_UNKNOWN;
				$node->tag = 'unknown';
			}

			if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }

			$this->link_nodes($node, true);
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}

		// The start tag cannot contain another start tag, if so add as text
		// i.e. "<<html>"
		// Note: "!==" binds tighter than "=", so $pos receives the boolean
		// result of the comparison, not the offset. $pos is not used in this
		// branch, the condition itself works as intended.
		if ($pos = strpos($tag, '<') !== false) {
			$tag = '<' . substr($tag, 0, -1);
			$node->_[HDOM_INFO_TEXT] = $tag;
			$this->link_nodes($node, false);
			$this->char = $this->doc[--$this->pos]; // prev
			return true;
		}

		// Handle invalid tag names (i.e. "<html#doc>")
		if (!preg_match('/^\w[\w:-]*$/', $tag)) {
			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

			// Next char is the beginning of a new tag, don't touch it.
			if ($this->char === '<') {
				$this->link_nodes($node, false);
				return true;
			}

			// Next char closes current tag, add and be done with it.
			if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }
			$this->link_nodes($node, false);
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}

		// begin tag, add new node
		$node->nodetype = HDOM_TYPE_ELEMENT;
		$tag_lower = strtolower($tag);
		$node->tag = ($this->lowercase) ? $tag_lower : $tag;

		// handle optional closing tags
		if (isset($this->optional_closing_tags[$tag_lower])) {
			// Traverse ancestors to close all optional closing tags
			while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
				$this->parent->_[HDOM_INFO_END] = 0;
				$this->parent = $this->parent->parent;
			}
			$node->parent = $this->parent;
		}

		$guard = 0; // prevent infinity loop

		// [0] Space between tag and first attribute
		$space = array($this->copy_skip($this->token_blank), '', '');

		// attributes
		do {
			// Everything until the first equal sign should be the attribute name
			$name = $this->copy_until($this->token_equal);

			if ($name === '' && $this->char !== null && $space[0] === '') {
				break;
			}

			// Position did not move since the last iteration: step over the
			// current character
			if ($guard === $this->pos) { // Escape infinite loop
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				continue;
			}

			$guard = $this->pos;

			// handle endless '<'
			// Out of bounds before the tag ended
			if ($this->pos >= $this->size - 1 && $this->char !== '>') {
				$node->nodetype = HDOM_TYPE_TEXT;
				$node->_[HDOM_INFO_END] = 0;
				$node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
				$node->tag = 'text';
				$this->link_nodes($node, false);
				return true;
			}

			// handle mismatch '<'
			// Attributes cannot start after opening tag
			if ($this->doc[$this->pos - 1] == '<') {
				$node->nodetype = HDOM_TYPE_TEXT;
				$node->tag = 'text';
				$node->attr = array();
				$node->_[HDOM_INFO_END] = 0;
				$node->_[HDOM_INFO_TEXT] = substr(
					$this->doc,
					$begin_tag_pos,
					$this->pos - $begin_tag_pos - 1
				);
				// Step back so that the '<' is read again as the start of
				// the next tag
				$this->pos -= 2;
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				$this->link_nodes($node, false);
				return true;
			}

			if ($name !== '/' && $name !== '') { // this is a attribute name
				// [1] Whitespace after attribute name
				$space[1] = $this->copy_skip($this->token_blank);

				$name = $this->restore_noise($name); // might be a noisy name

				if ($this->lowercase) { $name = strtolower($name); }

				if ($this->char === '=') { // attribute with value
					$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
					$this->parse_attr($node, $name, $space); // get attribute value
				} else {
					//no value attr: nowrap, checked selected...
					$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
					$node->attr[$name] = true;
					if ($this->char != '>') { $this->char = $this->doc[--$this->pos]; } // prev
				}

				$node->_[HDOM_INFO_SPACE][] = $space;

				// prepare for next attribute
				$space = array(
					$this->copy_skip($this->token_blank),
					'',
					''
				);
			} else { // no more attributes
				break;
			}
		} while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended

		$this->link_nodes($node, true);
		$node->_[HDOM_INFO_ENDSPACE] = $space[0];

		// handle empty tags (i.e. "<div/>")
		if ($this->copy_until_char('>') === '/') {
			$node->_[HDOM_INFO_ENDSPACE] .= '/';
			$node->_[HDOM_INFO_END] = 0;
		} else {
			// reset parent
			// Self closing elements never take children, so they do not
			// become the current parent
			if (!isset($this->self_closing_tags[strtolower($node->tag)])) {
				$this->parent = $node;
			}
		}

		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

		// If it's a BR tag, we need to set it's text to the default text.
		// This way when we see it in plaintext, we can generate formatting that the user wants.
		// since a br tag never has sub nodes, this works well.
		if ($node->tag === 'br') {
			$node->_[HDOM_INFO_INNER] = $this->default_br_text;
		}

		return true;
	}
2088
2089	protected function parse_attr($node, $name, &$space)
2090	{
2091		$is_duplicate = isset($node->attr[$name]);
2092
2093		if (!$is_duplicate) // Copy whitespace between "=" and value
2094			$space[2] = $this->copy_skip($this->token_blank);
2095
2096		switch ($this->char) {
2097			case '"':
2098				$quote_type = HDOM_QUOTE_DOUBLE;
2099				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2100				$value = $this->copy_until_char('"');
2101				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2102				break;
2103			case '\'':
2104				$quote_type = HDOM_QUOTE_SINGLE;
2105				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2106				$value = $this->copy_until_char('\'');
2107				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2108				break;
2109			default:
2110				$quote_type = HDOM_QUOTE_NO;
2111				$value = $this->copy_until($this->token_attr);
2112		}
2113
2114		$value = $this->restore_noise($value);
2115
2116		// PaperG: Attributes should not have \r or \n in them, that counts as
2117        // html whitespace.
2118
2119        // Added $stripRNAttrValues option for DokuWiki - nomadjimbob
2120        if($this->stripRNAttrValues) {
2121            $value = str_replace("\r", '', $value);
2122            $value = str_replace("\n", '', $value);
2123        }
2124
2125		// PaperG: If this is a "class" selector, lets get rid of the preceeding
2126		// and trailing space since some people leave it in the multi class case.
2127		if ($name === 'class') {
2128			$value = trim($value);
2129		}
2130
2131		if (!$is_duplicate) {
2132			$node->_[HDOM_INFO_QUOTE][] = $quote_type;
2133			$node->attr[$name] = $value;
2134		}
2135	}
2136
2137	protected function link_nodes(&$node, $is_child)
2138	{
2139		$node->parent = $this->parent;
2140		$this->parent->nodes[] = $node;
2141		if ($is_child) {
2142			$this->parent->children[] = $node;
2143		}
2144	}
2145
2146	protected function as_text_node($tag)
2147	{
2148		$node = new simple_html_dom_node($this);
2149		++$this->cursor;
2150		$node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
2151		$this->link_nodes($node, false);
2152		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2153		return true;
2154	}
2155
2156	protected function skip($chars)
2157	{
2158		$this->pos += strspn($this->doc, $chars, $this->pos);
2159		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2160	}
2161
2162	protected function copy_skip($chars)
2163	{
2164		$pos = $this->pos;
2165		$len = strspn($this->doc, $chars, $pos);
2166		$this->pos += $len;
2167		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2168		if ($len === 0) { return ''; }
2169		return substr($this->doc, $pos, $len);
2170	}
2171
2172	protected function copy_until($chars)
2173	{
2174		$pos = $this->pos;
2175		$len = strcspn($this->doc, $chars, $pos);
2176		$this->pos += $len;
2177		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2178		return substr($this->doc, $pos, $len);
2179	}
2180
2181	protected function copy_until_char($char)
2182	{
2183		if ($this->char === null) { return ''; }
2184
2185		if (($pos = strpos($this->doc, $char, $this->pos)) === false) {
2186			$ret = substr($this->doc, $this->pos, $this->size - $this->pos);
2187			$this->char = null;
2188			$this->pos = $this->size;
2189			return $ret;
2190		}
2191
2192		if ($pos === $this->pos) { return ''; }
2193
2194		$pos_old = $this->pos;
2195		$this->char = $this->doc[$pos];
2196		$this->pos = $pos;
2197		return substr($this->doc, $pos_old, $pos - $pos_old);
2198	}
2199
2200	protected function remove_noise($pattern, $remove_tag = false)
2201	{
2202		global $debug_object;
2203		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
2204
2205		$count = preg_match_all(
2206			$pattern,
2207			$this->doc,
2208			$matches,
2209			PREG_SET_ORDER | PREG_OFFSET_CAPTURE
2210		);
2211
2212		for ($i = $count - 1; $i > -1; --$i) {
2213			$key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);
2214
2215			if (is_object($debug_object)) {
2216				$debug_object->debug_log(2, 'key is: ' . $key);
2217			}
2218
2219			$idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch
2220			$this->noise[$key] = $matches[$i][$idx][0];
2221			$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
2222		}
2223
2224		// reset the length of content
2225		$this->size = strlen($this->doc);
2226
2227		if ($this->size > 0) {
2228			$this->char = $this->doc[0];
2229		}
2230	}
2231
2232	function restore_noise($text)
2233	{
2234		global $debug_object;
2235		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
2236
2237		while (($pos = strpos($text, '___noise___')) !== false) {
2238			// Sometimes there is a broken piece of markup, and we don't GET the
2239			// pos+11 etc... token which indicates a problem outside of us...
2240
2241			// todo: "___noise___1000" (or any number with four or more digits)
2242			// in the DOM causes an infinite loop which could be utilized by
2243			// malicious software
2244			if (strlen($text) > $pos + 15) {
2245				$key = '___noise___'
2246				. $text[$pos + 11]
2247				. $text[$pos + 12]
2248				. $text[$pos + 13]
2249				. $text[$pos + 14]
2250				. $text[$pos + 15];
2251
2252				if (is_object($debug_object)) {
2253					$debug_object->debug_log(2, 'located key of: ' . $key);
2254				}
2255
2256				if (isset($this->noise[$key])) {
2257					$text = substr($text, 0, $pos)
2258					. $this->noise[$key]
2259					. substr($text, $pos + 16);
2260				} else {
2261					// do this to prevent an infinite loop.
2262					$text = substr($text, 0, $pos)
2263					. 'UNDEFINED NOISE FOR KEY: '
2264					. $key
2265					. substr($text, $pos + 16);
2266				}
2267			} else {
2268				// There is no valid key being given back to us... We must get
2269				// rid of the ___noise___ or we will have a problem.
2270				$text = substr($text, 0, $pos)
2271				. 'NO NUMERIC NOISE KEY'
2272				. substr($text, $pos + 11);
2273			}
2274		}
2275		return $text;
2276	}
2277
2278	function search_noise($text)
2279	{
2280		global $debug_object;
2281		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
2282
2283		foreach($this->noise as $noiseElement) {
2284			if (strpos($noiseElement, $text) !== false) {
2285				return $noiseElement;
2286			}
2287		}
2288	}
2289
2290	function __toString()
2291	{
2292		return $this->root->innertext();
2293	}
2294
2295	function __get($name)
2296	{
2297		switch ($name) {
2298			case 'outertext':
2299				return $this->root->innertext();
2300			case 'innertext':
2301				return $this->root->innertext();
2302			case 'plaintext':
2303				return $this->root->text();
2304			case 'charset':
2305				return $this->_charset;
2306			case 'target_charset':
2307				return $this->_target_charset;
2308		}
2309	}
2310
2311	function childNodes($idx = -1)
2312	{
2313		return $this->root->childNodes($idx);
2314	}
2315
2316	function firstChild()
2317	{
2318		return $this->root->first_child();
2319	}
2320
2321	function lastChild()
2322	{
2323		return $this->root->last_child();
2324	}
2325
2326	function createElement($name, $value = null)
2327	{
2328		return @str_get_html("<$name>$value</$name>")->firstChild();
2329	}
2330
2331	function createTextNode($value)
2332	{
2333		return @end(str_get_html($value)->nodes);
2334	}
2335
2336	function getElementById($id)
2337	{
2338		return $this->find("#$id", 0);
2339	}
2340
2341	function getElementsById($id, $idx = null)
2342	{
2343		return $this->find("#$id", $idx);
2344	}
2345
2346	function getElementByTagName($name)
2347	{
2348		return $this->find($name, 0);
2349	}
2350
2351	function getElementsByTagName($name, $idx = -1)
2352	{
2353		return $this->find($name, $idx);
2354	}
2355
2356	function loadFile()
2357	{
2358		$args = func_get_args();
2359		$this->load_file($args);
2360	}
2361}
2362