xref: /template/mikio/inc/simple_html_dom.php (revision ec2e984b5015a4fd7032a4233b00b3177e20bb1f)
1<?php
2/**
3 * Website: http://sourceforge.net/projects/simplehtmldom/
4 * Additional projects: http://sourceforge.net/projects/debugobject/
5 * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6 *
7 * Licensed under The MIT License
8 * See the LICENSE file in the project root for more information.
9 *
10 * Authors:
11 *   S.C. Chen
12 *   John Schlick
13 *   Rus Carroll
14 *   logmanoriginal
15 *
16 * Contributors:
17 *   Yousuke Kumakura
18 *   Vadim Voituk
19 *   Antcs
20 *
21 * Version Rev. 1.9.1 (291)
22 *
23 * THIS LIBRARY HAS BEEN MODIFIED BY NOMADJIMBOB - james.collins@outlook.com.au
24 * Lines 2116 - stripping of \r\n from attributes has been disabled
25 */
26
// Node kinds; simple_html_dom_node::$nodetype holds one of these.
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);

// Quoting style of an attribute value (looked up per attribute in makeup()).
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);

// Keys of the per-node bookkeeping array simple_html_dom_node::$_.
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);

// Tunable defaults: each one is only defined when the including script has
// not already provided its own value.
if (!defined('DEFAULT_TARGET_CHARSET')) {
	define('DEFAULT_TARGET_CHARSET', 'UTF-8');
}
if (!defined('DEFAULT_BR_TEXT')) {
	define('DEFAULT_BR_TEXT', "\r\n");
}
if (!defined('DEFAULT_SPAN_TEXT')) {
	define('DEFAULT_SPAN_TEXT', ' ');
}
if (!defined('MAX_FILE_SIZE')) {
	define('MAX_FILE_SIZE', 600000);
}

// NOTE(review): not referenced in the visible part of this file; presumably a
// parser option flag - confirm against the simple_html_dom class.
define('HDOM_SMARTY_AS_TEXT', 1);
50
/**
 * Read a document from a file or URL and parse it into a DOM.
 *
 * The first five arguments are handed to file_get_contents(); the remaining
 * ones configure the simple_html_dom instance.
 *
 * @param string $url Location passed to file_get_contents()
 * @param bool $use_include_path Passed to file_get_contents()
 * @param resource|null $context Stream context passed to file_get_contents()
 * @param int $offset Read offset passed to file_get_contents()
 * @param int $maxLen Maximum number of bytes to read; values <= 0 select
 * MAX_FILE_SIZE
 * @param bool $lowercase Passed to the DOM constructor and to load()
 * @param bool $forceTagsClosed Passed to the DOM constructor
 * @param string $target_charset Passed to the DOM constructor
 * @param bool $stripRN Passed to the DOM constructor and to load()
 * @param string $defaultBRText Passed to the DOM constructor
 * @param string $defaultSpanText Passed to the DOM constructor
 * @return mixed Whatever simple_html_dom::load() returns, or false when
 * nothing usable could be read
 */
function file_get_html(
	$url,
	$use_include_path = false,
	$context = null,
	$offset = 0,
	$maxLen = -1,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT)
{
	// A non-positive limit means "use the library-wide maximum".
	$maxLen = ($maxLen <= 0) ? MAX_FILE_SIZE : $maxLen;

	$dom = new simple_html_dom(
		null,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	$contents = file_get_contents($url, $use_include_path, $context, $offset, $maxLen);

	// Parse only when something was read and it fits within the limit.
	$usable = !empty($contents) && strlen($contents) <= $maxLen;

	if ($usable) {
		return $dom->load($contents, $lowercase, $stripRN);
	}

	$dom->clear();
	return false;
}
96
/**
 * Parse an HTML string into a DOM.
 *
 * @param string $str Markup to parse
 * @param bool $lowercase Passed to the DOM constructor and to load()
 * @param bool $forceTagsClosed Passed to the DOM constructor
 * @param string $target_charset Passed to the DOM constructor
 * @param bool $stripRN Passed to the DOM constructor and to load()
 * @param string $defaultBRText Passed to the DOM constructor
 * @param string $defaultSpanText Passed to the DOM constructor
 * @return mixed Whatever simple_html_dom::load() returns, or false when $str
 * is empty or longer than MAX_FILE_SIZE bytes
 */
function str_get_html(
	$str,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT)
{
	$dom = new simple_html_dom(
		null,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	$rejected = empty($str) || strlen($str) > MAX_FILE_SIZE;

	if (!$rejected) {
		return $dom->load($str, $lowercase, $stripRN);
	}

	// Release the freshly created DOM before reporting failure.
	$dom->clear();
	return false;
}
123
/**
 * Print the tree below $node by delegating to the node's dump() method.
 *
 * Earlier this passed $node itself as the first argument of dump(), so the
 * caller's $show_attr and $deep were silently ignored. Both are now forwarded.
 *
 * @param object $node Node to dump; must provide dump($show_attr, $depth)
 * @param bool $show_attr Whether attributes are printed next to each tag
 * @param int $deep Indentation depth of the starting node
 * @return void
 */
function dump_html_tree($node, $show_attr = true, $deep = 0)
{
	$node->dump($show_attr, $deep);
}
128
129class simple_html_dom_node
130{
131	public $nodetype = HDOM_TYPE_TEXT;
132	public $tag = 'text';
133	public $attr = array();
134	public $children = array();
135	public $nodes = array();
136	public $parent = null;
137	public $_ = array();
138	public $tag_start = 0;
139	private $dom = null;
140
141	function __construct($dom)
142	{
143		$this->dom = $dom;
144		$dom->nodes[] = $this;
145	}
146
147	function __destruct()
148	{
149		$this->clear();
150	}
151
152	function __toString()
153	{
154		return $this->outertext();
155	}
156
157	function clear()
158	{
159		$this->dom = null;
160		$this->nodes = null;
161		$this->parent = null;
162		$this->children = null;
163	}
164
165	function dump($show_attr = true, $depth = 0)
166	{
167		echo str_repeat("\t", $depth) . $this->tag;
168
169		if ($show_attr && count($this->attr) > 0) {
170			echo '(';
171			foreach ($this->attr as $k => $v) {
172				echo "[$k]=>\"$v\", ";
173			}
174			echo ')';
175		}
176
177		echo "\n";
178
179		if ($this->nodes) {
180			foreach ($this->nodes as $node) {
181				$node->dump($show_attr, $depth + 1);
182			}
183		}
184	}
185
186	function dump_node($echo = true)
187	{
188		$string = $this->tag;
189
190		if (count($this->attr) > 0) {
191			$string .= '(';
192			foreach ($this->attr as $k => $v) {
193				$string .= "[$k]=>\"$v\", ";
194			}
195			$string .= ')';
196		}
197
198		if (count($this->_) > 0) {
199			$string .= ' $_ (';
200			foreach ($this->_ as $k => $v) {
201				if (is_array($v)) {
202					$string .= "[$k]=>(";
203					foreach ($v as $k2 => $v2) {
204						$string .= "[$k2]=>\"$v2\", ";
205					}
206					$string .= ')';
207				} else {
208					$string .= "[$k]=>\"$v\", ";
209				}
210			}
211			$string .= ')';
212		}
213
214		if (isset($this->text)) {
215			$string .= " text: ({$this->text})";
216		}
217
218		$string .= ' HDOM_INNER_INFO: ';
219
220		if (isset($node->_[HDOM_INFO_INNER])) {
221			$string .= "'" . $node->_[HDOM_INFO_INNER] . "'";
222		} else {
223			$string .= ' NULL ';
224		}
225
226		$string .= ' children: ' . count($this->children);
227		$string .= ' nodes: ' . count($this->nodes);
228		$string .= ' tag_start: ' . $this->tag_start;
229		$string .= "\n";
230
231		if ($echo) {
232			echo $string;
233			return;
234		} else {
235			return $string;
236		}
237	}
238
239	function parent($parent = null)
240	{
241		// I am SURE that this doesn't work properly.
242		// It fails to unset the current node from it's current parents nodes or
243		// children list first.
244		if ($parent !== null) {
245			$this->parent = $parent;
246			$this->parent->nodes[] = $this;
247			$this->parent->children[] = $this;
248		}
249
250		return $this->parent;
251	}
252
253	function has_child()
254	{
255		return !empty($this->children);
256	}
257
258	function children($idx = -1)
259	{
260		if ($idx === -1) {
261			return $this->children;
262		}
263
264		if (isset($this->children[$idx])) {
265			return $this->children[$idx];
266		}
267
268		return null;
269	}
270
271	function first_child()
272	{
273		if (count($this->children) > 0) {
274			return $this->children[0];
275		}
276		return null;
277	}
278
279	function last_child()
280	{
281		if (count($this->children) > 0) {
282			return end($this->children);
283		}
284		return null;
285	}
286
287	function next_sibling()
288	{
289		if ($this->parent === null) {
290			return null;
291		}
292
293		$idx = array_search($this, $this->parent->children, true);
294
295		if ($idx !== false && isset($this->parent->children[$idx + 1])) {
296			return $this->parent->children[$idx + 1];
297		}
298
299		return null;
300	}
301
302	function prev_sibling()
303	{
304		if ($this->parent === null) {
305			return null;
306		}
307
308		$idx = array_search($this, $this->parent->children, true);
309
310		if ($idx !== false && $idx > 0) {
311			return $this->parent->children[$idx - 1];
312		}
313
314		return null;
315	}
316
317	function find_ancestor_tag($tag)
318	{
319		global $debug_object;
320		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
321
322		if ($this->parent === null) {
323			return null;
324		}
325
326		$ancestor = $this->parent;
327
328		while (!is_null($ancestor)) {
329			if (is_object($debug_object)) {
330				$debug_object->debug_log(2, 'Current tag is: ' . $ancestor->tag);
331			}
332
333			if ($ancestor->tag === $tag) {
334				break;
335			}
336
337			$ancestor = $ancestor->parent;
338		}
339
340		return $ancestor;
341	}
342
343	function innertext()
344	{
345		if (isset($this->_[HDOM_INFO_INNER])) {
346			return $this->_[HDOM_INFO_INNER];
347		}
348
349		if (isset($this->_[HDOM_INFO_TEXT])) {
350			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
351		}
352
353		$ret = '';
354
355		foreach ($this->nodes as $n) {
356			$ret .= $n->outertext();
357		}
358
359		return $ret;
360	}
361
362	function outertext()
363	{
364		global $debug_object;
365
366		if (is_object($debug_object)) {
367			$text = '';
368
369			if ($this->tag === 'text') {
370				if (!empty($this->text)) {
371					$text = ' with text: ' . $this->text;
372				}
373			}
374
375			$debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
376		}
377
378		if ($this->tag === 'root') {
379			return $this->innertext();
380		}
381
382		// todo: What is the use of this callback? Remove?
383		if ($this->dom && $this->dom->callback !== null) {
384			call_user_func_array($this->dom->callback, array($this));
385		}
386
387		if (isset($this->_[HDOM_INFO_OUTER])) {
388			return $this->_[HDOM_INFO_OUTER];
389		}
390
391		if (isset($this->_[HDOM_INFO_TEXT])) {
392			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
393		}
394
395		$ret = '';
396
397		if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) {
398			$ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
399		}
400
401		if (isset($this->_[HDOM_INFO_INNER])) {
402			// todo: <br> should either never have HDOM_INFO_INNER or always
403			if ($this->tag !== 'br') {
404				$ret .= $this->_[HDOM_INFO_INNER];
405			}
406		} elseif ($this->nodes) {
407			foreach ($this->nodes as $n) {
408				$ret .= $this->convert_text($n->outertext());
409			}
410		}
411
412		if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
413			$ret .= '</' . $this->tag . '>';
414		}
415
416		return $ret;
417	}
418
419	function text()
420	{
421		if (isset($this->_[HDOM_INFO_INNER])) {
422			return $this->_[HDOM_INFO_INNER];
423		}
424
425		switch ($this->nodetype) {
426			case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
427			case HDOM_TYPE_COMMENT: return '';
428			case HDOM_TYPE_UNKNOWN: return '';
429		}
430
431		if (strcasecmp($this->tag, 'script') === 0) { return ''; }
432		if (strcasecmp($this->tag, 'style') === 0) { return ''; }
433
434		$ret = '';
435
436		// In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed
437		// for some span tags, and some p tags) $this->nodes is set to NULL.
438		// NOTE: This indicates that there is a problem where it's set to NULL
439		// without a clear happening.
440		// WHY is this happening?
441		if (!is_null($this->nodes)) {
442			foreach ($this->nodes as $n) {
443				// Start paragraph after a blank line
444				if ($n->tag === 'p') {
445					$ret = trim($ret) . "\n\n";
446				}
447
448				$ret .= $this->convert_text($n->text());
449
450				// If this node is a span... add a space at the end of it so
451				// multiple spans don't run into each other.  This is plaintext
452				// after all.
453				if ($n->tag === 'span') {
454					$ret .= $this->dom->default_span_text;
455				}
456			}
457		}
458		return $ret;
459	}
460
461	function xmltext()
462	{
463		$ret = $this->innertext();
464		$ret = str_ireplace('<![CDATA[', '', $ret);
465		$ret = str_replace(']]>', '', $ret);
466		return $ret;
467	}
468
469	function makeup()
470	{
471		// text, comment, unknown
472		if (isset($this->_[HDOM_INFO_TEXT])) {
473			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
474		}
475
476		$ret = '<' . $this->tag;
477		$i = -1;
478
479		foreach ($this->attr as $key => $val) {
480			++$i;
481
482			// skip removed attribute
483			if ($val === null || $val === false) { continue; }
484
485			$ret .= $this->_[HDOM_INFO_SPACE][$i][0];
486
487			//no value attr: nowrap, checked selected...
488			if ($val === true) {
489				$ret .= $key;
490			} else {
491				switch ($this->_[HDOM_INFO_QUOTE][$i])
492				{
493					case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
494					case HDOM_QUOTE_SINGLE: $quote = '\''; break;
495					default: $quote = '';
496				}
497
498				$ret .= $key
499				. $this->_[HDOM_INFO_SPACE][$i][1]
500				. '='
501				. $this->_[HDOM_INFO_SPACE][$i][2]
502				. $quote
503				. $val
504				. $quote;
505			}
506		}
507
508		$ret = $this->dom->restore_noise($ret);
509		return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
510	}
511
512	function find($selector, $idx = null, $lowercase = false)
513	{
514		$selectors = $this->parse_selector($selector);
515		if (($count = count($selectors)) === 0) { return array(); }
516		$found_keys = array();
517
518		// find each selector
519		for ($c = 0; $c < $count; ++$c) {
520			// The change on the below line was documented on the sourceforge
521			// code tracker id 2788009
522			// used to be: if (($levle=count($selectors[0]))===0) return array();
523			if (($levle = count($selectors[$c])) === 0) { return array(); }
524			if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); }
525
526			$head = array($this->_[HDOM_INFO_BEGIN] => 1);
527			$cmd = ' '; // Combinator
528
529			// handle descendant selectors, no recursive!
530			for ($l = 0; $l < $levle; ++$l) {
531				$ret = array();
532
533				foreach ($head as $k => $v) {
534					$n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k];
535					//PaperG - Pass this optional parameter on to the seek function.
536					$n->seek($selectors[$c][$l], $ret, $cmd, $lowercase);
537				}
538
539				$head = $ret;
540				$cmd = $selectors[$c][$l][4]; // Next Combinator
541			}
542
543			foreach ($head as $k => $v) {
544				if (!isset($found_keys[$k])) {
545					$found_keys[$k] = 1;
546				}
547			}
548		}
549
550		// sort keys
551		ksort($found_keys);
552
553		$found = array();
554		foreach ($found_keys as $k => $v) {
555			$found[] = $this->dom->nodes[$k];
556		}
557
558		// return nth-element or array
559		if (is_null($idx)) { return $found; }
560		elseif ($idx < 0) { $idx = count($found) + $idx; }
561		return (isset($found[$idx])) ? $found[$idx] : null;
562	}
563
564	protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
565	{
566		global $debug_object;
567		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
568
569		list($tag, $id, $class, $attributes, $cmb) = $selector;
570		$nodes = array();
571
572		if ($parent_cmd === ' ') { // Descendant Combinator
573			// Find parent closing tag if the current element doesn't have a closing
574			// tag (i.e. void element)
575			$end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
576			if ($end == 0) {
577				$parent = $this->parent;
578				while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) {
579					$end -= 1;
580					$parent = $parent->parent;
581				}
582				$end += $parent->_[HDOM_INFO_END];
583			}
584
585			// Get list of target nodes
586			$nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
587			$nodes_count = $end - $nodes_start;
588			$nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
589		} elseif ($parent_cmd === '>') { // Child Combinator
590			$nodes = $this->children;
591		} elseif ($parent_cmd === '+'
592			&& $this->parent
593			&& in_array($this, $this->parent->children)) { // Next-Sibling Combinator
594				$index = array_search($this, $this->parent->children, true) + 1;
595				if ($index < count($this->parent->children))
596					$nodes[] = $this->parent->children[$index];
597		} elseif ($parent_cmd === '~'
598			&& $this->parent
599			&& in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator
600				$index = array_search($this, $this->parent->children, true);
601				$nodes = array_slice($this->parent->children, $index);
602		}
603
604		// Go throgh each element starting at this element until the end tag
605		// Note: If this element is a void tag, any previous void element is
606		// skipped.
607		foreach($nodes as $node) {
608			$pass = true;
609
610			// Skip root nodes
611			if(!$node->parent) {
612				$pass = false;
613			}
614
615			// Handle 'text' selector
616			if($pass && $tag === 'text' && $node->tag === 'text') {
617				$ret[array_search($node, $this->dom->nodes, true)] = 1;
618				unset($node);
619				continue;
620			}
621
622			// Skip if node isn't a child node (i.e. text nodes)
623			if($pass && !in_array($node, $node->parent->children, true)) {
624				$pass = false;
625			}
626
627			// Skip if tag doesn't match
628			if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') {
629				$pass = false;
630			}
631
632			// Skip if ID doesn't exist
633			if ($pass && $id !== '' && !isset($node->attr['id'])) {
634				$pass = false;
635			}
636
637			// Check if ID matches
638			if ($pass && $id !== '' && isset($node->attr['id'])) {
639				// Note: Only consider the first ID (as browsers do)
640				$node_id = explode(' ', trim($node->attr['id']))[0];
641
642				if($id !== $node_id) { $pass = false; }
643			}
644
645			// Check if all class(es) exist
646			if ($pass && $class !== '' && is_array($class) && !empty($class)) {
647				if (isset($node->attr['class'])) {
648					$node_classes = explode(' ', $node->attr['class']);
649
650					if ($lowercase) {
651						$node_classes = array_map('strtolower', $node_classes);
652					}
653
654					foreach($class as $c) {
655						if(!in_array($c, $node_classes)) {
656							$pass = false;
657							break;
658						}
659					}
660				} else {
661					$pass = false;
662				}
663			}
664
665			// Check attributes
666			if ($pass
667				&& $attributes !== ''
668				&& is_array($attributes)
669				&& !empty($attributes)) {
670					foreach($attributes as $a) {
671						list (
672							$att_name,
673							$att_expr,
674							$att_val,
675							$att_inv,
676							$att_case_sensitivity
677						) = $a;
678
679						// Handle indexing attributes (i.e. "[2]")
680						/**
681						 * Note: This is not supported by the CSS Standard but adds
682						 * the ability to select items compatible to XPath (i.e.
683						 * the 3rd element within it's parent).
684						 *
685						 * Note: This doesn't conflict with the CSS Standard which
686						 * doesn't work on numeric attributes anyway.
687						 */
688						if (is_numeric($att_name)
689							&& $att_expr === ''
690							&& $att_val === '') {
691								$count = 0;
692
693								// Find index of current element in parent
694								foreach ($node->parent->children as $c) {
695									if ($c->tag === $node->tag) ++$count;
696									if ($c === $node) break;
697								}
698
699								// If this is the correct node, continue with next
700								// attribute
701								if ($count === (int)$att_name) continue;
702						}
703
704						// Check attribute availability
705						if ($att_inv) { // Attribute should NOT be set
706							if (isset($node->attr[$att_name])) {
707								$pass = false;
708								break;
709							}
710						} else { // Attribute should be set
711							// todo: "plaintext" is not a valid CSS selector!
712							if ($att_name !== 'plaintext'
713								&& !isset($node->attr[$att_name])) {
714									$pass = false;
715									break;
716							}
717						}
718
719						// Continue with next attribute if expression isn't defined
720						if ($att_expr === '') continue;
721
722						// If they have told us that this is a "plaintext"
723						// search then we want the plaintext of the node - right?
724						// todo "plaintext" is not a valid CSS selector!
725						if ($att_name === 'plaintext') {
726							$nodeKeyValue = $node->text();
727						} else {
728							$nodeKeyValue = $node->attr[$att_name];
729						}
730
731						if (is_object($debug_object)) {
732							$debug_object->debug_log(2,
733								'testing node: '
734								. $node->tag
735								. ' for attribute: '
736								. $att_name
737								. $att_expr
738								. $att_val
739								. ' where nodes value is: '
740								. $nodeKeyValue
741							);
742						}
743
744						// If lowercase is set, do a case insensitive test of
745						// the value of the selector.
746						if ($lowercase) {
747							$check = $this->match(
748								$att_expr,
749								strtolower($att_val),
750								strtolower($nodeKeyValue),
751								$att_case_sensitivity
752							);
753						} else {
754							$check = $this->match(
755								$att_expr,
756								$att_val,
757								$nodeKeyValue,
758								$att_case_sensitivity
759							);
760						}
761
762						if (is_object($debug_object)) {
763							$debug_object->debug_log(2,
764								'after match: '
765								. ($check ? 'true' : 'false')
766							);
767						}
768
769						if (!$check) {
770							$pass = false;
771							break;
772						}
773					}
774			}
775
776			// Found a match. Add to list and clear node
777			if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1;
778			unset($node);
779		}
780		// It's passed by reference so this is actually what this function returns.
781		if (is_object($debug_object)) {
782			$debug_object->debug_log(1, 'EXIT - ret: ', $ret);
783		}
784	}
785
786	protected function match($exp, $pattern, $value, $case_sensitivity)
787	{
788		global $debug_object;
789		if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
790
791		if ($case_sensitivity === 'i') {
792			$pattern = strtolower($pattern);
793			$value = strtolower($value);
794		}
795
796		switch ($exp) {
797			case '=':
798				return ($value === $pattern);
799			case '!=':
800				return ($value !== $pattern);
801			case '^=':
802				return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
803			case '$=':
804				return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
805			case '*=':
806				return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
807			case '|=':
808				/**
809				 * [att|=val]
810				 *
811				 * Represents an element with the att attribute, its value
812				 * either being exactly "val" or beginning with "val"
813				 * immediately followed by "-" (U+002D).
814				 */
815				return strpos($value, $pattern) === 0;
816			case '~=':
817				/**
818				 * [att~=val]
819				 *
820				 * Represents an element with the att attribute whose value is a
821				 * whitespace-separated list of words, one of which is exactly
822				 * "val". If "val" contains whitespace, it will never represent
823				 * anything (since the words are separated by spaces). Also if
824				 * "val" is the empty string, it will never represent anything.
825				 */
826				return in_array($pattern, explode(' ', trim($value)), true);
827		}
828		return false;
829	}
830
	/**
	 * Split a selector string into a structure that find() and seek() consume.
	 *
	 * The result is a list of selector chains, one per comma-separated
	 * alternative. Each chain is a list of steps, and each step is an array:
	 *   [0] tag name (lowercased when the DOM's lowercase flag is set)
	 *   [1] id name
	 *   [2] '' or a list of class names
	 *   [3] '' or a list of attribute conditions, each being
	 *       array(name, expression, value, inverted flag, case-sensitivity)
	 *   [4] combinator that follows the step ('' after the last step of a
	 *       chain that ended with a comma)
	 *
	 * @param string $selector_string Selector as passed to find()
	 * @return array List of selector chains as described above
	 */
	protected function parse_selector($selector_string)
	{
		global $debug_object;
		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

		/**
		 * Pattern of CSS selectors, modified from mootools (https://mootools.net/)
		 *
		 * Paperg: Add the colon to the attribute, so that it properly finds
		 * <tag attr:ibute="something" > like google does.
		 *
		 * Note: if you try to look at this attribute, you MUST use getAttribute
		 * since $dom->x:y will fail the php syntax check.
		 *
		 * Notice the \[ starting the attribute? and the @? following? This
		 * implies that an attribute can begin with an @ sign that is not
		 * captured. This implies that an html attribute specifier may start
		 * with an @ sign that is NOT captured by the expression. Farther study
		 * is required to determine of this should be documented or removed.
		 *
		 * Matches selectors in this order:
		 *
		 * [0] - full match
		 *
		 * [1] - tag name
		 *     ([\w:\*-]*)
		 *     Matches the tag name consisting of zero or more words, colons,
		 *     asterisks and hyphens.
		 *
		 * [2] - id name
		 *     (?:\#([\w-]+))
		 *     Optionally matches a id name, consisting of an "#" followed by
		 *     the id name (one or more words and hyphens).
		 *
		 * [3] - class names (including dots)
		 *     (?:\.([\w\.-]+))?
		 *     Optionally matches a list of classs, consisting of an "."
		 *     followed by the class name (one or more words and hyphens)
		 *     where multiple classes can be chained (i.e. ".foo.bar.baz")
		 *
		 * [4] - attributes
		 *     ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
		 *     Optionally matches the attributes list
		 *
		 * [5] - separator
		 *     ([\/, >+~]+)
		 *     Matches the selector list separator
		 */
		// phpcs:ignore Generic.Files.LineLength
		$pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";

		preg_match_all(
			$pattern,
			trim($selector_string) . ' ', // Add final ' ' as pseudo separator
			$matches,
			PREG_SET_ORDER
		);

		if (is_object($debug_object)) {
			$debug_object->debug_log(2, 'Matches Array: ', $matches);
		}

		// $selectors collects finished chains; $result is the chain being built.
		$selectors = array();
		$result = array();

		foreach ($matches as $m) {
			$m[0] = trim($m[0]);

			// Skip NoOps
			if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; }

			// Convert to lowercase
			if ($this->dom->lowercase) {
				$m[1] = strtolower($m[1]);
			}

			// Extract classes
			if ($m[3] !== '') { $m[3] = explode('.', $m[3]); }

			/* Extract attributes (pattern based on the pattern above!)

			 * [0] - full match
			 * [1] - attribute name
			 * [2] - attribute expression
			 * [3] - attribute value
			 * [4] - case sensitivity
			 *
			 * Note: Attributes can be negated with a "!" prefix to their name
			 */
			if($m[4] !== '') {
				preg_match_all(
					"/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
					trim($m[4]),
					$attributes,
					PREG_SET_ORDER
				);

				// Replace element by array
				$m[4] = array();

				foreach($attributes as $att) {
					// Skip empty matches
					if(trim($att[0]) === '') { continue; }

					// A leading "!" marks the condition as inverted and is
					// stripped from the stored attribute name.
					$inverted = (isset($att[1][0]) && $att[1][0] === '!');
					$m[4][] = array(
						$inverted ? substr($att[1], 1) : $att[1], // Name
						(isset($att[2])) ? $att[2] : '', // Expression
						(isset($att[3])) ? $att[3] : '', // Value
						$inverted, // Inverted Flag
						(isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
					);
				}
			}

			// Sanitize Separator
			if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator
				$m[5] = ' ';
			} else { // Other Separator
				$m[5] = trim($m[5]);
			}

			// Clear Separator if it's a Selector List
			if ($is_list = ($m[5] === ',')) { $m[5] = ''; }

			// Remove full match before adding to results
			array_shift($m);
			$result[] = $m;

			if ($is_list) { // Selector List
				$selectors[] = $result;
				$result = array();
			}
		}

		// Flush the chain that was still being built when the input ended.
		if (count($result) > 0) { $selectors[] = $result; }
		return $selectors;
	}
969
970	function __get($name)
971	{
972		if (isset($this->attr[$name])) {
973			return $this->convert_text($this->attr[$name]);
974		}
975		switch ($name) {
976			case 'outertext': return $this->outertext();
977			case 'innertext': return $this->innertext();
978			case 'plaintext': return $this->text();
979			case 'xmltext': return $this->xmltext();
980			default: return array_key_exists($name, $this->attr);
981		}
982	}
983
984	function __set($name, $value)
985	{
986		global $debug_object;
987		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
988
989		switch ($name) {
990			case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
991			case 'innertext':
992				if (isset($this->_[HDOM_INFO_TEXT])) {
993					return $this->_[HDOM_INFO_TEXT] = $value;
994				}
995				return $this->_[HDOM_INFO_INNER] = $value;
996		}
997
998		if (!isset($this->attr[$name])) {
999			$this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
1000			$this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
1001		}
1002
1003		$this->attr[$name] = $value;
1004	}
1005
1006	function __isset($name)
1007	{
1008		switch ($name) {
1009			case 'outertext': return true;
1010			case 'innertext': return true;
1011			case 'plaintext': return true;
1012		}
1013		//no value attr: nowrap, checked selected...
1014		return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
1015	}
1016
1017	function __unset($name)
1018	{
1019		if (isset($this->attr[$name])) { unset($this->attr[$name]); }
1020	}
1021
1022	function convert_text($text)
1023	{
1024		global $debug_object;
1025		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1026
1027		$converted_text = $text;
1028
1029		$sourceCharset = '';
1030		$targetCharset = '';
1031
1032		if ($this->dom) {
1033			$sourceCharset = strtoupper($this->dom->_charset);
1034			$targetCharset = strtoupper($this->dom->_target_charset);
1035		}
1036
1037		if (is_object($debug_object)) {
1038			$debug_object->debug_log(3,
1039				'source charset: '
1040				. $sourceCharset
1041				. ' target charaset: '
1042				. $targetCharset
1043			);
1044		}
1045
1046		if (!empty($sourceCharset)
1047			&& !empty($targetCharset)
1048			&& (strcasecmp($sourceCharset, $targetCharset) != 0)) {
1049			// Check if the reported encoding could have been incorrect and the text is actually already UTF-8
1050			if ((strcasecmp($targetCharset, 'UTF-8') == 0)
1051				&& ($this->is_utf8($text))) {
1052				$converted_text = $text;
1053			} else {
1054				$converted_text = iconv($sourceCharset, $targetCharset, $text);
1055			}
1056		}
1057
1058		// Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
1059		if ($targetCharset === 'UTF-8') {
1060			if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
1061				$converted_text = substr($converted_text, 3);
1062			}
1063
1064			if (substr($converted_text, -3) === "\xef\xbb\xbf") {
1065				$converted_text = substr($converted_text, 0, -3);
1066			}
1067		}
1068
1069		return $converted_text;
1070	}
1071
1072	static function is_utf8($str)
1073	{
1074		$c = 0; $b = 0;
1075		$bits = 0;
1076		$len = strlen($str);
1077		for($i = 0; $i < $len; $i++) {
1078			$c = ord($str[$i]);
1079			if($c > 128) {
1080				if(($c >= 254)) { return false; }
1081				elseif($c >= 252) { $bits = 6; }
1082				elseif($c >= 248) { $bits = 5; }
1083				elseif($c >= 240) { $bits = 4; }
1084				elseif($c >= 224) { $bits = 3; }
1085				elseif($c >= 192) { $bits = 2; }
1086				else { return false; }
1087				if(($i + $bits) > $len) { return false; }
1088				while($bits > 1) {
1089					$i++;
1090					$b = ord($str[$i]);
1091					if($b < 128 || $b > 191) { return false; }
1092					$bits--;
1093				}
1094			}
1095		}
1096		return true;
1097	}
1098
1099	function get_display_size()
1100	{
1101		global $debug_object;
1102
1103		$width = -1;
1104		$height = -1;
1105
1106		if ($this->tag !== 'img') {
1107			return false;
1108		}
1109
1110		// See if there is aheight or width attribute in the tag itself.
1111		if (isset($this->attr['width'])) {
1112			$width = $this->attr['width'];
1113		}
1114
1115		if (isset($this->attr['height'])) {
1116			$height = $this->attr['height'];
1117		}
1118
1119		// Now look for an inline style.
1120		if (isset($this->attr['style'])) {
1121			// Thanks to user gnarf from stackoverflow for this regular expression.
1122			$attributes = array();
1123
1124			preg_match_all(
1125				'/([\w-]+)\s*:\s*([^;]+)\s*;?/',
1126				$this->attr['style'],
1127				$matches,
1128				PREG_SET_ORDER
1129			);
1130
1131			foreach ($matches as $match) {
1132				$attributes[$match[1]] = $match[2];
1133			}
1134
1135			// If there is a width in the style attributes:
1136			if (isset($attributes['width']) && $width == -1) {
1137				// check that the last two characters are px (pixels)
1138				if (strtolower(substr($attributes['width'], -2)) === 'px') {
1139					$proposed_width = substr($attributes['width'], 0, -2);
1140					// Now make sure that it's an integer and not something stupid.
1141					if (filter_var($proposed_width, FILTER_VALIDATE_INT)) {
1142						$width = $proposed_width;
1143					}
1144				}
1145			}
1146
1147			// If there is a width in the style attributes:
1148			if (isset($attributes['height']) && $height == -1) {
1149				// check that the last two characters are px (pixels)
1150				if (strtolower(substr($attributes['height'], -2)) == 'px') {
1151					$proposed_height = substr($attributes['height'], 0, -2);
1152					// Now make sure that it's an integer and not something stupid.
1153					if (filter_var($proposed_height, FILTER_VALIDATE_INT)) {
1154						$height = $proposed_height;
1155					}
1156				}
1157			}
1158
1159		}
1160
1161		// Future enhancement:
1162		// Look in the tag to see if there is a class or id specified that has
1163		// a height or width attribute to it.
1164
1165		// Far future enhancement
1166		// Look at all the parent tags of this image to see if they specify a
1167		// class or id that has an img selector that specifies a height or width
1168		// Note that in this case, the class or id will have the img subselector
1169		// for it to apply to the image.
1170
1171		// ridiculously far future development
1172		// If the class or id is specified in a SEPARATE css file thats not on
1173		// the page, go get it and do what we were just doing for the ones on
1174		// the page.
1175
1176		$result = array(
1177			'height' => $height,
1178			'width' => $width
1179		);
1180
1181		return $result;
1182	}
1183
1184	function save($filepath = '')
1185	{
1186		$ret = $this->outertext();
1187
1188		if ($filepath !== '') {
1189			file_put_contents($filepath, $ret, LOCK_EX);
1190		}
1191
1192		return $ret;
1193	}
1194
1195	function addClass($class)
1196	{
1197		if (is_string($class)) {
1198			$class = explode(' ', $class);
1199		}
1200
1201		if (is_array($class)) {
1202			foreach($class as $c) {
1203				if (isset($this->class)) {
1204					if ($this->hasClass($c)) {
1205						continue;
1206					} else {
1207						$this->class .= ' ' . $c;
1208					}
1209				} else {
1210					$this->class = $c;
1211				}
1212			}
1213		} else {
1214			if (is_object($debug_object)) {
1215				$debug_object->debug_log(2, 'Invalid type: ', gettype($class));
1216			}
1217		}
1218	}
1219
1220	function hasClass($class)
1221	{
1222		if (is_string($class)) {
1223			if (isset($this->class)) {
1224				return in_array($class, explode(' ', $this->class), true);
1225			}
1226		} else {
1227			if (is_object($debug_object)) {
1228				$debug_object->debug_log(2, 'Invalid type: ', gettype($class));
1229			}
1230		}
1231
1232		return false;
1233	}
1234
1235	function removeClass($class = null)
1236	{
1237		if (!isset($this->class)) {
1238			return;
1239		}
1240
1241		if (is_null($class)) {
1242			$this->removeAttribute('class');
1243			return;
1244		}
1245
1246		if (is_string($class)) {
1247			$class = explode(' ', $class);
1248		}
1249
1250		if (is_array($class)) {
1251			$class = array_diff(explode(' ', $this->class), $class);
1252			if (empty($class)) {
1253				$this->removeAttribute('class');
1254			} else {
1255				$this->class = implode(' ', $class);
1256			}
1257		}
1258	}
1259
1260	function getAllAttributes()
1261	{
1262		return $this->attr;
1263	}
1264
1265	function getAttribute($name)
1266	{
1267		return $this->__get($name);
1268	}
1269
1270	function setAttribute($name, $value)
1271	{
1272		$this->__set($name, $value);
1273	}
1274
1275	function hasAttribute($name)
1276	{
1277		return $this->__isset($name);
1278	}
1279
1280	function removeAttribute($name)
1281	{
1282		$this->__set($name, null);
1283	}
1284
1285	function remove()
1286	{
1287		if ($this->parent) {
1288			$this->parent->removeChild($this);
1289		}
1290	}
1291
1292	function removeChild($node)
1293	{
1294		$nidx = array_search($node, $this->nodes, true);
1295		$cidx = array_search($node, $this->children, true);
1296		$didx = array_search($node, $this->dom->nodes, true);
1297
1298		if ($nidx !== false && $cidx !== false && $didx !== false) {
1299
1300			foreach($node->children as $child) {
1301				$node->removeChild($child);
1302			}
1303
1304			foreach($node->nodes as $entity) {
1305				$enidx = array_search($entity, $node->nodes, true);
1306				$edidx = array_search($entity, $node->dom->nodes, true);
1307
1308				if ($enidx !== false && $edidx !== false) {
1309					unset($node->nodes[$enidx]);
1310					unset($node->dom->nodes[$edidx]);
1311				}
1312			}
1313
1314			unset($this->nodes[$nidx]);
1315			unset($this->children[$cidx]);
1316			unset($this->dom->nodes[$didx]);
1317
1318			$node->clear();
1319
1320		}
1321	}
1322
1323	function getElementById($id)
1324	{
1325		return $this->find("#$id", 0);
1326	}
1327
1328	function getElementsById($id, $idx = null)
1329	{
1330		return $this->find("#$id", $idx);
1331	}
1332
1333	function getElementByTagName($name)
1334	{
1335		return $this->find($name, 0);
1336	}
1337
1338	function getElementsByTagName($name, $idx = null)
1339	{
1340		return $this->find($name, $idx);
1341	}
1342
1343	function parentNode()
1344	{
1345		return $this->parent();
1346	}
1347
1348	function childNodes($idx = -1)
1349	{
1350		return $this->children($idx);
1351	}
1352
1353	function firstChild()
1354	{
1355		return $this->first_child();
1356	}
1357
1358	function lastChild()
1359	{
1360		return $this->last_child();
1361	}
1362
1363	function nextSibling()
1364	{
1365		return $this->next_sibling();
1366	}
1367
1368	function previousSibling()
1369	{
1370		return $this->prev_sibling();
1371	}
1372
1373	function hasChildNodes()
1374	{
1375		return $this->has_child();
1376	}
1377
1378	function nodeName()
1379	{
1380		return $this->tag;
1381	}
1382
1383	function appendChild($node)
1384	{
1385		$node->parent($this);
1386		return $node;
1387	}
1388
1389}
1390
1391class simple_html_dom
1392{
	// Root node of the document; created in prepare() with the tag 'root'.
	public $root = null;
	// Nodes registered with this document; reset in prepare(), walked by clear().
	public $nodes = array();
	// Callback stored by set_callback() and reset by remove_callback().
	public $callback = null;
	// When true, tag and attribute names are lowercased while parsing.
	public $lowercase = false;
	// Byte length of the trimmed input, recorded in prepare().
	public $original_size;
	// Current byte length of $doc; refreshed whenever $doc is rewritten.
	public $size;

	// Byte offset of the parser inside $doc.
	protected $pos;
	// The document text being parsed (with noise replaced by placeholders).
	protected $doc;
	// Character at $pos, or null once $pos is past the end of $doc.
	protected $char;

	// Running counter used for the HDOM_INFO_BEGIN / HDOM_INFO_END markers.
	protected $cursor;
	// Node that currently receives newly parsed nodes (see link_nodes()).
	protected $parent;
	// Map of placeholder key => original text removed by remove_noise().
	protected $noise = array();
	// Characters treated as whitespace.
	protected $token_blank = " \t\r\n";
	// Characters that end an attribute name.
	protected $token_equal = ' =/>';
	// Characters that end a tag name.
	protected $token_slash = " />\r\n\t";
	// Characters that end an unquoted attribute value.
	protected $token_attr = ' >';

	// Charset determined by parse_charset().
	public $_charset = '';
	// Target charset handed to the constructor.
	public $_target_charset = '';

	// Text stored as inner text of every br element (see read_tag()).
	protected $default_br_text = '';

	// Default span text handed to prepare().
	public $default_span_text = '';

	// Elements that never become the current parent when opened.
	protected $self_closing_tags = array(
		'area' => 1,
		'base' => 1,
		'br' => 1,
		'col' => 1,
		'embed' => 1,
		'hr' => 1,
		'img' => 1,
		'input' => 1,
		'link' => 1,
		'meta' => 1,
		'param' => 1,
		'source' => 1,
		'track' => 1,
		'wbr' => 1
	);
	// Tags whose end tag may close still-open descendants (see read_tag()).
	protected $block_tags = array(
		'body' => 1,
		'div' => 1,
		'form' => 1,
		'root' => 1,
		'span' => 1,
		'table' => 1
	);
	// Map of tag => set of open parent tags that are closed implicitly when
	// that tag is opened (see read_tag()).
	protected $optional_closing_tags = array(
		// Not optional, see
		// https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
		'b' => array('b' => 1),
		'dd' => array('dd' => 1, 'dt' => 1),
		// Not optional, see
		// https://www.w3.org/TR/html/grouping-content.html#the-dl-element
		'dl' => array('dd' => 1, 'dt' => 1),
		'dt' => array('dd' => 1, 'dt' => 1),
		'li' => array('li' => 1),
		'optgroup' => array('optgroup' => 1, 'option' => 1),
		'option' => array('optgroup' => 1, 'option' => 1),
		'p' => array('p' => 1),
		'rp' => array('rp' => 1, 'rt' => 1),
		'rt' => array('rp' => 1, 'rt' => 1),
		'td' => array('td' => 1, 'th' => 1),
		'th' => array('td' => 1, 'th' => 1),
		'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
	);
1462
1463	function __construct(
1464		$str = null,
1465		$lowercase = true,
1466		$forceTagsClosed = true,
1467		$target_charset = DEFAULT_TARGET_CHARSET,
1468		$stripRN = true,
1469		$defaultBRText = DEFAULT_BR_TEXT,
1470		$defaultSpanText = DEFAULT_SPAN_TEXT,
1471		$options = 0)
1472	{
1473		if ($str) {
1474			if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
1475				$this->load_file($str);
1476			} else {
1477				$this->load(
1478					$str,
1479					$lowercase,
1480					$stripRN,
1481					$defaultBRText,
1482					$defaultSpanText,
1483					$options
1484				);
1485			}
1486		}
1487		// Forcing tags to be closed implies that we don't trust the html, but
1488		// it can lead to parsing errors if we SHOULD trust the html.
1489		if (!$forceTagsClosed) {
1490			$this->optional_closing_array = array();
1491		}
1492
1493		$this->_target_charset = $target_charset;
1494	}
1495
1496	function __destruct()
1497	{
1498		$this->clear();
1499	}
1500
1501	function load(
1502		$str,
1503		$lowercase = true,
1504		$stripRN = true,
1505		$defaultBRText = DEFAULT_BR_TEXT,
1506		$defaultSpanText = DEFAULT_SPAN_TEXT,
1507		$options = 0)
1508	{
1509		global $debug_object;
1510
1511		// prepare
1512		$this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
1513
1514		// Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1515		// Script tags removal now preceeds style tag removal.
1516		// strip out <script> tags
1517		$this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
1518		$this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
1519
1520		// strip out the \r \n's if we are told to.
1521		if ($stripRN) {
1522			$this->doc = str_replace("\r", ' ', $this->doc);
1523			$this->doc = str_replace("\n", ' ', $this->doc);
1524
1525			// set the length of content since we have changed it.
1526			$this->size = strlen($this->doc);
1527		}
1528
1529		// strip out cdata
1530		$this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1531		// strip out comments
1532		$this->remove_noise("'<!--(.*?)-->'is");
1533		// strip out <style> tags
1534		$this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
1535		$this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
1536		// strip out preformatted tags
1537		$this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
1538		// strip out server side scripts
1539		$this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
1540
1541		if($options & HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
1542			$this->remove_noise("'(\{\w)(.*?)(\})'s", true);
1543		}
1544
1545		// parsing
1546		$this->parse();
1547		// end
1548		$this->root->_[HDOM_INFO_END] = $this->cursor;
1549		$this->parse_charset();
1550
1551		// make load function chainable
1552		return $this;
1553	}
1554
1555	function load_file()
1556	{
1557		$args = func_get_args();
1558
1559		if(($doc = call_user_func_array('file_get_contents', $args)) !== false) {
1560			$this->load($doc, true);
1561		} else {
1562			return false;
1563		}
1564	}
1565
1566	function set_callback($function_name)
1567	{
1568		$this->callback = $function_name;
1569	}
1570
1571	function remove_callback()
1572	{
1573		$this->callback = null;
1574	}
1575
1576	function save($filepath = '')
1577	{
1578		$ret = $this->root->innertext();
1579		if ($filepath !== '') { file_put_contents($filepath, $ret, LOCK_EX); }
1580		return $ret;
1581	}
1582
1583	function find($selector, $idx = null, $lowercase = false)
1584	{
1585		return $this->root->find($selector, $idx, $lowercase);
1586	}
1587
1588	function clear()
1589	{
1590		if (isset($this->nodes)) {
1591			foreach ($this->nodes as $n) {
1592				$n->clear();
1593				$n = null;
1594			}
1595		}
1596
1597		// This add next line is documented in the sourceforge repository.
1598		// 2977248 as a fix for ongoing memory leaks that occur even with the
1599		// use of clear.
1600		if (isset($this->children)) {
1601			foreach ($this->children as $n) {
1602				$n->clear();
1603				$n = null;
1604			}
1605		}
1606
1607		if (isset($this->parent)) {
1608			$this->parent->clear();
1609			unset($this->parent);
1610		}
1611
1612		if (isset($this->root)) {
1613			$this->root->clear();
1614			unset($this->root);
1615		}
1616
1617		unset($this->doc);
1618		unset($this->noise);
1619	}
1620
1621	function dump($show_attr = true)
1622	{
1623		$this->root->dump($show_attr);
1624	}
1625
1626	protected function prepare(
1627		$str, $lowercase = true,
1628		$defaultBRText = DEFAULT_BR_TEXT,
1629		$defaultSpanText = DEFAULT_SPAN_TEXT)
1630	{
1631		$this->clear();
1632
1633		$this->doc = trim($str);
1634		$this->size = strlen($this->doc);
1635		$this->original_size = $this->size; // original size of the html
1636		$this->pos = 0;
1637		$this->cursor = 1;
1638		$this->noise = array();
1639		$this->nodes = array();
1640		$this->lowercase = $lowercase;
1641		$this->default_br_text = $defaultBRText;
1642		$this->default_span_text = $defaultSpanText;
1643		$this->root = new simple_html_dom_node($this);
1644		$this->root->tag = 'root';
1645		$this->root->_[HDOM_INFO_BEGIN] = -1;
1646		$this->root->nodetype = HDOM_TYPE_ROOT;
1647		$this->parent = $this->root;
1648		if ($this->size > 0) { $this->char = $this->doc[0]; }
1649	}
1650
1651	protected function parse()
1652	{
1653		while (true) {
1654			// Read next tag if there is no text between current position and the
1655			// next opening tag.
1656			if (($s = $this->copy_until_char('<')) === '') {
1657				if($this->read_tag()) {
1658					continue;
1659				} else {
1660					return true;
1661				}
1662			}
1663
1664			// Add a text node for text between tags
1665			$node = new simple_html_dom_node($this);
1666			++$this->cursor;
1667			$node->_[HDOM_INFO_TEXT] = $s;
1668			$this->link_nodes($node, false);
1669		}
1670	}
1671
1672	protected function parse_charset()
1673	{
1674		global $debug_object;
1675
1676		$charset = null;
1677
1678		if (function_exists('get_last_retrieve_url_contents_content_type')) {
1679			$contentTypeHeader = get_last_retrieve_url_contents_content_type();
1680			$success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
1681			if ($success) {
1682				$charset = $matches[1];
1683				if (is_object($debug_object)) {
1684					$debug_object->debug_log(2,
1685						'header content-type found charset of: '
1686						. $charset
1687					);
1688				}
1689			}
1690		}
1691
1692		if (empty($charset)) {
1693			// https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
1694			$el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);
1695
1696			if (!empty($el)) {
1697				$fullvalue = $el->content;
1698				if (is_object($debug_object)) {
1699					$debug_object->debug_log(2,
1700						'meta content-type tag found'
1701						. $fullvalue
1702					);
1703				}
1704
1705				if (!empty($fullvalue)) {
1706					$success = preg_match(
1707						'/charset=(.+)/i',
1708						$fullvalue,
1709						$matches
1710					);
1711
1712					if ($success) {
1713						$charset = $matches[1];
1714					} else {
1715						// If there is a meta tag, and they don't specify the
1716						// character set, research says that it's typically
1717						// ISO-8859-1
1718						if (is_object($debug_object)) {
1719							$debug_object->debug_log(2,
1720								'meta content-type tag couldn\'t be parsed. using iso-8859 default.'
1721							);
1722						}
1723
1724						$charset = 'ISO-8859-1';
1725					}
1726				}
1727			}
1728		}
1729
1730		if (empty($charset)) {
1731			// https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
1732			if ($meta = $this->root->find('meta[charset]', 0)) {
1733				$charset = $meta->charset;
1734				if (is_object($debug_object)) {
1735					$debug_object->debug_log(2, 'meta charset: ' . $charset);
1736				}
1737			}
1738		}
1739
1740		if (empty($charset)) {
1741			// Try to guess the charset based on the content
1742			// Requires Multibyte String (mbstring) support (optional)
1743			if (function_exists('mb_detect_encoding')) {
1744				/**
1745				 * mb_detect_encoding() is not intended to distinguish between
1746				 * charsets, especially single-byte charsets. Its primary
1747				 * purpose is to detect which multibyte encoding is in use,
1748				 * i.e. UTF-8, UTF-16, shift-JIS, etc.
1749				 *
1750				 * -- https://bugs.php.net/bug.php?id=38138
1751				 *
1752				 * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
1753				 * always result in CP1251/ISO-8859-5 and vice versa.
1754				 *
1755				 * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
1756				 * to stay compatible.
1757				 */
1758				$encoding = mb_detect_encoding(
1759					$this->doc,
1760					array( 'UTF-8', 'CP1252', 'ISO-8859-1' )
1761				);
1762
1763				if ($encoding === 'CP1252' || $encoding === 'ISO-8859-1') {
1764					// Due to a limitation of mb_detect_encoding
1765					// 'CP1251'/'ISO-8859-5' will be detected as
1766					// 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
1767					// which case we can simply assume it is the other charset.
1768					if (!@iconv('CP1252', 'UTF-8', $this->doc)) {
1769						$encoding = 'CP1251';
1770					}
1771				}
1772
1773				if ($encoding !== false) {
1774					$charset = $encoding;
1775					if (is_object($debug_object)) {
1776						$debug_object->debug_log(2, 'mb_detect: ' . $charset);
1777					}
1778				}
1779			}
1780		}
1781
1782		if (empty($charset)) {
1783			// Assume it's UTF-8 as it is the most likely charset to be used
1784			$charset = 'UTF-8';
1785			if (is_object($debug_object)) {
1786				$debug_object->debug_log(2, 'No match found, assume ' . $charset);
1787			}
1788		}
1789
1790		// Since CP1252 is a superset, if we get one of it's subsets, we want
1791		// it instead.
1792		if ((strtolower($charset) == 'iso-8859-1')
1793			|| (strtolower($charset) == 'latin1')
1794			|| (strtolower($charset) == 'latin-1')) {
1795			$charset = 'CP1252';
1796			if (is_object($debug_object)) {
1797				$debug_object->debug_log(2,
1798					'replacing ' . $charset . ' with CP1252 as its a superset'
1799				);
1800			}
1801		}
1802
1803		if (is_object($debug_object)) {
1804			$debug_object->debug_log(1, 'EXIT - ' . $charset);
1805		}
1806
1807		return $this->_charset = $charset;
1808	}
1809
1810	protected function read_tag()
1811	{
1812		// Set end position if no further tags found
1813		if ($this->char !== '<') {
1814			$this->root->_[HDOM_INFO_END] = $this->cursor;
1815			return false;
1816		}
1817
1818		$begin_tag_pos = $this->pos;
1819		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1820
1821		// end tag
1822		if ($this->char === '/') {
1823			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1824
1825			// Skip whitespace in end tags (i.e. in "</   html>")
1826			$this->skip($this->token_blank);
1827			$tag = $this->copy_until_char('>');
1828
1829			// Skip attributes in end tags
1830			if (($pos = strpos($tag, ' ')) !== false) {
1831				$tag = substr($tag, 0, $pos);
1832			}
1833
1834			$parent_lower = strtolower($this->parent->tag);
1835			$tag_lower = strtolower($tag);
1836
1837			// The end tag is supposed to close the parent tag. Handle situations
1838			// when it doesn't
1839			if ($parent_lower !== $tag_lower) {
1840				// Parent tag does not have to be closed necessarily (optional closing tag)
1841				// Current tag is a block tag, so it may close an ancestor
1842				if (isset($this->optional_closing_tags[$parent_lower])
1843					&& isset($this->block_tags[$tag_lower])) {
1844
1845					$this->parent->_[HDOM_INFO_END] = 0;
1846					$org_parent = $this->parent;
1847
1848					// Traverse ancestors to find a matching opening tag
1849					// Stop at root node
1850					while (($this->parent->parent)
1851						&& strtolower($this->parent->tag) !== $tag_lower
1852					){
1853						$this->parent = $this->parent->parent;
1854					}
1855
1856					// If we don't have a match add current tag as text node
1857					if (strtolower($this->parent->tag) !== $tag_lower) {
1858						$this->parent = $org_parent; // restore origonal parent
1859
1860						if ($this->parent->parent) {
1861							$this->parent = $this->parent->parent;
1862						}
1863
1864						$this->parent->_[HDOM_INFO_END] = $this->cursor;
1865						return $this->as_text_node($tag);
1866					}
1867				} elseif (($this->parent->parent)
1868					&& isset($this->block_tags[$tag_lower])
1869				) {
1870					// Grandparent exists and current tag is a block tag, so our
1871					// parent doesn't have an end tag
1872					$this->parent->_[HDOM_INFO_END] = 0; // No end tag
1873					$org_parent = $this->parent;
1874
1875					// Traverse ancestors to find a matching opening tag
1876					// Stop at root node
1877					while (($this->parent->parent)
1878						&& strtolower($this->parent->tag) !== $tag_lower
1879					) {
1880						$this->parent = $this->parent->parent;
1881					}
1882
1883					// If we don't have a match add current tag as text node
1884					if (strtolower($this->parent->tag) !== $tag_lower) {
1885						$this->parent = $org_parent; // restore origonal parent
1886						$this->parent->_[HDOM_INFO_END] = $this->cursor;
1887						return $this->as_text_node($tag);
1888					}
1889				} elseif (($this->parent->parent)
1890					&& strtolower($this->parent->parent->tag) === $tag_lower
1891				) { // Grandparent exists and current tag closes it
1892					$this->parent->_[HDOM_INFO_END] = 0;
1893					$this->parent = $this->parent->parent;
1894				} else { // Random tag, add as text node
1895					return $this->as_text_node($tag);
1896				}
1897			}
1898
1899			// Set end position of parent tag to current cursor position
1900			$this->parent->_[HDOM_INFO_END] = $this->cursor;
1901
1902			if ($this->parent->parent) {
1903				$this->parent = $this->parent->parent;
1904			}
1905
1906			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1907			return true;
1908		}
1909
1910		// start tag
1911		$node = new simple_html_dom_node($this);
1912		$node->_[HDOM_INFO_BEGIN] = $this->cursor;
1913		++$this->cursor;
1914		$tag = $this->copy_until($this->token_slash); // Get tag name
1915		$node->tag_start = $begin_tag_pos;
1916
1917		// doctype, cdata & comments...
1918		// <!DOCTYPE html>
1919		// <![CDATA[ ... ]]>
1920		// <!-- Comment -->
1921		if (isset($tag[0]) && $tag[0] === '!') {
1922			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
1923
1924			if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
1925				$node->nodetype = HDOM_TYPE_COMMENT;
1926				$node->tag = 'comment';
1927			} else { // Could be doctype or CDATA but we don't care
1928				$node->nodetype = HDOM_TYPE_UNKNOWN;
1929				$node->tag = 'unknown';
1930			}
1931
1932			if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }
1933
1934			$this->link_nodes($node, true);
1935			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1936			return true;
1937		}
1938
1939		// The start tag cannot contain another start tag, if so add as text
1940		// i.e. "<<html>"
1941		if ($pos = strpos($tag, '<') !== false) {
1942			$tag = '<' . substr($tag, 0, -1);
1943			$node->_[HDOM_INFO_TEXT] = $tag;
1944			$this->link_nodes($node, false);
1945			$this->char = $this->doc[--$this->pos]; // prev
1946			return true;
1947		}
1948
1949		// Handle invalid tag names (i.e. "<html#doc>")
1950		if (!preg_match('/^\w[\w:-]*$/', $tag)) {
1951			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
1952
1953			// Next char is the beginning of a new tag, don't touch it.
1954			if ($this->char === '<') {
1955				$this->link_nodes($node, false);
1956				return true;
1957			}
1958
1959			// Next char closes current tag, add and be done with it.
1960			if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }
1961			$this->link_nodes($node, false);
1962			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1963			return true;
1964		}
1965
1966		// begin tag, add new node
1967		$node->nodetype = HDOM_TYPE_ELEMENT;
1968		$tag_lower = strtolower($tag);
1969		$node->tag = ($this->lowercase) ? $tag_lower : $tag;
1970
1971		// handle optional closing tags
1972		if (isset($this->optional_closing_tags[$tag_lower])) {
1973			// Traverse ancestors to close all optional closing tags
1974			while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
1975				$this->parent->_[HDOM_INFO_END] = 0;
1976				$this->parent = $this->parent->parent;
1977			}
1978			$node->parent = $this->parent;
1979		}
1980
1981		$guard = 0; // prevent infinity loop
1982
1983		// [0] Space between tag and first attribute
1984		$space = array($this->copy_skip($this->token_blank), '', '');
1985
1986		// attributes
1987		do {
1988			// Everything until the first equal sign should be the attribute name
1989			$name = $this->copy_until($this->token_equal);
1990
1991			if ($name === '' && $this->char !== null && $space[0] === '') {
1992				break;
1993			}
1994
1995			if ($guard === $this->pos) { // Escape infinite loop
1996				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1997				continue;
1998			}
1999
2000			$guard = $this->pos;
2001
2002			// handle endless '<'
2003			// Out of bounds before the tag ended
2004			if ($this->pos >= $this->size - 1 && $this->char !== '>') {
2005				$node->nodetype = HDOM_TYPE_TEXT;
2006				$node->_[HDOM_INFO_END] = 0;
2007				$node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
2008				$node->tag = 'text';
2009				$this->link_nodes($node, false);
2010				return true;
2011			}
2012
2013			// handle mismatch '<'
2014			// Attributes cannot start after opening tag
2015			if ($this->doc[$this->pos - 1] == '<') {
2016				$node->nodetype = HDOM_TYPE_TEXT;
2017				$node->tag = 'text';
2018				$node->attr = array();
2019				$node->_[HDOM_INFO_END] = 0;
2020				$node->_[HDOM_INFO_TEXT] = substr(
2021					$this->doc,
2022					$begin_tag_pos,
2023					$this->pos - $begin_tag_pos - 1
2024				);
2025				$this->pos -= 2;
2026				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2027				$this->link_nodes($node, false);
2028				return true;
2029			}
2030
2031			if ($name !== '/' && $name !== '') { // this is a attribute name
2032				// [1] Whitespace after attribute name
2033				$space[1] = $this->copy_skip($this->token_blank);
2034
2035				$name = $this->restore_noise($name); // might be a noisy name
2036
2037				if ($this->lowercase) { $name = strtolower($name); }
2038
2039				if ($this->char === '=') { // attribute with value
2040					$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2041					$this->parse_attr($node, $name, $space); // get attribute value
2042				} else {
2043					//no value attr: nowrap, checked selected...
2044					$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
2045					$node->attr[$name] = true;
2046					if ($this->char != '>') { $this->char = $this->doc[--$this->pos]; } // prev
2047				}
2048
2049				$node->_[HDOM_INFO_SPACE][] = $space;
2050
2051				// prepare for next attribute
2052				$space = array(
2053					$this->copy_skip($this->token_blank),
2054					'',
2055					''
2056				);
2057			} else { // no more attributes
2058				break;
2059			}
2060		} while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended
2061
2062		$this->link_nodes($node, true);
2063		$node->_[HDOM_INFO_ENDSPACE] = $space[0];
2064
2065		// handle empty tags (i.e. "<div/>")
2066		if ($this->copy_until_char('>') === '/') {
2067			$node->_[HDOM_INFO_ENDSPACE] .= '/';
2068			$node->_[HDOM_INFO_END] = 0;
2069		} else {
2070			// reset parent
2071			if (!isset($this->self_closing_tags[strtolower($node->tag)])) {
2072				$this->parent = $node;
2073			}
2074		}
2075
2076		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2077
2078		// If it's a BR tag, we need to set it's text to the default text.
2079		// This way when we see it in plaintext, we can generate formatting that the user wants.
2080		// since a br tag never has sub nodes, this works well.
2081		if ($node->tag === 'br') {
2082			$node->_[HDOM_INFO_INNER] = $this->default_br_text;
2083		}
2084
2085		return true;
2086	}
2087
2088	protected function parse_attr($node, $name, &$space)
2089	{
2090		$is_duplicate = isset($node->attr[$name]);
2091
2092		if (!$is_duplicate) // Copy whitespace between "=" and value
2093			$space[2] = $this->copy_skip($this->token_blank);
2094
2095		switch ($this->char) {
2096			case '"':
2097				$quote_type = HDOM_QUOTE_DOUBLE;
2098				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2099				$value = $this->copy_until_char('"');
2100				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2101				break;
2102			case '\'':
2103				$quote_type = HDOM_QUOTE_SINGLE;
2104				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2105				$value = $this->copy_until_char('\'');
2106				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2107				break;
2108			default:
2109				$quote_type = HDOM_QUOTE_NO;
2110				$value = $this->copy_until($this->token_attr);
2111		}
2112
2113		$value = $this->restore_noise($value);
2114
2115		// PaperG: Attributes should not have \r or \n in them, that counts as
2116        // html whitespace.
2117
2118//      The following was commented out as it interferes with DokuWiki edit mode - nomadjimbob
2119//
2120//		$value = str_replace("\r", '', $value);
2121//		$value = str_replace("\n", '', $value);
2122
2123		// PaperG: If this is a "class" selector, lets get rid of the preceeding
2124		// and trailing space since some people leave it in the multi class case.
2125		if ($name === 'class') {
2126			$value = trim($value);
2127		}
2128
2129		if (!$is_duplicate) {
2130			$node->_[HDOM_INFO_QUOTE][] = $quote_type;
2131			$node->attr[$name] = $value;
2132		}
2133	}
2134
2135	protected function link_nodes(&$node, $is_child)
2136	{
2137		$node->parent = $this->parent;
2138		$this->parent->nodes[] = $node;
2139		if ($is_child) {
2140			$this->parent->children[] = $node;
2141		}
2142	}
2143
2144	protected function as_text_node($tag)
2145	{
2146		$node = new simple_html_dom_node($this);
2147		++$this->cursor;
2148		$node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
2149		$this->link_nodes($node, false);
2150		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2151		return true;
2152	}
2153
2154	protected function skip($chars)
2155	{
2156		$this->pos += strspn($this->doc, $chars, $this->pos);
2157		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2158	}
2159
2160	protected function copy_skip($chars)
2161	{
2162		$pos = $this->pos;
2163		$len = strspn($this->doc, $chars, $pos);
2164		$this->pos += $len;
2165		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2166		if ($len === 0) { return ''; }
2167		return substr($this->doc, $pos, $len);
2168	}
2169
2170	protected function copy_until($chars)
2171	{
2172		$pos = $this->pos;
2173		$len = strcspn($this->doc, $chars, $pos);
2174		$this->pos += $len;
2175		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2176		return substr($this->doc, $pos, $len);
2177	}
2178
2179	protected function copy_until_char($char)
2180	{
2181		if ($this->char === null) { return ''; }
2182
2183		if (($pos = strpos($this->doc, $char, $this->pos)) === false) {
2184			$ret = substr($this->doc, $this->pos, $this->size - $this->pos);
2185			$this->char = null;
2186			$this->pos = $this->size;
2187			return $ret;
2188		}
2189
2190		if ($pos === $this->pos) { return ''; }
2191
2192		$pos_old = $this->pos;
2193		$this->char = $this->doc[$pos];
2194		$this->pos = $pos;
2195		return substr($this->doc, $pos_old, $pos - $pos_old);
2196	}
2197
2198	protected function remove_noise($pattern, $remove_tag = false)
2199	{
2200		global $debug_object;
2201		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
2202
2203		$count = preg_match_all(
2204			$pattern,
2205			$this->doc,
2206			$matches,
2207			PREG_SET_ORDER | PREG_OFFSET_CAPTURE
2208		);
2209
2210		for ($i = $count - 1; $i > -1; --$i) {
2211			$key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);
2212
2213			if (is_object($debug_object)) {
2214				$debug_object->debug_log(2, 'key is: ' . $key);
2215			}
2216
2217			$idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch
2218			$this->noise[$key] = $matches[$i][$idx][0];
2219			$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
2220		}
2221
2222		// reset the length of content
2223		$this->size = strlen($this->doc);
2224
2225		if ($this->size > 0) {
2226			$this->char = $this->doc[0];
2227		}
2228	}
2229
2230	function restore_noise($text)
2231	{
2232		global $debug_object;
2233		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
2234
2235		while (($pos = strpos($text, '___noise___')) !== false) {
2236			// Sometimes there is a broken piece of markup, and we don't GET the
2237			// pos+11 etc... token which indicates a problem outside of us...
2238
2239			// todo: "___noise___1000" (or any number with four or more digits)
2240			// in the DOM causes an infinite loop which could be utilized by
2241			// malicious software
2242			if (strlen($text) > $pos + 15) {
2243				$key = '___noise___'
2244				. $text[$pos + 11]
2245				. $text[$pos + 12]
2246				. $text[$pos + 13]
2247				. $text[$pos + 14]
2248				. $text[$pos + 15];
2249
2250				if (is_object($debug_object)) {
2251					$debug_object->debug_log(2, 'located key of: ' . $key);
2252				}
2253
2254				if (isset($this->noise[$key])) {
2255					$text = substr($text, 0, $pos)
2256					. $this->noise[$key]
2257					. substr($text, $pos + 16);
2258				} else {
2259					// do this to prevent an infinite loop.
2260					$text = substr($text, 0, $pos)
2261					. 'UNDEFINED NOISE FOR KEY: '
2262					. $key
2263					. substr($text, $pos + 16);
2264				}
2265			} else {
2266				// There is no valid key being given back to us... We must get
2267				// rid of the ___noise___ or we will have a problem.
2268				$text = substr($text, 0, $pos)
2269				. 'NO NUMERIC NOISE KEY'
2270				. substr($text, $pos + 11);
2271			}
2272		}
2273		return $text;
2274	}
2275
2276	function search_noise($text)
2277	{
2278		global $debug_object;
2279		if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
2280
2281		foreach($this->noise as $noiseElement) {
2282			if (strpos($noiseElement, $text) !== false) {
2283				return $noiseElement;
2284			}
2285		}
2286	}
2287
2288	function __toString()
2289	{
2290		return $this->root->innertext();
2291	}
2292
2293	function __get($name)
2294	{
2295		switch ($name) {
2296			case 'outertext':
2297				return $this->root->innertext();
2298			case 'innertext':
2299				return $this->root->innertext();
2300			case 'plaintext':
2301				return $this->root->text();
2302			case 'charset':
2303				return $this->_charset;
2304			case 'target_charset':
2305				return $this->_target_charset;
2306		}
2307	}
2308
2309	function childNodes($idx = -1)
2310	{
2311		return $this->root->childNodes($idx);
2312	}
2313
2314	function firstChild()
2315	{
2316		return $this->root->first_child();
2317	}
2318
2319	function lastChild()
2320	{
2321		return $this->root->last_child();
2322	}
2323
2324	function createElement($name, $value = null)
2325	{
2326		return @str_get_html("<$name>$value</$name>")->firstChild();
2327	}
2328
2329	function createTextNode($value)
2330	{
2331		return @end(str_get_html($value)->nodes);
2332	}
2333
2334	function getElementById($id)
2335	{
2336		return $this->find("#$id", 0);
2337	}
2338
2339	function getElementsById($id, $idx = null)
2340	{
2341		return $this->find("#$id", $idx);
2342	}
2343
2344	function getElementByTagName($name)
2345	{
2346		return $this->find($name, 0);
2347	}
2348
2349	function getElementsByTagName($name, $idx = -1)
2350	{
2351		return $this->find($name, $idx);
2352	}
2353
2354	function loadFile()
2355	{
2356		$args = func_get_args();
2357		$this->load_file($args);
2358	}
2359}
2360