1<?php
2namespace imapmarkers;  // Added by: Kai Thoene <k.git.thoene@gmx.net>
3/**
4 * Website: http://sourceforge.net/projects/simplehtmldom/
5 * Additional projects: http://sourceforge.net/projects/debugobject/
6 * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
7 *
8 * Licensed under The MIT License
9 * See the LICENSE file in the project root for more information.
10 *
11 * Authors:
12 *   S.C. Chen
13 *   John Schlick
14 *   Rus Carroll
15 *   logmanoriginal
16 *
17 * Contributors:
18 *   Yousuke Kumakura
19 *   Vadim Voituk
20 *   Antcs
21 *
22 * Version Rev. 1.9.1 (291)
23 */
24
// Node types, stored in simple_html_dom_node::$nodetype.
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);

// Attribute quoting styles, read by simple_html_dom_node::makeup().
// Note that HDOM_QUOTE_NO is 3, not 2.
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);

// Keys of the per-node bookkeeping array simple_html_dom_node::$_.
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);

// Overridable defaults: each one is only defined when the including code has
// not already defined a constant of the same name.
if (!defined('DEFAULT_TARGET_CHARSET')) {
	define('DEFAULT_TARGET_CHARSET', 'UTF-8');
}
if (!defined('DEFAULT_BR_TEXT')) {
	define('DEFAULT_BR_TEXT', "\r\n");
}
if (!defined('DEFAULT_SPAN_TEXT')) {
	define('DEFAULT_SPAN_TEXT', ' ');
}
if (!defined('MAX_FILE_SIZE')) {
	define('MAX_FILE_SIZE', 600000);
}

// NOTE(review): not referenced in the visible part of this file; presumably a
// parser option flag - confirm against simple_html_dom::load().
define('HDOM_SMARTY_AS_TEXT', 1);
48
/**
 * Read a document with file_get_contents() and parse it into a DOM.
 *
 * The first five parameters are forwarded to file_get_contents(); the
 * remaining ones are forwarded to the simple_html_dom constructor and, for
 * $lowercase and $stripRN, also to its load() method.
 *
 * @param string $url Path or URL handed to file_get_contents().
 * @param bool $use_include_path Forwarded to file_get_contents().
 * @param resource|null $context Forwarded to file_get_contents().
 * @param int $offset Forwarded to file_get_contents().
 * @param int $maxLen Maximum number of bytes to read; any value <= 0 is
 * replaced by MAX_FILE_SIZE.
 * @param bool $lowercase Passed to the DOM constructor and to load().
 * @param bool $forceTagsClosed Passed to the DOM constructor.
 * @param string $target_charset Passed to the DOM constructor.
 * @param bool $stripRN Passed to the DOM constructor and to load().
 * @param string $defaultBRText Passed to the DOM constructor.
 * @param string $defaultSpanText Passed to the DOM constructor.
 * @return mixed Whatever simple_html_dom::load() returns, or false when the
 * content is empty (per PHP's empty(), so also the string "0") or longer
 * than $maxLen.
 */
function file_get_html(
	$url,
	$use_include_path = false,
	$context = null,
	$offset = 0,
	$maxLen = -1,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT) {
	// A non-positive limit means "use the library-wide maximum".
	$maxLen = ($maxLen <= 0) ? MAX_FILE_SIZE : $maxLen;

	// The DOM is created before the read, exactly as callers have always seen.
	$dom = new simple_html_dom(
		null,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	$contents = file_get_contents($url, $use_include_path, $context, $offset, $maxLen);

	$usable = !empty($contents) && strlen($contents) <= $maxLen;

	if (!$usable) {
		$dom->clear();
		return false;
	}

	return $dom->load($contents, $lowercase, $stripRN);
}
95
/**
 * Parse an HTML string into a DOM.
 *
 * @param string $str Markup to parse.
 * @param bool $lowercase Passed to the DOM constructor and to load().
 * @param bool $forceTagsClosed Passed to the DOM constructor.
 * @param string $target_charset Passed to the DOM constructor.
 * @param bool $stripRN Passed to the DOM constructor and to load().
 * @param string $defaultBRText Passed to the DOM constructor.
 * @param string $defaultSpanText Passed to the DOM constructor.
 * @return mixed Whatever simple_html_dom::load() returns, or false when $str
 * is empty (per PHP's empty(), so also the string "0") or longer than
 * MAX_FILE_SIZE bytes.
 */
function str_get_html(
	$str,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT) {
	$dom = new simple_html_dom(
		null,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	$usable = !empty($str) && strlen($str) <= MAX_FILE_SIZE;

	if (!$usable) {
		$dom->clear();
		return false;
	}

	return $dom->load($str, $lowercase, $stripRN);
}
121
/**
 * Print the tree below $node by delegating to the node's dump() method.
 *
 * Earlier code passed $node itself as the first argument of dump(), so
 * $show_attr and $deep were ignored and attributes were always printed.
 *
 * @param object $node Node exposing dump($show_attr, $depth).
 * @param bool $show_attr Whether attributes are printed next to each tag.
 * @param int $deep Indentation depth used for $node itself.
 * @return void
 */
function dump_html_tree($node, $show_attr = true, $deep = 0) {
	$node->dump($show_attr, $deep);
}
125
126class simple_html_dom_node {
127	public $nodetype = HDOM_TYPE_TEXT;
128	public $tag = 'text';
129	public $attr = array();
130	public $children = array();
131	public $nodes = array();
132	public $parent = null;
133	public $_ = array();
134	public $tag_start = 0;
135	private $dom = null;
136
137	function __construct($dom) {
138		$this->dom = $dom;
139		$dom->nodes[] = $this;
140	}
141
142	function __destruct() {
143		$this->clear();
144	}
145
146	function __toString() {
147		return $this->outertext();
148	}
149
150	function clear() {
151		$this->dom = null;
152		$this->nodes = null;
153		$this->parent = null;
154		$this->children = null;
155	}
156
157	function dump($show_attr = true, $depth = 0) {
158		echo str_repeat("\t", $depth) . $this->tag;
159
160		if ($show_attr && count($this->attr) > 0) {
161			echo '(';
162			foreach ($this->attr as $k => $v) {
163				echo "[$k]=>\"$v\", ";
164			}
165			echo ')';
166		}
167
168		echo "\n";
169
170		if ($this->nodes) {
171			foreach ($this->nodes as $node) {
172				$node->dump($show_attr, $depth + 1);
173			}
174		}
175	}
176
177	function dump_node($echo = true) {
178		$string = $this->tag;
179
180		if (count($this->attr) > 0) {
181			$string .= '(';
182			foreach ($this->attr as $k => $v) {
183				$string .= "[$k]=>\"$v\", ";
184			}
185			$string .= ')';
186		}
187
188		if (count($this->_) > 0) {
189			$string .= ' $_ (';
190			foreach ($this->_ as $k => $v) {
191				if (is_array($v)) {
192					$string .= "[$k]=>(";
193					foreach ($v as $k2 => $v2) {
194						$string .= "[$k2]=>\"$v2\", ";
195					}
196					$string .= ')';
197				} else {
198					$string .= "[$k]=>\"$v\", ";
199				}
200			}
201			$string .= ')';
202		}
203
204		if (isset($this->text)) {
205			$string .= " text: ({$this->text})";
206		}
207
208		$string .= ' HDOM_INNER_INFO: ';
209
210		if (isset($node->_[HDOM_INFO_INNER])) {
211			$string .= "'" . $node->_[HDOM_INFO_INNER] . "'";
212		} else {
213			$string .= ' NULL ';
214		}
215
216		$string .= ' children: ' . count($this->children);
217		$string .= ' nodes: ' . count($this->nodes);
218		$string .= ' tag_start: ' . $this->tag_start;
219		$string .= "\n";
220
221		if ($echo) {
222			echo $string;
223			return;
224		} else {
225			return $string;
226		}
227	}
228
229	function parent($parent = null) {
230		// I am SURE that this doesn't work properly.
231		// It fails to unset the current node from it's current parents nodes or
232		// children list first.
233		if ($parent !== null) {
234			$this->parent = $parent;
235			$this->parent->nodes[] = $this;
236			$this->parent->children[] = $this;
237		}
238
239		return $this->parent;
240	}
241
242	function has_child() {
243		return !empty($this->children);
244	}
245
246	function children($idx = -1) {
247		if ($idx === -1) {
248			return $this->children;
249		}
250
251		if (isset($this->children[$idx])) {
252			return $this->children[$idx];
253		}
254
255		return null;
256	}
257
258	function first_child() {
259		if (count($this->children) > 0) {
260			return $this->children[0];
261		}
262		return null;
263	}
264
265	function last_child() {
266		if (count($this->children) > 0) {
267			return end($this->children);
268		}
269		return null;
270	}
271
272	function next_sibling() {
273		if ($this->parent === null) {
274			return null;
275		}
276
277		$idx = array_search($this, $this->parent->children, true);
278
279		if ($idx !== false && isset($this->parent->children[$idx + 1])) {
280			return $this->parent->children[$idx + 1];
281		}
282
283		return null;
284	}
285
286	function prev_sibling() {
287		if ($this->parent === null) {
288			return null;
289		}
290
291		$idx = array_search($this, $this->parent->children, true);
292
293		if ($idx !== false && $idx > 0) {
294			return $this->parent->children[$idx - 1];
295		}
296
297		return null;
298	}
299
300	function find_ancestor_tag($tag) {
301		global $debug_object;
302		if (is_object($debug_object)) {
303			$debug_object->debug_log_entry(1);
304		}
305
306		if ($this->parent === null) {
307			return null;
308		}
309
310		$ancestor = $this->parent;
311
312		while (!is_null($ancestor)) {
313			if (is_object($debug_object)) {
314				$debug_object->debug_log(2, 'Current tag is: ' . $ancestor->tag);
315			}
316
317			if ($ancestor->tag === $tag) {
318				break;
319			}
320
321			$ancestor = $ancestor->parent;
322		}
323
324		return $ancestor;
325	}
326
327	function innertext() {
328		if (isset($this->_[HDOM_INFO_INNER])) {
329			return $this->_[HDOM_INFO_INNER];
330		}
331
332		if (isset($this->_[HDOM_INFO_TEXT])) {
333			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
334		}
335
336		$ret = '';
337
338		foreach ($this->nodes as $n) {
339			$ret .= $n->outertext();
340		}
341
342		return $ret;
343	}
344
345	function outertext() {
346		global $debug_object;
347
348		if (is_object($debug_object)) {
349			$text = '';
350
351			if ($this->tag === 'text') {
352				if (!empty($this->text)) {
353					$text = ' with text: ' . $this->text;
354				}
355			}
356
357			$debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
358		}
359
360		if ($this->tag === 'root') {
361			return $this->innertext();
362		}
363
364		// todo: What is the use of this callback? Remove?
365		if ($this->dom && $this->dom->callback !== null) {
366			call_user_func_array($this->dom->callback, array($this));
367		}
368
369		if (isset($this->_[HDOM_INFO_OUTER])) {
370			return $this->_[HDOM_INFO_OUTER];
371		}
372
373		if (isset($this->_[HDOM_INFO_TEXT])) {
374			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
375		}
376
377		$ret = '';
378
379		if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]) {
380			$ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
381		}
382
383		if (isset($this->_[HDOM_INFO_INNER])) {
384			// todo: <br> should either never have HDOM_INFO_INNER or always
385			if ($this->tag !== 'br') {
386				$ret .= $this->_[HDOM_INFO_INNER];
387			}
388		} elseif ($this->nodes) {
389			foreach ($this->nodes as $n) {
390				$ret .= $this->convert_text($n->outertext());
391			}
392		}
393
394		if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
395			$ret .= '</' . $this->tag . '>';
396		}
397
398		return $ret;
399	}
400
401	function text() {
402		if (isset($this->_[HDOM_INFO_INNER])) {
403			return $this->_[HDOM_INFO_INNER];
404		}
405
406		switch ($this->nodetype) {
407			case HDOM_TYPE_TEXT:
408				return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
409			case HDOM_TYPE_COMMENT:
410				return '';
411			case HDOM_TYPE_UNKNOWN:
412				return '';
413		}
414
415		if (strcasecmp($this->tag, 'script') === 0) {
416			return '';
417		}
418		if (strcasecmp($this->tag, 'style') === 0) {
419			return '';
420		}
421
422		$ret = '';
423
424		// In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed
425		// for some span tags, and some p tags) $this->nodes is set to NULL.
426		// NOTE: This indicates that there is a problem where it's set to NULL
427		// without a clear happening.
428		// WHY is this happening?
429		if (!is_null($this->nodes)) {
430			foreach ($this->nodes as $n) {
431				// Start paragraph after a blank line
432				if ($n->tag === 'p') {
433					$ret = trim($ret) . "\n\n";
434				}
435
436				$ret .= $this->convert_text($n->text());
437
438				// If this node is a span... add a space at the end of it so
439				// multiple spans don't run into each other.  This is plaintext
440				// after all.
441				if ($n->tag === 'span') {
442					$ret .= $this->dom->default_span_text;
443				}
444			}
445		}
446		return $ret;
447	}
448
449	function xmltext() {
450		$ret = $this->innertext();
451		$ret = str_ireplace('<![CDATA[', '', $ret);
452		$ret = str_replace(']]>', '', $ret);
453		return $ret;
454	}
455
456	function makeup() {
457		// text, comment, unknown
458		if (isset($this->_[HDOM_INFO_TEXT])) {
459			return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
460		}
461
462		$ret = '<' . $this->tag;
463		$i = -1;
464
465		foreach ($this->attr as $key => $val) {
466			++$i;
467
468			// skip removed attribute
469			if ($val === null || $val === false) {
470				continue;
471			}
472
473			$ret .= $this->_[HDOM_INFO_SPACE][$i][0];
474
475			//no value attr: nowrap, checked selected...
476			if ($val === true) {
477				$ret .= $key;
478			} else {
479				switch ($this->_[HDOM_INFO_QUOTE][$i]) {
480					case HDOM_QUOTE_DOUBLE:
481						$quote = '"';
482						break;
483					case HDOM_QUOTE_SINGLE:
484						$quote = '\'';
485						break;
486					default:
487						$quote = '';
488				}
489
490				$ret .= $key
491					. $this->_[HDOM_INFO_SPACE][$i][1]
492					. '='
493					. $this->_[HDOM_INFO_SPACE][$i][2]
494					. $quote
495					. $val
496					. $quote;
497			}
498		}
499
500		$ret = $this->dom->restore_noise($ret);
501		return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
502	}
503
504	function find($selector, $idx = null, $lowercase = false) {
505		$selectors = $this->parse_selector($selector);
506		if (($count = count($selectors)) === 0) {
507			return array();
508		}
509		$found_keys = array();
510
511		// find each selector
512		for ($c = 0; $c < $count; ++$c) {
513			// The change on the below line was documented on the sourceforge
514			// code tracker id 2788009
515			// used to be: if (($levle=count($selectors[0]))===0) return array();
516			if (($levle = count($selectors[$c])) === 0) {
517				return array();
518			}
519			if (!isset($this->_[HDOM_INFO_BEGIN])) {
520				return array();
521			}
522
523			$head = array($this->_[HDOM_INFO_BEGIN] => 1);
524			$cmd = ' '; // Combinator
525
526			// handle descendant selectors, no recursive!
527			for ($l = 0; $l < $levle; ++$l) {
528				$ret = array();
529
530				foreach ($head as $k => $v) {
531					$n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k];
532					//PaperG - Pass this optional parameter on to the seek function.
533					$n->seek($selectors[$c][$l], $ret, $cmd, $lowercase);
534				}
535
536				$head = $ret;
537				$cmd = $selectors[$c][$l][4]; // Next Combinator
538			}
539
540			foreach ($head as $k => $v) {
541				if (!isset($found_keys[$k])) {
542					$found_keys[$k] = 1;
543				}
544			}
545		}
546
547		// sort keys
548		ksort($found_keys);
549
550		$found = array();
551		foreach ($found_keys as $k => $v) {
552			$found[] = $this->dom->nodes[$k];
553		}
554
555		// return nth-element or array
556		if (is_null($idx)) {
557			return $found;
558		} elseif ($idx < 0) {
559			$idx = count($found) + $idx;
560		}
561		return (isset($found[$idx])) ? $found[$idx] : null;
562	}
563
564	protected function seek($selector, &$ret, $parent_cmd, $lowercase = false) {
565		global $debug_object;
566		if (is_object($debug_object)) {
567			$debug_object->debug_log_entry(1);
568		}
569
570		list($tag, $id, $class, $attributes, $cmb) = $selector;
571		$nodes = array();
572
573		if ($parent_cmd === ' ') { // Descendant Combinator
574			// Find parent closing tag if the current element doesn't have a closing
575			// tag (i.e. void element)
576			$end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
577			if ($end == 0) {
578				$parent = $this->parent;
579				while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) {
580					$end -= 1;
581					$parent = $parent->parent;
582				}
583				$end += $parent->_[HDOM_INFO_END];
584			}
585
586			// Get list of target nodes
587			$nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
588			$nodes_count = $end - $nodes_start;
589			$nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
590		} elseif ($parent_cmd === '>') { // Child Combinator
591			$nodes = $this->children;
592		} elseif ($parent_cmd === '+'
593			&& $this->parent
594			&& in_array($this, $this->parent->children)) { // Next-Sibling Combinator
595			$index = array_search($this, $this->parent->children, true) + 1;
596			if ($index < count($this->parent->children))
597				$nodes[] = $this->parent->children[$index];
598		} elseif ($parent_cmd === '~'
599			&& $this->parent
600			&& in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator
601			$index = array_search($this, $this->parent->children, true);
602			$nodes = array_slice($this->parent->children, $index);
603		}
604
605		// Go throgh each element starting at this element until the end tag
606		// Note: If this element is a void tag, any previous void element is
607		// skipped.
608		foreach ($nodes as $node) {
609			$pass = true;
610
611			// Skip root nodes
612			if (!$node->parent) {
613				$pass = false;
614			}
615
616			// Handle 'text' selector
617			if ($pass && $tag === 'text' && $node->tag === 'text') {
618				$ret[array_search($node, $this->dom->nodes, true)] = 1;
619				unset($node);
620				continue;
621			}
622
623			// Skip if node isn't a child node (i.e. text nodes)
624			if ($pass && !in_array($node, $node->parent->children, true)) {
625				$pass = false;
626			}
627
628			// Skip if tag doesn't match
629			if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') {
630				$pass = false;
631			}
632
633			// Skip if ID doesn't exist
634			if ($pass && $id !== '' && !isset($node->attr['id'])) {
635				$pass = false;
636			}
637
638			// Check if ID matches
639			if ($pass && $id !== '' && isset($node->attr['id'])) {
640				// Note: Only consider the first ID (as browsers do)
641				$node_id = explode(' ', trim($node->attr['id']))[0];
642
643				if ($id !== $node_id) {
644					$pass = false;
645				}
646			}
647
648			// Check if all class(es) exist
649			if ($pass && $class !== '' && is_array($class) && !empty($class)) {
650				if (isset($node->attr['class'])) {
651					$node_classes = explode(' ', $node->attr['class']);
652
653					if ($lowercase) {
654						$node_classes = array_map('strtolower', $node_classes);
655					}
656
657					foreach ($class as $c) {
658						if (!in_array($c, $node_classes)) {
659							$pass = false;
660							break;
661						}
662					}
663				} else {
664					$pass = false;
665				}
666			}
667
668			// Check attributes
669			if ($pass
670				&& $attributes !== ''
671				&& is_array($attributes)
672				&& !empty($attributes)) {
673				foreach ($attributes as $a) {
674					list(
675						$att_name,
676						$att_expr,
677						$att_val,
678						$att_inv,
679						$att_case_sensitivity
680					) = $a;
681
682					// Handle indexing attributes (i.e. "[2]")
683					/**
684					 * Note: This is not supported by the CSS Standard but adds
685					 * the ability to select items compatible to XPath (i.e.
686					 * the 3rd element within it's parent).
687					 *
688					 * Note: This doesn't conflict with the CSS Standard which
689					 * doesn't work on numeric attributes anyway.
690					 */
691					if (is_numeric($att_name)
692						&& $att_expr === ''
693						&& $att_val === '') {
694						$count = 0;
695
696						// Find index of current element in parent
697						foreach ($node->parent->children as $c) {
698							if ($c->tag === $node->tag)
699								++$count;
700							if ($c === $node)
701								break;
702						}
703
704						// If this is the correct node, continue with next
705						// attribute
706						if ($count === (int) $att_name)
707							continue;
708					}
709
710					// Check attribute availability
711					if ($att_inv) { // Attribute should NOT be set
712						if (isset($node->attr[$att_name])) {
713							$pass = false;
714							break;
715						}
716					} else { // Attribute should be set
717						// todo: "plaintext" is not a valid CSS selector!
718						if ($att_name !== 'plaintext'
719							&& !isset($node->attr[$att_name])) {
720							$pass = false;
721							break;
722						}
723					}
724
725					// Continue with next attribute if expression isn't defined
726					if ($att_expr === '')
727						continue;
728
729					// If they have told us that this is a "plaintext"
730					// search then we want the plaintext of the node - right?
731					// todo "plaintext" is not a valid CSS selector!
732					if ($att_name === 'plaintext') {
733						$nodeKeyValue = $node->text();
734					} else {
735						$nodeKeyValue = $node->attr[$att_name];
736					}
737
738					if (is_object($debug_object)) {
739						$debug_object->debug_log(2,
740							'testing node: '
741							. $node->tag
742							. ' for attribute: '
743							. $att_name
744							. $att_expr
745							. $att_val
746							. ' where nodes value is: '
747							. $nodeKeyValue
748						);
749					}
750
751					// If lowercase is set, do a case insensitive test of
752					// the value of the selector.
753					if ($lowercase) {
754						$check = $this->match(
755							$att_expr,
756							strtolower($att_val),
757							strtolower($nodeKeyValue),
758							$att_case_sensitivity
759						);
760					} else {
761						$check = $this->match(
762							$att_expr,
763							$att_val,
764							$nodeKeyValue,
765							$att_case_sensitivity
766						);
767					}
768
769					if (is_object($debug_object)) {
770						$debug_object->debug_log(2,
771							'after match: '
772							. ($check ? 'true' : 'false')
773						);
774					}
775
776					if (!$check) {
777						$pass = false;
778						break;
779					}
780				}
781			}
782
783			// Found a match. Add to list and clear node
784			if ($pass)
785				$ret[$node->_[HDOM_INFO_BEGIN]] = 1;
786			unset($node);
787		}
788		// It's passed by reference so this is actually what this function returns.
789		if (is_object($debug_object)) {
790			$debug_object->debug_log(1, 'EXIT - ret: ', $ret);
791		}
792	}
793
794	protected function match($exp, $pattern, $value, $case_sensitivity) {
795		global $debug_object;
796		if (is_object($debug_object)) {
797			$debug_object->debug_log_entry(1);
798		}
799
800		if ($case_sensitivity === 'i') {
801			$pattern = strtolower($pattern);
802			$value = strtolower($value);
803		}
804
805		switch ($exp) {
806			case '=':
807				return ($value === $pattern);
808			case '!=':
809				return ($value !== $pattern);
810			case '^=':
811				return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
812			case '$=':
813				return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
814			case '*=':
815				return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
816			case '|=':
817				/**
818				 * [att|=val]
819				 *
820				 * Represents an element with the att attribute, its value
821				 * either being exactly "val" or beginning with "val"
822				 * immediately followed by "-" (U+002D).
823				 */
824				return strpos($value, $pattern) === 0;
825			case '~=':
826				/**
827				 * [att~=val]
828				 *
829				 * Represents an element with the att attribute whose value is a
830				 * whitespace-separated list of words, one of which is exactly
831				 * "val". If "val" contains whitespace, it will never represent
832				 * anything (since the words are separated by spaces). Also if
833				 * "val" is the empty string, it will never represent anything.
834				 */
835				return in_array($pattern, explode(' ', trim($value)), true);
836		}
837		return false;
838	}
839
840	protected function parse_selector($selector_string) {
841		global $debug_object;
842		if (is_object($debug_object)) {
843			$debug_object->debug_log_entry(1);
844		}
845
846		/**
847		 * Pattern of CSS selectors, modified from mootools (https://mootools.net/)
848		 *
849		 * Paperg: Add the colon to the attribute, so that it properly finds
850		 * <tag attr:ibute="something" > like google does.
851		 *
852		 * Note: if you try to look at this attribute, you MUST use getAttribute
853		 * since $dom->x:y will fail the php syntax check.
854		 *
855		 * Notice the \[ starting the attribute? and the @? following? This
856		 * implies that an attribute can begin with an @ sign that is not
857		 * captured. This implies that an html attribute specifier may start
858		 * with an @ sign that is NOT captured by the expression. Farther study
859		 * is required to determine of this should be documented or removed.
860		 *
861		 * Matches selectors in this order:
862		 *
863		 * [0] - full match
864		 *
865		 * [1] - tag name
866		 *     ([\w:\*-]*)
867		 *     Matches the tag name consisting of zero or more words, colons,
868		 *     asterisks and hyphens.
869		 *
870		 * [2] - id name
871		 *     (?:\#([\w-]+))
872		 *     Optionally matches a id name, consisting of an "#" followed by
873		 *     the id name (one or more words and hyphens).
874		 *
875		 * [3] - class names (including dots)
876		 *     (?:\.([\w\.-]+))?
877		 *     Optionally matches a list of classs, consisting of an "."
878		 *     followed by the class name (one or more words and hyphens)
879		 *     where multiple classes can be chained (i.e. ".foo.bar.baz")
880		 *
881		 * [4] - attributes
882		 *     ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
883		 *     Optionally matches the attributes list
884		 *
885		 * [5] - separator
886		 *     ([\/, >+~]+)
887		 *     Matches the selector list separator
888		 */
889		// phpcs:ignore Generic.Files.LineLength
890		$pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";
891
892		preg_match_all(
893			$pattern,
894			trim($selector_string) . ' ', // Add final ' ' as pseudo separator
895			$matches,
896			PREG_SET_ORDER
897		);
898
899		if (is_object($debug_object)) {
900			$debug_object->debug_log(2, 'Matches Array: ', $matches);
901		}
902
903		$selectors = array();
904		$result = array();
905
906		foreach ($matches as $m) {
907			$m[0] = trim($m[0]);
908
909			// Skip NoOps
910			if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') {
911				continue;
912			}
913
914			// Convert to lowercase
915			if ($this->dom->lowercase) {
916				$m[1] = strtolower($m[1]);
917			}
918
919			// Extract classes
920			if ($m[3] !== '') {
921				$m[3] = explode('.', $m[3]);
922			}
923
924			/* Extract attributes (pattern based on the pattern above!)
925
926					* [0] - full match
927					* [1] - attribute name
928					* [2] - attribute expression
929					* [3] - attribute value
930					* [4] - case sensitivity
931					*
932					* Note: Attributes can be negated with a "!" prefix to their name
933					*/
934			if ($m[4] !== '') {
935				preg_match_all(
936					"/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
937					trim($m[4]),
938					$attributes,
939					PREG_SET_ORDER
940				);
941
942				// Replace element by array
943				$m[4] = array();
944
945				foreach ($attributes as $att) {
946					// Skip empty matches
947					if (trim($att[0]) === '') {
948						continue;
949					}
950
951					$inverted = (isset($att[1][0]) && $att[1][0] === '!');
952					$m[4][] = array(
953						$inverted ? substr($att[1], 1) : $att[1], // Name
954						(isset($att[2])) ? $att[2] : '', // Expression
955						(isset($att[3])) ? $att[3] : '', // Value
956						$inverted, // Inverted Flag
957						(isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
958					);
959				}
960			}
961
962			// Sanitize Separator
963			if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator
964				$m[5] = ' ';
965			} else { // Other Separator
966				$m[5] = trim($m[5]);
967			}
968
969			// Clear Separator if it's a Selector List
970			if ($is_list = ($m[5] === ',')) {
971				$m[5] = '';
972			}
973
974			// Remove full match before adding to results
975			array_shift($m);
976			$result[] = $m;
977
978			if ($is_list) { // Selector List
979				$selectors[] = $result;
980				$result = array();
981			}
982		}
983
984		if (count($result) > 0) {
985			$selectors[] = $result;
986		}
987		return $selectors;
988	}
989
990	function __get($name) {
991		if (isset($this->attr[$name])) {
992			return $this->convert_text($this->attr[$name]);
993		}
994		switch ($name) {
995			case 'outertext':
996				return $this->outertext();
997			case 'innertext':
998				return $this->innertext();
999			case 'plaintext':
1000				return $this->text();
1001			case 'xmltext':
1002				return $this->xmltext();
1003			default:
1004				return array_key_exists($name, $this->attr);
1005		}
1006	}
1007
1008	function __set($name, $value) {
1009		global $debug_object;
1010		if (is_object($debug_object)) {
1011			$debug_object->debug_log_entry(1);
1012		}
1013
1014		switch ($name) {
1015			case 'outertext':
1016				return $this->_[HDOM_INFO_OUTER] = $value;
1017			case 'innertext':
1018				if (isset($this->_[HDOM_INFO_TEXT])) {
1019					return $this->_[HDOM_INFO_TEXT] = $value;
1020				}
1021				return $this->_[HDOM_INFO_INNER] = $value;
1022		}
1023
1024		if (!isset($this->attr[$name])) {
1025			$this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
1026			$this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
1027		}
1028
1029		$this->attr[$name] = $value;
1030	}
1031
1032	function __isset($name) {
1033		switch ($name) {
1034			case 'outertext':
1035				return true;
1036			case 'innertext':
1037				return true;
1038			case 'plaintext':
1039				return true;
1040		}
1041		//no value attr: nowrap, checked selected...
1042		return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
1043	}
1044
1045	function __unset($name) {
1046		if (isset($this->attr[$name])) {
1047			unset($this->attr[$name]);
1048		}
1049	}
1050
1051	function convert_text($text) {
1052		global $debug_object;
1053		if (is_object($debug_object)) {
1054			$debug_object->debug_log_entry(1);
1055		}
1056
1057		$converted_text = $text;
1058
1059		$sourceCharset = '';
1060		$targetCharset = '';
1061
1062		if ($this->dom) {
1063			$sourceCharset = strtoupper($this->dom->_charset);
1064			$targetCharset = strtoupper($this->dom->_target_charset);
1065		}
1066
1067		if (is_object($debug_object)) {
1068			$debug_object->debug_log(3,
1069				'source charset: '
1070				. $sourceCharset
1071				. ' target charaset: '
1072				. $targetCharset
1073			);
1074		}
1075
1076		if (!empty($sourceCharset)
1077			&& !empty($targetCharset)
1078			&& (strcasecmp($sourceCharset, $targetCharset) != 0)) {
1079			// Check if the reported encoding could have been incorrect and the text is actually already UTF-8
1080			if ((strcasecmp($targetCharset, 'UTF-8') == 0)
1081				&& ($this->is_utf8($text))) {
1082				$converted_text = $text;
1083			} else {
1084				$converted_text = iconv($sourceCharset, $targetCharset, $text);
1085			}
1086		}
1087
1088		// Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
1089		if ($targetCharset === 'UTF-8') {
1090			if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
1091				$converted_text = substr($converted_text, 3);
1092			}
1093
1094			if (substr($converted_text, -3) === "\xef\xbb\xbf") {
1095				$converted_text = substr($converted_text, 0, -3);
1096			}
1097		}
1098
1099		return $converted_text;
1100	}
1101
1102	static function is_utf8($str) {
1103		$c = 0;
1104		$b = 0;
1105		$bits = 0;
1106		$len = strlen($str);
1107		for ($i = 0; $i < $len; $i++) {
1108			$c = ord($str[$i]);
1109			if ($c > 128) {
1110				if (($c >= 254)) {
1111					return false;
1112				} elseif ($c >= 252) {
1113					$bits = 6;
1114				} elseif ($c >= 248) {
1115					$bits = 5;
1116				} elseif ($c >= 240) {
1117					$bits = 4;
1118				} elseif ($c >= 224) {
1119					$bits = 3;
1120				} elseif ($c >= 192) {
1121					$bits = 2;
1122				} else {
1123					return false;
1124				}
1125				if (($i + $bits) > $len) {
1126					return false;
1127				}
1128				while ($bits > 1) {
1129					$i++;
1130					$b = ord($str[$i]);
1131					if ($b < 128 || $b > 191) {
1132						return false;
1133					}
1134					$bits--;
1135				}
1136			}
1137		}
1138		return true;
1139	}
1140
1141	function get_display_size() {
1142		global $debug_object;
1143
1144		$width = -1;
1145		$height = -1;
1146
1147		if ($this->tag !== 'img') {
1148			return false;
1149		}
1150
1151		// See if there is aheight or width attribute in the tag itself.
1152		if (isset($this->attr['width'])) {
1153			$width = $this->attr['width'];
1154		}
1155
1156		if (isset($this->attr['height'])) {
1157			$height = $this->attr['height'];
1158		}
1159
1160		// Now look for an inline style.
1161		if (isset($this->attr['style'])) {
1162			// Thanks to user gnarf from stackoverflow for this regular expression.
1163			$attributes = array();
1164
1165			preg_match_all(
1166				'/([\w-]+)\s*:\s*([^;]+)\s*;?/',
1167				$this->attr['style'],
1168				$matches,
1169				PREG_SET_ORDER
1170			);
1171
1172			foreach ($matches as $match) {
1173				$attributes[$match[1]] = $match[2];
1174			}
1175
1176			// If there is a width in the style attributes:
1177			if (isset($attributes['width']) && $width == -1) {
1178				// check that the last two characters are px (pixels)
1179				if (strtolower(substr($attributes['width'], -2)) === 'px') {
1180					$proposed_width = substr($attributes['width'], 0, -2);
1181					// Now make sure that it's an integer and not something stupid.
1182					if (filter_var($proposed_width, FILTER_VALIDATE_INT)) {
1183						$width = $proposed_width;
1184					}
1185				}
1186			}
1187
1188			// If there is a width in the style attributes:
1189			if (isset($attributes['height']) && $height == -1) {
1190				// check that the last two characters are px (pixels)
1191				if (strtolower(substr($attributes['height'], -2)) == 'px') {
1192					$proposed_height = substr($attributes['height'], 0, -2);
1193					// Now make sure that it's an integer and not something stupid.
1194					if (filter_var($proposed_height, FILTER_VALIDATE_INT)) {
1195						$height = $proposed_height;
1196					}
1197				}
1198			}
1199
1200		}
1201
1202		// Future enhancement:
1203		// Look in the tag to see if there is a class or id specified that has
1204		// a height or width attribute to it.
1205
1206		// Far future enhancement
1207		// Look at all the parent tags of this image to see if they specify a
1208		// class or id that has an img selector that specifies a height or width
1209		// Note that in this case, the class or id will have the img subselector
1210		// for it to apply to the image.
1211
1212		// ridiculously far future development
1213		// If the class or id is specified in a SEPARATE css file thats not on
1214		// the page, go get it and do what we were just doing for the ones on
1215		// the page.
1216
1217		$result = array(
1218			'height' => $height,
1219			'width' => $width
1220		);
1221
1222		return $result;
1223	}
1224
1225	function save($filepath = '') {
1226		$ret = $this->outertext();
1227
1228		if ($filepath !== '') {
1229			file_put_contents($filepath, $ret, LOCK_EX);
1230		}
1231
1232		return $ret;
1233	}
1234
1235	function addClass($class) {
1236		if (is_string($class)) {
1237			$class = explode(' ', $class);
1238		}
1239
1240		if (is_array($class)) {
1241			foreach ($class as $c) {
1242				if (isset($this->class)) {
1243					if ($this->hasClass($c)) {
1244						continue;
1245					} else {
1246						$this->class .= ' ' . $c;
1247					}
1248				} else {
1249					$this->class = $c;
1250				}
1251			}
1252		} else {
1253			if (is_object($debug_object)) {
1254				$debug_object->debug_log(2, 'Invalid type: ', gettype($class));
1255			}
1256		}
1257	}
1258
1259	function hasClass($class) {
1260		if (is_string($class)) {
1261			if (isset($this->class)) {
1262				return in_array($class, explode(' ', $this->class), true);
1263			}
1264		} else {
1265			if (is_object($debug_object)) {
1266				$debug_object->debug_log(2, 'Invalid type: ', gettype($class));
1267			}
1268		}
1269
1270		return false;
1271	}
1272
1273	function removeClass($class = null) {
1274		if (!isset($this->class)) {
1275			return;
1276		}
1277
1278		if (is_null($class)) {
1279			$this->removeAttribute('class');
1280			return;
1281		}
1282
1283		if (is_string($class)) {
1284			$class = explode(' ', $class);
1285		}
1286
1287		if (is_array($class)) {
1288			$class = array_diff(explode(' ', $this->class), $class);
1289			if (empty($class)) {
1290				$this->removeAttribute('class');
1291			} else {
1292				$this->class = implode(' ', $class);
1293			}
1294		}
1295	}
1296
1297	function getAllAttributes() {
1298		return $this->attr;
1299	}
1300
1301	function getAttribute($name) {
1302		return $this->__get($name);
1303	}
1304
1305	function setAttribute($name, $value) {
1306		$this->__set($name, $value);
1307	}
1308
1309	function hasAttribute($name) {
1310		return $this->__isset($name);
1311	}
1312
1313	function removeAttribute($name) {
1314		$this->__set($name, null);
1315	}
1316
1317	function remove() {
1318		if ($this->parent) {
1319			$this->parent->removeChild($this);
1320		}
1321	}
1322
	/**
	 * Remove a child node, including everything below it, from this node
	 * and from the owning DOM's node list.
	 *
	 * Nothing happens unless $node is found (by identity) in this node's
	 * nodes list, in its children list and in the DOM's node list.
	 *
	 * @param object $node The child node to remove.
	 * @return void
	 */
	function removeChild($node) {
		// Strict search: locate the very same object in all three lists.
		$nidx = array_search($node, $this->nodes, true);
		$cidx = array_search($node, $this->children, true);
		$didx = array_search($node, $this->dom->nodes, true);

		if ($nidx !== false && $cidx !== false && $didx !== false) {

			// Recursively remove the element children first.
			foreach ($node->children as $child) {
				$node->removeChild($child);
			}

			// Then drop the remaining entries of $node->nodes from both
			// the node itself and the DOM's node list.
			foreach ($node->nodes as $entity) {
				$enidx = array_search($entity, $node->nodes, true);
				$edidx = array_search($entity, $node->dom->nodes, true);

				if ($enidx !== false && $edidx !== false) {
					unset($node->nodes[$enidx]);
					unset($node->dom->nodes[$edidx]);
				}
			}

			// Finally unlink the node itself. unset() leaves gaps in the
			// arrays; they are not re-indexed here.
			unset($this->nodes[$nidx]);
			unset($this->children[$cidx]);
			unset($this->dom->nodes[$didx]);

			$node->clear();

		}
	}
1352
1353	function getElementById($id) {
1354		return $this->find("#$id", 0);
1355	}
1356
1357	function getElementsById($id, $idx = null) {
1358		return $this->find("#$id", $idx);
1359	}
1360
1361	function getElementByTagName($name) {
1362		return $this->find($name, 0);
1363	}
1364
1365	function getElementsByTagName($name, $idx = null) {
1366		return $this->find($name, $idx);
1367	}
1368
1369	function parentNode() {
1370		return $this->parent();
1371	}
1372
1373	function childNodes($idx = -1) {
1374		return $this->children($idx);
1375	}
1376
1377	function firstChild() {
1378		return $this->first_child();
1379	}
1380
1381	function lastChild() {
1382		return $this->last_child();
1383	}
1384
1385	function nextSibling() {
1386		return $this->next_sibling();
1387	}
1388
1389	function previousSibling() {
1390		return $this->prev_sibling();
1391	}
1392
1393	function hasChildNodes() {
1394		return $this->has_child();
1395	}
1396
1397	function nodeName() {
1398		return $this->tag;
1399	}
1400
1401	function appendChild($node) {
1402		$node->parent($this);
1403		return $node;
1404	}
1405
1406}
1407
1408class simple_html_dom {
	// Root node of the parsed document; created in prepare() with tag 'root'.
	public $root = null;
	// Node list of this document; reset in prepare(), walked in clear().
	public $nodes = array();
	// Callback stored by set_callback() and reset by remove_callback().
	public $callback = null;
	// When true, read_tag() lowercases tag and attribute names.
	public $lowercase = false;
	// Byte length of the trimmed input as passed to prepare().
	public $original_size;
	// Byte length of the working document; updated when noise is removed.
	public $size;

	// Byte offset of the parser inside $doc.
	protected $pos;
	// Working copy of the document being parsed.
	protected $doc;
	// Character at $pos, or null once the end of $doc has been reached.
	protected $char;

	// Counter advanced whenever the parser creates a node; used for the
	// HDOM_INFO_BEGIN / HDOM_INFO_END markers.
	protected $cursor;
	// Node that newly parsed nodes are currently attached to.
	protected $parent;
	// Map of placeholder key => original text taken out by remove_noise().
	protected $noise = array();
	// Character sets handed to skip() / copy_skip() / copy_until().
	protected $token_blank = " \t\r\n";
	protected $token_equal = ' =/>';
	protected $token_slash = " />\r\n\t";
	protected $token_attr = ' >';

	// Charset found by parse_charset().
	public $_charset = '';
	// Target charset passed to the constructor.
	public $_target_charset = '';

	// Inner text assigned to <br> nodes by read_tag().
	protected $default_br_text = '';

	// Default span text as passed to prepare().
	public $default_span_text = '';

	// Tags that never become the current parent in read_tag().
	protected $self_closing_tags = array(
		'area' => 1,
		'base' => 1,
		'br' => 1,
		'col' => 1,
		'embed' => 1,
		'hr' => 1,
		'img' => 1,
		'input' => 1,
		'link' => 1,
		'meta' => 1,
		'param' => 1,
		'source' => 1,
		'track' => 1,
		'wbr' => 1
	);
	// Tags whose end tag may close ancestors that are still open
	// (see the end tag handling in read_tag()).
	protected $block_tags = array(
		'body' => 1,
		'div' => 1,
		'form' => 1,
		'root' => 1,
		'span' => 1,
		'table' => 1
	);
	// tag => set of open parent tags that a start tag of this kind closes
	// implicitly (see the start tag handling in read_tag()).
	protected $optional_closing_tags = array(
		// Not optional, see
		// https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
		'b' => array('b' => 1),
		'dd' => array('dd' => 1, 'dt' => 1),
		// Not optional, see
		// https://www.w3.org/TR/html/grouping-content.html#the-dl-element
		'dl' => array('dd' => 1, 'dt' => 1),
		'dt' => array('dd' => 1, 'dt' => 1),
		'li' => array('li' => 1),
		'optgroup' => array('optgroup' => 1, 'option' => 1),
		'option' => array('optgroup' => 1, 'option' => 1),
		'p' => array('p' => 1),
		'rp' => array('rp' => 1, 'rt' => 1),
		'rt' => array('rp' => 1, 'rt' => 1),
		'td' => array('td' => 1, 'th' => 1),
		'th' => array('td' => 1, 'th' => 1),
		'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
	);
1478
1479	function __construct(
1480		$str = null,
1481		$lowercase = true,
1482		$forceTagsClosed = true,
1483		$target_charset = DEFAULT_TARGET_CHARSET,
1484		$stripRN = true,
1485		$defaultBRText = DEFAULT_BR_TEXT,
1486		$defaultSpanText = DEFAULT_SPAN_TEXT,
1487		$options = 0) {
1488		if ($str) {
1489			if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
1490				$this->load_file($str);
1491			} else {
1492				$this->load(
1493					$str,
1494					$lowercase,
1495					$stripRN,
1496					$defaultBRText,
1497					$defaultSpanText,
1498					$options
1499				);
1500			}
1501		}
1502		// Forcing tags to be closed implies that we don't trust the html, but
1503		// it can lead to parsing errors if we SHOULD trust the html.
1504		if (!$forceTagsClosed) {
1505			$this->optional_closing_array = array();
1506		}
1507
1508		$this->_target_charset = $target_charset;
1509	}
1510
1511	function __destruct() {
1512		$this->clear();
1513	}
1514
1515	function load(
1516		$str,
1517		$lowercase = true,
1518		$stripRN = true,
1519		$defaultBRText = DEFAULT_BR_TEXT,
1520		$defaultSpanText = DEFAULT_SPAN_TEXT,
1521		$options = 0) {
1522		global $debug_object;
1523
1524		// prepare
1525		$this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
1526
1527		// Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1528		// Script tags removal now preceeds style tag removal.
1529		// strip out <script> tags
1530		$this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
1531		$this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
1532
1533		// strip out the \r \n's if we are told to.
1534		if ($stripRN) {
1535			$this->doc = str_replace("\r", ' ', $this->doc);
1536			$this->doc = str_replace("\n", ' ', $this->doc);
1537
1538			// set the length of content since we have changed it.
1539			$this->size = strlen($this->doc);
1540		}
1541
1542		// strip out cdata
1543		$this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1544		// strip out comments
1545		$this->remove_noise("'<!--(.*?)-->'is");
1546		// strip out <style> tags
1547		$this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
1548		$this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
1549		// strip out preformatted tags
1550		$this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
1551		// strip out server side scripts
1552		$this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
1553
1554		if ($options & HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
1555			$this->remove_noise("'(\{\w)(.*?)(\})'s", true);
1556		}
1557
1558		// parsing
1559		$this->parse();
1560		// end
1561		$this->root->_[HDOM_INFO_END] = $this->cursor;
1562		$this->parse_charset();
1563
1564		// make load function chainable
1565		return $this;
1566	}
1567
1568	function load_file() {
1569		$args = func_get_args();
1570
1571		if (($doc = call_user_func_array('file_get_contents', $args)) !== false) {
1572			$this->load($doc, true);
1573		} else {
1574			return false;
1575		}
1576	}
1577
1578	function set_callback($function_name) {
1579		$this->callback = $function_name;
1580	}
1581
1582	function remove_callback() {
1583		$this->callback = null;
1584	}
1585
1586	function save($filepath = '') {
1587		$ret = $this->root->innertext();
1588		if ($filepath !== '') {
1589			file_put_contents($filepath, $ret, LOCK_EX);
1590		}
1591		return $ret;
1592	}
1593
1594	function find($selector, $idx = null, $lowercase = false) {
1595		return $this->root->find($selector, $idx, $lowercase);
1596	}
1597
1598	function clear() {
1599		if (isset($this->nodes)) {
1600			foreach ($this->nodes as $n) {
1601				$n->clear();
1602				$n = null;
1603			}
1604		}
1605
1606		// This add next line is documented in the sourceforge repository.
1607		// 2977248 as a fix for ongoing memory leaks that occur even with the
1608		// use of clear.
1609		if (isset($this->children)) {
1610			foreach ($this->children as $n) {
1611				$n->clear();
1612				$n = null;
1613			}
1614		}
1615
1616		if (isset($this->parent)) {
1617			$this->parent->clear();
1618			unset($this->parent);
1619		}
1620
1621		if (isset($this->root)) {
1622			$this->root->clear();
1623			unset($this->root);
1624		}
1625
1626		unset($this->doc);
1627		unset($this->noise);
1628	}
1629
1630	function dump($show_attr = true) {
1631		$this->root->dump($show_attr);
1632	}
1633
1634	protected function prepare(
1635		$str, $lowercase = true,
1636		$defaultBRText = DEFAULT_BR_TEXT,
1637		$defaultSpanText = DEFAULT_SPAN_TEXT) {
1638		$this->clear();
1639
1640		$this->doc = trim($str);
1641		$this->size = strlen($this->doc);
1642		$this->original_size = $this->size; // original size of the html
1643		$this->pos = 0;
1644		$this->cursor = 1;
1645		$this->noise = array();
1646		$this->nodes = array();
1647		$this->lowercase = $lowercase;
1648		$this->default_br_text = $defaultBRText;
1649		$this->default_span_text = $defaultSpanText;
1650		$this->root = new simple_html_dom_node($this);
1651		$this->root->tag = 'root';
1652		$this->root->_[HDOM_INFO_BEGIN] = -1;
1653		$this->root->nodetype = HDOM_TYPE_ROOT;
1654		$this->parent = $this->root;
1655		if ($this->size > 0) {
1656			$this->char = $this->doc[0];
1657		}
1658	}
1659
1660	protected function parse() {
1661		while (true) {
1662			// Read next tag if there is no text between current position and the
1663			// next opening tag.
1664			if (($s = $this->copy_until_char('<')) === '') {
1665				if ($this->read_tag()) {
1666					continue;
1667				} else {
1668					return true;
1669				}
1670			}
1671
1672			// Add a text node for text between tags
1673			$node = new simple_html_dom_node($this);
1674			++$this->cursor;
1675			$node->_[HDOM_INFO_TEXT] = $s;
1676			$this->link_nodes($node, false);
1677		}
1678	}
1679
1680	protected function parse_charset() {
1681		global $debug_object;
1682
1683		$charset = null;
1684
1685		if (function_exists('get_last_retrieve_url_contents_content_type')) {
1686			$contentTypeHeader = get_last_retrieve_url_contents_content_type();
1687			$success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
1688			if ($success) {
1689				$charset = $matches[1];
1690				if (is_object($debug_object)) {
1691					$debug_object->debug_log(2,
1692						'header content-type found charset of: '
1693						. $charset
1694					);
1695				}
1696			}
1697		}
1698
1699		if (empty($charset)) {
1700			// https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
1701			$el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);
1702
1703			if (!empty($el)) {
1704				$fullvalue = $el->content;
1705				if (is_object($debug_object)) {
1706					$debug_object->debug_log(2,
1707						'meta content-type tag found'
1708						. $fullvalue
1709					);
1710				}
1711
1712				if (!empty($fullvalue)) {
1713					$success = preg_match(
1714						'/charset=(.+)/i',
1715						$fullvalue,
1716						$matches
1717					);
1718
1719					if ($success) {
1720						$charset = $matches[1];
1721					} else {
1722						// If there is a meta tag, and they don't specify the
1723						// character set, research says that it's typically
1724						// ISO-8859-1
1725						if (is_object($debug_object)) {
1726							$debug_object->debug_log(2,
1727								'meta content-type tag couldn\'t be parsed. using iso-8859 default.'
1728							);
1729						}
1730
1731						$charset = 'ISO-8859-1';
1732					}
1733				}
1734			}
1735		}
1736
1737		if (empty($charset)) {
1738			// https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
1739			if ($meta = $this->root->find('meta[charset]', 0)) {
1740				$charset = $meta->charset;
1741				if (is_object($debug_object)) {
1742					$debug_object->debug_log(2, 'meta charset: ' . $charset);
1743				}
1744			}
1745		}
1746
1747		if (empty($charset)) {
1748			// Try to guess the charset based on the content
1749			// Requires Multibyte String (mbstring) support (optional)
1750			if (function_exists('mb_detect_encoding')) {
1751				/**
1752				 * mb_detect_encoding() is not intended to distinguish between
1753				 * charsets, especially single-byte charsets. Its primary
1754				 * purpose is to detect which multibyte encoding is in use,
1755				 * i.e. UTF-8, UTF-16, shift-JIS, etc.
1756				 *
1757				 * -- https://bugs.php.net/bug.php?id=38138
1758				 *
1759				 * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
1760				 * always result in CP1251/ISO-8859-5 and vice versa.
1761				 *
1762				 * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
1763				 * to stay compatible.
1764				 */
1765				$encoding = mb_detect_encoding(
1766					$this->doc,
1767					array('UTF-8', 'CP1252', 'ISO-8859-1')
1768				);
1769
1770				if ($encoding === 'CP1252' || $encoding === 'ISO-8859-1') {
1771					// Due to a limitation of mb_detect_encoding
1772					// 'CP1251'/'ISO-8859-5' will be detected as
1773					// 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
1774					// which case we can simply assume it is the other charset.
1775					if (!@iconv('CP1252', 'UTF-8', $this->doc)) {
1776						$encoding = 'CP1251';
1777					}
1778				}
1779
1780				if ($encoding !== false) {
1781					$charset = $encoding;
1782					if (is_object($debug_object)) {
1783						$debug_object->debug_log(2, 'mb_detect: ' . $charset);
1784					}
1785				}
1786			}
1787		}
1788
1789		if (empty($charset)) {
1790			// Assume it's UTF-8 as it is the most likely charset to be used
1791			$charset = 'UTF-8';
1792			if (is_object($debug_object)) {
1793				$debug_object->debug_log(2, 'No match found, assume ' . $charset);
1794			}
1795		}
1796
1797		// Since CP1252 is a superset, if we get one of it's subsets, we want
1798		// it instead.
1799		if ((strtolower($charset) == 'iso-8859-1')
1800			|| (strtolower($charset) == 'latin1')
1801			|| (strtolower($charset) == 'latin-1')) {
1802			$charset = 'CP1252';
1803			if (is_object($debug_object)) {
1804				$debug_object->debug_log(2,
1805					'replacing ' . $charset . ' with CP1252 as its a superset'
1806				);
1807			}
1808		}
1809
1810		if (is_object($debug_object)) {
1811			$debug_object->debug_log(1, 'EXIT - ' . $charset);
1812		}
1813
1814		return $this->_charset = $charset;
1815	}
1816
	/**
	 * Read the tag at the current position and attach the resulting node
	 * to the tree.
	 *
	 * Handles, in this order: end tags (including end tags that close an
	 * ancestor instead of the current parent), "<!...>" constructs
	 * (comments, doctype, cdata), malformed start tags (stored as text) and
	 * regular start tags with their attributes.
	 *
	 * @return bool False when the current character is not "<", which
	 * parse() takes as the signal to stop; true otherwise.
	 */
	protected function read_tag() {
		// Set end position if no further tags found
		if ($this->char !== '<') {
			$this->root->_[HDOM_INFO_END] = $this->cursor;
			return false;
		}

		$begin_tag_pos = $this->pos;
		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

		// end tag
		if ($this->char === '/') {
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

			// Skip whitespace in end tags (i.e. in "</   html>")
			$this->skip($this->token_blank);
			$tag = $this->copy_until_char('>');

			// Skip attributes in end tags
			if (($pos = strpos($tag, ' ')) !== false) {
				$tag = substr($tag, 0, $pos);
			}

			$parent_lower = strtolower($this->parent->tag);
			$tag_lower = strtolower($tag);

			// The end tag is supposed to close the parent tag. Handle situations
			// when it doesn't
			if ($parent_lower !== $tag_lower) {
				// Parent tag does not have to be closed necessarily (optional closing tag)
				// Current tag is a block tag, so it may close an ancestor
				if (isset($this->optional_closing_tags[$parent_lower])
					&& isset($this->block_tags[$tag_lower])) {

					$this->parent->_[HDOM_INFO_END] = 0;
					$org_parent = $this->parent;

					// Traverse ancestors to find a matching opening tag
					// Stop at root node
					while (($this->parent->parent)
						&& strtolower($this->parent->tag) !== $tag_lower
					) {
						$this->parent = $this->parent->parent;
					}

					// If we don't have a match add current tag as text node
					if (strtolower($this->parent->tag) !== $tag_lower) {
						$this->parent = $org_parent; // restore original parent

						if ($this->parent->parent) {
							$this->parent = $this->parent->parent;
						}

						$this->parent->_[HDOM_INFO_END] = $this->cursor;
						return $this->as_text_node($tag);
					}
				} elseif (($this->parent->parent)
					&& isset($this->block_tags[$tag_lower])
				) {
					// Grandparent exists and current tag is a block tag, so our
					// parent doesn't have an end tag
					$this->parent->_[HDOM_INFO_END] = 0; // No end tag
					$org_parent = $this->parent;

					// Traverse ancestors to find a matching opening tag
					// Stop at root node
					while (($this->parent->parent)
						&& strtolower($this->parent->tag) !== $tag_lower
					) {
						$this->parent = $this->parent->parent;
					}

					// If we don't have a match add current tag as text node
					if (strtolower($this->parent->tag) !== $tag_lower) {
						$this->parent = $org_parent; // restore original parent
						$this->parent->_[HDOM_INFO_END] = $this->cursor;
						return $this->as_text_node($tag);
					}
				} elseif (($this->parent->parent)
					&& strtolower($this->parent->parent->tag) === $tag_lower
				) { // Grandparent exists and current tag closes it
					$this->parent->_[HDOM_INFO_END] = 0;
					$this->parent = $this->parent->parent;
				} else { // Random tag, add as text node
					return $this->as_text_node($tag);
				}
			}

			// Set end position of parent tag to current cursor position
			$this->parent->_[HDOM_INFO_END] = $this->cursor;

			// The closed element is done; continue in its parent.
			if ($this->parent->parent) {
				$this->parent = $this->parent->parent;
			}

			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}

		// start tag
		$node = new simple_html_dom_node($this);
		$node->_[HDOM_INFO_BEGIN] = $this->cursor;
		++$this->cursor;
		$tag = $this->copy_until($this->token_slash); // Get tag name
		$node->tag_start = $begin_tag_pos;

		// doctype, cdata & comments...
		// <!DOCTYPE html>
		// <![CDATA[ ... ]]>
		// <!-- Comment -->
		if (isset($tag[0]) && $tag[0] === '!') {
			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

			if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
				$node->nodetype = HDOM_TYPE_COMMENT;
				$node->tag = 'comment';
			} else { // Could be doctype or CDATA but we don't care
				$node->nodetype = HDOM_TYPE_UNKNOWN;
				$node->tag = 'unknown';
			}

			if ($this->char === '>') {
				$node->_[HDOM_INFO_TEXT] .= '>';
			}

			$this->link_nodes($node, true);
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}

		// The start tag cannot contain another start tag, if so add as text
		// i.e. "<<html>"
		// Note: "!==" binds tighter than "=", so $pos receives the boolean
		// result of the comparison, not the offset.
		if ($pos = strpos($tag, '<') !== false) {
			$tag = '<' . substr($tag, 0, -1);
			$node->_[HDOM_INFO_TEXT] = $tag;
			$this->link_nodes($node, false);
			$this->char = $this->doc[--$this->pos]; // prev
			return true;
		}

		// Handle invalid tag names (i.e. "<html#doc>")
		if (!preg_match('/^\w[\w:-]*$/', $tag)) {
			$node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

			// Next char is the beginning of a new tag, don't touch it.
			if ($this->char === '<') {
				$this->link_nodes($node, false);
				return true;
			}

			// Next char closes current tag, add and be done with it.
			if ($this->char === '>') {
				$node->_[HDOM_INFO_TEXT] .= '>';
			}
			$this->link_nodes($node, false);
			$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
			return true;
		}

		// begin tag, add new node
		$node->nodetype = HDOM_TYPE_ELEMENT;
		$tag_lower = strtolower($tag);
		$node->tag = ($this->lowercase) ? $tag_lower : $tag;

		// handle optional closing tags
		if (isset($this->optional_closing_tags[$tag_lower])) {
			// Traverse ancestors to close all optional closing tags
			while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
				$this->parent->_[HDOM_INFO_END] = 0;
				$this->parent = $this->parent->parent;
			}
			$node->parent = $this->parent;
		}

		$guard = 0; // prevent infinity loop

		// [0] Space between tag and first attribute
		$space = array($this->copy_skip($this->token_blank), '', '');

		// attributes
		do {
			// Everything until the first equal sign should be the attribute name
			$name = $this->copy_until($this->token_equal);

			if ($name === '' && $this->char !== null && $space[0] === '') {
				break;
			}

			if ($guard === $this->pos) { // Escape infinite loop
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				continue;
			}

			$guard = $this->pos;

			// handle endless '<'
			// Out of bounds before the tag ended
			if ($this->pos >= $this->size - 1 && $this->char !== '>') {
				$node->nodetype = HDOM_TYPE_TEXT;
				$node->_[HDOM_INFO_END] = 0;
				$node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
				$node->tag = 'text';
				$this->link_nodes($node, false);
				return true;
			}

			// handle mismatch '<'
			// Attributes cannot start after opening tag
			if ($this->doc[$this->pos - 1] == '<') {
				$node->nodetype = HDOM_TYPE_TEXT;
				$node->tag = 'text';
				$node->attr = array();
				$node->_[HDOM_INFO_END] = 0;
				$node->_[HDOM_INFO_TEXT] = substr(
					$this->doc,
					$begin_tag_pos,
					$this->pos - $begin_tag_pos - 1
				);
				$this->pos -= 2;
				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
				$this->link_nodes($node, false);
				return true;
			}

			if ($name !== '/' && $name !== '') { // this is a attribute name
				// [1] Whitespace after attribute name
				$space[1] = $this->copy_skip($this->token_blank);

				$name = $this->restore_noise($name); // might be a noisy name

				if ($this->lowercase) {
					$name = strtolower($name);
				}

				if ($this->char === '=') { // attribute with value
					$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
					$this->parse_attr($node, $name, $space); // get attribute value
				} else {
					//no value attr: nowrap, checked selected...
					$node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
					$node->attr[$name] = true;
					if ($this->char != '>') {
						$this->char = $this->doc[--$this->pos];
					} // prev
				}

				$node->_[HDOM_INFO_SPACE][] = $space;

				// prepare for next attribute
				$space = array(
					$this->copy_skip($this->token_blank),
					'',
					''
				);
			} else { // no more attributes
				break;
			}
		} while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended

		$this->link_nodes($node, true);
		$node->_[HDOM_INFO_ENDSPACE] = $space[0];

		// handle empty tags (i.e. "<div/>")
		if ($this->copy_until_char('>') === '/') {
			$node->_[HDOM_INFO_ENDSPACE] .= '/';
			$node->_[HDOM_INFO_END] = 0;
		} else {
			// reset parent
			// Self-closing tags never get children, so they do not become
			// the current parent.
			if (!isset($this->self_closing_tags[strtolower($node->tag)])) {
				$this->parent = $node;
			}
		}

		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

		// If it's a BR tag, we need to set its text to the default text.
		// This way when we see it in plaintext, we can generate formatting that the user wants.
		// since a br tag never has sub nodes, this works well.
		if ($node->tag === 'br') {
			$node->_[HDOM_INFO_INNER] = $this->default_br_text;
		}

		return true;
	}
2101
2102	protected function parse_attr($node, $name, &$space) {
2103		$is_duplicate = isset($node->attr[$name]);
2104
2105		if (!$is_duplicate) // Copy whitespace between "=" and value
2106			$space[2] = $this->copy_skip($this->token_blank);
2107
2108		switch ($this->char) {
2109			case '"':
2110				$quote_type = HDOM_QUOTE_DOUBLE;
2111				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2112				$value = $this->copy_until_char('"');
2113				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2114				break;
2115			case '\'':
2116				$quote_type = HDOM_QUOTE_SINGLE;
2117				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2118				$value = $this->copy_until_char('\'');
2119				$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2120				break;
2121			default:
2122				$quote_type = HDOM_QUOTE_NO;
2123				$value = $this->copy_until($this->token_attr);
2124		}
2125
2126		$value = $this->restore_noise($value);
2127
2128		// PaperG: Attributes should not have \r or \n in them, that counts as
2129		// html whitespace.
2130		$value = str_replace("\r", '', $value);
2131		$value = str_replace("\n", '', $value);
2132
2133		// PaperG: If this is a "class" selector, lets get rid of the preceeding
2134		// and trailing space since some people leave it in the multi class case.
2135		if ($name === 'class') {
2136			$value = trim($value);
2137		}
2138
2139		if (!$is_duplicate) {
2140			$node->_[HDOM_INFO_QUOTE][] = $quote_type;
2141			$node->attr[$name] = $value;
2142		}
2143	}
2144
2145	protected function link_nodes(&$node, $is_child) {
2146		$node->parent = $this->parent;
2147		$this->parent->nodes[] = $node;
2148		if ($is_child) {
2149			$this->parent->children[] = $node;
2150		}
2151	}
2152
2153	protected function as_text_node($tag) {
2154		$node = new simple_html_dom_node($this);
2155		++$this->cursor;
2156		$node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
2157		$this->link_nodes($node, false);
2158		$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2159		return true;
2160	}
2161
2162	protected function skip($chars) {
2163		$this->pos += strspn($this->doc, $chars, $this->pos);
2164		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2165	}
2166
2167	protected function copy_skip($chars) {
2168		$pos = $this->pos;
2169		$len = strspn($this->doc, $chars, $pos);
2170		$this->pos += $len;
2171		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2172		if ($len === 0) {
2173			return '';
2174		}
2175		return substr($this->doc, $pos, $len);
2176	}
2177
2178	protected function copy_until($chars) {
2179		$pos = $this->pos;
2180		$len = strcspn($this->doc, $chars, $pos);
2181		$this->pos += $len;
2182		$this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
2183		return substr($this->doc, $pos, $len);
2184	}
2185
2186	protected function copy_until_char($char) {
2187		if ($this->char === null) {
2188			return '';
2189		}
2190
2191		if (($pos = strpos($this->doc, $char, $this->pos)) === false) {
2192			$ret = substr($this->doc, $this->pos, $this->size - $this->pos);
2193			$this->char = null;
2194			$this->pos = $this->size;
2195			return $ret;
2196		}
2197
2198		if ($pos === $this->pos) {
2199			return '';
2200		}
2201
2202		$pos_old = $this->pos;
2203		$this->char = $this->doc[$pos];
2204		$this->pos = $pos;
2205		return substr($this->doc, $pos_old, $pos - $pos_old);
2206	}
2207
2208	protected function remove_noise($pattern, $remove_tag = false) {
2209		global $debug_object;
2210		if (is_object($debug_object)) {
2211			$debug_object->debug_log_entry(1);
2212		}
2213
2214		$count = preg_match_all(
2215			$pattern,
2216			$this->doc,
2217			$matches,
2218			PREG_SET_ORDER | PREG_OFFSET_CAPTURE
2219		);
2220
2221		for ($i = $count - 1; $i > -1; --$i) {
2222			$key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);
2223
2224			if (is_object($debug_object)) {
2225				$debug_object->debug_log(2, 'key is: ' . $key);
2226			}
2227
2228			$idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch
2229			$this->noise[$key] = $matches[$i][$idx][0];
2230			$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
2231		}
2232
2233		// reset the length of content
2234		$this->size = strlen($this->doc);
2235
2236		if ($this->size > 0) {
2237			$this->char = $this->doc[0];
2238		}
2239	}
2240
2241	function restore_noise($text) {
2242		global $debug_object;
2243		if (is_object($debug_object)) {
2244			$debug_object->debug_log_entry(1);
2245		}
2246
2247		while (($pos = strpos($text, '___noise___')) !== false) {
2248			// Sometimes there is a broken piece of markup, and we don't GET the
2249			// pos+11 etc... token which indicates a problem outside of us...
2250
2251			// todo: "___noise___1000" (or any number with four or more digits)
2252			// in the DOM causes an infinite loop which could be utilized by
2253			// malicious software
2254			if (strlen($text) > $pos + 15) {
2255				$key = '___noise___'
2256					. $text[$pos + 11]
2257					. $text[$pos + 12]
2258					. $text[$pos + 13]
2259					. $text[$pos + 14]
2260					. $text[$pos + 15];
2261
2262				if (is_object($debug_object)) {
2263					$debug_object->debug_log(2, 'located key of: ' . $key);
2264				}
2265
2266				if (isset($this->noise[$key])) {
2267					$text = substr($text, 0, $pos)
2268						. $this->noise[$key]
2269						. substr($text, $pos + 16);
2270				} else {
2271					// do this to prevent an infinite loop.
2272					$text = substr($text, 0, $pos)
2273						. 'UNDEFINED NOISE FOR KEY: '
2274						. $key
2275						. substr($text, $pos + 16);
2276				}
2277			} else {
2278				// There is no valid key being given back to us... We must get
2279				// rid of the ___noise___ or we will have a problem.
2280				$text = substr($text, 0, $pos)
2281					. 'NO NUMERIC NOISE KEY'
2282					. substr($text, $pos + 11);
2283			}
2284		}
2285		return $text;
2286	}
2287
2288	function search_noise($text) {
2289		global $debug_object;
2290		if (is_object($debug_object)) {
2291			$debug_object->debug_log_entry(1);
2292		}
2293
2294		foreach ($this->noise as $noiseElement) {
2295			if (strpos($noiseElement, $text) !== false) {
2296				return $noiseElement;
2297			}
2298		}
2299	}
2300
2301	function __toString() {
2302		return $this->root->innertext();
2303	}
2304
2305	function __get($name) {
2306		switch ($name) {
2307			case 'outertext':
2308				return $this->root->innertext();
2309			case 'innertext':
2310				return $this->root->innertext();
2311			case 'plaintext':
2312				return $this->root->text();
2313			case 'charset':
2314				return $this->_charset;
2315			case 'target_charset':
2316				return $this->_target_charset;
2317		}
2318	}
2319
2320	function childNodes($idx = -1) {
2321		return $this->root->childNodes($idx);
2322	}
2323
2324	function firstChild() {
2325		return $this->root->first_child();
2326	}
2327
2328	function lastChild() {
2329		return $this->root->last_child();
2330	}
2331
2332	function createElement($name, $value = null) {
2333		return @str_get_html("<$name>$value</$name>")->firstChild();
2334	}
2335
	/**
	 * Build a node by parsing $value and taking the last parsed node.
	 *
	 * $value is handed to str_get_html() and end() is applied to the `nodes`
	 * property of the result, i.e. the last entry of that array is returned.
	 * The whole expression runs under the @ operator, so diagnostics raised
	 * along the way are suppressed.
	 *
	 * NOTE(review): str_get_html() is defined outside this excerpt. If it
	 * can return a non-object for some input, this call cannot produce a
	 * node for that input - confirm against its definition.
	 *
	 * @param string $value Text to parse.
	 * @return mixed The value end() yields for the parsed `nodes` array.
	 */
	function createTextNode($value) {
		return @end(str_get_html($value)->nodes);
	}
2339
2340	function getElementById($id) {
2341		return $this->find("#$id", 0);
2342	}
2343
2344	function getElementsById($id, $idx = null) {
2345		return $this->find("#$id", $idx);
2346	}
2347
2348	function getElementByTagName($name) {
2349		return $this->find($name, 0);
2350	}
2351
2352	function getElementsByTagName($name, $idx = -1) {
2353		return $this->find($name, $idx);
2354	}
2355
2356	function loadFile() {
2357		$args = func_get_args();
2358		$this->load_file($args);
2359	}
2360}