<?php
/**
 * Data filter that inspects the DOCTYPE declaration at the start of a
 * document and records the resulting mode in $GLOBALS['g_config']['mode'].
 *
 * The mode is set to one of:
 *  - 'html'   for the HTML 4.01 Strict / Transitional / Frameset doctypes;
 *  - 'xhtml'  for the XHTML 1.0 Strict / Transitional / Frameset doctypes;
 *  - 'quirks' when no DOCTYPE is found at the start of the document or the
 *             DOCTYPE is not one of the six listed above.
 *
 * The document content itself is not modified by this filter.
 */
class DataFilterDoctype extends DataFilter {
  /**
   * Old-style (PHP 4) constructor; performs no initialization.
   */
  function DataFilterDoctype() { }

  /**
   * Detects the document DOCTYPE and stores the matching mode
   * ('html', 'xhtml' or 'quirks') in $GLOBALS['g_config']['mode'].
   *
   * @param object $data Object exposing get_content(), which returns the
   *                     document text to inspect (passed by reference).
   * @return object The same $data object, unchanged.
   */
  function process(&$data) {
    $html = $data->get_content();

    $xml_declaration = "<\?.*?\?>";
    $doctype         = "<!DOCTYPE.*?>";

    /**
     * DOCTYPE declaration should be at the very beginning of the document
     * (with the only exception of XML declaration).
     *
     * XML declaration is optional; XML declaration may be surrounded with
     * whitespace.
     *
     * The "s" (DOTALL) modifier is required: a DOCTYPE is commonly split
     * over several lines (public identifier on one line, system identifier
     * on the next), and without it "." stops at the first line break, so
     * such declarations would never match and the document would be
     * wrongly treated as quirks mode.
     */
    $pattern = "#^(?:\s*$xml_declaration\s*)?($doctype)#s";

    if (preg_match($pattern, $html, $matches)) {
      /**
       * Remove extra spaces from the doctype text; DOCTYPE may contain
       * \n and \r characters in its whitespace parts. Every whitespace run
       * is replaced with one single space, converting the declaration to
       * the "normalized" form used by the comparisons below.
       */
      $doctype_match = preg_replace("/\s+/", " ", $matches[1]);

      /**
       * Match the normalized doctype against the standard doctypes.
       * The comparison is exact (including letter case).
       */
      switch ($doctype_match) {
      case '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">':
      case '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">':
      case '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">':
        $GLOBALS['g_config']['mode'] = 'html';
        return $data;
      case '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">':
      case '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">':
      case '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">':
        $GLOBALS['g_config']['mode'] = 'xhtml';
        return $data;
      }
    }

    /**
     * No DOCTYPE found at the start of the document, or the DOCTYPE is not
     * one of the recognized ones; fall back to quirks mode.
     */
    $GLOBALS['g_config']['mode'] = 'quirks';
    return $data;
  }
}
?>