1<?php
/**
 * Data filter that inspects the DOCTYPE declaration at the start of the
 * document content and records the corresponding rendering mode in
 * $GLOBALS['g_config']['mode'].
 *
 * The mode is set to 'html' for the HTML 4.01 doctypes, 'xhtml' for the
 * XHTML 1.0 doctypes and 'quirks' when no DOCTYPE is found at the start of
 * the content or the DOCTYPE is not one of the recognized ones.
 */
class DataFilterDoctype extends DataFilter {
  /**
   * Nothing to initialize.
   */
  function __construct() { }

  /**
   * PHP 4 style constructor, kept so that code calling it by name keeps
   * working. Methods named after the class are deprecated as constructors
   * in PHP 7 and are plain methods in PHP 8, hence __construct() above.
   */
  function DataFilterDoctype() { $this->__construct(); }

  /**
   * Detect the document DOCTYPE and store the rendering mode.
   *
   * @param object $data Object exposing get_content(), which returns the
   *                     document text to inspect. Passed by reference; this
   *                     method does not modify it.
   * @return object The same $data that was passed in.
   */
  function process(&$data) {
    $html = $data->get_content();

    $xml_declaration = '<\?.*?\?>';
    $doctype         = '<!DOCTYPE.*?>';

    /**
     * Fall back to quirks mode unless a known DOCTYPE is recognized below.
     */
    $mode = 'quirks';

    /**
     * DOCTYPE declaration should be at the very beginning of the document
     * (with the only exception of XML declaration).
     *
     * XML declaration is optional; XML declaration may be surrounded with whitespace.
     *
     * The "s" modifier lets "." match line breaks: a DOCTYPE is frequently
     * split over several lines, and without it such a declaration would
     * never be matched.
     */
    if (preg_match("#^(?:\s*{$xml_declaration}\s*)?({$doctype})#s", $html, $matches)) {
      /**
       * Remove extra spaces from the doctype text; also, DOCTYPE may contain
       * \n and \r characters in its whitespace parts. Here, we replace every
       * whitespace run with one single space, converting the declaration to
       * its "normalized" form.
       */
      $doctype_match = preg_replace('/\s+/', ' ', $matches[1]);

      /**
       * Match the normalized doctype against the standard doctypes
       */
      switch ($doctype_match) {
      case '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">':
      case '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">':
      case '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">':
        $mode = 'html';
        break;
      case '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">':
      case '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">':
      case '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">':
        $mode = 'xhtml';
        break;
      }
    }

    $GLOBALS['g_config']['mode'] = $mode;
    return $data;
  }
}
55?>