1<?php
2
3/**
4 * Abstract class for a set of proprietary modules that clean up (tidy)
5 * poorly written HTML.
6 * @todo Figure out how to protect some of these methods/properties
7 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
    /**
     * List of supported levels, ordered from least to most aggressive.
     * Index zero is a special case "no fixes" level; every later level
     * implies all of the levels that precede it (see getFixesForLevel()).
     * @type array
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in.
     * Disabled (null) by default, in which case makeFixesForLevel()
     * leaves $fixesForLevel untouched.
     * @type string|null
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel().
     * Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     * @type array
     */
    public $fixesForLevel = array(
        'light' => array(),
        'medium' => array(),
        'heavy' => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix survives when it is not listed in HTML.TidyRemove and is
     * either listed in HTML.TidyAdd or enabled by HTML.TidyLevel.
     *
     * @param HTMLPurifier_Config $config
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config)
    {
        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        if (!is_array($fixes)) {
            // An override that forgets to return anything must not make
            // the loops below iterate over null.
            $fixes = array();
        }
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        // (both are used as lookup tables keyed by fix name)
        $add_fixes = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            if (isset($remove_fixes[$name]) ||
                (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);
    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * An unrecognized level raises an E_USER_WARNING and yields no fixes.
     *
     * @param string $level level identifier, see $levels for valid values
     * @return array Lookup table of fixes (fix name => true)
     */
    public function getFixesForLevel($level)
    {
        // the "no fixes" level short-circuits everything
        if ($level == $this->levels[0]) {
            return array();
        }

        // collect every level up to and including the requested one
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                break;
            }
        }

        // the loop ran to completion without a match: unknown level
        if ($i == $c) {
            trigger_error(
                // explicit cast: passing null to htmlspecialchars() is
                // deprecated on newer PHP versions
                'Tidy level ' . htmlspecialchars((string) $level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }

        // flatten the per-level lists into a single lookup table; a
        // dedicated loop variable keeps the $level argument intact
        $ret = array();
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * When $defaultLevel is set, the list for that level is replaced
     * with the names of every fix in $fixes. A $defaultLevel that is
     * not a key of $fixesForLevel raises an E_USER_ERROR.
     *
     * @param array $fixes Fix name => fix implementation
     */
    public function makeFixesForLevel($fixes)
    {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            // only reached when a custom error handler resumes execution
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it.
     *
     * The fix name (see getFixType()) decides where each fix is stored:
     *  - attr_transform_pre / attr_transform_post with an element: on
     *    that element's property of the same name, keyed by attribute;
     *  - attr_transform_pre / attr_transform_post without an element: on
     *    this module's info_attr_transform_pre / info_attr_transform_post,
     *    keyed by attribute;
     *  - tag_transform: on this module's info_tag_transform, keyed by
     *    element;
     *  - child / content_model_type: on the element's property of the
     *    same name.
     * Any other type raises an E_USER_ERROR.
     *
     * @param array $fixes Lookup table of fixes to activate
     *                     (fix name => fix implementation)
     */
    public function populate($fixes)
    {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        // element-specific transform: reuse the element
                        // definition if present, otherwise create a stub
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // global transform: stored on the module itself
                        // under the info_-prefixed property
                        $type = "info_$type";
                        $e = $this;
                    }
                    $e->{$type}[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix.
     *
     * Names follow the pattern "element@attr#property", where every part
     * is optional:
     *  - "element"          => tag_transform on element
     *  - "element@attr"     => attr_transform_pre on element/attr
     *  - "@attr"            => attr_transform_pre on attr, no element
     *  - "element@attr#post" => attr_transform_post on element/attr
     *  - "element#property" => property on element
     *
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependant, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name)
    {
        // parse it: strip the "#property" suffix first, then "@attr"
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            list($name, $property) = explode('#', $name);
        }
        if (strpos($name, '@') !== false) {
            list($name, $attr) = explode('@', $name);
        }

        // figure out the parameters; whatever is left of $name is the
        // element, which is omitted entirely when empty
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform ("pre" unless stated otherwise)
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);
    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes; subclasses override it.
     * An empty array (rather than an implicit null) is returned so that
     * setup() and makeFixesForLevel() always receive an array.
     *
     * @return array
     */
    public function makeFixes()
    {
        return array();
    }
}
226
227// vim: et sw=4 sts=4
228