1<?php
2
3/**
4 * Removes all unrecognized tags from the list of tokens.
5 *
6 * This strategy iterates through all the tokens and removes unrecognized
7 * tokens. If a token is not recognized but a TagTransform is defined for
8 * that element, the element will be transformed accordingly.
9 */
10
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Walks the token list once and returns only the tokens that survive.
     *
     * Per token, in order:
     *  - A tag with an entry in the definition's tag-transform table is
     *    transformed first.
     *  - A tag known to the definition is kept, unless it is a start/empty
     *    tag lacking one of its required attributes after validation.
     *  - An unknown tag is converted to a text token when
     *    Core.EscapeInvalidTags is on; otherwise it is dropped, and if it is
     *    the start tag of a hidden element, following tokens are dropped
     *    until a tag with the same name is seen again.
     *  - A comment inside a kept hidden element becomes a text token.
     *    Elsewhere it is kept only with HTML.Trusted or when it matches
     *    HTML.AllowedComments / HTML.AllowedCommentsRegexp (after its
     *    hyphens have been cleaned up).
     *  - Text tokens are kept; every other token type is dropped.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $html_def = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_foreign = $config->get('Core.EscapeInvalidTags');
        $enforce_img_attrs = $config->get('Core.RemoveInvalidImg');

        // the settings below decide the fate of comment tokens
        $trusted = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_comments_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $allowed_comments !== array() || $allowed_comments_regexp !== null;

        $script_contents_flag = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents, when explicitly true or false, overrides
        // whatever Core.HiddenElements says about <script>
        if ($script_contents_flag === true) {
            $hidden['script'] = true;
        } elseif ($script_contents_flag === false) {
            if (isset($hidden['script'])) {
                unset($hidden['script']);
            }
        }

        $validator = new HTMLPurifier_AttrValidator();

        // false, or the tag name we are waiting for while discarding tokens
        $skip_until = false;

        // false, or the name of the kept hidden element we are currently in;
        // while set, comments are turned into text tokens
        $textify_inside = false;

        // registered by reference: the context always sees the loop's token
        $token = false;
        $context->register('CurrentToken', $token);

        $collector = false;
        if ($config->get('Core.CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // inside a removed hidden element: discard everything except a
            // tag carrying the name we are waiting for
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // give a registered tag transform the first shot at the element
                $incoming_name = $token->name;
                if (isset($html_def->info_tag_transform[$incoming_name])) {
                    // DEFINITION CALL
                    $transform = $html_def->info_tag_transform[$incoming_name];
                    $token = $transform->transform($token, $config, $context);
                    if ($collector) {
                        $collector->send(
                            E_NOTICE,
                            'Strategy_RemoveForeignElements: Tag transform',
                            $incoming_name
                        );
                    }
                }

                if (!isset($html_def->info[$token->name])) {
                    if (!$escape_foreign) {
                        // foreign element is dropped; for hidden elements also
                        // decide whether its children get dropped
                        // CAN BE GENERICIZED
                        $is_hidden = isset($hidden[$token->name]);
                        if ($is_hidden) {
                            if ($token instanceof HTMLPurifier_Token_Start) {
                                $skip_until = $token->name;
                            } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                                // any other tag type ends the skipping;
                                // an empty tag leaves the state untouched
                                $skip_until = false;
                            }
                        }
                        if ($collector) {
                            if ($is_hidden) {
                                $collector->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Foreign meta element removed'
                                );
                            } else {
                                $collector->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Foreign element removed'
                                );
                            }
                        }
                        continue;
                    }

                    // foreign element is preserved as its literal HTML text;
                    // report before $token is replaced so the collector still
                    // sees the tag
                    if ($collector) {
                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $markup = $generator->generateFromToken($token);
                    $token = new HTMLPurifier_Token_Text($markup);
                } else {
                    // recognized element: only the required attributes still
                    // need checking
                    $element_def = $html_def->info[$token->name];
                    $opens_element = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;

                    if ($opens_element &&
                        $element_def->required_attr &&
                        ($token->name != 'img' || $enforce_img_attrs) // ensure config option still works
                    ) {
                        $validator->validateToken($token, $config, $context);

                        // find the first required attribute that did not
                        // survive validation, if any
                        $missing_attr = null;
                        foreach ($element_def->required_attr as $required) {
                            if (!isset($token->attr[$required])) {
                                $missing_attr = $required;
                                break;
                            }
                        }

                        if ($missing_attr !== null) {
                            if ($collector) {
                                $collector->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $missing_attr
                                );
                            }
                            continue;
                        }

                        // attributes were validated above; mark the token so
                        // that fact travels with it
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track entering/leaving a kept hidden element
                    if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
                        $textify_inside = $token->name;
                    } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textify_inside) {
                        $textify_inside = false;
                    }
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_inside !== false) {
                    // comment within a kept hidden element: keep it as text
                    $comment_text = $token->data;
                    $token = new HTMLPurifier_Token_Text($comment_text);
                } elseif (!$trusted && !$filter_comments) {
                    // comments are not allowed at all
                    if ($collector) {
                        $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                } else {
                    // the trailing-hyphen check only feeds error reporting,
                    // so it is skipped when nothing is collecting errors
                    $had_trailing_hyphen = $collector && substr($token->data, -1) == '-';

                    // strip trailing hyphens, then collapse hyphen runs
                    $body = rtrim($token->data, '-');
                    $collapsed_hyphens = false;
                    while (strpos($body, '--') !== false) {
                        $collapsed_hyphens = true;
                        $body = str_replace('--', '-', $body);
                    }
                    $token->data = $body;

                    $lookup_key = trim($body);
                    $comment_allowed = $trusted
                        || !empty($allowed_comments[$lookup_key])
                        || ($allowed_comments_regexp !== null && preg_match($allowed_comments_regexp, $lookup_key));

                    if (!$comment_allowed) {
                        if ($collector) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    // comment stays; report the cleanup that was applied
                    if ($collector) {
                        if ($had_trailing_hyphen) {
                            $collector->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($collapsed_hyphens) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: drop it
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until && $collector) {
            // the awaited tag never showed up, so tokens were dropped to the end
            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}
206
207// vim: et sw=4 sts=4
208