1<?php
2
3/*
4 * This file is part of Twig.
5 *
6 * (c) Fabien Potencier
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Twig\Extension {
13use Twig\FileExtensionEscapingStrategy;
14use Twig\NodeVisitor\EscaperNodeVisitor;
15use Twig\TokenParser\AutoEscapeTokenParser;
16use Twig\TwigFilter;
17
final class EscaperExtension extends AbstractExtension
{
    /** @var string|false|callable Strategy used when a template does not choose one */
    private $defaultStrategy;

    /** @var callable[] Custom escapers, keyed by strategy name */
    private $escapers = [];

    /**
     * Strategies each class is considered safe for (class name => strategy names).
     *
     * @internal
     */
    public $safeClasses = [];

    /**
     * Reverse index of the safe classes (strategy name => class name => true).
     *
     * @internal
     */
    public $safeLookup = [];

    /**
     * @param string|false|callable $defaultStrategy An escaping strategy
     *
     * @see setDefaultStrategy()
     */
    public function __construct($defaultStrategy = 'html')
    {
        $this->setDefaultStrategy($defaultStrategy);
    }

    /**
     * Returns the token parsers contributed by this extension.
     */
    public function getTokenParsers()
    {
        return [new AutoEscapeTokenParser()];
    }

    /**
     * Returns the node visitors contributed by this extension.
     */
    public function getNodeVisitors()
    {
        return [new EscaperNodeVisitor()];
    }

    /**
     * Returns the "escape" filter, its "e" alias and the "raw" filter.
     */
    public function getFilters()
    {
        // "escape" and "e" are the same filter registered under two names.
        $escapeOptions = ['needs_environment' => true, 'is_safe_callback' => 'twig_escape_filter_is_safe'];

        return [
            new TwigFilter('escape', 'twig_escape_filter', $escapeOptions),
            new TwigFilter('e', 'twig_escape_filter', $escapeOptions),
            new TwigFilter('raw', 'twig_raw_filter', ['is_safe' => ['all']]),
        ];
    }

    /**
     * Sets the strategy applied when the user did not define one.
     *
     * A PHP callback may be given instead of a strategy name; it receives the
     * template name and returns the strategy to use. The special value "name"
     * is a shortcut for the FileExtensionEscapingStrategy::guess() callback.
     *
     * @param string|false|callable $defaultStrategy An escaping strategy
     */
    public function setDefaultStrategy($defaultStrategy)
    {
        $this->defaultStrategy = 'name' === $defaultStrategy
            ? [FileExtensionEscapingStrategy::class, 'guess']
            : $defaultStrategy;
    }

    /**
     * Resolves the default strategy for a template.
     *
     * @param string $name The template name
     *
     * @return string|false The default strategy to use for the template
     */
    public function getDefaultStrategy($name)
    {
        $strategy = $this->defaultStrategy;

        // Strings are never treated as callables: a strategy called "html" or
        // "js" must not end up calling a function with the same name.
        if (false === $strategy || \is_string($strategy)) {
            return $strategy;
        }

        return \call_user_func($strategy, $name);
    }

    /**
     * Registers an escaper usable through the escape filter.
     *
     * @param string   $strategy The name under which the escaper is selected in the escape call
     * @param callable $callable A valid PHP callable
     */
    public function setEscaper($strategy, callable $callable)
    {
        $this->escapers[$strategy] = $callable;
    }

    /**
     * Returns every escaper registered with setEscaper().
     *
     * @return callable[] The escapers, keyed by strategy name
     */
    public function getEscapers()
    {
        return $this->escapers;
    }

    /**
     * Replaces the whole set of safe classes.
     *
     * @param array $safeClasses Lists of strategy names, keyed by class name
     */
    public function setSafeClasses(array $safeClasses = [])
    {
        $this->safeLookup = [];
        $this->safeClasses = [];

        foreach ($safeClasses as $className => $safeStrategies) {
            $this->addSafeClass($className, $safeStrategies);
        }
    }

    /**
     * Declares a class as safe for the given strategies.
     *
     * Strategies are appended to the ones already known for the class, and the
     * class name is stored without its leading backslashes.
     *
     * @param string $class      The class name
     * @param array  $strategies The names of the strategies the class is safe for
     */
    public function addSafeClass(string $class, array $strategies)
    {
        $class = ltrim($class, '\\');

        $alreadySafeFor = $this->safeClasses[$class] ?? [];
        $this->safeClasses[$class] = array_merge($alreadySafeFor, $strategies);

        foreach ($strategies as $safeStrategy) {
            $this->safeLookup[$safeStrategy][$class] = true;
        }
    }
}
136
// Expose the class under the alternate name Twig_Extension_Escaper as well.
\class_alias('Twig\Extension\EscaperExtension', 'Twig_Extension_Escaper');
138}
139
140namespace {
141use Twig\Environment;
142use Twig\Error\RuntimeError;
143use Twig\Extension\CoreExtension;
144use Twig\Extension\EscaperExtension;
145use Twig\Markup;
146use Twig\Node\Expression\ConstantExpression;
147use Twig\Node\Node;
148
/**
 * Implements the "raw" filter: hands the value back exactly as received.
 *
 * The function does no work on the value itself; the filter is registered
 * with the "is_safe" option set to "all", which is what marks its output as
 * safe.
 *
 * @param string $string A PHP variable
 *
 * @return string The untouched value
 */
function twig_raw_filter($string)
{
    return $string;
}
160
/**
 * Escapes a value for a given output context.
 *
 * Built-in strategies are "html", "js", "css", "html_attr" and "url"; any
 * other strategy is looked up among the escapers registered on the
 * CoreExtension (legacy) and then on the EscaperExtension.
 *
 * Values that are not strings are handled as follows: objects exposing
 * __toString() are cast to string before being escaped, and every other value
 * is returned untouched when a built-in strategy is requested.
 *
 * When called by the auto-escaping feature, Markup instances are returned as
 * is, and objects whose class (or one of its parents or interfaces) is
 * declared safe for the strategy, or for "all", are only cast to string.
 *
 * @param mixed       $string     The value to be escaped
 * @param string      $strategy   The escaping strategy
 * @param string|null $charset    The charset (the environment charset when null)
 * @param bool        $autoescape Whether the function is called by the auto-escaping feature (true) or by the developer (false)
 *
 * @return mixed The escaped string, or the value itself when it is returned untouched
 *
 * @throws RuntimeError When the value is not valid UTF-8 (js, css and html_attr strategies)
 *                      or when the strategy is unknown
 */
function twig_escape_filter(Environment $env, $string, $strategy = 'html', $charset = null, $autoescape = false)
{
    // Markup is already safe output: auto-escaping must not touch it.
    if ($autoescape && $string instanceof Markup) {
        return $string;
    }

    if (!\is_string($string)) {
        $isStringable = \is_object($string) && method_exists($string, '__toString');

        if (!$isStringable) {
            // Built-in strategies leave values that cannot be cast to string alone;
            // custom strategies receive them as they are.
            if (\in_array($strategy, ['html', 'js', 'css', 'html_attr', 'url'])) {
                return $string;
            }
        } else {
            if ($autoescape) {
                $className = \get_class($string);
                $escaper = $env->getExtension(EscaperExtension::class);

                if (!isset($escaper->safeClasses[$className])) {
                    // First time this class is seen: collect the strategies its
                    // parents and interfaces are safe for, and store the result
                    // on the extension so the lookup is done once per class.
                    $escaper->safeClasses[$className] = [];

                    foreach (class_parents($string) + class_implements($string) as $ancestor) {
                        if (!isset($escaper->safeClasses[$ancestor])) {
                            continue;
                        }

                        $escaper->safeClasses[$className] = array_unique(array_merge($escaper->safeClasses[$className], $escaper->safeClasses[$ancestor]));
                        foreach ($escaper->safeClasses[$ancestor] as $safeStrategy) {
                            $escaper->safeLookup[$safeStrategy][$className] = true;
                        }
                    }
                }

                if (isset($escaper->safeLookup[$strategy][$className]) || isset($escaper->safeLookup['all'][$className])) {
                    return (string) $string;
                }
            }

            $string = (string) $string;
        }
    }

    if ('' === $string) {
        return '';
    }

    $charset = $charset ?? $env->getCharset();

    switch ($strategy) {
        case 'html':
            // see https://www.php.net/htmlspecialchars

            // Kept as a static variable so the array is not rebuilt on every
            // call, and declared here rather than at the top of the function
            // so the other strategies do not pay for it.
            static $nativeCharsets = [
                'ISO-8859-1' => true, 'ISO8859-1' => true,
                'ISO-8859-15' => true, 'ISO8859-15' => true,
                'utf-8' => true, 'UTF-8' => true,
                'CP866' => true, 'IBM866' => true, '866' => true,
                'CP1251' => true, 'WINDOWS-1251' => true, 'WIN-1251' => true, '1251' => true,
                'CP1252' => true, 'WINDOWS-1252' => true, '1252' => true,
                'KOI8-R' => true, 'KOI8-RU' => true, 'KOI8R' => true,
                'BIG5' => true, '950' => true,
                'GB2312' => true, '936' => true,
                'BIG5-HKSCS' => true,
                'SHIFT_JIS' => true, 'SJIS' => true, '932' => true,
                'EUC-JP' => true, 'EUCJP' => true,
                'ISO8859-5' => true, 'ISO-8859-5' => true,
                'MACROMAN' => true,
            ];

            $isNative = isset($nativeCharsets[$charset]);
            if (!$isNative && isset($nativeCharsets[strtoupper($charset)])) {
                // remember this spelling so that the next call takes the fast path
                $nativeCharsets[$charset] = true;
                $isNative = true;
            }

            if ($isNative) {
                return htmlspecialchars($string, \ENT_QUOTES | \ENT_SUBSTITUTE, $charset);
            }

            // Charset not in the list above: escape a UTF-8 copy and convert it back.
            return iconv(
                'UTF-8',
                $charset,
                htmlspecialchars(twig_convert_encoding($string, 'UTF-8', $charset), \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8')
            );

        case 'js':
            // Every character but alphanumerics, ",", "." and "_" is replaced
            // by a short escape sequence or by its \uHHHH representation.
            $pattern = '#[^a-zA-Z0-9,\._]#Su';
            $replacer = static function ($matches) {
                /*
                 * A few characters have short escape sequences in JSON and JavaScript.
                 * Escape sequences supported only by JavaScript, not JSON, are omitted.
                 * \" is also supported but omitted, because the resulting string is not HTML safe.
                 */
                static $shortEscapes = [
                    '\\' => '\\\\',
                    '/' => '\\/',
                    "\x08" => '\b',
                    "\x0C" => '\f',
                    "\x0A" => '\n',
                    "\x0D" => '\r',
                    "\x09" => '\t',
                ];

                $char = $matches[0];
                if (isset($shortEscapes[$char])) {
                    return $shortEscapes[$char];
                }

                $codepoint = mb_ord($char, 'UTF-8');
                if ($codepoint >= 0x10000) {
                    // Split characters outside the BMP into surrogate pairs
                    // https://tools.ietf.org/html/rfc2781.html#section-2.1
                    $offset = $codepoint - 0x10000;

                    return sprintf('\u%04X\u%04X', 0xD800 | ($offset >> 10), 0xDC00 | ($offset & 0x3FF));
                }

                return sprintf('\u%04X', $codepoint);
            };
            break;

        case 'css':
            // Every non-alphanumeric character becomes a backslash, its
            // hexadecimal code point and a trailing space.
            $pattern = '#[^a-zA-Z0-9]#Su';
            $replacer = static function ($matches) {
                $char = $matches[0];

                if (1 === \strlen($char)) {
                    $codepoint = \ord($char);
                } else {
                    $codepoint = mb_ord($char, 'UTF-8');
                }

                return sprintf('\\%X ', $codepoint);
            };
            break;

        case 'html_attr':
            $pattern = '#[^a-zA-Z0-9,\.\-_]#Su';
            $replacer = static function ($matches) {
                /**
                 * This function is adapted from code coming from Zend Framework.
                 *
                 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (https://www.zend.com)
                 * @license   https://framework.zend.com/license/new-bsd New BSD License
                 */

                /*
                 * While HTML supports far more named entities, the lowest common denominator
                 * has become HTML5's XML Serialisation which is restricted to the those named
                 * entities that XML supports. Using HTML entities would result in this error:
                 *     XML Parsing Error: undefined entity
                 */
                static $xmlEntities = [
                    34 => '&quot;', /* quotation mark */
                    38 => '&amp;',  /* ampersand */
                    60 => '&lt;',   /* less-than sign */
                    62 => '&gt;',   /* greater-than sign */
                ];

                $chr = $matches[0];
                // ord() only looks at the first byte of the matched character.
                $ord = \ord($chr);

                /*
                 * Characters undefined in HTML are replaced with the hex entity
                 * for the Unicode replacement character.
                 */
                $isControl = $ord <= 0x1f && !\in_array($chr, ["\t", "\n", "\r"], true);
                if ($isControl || ($ord >= 0x7f && $ord <= 0x9f)) {
                    return '&#xFFFD;';
                }

                /*
                 * Per OWASP recommendations, hex entities are used for any
                 * character that has no named entity.
                 */
                if (\strlen($chr) > 1) {
                    return sprintf('&#x%04X;', mb_ord($chr, 'UTF-8'));
                }

                return $xmlEntities[$ord] ?? sprintf('&#x%02X;', $ord);
            };
            break;

        case 'url':
            return rawurlencode($string);

        default:
            // check the ones set on CoreExtension for BC (to be removed in 3.0)
            $legacyEscapers = $env->getExtension(CoreExtension::class)->getEscapers(false);
            if (array_key_exists($strategy, $legacyEscapers)) {
                $customEscaper = $legacyEscapers[$strategy];

                return $customEscaper($env, $string, $charset);
            }

            $registeredEscapers = $env->getExtension(EscaperExtension::class)->getEscapers();
            if (array_key_exists($strategy, $registeredEscapers)) {
                $customEscaper = $registeredEscapers[$strategy];

                return $customEscaper($env, $string, $charset);
            }

            $customNames = array_keys(array_merge($legacyEscapers, $registeredEscapers));
            $validStrategies = implode(', ', array_merge(['html', 'js', 'url', 'css', 'html_attr'], $customNames));

            throw new RuntimeError(sprintf('Invalid escaping strategy "%s" (valid ones: %s).', $strategy, $validStrategies));
    }

    // Shared by the js, css and html_attr strategies: work on a UTF-8 copy,
    // replace the matching characters, then go back to the original charset.
    $needsConversion = 'UTF-8' !== $charset;

    if ($needsConversion) {
        $string = twig_convert_encoding($string, 'UTF-8', $charset);
    }

    if (!preg_match('//u', $string)) {
        throw new RuntimeError('The string to escape is not a valid UTF-8 string.');
    }

    $string = preg_replace_callback($pattern, $replacer, $string);

    if ($needsConversion) {
        $string = iconv('UTF-8', $charset, $string);
    }

    return $string;
}
412
/**
 * Tells which strategies the output of an escape filter call is safe for.
 *
 * Only the first filter argument is looked at: when it is a constant
 * expression its value is returned as the single safe strategy, otherwise
 * nothing is reported as safe. Without any argument, "html" is returned.
 *
 * @param Node $filterArgs The arguments passed to the filter
 *
 * @return array The names of the strategies the output is safe for
 *
 * @internal
 */
function twig_escape_filter_is_safe(Node $filterArgs)
{
    // The loop body always returns, so it only ever sees the first argument.
    foreach ($filterArgs as $firstArg) {
        return $firstArg instanceof ConstantExpression
            ? [$firstArg->getAttribute('value')]
            : [];
    }

    return ['html'];
}
428}
429