1<?php
2/**
3 * Tokenizes JS code.
4 *
5 * PHP version 5
6 *
7 * @category  PHP
8 * @package   PHP_CodeSniffer
9 * @author    Greg Sherwood <gsherwood@squiz.net>
10 * @author    Marc McIntyre <mmcintyre@squiz.net>
11 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
12 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
13 * @link      http://pear.php.net/package/PHP_CodeSniffer
14 */
15
16/**
17 * Tokenizes JS code.
18 *
19 * @category  PHP
20 * @package   PHP_CodeSniffer
21 * @author    Greg Sherwood <gsherwood@squiz.net>
22 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
23 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
24 * @version   Release: @package_version@
25 * @link      http://pear.php.net/package/PHP_CodeSniffer
26 */
27class PHP_CodeSniffer_Tokenizers_JS
28{
29
30    /**
31     * If TRUE (the default), files that appear to be minified will not be processed.
32     *
33     * @var boolean
34     */
35    public $skipMinified = true;
36
37    /**
38     * A list of tokens that are allowed to open a scope.
39     *
40     * Each entry is keyed by the scope-opening token and holds: 'start' and
41     * 'end' (the tokens that open and close the scope), 'strict' (if the token
42     * strictly requires an opener), 'shared' (if it can share a scope closer)
43     * and 'with' (who the closer can be shared with, e.g. CASE and DEFAULT).
44     *
45     * @var array
46     */
47    public $scopeOpeners = array(
48                            T_IF       => array(
49                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
50                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
51                                           'strict' => false,
52                                           'shared' => false,
53                                           'with'   => array(),
54                                          ),
55                            T_TRY      => array(
56                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
57                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
58                                           'strict' => true,
59                                           'shared' => false,
60                                           'with'   => array(),
61                                          ),
62                            T_CATCH    => array(
63                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
64                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
65                                           'strict' => true,
66                                           'shared' => false,
67                                           'with'   => array(),
68                                          ),
69                            T_ELSE     => array(
70                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
71                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
72                                           'strict' => false,
73                                           'shared' => false,
74                                           'with'   => array(),
75                                          ),
76                            T_FOR      => array(
77                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
78                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
79                                           'strict' => false,
80                                           'shared' => false,
81                                           'with'   => array(),
82                                          ),
83                            T_FUNCTION => array(
84                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
85                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
86                                           'strict' => false,
87                                           'shared' => false,
88                                           'with'   => array(),
89                                          ),
90                            T_WHILE    => array(
91                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
92                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
93                                           'strict' => false,
94                                           'shared' => false,
95                                           'with'   => array(),
96                                          ),
97                            T_DO       => array(
98                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
99                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
100                                           'strict' => true,
101                                           'shared' => false,
102                                           'with'   => array(),
103                                          ),
104                            T_SWITCH   => array(
105                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
106                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
107                                           'strict' => true,
108                                           'shared' => false,
109                                           'with'   => array(),
110                                          ),
111                            T_CASE     => array(
112                                           'start'  => array(T_COLON => T_COLON),
113                                           'end'    => array(
114                                                        T_BREAK    => T_BREAK,
115                                                        T_RETURN   => T_RETURN,
116                                                        T_CONTINUE => T_CONTINUE,
117                                                        T_THROW    => T_THROW,
118                                                       ),
119                                           'strict' => true,
120                                           'shared' => true,
121                                           'with'   => array(
122                                                        T_DEFAULT => T_DEFAULT,
123                                                        T_CASE    => T_CASE,
124                                                        T_SWITCH  => T_SWITCH,
125                                                       ),
126                                          ),
127                            T_DEFAULT  => array(
128                                           'start'  => array(T_COLON => T_COLON),
129                                           'end'    => array(
130                                                        T_BREAK    => T_BREAK,
131                                                        T_RETURN   => T_RETURN,
132                                                        T_CONTINUE => T_CONTINUE,
133                                                        T_THROW    => T_THROW,
134                                                       ),
135                                           'strict' => true,
136                                           'shared' => true,
137                                           'with'   => array(
138                                                        T_CASE   => T_CASE,
139                                                        T_SWITCH => T_SWITCH,
140                                                       ),
141                                          ),
142                           );
143
144    /**
145     * A list of tokens that end the scope.
146     *
147     * Intended as a lookup of the 'end' tokens used in $scopeOpeners, kept
148     * separately to save time during parsing of the file. NOTE(review): only
149     * two of those end tokens are listed here (no RETURN/CONTINUE/THROW).
150     *
151     * @var array
152     */
153    public $endScopeTokens = array(
154                              T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
155                              T_BREAK               => T_BREAK,
156                             );
157
158    /**
159     * A map of special JS strings (keywords, operators, etc.) to their token types.
160     *
161     * @var array
162     */
163    protected $tokenValues = array(
164                              'function'  => 'T_FUNCTION',
165                              'prototype' => 'T_PROTOTYPE',
166                              'try'       => 'T_TRY',
167                              'catch'     => 'T_CATCH',
168                              'return'    => 'T_RETURN',
169                              'throw'     => 'T_THROW',
170                              'break'     => 'T_BREAK',
171                              'switch'    => 'T_SWITCH',
172                              'continue'  => 'T_CONTINUE',
173                              'if'        => 'T_IF',
174                              'else'      => 'T_ELSE',
175                              'do'        => 'T_DO',
176                              'while'     => 'T_WHILE',
177                              'for'       => 'T_FOR',
178                              'var'       => 'T_VAR',
179                              'case'      => 'T_CASE',
180                              'default'   => 'T_DEFAULT',
181                              'true'      => 'T_TRUE',
182                              'false'     => 'T_FALSE',
183                              'null'      => 'T_NULL',
184                              'this'      => 'T_THIS',
185                              'typeof'    => 'T_TYPEOF',
186                              '('         => 'T_OPEN_PARENTHESIS',
187                              ')'         => 'T_CLOSE_PARENTHESIS',
188                              '{'         => 'T_OPEN_CURLY_BRACKET',
189                              '}'         => 'T_CLOSE_CURLY_BRACKET',
190                              '['         => 'T_OPEN_SQUARE_BRACKET',
191                              ']'         => 'T_CLOSE_SQUARE_BRACKET',
192                              '?'         => 'T_INLINE_THEN',
193                              '.'         => 'T_OBJECT_OPERATOR',
194                              '+'         => 'T_PLUS',
195                              '-'         => 'T_MINUS',
196                              '*'         => 'T_MULTIPLY',
197                              '%'         => 'T_MODULUS',
198                              '/'         => 'T_DIVIDE',
199                              '^'         => 'T_LOGICAL_XOR', // NOTE(review): ^ is bitwise XOR in JS - confirm name.
200                              ','         => 'T_COMMA',
201                              ';'         => 'T_SEMICOLON',
202                              ':'         => 'T_COLON',
203                              '<'         => 'T_LESS_THAN',
204                              '>'         => 'T_GREATER_THAN',
205                              '<<'        => 'T_SL',
206                              '>>'        => 'T_SR',
207                              '>>>'       => 'T_ZSR',
208                              '<<='       => 'T_SL_EQUAL',
209                              '>>='       => 'T_SR_EQUAL',
210                              '>>>='      => 'T_ZSR_EQUAL',
211                              '<='        => 'T_IS_SMALLER_OR_EQUAL',
212                              '>='        => 'T_IS_GREATER_OR_EQUAL',
213                              '=>'        => 'T_DOUBLE_ARROW',
214                              '!'         => 'T_BOOLEAN_NOT',
215                              '||'        => 'T_BOOLEAN_OR',
216                              '&&'        => 'T_BOOLEAN_AND',
217                              '|'         => 'T_BITWISE_OR',
218                              '&'         => 'T_BITWISE_AND',
219                              '!='        => 'T_IS_NOT_EQUAL',
220                              '!=='       => 'T_IS_NOT_IDENTICAL',
221                              '='         => 'T_EQUAL',
222                              '=='        => 'T_IS_EQUAL',
223                              '==='       => 'T_IS_IDENTICAL',
224                              '-='        => 'T_MINUS_EQUAL',
225                              '+='        => 'T_PLUS_EQUAL',
226                              '*='        => 'T_MUL_EQUAL',
227                              '/='        => 'T_DIV_EQUAL',
228                              '%='        => 'T_MOD_EQUAL',
229                              '++'        => 'T_INC',
230                              '--'        => 'T_DEC',
231                              '//'        => 'T_COMMENT',
232                              '/*'        => 'T_COMMENT',
233                              '/**'       => 'T_DOC_COMMENT',
234                              '*/'        => 'T_COMMENT',
235                             );
236
237    /**
238     * A list of string delimiters, keyed by the quote character.
239     *
240     * @var array
241     */
242    protected $stringTokens = array(
243                               '\'' => '\'',
244                               '"'  => '"',
245                              );
246
247    /**
248     * A list of tokens that start comments, mapped to the token that ends them.
249     *
250     * @var array
251     */
252    protected $commentTokens = array(
253                                '//'  => null, // Ends at the next newline.
254                                '/*'  => '*/',
255                                '/**' => '*/',
256                               );
257
258
259    /**
260     * Creates an array of tokens when given some JS code.
261     *
262     * @param string $string  The string to tokenize.
263     * @param string $eolChar The EOL character to use for splitting strings.
264     *
265     * @return array
266     */
267    public function tokenizeString($string, $eolChar='\n')
268    {
269        if (PHP_CODESNIFFER_VERBOSITY > 1) {
270            echo "\t*** START JS TOKENIZING ***".PHP_EOL;
271        }
272
273        $maxTokenLength = 0;
274        foreach ($this->tokenValues as $token => $values) {
275            if (strlen($token) > $maxTokenLength) {
276                $maxTokenLength = strlen($token);
277            }
278        }
279
280        $tokens          = array();
281        $inString        = '';
282        $stringChar      = null;
283        $inComment       = '';
284        $buffer          = '';
285        $preStringBuffer = '';
286        $cleanBuffer     = false;
287
288        $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
289
290        $tokens[] = array(
291                     'code'    => T_OPEN_TAG,
292                     'type'    => 'T_OPEN_TAG',
293                     'content' => '',
294                    );
295
296        // Convert newlines to single characters for ease of
297        // processing. We will change them back later.
298        $string = str_replace($eolChar, "\n", $string);
299
300        $chars    = str_split($string);
301        $numChars = count($chars);
302        for ($i = 0; $i < $numChars; $i++) {
303            $char = $chars[$i];
304
305            if (PHP_CODESNIFFER_VERBOSITY > 1) {
306                $content       = PHP_CodeSniffer::prepareForOutput($char);
307                $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);
308
309                if ($inString !== '') {
310                    echo "\t";
311                }
312
313                if ($inComment !== '') {
314                    echo "\t";
315                }
316
317                echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
318            }//end if
319
320            if ($inString === '' && $inComment === '' && $buffer !== '') {
321                // If the buffer only has whitespace and we are about to
322                // add a character, store the whitespace first.
323                if (trim($char) !== '' && trim($buffer) === '') {
324                    $tokens[] = array(
325                                 'code'    => T_WHITESPACE,
326                                 'type'    => 'T_WHITESPACE',
327                                 'content' => str_replace("\n", $eolChar, $buffer),
328                                );
329
330                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
331                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
332                        echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
333                    }
334
335                    $buffer = '';
336                }
337
338                // If the buffer is not whitespace and we are about to
339                // add a whitespace character, store the content first.
340                if ($inString === ''
341                    && $inComment === ''
342                    && trim($char) === ''
343                    && trim($buffer) !== ''
344                ) {
345                    $tokens[] = array(
346                                 'code'    => T_STRING,
347                                 'type'    => 'T_STRING',
348                                 'content' => str_replace("\n", $eolChar, $buffer),
349                                );
350
351                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
352                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
353                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
354                    }
355
356                    $buffer = '';
357                }
358            }//end if
359
360            // Process strings.
361            if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
362                if ($inString === $char) {
363                    // This could be the end of the string, but make sure it
364                    // is not escaped first.
365                    $escapes = 0;
366                    for ($x = ($i - 1); $x >= 0; $x--) {
367                        if ($chars[$x] !== '\\') {
368                            break;
369                        }
370
371                        $escapes++;
372                    }
373
374                    if ($escapes === 0 || ($escapes % 2) === 0) {
375                        // There is an even number escape chars,
376                        // so this is not escaped, it is the end of the string.
377                        $tokens[] = array(
378                                     'code'    => T_CONSTANT_ENCAPSED_STRING,
379                                     'type'    => 'T_CONSTANT_ENCAPSED_STRING',
380                                     'content' => str_replace("\n", $eolChar, $buffer).$char,
381                                    );
382
383                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
384                            echo "\t\t* found end of string *".PHP_EOL;
385                            $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
386                            echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
387                        }
388
389                        $buffer          = '';
390                        $preStringBuffer = '';
391                        $inString        = '';
392                        $stringChar      = null;
393                        continue;
394                    }//end if
395                } else if ($inString === '') {
396                    $inString        = $char;
397                    $stringChar      = $i;
398                    $preStringBuffer = $buffer;
399
400                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
401                        echo "\t\t* looking for string closer *".PHP_EOL;
402                    }
403                }//end if
404            }//end if
405
406            if ($inString !== '' && $char === "\n") {
407                // Unless this newline character is escaped, the string did not
408                // end before the end of the line, which means it probably
409                // wasn't a string at all (maybe a regex).
410                if ($chars[($i - 1)] !== '\\') {
411                    $i      = $stringChar;
412                    $buffer = $preStringBuffer;
413                    $preStringBuffer = '';
414                    $inString        = '';
415                    $stringChar      = null;
416                    $char            = $chars[$i];
417
418                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
419                        echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
420                    }
421                }
422            }
423
424            $buffer .= $char;
425
426            // We don't look for special tokens inside strings,
427            // so if we are in a string, we can continue here now
428            // that the current char is in the buffer.
429            if ($inString !== '') {
430                continue;
431            }
432
433            // Special case for T_DIVIDE which can actually be
434            // the start of a regular expression.
435            if ($buffer === $char && $char === '/' && $chars[($i + 1)] !== '*') {
436                $regex = $this->getRegexToken(
437                    $i,
438                    $string,
439                    $chars,
440                    $tokens,
441                    $eolChar
442                );
443
444                if ($regex !== null) {
445                    $tokens[] = array(
446                                 'code'    => T_REGULAR_EXPRESSION,
447                                 'type'    => 'T_REGULAR_EXPRESSION',
448                                 'content' => $regex['content'],
449                                );
450
451                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
452                        $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
453                        echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
454                    }
455
456                    $i           = $regex['end'];
457                    $buffer      = '';
458                    $cleanBuffer = false;
459                    continue;
460                }//end if
461            }//end if
462
463            // Check for known tokens, but ignore tokens found that are not at
464            // the end of a string, like FOR and this.FORmat.
465            if (isset($this->tokenValues[strtolower($buffer)]) === true
466                && (preg_match('|[a-zA-z0-9_]|', $char) === 0
467                || isset($chars[($i + 1)]) === false
468                || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
469            ) {
470                $matchedToken    = false;
471                $lookAheadLength = ($maxTokenLength - strlen($buffer));
472
473                if ($lookAheadLength > 0) {
474                    // The buffer contains a token type, but we need
475                    // to look ahead at the next chars to see if this is
476                    // actually part of a larger token. For example,
477                    // FOR and FOREACH.
478                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
479                        echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
480                    }
481
482                    $charBuffer = $buffer;
483                    for ($x = 1; $x <= $lookAheadLength; $x++) {
484                        if (isset($chars[($i + $x)]) === false) {
485                            break;
486                        }
487
488                        $charBuffer .= $chars[($i + $x)];
489
490                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
491                            $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
492                            echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
493                        }
494
495                        if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
496                            // We've found something larger that matches
497                            // so we can ignore this char. Except for 1 very specific
498                            // case where a comment like /**/ needs to tokenize as
499                            // T_COMMENT and not T_DOC_COMMENT.
500                            $oldType = $this->tokenValues[strtolower($buffer)];
501                            $newType = $this->tokenValues[strtolower($charBuffer)];
502                            if ($oldType === 'T_COMMENT'
503                                && $newType === 'T_DOC_COMMENT'
504                                && $chars[($i + $x + 1)] === '/'
505                            ) {
506                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
507                                    echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
508                                }
509                            } else {
510                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
511                                    echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
512                                }
513
514                                $matchedToken = true;
515                                break;
516                            }
517                        }//end if
518                    }//end for
519                }//end if
520
521                if ($matchedToken === false) {
522                    if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
523                        echo "\t\t* look ahead found nothing *".PHP_EOL;
524                    }
525
526                    $value = $this->tokenValues[strtolower($buffer)];
527
528                    if ($value === 'T_FUNCTION' && $buffer !== 'function') {
529                        // The function keyword needs to be all lowercase or else
530                        // it is just a function called "Function".
531                        $value = 'T_STRING';
532                    }
533
534                    $tokens[] = array(
535                                 'code'    => constant($value),
536                                 'type'    => $value,
537                                 'content' => $buffer,
538                                );
539
540                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
541                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
542                        echo "\t=> Added token $value ($content)".PHP_EOL;
543                    }
544
545                    $cleanBuffer = true;
546                }//end if
547            } else if (isset($this->tokenValues[strtolower($char)]) === true) {
548                // No matter what token we end up using, we don't
549                // need the content in the buffer any more because we have
550                // found a valid token.
551                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
552                if ($newContent !== '') {
553                    $tokens[] = array(
554                                 'code'    => T_STRING,
555                                 'type'    => 'T_STRING',
556                                 'content' => $newContent,
557                                );
558
559                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
560                        $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
561                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
562                    }
563                }
564
565                if (PHP_CODESNIFFER_VERBOSITY > 1) {
566                    echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
567                }
568
569                // The char is a token type, but we need to look ahead at the
570                // next chars to see if this is actually part of a larger token.
571                // For example, = and ===.
572                $charBuffer   = $char;
573                $matchedToken = false;
574                for ($x = 1; $x <= $maxTokenLength; $x++) {
575                    if (isset($chars[($i + $x)]) === false) {
576                        break;
577                    }
578
579                    $charBuffer .= $chars[($i + $x)];
580
581                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
582                        $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
583                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
584                    }
585
586                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
587                        // We've found something larger that matches
588                        // so we can ignore this char.
589                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
590                            $type = $this->tokenValues[strtolower($charBuffer)];
591                            echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
592                        }
593
594                        $matchedToken = true;
595                        break;
596                    }
597                }//end for
598
599                if ($matchedToken === false) {
600                    $value    = $this->tokenValues[strtolower($char)];
601                    $tokens[] = array(
602                                 'code'    => constant($value),
603                                 'type'    => $value,
604                                 'content' => $char,
605                                );
606
607                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
608                        echo "\t\t* look ahead found nothing *".PHP_EOL;
609                        $content = PHP_CodeSniffer::prepareForOutput($char);
610                        echo "\t=> Added token $value ($content)".PHP_EOL;
611                    }
612
613                    $cleanBuffer = true;
614                } else {
615                    $buffer = $char;
616                }//end if
617            }//end if
618
619            // Keep track of content inside comments.
620            if ($inComment === ''
621                && array_key_exists($buffer, $this->commentTokens) === true
622            ) {
623                // This is not really a comment if the content
624                // looks like \// (i.e., it is escaped).
625                if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
626                    $lastToken   = array_pop($tokens);
627                    $lastContent = $lastToken['content'];
628                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
629                        $value   = $this->tokenValues[strtolower($lastContent)];
630                        $content = PHP_CodeSniffer::prepareForOutput($lastContent);
631                        echo "\t=> Removed token $value ($content)".PHP_EOL;
632                    }
633
634                    $lastChars    = str_split($lastContent);
635                    $lastNumChars = count($lastChars);
636                    for ($x = 0; $x < $lastNumChars; $x++) {
637                        $lastChar = $lastChars[$x];
638                        $value    = $this->tokenValues[strtolower($lastChar)];
639                        $tokens[] = array(
640                                     'code'    => constant($value),
641                                     'type'    => $value,
642                                     'content' => $lastChar,
643                                    );
644
645                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
646                            $content = PHP_CodeSniffer::prepareForOutput($lastChar);
647                            echo "\t=> Added token $value ($content)".PHP_EOL;
648                        }
649                    }
650                } else {
651                    // We have started a comment.
652                    $inComment = $buffer;
653
654                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
655                        echo "\t\t* looking for end of comment *".PHP_EOL;
656                    }
657                }//end if
658            } else if ($inComment !== '') {
659                if ($this->commentTokens[$inComment] === null) {
660                    // Comment ends at the next newline.
661                    if (strpos($buffer, "\n") !== false) {
662                        $inComment = '';
663                    }
664                } else {
665                    if ($this->commentTokens[$inComment] === $buffer) {
666                        $inComment = '';
667                    }
668                }
669
670                if (PHP_CODESNIFFER_VERBOSITY > 1) {
671                    if ($inComment === '') {
672                        echo "\t\t* found end of comment *".PHP_EOL;
673                    }
674                }
675
676                if ($inComment === '' && $cleanBuffer === false) {
677                    $tokens[] = array(
678                                 'code'    => T_STRING,
679                                 'type'    => 'T_STRING',
680                                 'content' => str_replace("\n", $eolChar, $buffer),
681                                );
682
683                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
684                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
685                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
686                    }
687
688                    $buffer = '';
689                }
690            }//end if
691
692            if ($cleanBuffer === true) {
693                $buffer      = '';
694                $cleanBuffer = false;
695            }
696        }//end for
697
698        if (empty($buffer) === false) {
699            // Buffer contains whitespace from the end of the file.
700            $tokens[] = array(
701                         'code'    => T_WHITESPACE,
702                         'type'    => 'T_WHITESPACE',
703                         'content' => str_replace("\n", $eolChar, $buffer),
704                        );
705
706            if (PHP_CODESNIFFER_VERBOSITY > 1) {
707                $content = PHP_CodeSniffer::prepareForOutput($buffer);
708                echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
709            }
710        }
711
712        $tokens[] = array(
713                     'code'    => T_CLOSE_TAG,
714                     'type'    => 'T_CLOSE_TAG',
715                     'content' => '',
716                    );
717
718        /*
719            Now that we have done some basic tokenizing, we need to
720            modify the tokens to join some together and split some apart
721            so they match what the PHP tokenizer does.
722        */
723
724        $finalTokens = array();
725        $newStackPtr = 0;
726        $numTokens   = count($tokens);
727        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
728            $token = $tokens[$stackPtr];
729
730            /*
731                Look for comments and join the tokens together.
732            */
733
734            if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
735                $newContent   = '';
736                $tokenContent = $token['content'];
737
738                $endContent = null;
739                if (isset($this->commentTokens[$tokenContent]) === true) {
740                    $endContent = $this->commentTokens[$tokenContent];
741                }
742
743                while ($tokenContent !== $endContent) {
744                    if ($endContent === null
745                        && strpos($tokenContent, $eolChar) !== false
746                    ) {
747                        // A null end token means the comment ends at the end of
748                        // the line so we look for newlines and split the token.
749                        $tokens[$stackPtr]['content'] = substr(
750                            $tokenContent,
751                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
752                        );
753
754                        $tokenContent = substr(
755                            $tokenContent,
756                            0,
757                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
758                        );
759
760                        // If the substr failed, skip the token as the content
761                        // will now be blank.
762                        if ($tokens[$stackPtr]['content'] !== false
763                            && $tokens[$stackPtr]['content'] !== ''
764                        ) {
765                            $stackPtr--;
766                        }
767
768                        break;
769                    }//end if
770
771                    $stackPtr++;
772                    $newContent .= $tokenContent;
773                    if (isset($tokens[$stackPtr]) === false) {
774                        break;
775                    }
776
777                    $tokenContent = $tokens[$stackPtr]['content'];
778                }//end while
779
780                if ($token['code'] === T_DOC_COMMENT) {
781                    $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
782                    foreach ($commentTokens as $commentToken) {
783                        $finalTokens[$newStackPtr] = $commentToken;
784                        $newStackPtr++;
785                    }
786
787                    continue;
788                } else {
789                    // Save the new content in the current token so
790                    // the code below can chop it up on newlines.
791                    $token['content'] = $newContent.$tokenContent;
792                }
793            }//end if
794
795            /*
796                If this token has newlines in its content, split each line up
797                and create a new token for each line. We do this so it's easier
798                to ascertain where errors occur on a line.
799                Note that $token[1] is the token's content.
800            */
801
802            if (strpos($token['content'], $eolChar) !== false) {
803                $tokenLines = explode($eolChar, $token['content']);
804                $numLines   = count($tokenLines);
805
806                for ($i = 0; $i < $numLines; $i++) {
807                    $newToken['content'] = $tokenLines[$i];
808                    if ($i === ($numLines - 1)) {
809                        if ($tokenLines[$i] === '') {
810                            break;
811                        }
812                    } else {
813                        $newToken['content'] .= $eolChar;
814                    }
815
816                    $newToken['type']          = $token['type'];
817                    $newToken['code']          = $token['code'];
818                    $finalTokens[$newStackPtr] = $newToken;
819                    $newStackPtr++;
820                }
821            } else {
822                $finalTokens[$newStackPtr] = $token;
823                $newStackPtr++;
824            }//end if
825
826            // Convert numbers, including decimals.
827            if ($token['code'] === T_STRING
828                || $token['code'] === T_OBJECT_OPERATOR
829            ) {
830                $newContent  = '';
831                $oldStackPtr = $stackPtr;
832                while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
833                    $newContent .= $tokens[$stackPtr]['content'];
834                    $stackPtr++;
835                }
836
837                if ($newContent !== '' && $newContent !== '.') {
838                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
839                    if (ctype_digit($newContent) === true) {
840                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
841                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
842                    } else {
843                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
844                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
845                    }
846
847                    $stackPtr--;
848                    continue;
849                } else {
850                    $stackPtr = $oldStackPtr;
851                }
852            }//end if
853
854            // Convert the token after an object operator into a string, in most cases.
855            if ($token['code'] === T_OBJECT_OPERATOR) {
856                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
857                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
858                        continue;
859                    }
860
861                    if ($tokens[$i]['code'] !== T_PROTOTYPE
862                        && $tokens[$i]['code'] !== T_LNUMBER
863                        && $tokens[$i]['code'] !== T_DNUMBER
864                    ) {
865                        $tokens[$i]['code'] = T_STRING;
866                        $tokens[$i]['type'] = 'T_STRING';
867                    }
868
869                    break;
870                }
871            }
872        }//end for
873
874        if (PHP_CODESNIFFER_VERBOSITY > 1) {
875            echo "\t*** END TOKENIZING ***".PHP_EOL;
876        }
877
878        return $finalTokens;
879
880    }//end tokenizeString()
881
882
883    /**
884     * Tokenizes a regular expression if one is found.
885     *
886     * If a regular expression is not found, NULL is returned.
887     *
888     * @param string $char    The index of the possible regex start character.
889     * @param string $string  The complete content of the string being tokenized.
890     * @param string $chars   An array of characters being tokenized.
891     * @param string $tokens  The current array of tokens found in the string.
892     * @param string $eolChar The EOL character to use for splitting strings.
893     *
894     * @return void
895     */
896    public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
897    {
898        $beforeTokens = array(
899                         T_EQUAL               => true,
900                         T_IS_NOT_EQUAL        => true,
901                         T_IS_IDENTICAL        => true,
902                         T_IS_NOT_IDENTICAL    => true,
903                         T_OPEN_PARENTHESIS    => true,
904                         T_OPEN_SQUARE_BRACKET => true,
905                         T_RETURN              => true,
906                         T_BOOLEAN_OR          => true,
907                         T_BOOLEAN_AND         => true,
908                         T_BITWISE_OR          => true,
909                         T_BITWISE_AND         => true,
910                         T_COMMA               => true,
911                         T_COLON               => true,
912                         T_TYPEOF              => true,
913                         T_INLINE_THEN         => true,
914                         T_INLINE_ELSE         => true,
915                        );
916
917        $afterTokens = array(
918                        ','      => true,
919                        ')'      => true,
920                        ']'      => true,
921                        ';'      => true,
922                        ' '      => true,
923                        '.'      => true,
924                        ':'      => true,
925                        $eolChar => true,
926                       );
927
928        // Find the last non-whitespace token that was added
929        // to the tokens array.
930        $numTokens = count($tokens);
931        for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
932            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
933                break;
934            }
935        }
936
937        if (isset($beforeTokens[$tokens[$prev]['code']]) === false) {
938            return null;
939        }
940
941        // This is probably a regular expression, so look for the end of it.
942        if (PHP_CODESNIFFER_VERBOSITY > 1) {
943            echo "\t* token possibly starts a regular expression *".PHP_EOL;
944        }
945
946        $numChars = count($chars);
947        for ($next = ($char + 1); $next < $numChars; $next++) {
948            if ($chars[$next] === '/') {
949                // Just make sure this is not escaped first.
950                if ($chars[($next - 1)] !== '\\') {
951                    // In the simple form: /.../ so we found the end.
952                    break;
953                } else if ($chars[($next - 2)] === '\\') {
954                    // In the form: /...\\/ so we found the end.
955                    break;
956                }
957            } else {
958                $possibleEolChar = substr($string, $next, strlen($eolChar));
959                if ($possibleEolChar === $eolChar) {
960                    // This is the last token on the line and regular
961                    // expressions need to be defined on a single line,
962                    // so this is not a regular expression.
963                    break;
964                }
965            }
966        }
967
968        if ($chars[$next] !== '/') {
969            if (PHP_CODESNIFFER_VERBOSITY > 1) {
970                echo "\t* could not find end of regular expression *".PHP_EOL;
971            }
972
973            return null;
974        }
975
976        while (preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0) {
977            // The token directly after the end of the regex can
978            // be modifiers like global and case insensitive
979            // (.e.g, /pattern/gi).
980            $next++;
981        }
982
983        $regexEnd = $next;
984        if (PHP_CODESNIFFER_VERBOSITY > 1) {
985            echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
986        }
987
988        for ($next = ($next + 1); $next < $numChars; $next++) {
989            if ($chars[$next] !== ' ') {
990                break;
991            } else {
992                $possibleEolChar = substr($string, $next, strlen($eolChar));
993                if ($possibleEolChar === $eolChar) {
994                    // This is the last token on the line.
995                    break;
996                }
997            }
998        }
999
1000        if (isset($afterTokens[$chars[$next]]) === false) {
1001            if (PHP_CODESNIFFER_VERBOSITY > 1) {
1002                echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
1003            }
1004
1005            return null;
1006        }
1007
1008        // This is a regular expression, so join all the tokens together.
1009        $content = '';
1010        for ($x = $char; $x <= $regexEnd; $x++) {
1011            $content .= $chars[$x];
1012        }
1013
1014        $token = array(
1015                  'start'   => $char,
1016                  'end'     => $regexEnd,
1017                  'content' => $content,
1018                 );
1019
1020        return $token;
1021
1022    }//end getRegexToken()
1023
1024
1025    /**
1026     * Performs additional processing after main tokenizing.
1027     *
1028     * This additional processing looks for properties, closures, labels and objects.
1029     *
1030     * @param array  $tokens  The array of tokens to process.
1031     * @param string $eolChar The EOL character to use for splitting strings.
1032     *
1033     * @return void
1034     */
1035    public function processAdditional(&$tokens, $eolChar)
1036    {
1037        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1038            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
1039        }
1040
1041        $numTokens  = count($tokens);
1042        $classStack = array();
1043
1044        for ($i = 0; $i < $numTokens; $i++) {
1045            if (PHP_CODESNIFFER_VERBOSITY > 1) {
1046                $type    = $tokens[$i]['type'];
1047                $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);
1048
1049                echo str_repeat("\t", count($classStack));
1050                echo "\tProcess token $i: $type => $content".PHP_EOL;
1051            }
1052
1053            // Looking for functions that are actually closures.
1054            if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
1055                for ($x = ($i + 1); $x < $numTokens; $x++) {
1056                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1057                        break;
1058                    }
1059                }
1060
1061                if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
1062                    $tokens[$i]['code'] = T_CLOSURE;
1063                    $tokens[$i]['type'] = 'T_CLOSURE';
1064                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1065                        $line = $tokens[$i]['line'];
1066                        echo str_repeat("\t", count($classStack));
1067                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
1068                    }
1069
1070                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
1071                        if (isset($tokens[$x]['conditions'][$i]) === false) {
1072                            continue;
1073                        }
1074
1075                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
1076                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1077                            $type = $tokens[$x]['type'];
1078                            echo str_repeat("\t", count($classStack));
1079                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
1080                        }
1081                    }
1082                }//end if
1083
1084                continue;
1085            } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
1086                && isset($tokens[$i]['scope_condition']) === false
1087                && isset($tokens[$i]['bracket_closer']) === true
1088            ) {
1089                $classStack[] = $i;
1090
1091                $closer = $tokens[$i]['bracket_closer'];
1092                $tokens[$i]['code']      = T_OBJECT;
1093                $tokens[$i]['type']      = 'T_OBJECT';
1094                $tokens[$closer]['code'] = T_CLOSE_OBJECT;
1095                $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';
1096
1097                if (PHP_CODESNIFFER_VERBOSITY > 1) {
1098                    echo str_repeat("\t", count($classStack));
1099                    echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
1100                    echo str_repeat("\t", count($classStack));
1101                    echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
1102                }
1103
1104                for ($x = ($i + 1); $x < $closer; $x++) {
1105                    $tokens[$x]['conditions'][$i] = T_OBJECT;
1106                    ksort($tokens[$x]['conditions'], SORT_NUMERIC);
1107                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1108                        $type = $tokens[$x]['type'];
1109                        echo str_repeat("\t", count($classStack));
1110                        echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
1111                    }
1112                }
1113            } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
1114                $opener = array_pop($classStack);
1115            } else if ($tokens[$i]['code'] === T_COLON) {
1116                // If it is a scope opener, it belongs to a
1117                // DEFAULT or CASE statement.
1118                if (isset($tokens[$i]['scope_condition']) === true) {
1119                    continue;
1120                }
1121
1122                // Make sure this is not part of an inline IF statement.
1123                for ($x = ($i - 1); $x >= 0; $x--) {
1124                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
1125                        $tokens[$i]['code'] = T_INLINE_ELSE;
1126                        $tokens[$i]['type'] = 'T_INLINE_ELSE';
1127
1128                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1129                            echo str_repeat("\t", count($classStack));
1130                            echo "\t* token $i converted from T_COLON to T_INLINE_THEN *".PHP_EOL;
1131                        }
1132
1133                        continue(2);
1134                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
1135                        break;
1136                    }
1137                }
1138
1139                // The string to the left of the colon is either a property or label.
1140                for ($label = ($i - 1); $label >= 0; $label--) {
1141                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
1142                        break;
1143                    }
1144                }
1145
1146                if ($tokens[$label]['code'] !== T_STRING
1147                    && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING
1148                ) {
1149                    continue;
1150                }
1151
1152                if (empty($classStack) === false) {
1153                    $tokens[$label]['code'] = T_PROPERTY;
1154                    $tokens[$label]['type'] = 'T_PROPERTY';
1155
1156                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1157                        echo str_repeat("\t", count($classStack));
1158                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
1159                    }
1160                } else {
1161                    $tokens[$label]['code'] = T_LABEL;
1162                    $tokens[$label]['type'] = 'T_LABEL';
1163
1164                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1165                        echo str_repeat("\t", count($classStack));
1166                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
1167                    }
1168                }//end if
1169            }//end if
1170        }//end for
1171
1172        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1173            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
1174        }
1175
1176    }//end processAdditional()
1177
1178
1179}//end class
1180