1<?php
2/**
3 * Tokenizes PHP code.
4 *
5 * PHP version 5
6 *
7 * @category  PHP
8 * @package   PHP_CodeSniffer
9 * @author    Greg Sherwood <gsherwood@squiz.net>
10 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
11 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
12 * @link      http://pear.php.net/package/PHP_CodeSniffer
13 */
14
15/**
16 * Tokenizes PHP code.
17 *
18 * @category  PHP
19 * @package   PHP_CodeSniffer
20 * @author    Greg Sherwood <gsherwood@squiz.net>
21 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
22 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
23 * @version   Release: @package_version@
24 * @link      http://pear.php.net/package/PHP_CodeSniffer
25 */
26class PHP_CodeSniffer_Tokenizers_PHP
27{
28
29    /**
30     * If TRUE, files that appear to be minified will not be processed.
31     *
     * Defaults to FALSE for this tokenizer. The property is public, so it
     * can be changed from outside the class.
     * NOTE(review): the code that reads this flag is not part of this
     * section of the file; confirm where the minified check happens.
     *
32     * @var boolean
33     */
34    public $skipMinified = false;
35
36    /**
37     * A list of tokens that are allowed to open a scope.
38     *
39     * The array is keyed by the token that owns the scope (T_IF, T_CLASS,
40     * T_CASE and so on). Each entry is an array with five keys:
41     *
42     *  - 'start'  The tokens that may open the scope.
     *  - 'end'    The tokens that may close the scope.
     *  - 'strict' Whether the token strictly requires an opener.
     *  - 'shared' Whether the token can share a scope closer.
     *  - 'with'   The tokens the scope closer can be shared with.
     *
     * An example of a token that shares a scope closer is a CASE scope,
     * which lists T_DEFAULT, T_CASE and T_SWITCH in its 'with' array.
     *
     * Every token in the 'start', 'end' and 'with' lists is stored as both
     * the key and the value (presumably so membership can be tested with
     * isset(); the code that reads this table is not in this section).
     *
     * NOTE(review): the T_IF, T_ELSE and T_ELSEIF entries have a non-empty
     * 'with' list even though 'shared' is FALSE; confirm how the consumer
     * of this table treats that combination.
43     *
44     * @var array
45     */
46    public $scopeOpeners = array(
47                            T_IF            => array(
48                                                'start'  => array(
49                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
50                                                             T_COLON              => T_COLON,
51                                                            ),
52                                                'end'    => array(
53                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
54                                                             T_ENDIF               => T_ENDIF,
55                                                             T_ELSE                => T_ELSE,
56                                                             T_ELSEIF              => T_ELSEIF,
57                                                            ),
58                                                'strict' => false,
59                                                'shared' => false,
60                                                'with'   => array(
61                                                             T_ELSE   => T_ELSE,
62                                                             T_ELSEIF => T_ELSEIF,
63                                                            ),
64                                               ),
65                            T_TRY           => array(
66                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
67                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
68                                                'strict' => true,
69                                                'shared' => false,
70                                                'with'   => array(),
71                                               ),
72                            T_CATCH         => array(
73                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
74                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
75                                                'strict' => true,
76                                                'shared' => false,
77                                                'with'   => array(),
78                                               ),
79                            T_FINALLY       => array(
80                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
81                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
82                                                'strict' => true,
83                                                'shared' => false,
84                                                'with'   => array(),
85                                               ),
86                            T_ELSE          => array(
87                                                'start'  => array(
88                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
89                                                             T_COLON              => T_COLON,
90                                                            ),
91                                                'end'    => array(
92                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
93                                                             T_ENDIF               => T_ENDIF,
94                                                            ),
95                                                'strict' => false,
96                                                'shared' => false,
97                                                'with'   => array(
98                                                             T_IF     => T_IF,
99                                                             T_ELSEIF => T_ELSEIF,
100                                                            ),
101                                               ),
102                            T_ELSEIF        => array(
103                                                'start'  => array(
104                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
105                                                             T_COLON              => T_COLON,
106                                                            ),
107                                                'end'    => array(
108                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
109                                                             T_ENDIF               => T_ENDIF,
110                                                             T_ELSE                => T_ELSE,
111                                                             T_ELSEIF              => T_ELSEIF,
112                                                            ),
113                                                'strict' => false,
114                                                'shared' => false,
115                                                'with'   => array(
116                                                             T_IF   => T_IF,
117                                                             T_ELSE => T_ELSE,
118                                                            ),
119                                               ),
120                            T_FOR           => array(
121                                                'start'  => array(
122                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
123                                                             T_COLON              => T_COLON,
124                                                            ),
125                                                'end'    => array(
126                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
127                                                             T_ENDFOR              => T_ENDFOR,
128                                                            ),
129                                                'strict' => false,
130                                                'shared' => false,
131                                                'with'   => array(),
132                                               ),
133                            T_FOREACH       => array(
134                                                'start'  => array(
135                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
136                                                             T_COLON              => T_COLON,
137                                                            ),
138                                                'end'    => array(
139                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
140                                                             T_ENDFOREACH          => T_ENDFOREACH,
141                                                            ),
142                                                'strict' => false,
143                                                'shared' => false,
144                                                'with'   => array(),
145                                               ),
146                            T_INTERFACE     => array(
147                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
148                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
149                                                'strict' => true,
150                                                'shared' => false,
151                                                'with'   => array(),
152                                               ),
153                            T_FUNCTION      => array(
154                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
155                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
156                                                'strict' => true,
157                                                'shared' => false,
158                                                'with'   => array(),
159                                               ),
160                            T_CLASS         => array(
161                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
162                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
163                                                'strict' => true,
164                                                'shared' => false,
165                                                'with'   => array(),
166                                               ),
167                            T_TRAIT         => array(
168                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
169                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
170                                                'strict' => true,
171                                                'shared' => false,
172                                                'with'   => array(),
173                                               ),
174                            T_USE           => array(
175                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
176                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
177                                                'strict' => false,
178                                                'shared' => false,
179                                                'with'   => array(),
180                                               ),
181                            T_DECLARE       => array(
182                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
183                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
184                                                'strict' => false,
185                                                'shared' => false,
186                                                'with'   => array(),
187                                               ),
188                            T_NAMESPACE     => array(
189                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
190                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
191                                                'strict' => false,
192                                                'shared' => false,
193                                                'with'   => array(),
194                                               ),
195                            T_WHILE         => array(
196                                                'start'  => array(
197                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
198                                                             T_COLON              => T_COLON,
199                                                            ),
200                                                'end'    => array(
201                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
202                                                             T_ENDWHILE            => T_ENDWHILE,
203                                                            ),
204                                                'strict' => false,
205                                                'shared' => false,
206                                                'with'   => array(),
207                                               ),
208                            T_DO            => array(
209                                                'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
210                                                'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
211                                                'strict' => true,
212                                                'shared' => false,
213                                                'with'   => array(),
214                                               ),
215                            T_SWITCH        => array(
216                                                'start'  => array(
217                                                             T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
218                                                             T_COLON              => T_COLON,
219                                                            ),
220                                                'end'    => array(
221                                                             T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
222                                                             T_ENDSWITCH           => T_ENDSWITCH,
223                                                            ),
224                                                'strict' => true,
225                                                'shared' => false,
226                                                'with'   => array(),
227                                               ),
228                            T_CASE          => array(
229                                                'start'  => array(
230                                                             T_COLON     => T_COLON,
231                                                             T_SEMICOLON => T_SEMICOLON,
232                                                            ),
233                                                'end'    => array(
234                                                             T_BREAK    => T_BREAK,
235                                                             T_RETURN   => T_RETURN,
236                                                             T_CONTINUE => T_CONTINUE,
237                                                             T_THROW    => T_THROW,
238                                                             T_EXIT     => T_EXIT,
239                                                            ),
240                                                'strict' => true,
241                                                'shared' => true,
242                                                'with'   => array(
243                                                             T_DEFAULT => T_DEFAULT,
244                                                             T_CASE    => T_CASE,
245                                                             T_SWITCH  => T_SWITCH,
246                                                            ),
247                                               ),
248                            T_DEFAULT       => array(
249                                                'start'  => array(
250                                                             T_COLON     => T_COLON,
251                                                             T_SEMICOLON => T_SEMICOLON,
252                                                            ),
253                                                'end'    => array(
254                                                             T_BREAK    => T_BREAK,
255                                                             T_RETURN   => T_RETURN,
256                                                             T_CONTINUE => T_CONTINUE,
257                                                             T_THROW    => T_THROW,
258                                                             T_EXIT     => T_EXIT,
259                                                            ),
260                                                'strict' => true,
261                                                'shared' => true,
262                                                'with'   => array(
263                                                             T_CASE   => T_CASE,
264                                                             T_SWITCH => T_SWITCH,
265                                                            ),
266                                               ),
267                            T_START_HEREDOC => array(
268                                                'start'  => array(T_START_HEREDOC => T_START_HEREDOC),
269                                                'end'    => array(T_END_HEREDOC => T_END_HEREDOC),
270                                                'strict' => true,
271                                                'shared' => false,
272                                                'with'   => array(),
273                                               ),
274                           );
275
276    /**
277     * A list of tokens that end the scope.
278     *
279     * This array is intended as a unique collection of the end tokens
280     * from the $scopeOpeners array. The data is duplicated here to
281     * save time during parsing of the file.
282     *
     * As in $scopeOpeners, each token is stored as both key and value.
     *
     * NOTE(review): the list is not a complete copy. T_ELSE, T_ELSEIF,
     * T_RETURN, T_CONTINUE, T_THROW and T_EXIT appear as 'end' tokens in
     * $scopeOpeners but are absent here. Confirm whether that is
     * deliberate before relying on this array as the full set.
     *
283     * @var array
284     */
285    public $endScopeTokens = array(
286                              T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
287                              T_ENDIF               => T_ENDIF,
288                              T_ENDFOR              => T_ENDFOR,
289                              T_ENDFOREACH          => T_ENDFOREACH,
290                              T_ENDWHILE            => T_ENDWHILE,
291                              T_ENDSWITCH           => T_ENDSWITCH,
292                              T_BREAK               => T_BREAK,
293                              T_END_HEREDOC         => T_END_HEREDOC,
294                             );
295
296    /**
297     * A cache of different token types, resolved into arrays.
298     *
     * The cache is static, so it is shared by every instance of this
     * class, and it starts out empty.
     * NOTE(review): presumably filled and read by standardiseToken(),
     * which is not part of this section; confirm the key format there.
     *
299     * @var array
300     * @see standardiseToken()
301     */
302    private static $_resolveTokenCache = array();
303
304
305    /**
306     * Creates an array of tokens when given some PHP code.
307     *
308     * Starts by using token_get_all() but does a lot of extra processing
309     * to insert information about the context of the token.
310     *
311     * @param string $string  The string to tokenize.
312     * @param string $eolChar The EOL character to use for splitting strings.
313     *
314     * @return array
315     */
316    public function tokenizeString($string, $eolChar='\n')
317    {
318        if (PHP_CODESNIFFER_VERBOSITY > 1) {
319            echo "\t*** START PHP TOKENIZING ***".PHP_EOL;
320            $isWin = false;
321            if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
322                $isWin = true;
323            }
324        }
325
326        $tokens      = @token_get_all($string);
327        $finalTokens = array();
328
329        $newStackPtr       = 0;
330        $numTokens         = count($tokens);
331        $lastNotEmptyToken = 0;
332
333        $insideInlineIf = array();
334        $insideUseGroup = false;
335
336        $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
337
338        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
339            $token        = (array) $tokens[$stackPtr];
340            $tokenIsArray = isset($token[1]);
341
342            if (PHP_CODESNIFFER_VERBOSITY > 1) {
343                if ($tokenIsArray === true) {
344                    $type    = token_name($token[0]);
345                    $content = PHP_CodeSniffer::prepareForOutput($token[1]);
346                } else {
347                    $newToken = self::resolveSimpleToken($token[0]);
348                    $type     = $newToken['type'];
349                    $content  = PHP_CodeSniffer::prepareForOutput($token[0]);
350                }
351
352                echo "\tProcess token ";
353                if ($tokenIsArray === true) {
354                    echo "[$stackPtr]";
355                } else {
356                    echo " $stackPtr ";
357                }
358
359                echo ": $type => $content";
360            }//end if
361
362            if ($newStackPtr > 0 && $finalTokens[($newStackPtr - 1)]['code'] !== T_WHITESPACE) {
363                $lastNotEmptyToken = ($newStackPtr - 1);
364            }
365
366            /*
367                If we are using \r\n newline characters, the \r and \n are sometimes
368                split over two tokens. This normally occurs after comments. We need
369                to merge these two characters together so that our line endings are
370                consistent for all lines.
371            */
372
373            if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
374                if (isset($tokens[($stackPtr + 1)]) === true
375                    && is_array($tokens[($stackPtr + 1)]) === true
376                    && $tokens[($stackPtr + 1)][1][0] === "\n"
377                ) {
378                    $token[1] .= "\n";
379                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
380                        if ($isWin === true) {
381                            echo '\n';
382                        } else {
383                            echo "\033[30;1m\\n\033[0m";
384                        }
385                    }
386
387                    if ($tokens[($stackPtr + 1)][1] === "\n") {
388                        // This token's content has been merged into the previous,
389                        // so we can skip it.
390                        $tokens[($stackPtr + 1)] = '';
391                    } else {
392                        $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1);
393                    }
394                }
395            }//end if
396
397            if (PHP_CODESNIFFER_VERBOSITY > 1) {
398                echo PHP_EOL;
399            }
400
401            /*
402                Parse doc blocks into something that can be easily iterated over.
403            */
404
405            if ($tokenIsArray === true && $token[0] === T_DOC_COMMENT) {
406                $commentTokens = $commentTokenizer->tokenizeString($token[1], $eolChar, $newStackPtr);
407                foreach ($commentTokens as $commentToken) {
408                    $finalTokens[$newStackPtr] = $commentToken;
409                    $newStackPtr++;
410                }
411
412                continue;
413            }
414
415            /*
416                If this is a double quoted string, PHP will tokenize the whole
417                thing which causes problems with the scope map when braces are
418                within the string. So we need to merge the tokens together to
419                provide a single string.
420            */
421
422            if ($tokenIsArray === false && ($token[0] === '"' || $token[0] === 'b"')) {
423                // Binary casts need a special token.
424                if ($token[0] === 'b"') {
425                    $finalTokens[$newStackPtr] = array(
426                                                  'code'    => T_BINARY_CAST,
427                                                  'type'    => 'T_BINARY_CAST',
428                                                  'content' => 'b',
429                                                 );
430                    $newStackPtr++;
431                }
432
433                $tokenContent = '"';
434                $nestedVars   = array();
435                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
436                    $subToken        = (array) $tokens[$i];
437                    $subTokenIsArray = isset($subToken[1]);
438
439                    if ($subTokenIsArray === true) {
440                        $tokenContent .= $subToken[1];
441                        if ($subToken[1] === '{'
442                            && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE
443                        ) {
444                            $nestedVars[] = $i;
445                        }
446                    } else {
447                        $tokenContent .= $subToken[0];
448                        if ($subToken[0] === '}') {
449                            array_pop($nestedVars);
450                        }
451                    }
452
453                    if ($subTokenIsArray === false
454                        && $subToken[0] === '"'
455                        && empty($nestedVars) === true
456                    ) {
457                        // We found the other end of the double quoted string.
458                        break;
459                    }
460                }//end for
461
462                $stackPtr = $i;
463
464                // Convert each line within the double quoted string to a
465                // new token, so it conforms with other multiple line tokens.
466                $tokenLines = explode($eolChar, $tokenContent);
467                $numLines   = count($tokenLines);
468                $newToken   = array();
469
470                for ($j = 0; $j < $numLines; $j++) {
471                    $newToken['content'] = $tokenLines[$j];
472                    if ($j === ($numLines - 1)) {
473                        if ($tokenLines[$j] === '') {
474                            break;
475                        }
476                    } else {
477                        $newToken['content'] .= $eolChar;
478                    }
479
480                    $newToken['code']          = T_DOUBLE_QUOTED_STRING;
481                    $newToken['type']          = 'T_DOUBLE_QUOTED_STRING';
482                    $finalTokens[$newStackPtr] = $newToken;
483                    $newStackPtr++;
484                }
485
486                // Continue, as we're done with this token.
487                continue;
488            }//end if
489
490            /*
491                If this is a heredoc, PHP will tokenize the whole
492                thing which causes problems when heredocs don't
493                contain real PHP code, which is almost never.
494                We want to leave the start and end heredoc tokens
495                alone though.
496            */
497
498            if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
499                // Add the start heredoc token to the final array.
500                $finalTokens[$newStackPtr] = self::standardiseToken($token);
501
502                // Check if this is actually a nowdoc and use a different token
503                // to help the sniffs.
504                $nowdoc = false;
505                if ($token[1][3] === "'") {
506                    $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
507                    $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
508                    $nowdoc = true;
509                }
510
511                $tokenContent = '';
512                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
513                    $subTokenIsArray = is_array($tokens[$i]);
514                    if ($subTokenIsArray === true
515                        && $tokens[$i][0] === T_END_HEREDOC
516                    ) {
517                        // We found the other end of the heredoc.
518                        break;
519                    }
520
521                    if ($subTokenIsArray === true) {
522                        $tokenContent .= $tokens[$i][1];
523                    } else {
524                        $tokenContent .= $tokens[$i];
525                    }
526                }
527
528                if ($i === $numTokens) {
529                    // We got to the end of the file and never
530                    // found the closing token, so this probably wasn't
531                    // a heredoc.
532                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
533                        $type = $finalTokens[$newStackPtr]['type'];
534                        echo "\t\t* failed to find the end of the here/nowdoc".PHP_EOL;
535                        echo "\t\t* token $stackPtr changed from $type to T_STRING".PHP_EOL;
536                    }
537
538                    $finalTokens[$newStackPtr]['code'] = T_STRING;
539                    $finalTokens[$newStackPtr]['type'] = 'T_STRING';
540                    $newStackPtr++;
541                    continue;
542                }
543
544                $stackPtr = $i;
545                $newStackPtr++;
546
547                // Convert each line within the heredoc to a
548                // new token, so it conforms with other multiple line tokens.
549                $tokenLines = explode($eolChar, $tokenContent);
550                $numLines   = count($tokenLines);
551                $newToken   = array();
552
553                for ($j = 0; $j < $numLines; $j++) {
554                    $newToken['content'] = $tokenLines[$j];
555                    if ($j === ($numLines - 1)) {
556                        if ($tokenLines[$j] === '') {
557                            break;
558                        }
559                    } else {
560                        $newToken['content'] .= $eolChar;
561                    }
562
563                    if ($nowdoc === true) {
564                        $newToken['code'] = T_NOWDOC;
565                        $newToken['type'] = 'T_NOWDOC';
566                    } else {
567                        $newToken['code'] = T_HEREDOC;
568                        $newToken['type'] = 'T_HEREDOC';
569                    }
570
571                    $finalTokens[$newStackPtr] = $newToken;
572                    $newStackPtr++;
573                }//end for
574
575                // Add the end heredoc token to the final array.
576                $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]);
577
578                if ($nowdoc === true) {
579                    $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
580                    $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
581                    $nowdoc = true;
582                }
583
584                $newStackPtr++;
585
586                // Continue, as we're done with this token.
587                continue;
588            }//end if
589
590            /*
591                Before PHP 5.6, the ... operator was tokenized as three
592                T_STRING_CONCAT tokens in a row. So look for and combine
593                these tokens in earlier versions.
594            */
595
596            if ($tokenIsArray === false
597                && $token[0] === '.'
598                && isset($tokens[($stackPtr + 1)]) === true
599                && isset($tokens[($stackPtr + 2)]) === true
600                && $tokens[($stackPtr + 1)] === '.'
601                && $tokens[($stackPtr + 2)] === '.'
602            ) {
603                $newToken            = array();
604                $newToken['code']    = T_ELLIPSIS;
605                $newToken['type']    = 'T_ELLIPSIS';
606                $newToken['content'] = '...';
607                $finalTokens[$newStackPtr] = $newToken;
608
609                $newStackPtr++;
610                $stackPtr += 2;
611                continue;
612            }
613
614            /*
615                Before PHP 5.6, the ** operator was tokenized as two
616                T_MULTIPLY tokens in a row. So look for and combine
617                these tokens in earlier versions.
618            */
619
620            if ($tokenIsArray === false
621                && $token[0] === '*'
622                && isset($tokens[($stackPtr + 1)]) === true
623                && $tokens[($stackPtr + 1)] === '*'
624            ) {
625                $newToken            = array();
626                $newToken['code']    = T_POW;
627                $newToken['type']    = 'T_POW';
628                $newToken['content'] = '**';
629                $finalTokens[$newStackPtr] = $newToken;
630
631                $newStackPtr++;
632                $stackPtr++;
633                continue;
634            }
635
636            /*
637                Before PHP 5.6, the **= operator was tokenized as
638                T_MULTIPLY followed by T_MUL_EQUAL. So look for and combine
639                these tokens in earlier versions.
640            */
641
642            if ($tokenIsArray === false
643                && $token[0] === '*'
644                && isset($tokens[($stackPtr + 1)]) === true
645                && is_array($tokens[($stackPtr + 1)]) === true
646                && $tokens[($stackPtr + 1)][1] === '*='
647            ) {
648                $newToken            = array();
649                $newToken['code']    = T_POW_EQUAL;
650                $newToken['type']    = 'T_POW_EQUAL';
651                $newToken['content'] = '**=';
652                $finalTokens[$newStackPtr] = $newToken;
653
654                $newStackPtr++;
655                $stackPtr++;
656                continue;
657            }
658
659            /*
660                Before PHP 7, the ??= operator was tokenized as
661                T_INLINE_THEN, T_INLINE_THEN, T_EQUAL.
662                Between PHP 7.0 and 7.2, the ??= operator was tokenized as
663                T_COALESCE, T_EQUAL.
664                So look for and combine these tokens in earlier versions.
665            */
666
667            if (($tokenIsArray === false
668                && $token[0] === '?'
669                && isset($tokens[($stackPtr + 1)]) === true
670                && $tokens[($stackPtr + 1)][0] === '?'
671                && isset($tokens[($stackPtr + 2)]) === true
672                && $tokens[($stackPtr + 2)][0] === '=')
673                || ($tokenIsArray === true
674                && $token[0] === T_COALESCE
675                && isset($tokens[($stackPtr + 1)]) === true
676                && $tokens[($stackPtr + 1)][0] === '=')
677            ) {
678                $newToken            = array();
679                $newToken['code']    = T_COALESCE_EQUAL;
680                $newToken['type']    = 'T_COALESCE_EQUAL';
681                $newToken['content'] = '??=';
682                $finalTokens[$newStackPtr] = $newToken;
683
684                $newStackPtr++;
685                $stackPtr++;
686
687                if ($tokenIsArray === false) {
688                    // Pre PHP 7.
689                    $stackPtr++;
690                }
691
692                continue;
693            }
694
695            /*
696                Before PHP 7, the ?? operator was tokenized as
697                T_INLINE_THEN followed by T_INLINE_THEN.
698                So look for and combine these tokens in earlier versions.
699            */
700
701            if ($tokenIsArray === false
702                && $token[0] === '?'
703                && isset($tokens[($stackPtr + 1)]) === true
704                && $tokens[($stackPtr + 1)][0] === '?'
705            ) {
706                $newToken            = array();
707                $newToken['code']    = T_COALESCE;
708                $newToken['type']    = 'T_COALESCE';
709                $newToken['content'] = '??';
710                $finalTokens[$newStackPtr] = $newToken;
711
712                $newStackPtr++;
713                $stackPtr++;
714                continue;
715            }
716
717            /*
718                Convert ? to T_NULLABLE OR T_INLINE_THEN
719            */
720
721            if ($tokenIsArray === false && $token[0] === '?') {
722                $newToken            = array();
723                $newToken['content'] = '?';
724
725                for ($i = ($stackPtr - 1); $i >= 0; $i--) {
726                    if (is_array($tokens[$i]) === true) {
727                        $tokenType = $tokens[$i][0];
728                    } else {
729                        $tokenType = $tokens[$i];
730                    }
731
732                    if ($tokenType === T_FUNCTION) {
733                        $newToken['code'] = T_NULLABLE;
734                        $newToken['type'] = 'T_NULLABLE';
735                        break;
736                    } else if (in_array($tokenType, array(T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO, '=', '{', ';')) === true) {
737                        $newToken['code'] = T_INLINE_THEN;
738                        $newToken['type'] = 'T_INLINE_THEN';
739
740                        $insideInlineIf[] = $stackPtr;
741                        break;
742                    }
743                }
744
745                $finalTokens[$newStackPtr] = $newToken;
746                $newStackPtr++;
747                continue;
748            }//end if
749
750            /*
                Tokens after a double colon may look like scope openers,
752                such as when writing code like Foo::NAMESPACE, but they are
753                only ever variables or strings.
754            */
755
756            if ($stackPtr > 1
757                && (is_array($tokens[($stackPtr - 1)]) === true
758                && $tokens[($stackPtr - 1)][0] === T_PAAMAYIM_NEKUDOTAYIM)
759                && $tokenIsArray === true
760                && $token[0] !== T_STRING
761                && $token[0] !== T_VARIABLE
762                && $token[0] !== T_DOLLAR
763                && isset(PHP_CodeSniffer_Tokens::$emptyTokens[$token[0]]) === false
764            ) {
765                $newToken            = array();
766                $newToken['code']    = T_STRING;
767                $newToken['type']    = 'T_STRING';
768                $newToken['content'] = $token[1];
769                $finalTokens[$newStackPtr] = $newToken;
770
771                $newStackPtr++;
772                continue;
773            }
774
775            /*
776                The string-like token after a function keyword should always be
777                tokenized as T_STRING even if it appears to be a different token,
778                such as when writing code like: function default(): foo
779                so go forward and change the token type before it is processed.
780            */
781
782            if ($tokenIsArray === true && $token[0] === T_FUNCTION) {
783                for ($x = ($stackPtr + 1); $x < $numTokens; $x++) {
784                    if (is_array($tokens[$x]) === false
785                        || isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x][0]]) === false
786                    ) {
787                        // Non-empty content.
788                        break;
789                    }
790                }
791
792                if ($x < $numTokens && is_array($tokens[$x]) === true) {
793                    $tokens[$x][0] = T_STRING;
794                }
795            }
796
797            /*
798                Before PHP 7, the <=> operator was tokenized as
799                T_IS_SMALLER_OR_EQUAL followed by T_GREATER_THAN.
800                So look for and combine these tokens in earlier versions.
801            */
802
803            if ($tokenIsArray === true
804                && $token[0] === T_IS_SMALLER_OR_EQUAL
805                && isset($tokens[($stackPtr + 1)]) === true
806                && $tokens[($stackPtr + 1)][0] === '>'
807            ) {
808                $newToken            = array();
809                $newToken['code']    = T_SPACESHIP;
810                $newToken['type']    = 'T_SPACESHIP';
811                $newToken['content'] = '<=>';
812                $finalTokens[$newStackPtr] = $newToken;
813
814                $newStackPtr++;
815                $stackPtr++;
816                continue;
817            }
818
819            /*
820                Emulate traits in PHP versions less than 5.4.
821            */
822
823            if ($tokenIsArray === true
824                && $token[0] === T_STRING
825                && strtolower($token[1]) === 'trait'
826                && $tokens[($stackPtr - 1)][0] !== T_OBJECT_OPERATOR
827                && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM
828            ) {
829                $finalTokens[$newStackPtr] = array(
830                                              'content' => $token[1],
831                                              'code'    => T_TRAIT,
832                                              'type'    => 'T_TRAIT',
833                                             );
834
835                if (PHP_CODESNIFFER_VERBOSITY > 1) {
836                    echo "\t\t* token $stackPtr changed from T_STRING to T_TRAIT".PHP_EOL;
837                }
838
839                $newStackPtr++;
840                continue;
841            }
842
843            /*
844                PHP doesn't assign a token to goto labels, so we have to.
845                These are just string tokens with a single colon after them. Double
846                colons are already tokenized and so don't interfere with this check.
847                But we do have to account for CASE statements, that look just like
848                goto labels.
849            */
850
851            if ($tokenIsArray === true
852                && $token[0] === T_STRING
853                && isset($tokens[($stackPtr + 1)]) === true
854                && $tokens[($stackPtr + 1)] === ':'
855                && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM
856            ) {
857                $stopTokens = array(
858                               T_CASE               => true,
859                               T_SEMICOLON          => true,
860                               T_OPEN_CURLY_BRACKET => true,
861                               T_INLINE_THEN        => true,
862                              );
863
864                for ($x = ($newStackPtr - 1); $x > 0; $x--) {
865                    if (isset($stopTokens[$finalTokens[$x]['code']]) === true) {
866                        break;
867                    }
868                }
869
870                if ($finalTokens[$x]['code'] !== T_CASE
871                    && $finalTokens[$x]['code'] !== T_INLINE_THEN
872                ) {
873                    $finalTokens[$newStackPtr] = array(
874                                                  'content' => $token[1].':',
875                                                  'code'    => T_GOTO_LABEL,
876                                                  'type'    => 'T_GOTO_LABEL',
877                                                 );
878
879                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
880                        echo "\t\t* token $stackPtr changed from T_STRING to T_GOTO_LABEL".PHP_EOL;
881                        echo "\t\t* skipping T_COLON token ".($stackPtr + 1).PHP_EOL;
882                    }
883
884                    $newStackPtr++;
885                    $stackPtr++;
886                    continue;
887                }
888            }//end if
889
890            /*
891                HHVM 3.5 tokenizes "else[\s]+if" as a T_ELSEIF token while PHP
892                proper only tokenizes "elseif" as a T_ELSEIF token. So split
                up the HHVM token to make it look like proper PHP.
894            */
895
896            if ($tokenIsArray === true
897                && $token[0] === T_ELSEIF
898                && strtolower($token[1]) !== 'elseif'
899            ) {
900                $finalTokens[$newStackPtr] = array(
901                                              'content' => substr($token[1], 0, 4),
902                                              'code'    => T_ELSE,
903                                              'type'    => 'T_ELSE',
904                                             );
905
906                $newStackPtr++;
907                $finalTokens[$newStackPtr] = array(
908                                              'content' => substr($token[1], 4, -2),
909                                              'code'    => T_WHITESPACE,
910                                              'type'    => 'T_WHITESPACE',
911                                             );
912
913                $newStackPtr++;
914                $finalTokens[$newStackPtr] = array(
915                                              'content' => substr($token[1], -2),
916                                              'code'    => T_IF,
917                                              'type'    => 'T_IF',
918                                             );
919
920                if (PHP_CODESNIFFER_VERBOSITY > 1) {
921                    echo "\t\t* token $stackPtr changed from T_ELSEIF to T_ELSE/T_WHITESPACE/T_IF".PHP_EOL;
922                }
923
924                $newStackPtr++;
925                continue;
926            }//end if
927
928            /*
                HHVM 3.5 and 3.6 tokenize a hashbang line such as #!/usr/bin/php
                as T_HASHBANG while PHP proper uses T_INLINE_HTML.
931            */
932
933            if ($tokenIsArray === true && token_name($token[0]) === 'T_HASHBANG') {
934                $finalTokens[$newStackPtr] = array(
935                                              'content' => $token[1],
936                                              'code'    => T_INLINE_HTML,
937                                              'type'    => 'T_INLINE_HTML',
938                                             );
939
940                if (PHP_CODESNIFFER_VERBOSITY > 1) {
941                    echo "\t\t* token $stackPtr changed from T_HASHBANG to T_INLINE_HTML".PHP_EOL;
942                }
943
944                $newStackPtr++;
945                continue;
946            }//end if
947
948            /*
949                If this token has newlines in its content, split each line up
950                and create a new token for each line. We do this so it's easier
951                to ascertain where errors occur on a line.
952                Note that $token[1] is the token's content.
953            */
954
955            if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
956                $tokenLines = explode($eolChar, $token[1]);
957                $numLines   = count($tokenLines);
958                $newToken   = array(
959                               'type'    => token_name($token[0]),
960                               'code'    => $token[0],
961                               'content' => '',
962                              );
963
964                for ($i = 0; $i < $numLines; $i++) {
965                    $newToken['content'] = $tokenLines[$i];
966                    if ($i === ($numLines - 1)) {
967                        if ($tokenLines[$i] === '') {
968                            break;
969                        }
970                    } else {
971                        $newToken['content'] .= $eolChar;
972                    }
973
974                    $finalTokens[$newStackPtr] = $newToken;
975                    $newStackPtr++;
976                }
977            } else {
978                if ($tokenIsArray === true && $token[0] === T_STRING) {
979                    // Some T_STRING tokens should remain that way
980                    // due to their context.
981                    $context = array(
982                                T_OBJECT_OPERATOR      => true,
983                                T_FUNCTION             => true,
984                                T_CLASS                => true,
985                                T_EXTENDS              => true,
986                                T_IMPLEMENTS           => true,
987                                T_NEW                  => true,
988                                T_CONST                => true,
989                                T_NS_SEPARATOR         => true,
990                                T_USE                  => true,
991                                T_NAMESPACE            => true,
992                                T_PAAMAYIM_NEKUDOTAYIM => true,
993                               );
994                    if (isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) {
995                        // Special case for syntax like: return new self
996                        // where self should not be a string.
997                        if ($finalTokens[$lastNotEmptyToken]['code'] === T_NEW
998                            && strtolower($token[1]) === 'self'
999                        ) {
1000                            $finalTokens[$newStackPtr] = array(
1001                                                          'content' => $token[1],
1002                                                          'code'    => T_SELF,
1003                                                          'type'    => 'T_SELF',
1004                                                         );
1005                        } else {
1006                            $finalTokens[$newStackPtr] = array(
1007                                                          'content' => $token[1],
1008                                                          'code'    => T_STRING,
1009                                                          'type'    => 'T_STRING',
1010                                                         );
1011                        }
1012
1013                        $newStackPtr++;
1014                        continue;
1015                    }//end if
1016                }//end if
1017
1018                $newToken = null;
1019                if ($tokenIsArray === false) {
1020                    if (isset(self::$_resolveTokenCache[$token[0]]) === true) {
1021                        $newToken = self::$_resolveTokenCache[$token[0]];
1022                    }
1023                } else {
1024                    $cacheKey = null;
1025                    if ($token[0] === T_STRING) {
1026                        $cacheKey = strtolower($token[1]);
1027                    } else if ($token[0] !== T_CURLY_OPEN) {
1028                        $cacheKey = $token[0];
1029                    }
1030
1031                    if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
1032                        $newToken            = self::$_resolveTokenCache[$cacheKey];
1033                        $newToken['content'] = $token[1];
1034                    }
1035                }
1036
1037                if ($newToken === null) {
1038                    $newToken = self::standardiseToken($token);
1039                }
1040
1041                // Convert colons that are actually the ELSE component of an
1042                // inline IF statement.
1043                if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) {
1044                    array_pop($insideInlineIf);
1045                    $newToken['code'] = T_INLINE_ELSE;
1046                    $newToken['type'] = 'T_INLINE_ELSE';
1047                }
1048
1049                // This is a special condition for T_ARRAY tokens used for
1050                // type hinting function arguments as being arrays. We want to keep
1051                // the parenthesis map clean, so let's tag these tokens as
1052                // T_ARRAY_HINT.
1053                if ($newToken['code'] === T_ARRAY) {
1054                    for ($i = $stackPtr; $i < $numTokens; $i++) {
1055                        if ($tokens[$i] === '(') {
1056                            break;
1057                        } else if ($tokens[$i][0] === T_VARIABLE) {
1058                            $newToken['code'] = T_ARRAY_HINT;
1059                            $newToken['type'] = 'T_ARRAY_HINT';
1060                            break;
1061                        }
1062                    }
1063                }
1064
1065                // This is a special case when checking PHP 5.5+ code in PHP < 5.5
1066                // where "finally" should be T_FINALLY instead of T_STRING.
1067                if ($newToken['code'] === T_STRING
1068                    && strtolower($newToken['content']) === 'finally'
1069                ) {
1070                    $newToken['code'] = T_FINALLY;
1071                    $newToken['type'] = 'T_FINALLY';
1072                }
1073
1074                // This is a special case for the PHP 5.5 classname::class syntax
1075                // where "class" should be T_STRING instead of T_CLASS.
1076                if (($newToken['code'] === T_CLASS
1077                    || $newToken['code'] === T_FUNCTION)
1078                    && $finalTokens[($newStackPtr - 1)]['code'] === T_DOUBLE_COLON
1079                ) {
1080                    $newToken['code'] = T_STRING;
1081                    $newToken['type'] = 'T_STRING';
1082                }
1083
1084                // This is a special case for PHP 5.6 use function and use const
1085                // where "function" and "const" should be T_STRING instead of T_FUNCTION
1086                // and T_CONST.
1087                if (($newToken['code'] === T_FUNCTION
1088                    || $newToken['code'] === T_CONST)
1089                    && $finalTokens[$lastNotEmptyToken]['code'] === T_USE
1090                ) {
1091                    $newToken['code'] = T_STRING;
1092                    $newToken['type'] = 'T_STRING';
1093                }
1094
1095                // This is a special case for use groups in PHP 7+ where leaving
1096                // the curly braces as their normal tokens would confuse
1097                // the scope map and sniffs.
1098                if ($newToken['code'] === T_OPEN_CURLY_BRACKET
1099                    && $finalTokens[$lastNotEmptyToken]['code'] === T_NS_SEPARATOR
1100                ) {
1101                    $newToken['code'] = T_OPEN_USE_GROUP;
1102                    $newToken['type'] = 'T_OPEN_USE_GROUP';
1103                    $insideUseGroup   = true;
1104                }
1105
1106                if ($insideUseGroup === true && $newToken['code'] === T_CLOSE_CURLY_BRACKET) {
1107                    $newToken['code'] = T_CLOSE_USE_GROUP;
1108                    $newToken['type'] = 'T_CLOSE_USE_GROUP';
1109                    $insideUseGroup   = false;
1110                }
1111
1112                $finalTokens[$newStackPtr] = $newToken;
1113                $newStackPtr++;
1114            }//end if
1115        }//end for
1116
1117        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1118            echo "\t*** END PHP TOKENIZING ***".PHP_EOL;
1119        }
1120
1121        return $finalTokens;
1122
1123    }//end tokenizeString()
1124
1125
1126    /**
1127     * Performs additional processing after main tokenizing.
1128     *
1129     * This additional processing checks for CASE statements that are using curly
1130     * braces for scope openers and closers. It also turns some T_FUNCTION tokens
1131     * into T_CLOSURE when they are not standard function definitions. It also
1132     * detects short array syntax and converts those square brackets into new tokens.
1133     * It also corrects some usage of the static and class keywords. It also
1134     * assigns tokens to function return types.
1135     *
1136     * @param array  $tokens  The array of tokens to process.
1137     * @param string $eolChar The EOL character to use for splitting strings.
1138     *
1139     * @return void
1140     */
1141    public function processAdditional(&$tokens, $eolChar)
1142    {
1143        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1144            echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
1145        }
1146
1147        $numTokens = count($tokens);
1148        for ($i = ($numTokens - 1); $i >= 0; $i--) {
1149            // Check for any unset scope conditions due to alternate IF/ENDIF syntax.
1150            if (isset($tokens[$i]['scope_opener']) === true
1151                && isset($tokens[$i]['scope_condition']) === false
1152            ) {
1153                $tokens[$i]['scope_condition'] = $tokens[$tokens[$i]['scope_opener']]['scope_condition'];
1154            }
1155
1156            if ($tokens[$i]['code'] === T_FUNCTION) {
1157                /*
1158                    Detect functions that are actually closures and
1159                    assign them a different token.
1160                */
1161
1162                if (isset($tokens[$i]['scope_opener']) === true) {
1163                    for ($x = ($i + 1); $x < $numTokens; $x++) {
1164                        if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false
1165                            && $tokens[$x]['code'] !== T_BITWISE_AND
1166                        ) {
1167                            break;
1168                        }
1169                    }
1170
1171                    if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
1172                        $tokens[$i]['code'] = T_CLOSURE;
1173                        $tokens[$i]['type'] = 'T_CLOSURE';
1174                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1175                            $line = $tokens[$i]['line'];
1176                            echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
1177                        }
1178
1179                        for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
1180                            if (isset($tokens[$x]['conditions'][$i]) === false) {
1181                                continue;
1182                            }
1183
1184                            $tokens[$x]['conditions'][$i] = T_CLOSURE;
1185                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
1186                                $type = $tokens[$x]['type'];
1187                                echo "\t\t* cleaned $x ($type) *".PHP_EOL;
1188                            }
1189                        }
1190                    }
1191
1192                    $tokenAfterReturnTypeHint = $tokens[$i]['scope_opener'];
1193                } else if (isset($tokens[$i]['parenthesis_closer']) === true) {
1194                    $tokenAfterReturnTypeHint = null;
1195                    for ($x = ($tokens[$i]['parenthesis_closer'] + 1); $x < $numTokens; $x++) {
1196                        if ($tokens[$x]['code'] === T_SEMICOLON) {
1197                            $tokenAfterReturnTypeHint = $x;
1198                            break;
1199                        }
1200                    }
1201
1202                    if ($tokenAfterReturnTypeHint === null) {
1203                        // Probably a syntax error.
1204                        continue;
1205                    }
1206                } else {
1207                    // Probably a syntax error.
1208                    continue;
1209                }//end if
1210
1211                /*
1212                    Detect function return values and assign them
1213                    a special token, because PHP doesn't.
1214                */
1215
1216                for ($x = ($tokenAfterReturnTypeHint - 1); $x > $i; $x--) {
1217                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1218                        if (in_array($tokens[$x]['code'], array(T_STRING, T_ARRAY, T_ARRAY_HINT, T_CALLABLE, T_SELF, T_PARENT), true) === true) {
1219                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
1220                                $line = $tokens[$x]['line'];
1221                                $type = $tokens[$x]['type'];
1222                                echo "\t* token $x on line $line changed from $type to T_RETURN_TYPE".PHP_EOL;
1223                            }
1224
1225                            $tokens[$x]['code'] = T_RETURN_TYPE;
1226                            $tokens[$x]['type'] = 'T_RETURN_TYPE';
1227                        }
1228
1229                        break;
1230                    }
1231                }
1232
1233                continue;
1234            } else if ($tokens[$i]['code'] === T_CLASS && isset($tokens[$i]['scope_opener']) === true) {
1235                /*
1236                    Detect anonymous classes and assign them a different token.
1237                */
1238
1239                for ($x = ($i + 1); $x < $numTokens; $x++) {
1240                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1241                        break;
1242                    }
1243                }
1244
1245                if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS
1246                    || $tokens[$x]['code'] === T_OPEN_CURLY_BRACKET
1247                    || $tokens[$x]['code'] === T_EXTENDS
1248                    || $tokens[$x]['code'] === T_IMPLEMENTS
1249                ) {
1250                    $tokens[$i]['code'] = T_ANON_CLASS;
1251                    $tokens[$i]['type'] = 'T_ANON_CLASS';
1252                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1253                        $line = $tokens[$i]['line'];
1254                        echo "\t* token $i on line $line changed from T_CLASS to T_ANON_CLASS".PHP_EOL;
1255                    }
1256
1257                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
1258                        if (isset($tokens[$x]['conditions'][$i]) === false) {
1259                            continue;
1260                        }
1261
1262                        $tokens[$x]['conditions'][$i] = T_ANON_CLASS;
1263                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1264                            $type = $tokens[$x]['type'];
1265                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
1266                        }
1267                    }
1268                }
1269
1270                continue;
1271            } else if ($tokens[$i]['code'] === T_OPEN_SQUARE_BRACKET) {
1272                if (isset($tokens[$i]['bracket_closer']) === false) {
1273                    continue;
1274                }
1275
1276                // Unless there is a variable or a bracket before this token,
1277                // it is the start of an array being defined using the short syntax.
1278                $isShortArray = false;
1279                $allowed      = array(
1280                                 T_CLOSE_SQUARE_BRACKET     => T_CLOSE_SQUARE_BRACKET,
1281                                 T_CLOSE_CURLY_BRACKET      => T_CLOSE_CURLY_BRACKET,
1282                                 T_CLOSE_PARENTHESIS        => T_CLOSE_PARENTHESIS,
1283                                 T_VARIABLE                 => T_VARIABLE,
1284                                 T_OBJECT_OPERATOR          => T_OBJECT_OPERATOR,
1285                                 T_STRING                   => T_STRING,
1286                                 T_CONSTANT_ENCAPSED_STRING => T_CONSTANT_ENCAPSED_STRING,
1287                                );
1288
1289                for ($x = ($i - 1); $x > 0; $x--) {
1290                    // If we hit a scope opener, the statement has ended
1291                    // without finding anything, so it's probably an array
1292                    // using PHP 7.1 short list syntax.
1293                    if (isset($tokens[$x]['scope_opener']) === true) {
1294                        $isShortArray = true;
1295                        break;
1296                    }
1297
1298                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1299                        if (isset($allowed[$tokens[$x]['code']]) === false) {
1300                            $isShortArray = true;
1301                        }
1302
1303                        break;
1304                    }
1305                }
1306
1307                if ($isShortArray === true) {
1308                    $tokens[$i]['code'] = T_OPEN_SHORT_ARRAY;
1309                    $tokens[$i]['type'] = 'T_OPEN_SHORT_ARRAY';
1310
1311                    $closer = $tokens[$i]['bracket_closer'];
1312                    $tokens[$closer]['code'] = T_CLOSE_SHORT_ARRAY;
1313                    $tokens[$closer]['type'] = 'T_CLOSE_SHORT_ARRAY';
1314                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1315                        $line = $tokens[$i]['line'];
1316                        echo "\t* token $i on line $line changed from T_OPEN_SQUARE_BRACKET to T_OPEN_SHORT_ARRAY".PHP_EOL;
1317                        $line = $tokens[$closer]['line'];
1318                        echo "\t* token $closer on line $line changed from T_CLOSE_SQUARE_BRACKET to T_CLOSE_SHORT_ARRAY".PHP_EOL;
1319                    }
1320                }
1321
1322                continue;
1323            } else if ($tokens[$i]['code'] === T_STATIC) {
1324                for ($x = ($i - 1); $x > 0; $x--) {
1325                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1326                        break;
1327                    }
1328                }
1329
1330                if ($tokens[$x]['code'] === T_INSTANCEOF) {
1331                    $tokens[$i]['code'] = T_STRING;
1332                    $tokens[$i]['type'] = 'T_STRING';
1333
1334                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1335                        $line = $tokens[$i]['line'];
1336                        echo "\t* token $i on line $line changed from T_STATIC to T_STRING".PHP_EOL;
1337                    }
1338                }
1339
1340                continue;
1341            } else if ($tokens[$i]['code'] === T_ECHO && $tokens[$i]['content'] === '<?=') {
1342                // HHVM tokenizes <?= as T_ECHO but it should be T_OPEN_TAG_WITH_ECHO.
1343                $tokens[$i]['code'] = T_OPEN_TAG_WITH_ECHO;
1344                $tokens[$i]['type'] = 'T_OPEN_TAG_WITH_ECHO';
1345
1346                if (PHP_CODESNIFFER_VERBOSITY > 1) {
1347                    $line = $tokens[$i]['line'];
1348                    echo "\t* token $i on line $line changed from T_ECHO to T_OPEN_TAG_WITH_ECHO".PHP_EOL;
1349                }
1350            } else if ($tokens[$i]['code'] === T_TRUE
1351                || $tokens[$i]['code'] === T_FALSE
1352                || $tokens[$i]['code'] === T_NULL
1353            ) {
1354                for ($x = ($i + 1); $i < $numTokens; $x++) {
1355                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1356                        // Non-whitespace content.
1357                        break;
1358                    }
1359                }
1360
1361                $context = array(
1362                            T_OBJECT_OPERATOR      => true,
1363                            T_NS_SEPARATOR         => true,
1364                            T_PAAMAYIM_NEKUDOTAYIM => true,
1365                           );
1366                if (isset($context[$tokens[$x]['code']]) === true) {
1367                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1368                        $line = $tokens[$i]['line'];
1369                        $type = $tokens[$i]['type'];
1370                        echo "\t* token $i on line $line changed from $type to T_STRING".PHP_EOL;
1371                    }
1372
1373                    $tokens[$i]['code'] = T_STRING;
1374                    $tokens[$i]['type'] = 'T_STRING';
1375                }
1376            } else if ($tokens[$i]['code'] === T_CONST) {
1377                // Context sensitive keywords support.
1378                for ($x = ($i + 1); $i < $numTokens; $x++) {
1379                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1380                        // Non-whitespace content.
1381                        break;
1382                    }
1383                }
1384
1385                if ($tokens[$x]['code'] !== T_STRING) {
1386                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
1387                        $line = $tokens[$x]['line'];
1388                        $type = $tokens[$x]['type'];
1389                        echo "\t* token $x on line $line changed from $type to T_STRING".PHP_EOL;
1390                    }
1391
1392                    $tokens[$x]['code'] = T_STRING;
1393                    $tokens[$x]['type'] = 'T_STRING';
1394                }
1395            }//end if
1396
1397            if (($tokens[$i]['code'] !== T_CASE
1398                && $tokens[$i]['code'] !== T_DEFAULT)
1399                || isset($tokens[$i]['scope_opener']) === false
1400            ) {
1401                // Only interested in CASE and DEFAULT statements from here on in.
1402                continue;
1403            }
1404
1405            $scopeOpener = $tokens[$i]['scope_opener'];
1406            $scopeCloser = $tokens[$i]['scope_closer'];
1407
1408            // If the first char after the opener is a curly brace
1409            // and that brace has been ignored, it is actually
1410            // opening this case statement and the opener and closer are
1411            // probably set incorrectly.
1412            for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
1413                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
1414                    // Non-whitespace content.
1415                    break;
1416                }
1417            }
1418
1419            if ($tokens[$x]['code'] === T_CASE || $tokens[$x]['code'] === T_DEFAULT) {
1420                // Special case for multiple CASE statements that share the same
1421                // closer. Because we are going backwards through the file, this next
1422                // CASE/DEFAULT statement is already fixed, so just use its closer
1423                // and don't worry about fixing anything.
1424                $newCloser = $tokens[$x]['scope_closer'];
1425                $tokens[$i]['scope_closer'] = $newCloser;
1426                if (PHP_CODESNIFFER_VERBOSITY > 1) {
1427                    $oldType = $tokens[$scopeCloser]['type'];
1428                    $newType = $tokens[$newCloser]['type'];
1429                    $line    = $tokens[$i]['line'];
1430                    echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
1431                }
1432
1433                continue;
1434            }
1435
1436            if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
1437                || isset($tokens[$x]['scope_condition']) === true
1438            ) {
1439                // Not a CASE/DEFAULT with a curly brace opener.
1440                continue;
1441            }
1442
1443            // The closer for this CASE/DEFAULT should be the closing curly brace and
1444            // not whatever it already is. The opener needs to be the opening curly
1445            // brace so everything matches up.
1446            $newCloser = $tokens[$x]['bracket_closer'];
1447            foreach (array($i, $x, $newCloser) as $index) {
1448                $tokens[$index]['scope_condition'] = $i;
1449                $tokens[$index]['scope_opener']    = $x;
1450                $tokens[$index]['scope_closer']    = $newCloser;
1451            }
1452
1453            unset($tokens[$scopeOpener]['scope_condition']);
1454            unset($tokens[$scopeOpener]['scope_opener']);
1455            unset($tokens[$scopeOpener]['scope_closer']);
1456            unset($tokens[$scopeCloser]['scope_condition']);
1457            unset($tokens[$scopeCloser]['scope_opener']);
1458            unset($tokens[$scopeCloser]['scope_closer']);
1459            unset($tokens[$x]['bracket_opener']);
1460            unset($tokens[$x]['bracket_closer']);
1461            unset($tokens[$newCloser]['bracket_opener']);
1462            unset($tokens[$newCloser]['bracket_closer']);
1463            $tokens[$scopeCloser]['conditions'][] = $i;
1464
1465            if (PHP_CODESNIFFER_VERBOSITY > 1) {
1466                $line      = $tokens[$i]['line'];
1467                $tokenType = $tokens[$i]['type'];
1468
1469                $oldType = $tokens[$scopeOpener]['type'];
1470                $newType = $tokens[$x]['type'];
1471                echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;
1472
1473                $oldType = $tokens[$scopeCloser]['type'];
1474                $newType = $tokens[$newCloser]['type'];
1475                echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
1476            }
1477
1478            // Now fix up all the tokens that think they are
1479            // inside the CASE/DEFAULT statement when they are really outside.
1480            for ($x = $newCloser; $x < $scopeCloser; $x++) {
1481                foreach ($tokens[$x]['conditions'] as $num => $oldCond) {
1482                    if ($oldCond === $tokens[$i]['code']) {
1483                        $oldConditions = $tokens[$x]['conditions'];
1484                        unset($tokens[$x]['conditions'][$num]);
1485
1486                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1487                            $type     = $tokens[$x]['type'];
1488                            $oldConds = '';
1489                            foreach ($oldConditions as $condition) {
1490                                $oldConds .= token_name($condition).',';
1491                            }
1492
1493                            $oldConds = rtrim($oldConds, ',');
1494
1495                            $newConds = '';
1496                            foreach ($tokens[$x]['conditions'] as $condition) {
1497                                $newConds .= token_name($condition).',';
1498                            }
1499
1500                            $newConds = rtrim($newConds, ',');
1501
1502                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
1503                            echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
1504                        }
1505
1506                        break;
1507                    }//end if
1508                }//end foreach
1509            }//end for
1510        }//end for
1511
1512        if (PHP_CODESNIFFER_VERBOSITY > 1) {
1513            echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
1514        }
1515
1516    }//end processAdditional()
1517
1518
1519    /**
1520     * Takes a token produced from <code>token_get_all()</code> and produces a
1521     * more uniform token.
1522     *
1523     * @param string|array $token The token to convert.
1524     *
1525     * @return array The new token.
1526     */
1527    public static function standardiseToken($token)
1528    {
1529        if (isset($token[1]) === false) {
1530            if (isset(self::$_resolveTokenCache[$token[0]]) === true) {
1531                return self::$_resolveTokenCache[$token[0]];
1532            }
1533        } else {
1534            $cacheKey = null;
1535            if ($token[0] === T_STRING) {
1536                $cacheKey = strtolower($token[1]);
1537            } else if ($token[0] !== T_CURLY_OPEN) {
1538                $cacheKey = $token[0];
1539            }
1540
1541            if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
1542                $newToken            = self::$_resolveTokenCache[$cacheKey];
1543                $newToken['content'] = $token[1];
1544                return $newToken;
1545            }
1546        }
1547
1548        if (isset($token[1]) === false) {
1549            return self::resolveSimpleToken($token[0]);
1550        }
1551
1552        if ($token[0] === T_STRING) {
1553            switch ($cacheKey) {
1554            case 'false':
1555                $newToken['type'] = 'T_FALSE';
1556                break;
1557            case 'true':
1558                $newToken['type'] = 'T_TRUE';
1559                break;
1560            case 'null':
1561                $newToken['type'] = 'T_NULL';
1562                break;
1563            case 'self':
1564                $newToken['type'] = 'T_SELF';
1565                break;
1566            case 'parent':
1567                $newToken['type'] = 'T_PARENT';
1568                break;
1569            default:
1570                $newToken['type'] = 'T_STRING';
1571                break;
1572            }
1573
1574            $newToken['code'] = constant($newToken['type']);
1575
1576            self::$_resolveTokenCache[$cacheKey] = $newToken;
1577        } else if ($token[0] === T_CURLY_OPEN) {
1578            $newToken = array(
1579                         'code' => T_OPEN_CURLY_BRACKET,
1580                         'type' => 'T_OPEN_CURLY_BRACKET',
1581                        );
1582        } else {
1583            $newToken = array(
1584                         'code' => $token[0],
1585                         'type' => token_name($token[0]),
1586                        );
1587
1588            self::$_resolveTokenCache[$token[0]] = $newToken;
1589        }//end if
1590
1591        $newToken['content'] = $token[1];
1592        return $newToken;
1593
1594    }//end standardiseToken()
1595
1596
1597    /**
1598     * Converts simple tokens into a format that conforms to complex tokens
1599     * produced by token_get_all().
1600     *
1601     * Simple tokens are tokens that are not in array form when produced from
1602     * token_get_all().
1603     *
1604     * @param string $token The simple token to convert.
1605     *
1606     * @return array The new token in array format.
1607     */
1608    public static function resolveSimpleToken($token)
1609    {
1610        $newToken = array();
1611
1612        switch ($token) {
1613        case '{':
1614            $newToken['type'] = 'T_OPEN_CURLY_BRACKET';
1615            break;
1616        case '}':
1617            $newToken['type'] = 'T_CLOSE_CURLY_BRACKET';
1618            break;
1619        case '[':
1620            $newToken['type'] = 'T_OPEN_SQUARE_BRACKET';
1621            break;
1622        case ']':
1623            $newToken['type'] = 'T_CLOSE_SQUARE_BRACKET';
1624            break;
1625        case '(':
1626            $newToken['type'] = 'T_OPEN_PARENTHESIS';
1627            break;
1628        case ')':
1629            $newToken['type'] = 'T_CLOSE_PARENTHESIS';
1630            break;
1631        case ':':
1632            $newToken['type'] = 'T_COLON';
1633            break;
1634        case '.':
1635            $newToken['type'] = 'T_STRING_CONCAT';
1636            break;
1637        case ';':
1638            $newToken['type'] = 'T_SEMICOLON';
1639            break;
1640        case '=':
1641            $newToken['type'] = 'T_EQUAL';
1642            break;
1643        case '*':
1644            $newToken['type'] = 'T_MULTIPLY';
1645            break;
1646        case '/':
1647            $newToken['type'] = 'T_DIVIDE';
1648            break;
1649        case '+':
1650            $newToken['type'] = 'T_PLUS';
1651            break;
1652        case '-':
1653            $newToken['type'] = 'T_MINUS';
1654            break;
1655        case '%':
1656            $newToken['type'] = 'T_MODULUS';
1657            break;
1658        case '^':
1659            $newToken['type'] = 'T_BITWISE_XOR';
1660            break;
1661        case '&':
1662            $newToken['type'] = 'T_BITWISE_AND';
1663            break;
1664        case '|':
1665            $newToken['type'] = 'T_BITWISE_OR';
1666            break;
1667        case '<':
1668            $newToken['type'] = 'T_LESS_THAN';
1669            break;
1670        case '>':
1671            $newToken['type'] = 'T_GREATER_THAN';
1672            break;
1673        case '!':
1674            $newToken['type'] = 'T_BOOLEAN_NOT';
1675            break;
1676        case ',':
1677            $newToken['type'] = 'T_COMMA';
1678            break;
1679        case '@':
1680            $newToken['type'] = 'T_ASPERAND';
1681            break;
1682        case '$':
1683            $newToken['type'] = 'T_DOLLAR';
1684            break;
1685        case '`':
1686            $newToken['type'] = 'T_BACKTICK';
1687            break;
1688        default:
1689            $newToken['type'] = 'T_NONE';
1690            break;
1691        }//end switch
1692
1693        $newToken['code']    = constant($newToken['type']);
1694        $newToken['content'] = $token;
1695
1696        self::$_resolveTokenCache[$token] = $newToken;
1697        return $newToken;
1698
1699    }//end resolveSimpleToken()
1700
1701
1702}//end class
1703