<?php
/**
 * Tokenizes PHP code.
 *
 * PHP version 5
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */

/**
 * Tokenizes PHP code.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_PHP
{

    /**
     * When TRUE, files that look minified are not processed.
     *
     * @var boolean
     */
    public $skipMinified = false;

    /**
     * The tokens that are allowed to open a scope.
     *
     * Every entry is keyed by the scope-opening token and describes it with:
     *  - 'start'  => the tokens that may open the scope,
     *  - 'end'    => the tokens that may close the scope,
     *  - 'strict' => whether the token strictly requires an opener,
     *  - 'shared' => whether the token can share a scope closer,
     *  - 'with'   => the tokens the scope closer can be shared with.
     *
     * A CASE scope is an example of a token that shares a scope closer.
     *
     * @var array
     */
    public $scopeOpeners = array(
        T_IF => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDIF => T_ENDIF,
                T_ELSE => T_ELSE,
                T_ELSEIF => T_ELSEIF,
            ),
            'strict' => false,
            'shared' => false,
            'with' => array(
                T_ELSE => T_ELSE,
                T_ELSEIF => T_ELSEIF,
            ),
        ),
        T_TRY => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_CATCH => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_FINALLY => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_ELSE => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDIF => T_ENDIF,
            ),
            'strict' => false,
            'shared' => false,
            'with' => array(
                T_IF => T_IF,
                T_ELSEIF => T_ELSEIF,
            ),
        ),
        T_ELSEIF => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDIF => T_ENDIF,
                T_ELSE => T_ELSE,
                T_ELSEIF => T_ELSEIF,
            ),
            'strict' => false,
            'shared' => false,
            'with' => array(
                T_IF => T_IF,
                T_ELSE => T_ELSE,
            ),
        ),
        T_FOR => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDFOR => T_ENDFOR,
            ),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_FOREACH => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDFOREACH => T_ENDFOREACH,
            ),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_INTERFACE => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_FUNCTION => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_CLASS => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_TRAIT => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_USE => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_DECLARE => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_NAMESPACE => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_WHILE => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDWHILE => T_ENDWHILE,
            ),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_DO => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_SWITCH => array(
            'start' => array(
                T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
                T_COLON => T_COLON,
            ),
            'end' => array(
                T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                T_ENDSWITCH => T_ENDSWITCH,
            ),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_CASE => array(
            'start' => array(
                T_COLON => T_COLON,
                T_SEMICOLON => T_SEMICOLON,
            ),
            'end' => array(
                T_BREAK => T_BREAK,
                T_RETURN => T_RETURN,
                T_CONTINUE => T_CONTINUE,
                T_THROW => T_THROW,
                T_EXIT => T_EXIT,
            ),
            'strict' => true,
            'shared' => true,
            'with' => array(
                T_DEFAULT => T_DEFAULT,
                T_CASE => T_CASE,
                T_SWITCH => T_SWITCH,
            ),
        ),
        T_DEFAULT => array(
            'start' => array(
                T_COLON => T_COLON,
                T_SEMICOLON => T_SEMICOLON,
            ),
            'end' => array(
                T_BREAK => T_BREAK,
                T_RETURN => T_RETURN,
                T_CONTINUE => T_CONTINUE,
                T_THROW => T_THROW,
                T_EXIT => T_EXIT,
            ),
            'strict' => true,
            'shared' => true,
            'with' => array(
                T_CASE => T_CASE,
                T_SWITCH => T_SWITCH,
            ),
        ),
        T_START_HEREDOC => array(
            'start' => array(T_START_HEREDOC => T_START_HEREDOC),
            'end' => array(T_END_HEREDOC => T_END_HEREDOC),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
    );

    /**
     * The tokens that end a scope.
     *
     * This is a flat collection of end tokens taken from the $scopeOpeners
     * array. The data is duplicated here to save time while parsing a file.
     *
     * NOTE(review): not every 'end' token listed in $scopeOpeners appears
     * here (for example T_ELSE and T_RETURN are absent) - confirm against
     * the code that consumes this list whether that is intentional.
     *
     * @var array
     */
    public $endScopeTokens = array(
        T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
        T_ENDIF => T_ENDIF,
        T_ENDFOR => T_ENDFOR,
        T_ENDFOREACH => T_ENDFOREACH,
        T_ENDWHILE => T_ENDWHILE,
        T_ENDSWITCH => T_ENDSWITCH,
        T_BREAK => T_BREAK,
        T_END_HEREDOC => T_END_HEREDOC,
    );

    /**
     * A cache of token types that have already been resolved into arrays.
     *
     * @var array
     * @see standardiseToken()
     */
    private static $_resolveTokenCache = array();


    /**
     * Creates an array of tokens when given some PHP code.
     *
     * Starts by using token_get_all() but does a lot of extra processing
     * to insert information about the context of the token.
     *
     * @param string $string  The string to tokenize.
     * @param string $eolChar The EOL character to use for splitting strings.
313 * 314 * @return array 315 */ 316 public function tokenizeString($string, $eolChar='\n') 317 { 318 if (PHP_CODESNIFFER_VERBOSITY > 1) { 319 echo "\t*** START PHP TOKENIZING ***".PHP_EOL; 320 $isWin = false; 321 if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') { 322 $isWin = true; 323 } 324 } 325 326 $tokens = @token_get_all($string); 327 $finalTokens = array(); 328 329 $newStackPtr = 0; 330 $numTokens = count($tokens); 331 $lastNotEmptyToken = 0; 332 333 $insideInlineIf = array(); 334 $insideUseGroup = false; 335 336 $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment(); 337 338 for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) { 339 $token = (array) $tokens[$stackPtr]; 340 $tokenIsArray = isset($token[1]); 341 342 if (PHP_CODESNIFFER_VERBOSITY > 1) { 343 if ($tokenIsArray === true) { 344 $type = token_name($token[0]); 345 $content = PHP_CodeSniffer::prepareForOutput($token[1]); 346 } else { 347 $newToken = self::resolveSimpleToken($token[0]); 348 $type = $newToken['type']; 349 $content = PHP_CodeSniffer::prepareForOutput($token[0]); 350 } 351 352 echo "\tProcess token "; 353 if ($tokenIsArray === true) { 354 echo "[$stackPtr]"; 355 } else { 356 echo " $stackPtr "; 357 } 358 359 echo ": $type => $content"; 360 }//end if 361 362 if ($newStackPtr > 0 && $finalTokens[($newStackPtr - 1)]['code'] !== T_WHITESPACE) { 363 $lastNotEmptyToken = ($newStackPtr - 1); 364 } 365 366 /* 367 If we are using \r\n newline characters, the \r and \n are sometimes 368 split over two tokens. This normally occurs after comments. We need 369 to merge these two characters together so that our line endings are 370 consistent for all lines. 
371 */ 372 373 if ($tokenIsArray === true && substr($token[1], -1) === "\r") { 374 if (isset($tokens[($stackPtr + 1)]) === true 375 && is_array($tokens[($stackPtr + 1)]) === true 376 && $tokens[($stackPtr + 1)][1][0] === "\n" 377 ) { 378 $token[1] .= "\n"; 379 if (PHP_CODESNIFFER_VERBOSITY > 1) { 380 if ($isWin === true) { 381 echo '\n'; 382 } else { 383 echo "\033[30;1m\\n\033[0m"; 384 } 385 } 386 387 if ($tokens[($stackPtr + 1)][1] === "\n") { 388 // This token's content has been merged into the previous, 389 // so we can skip it. 390 $tokens[($stackPtr + 1)] = ''; 391 } else { 392 $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1); 393 } 394 } 395 }//end if 396 397 if (PHP_CODESNIFFER_VERBOSITY > 1) { 398 echo PHP_EOL; 399 } 400 401 /* 402 Parse doc blocks into something that can be easily iterated over. 403 */ 404 405 if ($tokenIsArray === true && $token[0] === T_DOC_COMMENT) { 406 $commentTokens = $commentTokenizer->tokenizeString($token[1], $eolChar, $newStackPtr); 407 foreach ($commentTokens as $commentToken) { 408 $finalTokens[$newStackPtr] = $commentToken; 409 $newStackPtr++; 410 } 411 412 continue; 413 } 414 415 /* 416 If this is a double quoted string, PHP will tokenize the whole 417 thing which causes problems with the scope map when braces are 418 within the string. So we need to merge the tokens together to 419 provide a single string. 420 */ 421 422 if ($tokenIsArray === false && ($token[0] === '"' || $token[0] === 'b"')) { 423 // Binary casts need a special token. 
424 if ($token[0] === 'b"') { 425 $finalTokens[$newStackPtr] = array( 426 'code' => T_BINARY_CAST, 427 'type' => 'T_BINARY_CAST', 428 'content' => 'b', 429 ); 430 $newStackPtr++; 431 } 432 433 $tokenContent = '"'; 434 $nestedVars = array(); 435 for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { 436 $subToken = (array) $tokens[$i]; 437 $subTokenIsArray = isset($subToken[1]); 438 439 if ($subTokenIsArray === true) { 440 $tokenContent .= $subToken[1]; 441 if ($subToken[1] === '{' 442 && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE 443 ) { 444 $nestedVars[] = $i; 445 } 446 } else { 447 $tokenContent .= $subToken[0]; 448 if ($subToken[0] === '}') { 449 array_pop($nestedVars); 450 } 451 } 452 453 if ($subTokenIsArray === false 454 && $subToken[0] === '"' 455 && empty($nestedVars) === true 456 ) { 457 // We found the other end of the double quoted string. 458 break; 459 } 460 }//end for 461 462 $stackPtr = $i; 463 464 // Convert each line within the double quoted string to a 465 // new token, so it conforms with other multiple line tokens. 466 $tokenLines = explode($eolChar, $tokenContent); 467 $numLines = count($tokenLines); 468 $newToken = array(); 469 470 for ($j = 0; $j < $numLines; $j++) { 471 $newToken['content'] = $tokenLines[$j]; 472 if ($j === ($numLines - 1)) { 473 if ($tokenLines[$j] === '') { 474 break; 475 } 476 } else { 477 $newToken['content'] .= $eolChar; 478 } 479 480 $newToken['code'] = T_DOUBLE_QUOTED_STRING; 481 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING'; 482 $finalTokens[$newStackPtr] = $newToken; 483 $newStackPtr++; 484 } 485 486 // Continue, as we're done with this token. 487 continue; 488 }//end if 489 490 /* 491 If this is a heredoc, PHP will tokenize the whole 492 thing which causes problems when heredocs don't 493 contain real PHP code, which is almost never. 494 We want to leave the start and end heredoc tokens 495 alone though. 
496 */ 497 498 if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) { 499 // Add the start heredoc token to the final array. 500 $finalTokens[$newStackPtr] = self::standardiseToken($token); 501 502 // Check if this is actually a nowdoc and use a different token 503 // to help the sniffs. 504 $nowdoc = false; 505 if ($token[1][3] === "'") { 506 $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC; 507 $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC'; 508 $nowdoc = true; 509 } 510 511 $tokenContent = ''; 512 for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { 513 $subTokenIsArray = is_array($tokens[$i]); 514 if ($subTokenIsArray === true 515 && $tokens[$i][0] === T_END_HEREDOC 516 ) { 517 // We found the other end of the heredoc. 518 break; 519 } 520 521 if ($subTokenIsArray === true) { 522 $tokenContent .= $tokens[$i][1]; 523 } else { 524 $tokenContent .= $tokens[$i]; 525 } 526 } 527 528 if ($i === $numTokens) { 529 // We got to the end of the file and never 530 // found the closing token, so this probably wasn't 531 // a heredoc. 532 if (PHP_CODESNIFFER_VERBOSITY > 1) { 533 $type = $finalTokens[$newStackPtr]['type']; 534 echo "\t\t* failed to find the end of the here/nowdoc".PHP_EOL; 535 echo "\t\t* token $stackPtr changed from $type to T_STRING".PHP_EOL; 536 } 537 538 $finalTokens[$newStackPtr]['code'] = T_STRING; 539 $finalTokens[$newStackPtr]['type'] = 'T_STRING'; 540 $newStackPtr++; 541 continue; 542 } 543 544 $stackPtr = $i; 545 $newStackPtr++; 546 547 // Convert each line within the heredoc to a 548 // new token, so it conforms with other multiple line tokens. 
549 $tokenLines = explode($eolChar, $tokenContent); 550 $numLines = count($tokenLines); 551 $newToken = array(); 552 553 for ($j = 0; $j < $numLines; $j++) { 554 $newToken['content'] = $tokenLines[$j]; 555 if ($j === ($numLines - 1)) { 556 if ($tokenLines[$j] === '') { 557 break; 558 } 559 } else { 560 $newToken['content'] .= $eolChar; 561 } 562 563 if ($nowdoc === true) { 564 $newToken['code'] = T_NOWDOC; 565 $newToken['type'] = 'T_NOWDOC'; 566 } else { 567 $newToken['code'] = T_HEREDOC; 568 $newToken['type'] = 'T_HEREDOC'; 569 } 570 571 $finalTokens[$newStackPtr] = $newToken; 572 $newStackPtr++; 573 }//end for 574 575 // Add the end heredoc token to the final array. 576 $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]); 577 578 if ($nowdoc === true) { 579 $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC; 580 $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC'; 581 $nowdoc = true; 582 } 583 584 $newStackPtr++; 585 586 // Continue, as we're done with this token. 587 continue; 588 }//end if 589 590 /* 591 Before PHP 5.6, the ... operator was tokenized as three 592 T_STRING_CONCAT tokens in a row. So look for and combine 593 these tokens in earlier versions. 594 */ 595 596 if ($tokenIsArray === false 597 && $token[0] === '.' 598 && isset($tokens[($stackPtr + 1)]) === true 599 && isset($tokens[($stackPtr + 2)]) === true 600 && $tokens[($stackPtr + 1)] === '.' 601 && $tokens[($stackPtr + 2)] === '.' 602 ) { 603 $newToken = array(); 604 $newToken['code'] = T_ELLIPSIS; 605 $newToken['type'] = 'T_ELLIPSIS'; 606 $newToken['content'] = '...'; 607 $finalTokens[$newStackPtr] = $newToken; 608 609 $newStackPtr++; 610 $stackPtr += 2; 611 continue; 612 } 613 614 /* 615 Before PHP 5.6, the ** operator was tokenized as two 616 T_MULTIPLY tokens in a row. So look for and combine 617 these tokens in earlier versions. 
618 */ 619 620 if ($tokenIsArray === false 621 && $token[0] === '*' 622 && isset($tokens[($stackPtr + 1)]) === true 623 && $tokens[($stackPtr + 1)] === '*' 624 ) { 625 $newToken = array(); 626 $newToken['code'] = T_POW; 627 $newToken['type'] = 'T_POW'; 628 $newToken['content'] = '**'; 629 $finalTokens[$newStackPtr] = $newToken; 630 631 $newStackPtr++; 632 $stackPtr++; 633 continue; 634 } 635 636 /* 637 Before PHP 5.6, the **= operator was tokenized as 638 T_MULTIPLY followed by T_MUL_EQUAL. So look for and combine 639 these tokens in earlier versions. 640 */ 641 642 if ($tokenIsArray === false 643 && $token[0] === '*' 644 && isset($tokens[($stackPtr + 1)]) === true 645 && is_array($tokens[($stackPtr + 1)]) === true 646 && $tokens[($stackPtr + 1)][1] === '*=' 647 ) { 648 $newToken = array(); 649 $newToken['code'] = T_POW_EQUAL; 650 $newToken['type'] = 'T_POW_EQUAL'; 651 $newToken['content'] = '**='; 652 $finalTokens[$newStackPtr] = $newToken; 653 654 $newStackPtr++; 655 $stackPtr++; 656 continue; 657 } 658 659 /* 660 Before PHP 7, the ??= operator was tokenized as 661 T_INLINE_THEN, T_INLINE_THEN, T_EQUAL. 662 Between PHP 7.0 and 7.2, the ??= operator was tokenized as 663 T_COALESCE, T_EQUAL. 664 So look for and combine these tokens in earlier versions. 665 */ 666 667 if (($tokenIsArray === false 668 && $token[0] === '?' 669 && isset($tokens[($stackPtr + 1)]) === true 670 && $tokens[($stackPtr + 1)][0] === '?' 671 && isset($tokens[($stackPtr + 2)]) === true 672 && $tokens[($stackPtr + 2)][0] === '=') 673 || ($tokenIsArray === true 674 && $token[0] === T_COALESCE 675 && isset($tokens[($stackPtr + 1)]) === true 676 && $tokens[($stackPtr + 1)][0] === '=') 677 ) { 678 $newToken = array(); 679 $newToken['code'] = T_COALESCE_EQUAL; 680 $newToken['type'] = 'T_COALESCE_EQUAL'; 681 $newToken['content'] = '??='; 682 $finalTokens[$newStackPtr] = $newToken; 683 684 $newStackPtr++; 685 $stackPtr++; 686 687 if ($tokenIsArray === false) { 688 // Pre PHP 7. 
689 $stackPtr++; 690 } 691 692 continue; 693 } 694 695 /* 696 Before PHP 7, the ?? operator was tokenized as 697 T_INLINE_THEN followed by T_INLINE_THEN. 698 So look for and combine these tokens in earlier versions. 699 */ 700 701 if ($tokenIsArray === false 702 && $token[0] === '?' 703 && isset($tokens[($stackPtr + 1)]) === true 704 && $tokens[($stackPtr + 1)][0] === '?' 705 ) { 706 $newToken = array(); 707 $newToken['code'] = T_COALESCE; 708 $newToken['type'] = 'T_COALESCE'; 709 $newToken['content'] = '??'; 710 $finalTokens[$newStackPtr] = $newToken; 711 712 $newStackPtr++; 713 $stackPtr++; 714 continue; 715 } 716 717 /* 718 Convert ? to T_NULLABLE OR T_INLINE_THEN 719 */ 720 721 if ($tokenIsArray === false && $token[0] === '?') { 722 $newToken = array(); 723 $newToken['content'] = '?'; 724 725 for ($i = ($stackPtr - 1); $i >= 0; $i--) { 726 if (is_array($tokens[$i]) === true) { 727 $tokenType = $tokens[$i][0]; 728 } else { 729 $tokenType = $tokens[$i]; 730 } 731 732 if ($tokenType === T_FUNCTION) { 733 $newToken['code'] = T_NULLABLE; 734 $newToken['type'] = 'T_NULLABLE'; 735 break; 736 } else if (in_array($tokenType, array(T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO, '=', '{', ';')) === true) { 737 $newToken['code'] = T_INLINE_THEN; 738 $newToken['type'] = 'T_INLINE_THEN'; 739 740 $insideInlineIf[] = $stackPtr; 741 break; 742 } 743 } 744 745 $finalTokens[$newStackPtr] = $newToken; 746 $newStackPtr++; 747 continue; 748 }//end if 749 750 /* 751 Tokens after a double colon may be look like scope openers, 752 such as when writing code like Foo::NAMESPACE, but they are 753 only ever variables or strings. 
754 */ 755 756 if ($stackPtr > 1 757 && (is_array($tokens[($stackPtr - 1)]) === true 758 && $tokens[($stackPtr - 1)][0] === T_PAAMAYIM_NEKUDOTAYIM) 759 && $tokenIsArray === true 760 && $token[0] !== T_STRING 761 && $token[0] !== T_VARIABLE 762 && $token[0] !== T_DOLLAR 763 && isset(PHP_CodeSniffer_Tokens::$emptyTokens[$token[0]]) === false 764 ) { 765 $newToken = array(); 766 $newToken['code'] = T_STRING; 767 $newToken['type'] = 'T_STRING'; 768 $newToken['content'] = $token[1]; 769 $finalTokens[$newStackPtr] = $newToken; 770 771 $newStackPtr++; 772 continue; 773 } 774 775 /* 776 The string-like token after a function keyword should always be 777 tokenized as T_STRING even if it appears to be a different token, 778 such as when writing code like: function default(): foo 779 so go forward and change the token type before it is processed. 780 */ 781 782 if ($tokenIsArray === true && $token[0] === T_FUNCTION) { 783 for ($x = ($stackPtr + 1); $x < $numTokens; $x++) { 784 if (is_array($tokens[$x]) === false 785 || isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x][0]]) === false 786 ) { 787 // Non-empty content. 788 break; 789 } 790 } 791 792 if ($x < $numTokens && is_array($tokens[$x]) === true) { 793 $tokens[$x][0] = T_STRING; 794 } 795 } 796 797 /* 798 Before PHP 7, the <=> operator was tokenized as 799 T_IS_SMALLER_OR_EQUAL followed by T_GREATER_THAN. 800 So look for and combine these tokens in earlier versions. 801 */ 802 803 if ($tokenIsArray === true 804 && $token[0] === T_IS_SMALLER_OR_EQUAL 805 && isset($tokens[($stackPtr + 1)]) === true 806 && $tokens[($stackPtr + 1)][0] === '>' 807 ) { 808 $newToken = array(); 809 $newToken['code'] = T_SPACESHIP; 810 $newToken['type'] = 'T_SPACESHIP'; 811 $newToken['content'] = '<=>'; 812 $finalTokens[$newStackPtr] = $newToken; 813 814 $newStackPtr++; 815 $stackPtr++; 816 continue; 817 } 818 819 /* 820 Emulate traits in PHP versions less than 5.4. 
821 */ 822 823 if ($tokenIsArray === true 824 && $token[0] === T_STRING 825 && strtolower($token[1]) === 'trait' 826 && $tokens[($stackPtr - 1)][0] !== T_OBJECT_OPERATOR 827 && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM 828 ) { 829 $finalTokens[$newStackPtr] = array( 830 'content' => $token[1], 831 'code' => T_TRAIT, 832 'type' => 'T_TRAIT', 833 ); 834 835 if (PHP_CODESNIFFER_VERBOSITY > 1) { 836 echo "\t\t* token $stackPtr changed from T_STRING to T_TRAIT".PHP_EOL; 837 } 838 839 $newStackPtr++; 840 continue; 841 } 842 843 /* 844 PHP doesn't assign a token to goto labels, so we have to. 845 These are just string tokens with a single colon after them. Double 846 colons are already tokenized and so don't interfere with this check. 847 But we do have to account for CASE statements, that look just like 848 goto labels. 849 */ 850 851 if ($tokenIsArray === true 852 && $token[0] === T_STRING 853 && isset($tokens[($stackPtr + 1)]) === true 854 && $tokens[($stackPtr + 1)] === ':' 855 && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM 856 ) { 857 $stopTokens = array( 858 T_CASE => true, 859 T_SEMICOLON => true, 860 T_OPEN_CURLY_BRACKET => true, 861 T_INLINE_THEN => true, 862 ); 863 864 for ($x = ($newStackPtr - 1); $x > 0; $x--) { 865 if (isset($stopTokens[$finalTokens[$x]['code']]) === true) { 866 break; 867 } 868 } 869 870 if ($finalTokens[$x]['code'] !== T_CASE 871 && $finalTokens[$x]['code'] !== T_INLINE_THEN 872 ) { 873 $finalTokens[$newStackPtr] = array( 874 'content' => $token[1].':', 875 'code' => T_GOTO_LABEL, 876 'type' => 'T_GOTO_LABEL', 877 ); 878 879 if (PHP_CODESNIFFER_VERBOSITY > 1) { 880 echo "\t\t* token $stackPtr changed from T_STRING to T_GOTO_LABEL".PHP_EOL; 881 echo "\t\t* skipping T_COLON token ".($stackPtr + 1).PHP_EOL; 882 } 883 884 $newStackPtr++; 885 $stackPtr++; 886 continue; 887 } 888 }//end if 889 890 /* 891 HHVM 3.5 tokenizes "else[\s]+if" as a T_ELSEIF token while PHP 892 proper only tokenizes "elseif" as a T_ELSEIF token. 
So split 893 up the HHVM token to make it looks like proper PHP. 894 */ 895 896 if ($tokenIsArray === true 897 && $token[0] === T_ELSEIF 898 && strtolower($token[1]) !== 'elseif' 899 ) { 900 $finalTokens[$newStackPtr] = array( 901 'content' => substr($token[1], 0, 4), 902 'code' => T_ELSE, 903 'type' => 'T_ELSE', 904 ); 905 906 $newStackPtr++; 907 $finalTokens[$newStackPtr] = array( 908 'content' => substr($token[1], 4, -2), 909 'code' => T_WHITESPACE, 910 'type' => 'T_WHITESPACE', 911 ); 912 913 $newStackPtr++; 914 $finalTokens[$newStackPtr] = array( 915 'content' => substr($token[1], -2), 916 'code' => T_IF, 917 'type' => 'T_IF', 918 ); 919 920 if (PHP_CODESNIFFER_VERBOSITY > 1) { 921 echo "\t\t* token $stackPtr changed from T_ELSEIF to T_ELSE/T_WHITESPACE/T_IF".PHP_EOL; 922 } 923 924 $newStackPtr++; 925 continue; 926 }//end if 927 928 /* 929 HHVM 3.5 and 3.6 tokenizes a hashbang line such as #!/usr/bin/php 930 as T_HASHANG while PHP proper uses T_INLINE_HTML. 931 */ 932 933 if ($tokenIsArray === true && token_name($token[0]) === 'T_HASHBANG') { 934 $finalTokens[$newStackPtr] = array( 935 'content' => $token[1], 936 'code' => T_INLINE_HTML, 937 'type' => 'T_INLINE_HTML', 938 ); 939 940 if (PHP_CODESNIFFER_VERBOSITY > 1) { 941 echo "\t\t* token $stackPtr changed from T_HASHBANG to T_INLINE_HTML".PHP_EOL; 942 } 943 944 $newStackPtr++; 945 continue; 946 }//end if 947 948 /* 949 If this token has newlines in its content, split each line up 950 and create a new token for each line. We do this so it's easier 951 to ascertain where errors occur on a line. 952 Note that $token[1] is the token's content. 
953 */ 954 955 if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) { 956 $tokenLines = explode($eolChar, $token[1]); 957 $numLines = count($tokenLines); 958 $newToken = array( 959 'type' => token_name($token[0]), 960 'code' => $token[0], 961 'content' => '', 962 ); 963 964 for ($i = 0; $i < $numLines; $i++) { 965 $newToken['content'] = $tokenLines[$i]; 966 if ($i === ($numLines - 1)) { 967 if ($tokenLines[$i] === '') { 968 break; 969 } 970 } else { 971 $newToken['content'] .= $eolChar; 972 } 973 974 $finalTokens[$newStackPtr] = $newToken; 975 $newStackPtr++; 976 } 977 } else { 978 if ($tokenIsArray === true && $token[0] === T_STRING) { 979 // Some T_STRING tokens should remain that way 980 // due to their context. 981 $context = array( 982 T_OBJECT_OPERATOR => true, 983 T_FUNCTION => true, 984 T_CLASS => true, 985 T_EXTENDS => true, 986 T_IMPLEMENTS => true, 987 T_NEW => true, 988 T_CONST => true, 989 T_NS_SEPARATOR => true, 990 T_USE => true, 991 T_NAMESPACE => true, 992 T_PAAMAYIM_NEKUDOTAYIM => true, 993 ); 994 if (isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) { 995 // Special case for syntax like: return new self 996 // where self should not be a string. 
997 if ($finalTokens[$lastNotEmptyToken]['code'] === T_NEW 998 && strtolower($token[1]) === 'self' 999 ) { 1000 $finalTokens[$newStackPtr] = array( 1001 'content' => $token[1], 1002 'code' => T_SELF, 1003 'type' => 'T_SELF', 1004 ); 1005 } else { 1006 $finalTokens[$newStackPtr] = array( 1007 'content' => $token[1], 1008 'code' => T_STRING, 1009 'type' => 'T_STRING', 1010 ); 1011 } 1012 1013 $newStackPtr++; 1014 continue; 1015 }//end if 1016 }//end if 1017 1018 $newToken = null; 1019 if ($tokenIsArray === false) { 1020 if (isset(self::$_resolveTokenCache[$token[0]]) === true) { 1021 $newToken = self::$_resolveTokenCache[$token[0]]; 1022 } 1023 } else { 1024 $cacheKey = null; 1025 if ($token[0] === T_STRING) { 1026 $cacheKey = strtolower($token[1]); 1027 } else if ($token[0] !== T_CURLY_OPEN) { 1028 $cacheKey = $token[0]; 1029 } 1030 1031 if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) { 1032 $newToken = self::$_resolveTokenCache[$cacheKey]; 1033 $newToken['content'] = $token[1]; 1034 } 1035 } 1036 1037 if ($newToken === null) { 1038 $newToken = self::standardiseToken($token); 1039 } 1040 1041 // Convert colons that are actually the ELSE component of an 1042 // inline IF statement. 1043 if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) { 1044 array_pop($insideInlineIf); 1045 $newToken['code'] = T_INLINE_ELSE; 1046 $newToken['type'] = 'T_INLINE_ELSE'; 1047 } 1048 1049 // This is a special condition for T_ARRAY tokens used for 1050 // type hinting function arguments as being arrays. We want to keep 1051 // the parenthesis map clean, so let's tag these tokens as 1052 // T_ARRAY_HINT. 
1053 if ($newToken['code'] === T_ARRAY) { 1054 for ($i = $stackPtr; $i < $numTokens; $i++) { 1055 if ($tokens[$i] === '(') { 1056 break; 1057 } else if ($tokens[$i][0] === T_VARIABLE) { 1058 $newToken['code'] = T_ARRAY_HINT; 1059 $newToken['type'] = 'T_ARRAY_HINT'; 1060 break; 1061 } 1062 } 1063 } 1064 1065 // This is a special case when checking PHP 5.5+ code in PHP < 5.5 1066 // where "finally" should be T_FINALLY instead of T_STRING. 1067 if ($newToken['code'] === T_STRING 1068 && strtolower($newToken['content']) === 'finally' 1069 ) { 1070 $newToken['code'] = T_FINALLY; 1071 $newToken['type'] = 'T_FINALLY'; 1072 } 1073 1074 // This is a special case for the PHP 5.5 classname::class syntax 1075 // where "class" should be T_STRING instead of T_CLASS. 1076 if (($newToken['code'] === T_CLASS 1077 || $newToken['code'] === T_FUNCTION) 1078 && $finalTokens[($newStackPtr - 1)]['code'] === T_DOUBLE_COLON 1079 ) { 1080 $newToken['code'] = T_STRING; 1081 $newToken['type'] = 'T_STRING'; 1082 } 1083 1084 // This is a special case for PHP 5.6 use function and use const 1085 // where "function" and "const" should be T_STRING instead of T_FUNCTION 1086 // and T_CONST. 1087 if (($newToken['code'] === T_FUNCTION 1088 || $newToken['code'] === T_CONST) 1089 && $finalTokens[$lastNotEmptyToken]['code'] === T_USE 1090 ) { 1091 $newToken['code'] = T_STRING; 1092 $newToken['type'] = 'T_STRING'; 1093 } 1094 1095 // This is a special case for use groups in PHP 7+ where leaving 1096 // the curly braces as their normal tokens would confuse 1097 // the scope map and sniffs. 
1098 if ($newToken['code'] === T_OPEN_CURLY_BRACKET 1099 && $finalTokens[$lastNotEmptyToken]['code'] === T_NS_SEPARATOR 1100 ) { 1101 $newToken['code'] = T_OPEN_USE_GROUP; 1102 $newToken['type'] = 'T_OPEN_USE_GROUP'; 1103 $insideUseGroup = true; 1104 } 1105 1106 if ($insideUseGroup === true && $newToken['code'] === T_CLOSE_CURLY_BRACKET) { 1107 $newToken['code'] = T_CLOSE_USE_GROUP; 1108 $newToken['type'] = 'T_CLOSE_USE_GROUP'; 1109 $insideUseGroup = false; 1110 } 1111 1112 $finalTokens[$newStackPtr] = $newToken; 1113 $newStackPtr++; 1114 }//end if 1115 }//end for 1116 1117 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1118 echo "\t*** END PHP TOKENIZING ***".PHP_EOL; 1119 } 1120 1121 return $finalTokens; 1122 1123 }//end tokenizeString() 1124 1125 1126 /** 1127 * Performs additional processing after main tokenizing. 1128 * 1129 * This additional processing checks for CASE statements that are using curly 1130 * braces for scope openers and closers. It also turns some T_FUNCTION tokens 1131 * into T_CLOSURE when they are not standard function definitions. It also 1132 * detects short array syntax and converts those square brackets into new tokens. 1133 * It also corrects some usage of the static and class keywords. It also 1134 * assigns tokens to function return types. 1135 * 1136 * @param array $tokens The array of tokens to process. 1137 * @param string $eolChar The EOL character to use for splitting strings. 1138 * 1139 * @return void 1140 */ 1141 public function processAdditional(&$tokens, $eolChar) 1142 { 1143 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1144 echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL; 1145 } 1146 1147 $numTokens = count($tokens); 1148 for ($i = ($numTokens - 1); $i >= 0; $i--) { 1149 // Check for any unset scope conditions due to alternate IF/ENDIF syntax. 
1150 if (isset($tokens[$i]['scope_opener']) === true 1151 && isset($tokens[$i]['scope_condition']) === false 1152 ) { 1153 $tokens[$i]['scope_condition'] = $tokens[$tokens[$i]['scope_opener']]['scope_condition']; 1154 } 1155 1156 if ($tokens[$i]['code'] === T_FUNCTION) { 1157 /* 1158 Detect functions that are actually closures and 1159 assign them a different token. 1160 */ 1161 1162 if (isset($tokens[$i]['scope_opener']) === true) { 1163 for ($x = ($i + 1); $x < $numTokens; $x++) { 1164 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false 1165 && $tokens[$x]['code'] !== T_BITWISE_AND 1166 ) { 1167 break; 1168 } 1169 } 1170 1171 if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) { 1172 $tokens[$i]['code'] = T_CLOSURE; 1173 $tokens[$i]['type'] = 'T_CLOSURE'; 1174 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1175 $line = $tokens[$i]['line']; 1176 echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL; 1177 } 1178 1179 for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) { 1180 if (isset($tokens[$x]['conditions'][$i]) === false) { 1181 continue; 1182 } 1183 1184 $tokens[$x]['conditions'][$i] = T_CLOSURE; 1185 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1186 $type = $tokens[$x]['type']; 1187 echo "\t\t* cleaned $x ($type) *".PHP_EOL; 1188 } 1189 } 1190 } 1191 1192 $tokenAfterReturnTypeHint = $tokens[$i]['scope_opener']; 1193 } else if (isset($tokens[$i]['parenthesis_closer']) === true) { 1194 $tokenAfterReturnTypeHint = null; 1195 for ($x = ($tokens[$i]['parenthesis_closer'] + 1); $x < $numTokens; $x++) { 1196 if ($tokens[$x]['code'] === T_SEMICOLON) { 1197 $tokenAfterReturnTypeHint = $x; 1198 break; 1199 } 1200 } 1201 1202 if ($tokenAfterReturnTypeHint === null) { 1203 // Probably a syntax error. 1204 continue; 1205 } 1206 } else { 1207 // Probably a syntax error. 1208 continue; 1209 }//end if 1210 1211 /* 1212 Detect function return values and assign them 1213 a special token, because PHP doesn't. 
1214 */ 1215 1216 for ($x = ($tokenAfterReturnTypeHint - 1); $x > $i; $x--) { 1217 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1218 if (in_array($tokens[$x]['code'], array(T_STRING, T_ARRAY, T_ARRAY_HINT, T_CALLABLE, T_SELF, T_PARENT), true) === true) { 1219 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1220 $line = $tokens[$x]['line']; 1221 $type = $tokens[$x]['type']; 1222 echo "\t* token $x on line $line changed from $type to T_RETURN_TYPE".PHP_EOL; 1223 } 1224 1225 $tokens[$x]['code'] = T_RETURN_TYPE; 1226 $tokens[$x]['type'] = 'T_RETURN_TYPE'; 1227 } 1228 1229 break; 1230 } 1231 } 1232 1233 continue; 1234 } else if ($tokens[$i]['code'] === T_CLASS && isset($tokens[$i]['scope_opener']) === true) { 1235 /* 1236 Detect anonymous classes and assign them a different token. 1237 */ 1238 1239 for ($x = ($i + 1); $x < $numTokens; $x++) { 1240 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1241 break; 1242 } 1243 } 1244 1245 if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS 1246 || $tokens[$x]['code'] === T_OPEN_CURLY_BRACKET 1247 || $tokens[$x]['code'] === T_EXTENDS 1248 || $tokens[$x]['code'] === T_IMPLEMENTS 1249 ) { 1250 $tokens[$i]['code'] = T_ANON_CLASS; 1251 $tokens[$i]['type'] = 'T_ANON_CLASS'; 1252 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1253 $line = $tokens[$i]['line']; 1254 echo "\t* token $i on line $line changed from T_CLASS to T_ANON_CLASS".PHP_EOL; 1255 } 1256 1257 for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) { 1258 if (isset($tokens[$x]['conditions'][$i]) === false) { 1259 continue; 1260 } 1261 1262 $tokens[$x]['conditions'][$i] = T_ANON_CLASS; 1263 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1264 $type = $tokens[$x]['type']; 1265 echo "\t\t* cleaned $x ($type) *".PHP_EOL; 1266 } 1267 } 1268 } 1269 1270 continue; 1271 } else if ($tokens[$i]['code'] === T_OPEN_SQUARE_BRACKET) { 1272 if (isset($tokens[$i]['bracket_closer']) === false) { 1273 continue; 1274 } 1275 
1276 // Unless there is a variable or a bracket before this token, 1277 // it is the start of an array being defined using the short syntax. 1278 $isShortArray = false; 1279 $allowed = array( 1280 T_CLOSE_SQUARE_BRACKET => T_CLOSE_SQUARE_BRACKET, 1281 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, 1282 T_CLOSE_PARENTHESIS => T_CLOSE_PARENTHESIS, 1283 T_VARIABLE => T_VARIABLE, 1284 T_OBJECT_OPERATOR => T_OBJECT_OPERATOR, 1285 T_STRING => T_STRING, 1286 T_CONSTANT_ENCAPSED_STRING => T_CONSTANT_ENCAPSED_STRING, 1287 ); 1288 1289 for ($x = ($i - 1); $x > 0; $x--) { 1290 // If we hit a scope opener, the statement has ended 1291 // without finding anything, so it's probably an array 1292 // using PHP 7.1 short list syntax. 1293 if (isset($tokens[$x]['scope_opener']) === true) { 1294 $isShortArray = true; 1295 break; 1296 } 1297 1298 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1299 if (isset($allowed[$tokens[$x]['code']]) === false) { 1300 $isShortArray = true; 1301 } 1302 1303 break; 1304 } 1305 } 1306 1307 if ($isShortArray === true) { 1308 $tokens[$i]['code'] = T_OPEN_SHORT_ARRAY; 1309 $tokens[$i]['type'] = 'T_OPEN_SHORT_ARRAY'; 1310 1311 $closer = $tokens[$i]['bracket_closer']; 1312 $tokens[$closer]['code'] = T_CLOSE_SHORT_ARRAY; 1313 $tokens[$closer]['type'] = 'T_CLOSE_SHORT_ARRAY'; 1314 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1315 $line = $tokens[$i]['line']; 1316 echo "\t* token $i on line $line changed from T_OPEN_SQUARE_BRACKET to T_OPEN_SHORT_ARRAY".PHP_EOL; 1317 $line = $tokens[$closer]['line']; 1318 echo "\t* token $closer on line $line changed from T_CLOSE_SQUARE_BRACKET to T_CLOSE_SHORT_ARRAY".PHP_EOL; 1319 } 1320 } 1321 1322 continue; 1323 } else if ($tokens[$i]['code'] === T_STATIC) { 1324 for ($x = ($i - 1); $x > 0; $x--) { 1325 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1326 break; 1327 } 1328 } 1329 1330 if ($tokens[$x]['code'] === T_INSTANCEOF) { 1331 $tokens[$i]['code'] = 
T_STRING; 1332 $tokens[$i]['type'] = 'T_STRING'; 1333 1334 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1335 $line = $tokens[$i]['line']; 1336 echo "\t* token $i on line $line changed from T_STATIC to T_STRING".PHP_EOL; 1337 } 1338 } 1339 1340 continue; 1341 } else if ($tokens[$i]['code'] === T_ECHO && $tokens[$i]['content'] === '<?=') { 1342 // HHVM tokenizes <?= as T_ECHO but it should be T_OPEN_TAG_WITH_ECHO. 1343 $tokens[$i]['code'] = T_OPEN_TAG_WITH_ECHO; 1344 $tokens[$i]['type'] = 'T_OPEN_TAG_WITH_ECHO'; 1345 1346 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1347 $line = $tokens[$i]['line']; 1348 echo "\t* token $i on line $line changed from T_ECHO to T_OPEN_TAG_WITH_ECHO".PHP_EOL; 1349 } 1350 } else if ($tokens[$i]['code'] === T_TRUE 1351 || $tokens[$i]['code'] === T_FALSE 1352 || $tokens[$i]['code'] === T_NULL 1353 ) { 1354 for ($x = ($i + 1); $i < $numTokens; $x++) { 1355 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1356 // Non-whitespace content. 1357 break; 1358 } 1359 } 1360 1361 $context = array( 1362 T_OBJECT_OPERATOR => true, 1363 T_NS_SEPARATOR => true, 1364 T_PAAMAYIM_NEKUDOTAYIM => true, 1365 ); 1366 if (isset($context[$tokens[$x]['code']]) === true) { 1367 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1368 $line = $tokens[$i]['line']; 1369 $type = $tokens[$i]['type']; 1370 echo "\t* token $i on line $line changed from $type to T_STRING".PHP_EOL; 1371 } 1372 1373 $tokens[$i]['code'] = T_STRING; 1374 $tokens[$i]['type'] = 'T_STRING'; 1375 } 1376 } else if ($tokens[$i]['code'] === T_CONST) { 1377 // Context sensitive keywords support. 1378 for ($x = ($i + 1); $i < $numTokens; $x++) { 1379 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1380 // Non-whitespace content. 
1381 break; 1382 } 1383 } 1384 1385 if ($tokens[$x]['code'] !== T_STRING) { 1386 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1387 $line = $tokens[$x]['line']; 1388 $type = $tokens[$x]['type']; 1389 echo "\t* token $x on line $line changed from $type to T_STRING".PHP_EOL; 1390 } 1391 1392 $tokens[$x]['code'] = T_STRING; 1393 $tokens[$x]['type'] = 'T_STRING'; 1394 } 1395 }//end if 1396 1397 if (($tokens[$i]['code'] !== T_CASE 1398 && $tokens[$i]['code'] !== T_DEFAULT) 1399 || isset($tokens[$i]['scope_opener']) === false 1400 ) { 1401 // Only interested in CASE and DEFAULT statements from here on in. 1402 continue; 1403 } 1404 1405 $scopeOpener = $tokens[$i]['scope_opener']; 1406 $scopeCloser = $tokens[$i]['scope_closer']; 1407 1408 // If the first char after the opener is a curly brace 1409 // and that brace has been ignored, it is actually 1410 // opening this case statement and the opener and closer are 1411 // probably set incorrectly. 1412 for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) { 1413 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) { 1414 // Non-whitespace content. 1415 break; 1416 } 1417 } 1418 1419 if ($tokens[$x]['code'] === T_CASE || $tokens[$x]['code'] === T_DEFAULT) { 1420 // Special case for multiple CASE statements that share the same 1421 // closer. Because we are going backwards through the file, this next 1422 // CASE/DEFAULT statement is already fixed, so just use its closer 1423 // and don't worry about fixing anything. 
1424 $newCloser = $tokens[$x]['scope_closer']; 1425 $tokens[$i]['scope_closer'] = $newCloser; 1426 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1427 $oldType = $tokens[$scopeCloser]['type']; 1428 $newType = $tokens[$newCloser]['type']; 1429 $line = $tokens[$i]['line']; 1430 echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL; 1431 } 1432 1433 continue; 1434 } 1435 1436 if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET 1437 || isset($tokens[$x]['scope_condition']) === true 1438 ) { 1439 // Not a CASE/DEFAULT with a curly brace opener. 1440 continue; 1441 } 1442 1443 // The closer for this CASE/DEFAULT should be the closing curly brace and 1444 // not whatever it already is. The opener needs to be the opening curly 1445 // brace so everything matches up. 1446 $newCloser = $tokens[$x]['bracket_closer']; 1447 foreach (array($i, $x, $newCloser) as $index) { 1448 $tokens[$index]['scope_condition'] = $i; 1449 $tokens[$index]['scope_opener'] = $x; 1450 $tokens[$index]['scope_closer'] = $newCloser; 1451 } 1452 1453 unset($tokens[$scopeOpener]['scope_condition']); 1454 unset($tokens[$scopeOpener]['scope_opener']); 1455 unset($tokens[$scopeOpener]['scope_closer']); 1456 unset($tokens[$scopeCloser]['scope_condition']); 1457 unset($tokens[$scopeCloser]['scope_opener']); 1458 unset($tokens[$scopeCloser]['scope_closer']); 1459 unset($tokens[$x]['bracket_opener']); 1460 unset($tokens[$x]['bracket_closer']); 1461 unset($tokens[$newCloser]['bracket_opener']); 1462 unset($tokens[$newCloser]['bracket_closer']); 1463 $tokens[$scopeCloser]['conditions'][] = $i; 1464 1465 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1466 $line = $tokens[$i]['line']; 1467 $tokenType = $tokens[$i]['type']; 1468 1469 $oldType = $tokens[$scopeOpener]['type']; 1470 $newType = $tokens[$x]['type']; 1471 echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL; 1472 1473 $oldType = 
$tokens[$scopeCloser]['type']; 1474 $newType = $tokens[$newCloser]['type']; 1475 echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL; 1476 } 1477 1478 // Now fix up all the tokens that think they are 1479 // inside the CASE/DEFAULT statement when they are really outside. 1480 for ($x = $newCloser; $x < $scopeCloser; $x++) { 1481 foreach ($tokens[$x]['conditions'] as $num => $oldCond) { 1482 if ($oldCond === $tokens[$i]['code']) { 1483 $oldConditions = $tokens[$x]['conditions']; 1484 unset($tokens[$x]['conditions'][$num]); 1485 1486 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1487 $type = $tokens[$x]['type']; 1488 $oldConds = ''; 1489 foreach ($oldConditions as $condition) { 1490 $oldConds .= token_name($condition).','; 1491 } 1492 1493 $oldConds = rtrim($oldConds, ','); 1494 1495 $newConds = ''; 1496 foreach ($tokens[$x]['conditions'] as $condition) { 1497 $newConds .= token_name($condition).','; 1498 } 1499 1500 $newConds = rtrim($newConds, ','); 1501 1502 echo "\t\t* cleaned $x ($type) *".PHP_EOL; 1503 echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL; 1504 } 1505 1506 break; 1507 }//end if 1508 }//end foreach 1509 }//end for 1510 }//end for 1511 1512 if (PHP_CODESNIFFER_VERBOSITY > 1) { 1513 echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL; 1514 } 1515 1516 }//end processAdditional() 1517 1518 1519 /** 1520 * Takes a token produced from <code>token_get_all()</code> and produces a 1521 * more uniform token. 1522 * 1523 * @param string|array $token The token to convert. 1524 * 1525 * @return array The new token. 
1526 */ 1527 public static function standardiseToken($token) 1528 { 1529 if (isset($token[1]) === false) { 1530 if (isset(self::$_resolveTokenCache[$token[0]]) === true) { 1531 return self::$_resolveTokenCache[$token[0]]; 1532 } 1533 } else { 1534 $cacheKey = null; 1535 if ($token[0] === T_STRING) { 1536 $cacheKey = strtolower($token[1]); 1537 } else if ($token[0] !== T_CURLY_OPEN) { 1538 $cacheKey = $token[0]; 1539 } 1540 1541 if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) { 1542 $newToken = self::$_resolveTokenCache[$cacheKey]; 1543 $newToken['content'] = $token[1]; 1544 return $newToken; 1545 } 1546 } 1547 1548 if (isset($token[1]) === false) { 1549 return self::resolveSimpleToken($token[0]); 1550 } 1551 1552 if ($token[0] === T_STRING) { 1553 switch ($cacheKey) { 1554 case 'false': 1555 $newToken['type'] = 'T_FALSE'; 1556 break; 1557 case 'true': 1558 $newToken['type'] = 'T_TRUE'; 1559 break; 1560 case 'null': 1561 $newToken['type'] = 'T_NULL'; 1562 break; 1563 case 'self': 1564 $newToken['type'] = 'T_SELF'; 1565 break; 1566 case 'parent': 1567 $newToken['type'] = 'T_PARENT'; 1568 break; 1569 default: 1570 $newToken['type'] = 'T_STRING'; 1571 break; 1572 } 1573 1574 $newToken['code'] = constant($newToken['type']); 1575 1576 self::$_resolveTokenCache[$cacheKey] = $newToken; 1577 } else if ($token[0] === T_CURLY_OPEN) { 1578 $newToken = array( 1579 'code' => T_OPEN_CURLY_BRACKET, 1580 'type' => 'T_OPEN_CURLY_BRACKET', 1581 ); 1582 } else { 1583 $newToken = array( 1584 'code' => $token[0], 1585 'type' => token_name($token[0]), 1586 ); 1587 1588 self::$_resolveTokenCache[$token[0]] = $newToken; 1589 }//end if 1590 1591 $newToken['content'] = $token[1]; 1592 return $newToken; 1593 1594 }//end standardiseToken() 1595 1596 1597 /** 1598 * Converts simple tokens into a format that conforms to complex tokens 1599 * produced by token_get_all(). 
1600 * 1601 * Simple tokens are tokens that are not in array form when produced from 1602 * token_get_all(). 1603 * 1604 * @param string $token The simple token to convert. 1605 * 1606 * @return array The new token in array format. 1607 */ 1608 public static function resolveSimpleToken($token) 1609 { 1610 $newToken = array(); 1611 1612 switch ($token) { 1613 case '{': 1614 $newToken['type'] = 'T_OPEN_CURLY_BRACKET'; 1615 break; 1616 case '}': 1617 $newToken['type'] = 'T_CLOSE_CURLY_BRACKET'; 1618 break; 1619 case '[': 1620 $newToken['type'] = 'T_OPEN_SQUARE_BRACKET'; 1621 break; 1622 case ']': 1623 $newToken['type'] = 'T_CLOSE_SQUARE_BRACKET'; 1624 break; 1625 case '(': 1626 $newToken['type'] = 'T_OPEN_PARENTHESIS'; 1627 break; 1628 case ')': 1629 $newToken['type'] = 'T_CLOSE_PARENTHESIS'; 1630 break; 1631 case ':': 1632 $newToken['type'] = 'T_COLON'; 1633 break; 1634 case '.': 1635 $newToken['type'] = 'T_STRING_CONCAT'; 1636 break; 1637 case ';': 1638 $newToken['type'] = 'T_SEMICOLON'; 1639 break; 1640 case '=': 1641 $newToken['type'] = 'T_EQUAL'; 1642 break; 1643 case '*': 1644 $newToken['type'] = 'T_MULTIPLY'; 1645 break; 1646 case '/': 1647 $newToken['type'] = 'T_DIVIDE'; 1648 break; 1649 case '+': 1650 $newToken['type'] = 'T_PLUS'; 1651 break; 1652 case '-': 1653 $newToken['type'] = 'T_MINUS'; 1654 break; 1655 case '%': 1656 $newToken['type'] = 'T_MODULUS'; 1657 break; 1658 case '^': 1659 $newToken['type'] = 'T_BITWISE_XOR'; 1660 break; 1661 case '&': 1662 $newToken['type'] = 'T_BITWISE_AND'; 1663 break; 1664 case '|': 1665 $newToken['type'] = 'T_BITWISE_OR'; 1666 break; 1667 case '<': 1668 $newToken['type'] = 'T_LESS_THAN'; 1669 break; 1670 case '>': 1671 $newToken['type'] = 'T_GREATER_THAN'; 1672 break; 1673 case '!': 1674 $newToken['type'] = 'T_BOOLEAN_NOT'; 1675 break; 1676 case ',': 1677 $newToken['type'] = 'T_COMMA'; 1678 break; 1679 case '@': 1680 $newToken['type'] = 'T_ASPERAND'; 1681 break; 1682 case '$': 1683 $newToken['type'] = 'T_DOLLAR'; 1684 
break; 1685 case '`': 1686 $newToken['type'] = 'T_BACKTICK'; 1687 break; 1688 default: 1689 $newToken['type'] = 'T_NONE'; 1690 break; 1691 }//end switch 1692 1693 $newToken['code'] = constant($newToken['type']); 1694 $newToken['content'] = $token; 1695 1696 self::$_resolveTokenCache[$token] = $newToken; 1697 return $newToken; 1698 1699 }//end resolveSimpleToken() 1700 1701 1702}//end class 1703