<?php
/**
 * Tokenizes JS code.
 *
 * PHP version 5
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @author    Marc McIntyre <mmcintyre@squiz.net>
 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */

/**
 * Tokenizes JS code.
 *
 * The tokenizer works in two passes: a character-by-character scan that
 * produces raw tokens (tokenizeString(), first loop), followed by a pass
 * that joins comments, splits multi-line tokens per line and converts
 * numbers (tokenizeString(), second loop). processAdditional() is a
 * separate step that re-types tokens (closures, objects, properties,
 * labels, inline else) using the bracket/scope keys it reads from the
 * token array it is given.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_JS
{

    /**
     * If TRUE, files that appear to be minified will not be processed.
     *
     * @var boolean
     */
    public $skipMinified = true;

    /**
     * A list of tokens that are allowed to open a scope.
     *
     * This array also contains information about what kind of token the scope
     * opener uses to open and close the scope, if the token strictly requires
     * an opener, if the token can share a scope closer, and who it can be shared
     * with. An example of a token that shares a scope closer is a CASE scope.
     *
     * @var array
     */
    public $scopeOpeners = array(
                            T_IF       => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => false,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_TRY      => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => true,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_CATCH    => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => true,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_ELSE     => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => false,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_FOR      => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => false,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_FUNCTION => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => false,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_WHILE    => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => false,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_DO       => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => true,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_SWITCH   => array(
                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
                                           'strict' => true,
                                           'shared' => false,
                                           'with'   => array(),
                                          ),
                            T_CASE     => array(
                                           'start'  => array(T_COLON => T_COLON),
                                           'end'    => array(
                                                        T_BREAK    => T_BREAK,
                                                        T_RETURN   => T_RETURN,
                                                        T_CONTINUE => T_CONTINUE,
                                                        T_THROW    => T_THROW,
                                                       ),
                                           'strict' => true,
                                           'shared' => true,
                                           'with'   => array(
                                                        T_DEFAULT => T_DEFAULT,
                                                        T_CASE    => T_CASE,
                                                        T_SWITCH  => T_SWITCH,
                                                       ),
                                          ),
                            T_DEFAULT  => array(
                                           'start'  => array(T_COLON => T_COLON),
                                           'end'    => array(
                                                        T_BREAK    => T_BREAK,
                                                        T_RETURN   => T_RETURN,
                                                        T_CONTINUE => T_CONTINUE,
                                                        T_THROW    => T_THROW,
                                                       ),
                                           'strict' => true,
                                           'shared' => true,
                                           'with'   => array(
                                                        T_CASE   => T_CASE,
                                                        T_SWITCH => T_SWITCH,
                                                       ),
                                          ),
                           );

    /**
     * A list of tokens that end the scope.
     *
     * This array is just a unique collection of the end tokens
     * from the scopeOpeners array. The data is duplicated here to
     * save time during parsing of the file.
     *
     * @var array
     */
    public $endScopeTokens = array(
                              T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
                              T_BREAK               => T_BREAK,
                             );

    /**
     * A list of special JS tokens and their types.
     *
     * Keys are the (lowercase) source text; values are the names of the
     * token constants, resolved with constant() when a token is created.
     *
     * @var array
     */
    protected $tokenValues = array(
                              'function'  => 'T_FUNCTION',
                              'prototype' => 'T_PROTOTYPE',
                              'try'       => 'T_TRY',
                              'catch'     => 'T_CATCH',
                              'return'    => 'T_RETURN',
                              'throw'     => 'T_THROW',
                              'break'     => 'T_BREAK',
                              'switch'    => 'T_SWITCH',
                              'continue'  => 'T_CONTINUE',
                              'if'        => 'T_IF',
                              'else'      => 'T_ELSE',
                              'do'        => 'T_DO',
                              'while'     => 'T_WHILE',
                              'for'       => 'T_FOR',
                              'var'       => 'T_VAR',
                              'case'      => 'T_CASE',
                              'default'   => 'T_DEFAULT',
                              'true'      => 'T_TRUE',
                              'false'     => 'T_FALSE',
                              'null'      => 'T_NULL',
                              'this'      => 'T_THIS',
                              'typeof'    => 'T_TYPEOF',
                              '('         => 'T_OPEN_PARENTHESIS',
                              ')'         => 'T_CLOSE_PARENTHESIS',
                              '{'         => 'T_OPEN_CURLY_BRACKET',
                              '}'         => 'T_CLOSE_CURLY_BRACKET',
                              '['         => 'T_OPEN_SQUARE_BRACKET',
                              ']'         => 'T_CLOSE_SQUARE_BRACKET',
                              '?'         => 'T_INLINE_THEN',
                              '.'         => 'T_OBJECT_OPERATOR',
                              '+'         => 'T_PLUS',
                              '-'         => 'T_MINUS',
                              '*'         => 'T_MULTIPLY',
                              '%'         => 'T_MODULUS',
                              '/'         => 'T_DIVIDE',
                              '^'         => 'T_LOGICAL_XOR',
                              ','         => 'T_COMMA',
                              ';'         => 'T_SEMICOLON',
                              ':'         => 'T_COLON',
                              '<'         => 'T_LESS_THAN',
                              '>'         => 'T_GREATER_THAN',
                              '<<'        => 'T_SL',
                              '>>'        => 'T_SR',
                              '>>>'       => 'T_ZSR',
                              '<<='       => 'T_SL_EQUAL',
                              '>>='       => 'T_SR_EQUAL',
                              '>>>='      => 'T_ZSR_EQUAL',
                              '<='        => 'T_IS_SMALLER_OR_EQUAL',
                              '>='        => 'T_IS_GREATER_OR_EQUAL',
                              '=>'        => 'T_DOUBLE_ARROW',
                              '!'         => 'T_BOOLEAN_NOT',
                              '||'        => 'T_BOOLEAN_OR',
                              '&&'        => 'T_BOOLEAN_AND',
                              '|'         => 'T_BITWISE_OR',
                              '&'         => 'T_BITWISE_AND',
                              '!='        => 'T_IS_NOT_EQUAL',
                              '!=='       => 'T_IS_NOT_IDENTICAL',
                              '='         => 'T_EQUAL',
                              '=='        => 'T_IS_EQUAL',
                              '==='       => 'T_IS_IDENTICAL',
                              '-='        => 'T_MINUS_EQUAL',
                              '+='        => 'T_PLUS_EQUAL',
                              '*='        => 'T_MUL_EQUAL',
                              '/='        => 'T_DIV_EQUAL',
                              '%='        => 'T_MOD_EQUAL',
                              '++'        => 'T_INC',
                              '--'        => 'T_DEC',
                              '//'        => 'T_COMMENT',
                              '/*'        => 'T_COMMENT',
                              '/**'       => 'T_DOC_COMMENT',
                              '*/'        => 'T_COMMENT',
                             );

    /**
     * A list of string delimiters.
     *
     * @var array
     */
    protected $stringTokens = array(
                               '\'' => '\'',
                               '"'  => '"',
                              );

    /**
     * A list of tokens that start and end comments.
     *
     * A NULL closer means the comment runs to the end of the line.
     *
     * @var array
     */
    protected $commentTokens = array(
                                '//'  => null,
                                '/*'  => '*/',
                                '/**' => '*/',
                               );


    /**
     * Creates an array of tokens when given some JS code.
     *
     * The first loop walks the input one character at a time, collecting
     * characters in a buffer and emitting a token whenever the buffer (or
     * the current character) matches an entry in $tokenValues, a quoted
     * string closes, or a regular expression is recognised. The second
     * loop joins comment tokens, splits tokens that span several lines
     * into one token per line, and converts digit sequences into
     * T_LNUMBER/T_DNUMBER.
     *
     * Each returned token is an array with the keys "code", "type" and
     * "content" (doc comments are delegated to the comment tokenizer and
     * stored as it returns them).
     *
     * @param string $string  The string to tokenize.
     * @param string $eolChar The EOL character to use for splitting strings.
     *                        Defaults to a real newline character.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar="\n")
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START JS TOKENIZING ***".PHP_EOL;
        }

        $maxTokenLength = 0;
        foreach ($this->tokenValues as $token => $values) {
            if (strlen($token) > $maxTokenLength) {
                $maxTokenLength = strlen($token);
            }
        }

        // Characters that can be part of an identifier. Used to make sure
        // a keyword match is not just the start or end of a longer name.
        $identifierChar = '|[a-zA-Z0-9_]|';

        $tokens          = array();
        $inString        = '';
        $stringChar      = null;
        $inComment       = '';
        $buffer          = '';
        $preStringBuffer = '';
        $cleanBuffer     = false;

        $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();

        $tokens[] = array(
                     'code'    => T_OPEN_TAG,
                     'type'    => 'T_OPEN_TAG',
                     'content' => '',
                    );

        // Convert newlines to single characters for ease of
        // processing. We will change them back later.
        $string = str_replace($eolChar, "\n", $string);

        $chars    = str_split($string);
        $numChars = count($chars);
        for ($i = 0; $i < $numChars; $i++) {
            $char = $chars[$i];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content       = PHP_CodeSniffer::prepareForOutput($char);
                $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);

                if ($inString !== '') {
                    echo "\t";
                }

                if ($inComment !== '') {
                    echo "\t";
                }

                echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
            }//end if

            if ($inString === '' && $inComment === '' && $buffer !== '') {
                // If the buffer only has whitespace and we are about to
                // add a character, store the whitespace first.
                if (trim($char) !== '' && trim($buffer) === '') {
                    $tokens[] = array(
                                 'code'    => T_WHITESPACE,
                                 'type'    => 'T_WHITESPACE',
                                 'content' => str_replace("\n", $eolChar, $buffer),
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }

                // If the buffer is not whitespace and we are about to
                // add a whitespace character, store the content first.
                if ($inString === ''
                    && $inComment === ''
                    && trim($char) === ''
                    && trim($buffer) !== ''
                ) {
                    $tokens[] = array(
                                 'code'    => T_STRING,
                                 'type'    => 'T_STRING',
                                 'content' => str_replace("\n", $eolChar, $buffer),
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }
            }//end if

            // Process strings.
            if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
                if ($inString === $char) {
                    // This could be the end of the string, but make sure it
                    // is not escaped first.
                    $escapes = 0;
                    for ($x = ($i - 1); $x >= 0; $x--) {
                        if ($chars[$x] !== '\\') {
                            break;
                        }

                        $escapes++;
                    }

                    if ($escapes === 0 || ($escapes % 2) === 0) {
                        // There is an even number escape chars,
                        // so this is not escaped, it is the end of the string.
                        $tokens[] = array(
                                     'code'    => T_CONSTANT_ENCAPSED_STRING,
                                     'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                                     'content' => str_replace("\n", $eolChar, $buffer).$char,
                                    );

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* found end of string *".PHP_EOL;
                            $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
                            echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                        }

                        $buffer          = '';
                        $preStringBuffer = '';
                        $inString        = '';
                        $stringChar      = null;
                        continue;
                    }//end if
                } else if ($inString === '') {
                    // Remember where the string started and what was in the
                    // buffer so we can rewind if this is not really a string.
                    $inString        = $char;
                    $stringChar      = $i;
                    $preStringBuffer = $buffer;

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* looking for string closer *".PHP_EOL;
                    }
                }//end if
            }//end if

            if ($inString !== '' && $char === "\n") {
                // Unless this newline character is escaped, the string did not
                // end before the end of the line, which means it probably
                // wasn't a string at all (maybe a regex).
                if ($chars[($i - 1)] !== '\\') {
                    // Rewind to the quote character and process it again
                    // as a normal character.
                    $i               = $stringChar;
                    $buffer          = $preStringBuffer;
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    $char            = $chars[$i];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                    }
                }
            }

            $buffer .= $char;

            // We don't look for special tokens inside strings,
            // so if we are in a string, we can continue here now
            // that the current char is in the buffer.
            if ($inString !== '') {
                continue;
            }

            // Special case for T_DIVIDE which can actually be
            // the start of a regular expression.
            if ($buffer === $char
                && $char === '/'
                && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
            ) {
                $regex = $this->getRegexToken(
                    $i,
                    $string,
                    $chars,
                    $tokens,
                    $eolChar
                );

                if ($regex !== null) {
                    $tokens[] = array(
                                 'code'    => T_REGULAR_EXPRESSION,
                                 'type'    => 'T_REGULAR_EXPRESSION',
                                 'content' => $regex['content'],
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
                        echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                    }

                    // Skip over the characters consumed by the regex.
                    $i           = $regex['end'];
                    $buffer      = '';
                    $cleanBuffer = false;
                    continue;
                }//end if
            }//end if

            // Check for known tokens, but ignore tokens found that are not at
            // the end of a string, like FOR and this.FORmat.
            if (isset($this->tokenValues[strtolower($buffer)]) === true
                && (preg_match($identifierChar, $char) === 0
                || isset($chars[($i + 1)]) === false
                || preg_match($identifierChar, $chars[($i + 1)]) === 0)
            ) {
                $matchedToken    = false;
                $lookAheadLength = ($maxTokenLength - strlen($buffer));

                if ($lookAheadLength > 0) {
                    // The buffer contains a token type, but we need
                    // to look ahead at the next chars to see if this is
                    // actually part of a larger token. For example,
                    // FOR and FOREACH.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                    }

                    $charBuffer = $buffer;
                    for ($x = 1; $x <= $lookAheadLength; $x++) {
                        if (isset($chars[($i + $x)]) === false) {
                            break;
                        }

                        $charBuffer .= $chars[($i + $x)];

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                            echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                        }

                        if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                            // We've found something larger that matches
                            // so we can ignore this char. Except for 1 very specific
                            // case where a comment like /**/ needs to tokenize as
                            // T_COMMENT and not T_DOC_COMMENT.
                            $oldType = $this->tokenValues[strtolower($buffer)];
                            $newType = $this->tokenValues[strtolower($charBuffer)];
                            if ($oldType === 'T_COMMENT'
                                && $newType === 'T_DOC_COMMENT'
                                && isset($chars[($i + $x + 1)]) === true
                                && $chars[($i + $x + 1)] === '/'
                            ) {
                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                    echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                                }
                            } else {
                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                    echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                                }

                                $matchedToken = true;
                                break;
                            }
                        }//end if
                    }//end for
                }//end if

                if ($matchedToken === false) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                        echo "\t\t* look ahead found nothing *".PHP_EOL;
                    }

                    $value = $this->tokenValues[strtolower($buffer)];

                    if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                        // The function keyword needs to be all lowercase or else
                        // it is just a function called "Function".
                        $value = 'T_STRING';
                    }

                    $tokens[] = array(
                                 'code'    => constant($value),
                                 'type'    => $value,
                                 'content' => $buffer,
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }

                    $cleanBuffer = true;
                }//end if
            } else if (isset($this->tokenValues[strtolower($char)]) === true) {
                // No matter what token we end up using, we don't
                // need the content in the buffer any more because we have
                // found a valid token.
                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
                if ($newContent !== '') {
                    $tokens[] = array(
                                 'code'    => T_STRING,
                                 'type'    => 'T_STRING',
                                 'content' => $newContent,
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
                }

                // The char is a token type, but we need to look ahead at the
                // next chars to see if this is actually part of a larger token.
                // For example, = and ===.
                $charBuffer   = $char;
                $matchedToken = false;
                for ($x = 1; $x <= $maxTokenLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $this->tokenValues[strtolower($charBuffer)];
                            echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                        }

                        $matchedToken = true;
                        break;
                    }
                }//end for

                if ($matchedToken === false) {
                    $value    = $this->tokenValues[strtolower($char)];
                    $tokens[] = array(
                                 'code'    => constant($value),
                                 'type'    => $value,
                                 'content' => $char,
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* look ahead found nothing *".PHP_EOL;
                        $content = PHP_CodeSniffer::prepareForOutput($char);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }

                    $cleanBuffer = true;
                } else {
                    // Start the buffer again from this char so the longer
                    // token can be matched on a later iteration.
                    $buffer = $char;
                }//end if
            }//end if

            // Keep track of content inside comments.
            if ($inComment === ''
                && array_key_exists($buffer, $this->commentTokens) === true
            ) {
                // This is not really a comment if the content
                // looks like \// (i.e., it is escaped).
                if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                    $lastToken   = array_pop($tokens);
                    $lastContent = $lastToken['content'];
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $value   = $this->tokenValues[strtolower($lastContent)];
                        $content = PHP_CodeSniffer::prepareForOutput($lastContent);
                        echo "\t=> Removed token $value ($content)".PHP_EOL;
                    }

                    // Re-add the removed token one character at a time.
                    $lastChars    = str_split($lastContent);
                    $lastNumChars = count($lastChars);
                    for ($x = 0; $x < $lastNumChars; $x++) {
                        $lastChar = $lastChars[$x];
                        $value    = $this->tokenValues[strtolower($lastChar)];
                        $tokens[] = array(
                                     'code'    => constant($value),
                                     'type'    => $value,
                                     'content' => $lastChar,
                                    );

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = PHP_CodeSniffer::prepareForOutput($lastChar);
                            echo "\t=> Added token $value ($content)".PHP_EOL;
                        }
                    }
                } else {
                    // We have started a comment.
                    $inComment = $buffer;

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* looking for end of comment *".PHP_EOL;
                    }
                }//end if
            } else if ($inComment !== '') {
                if ($this->commentTokens[$inComment] === null) {
                    // Comment ends at the next newline.
                    if (strpos($buffer, "\n") !== false) {
                        $inComment = '';
                    }
                } else {
                    if ($this->commentTokens[$inComment] === $buffer) {
                        $inComment = '';
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    if ($inComment === '') {
                        echo "\t\t* found end of comment *".PHP_EOL;
                    }
                }

                if ($inComment === '' && $cleanBuffer === false) {
                    $tokens[] = array(
                                 'code'    => T_STRING,
                                 'type'    => 'T_STRING',
                                 'content' => str_replace("\n", $eolChar, $buffer),
                                );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }
            }//end if

            if ($cleanBuffer === true) {
                $buffer      = '';
                $cleanBuffer = false;
            }
        }//end for

        // Flush whatever is left in the buffer. This is compared against
        // an empty string (rather than using empty()) so that a file ending
        // in "0" does not lose its last token.
        if ($buffer !== '') {
            if (trim($buffer) === '') {
                // Buffer contains whitespace from the end of the file.
                $lastType = 'T_WHITESPACE';
            } else {
                // The file ended in the middle of a word, number or
                // unterminated string.
                $lastType = 'T_STRING';
            }

            $tokens[] = array(
                         'code'    => constant($lastType),
                         'type'    => $lastType,
                         'content' => str_replace("\n", $eolChar, $buffer),
                        );

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = PHP_CodeSniffer::prepareForOutput($buffer);
                echo "\t=> Added token $lastType ($content)".PHP_EOL;
            }
        }

        $tokens[] = array(
                     'code'    => T_CLOSE_TAG,
                     'type'    => 'T_CLOSE_TAG',
                     'content' => '',
                    );

        /*
            Now that we have done some basic tokenizing, we need to
            modify the tokens to join some together and split some apart
            so they match what the PHP tokenizer does.
        */

        $finalTokens = array();
        $newStackPtr = 0;
        $numTokens   = count($tokens);
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
            $token = $tokens[$stackPtr];

            /*
                Look for comments and join the tokens together.
            */

            if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
                $newContent   = '';
                $tokenContent = $token['content'];

                $endContent = null;
                if (isset($this->commentTokens[$tokenContent]) === true) {
                    $endContent = $this->commentTokens[$tokenContent];
                }

                while ($tokenContent !== $endContent) {
                    if ($endContent === null
                        && strpos($tokenContent, $eolChar) !== false
                    ) {
                        // A null end token means the comment ends at the end of
                        // the line so we look for newlines and split the token.
                        $tokens[$stackPtr]['content'] = substr(
                            $tokenContent,
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
                        );

                        $tokenContent = substr(
                            $tokenContent,
                            0,
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
                        );

                        // If the substr failed, skip the token as the content
                        // will now be blank.
                        if ($tokens[$stackPtr]['content'] !== false
                            && $tokens[$stackPtr]['content'] !== ''
                        ) {
                            $stackPtr--;
                        }

                        break;
                    }//end if

                    $stackPtr++;
                    $newContent .= $tokenContent;
                    if (isset($tokens[$stackPtr]) === false) {
                        break;
                    }

                    $tokenContent = $tokens[$stackPtr]['content'];
                }//end while

                if ($token['code'] === T_DOC_COMMENT) {
                    $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
                    foreach ($commentTokens as $commentToken) {
                        $finalTokens[$newStackPtr] = $commentToken;
                        $newStackPtr++;
                    }

                    continue;
                } else {
                    // Save the new content in the current token so
                    // the code below can chop it up on newlines.
                    $token['content'] = $newContent.$tokenContent;
                }
            }//end if

            /*
                If this token has newlines in its content, split each line up
                and create a new token for each line. We do this so it's easier
                to ascertain where errors occur on a line.
            */

            if (strpos($token['content'], $eolChar) !== false) {
                $tokenLines = explode($eolChar, $token['content']);
                $numLines   = count($tokenLines);
                $newToken   = array();

                for ($i = 0; $i < $numLines; $i++) {
                    $newToken['content'] = $tokenLines[$i];
                    if ($i === ($numLines - 1)) {
                        // Nothing follows the final newline, so there
                        // is no extra token to create.
                        if ($tokenLines[$i] === '') {
                            break;
                        }
                    } else {
                        $newToken['content'] .= $eolChar;
                    }

                    $newToken['type']          = $token['type'];
                    $newToken['code']          = $token['code'];
                    $finalTokens[$newStackPtr] = $newToken;
                    $newStackPtr++;
                }
            } else {
                $finalTokens[$newStackPtr] = $token;
                $newStackPtr++;
            }//end if

            // Convert numbers, including decimals.
            if ($token['code'] === T_STRING
                || $token['code'] === T_OBJECT_OPERATOR
            ) {
                $newContent  = '';
                $oldStackPtr = $stackPtr;
                while (isset($tokens[$stackPtr]) === true
                    && preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0
                ) {
                    $newContent .= $tokens[$stackPtr]['content'];
                    $stackPtr++;
                }

                if ($newContent !== '' && $newContent !== '.') {
                    // Overwrite the token that was just added with the
                    // complete number.
                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                    if (ctype_digit($newContent) === true) {
                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                    } else {
                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                    }

                    $stackPtr--;
                    continue;
                } else {
                    $stackPtr = $oldStackPtr;
                }
            }//end if

            // Convert the token after an object operator into a string, in most cases.
            if ($token['code'] === T_OBJECT_OPERATOR) {
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                        continue;
                    }

                    if ($tokens[$i]['code'] !== T_PROTOTYPE
                        && $tokens[$i]['code'] !== T_LNUMBER
                        && $tokens[$i]['code'] !== T_DNUMBER
                    ) {
                        $tokens[$i]['code'] = T_STRING;
                        $tokens[$i]['type'] = 'T_STRING';
                    }

                    break;
                }
            }
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END TOKENIZING ***".PHP_EOL;
        }

        return $finalTokens;

    }//end tokenizeString()


    /**
     * Tokenizes a regular expression if one is found.
     *
     * A "/" is treated as the start of a regular expression only when the
     * last non-empty token before it is one that can precede an expression
     * (see $beforeTokens), a closing unescaped "/" exists on the same line,
     * and the first character after the closing "/" (plus any modifiers and
     * spaces) is one of the characters in $afterTokens. A regular expression
     * that is the very last thing in the input is not recognised.
     *
     * Newlines are matched both as "\n" (tokenizeString() normalises line
     * endings before calling this method) and as $eolChar.
     *
     * If a regular expression is not found, NULL is returned.
     *
     * @param int    $char    The index of the possible regex start character.
     * @param string $string  The complete content of the string being tokenized.
     * @param array  $chars   An array of characters being tokenized.
     * @param array  $tokens  The current array of tokens found in the string.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return array|null An array with the keys "start" and "end" (indexes
     *                    into $chars) and "content", or NULL.
     */
    public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
    {
        $beforeTokens = array(
                         T_EQUAL               => true,
                         T_IS_NOT_EQUAL        => true,
                         T_IS_IDENTICAL        => true,
                         T_IS_NOT_IDENTICAL    => true,
                         T_OPEN_PARENTHESIS    => true,
                         T_OPEN_SQUARE_BRACKET => true,
                         T_RETURN              => true,
                         T_BOOLEAN_OR          => true,
                         T_BOOLEAN_AND         => true,
                         T_BITWISE_OR          => true,
                         T_BITWISE_AND         => true,
                         T_COMMA               => true,
                         T_COLON               => true,
                         T_TYPEOF              => true,
                         T_INLINE_THEN         => true,
                         T_INLINE_ELSE         => true,
                        );

        $afterTokens = array(
                        ','      => true,
                        ')'      => true,
                        ']'      => true,
                        ';'      => true,
                        ' '      => true,
                        '.'      => true,
                        ':'      => true,
                        "\n"     => true,
                        $eolChar => true,
                       );

        // Find the last non-whitespace token that was added
        // to the tokens array.
        $numTokens = count($tokens);
        for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
                break;
            }
        }

        if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
            return null;
        }

        // A regular expression can't be empty; two slashes in a row
        // are the start of a line comment.
        if (isset($chars[($char + 1)]) === true && $chars[($char + 1)] === '/') {
            return null;
        }

        // This is probably a regular expression, so look for the end of it.
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* token possibly starts a regular expression *".PHP_EOL;
        }

        $eolLength = strlen($eolChar);
        $numChars  = count($chars);
        for ($next = ($char + 1); $next < $numChars; $next++) {
            if ($chars[$next] === '/') {
                // Just make sure this is not escaped first. An even number
                // of backslashes before the slash means the backslashes
                // escape each other and the slash ends the expression.
                $escapes = 0;
                for ($x = ($next - 1); $x > $char; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if (($escapes % 2) === 0) {
                    break;
                }
            } else if ($chars[$next] === "\n"
                || substr($string, $next, $eolLength) === $eolChar
            ) {
                // This is the last token on the line and regular
                // expressions need to be defined on a single line,
                // so this is not a regular expression.
                break;
            }
        }//end for

        if ($next >= $numChars || $chars[$next] !== '/') {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t* could not find end of regular expression *".PHP_EOL;
            }

            return null;
        }

        while (isset($chars[($next + 1)]) === true
            && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
        ) {
            // The token directly after the end of the regex can
            // be modifiers like global and case insensitive
            // (e.g., /pattern/gi).
            $next++;
        }

        $regexEnd = $next;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
        }

        // Skip over any spaces that follow the regular expression.
        for ($next = ($next + 1); $next < $numChars; $next++) {
            if ($chars[$next] !== ' ') {
                break;
            } else {
                $possibleEolChar = substr($string, $next, $eolLength);
                if ($possibleEolChar === $eolChar) {
                    // This is the last token on the line.
                    break;
                }
            }
        }

        if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
            }

            return null;
        }

        // This is a regular expression, so join all the tokens together.
        $content = '';
        for ($x = $char; $x <= $regexEnd; $x++) {
            $content .= $chars[$x];
        }

        $token = array(
                  'start'   => $char,
                  'end'     => $regexEnd,
                  'content' => $content,
                 );

        return $token;

    }//end getRegexToken()


    /**
     * Performs additional processing after main tokenizing.
     *
     * This additional processing looks for properties, closures, labels and objects.
     * It reads the "line", "scope_opener", "scope_closer", "scope_condition",
     * "bracket_closer" and "conditions" keys of the tokens, so those need to
     * have been populated before this method is called.
     *
     * @param array  $tokens  The array of tokens to process (modified in place).
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

        $numTokens  = count($tokens);
        $classStack = array();

        for ($i = 0; $i < $numTokens; $i++) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $tokens[$i]['type'];
                $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);

                echo str_repeat("\t", count($classStack));
                echo "\tProcess token $i: $type => $content".PHP_EOL;
            }

            // Looking for functions that are actually closures.
            if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                        break;
                    }
                }

                // A function keyword directly followed by an open
                // parenthesis has no name, so it is a closure.
                if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                    $tokens[$i]['code'] = T_CLOSURE;
                    $tokens[$i]['type'] = 'T_CLOSURE';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $tokens[$i]['line'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                    }

                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                        if (isset($tokens[$x]['conditions'][$i]) === false) {
                            continue;
                        }

                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $tokens[$x]['type'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        }
                    }
                }//end if

                continue;
            } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
                && isset($tokens[$i]['scope_condition']) === false
                && isset($tokens[$i]['bracket_closer']) === true
            ) {
                // A curly bracket that does not belong to a scope
                // is treated as the start of an object.
                $classStack[] = $i;

                $closer = $tokens[$i]['bracket_closer'];
                $tokens[$i]['code']      = T_OBJECT;
                $tokens[$i]['type']      = 'T_OBJECT';
                $tokens[$closer]['code'] = T_CLOSE_OBJECT;
                $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
                }

                for ($x = ($i + 1); $x < $closer; $x++) {
                    $tokens[$x]['conditions'][$i] = T_OBJECT;
                    ksort($tokens[$x]['conditions'], SORT_NUMERIC);
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                    }
                }
            } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
                // We have left the innermost object.
                array_pop($classStack);
            } else if ($tokens[$i]['code'] === T_COLON) {
                // If it is a scope opener, it belongs to a
                // DEFAULT or CASE statement.
                if (isset($tokens[$i]['scope_condition']) === true) {
                    continue;
                }

                // Make sure this is not part of an inline IF statement.
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
                        $tokens[$i]['code'] = T_INLINE_ELSE;
                        $tokens[$i]['type'] = 'T_INLINE_ELSE';

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                        }

                        continue(2);
                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                        // Only tokens on the same line as the colon are checked.
                        break;
                    }
                }

                // The string to the left of the colon is either a property or label.
                for ($label = ($i - 1); $label >= 0; $label--) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
                        break;
                    }
                }

                if ($tokens[$label]['code'] !== T_STRING
                    && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING
                ) {
                    continue;
                }

                if (empty($classStack) === false) {
                    // Inside an object, so this is a property name.
                    $tokens[$label]['code'] = T_PROPERTY;
                    $tokens[$label]['type'] = 'T_PROPERTY';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                    }
                } else {
                    $tokens[$label]['code'] = T_LABEL;
                    $tokens[$label]['type'] = 'T_LABEL';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                    }
                }//end if
            }//end if
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

    }//end processAdditional()


}//end class