* @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600) * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence * @link http://pear.php.net/package/PHP_CodeSniffer */ /** * Tokenizes doc block comments. * * @category PHP * @package PHP_CodeSniffer * @author Greg Sherwood * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600) * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence * @version Release: @package_version@ * @link http://pear.php.net/package/PHP_CodeSniffer */ class PHP_CodeSniffer_Tokenizers_Comment { /** * Creates an array of tokens when given some PHP code. * * Starts by using token_get_all() but does a lot of extra processing * to insert information about the context of the token. * * @param string $string The string to tokenize. * @param string $eolChar The EOL character to use for splitting strings. * @param int $stackPtr The position of the first token in the file. * * @return array */ public function tokenizeString($string, $eolChar, $stackPtr) { if (PHP_CODESNIFFER_VERBOSITY > 1) { echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL; } $tokens = array(); $numChars = strlen($string); /* Doc block comments start with /*, but typically contain an extra star when they are used for function and class comments. */ $char = ($numChars - strlen(ltrim($string, '/*'))); $openTag = substr($string, 0, $char); $string = ltrim($string, '/*'); $tokens[$stackPtr] = array( 'content' => $openTag, 'code' => T_DOC_COMMENT_OPEN_TAG, 'type' => 'T_DOC_COMMENT_OPEN_TAG', 'comment_tags' => array(), ); $openPtr = $stackPtr; $stackPtr++; if (PHP_CODESNIFFER_VERBOSITY > 1) { $content = PHP_CodeSniffer::prepareForOutput($openTag); echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL; } /* Strip off the close tag so it doesn't interfere with any of our comment line processing. The token will be added to the stack just before we return it. */ $closeTag = array( 'content' => substr($string, strlen(rtrim($string, '/*'))), 'code' => T_DOC_COMMENT_CLOSE_TAG, 'type' => 'T_DOC_COMMENT_CLOSE_TAG', 'comment_opener' => $openPtr, ); if ($closeTag['content'] === false) { $closeTag['content'] = ''; } $string = rtrim($string, '/*'); /* Process each line of the comment. */ $lines = explode($eolChar, $string); $numLines = count($lines); foreach ($lines as $lineNum => $string) { if ($lineNum !== ($numLines - 1)) { $string .= $eolChar; } $char = 0; $numChars = strlen($string); // We've started a new line, so process the indent. $space = $this->_collectWhitespace($string, $char, $numChars); if ($space !== null) { $tokens[$stackPtr] = $space; $stackPtr++; if (PHP_CODESNIFFER_VERBOSITY > 1) { $content = PHP_CodeSniffer::prepareForOutput($space['content']); echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL; } $char += strlen($space['content']); if ($char === $numChars) { break; } } if ($string === '') { continue; } if ($string[$char] === '*') { // This is a function or class doc block line. $char++; $tokens[$stackPtr] = array( 'content' => '*', 'code' => T_DOC_COMMENT_STAR, 'type' => 'T_DOC_COMMENT_STAR', ); $stackPtr++; if (PHP_CODESNIFFER_VERBOSITY > 1) { echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL; } } // Now we are ready to process the actual content of the line. $lineTokens = $this->_processLine($string, $eolChar, $char, $numChars); foreach ($lineTokens as $lineToken) { $tokens[$stackPtr] = $lineToken; if (PHP_CODESNIFFER_VERBOSITY > 1) { $content = PHP_CodeSniffer::prepareForOutput($lineToken['content']); $type = $lineToken['type']; echo "\t\tCreate comment token: $type => $content".PHP_EOL; } if ($lineToken['code'] === T_DOC_COMMENT_TAG) { $tokens[$openPtr]['comment_tags'][] = $stackPtr; } $stackPtr++; } }//end foreach $tokens[$stackPtr] = $closeTag; $tokens[$openPtr]['comment_closer'] = $stackPtr; if (PHP_CODESNIFFER_VERBOSITY > 1) { $content = PHP_CodeSniffer::prepareForOutput($closeTag['content']); echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL; } if (PHP_CODESNIFFER_VERBOSITY > 1) { echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL; } return $tokens; }//end tokenizeString() /** * Process a single line of a comment. * * @param string $string The comment string being tokenized. * @param string $eolChar The EOL character to use for splitting strings. * @param int $start The position in the string to start processing. * @param int $end The position in the string to end processing. * * @return array */ private function _processLine($string, $eolChar, $start, $end) { $tokens = array(); // Collect content padding. $space = $this->_collectWhitespace($string, $start, $end); if ($space !== null) { $tokens[] = $space; $start += strlen($space['content']); } if (isset($string[$start]) === false) { return $tokens; } if ($string[$start] === '@') { // The content up until the first whitespace is the tag name. $matches = array(); preg_match('/@[^\s]+/', $string, $matches, 0, $start); if (isset($matches[0]) === true) { $tagName = $matches[0]; $start += strlen($tagName); $tokens[] = array( 'content' => $tagName, 'code' => T_DOC_COMMENT_TAG, 'type' => 'T_DOC_COMMENT_TAG', ); // Then there will be some whitespace. $space = $this->_collectWhitespace($string, $start, $end); if ($space !== null) { $tokens[] = $space; $start += strlen($space['content']); } } }//end if // Process the rest of the line. $eol = strpos($string, $eolChar, $start); if ($eol === false) { $eol = $end; } if ($eol > $start) { $tokens[] = array( 'content' => substr($string, $start, ($eol - $start)), 'code' => T_DOC_COMMENT_STRING, 'type' => 'T_DOC_COMMENT_STRING', ); } if ($eol !== $end) { $tokens[] = array( 'content' => substr($string, $eol, strlen($eolChar)), 'code' => T_DOC_COMMENT_WHITESPACE, 'type' => 'T_DOC_COMMENT_WHITESPACE', ); } return $tokens; }//end _processLine() /** * Collect consecutive whitespace into a single token. * * @param string $string The comment string being tokenized. * @param int $start The position in the string to start processing. * @param int $end The position in the string to end processing. * * @return array|null */ private function _collectWhitespace($string, $start, $end) { $space = ''; for ($start; $start < $end; $start++) { if ($string[$start] !== ' ' && $string[$start] !== "\t") { break; } $space .= $string[$start]; } if ($space === '') { return null; } $token = array( 'content' => $space, 'code' => T_DOC_COMMENT_WHITESPACE, 'type' => 'T_DOC_COMMENT_WHITESPACE', ); return $token; }//end _collectWhitespace() }//end class