<?php
/**
 * Tokenizes doc block comments.
 *
 * PHP version 5
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */

/**
 * Tokenizes doc block comments.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_Comment
{


    /**
     * Splits the text of a single doc block comment into tokens.
     *
     * The returned array is keyed by stack position, starting at $stackPtr.
     * The first token is the opener (T_DOC_COMMENT_OPEN_TAG) and the last is
     * the closer (T_DOC_COMMENT_CLOSE_TAG). The opener carries a 'comment_tags'
     * list holding the stack positions of every T_DOC_COMMENT_TAG token, plus a
     * 'comment_closer' index; the closer carries a 'comment_opener' index.
     *
     * When PHP_CODESNIFFER_VERBOSITY is greater than 1, each created token is
     * echoed for debugging.
     *
     * @param string $string   The full comment text to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = array();
        $numChars = strlen($string);

        /*
            Doc block comments start with /*, but typically contain an
            extra star when they are used for function and class comments.
        */

        $char = ($numChars - strlen(ltrim($string, '/*')));
        if ($char === $numChars && $numChars > 3 && substr($string, -2) === '*/') {
            // Edge case: the comment has no content or whitespace at all
            // (e.g. /***/), so trimming would hand the whole comment to the
            // opener and leave the closer empty. Reserve the final two
            // characters for the close tag instead.
            $openTag = substr($string, 0, -2);
            $string  = '*/';
        } else {
            $openTag = substr($string, 0, $char);
            $string  = ltrim($string, '/*');
        }

        $tokens[$stackPtr] = array(
            'content'      => $openTag,
            'code'         => T_DOC_COMMENT_OPEN_TAG,
            'type'         => 'T_DOC_COMMENT_OPEN_TAG',
            'comment_tags' => array(),
        );

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        /*
            Strip off the close tag so it doesn't interfere with any
            of our comment line processing. The token will be added to the
            stack just before we return it.
        */

        $body     = rtrim($string, '/*');
        $closeTag = array(
            'content'        => substr($string, strlen($body)),
            'code'           => T_DOC_COMMENT_CLOSE_TAG,
            'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
            'comment_opener' => $openPtr,
        );

        // Older PHP versions return false from substr() when there is
        // nothing left after the body; normalise that to an empty string.
        if ($closeTag['content'] === false) {
            $closeTag['content'] = '';
        }

        /*
            Process each line of the comment.
        */

        $lines    = explode($eolChar, $body);
        $lastLine = (count($lines) - 1);
        foreach ($lines as $lineNum => $line) {
            // explode() removed the EOL chars; put them back on every line
            // except the last so they can be tokenized as whitespace.
            if ($lineNum !== $lastLine) {
                $line .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($line);

            // We've started a new line, so process the indent.
            $space = $this->_collectWhitespace($line, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // Only the last line has no EOL char appended, so a line
                    // made up entirely of indent must be the final one.
                    break;
                }
            }

            if ($line === '') {
                continue;
            }

            if ($line[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = array(
                    'content' => '*',
                    'code'    => T_DOC_COMMENT_STAR,
                    'type'    => 'T_DOC_COMMENT_STAR',
                );

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->_processLine($line, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                // Record where each tag lives so it can be found from the opener.
                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr]                  = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only when present: leading whitespace, a tag
     * (an "@" immediately followed by non-whitespace characters), the
     * whitespace after the tag, the remaining text up to the EOL character,
     * and finally the EOL character itself as a whitespace token.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function _processLine($string, $eolChar, $start, $end)
    {
        $tokens = array();

        // Collect content padding.
        $space = $this->_collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // \G anchors the match at $start; without it, a lone "@" followed
            // by whitespace would let the pattern match a later "@word" on the
            // same line and advance $start by the wrong amount.
            $matches = array();
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = array(
                    'content' => $tagName,
                    'code'    => T_DOC_COMMENT_TAG,
                    'type'    => 'T_DOC_COMMENT_TAG',
                );

                // Then there will be some whitespace.
                $space = $this->_collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = array(
                'content' => substr($string, $start, ($eol - $start)),
                'code'    => T_DOC_COMMENT_STRING,
                'type'    => 'T_DOC_COMMENT_STRING',
            );
        }

        if ($eol !== $end) {
            // The EOL char is kept as its own whitespace token.
            $tokens[] = array(
                'content' => substr($string, $eol, strlen($eolChar)),
                'code'    => T_DOC_COMMENT_WHITESPACE,
                'type'    => 'T_DOC_COMMENT_WHITESPACE',
            );
        }

        return $tokens;

    }//end _processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs count as whitespace here. Returns null when the
     * character at $start is not whitespace or the range is empty.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null
     */
    private function _collectWhitespace($string, $start, $end)
    {
        if ($start >= $end) {
            return null;
        }

        // Length of the leading run of spaces/tabs within [$start, $end).
        $length = strspn($string, " \t", $start, ($end - $start));
        if ($length === 0) {
            return null;
        }

        return array(
            'content' => substr($string, $start, $length),
            'code'    => T_DOC_COMMENT_WHITESPACE,
            'type'    => 'T_DOC_COMMENT_WHITESPACE',
        );

    }//end _collectWhitespace()


}//end class