1<?php
2/**
3 * Tokenizes doc block comments.
4 *
5 * PHP version 5
6 *
7 * @category  PHP
8 * @package   PHP_CodeSniffer
9 * @author    Greg Sherwood <gsherwood@squiz.net>
10 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
11 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
12 * @link      http://pear.php.net/package/PHP_CodeSniffer
13 */
14
15/**
16 * Tokenizes doc block comments.
17 *
18 * @category  PHP
19 * @package   PHP_CodeSniffer
20 * @author    Greg Sherwood <gsherwood@squiz.net>
21 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
22 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
23 * @version   Release: @package_version@
24 * @link      http://pear.php.net/package/PHP_CodeSniffer
25 */
class PHP_CodeSniffer_Tokenizers_Comment
{


    /**
     * Creates an array of tokens when given the text of a doc block comment.
     *
     * The comment is split into an open tag, per-line whitespace, star, tag
     * and string tokens, and a close tag. Tokens are keyed by consecutive
     * stack pointers starting at $stackPtr. The open tag token records the
     * positions of all tag tokens ("comment_tags") and of the close tag
     * ("comment_closer"); the close tag token records the position of the
     * open tag ("comment_opener").
     *
     * @param string $string   The string to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = array();
        $numChars = strlen($string);

        /*
            Doc block comments start with /*, but typically contain an
            extra star when they are used for function and class comments.
            Everything made up of leading slashes and stars is the open tag.
        */

        $withoutOpener = ltrim($string, '/*');
        $openTag       = substr($string, 0, ($numChars - strlen($withoutOpener)));
        $string        = $withoutOpener;

        $tokens[$stackPtr] = array(
                              'content'      => $openTag,
                              'code'         => T_DOC_COMMENT_OPEN_TAG,
                              'type'         => 'T_DOC_COMMENT_OPEN_TAG',
                              'comment_tags' => array(),
                             );

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        /*
            Strip off the close tag so it doesn't interfere with any
            of our comment line processing. The token will be added to the
            stack just before we return it.
        */

        $withoutCloser = rtrim($string, '/*');
        $closeTag      = array(
                          'content'        => substr($string, strlen($withoutCloser)),
                          'code'           => T_DOC_COMMENT_CLOSE_TAG,
                          'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
                          'comment_opener' => $openPtr,
                         );

        // Older PHP versions return false from substr() when nothing is left.
        if ($closeTag['content'] === false) {
            $closeTag['content'] = '';
        }

        $string = $withoutCloser;

        /*
            Process each line of the comment.
        */

        $lines    = explode($eolChar, $string);
        $numLines = count($lines);
        foreach ($lines as $lineNum => $string) {
            // explode() removed the EOL chars; put them back on every
            // line except the last so they end up in the token stream.
            if ($lineNum !== ($numLines - 1)) {
                $string .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($string);

            // We've started a new line, so process the indent.
            $space = $this->_collectWhitespace($string, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // The indent ran to the very end of the string. Lines
                    // other than the last one always end in the EOL char,
                    // so there is nothing left to process.
                    break;
                }
            }

            if ($string === '') {
                continue;
            }

            if ($string[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = array(
                                      'content' => '*',
                                      'code'    => T_DOC_COMMENT_STAR,
                                      'type'    => 'T_DOC_COMMENT_STAR',
                                     );

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->_processLine($string, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                // Let the opener know where every tag in the comment lives.
                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only when present: the padding before the
     * content, a tag token (an "@" immediately followed by non-whitespace
     * characters) plus the whitespace after it, the remaining text up to
     * the EOL char as a string token, and the EOL char itself as a
     * whitespace token.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function _processLine($string, $eolChar, $start, $end)
    {
        $tokens = array();

        // Collect content padding.
        $space = $this->_collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // The \G anchor forces the match to begin exactly at $start.
            // Without it, a lone "@" followed by whitespace would let the
            // regex pick up a later tag on the same line and the tag token
            // would be created from the wrong part of the string.
            $matches = array();
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = array(
                             'content' => $tagName,
                             'code'    => T_DOC_COMMENT_TAG,
                             'type'    => 'T_DOC_COMMENT_TAG',
                            );

                // Then there will be some whitespace.
                $space = $this->_collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            // No EOL char (last line of the comment): use all that is left.
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = array(
                         'content' => substr($string, $start, ($eol - $start)),
                         'code'    => T_DOC_COMMENT_STRING,
                         'type'    => 'T_DOC_COMMENT_STRING',
                        );
        }

        if ($eol !== $end) {
            // The EOL char gets its own whitespace token.
            $tokens[] = array(
                         'content' => substr($string, $eol, strlen($eolChar)),
                         'code'    => T_DOC_COMMENT_WHITESPACE,
                         'type'    => 'T_DOC_COMMENT_WHITESPACE',
                        );
        }

        return $tokens;

    }//end _processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs count as whitespace here; EOL chars are not
     * collected. Returns null when the character at $start is not a space
     * or tab, or when $start is not before $end.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null
     */
    private function _collectWhitespace($string, $start, $end)
    {
        if ($start >= $end) {
            return null;
        }

        // Length of the run of spaces/tabs beginning at $start, looking
        // no further than $end.
        $length = strspn($string, " \t", $start, ($end - $start));
        if ($length === 0) {
            return null;
        }

        $token = array(
                  'content' => substr($string, $start, $length),
                  'code'    => T_DOC_COMMENT_WHITESPACE,
                  'type'    => 'T_DOC_COMMENT_WHITESPACE',
                 );

        return $token;

    }//end _collectWhitespace()


}//end class
288