1<?php
2/**
3 * This file is part of phpDocumentor.
4 *
5 * For the full copyright and license information, please view the LICENSE
6 * file that was distributed with this source code.
7 *
8 * @copyright 2010-2015 Mike van Riel<mike@phpdoc.org>
9 * @license   http://www.opensource.org/licenses/mit-license.php MIT
10 * @link      http://phpdoc.org
11 */
12
13namespace phpDocumentor\Reflection\DocBlock;
14
15use phpDocumentor\Reflection\Types\Context as TypeContext;
16
/**
 * Creates a new Description object given a body of text.
 *
 * Descriptions in phpDocumentor are somewhat complex entities as they can contain one or more tags inside their
 * body that can be replaced with a readable output. The replacing is done by passing a Formatter object to the
 * Description object's `render` method.
 *
 * In addition to the above, a Description supports two types of escape sequences:
 *
 * 1. `{@}` to escape the `@` character to prevent it from being interpreted as part of a tag, i.e. `{{@}link}`
 * 2. `{}` to escape the `}` character, this can be used if you want to use the `}` character in the description
 *    of an inline tag.
 *
 * If a body consists of multiple lines then this factory will also remove any superfluous whitespace at the beginning
 * of each line while maintaining any indentation that is used. This will prevent formatting parsers from tripping
 * over unexpected spaces as can be observed with tag descriptions.
 */
class DescriptionFactory
{
    /** @var TagFactory Factory used to turn each inline tag line into a Tag object. */
    private $tagFactory;

    /**
     * Initializes this factory with the means to construct (inline) tags.
     *
     * @param TagFactory $tagFactory
     */
    public function __construct(TagFactory $tagFactory)
    {
        $this->tagFactory = $tagFactory;
    }

    /**
     * Creates a Description from the given text, extracting any inline tags that it contains.
     *
     * @param string           $contents
     * @param TypeContext|null $context  Handed to the tag factory for every inline tag; may be omitted.
     *
     * @return Description
     */
    public function create($contents, TypeContext $context = null)
    {
        list($text, $tags) = $this->parse($this->lex($contents), $context);

        return new Description($text, $tags);
    }

    /**
     * Strips the contents from superfluous whitespace and splits the description into a series of tokens.
     *
     * The returned list alternates between literal text (even indices) and inline tag lines without their
     * surrounding braces (odd indices). Text without any inline tag yields a single token.
     *
     * @param string $contents
     *
     * @return string[] A series of tokens of which the description text is composed.
     */
    private function lex($contents)
    {
        $contents = $this->removeSuperfluousStartingWhitespace($contents);

        // performance optimization; if there is no inline tag, don't bother splitting it up.
        if (strpos($contents, '{@') === false) {
            return [$contents];
        }

        $tokens = preg_split(
            '/\{
                # "{@}" is not a valid inline tag. This ensures that we do not treat it as one, but treat it literally.
                (?!@\})
                # We want to capture the whole tag line, but without the inline tag delimiters.
                (\@
                    # Match everything up to the next delimiter.
                    [^{}]*
                    # Nested inline tag content should not be captured, or it will appear in the result separately.
                    (?:
                        # Match nested inline tags.
                        (?:
                            # Because we did not catch the tag delimiters earlier, we must be explicit with them here.
                            # Notice that this also matches "{}", as a way to later introduce it as an escape sequence.
                            \{(?1)?\}
                            |
                            # Make sure we match hanging "{".
                            \{
                        )
                        # Match content after the nested inline tag.
                        [^{}]*
                    )* # If there are more inline tags, match them as well. We use "*" since there may not be any
                       # nested inline tags.
                )
            \}/Sux',
            $contents,
            -1, // no limit; passing null here is deprecated as of PHP 8.1
            PREG_SPLIT_DELIM_CAPTURE
        );

        // preg_split() returns false when the PCRE engine gives up (e.g. a backtrack limit is hit). Fall back to
        // treating the whole text as one literal token so that callers always receive the promised string[].
        if ($tokens === false) {
            return [$contents];
        }

        return $tokens;
    }

    /**
     * Parses the stream of tokens in to a description template and the Tags that belong in it.
     *
     * Tokens at odd indices are inline tag lines; each is converted by the tag factory and replaced with a
     * numbered placeholder (`%1$s`, `%2$s`, ...). Tokens at even indices are literal text in which the escape
     * sequences are resolved.
     *
     * @param string[]         $tokens
     * @param TypeContext|null $context Nullable because create() forwards its optional context unchanged.
     *
     * @return array A two-element array: the description text containing the placeholders, followed by the list
     *               of objects created by the tag factory, in placeholder order.
     */
    private function parse($tokens, TypeContext $context = null)
    {
        $tags = [];

        foreach ($tokens as $index => $token) {
            if ($index % 2 === 1) {
                $tags[] = $this->tagFactory->create($token, $context);
                // placeholders are 1-based, so the number of tags collected so far is this tag's position
                $tokens[$index] = '%' . count($tags) . '$s';
                continue;
            }

            //In order to allow "literal" inline tags, the otherwise invalid
            //sequence "{@}" is changed to "@", and "{}" is changed to "}".
            //"%" is escaped to "%%" because of vsprintf.
            //See unit tests for examples.
            $tokens[$index] = str_replace(['{@}', '{}', '%'], ['@', '}', '%%'], $token);
        }

        return [implode('', $tokens), $tags];
    }

    /**
     * Removes the superfluous whitespace from a multi-line description.
     *
     * When a description has more than one line then it can happen that the second and subsequent lines have an
     * additional indentation. This is commonly in use with tags like this:
     *
     *     {@}since 1.1.0 This is an example
     *         description where we have an
     *         indentation in the second and
     *         subsequent lines.
     *
     * If we do not normalize the indentation then we have superfluous whitespace on the second and subsequent
     * lines and this may cause rendering issues when, for example, using a Markdown converter.
     *
     * The first line is never altered; the smallest indentation found on the non-blank subsequent lines is removed
     * from every subsequent line.
     *
     * @param string $contents
     *
     * @return string
     */
    private function removeSuperfluousStartingWhitespace($contents)
    {
        $lines = explode("\n", $contents);
        $lineCount = count($lines);

        // if there is only one line then we don't have lines with superfluous whitespace and
        // can use the contents as-is
        if ($lineCount <= 1) {
            return $contents;
        }

        // determine how many whitespace characters need to be stripped; the initial value is a sentinel that is
        // larger than any real indentation and is only kept when every subsequent line is blank
        $startingSpaceCount = PHP_INT_MAX;
        for ($i = 1; $i < $lineCount; $i++) {
            // blank lines do not count as they are not indented at all
            if (trim($lines[$i]) === '') {
                continue;
            }

            // determine the number of prefixing spaces by checking the difference in line length before and after
            // an ltrim
            $startingSpaceCount = min($startingSpaceCount, strlen($lines[$i]) - strlen(ltrim($lines[$i])));
        }

        // strip the number of spaces from each line
        if ($startingSpaceCount > 0) {
            for ($i = 1; $i < $lineCount; $i++) {
                // blank lines may be shorter than the offset; before PHP 8 substr() returns false in that case
                $lines[$i] = (string) substr($lines[$i], $startingSpaceCount);
            }
        }

        return implode("\n", $lines);
    }
}
192