1<?php
2
3/**
4 * Hoa
5 *
6 *
7 * @license
8 *
9 * New BSD License
10 *
11 * Copyright © 2007-2017, Hoa community. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
15 *     * Redistributions of source code must retain the above copyright
16 *       notice, this list of conditions and the following disclaimer.
17 *     * Redistributions in binary form must reproduce the above copyright
18 *       notice, this list of conditions and the following disclaimer in the
19 *       documentation and/or other materials provided with the distribution.
20 *     * Neither the name of the Hoa nor the names of its contributors may be
21 *       used to endorse or promote products derived from this software without
22 *       specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37namespace Hoa\Compiler\Llk;
38
39use Hoa\Compiler;
40use Hoa\Consistency;
41use Hoa\Stream;
42
43/**
44 * Class \Hoa\Compiler\Llk.
45 *
46 * This class provides a set of static helpers to manipulate (load and save) a
47 * compiler more easily.
48 *
49 * @copyright  Copyright © 2007-2017 Hoa community
50 * @license    New BSD License
51 */
abstract class Llk
{
    /**
     * Load in-memory parser from a grammar description file.
     * The grammar description language is PP. See
     * `hoa://Library/Compiler/Llk/Llk.pp` for an example, or the documentation.
     *
     * The whole stream is read, parsed with `parsePP()`, and the raw rules are
     * then analyzed by `Rule\Analyzer` before the parser is built.
     *
     * @param   \Hoa\Stream\IStream\In  $stream    Stream to read the grammar from.
     * @return  \Hoa\Compiler\Llk\Parser
     * @throws  \Hoa\Compiler\Exception    If nothing could be read from the
     *                                     stream, or if the grammar contains
     *                                     an unrecognized instruction.
     */
    public static function load(Stream\IStream\In $stream)
    {
        $pp = $stream->readAll();

        if (empty($pp)) {
            $message = 'The grammar is empty';

            // When the stream exposes its cursor, give a hint about why
            // nothing was read.
            if ($stream instanceof Stream\IStream\Pointable) {
                if (0 < $stream->tell()) {
                    $message .=
                        ': The stream ' . $stream->getStreamName() .
                        ' is pointable and not rewinded, maybe it ' .
                        'could be the reason';
                } else {
                    $message .=
                        ': Nothing to read on the stream ' .
                        $stream->getStreamName();
                }
            }

            throw new Compiler\Exception($message . '.', 0);
        }

        static::parsePP($pp, $tokens, $rawRules, $pragmas, $stream->getStreamName());

        $ruleAnalyzer = new Rule\Analyzer($tokens);
        $rules        = $ruleAnalyzer->analyzeRules($rawRules);

        return new Parser($tokens, $rules, $pragmas);
    }

    /**
     * Save in-memory parser to PHP code.
     * The generated PHP code will load the same in-memory parser. The state
     * will be reset. The parser will be saved as a class, named after
     * `$className`. To retrieve the parser, one must instantiate this class.
     *
     * Every string emitted into the generated code (namespaces, token names
     * and values, rule names, node IDs, pragmas, PP representations) goes
     * through `quote()` so that backslashes and single quotes survive the
     * round trip unchanged.
     *
     * @param   \Hoa\Compiler\Llk\Parser  $parser       Parser to save.
     * @param   string                    $className    Parser classname.
     * @return  string
     */
    public static function save(Parser $parser, $className)
    {
        $outTokens  = '';
        $outRules   = '';
        $outPragmas = '';
        $outExtra   = '';

        // Transitional rules are emitted as-is (bare name); any other rule
        // name is emitted as a quoted string literal.
        $escapeRuleName = function ($ruleName) use ($parser) {
            if ($parser->getRule($ruleName)->isTransitional()) {
                return $ruleName;
            }

            return self::quote($ruleName);
        };

        foreach ($parser->getTokens() as $namespace => $tokens) {
            $outTokens .= '                ' . self::quote($namespace) . ' => [' . "\n";

            foreach ($tokens as $tokenName => $tokenValue) {
                $outTokens .=
                    '                    ' . self::quote($tokenName) . ' => ' .
                    self::quote($tokenValue) . ',' . "\n";
            }

            $outTokens .= '                ],' . "\n";
        }

        foreach ($parser->getRules() as $rule) {
            // Insertion order matters: the values are imploded below to form
            // the positional argument list of the rule constructor.
            $arguments = [];

            // Name.
            $arguments['name'] = $escapeRuleName($rule->getName());

            if ($rule instanceof Rule\Token) {
                // Token name.
                $arguments['tokenName'] = self::quote($rule->getTokenName());
            } else {
                if ($rule instanceof Rule\Repetition) {
                    // Minimum.
                    $arguments['min'] = $rule->getMin();

                    // Maximum.
                    $arguments['max'] = $rule->getMax();
                }

                // Children.
                $ruleChildren = $rule->getChildren();

                if (null === $ruleChildren) {
                    $arguments['children'] = 'null';
                } elseif (false === is_array($ruleChildren)) {
                    $arguments['children'] = $escapeRuleName($ruleChildren);
                } else {
                    $arguments['children'] =
                        '[' .
                        implode(', ', array_map($escapeRuleName, $ruleChildren)) .
                        ']';
                }
            }

            // Node ID.
            $nodeId = $rule->getNodeId();

            $arguments['nodeId'] = null === $nodeId
                ? 'null'
                : self::quote($nodeId);

            if ($rule instanceof Rule\Token) {
                // Unification.
                $arguments['unification'] = $rule->getUnificationIndex();

                // Kept.
                $arguments['kept'] = $rule->isKept() ? 'true' : 'false';
            }

            // Default node ID: not a constructor argument, so it is restored
            // through a setter call appended after `parent::__construct()`.
            if (null !== $defaultNodeId = $rule->getDefaultId()) {
                $defaultNodeOptions = $rule->getDefaultOptions();

                if (!empty($defaultNodeOptions)) {
                    $defaultNodeId .= ':' . implode('', $defaultNodeOptions);
                }

                $outExtra .=
                    "\n" .
                    '        $this->getRule(' . $arguments['name'] . ')->setDefaultId(' .
                        self::quote($defaultNodeId) .
                    ');';
            }

            // PP representation: also restored through a setter call.
            if (null !== $ppRepresentation = $rule->getPPRepresentation()) {
                $outExtra .=
                    "\n" .
                    '        $this->getRule(' . $arguments['name'] . ')->setPPRepresentation(' .
                        self::quote($ppRepresentation) .
                    ');';
            }

            $outRules .=
                "\n" .
                '                ' . $arguments['name'] . ' => new \\' . get_class($rule) . '(' .
                implode(', ', $arguments) .
                '),';
        }

        foreach ($parser->getPragmas() as $pragmaName => $pragmaValue) {
            if (is_bool($pragmaValue)) {
                $pragmaCode = true === $pragmaValue ? 'true' : 'false';
            } elseif (is_int($pragmaValue)) {
                $pragmaCode = $pragmaValue;
            } else {
                $pragmaCode = self::quote($pragmaValue);
            }

            $outPragmas .=
                "\n" .
                '                ' . self::quote($pragmaName) . ' => ' .
                $pragmaCode .
                ',';
        }

        return
            'class ' . $className . ' extends \Hoa\Compiler\Llk\Parser' . "\n" .
            '{' . "\n" .
            '    public function __construct()' . "\n" .
            '    {' . "\n" .
            '        parent::__construct(' . "\n" .
            '            [' . "\n" .
            $outTokens .
            '            ],' . "\n" .
            '            [' .
            $outRules . "\n" .
            '            ],' . "\n" .
            '            [' .
            $outPragmas . "\n" .
            '            ]' . "\n" .
            '        );' . "\n" .
            $outExtra . "\n" .
            '    }' . "\n" .
            '}' . "\n";
    }

    /**
     * Render a value as a single-quoted PHP string literal.
     * Backslashes are escaped first, then single quotes, so that the literal
     * evaluates back to exactly the given string whatever it contains
     * (including a trailing backslash or consecutive backslashes).
     *
     * @param   mixed  $value    Value to render; it is cast to a string.
     * @return  string
     */
    private static function quote($value)
    {
        return
            '\'' .
            str_replace(['\\', '\''], ['\\\\', '\\\''], (string) $value) .
            '\'';
    }

    /**
     * Parse the grammar description language.
     *
     * The grammar is read line by line:
     *   * empty lines and lines starting with `//` are ignored;
     *   * lines starting with `%` are instructions (`%pragma`, `%skip` or
     *     `%token`), anything else starting with `%` is an error;
     *   * any other line declares a rule: its last character is dropped to
     *     get the rule name, and the following lines starting with a space
     *     or a tabulation are concatenated to form the rule body.
     *
     * @param   string  $pp            Grammar description.
     * @param   array   $tokens        Extracted tokens.
     * @param   array   $rules         Extracted raw rules.
     * @param   array   $pragmas       Extracted raw pragmas.
     * @param   string  $streamName    The name of the stream containing the grammar.
     * @return  void
     * @throws  \Hoa\Compiler\Exception    If a `%` instruction is not
     *                                     recognized.
     */
    public static function parsePP($pp, &$tokens, &$rules, &$pragmas, $streamName)
    {
        $lines   = explode("\n", $pp);
        $pragmas = [];
        $tokens  = ['default' => []];
        $rules   = [];

        for ($i = 0, $m = count($lines); $i < $m; ++$i) {
            $line = rtrim($lines[$i]);

            if ('' === $line || '//' === substr($line, 0, 2)) {
                continue;
            }

            if ('%' === $line[0]) {
                // `preg_match` returns `false` on failure (e.g. invalid UTF-8
                // with the `u` modifier), so a match must be tested with
                // `1 ===`; otherwise a failure would be taken for a match and
                // `$matches` would be read while empty.
                if (1 === preg_match('#^%pragma\h+([^\h]+)\h+(.*)$#u', $line, $matches)) {
                    $rawValue = $matches[2];

                    if ('true' === $rawValue) {
                        $pragmaValue = true;
                    } elseif ('false' === $rawValue) {
                        $pragmaValue = false;
                    } elseif (true === ctype_digit($rawValue)) {
                        $pragmaValue = intval($rawValue);
                    } else {
                        $pragmaValue = $rawValue;
                    }

                    $pragmas[$matches[1]] = $pragmaValue;
                } elseif (1 === preg_match('#^%skip\h+(?:([^:]+):)?([^\h]+)\h+(.*)$#u', $line, $matches)) {
                    if (empty($matches[1])) {
                        $matches[1] = 'default';
                    }

                    if (!isset($tokens[$matches[1]])) {
                        $tokens[$matches[1]] = [];
                    }

                    // All the skip patterns of a namespace are merged into a
                    // single alternation.
                    if (!isset($tokens[$matches[1]]['skip'])) {
                        $tokens[$matches[1]]['skip'] = $matches[3];
                    } else {
                        $tokens[$matches[1]]['skip'] =
                            '(?:' .
                                $tokens[$matches[1]]['skip'] . '|' .
                                $matches[3] .
                            ')';
                    }
                } elseif (1 === preg_match('#^%token\h+(?:([^:]+):)?([^\h]+)\h+(.*?)(?:\h+->\h+(.*))?$#u', $line, $matches)) {
                    if (empty($matches[1])) {
                        $matches[1] = 'default';
                    }

                    // The namespace to jump to (`-> namespace`) is stored as
                    // a suffix of the token name.
                    if (!empty($matches[4])) {
                        $matches[2] = $matches[2] . ':' . $matches[4];
                    }

                    if (!isset($tokens[$matches[1]])) {
                        $tokens[$matches[1]] = [];
                    }

                    $tokens[$matches[1]][$matches[2]] = $matches[3];
                } else {
                    throw new Compiler\Exception(
                        'Unrecognized instructions:' . "\n" .
                        '    %s' . "\n" . 'in file %s at line %d.',
                        1,
                        [
                            $line,
                            $streamName,
                            $i + 1
                        ]
                    );
                }

                continue;
            }

            // Rule declaration: drop the last character of the line to get
            // the rule name.
            $ruleName = substr($line, 0, -1);
            $rule     = null;
            ++$i;

            // Collect the rule body: following lines starting with a space
            // or a tabulation. Comment lines in between are skipped.
            while ($i < $m &&
                   isset($lines[$i][0]) &&
                   (' '  === $lines[$i][0] ||
                    "\t" === $lines[$i][0] ||
                    '//' === substr($lines[$i], 0, 2))) {
                if ('//' !== substr($lines[$i], 0, 2)) {
                    $rule .= ' ' . trim($lines[$i]);
                }

                ++$i;
            }

            // The line that ended the body has not been handled yet: step
            // back so that the next iteration of the main loop handles it.
            if (isset($lines[$i][0])) {
                --$i;
            }

            $rules[$ruleName] = $rule;
        }
    }
}
375
/**
 * Flex entity.
 *
 * Registers the `Hoa\Compiler\Llk\Llk` class with `Hoa\Consistency` as a flex
 * entity.
 * NOTE(review): presumably this lets the class be referenced under the shorter
 * name `\Hoa\Compiler\Llk`, as the class docblock suggests — confirm against
 * the `Hoa\Consistency` documentation.
 */
Consistency::flexEntity('Hoa\Compiler\Llk\Llk');
380