1<?php
2
3/**
4 * Hoa
5 *
6 *
7 * @license
8 *
9 * New BSD License
10 *
11 * Copyright © 2007-2017, Hoa community. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
15 *     * Redistributions of source code must retain the above copyright
16 *       notice, this list of conditions and the following disclaimer.
17 *     * Redistributions in binary form must reproduce the above copyright
18 *       notice, this list of conditions and the following disclaimer in the
19 *       documentation and/or other materials provided with the distribution.
20 *     * Neither the name of the Hoa nor the names of its contributors may be
21 *       used to endorse or promote products derived from this software without
22 *       specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37namespace Hoa\Compiler\Llk\Rule;
38
39use Hoa\Compiler;
40use Hoa\Iterator;
41
42/**
43 * Class \Hoa\Compiler\Llk\Rule\Analyzer.
44 *
45 * Analyze rules and transform them into atomic rules operations.
46 *
47 * @copyright  Copyright © 2007-2017 Hoa community
48 * @license    New BSD License
49 */
50class Analyzer
51{
52    /**
53     * PP lexemes.
54     */
55    protected static $_ppLexemes = [
56        'default' => [
57            'skip'          => '\s',
58            'or'            => '\|',
59            'zero_or_one'   => '\?',
60            'one_or_more'   => '\+',
61            'zero_or_more'  => '\*',
62            'n_to_m'        => '\{[0-9]+,[0-9]+\}',
63            'zero_to_m'     => '\{,[0-9]+\}',
64            'n_or_more'     => '\{[0-9]+,\}',
65            'exactly_n'     => '\{[0-9]+\}',
66            'skipped'       => '::[a-zA-Z_][a-zA-Z0-9_]*(\[\d+\])?::',
67            'kept'          => '<[a-zA-Z_][a-zA-Z0-9_]*(\[\d+\])?>',
68            'named'         => '[a-zA-Z_][a-zA-Z0-9_]*\(\)',
69            'node'          => '#[a-zA-Z_][a-zA-Z0-9_]*(:[mM])?',
70            'capturing_'    => '\(',
71            '_capturing'    => '\)'
72        ]
73    ];
74
75    /**
76     * Lexer iterator.
77     *
78     * @var \Hoa\Iterator\Lookahead
79     */
80    protected $_lexer                   = null;
81
82    /**
83     * Tokens representing rules.
84     *
85     * @var array
86     */
87    protected $_tokens                  = null;
88
89    /**
90     * Rules.
91     *
92     * @var array
93     */
94    protected $_rules                   = null;
95
96    /**
97     * Rule name being analyzed.
98     *
99     * @var string
100     */
101    private $_ruleName                  = null;
102
103    /**
104     * Parsed rules.
105     *
106     * @var array
107     */
108    protected $_parsedRules             = null;
109
110    /**
111     * Counter to auto-name transitional rules.
112     *
113     * @var int
114     */
115    protected $_transitionalRuleCounter = 0;
116
117
118
119    /**
120     * Constructor.
121     *
122     * @param   array  $tokens    Tokens.
123     */
124    public function __construct(array $tokens)
125    {
126        $this->_tokens = $tokens;
127
128        return;
129    }
130
131   /**
132     * Build the analyzer of the rules (does not analyze the rules).
133     *
134     * @param   array  $rules    Rule to be analyzed.
135     * @return  void
136     * @throws  \Hoa\Compiler\Exception
137     */
138    public function analyzeRules(array $rules)
139    {
140        if (empty($rules)) {
141            throw new Compiler\Exception\Rule('No rules specified!', 0);
142        }
143
144        $this->_parsedRules = [];
145        $this->_rules       = $rules;
146        $lexer              = new Compiler\Llk\Lexer();
147
148        foreach ($rules as $key => $value) {
149            $this->_lexer = new Iterator\Lookahead($lexer->lexMe($value, static::$_ppLexemes));
150            $this->_lexer->rewind();
151
152            $this->_ruleName = $key;
153            $nodeId          = null;
154
155            if ('#' === $key[0]) {
156                $nodeId = $key;
157                $key    = substr($key, 1);
158            }
159
160            $pNodeId = $nodeId;
161            $rule    = $this->rule($pNodeId);
162
163            if (null === $rule) {
164                throw new Compiler\Exception(
165                    'Error while parsing rule %s.',
166                    1,
167                    $key
168                );
169            }
170
171            $zeRule = $this->_parsedRules[$rule];
172            $zeRule->setName($key);
173            $zeRule->setPPRepresentation($value);
174
175            if (null !== $nodeId) {
176                $zeRule->setDefaultId($nodeId);
177            }
178
179            unset($this->_parsedRules[$rule]);
180            $this->_parsedRules[$key] = $zeRule;
181        }
182
183        return $this->_parsedRules;
184    }
185
186    /**
187     * Implementation of “rule”.
188     *
189     * @return  mixed
190     */
191    protected function rule(&$pNodeId)
192    {
193        return $this->choice($pNodeId);
194    }
195
196    /**
197     * Implementation of “choice”.
198     *
199     * @return  mixed
200     */
201    protected function choice(&$pNodeId)
202    {
203        $children = [];
204
205        // concatenation() …
206        $nNodeId = $pNodeId;
207        $rule    = $this->concatenation($nNodeId);
208
209        if (null === $rule) {
210            return null;
211        }
212
213        if (null !== $nNodeId) {
214            $this->_parsedRules[$rule]->setNodeId($nNodeId);
215        }
216
217        $children[] = $rule;
218        $others     = false;
219
220        // … ( ::or:: concatenation() )*
221        while ('or' === $this->_lexer->current()['token']) {
222            $this->_lexer->next();
223            $others   = true;
224            $nNodeId  = $pNodeId;
225            $rule     = $this->concatenation($nNodeId);
226
227            if (null === $rule) {
228                return null;
229            }
230
231            if (null !== $nNodeId) {
232                $this->_parsedRules[$rule]->setNodeId($nNodeId);
233            }
234
235            $children[] = $rule;
236        }
237
238        $pNodeId = null;
239
240        if (false === $others) {
241            return $rule;
242        }
243
244        $name                      = $this->_transitionalRuleCounter++;
245        $this->_parsedRules[$name] = new Choice($name, $children);
246
247        return $name;
248    }
249
250    /**
251     * Implementation of “concatenation”.
252     *
253     * @return  mixed
254     */
255    protected function concatenation(&$pNodeId)
256    {
257        $children = [];
258
259        // repetition() …
260        $rule = $this->repetition($pNodeId);
261
262        if (null === $rule) {
263            return null;
264        }
265
266        $children[] = $rule;
267        $others     = false;
268
269        // … repetition()*
270        while (null !== $r1 = $this->repetition($pNodeId)) {
271            $children[] = $r1;
272            $others     = true;
273        }
274
275        if (false === $others && null === $pNodeId) {
276            return $rule;
277        }
278
279        $name                      = $this->_transitionalRuleCounter++;
280        $this->_parsedRules[$name] = new Concatenation(
281            $name,
282            $children,
283            null
284        );
285
286        return $name;
287    }
288
289    /**
290     * Implementation of “repetition”.
291     *
292     * @return  mixed
293     * @throws  \Hoa\Compiler\Exception
294     */
295    protected function repetition(&$pNodeId)
296    {
297        // simple() …
298        $children = $this->simple($pNodeId);
299
300        if (null === $children) {
301            return null;
302        }
303
304        // … quantifier()?
305        switch ($this->_lexer->current()['token']) {
306            case 'zero_or_one':
307                $min = 0;
308                $max = 1;
309                $this->_lexer->next();
310
311                break;
312
313            case 'one_or_more':
314                $min =  1;
315                $max = -1;
316                $this->_lexer->next();
317
318               break;
319
320            case 'zero_or_more':
321                $min =  0;
322                $max = -1;
323                $this->_lexer->next();
324
325                break;
326
327            case 'n_to_m':
328                $handle = trim($this->_lexer->current()['value'], '{}');
329                $nm     = explode(',', $handle);
330                $min    = (int) trim($nm[0]);
331                $max    = (int) trim($nm[1]);
332                $this->_lexer->next();
333
334                break;
335
336            case 'zero_to_m':
337                $min = 0;
338                $max = (int) trim($this->_lexer->current()['value'], '{,}');
339                $this->_lexer->next();
340
341                break;
342
343            case 'n_or_more':
344                $min = (int) trim($this->_lexer->current()['value'], '{,}');
345                $max = -1;
346                $this->_lexer->next();
347
348                break;
349
350            case 'exactly_n':
351                $handle = trim($this->_lexer->current()['value'], '{}');
352                $min    = (int) $handle;
353                $max    = $min;
354                $this->_lexer->next();
355
356                break;
357        }
358
359        // … <node>?
360        if ('node' === $this->_lexer->current()['token']) {
361            $pNodeId = $this->_lexer->current()['value'];
362            $this->_lexer->next();
363        }
364
365        if (!isset($min)) {
366            return $children;
367        }
368
369        if (-1 != $max && $max < $min) {
370            throw new Compiler\Exception(
371                'Upper bound %d must be greater or ' .
372                'equal to lower bound %d in rule %s.',
373                2,
374                [$max, $min, $this->_ruleName]
375            );
376        }
377
378        $name                      = $this->_transitionalRuleCounter++;
379        $this->_parsedRules[$name] = new Repetition(
380            $name,
381            $min,
382            $max,
383            $children,
384            null
385        );
386
387        return $name;
388    }
389
390    /**
391     * Implementation of “simple”.
392     *
393     * @return  mixed
394     * @throws  \Hoa\Compiler\Exception
395     * @throws  \Hoa\Compiler\Exception\Rule
396     */
397    protected function simple(&$pNodeId)
398    {
399        if ('capturing_' === $this->_lexer->current()['token']) {
400            $this->_lexer->next();
401            $rule = $this->choice($pNodeId);
402
403            if (null === $rule) {
404                return null;
405            }
406
407            if ('_capturing' != $this->_lexer->current()['token']) {
408                return null;
409            }
410
411            $this->_lexer->next();
412
413            return $rule;
414        }
415
416        if ('skipped' === $this->_lexer->current()['token']) {
417            $tokenName = trim($this->_lexer->current()['value'], ':');
418
419            if (']' === substr($tokenName, -1)) {
420                $uId       = (int) substr($tokenName, strpos($tokenName, '[') + 1, -1);
421                $tokenName = substr($tokenName, 0, strpos($tokenName, '['));
422            } else {
423                $uId = -1;
424            }
425
426            $exists = false;
427
428            foreach ($this->_tokens as $namespace => $tokens) {
429                foreach ($tokens as $token => $value) {
430                    if ($token === $tokenName ||
431                        substr($token, 0, strpos($token, ':')) === $tokenName) {
432                        $exists = true;
433
434                        break 2;
435                    }
436                }
437            }
438
439            if (false == $exists) {
440                throw new Compiler\Exception(
441                    'Token ::%s:: does not exist in rule %s.',
442                    3,
443                    [$tokenName, $this->_ruleName]
444                );
445            }
446
447            $name                      = $this->_transitionalRuleCounter++;
448            $this->_parsedRules[$name] = new Token(
449                $name,
450                $tokenName,
451                null,
452                $uId
453            );
454            $this->_lexer->next();
455
456            return $name;
457        }
458
459        if ('kept' === $this->_lexer->current()['token']) {
460            $tokenName = trim($this->_lexer->current()['value'], '<>');
461
462            if (']' === substr($tokenName, -1)) {
463                $uId       = (int) substr($tokenName, strpos($tokenName, '[') + 1, -1);
464                $tokenName = substr($tokenName, 0, strpos($tokenName, '['));
465            } else {
466                $uId = -1;
467            }
468
469            $exists = false;
470
471            foreach ($this->_tokens as $namespace => $tokens) {
472                foreach ($tokens as $token => $value) {
473                    if ($token === $tokenName
474                       || substr($token, 0, strpos($token, ':')) === $tokenName) {
475                        $exists = true;
476
477                        break 2;
478                    }
479                }
480            }
481
482            if (false == $exists) {
483                throw new Compiler\Exception(
484                    'Token <%s> does not exist in rule %s.',
485                    4,
486                    [$tokenName, $this->_ruleName]
487                );
488            }
489
490            $name  = $this->_transitionalRuleCounter++;
491            $token = new Token(
492                $name,
493                $tokenName,
494                null,
495                $uId,
496                true
497            );
498            $this->_parsedRules[$name] = $token;
499            $this->_lexer->next();
500
501            return $name;
502        }
503
504        if ('named' === $this->_lexer->current()['token']) {
505            $tokenName = rtrim($this->_lexer->current()['value'], '()');
506
507            if (false === array_key_exists($tokenName, $this->_rules) &&
508                false === array_key_exists('#' . $tokenName, $this->_rules)) {
509                throw new Compiler\Exception\Rule(
510                    'Cannot call rule %s() in rule %s because it does not exist.',
511                    5,
512                    [$tokenName, $this->_ruleName]
513                );
514            }
515
516            if (0     === $this->_lexer->key() &&
517                'EOF' === $this->_lexer->getNext()['token']) {
518                $name                      = $this->_transitionalRuleCounter++;
519                $this->_parsedRules[$name] = new Concatenation(
520                    $name,
521                    [$tokenName],
522                    null
523                );
524            } else {
525                $name = $tokenName;
526            }
527
528            $this->_lexer->next();
529
530            return $name;
531        }
532
533        return null;
534    }
535}
536