1<?php
2
3/**
4 * Hoa
5 *
6 *
7 * @license
8 *
9 * New BSD License
10 *
11 * Copyright © 2007-2017, Hoa community. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
15 *     * Redistributions of source code must retain the above copyright
16 *       notice, this list of conditions and the following disclaimer.
17 *     * Redistributions in binary form must reproduce the above copyright
18 *       notice, this list of conditions and the following disclaimer in the
19 *       documentation and/or other materials provided with the distribution.
20 *     * Neither the name of the Hoa nor the names of its contributors may be
21 *       used to endorse or promote products derived from this software without
22 *       specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37namespace Hoa\Compiler\Llk;
38
39use Hoa\Compiler;
40use Hoa\Iterator;
41
42/**
43 * Class \Hoa\Compiler\Llk\Parser.
44 *
45 * LL(k) parser.
46 *
47 * @copyright  Copyright © 2007-2017 Hoa community
48 * @license    New BSD License
49 */
50class Parser
51{
52    /**
53     * List of pragmas.
54     *
55     * @var array
56     */
57    protected $_pragmas       = null;
58
59    /**
60     * List of skipped tokens.
61     *
62     * @var array
63     */
64    protected $_skip          = null;
65
66    /**
67     * Associative array (token name => token regex), to be defined in
68     * precedence order.
69     *
70     * @var array
71     */
72    protected $_tokens        = null;
73
74    /**
75     * Rules, to be defined as associative array, name => Rule object.
76     *
77     * @var array
78     */
79    protected $_rules         = null;
80
81    /**
82     * Lexer iterator.
83     *
84     * @var \Hoa\Iterator\Lookahead
85     */
86    protected $_tokenSequence = null;
87
88    /**
89     * Possible token causing an error.
90     *
91     * @var array
92     */
93    protected $_errorToken    = null;
94
95    /**
96     * Trace of activated rules.
97     *
98     * @var array
99     */
100    protected $_trace         = [];
101
102    /**
103     * Stack of todo list.
104     *
105     * @var array
106     */
107    protected $_todo          = null;
108
109    /**
110     * AST.
111     *
112     * @var \Hoa\Compiler\Llk\TreeNode
113     */
114    protected $_tree          = null;
115
116    /**
117     * Current depth while building the trace.
118     *
119     * @var int
120     */
121    protected $_depth         = -1;
122
123
124
125    /**
126     * Construct the parser.
127     *
128     * @param   array  $tokens     Tokens.
129     * @param   array  $rules      Rules.
130     * @param   array  $pragmas    Pragmas.
131     */
132    public function __construct(
133        array $tokens  = [],
134        array $rules   = [],
135        array $pragmas = []
136    ) {
137        $this->_tokens  = $tokens;
138        $this->_rules   = $rules;
139        $this->_pragmas = $pragmas;
140
141        return;
142    }
143
144    /**
145     * Parse :-).
146     *
147     * @param   string  $text    Text to parse.
148     * @param   string  $rule    The axiom, i.e. root rule.
149     * @param   bool    $tree    Whether build tree or not.
150     * @return  mixed
151     * @throws  \Hoa\Compiler\Exception\UnexpectedToken
152     */
153    public function parse($text, $rule = null, $tree = true)
154    {
155        $k = 1024;
156
157        if (isset($this->_pragmas['parser.lookahead'])) {
158            $k = max(0, intval($this->_pragmas['parser.lookahead']));
159        }
160
161        $lexer                = new Lexer($this->_pragmas);
162        $this->_tokenSequence = new Iterator\Buffer(
163            $lexer->lexMe($text, $this->_tokens),
164            $k
165        );
166        $this->_tokenSequence->rewind();
167
168        $this->_errorToken = null;
169        $this->_trace      = [];
170        $this->_todo       = [];
171
172        if (false === array_key_exists($rule, $this->_rules)) {
173            $rule = $this->getRootRule();
174        }
175
176        $closeRule   = new Rule\Ekzit($rule, 0);
177        $openRule    = new Rule\Entry($rule, 0, [$closeRule]);
178        $this->_todo = [$closeRule, $openRule];
179
180        do {
181            $out = $this->unfold();
182
183            if (null  !== $out &&
184                'EOF' === $this->_tokenSequence->current()['token']) {
185                break;
186            }
187
188            if (false === $this->backtrack()) {
189                $token  = $this->_errorToken;
190
191                if (null === $this->_errorToken) {
192                    $token = $this->_tokenSequence->current();
193                }
194
195                $offset = $token['offset'];
196                $line   = 1;
197                $column = 1;
198
199                if (!empty($text)) {
200                    if (0 === $offset) {
201                        $leftnl = 0;
202                    } else {
203                        $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0;
204                    }
205
206                    $rightnl = strpos($text, "\n", $offset);
207                    $line    = substr_count($text, "\n", 0, $leftnl + 1) + 1;
208                    $column  = $offset - $leftnl + (0 === $leftnl);
209
210                    if (false !== $rightnl) {
211                        $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n");
212                    }
213                }
214
215                throw new Compiler\Exception\UnexpectedToken(
216                    'Unexpected token "%s" (%s) at line %d and column %d:' .
217                    "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑',
218                    0,
219                    [
220                        $token['value'],
221                        $token['token'],
222                        $line,
223                        $column,
224                        $text
225                    ],
226                    $line,
227                    $column
228                );
229            }
230        } while (true);
231
232        if (false === $tree) {
233            return true;
234        }
235
236        $tree = $this->_buildTree();
237
238        if (!($tree instanceof TreeNode)) {
239            throw new Compiler\Exception(
240                'Parsing error: cannot build AST, the trace is corrupted.',
241                1
242            );
243        }
244
245        return $this->_tree = $tree;
246    }
247
248    /**
249     * Unfold trace.
250     *
251     * @return  mixed
252     */
253    protected function unfold()
254    {
255        while (0 < count($this->_todo)) {
256            $rule = array_pop($this->_todo);
257
258            if ($rule instanceof Rule\Ekzit) {
259                $rule->setDepth($this->_depth);
260                $this->_trace[] = $rule;
261
262                if (false === $rule->isTransitional()) {
263                    --$this->_depth;
264                }
265            } else {
266                $ruleName = $rule->getRule();
267                $next     = $rule->getData();
268                $zeRule   = $this->_rules[$ruleName];
269                $out      = $this->_parse($zeRule, $next);
270
271                if (false === $out && false === $this->backtrack()) {
272                    return null;
273                }
274            }
275        }
276
277        return true;
278    }
279
280    /**
281     * Parse current rule.
282     *
283     * @param   \Hoa\Compiler\Llk\Rule  $zeRule    Current rule.
284     * @param   int                     $next      Next rule index.
285     * @return  bool
286     */
287    protected function _parse(Rule $zeRule, $next)
288    {
289        if ($zeRule instanceof Rule\Token) {
290            $name = $this->_tokenSequence->current()['token'];
291
292            if ($zeRule->getTokenName() !== $name) {
293                return false;
294            }
295
296            $value = $this->_tokenSequence->current()['value'];
297
298            if (0 <= $unification = $zeRule->getUnificationIndex()) {
299                for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) {
300                    $trace = $this->_trace[$i];
301
302                    if ($trace instanceof Rule\Entry) {
303                        if (false === $trace->isTransitional()) {
304                            if ($trace->getDepth() <= $this->_depth) {
305                                break;
306                            }
307
308                            --$skip;
309                        }
310                    } elseif ($trace instanceof Rule\Ekzit &&
311                              false === $trace->isTransitional()) {
312                        $skip += $trace->getDepth() > $this->_depth;
313                    }
314
315                    if (0 < $skip) {
316                        continue;
317                    }
318
319                    if ($trace instanceof Rule\Token &&
320                        $unification === $trace->getUnificationIndex() &&
321                        $value       !== $trace->getValue()) {
322                        return false;
323                    }
324                }
325            }
326
327            $namespace = $this->_tokenSequence->current()['namespace'];
328            $zzeRule   = clone $zeRule;
329            $zzeRule->setValue($value);
330            $zzeRule->setNamespace($namespace);
331
332            if (isset($this->_tokens[$namespace][$name])) {
333                $zzeRule->setRepresentation($this->_tokens[$namespace][$name]);
334            } else {
335                foreach ($this->_tokens[$namespace] as $_name => $regex) {
336                    if (false === $pos = strpos($_name, ':')) {
337                        continue;
338                    }
339
340                    $_name = substr($_name, 0, $pos);
341
342                    if ($_name === $name) {
343                        break;
344                    }
345                }
346
347                $zzeRule->setRepresentation($regex);
348            }
349
350            array_pop($this->_todo);
351            $this->_trace[] = $zzeRule;
352            $this->_tokenSequence->next();
353            $this->_errorToken = $this->_tokenSequence->current();
354
355            return true;
356        } elseif ($zeRule instanceof Rule\Concatenation) {
357            if (false === $zeRule->isTransitional()) {
358                ++$this->_depth;
359            }
360
361            $this->_trace[] = new Rule\Entry(
362                $zeRule->getName(),
363                0,
364                null,
365                $this->_depth
366            );
367            $children = $zeRule->getChildren();
368
369            for ($i = count($children) - 1; $i >= 0; --$i) {
370                $nextRule      = $children[$i];
371                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
372                $this->_todo[] = new Rule\Entry($nextRule, 0);
373            }
374
375            return true;
376        } elseif ($zeRule instanceof Rule\Choice) {
377            $children = $zeRule->getChildren();
378
379            if ($next >= count($children)) {
380                return false;
381            }
382
383            if (false === $zeRule->isTransitional()) {
384                ++$this->_depth;
385            }
386
387            $this->_trace[] = new Rule\Entry(
388                $zeRule->getName(),
389                $next,
390                $this->_todo,
391                $this->_depth
392            );
393            $nextRule      = $children[$next];
394            $this->_todo[] = new Rule\Ekzit($nextRule, 0);
395            $this->_todo[] = new Rule\Entry($nextRule, 0);
396
397            return true;
398        } elseif ($zeRule instanceof Rule\Repetition) {
399            $nextRule = $zeRule->getChildren();
400
401            if (0 === $next) {
402                $name = $zeRule->getName();
403                $min  = $zeRule->getMin();
404
405                if (false === $zeRule->isTransitional()) {
406                    ++$this->_depth;
407                }
408
409                $this->_trace[] = new Rule\Entry(
410                    $name,
411                    $min,
412                    null,
413                    $this->_depth
414                );
415                array_pop($this->_todo);
416                $this->_todo[]  = new Rule\Ekzit(
417                    $name,
418                    $min,
419                    $this->_todo
420                );
421
422                for ($i = 0; $i < $min; ++$i) {
423                    $this->_todo[] = new Rule\Ekzit($nextRule, 0);
424                    $this->_todo[] = new Rule\Entry($nextRule, 0);
425                }
426
427                return true;
428            } else {
429                $max = $zeRule->getMax();
430
431                if (-1 != $max && $next > $max) {
432                    return false;
433                }
434
435                $this->_todo[] = new Rule\Ekzit(
436                    $zeRule->getName(),
437                    $next,
438                    $this->_todo
439                );
440                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
441                $this->_todo[] = new Rule\Entry($nextRule, 0);
442
443                return true;
444            }
445        }
446
447        return false;
448    }
449
450    /**
451     * Backtrack the trace.
452     *
453     * @return  bool
454     */
455    protected function backtrack()
456    {
457        $found = false;
458
459        do {
460            $last = array_pop($this->_trace);
461
462            if ($last instanceof Rule\Entry) {
463                $zeRule = $this->_rules[$last->getRule()];
464                $found  = $zeRule instanceof Rule\Choice;
465            } elseif ($last instanceof Rule\Ekzit) {
466                $zeRule = $this->_rules[$last->getRule()];
467                $found  = $zeRule instanceof Rule\Repetition;
468            } elseif ($last instanceof Rule\Token) {
469                $this->_tokenSequence->previous();
470
471                if (false === $this->_tokenSequence->valid()) {
472                    return false;
473                }
474            }
475        } while (0 < count($this->_trace) && false === $found);
476
477        if (false === $found) {
478            return false;
479        }
480
481        $rule          = $last->getRule();
482        $next          = $last->getData() + 1;
483        $this->_depth  = $last->getDepth();
484        $this->_todo   = $last->getTodo();
485        $this->_todo[] = new Rule\Entry($rule, $next);
486
487        return true;
488    }
489
490    /**
491     * Build AST from trace.
492     * Walk through the trace iteratively and recursively.
493     *
494     * @param   int      $i            Current trace index.
495     * @param   array    &$children    Collected children.
496     * @return  \Hoa\Compiler\Llk\TreeNode
497     */
498    protected function _buildTree($i = 0, &$children = [])
499    {
500        $max = count($this->_trace);
501
502        while ($i < $max) {
503            $trace = $this->_trace[$i];
504
505            if ($trace instanceof Rule\Entry) {
506                $ruleName  = $trace->getRule();
507                $rule      = $this->_rules[$ruleName];
508                $isRule    = false === $trace->isTransitional();
509                $nextTrace = $this->_trace[$i + 1];
510                $id        = $rule->getNodeId();
511
512                // Optimization: Skip empty trace sequence.
513                if ($nextTrace instanceof Rule\Ekzit &&
514                    $ruleName == $nextTrace->getRule()) {
515                    $i += 2;
516
517                    continue;
518                }
519
520                if (true === $isRule) {
521                    $children[] = $ruleName;
522                }
523
524                if (null !== $id) {
525                    $children[] = [
526                        'id'      => $id,
527                        'options' => $rule->getNodeOptions()
528                    ];
529                }
530
531                $i = $this->_buildTree($i + 1, $children);
532
533                if (false === $isRule) {
534                    continue;
535                }
536
537                $handle   = [];
538                $cId      = null;
539                $cOptions = [];
540
541                do {
542                    $pop = array_pop($children);
543
544                    if (true === is_object($pop)) {
545                        $handle[] = $pop;
546                    } elseif (true === is_array($pop) && null === $cId) {
547                        $cId      = $pop['id'];
548                        $cOptions = $pop['options'];
549                    } elseif ($ruleName == $pop) {
550                        break;
551                    }
552                } while (null !== $pop);
553
554                if (null === $cId) {
555                    $cId      = $rule->getDefaultId();
556                    $cOptions = $rule->getDefaultOptions();
557                }
558
559                if (null === $cId) {
560                    for ($j = count($handle) - 1; $j >= 0; --$j) {
561                        $children[] = $handle[$j];
562                    }
563
564                    continue;
565                }
566
567                if (true === in_array('M', $cOptions) &&
568                    true === $this->mergeTree($children, $handle, $cId)) {
569                    continue;
570                }
571
572                if (true === in_array('m', $cOptions) &&
573                    true === $this->mergeTree($children, $handle, $cId, true)) {
574                    continue;
575                }
576
577                $cTree = new TreeNode($id ?: $cId);
578
579                foreach ($handle as $child) {
580                    $child->setParent($cTree);
581                    $cTree->prependChild($child);
582                }
583
584                $children[] = $cTree;
585            } elseif ($trace instanceof Rule\Ekzit) {
586                return $i + 1;
587            } else {
588                if (false === $trace->isKept()) {
589                    ++$i;
590
591                    continue;
592                }
593
594                $child = new TreeNode('token', [
595                    'token'     => $trace->getTokenName(),
596                    'value'     => $trace->getValue(),
597                    'namespace' => $trace->getNamespace(),
598                ]);
599                $children[] = $child;
600                ++$i;
601            }
602        }
603
604        return $children[0];
605    }
606
607    /**
608     * Try to merge directly children into an existing node.
609     *
610     * @param   array   &$children    Current children being gathering.
611     * @param   array   &$handle      Children of the new node.
612     * @param   string  $cId          Node ID.
613     * @param   bool    $recursive    Whether we should merge recursively or
614     *                                not.
615     * @return  bool
616     */
617    protected function mergeTree(
618        &$children,
619        &$handle,
620        $cId,
621        $recursive = false
622    ) {
623        end($children);
624        $last = current($children);
625
626        if (!is_object($last)) {
627            return false;
628        }
629
630        if ($cId !== $last->getId()) {
631            return false;
632        }
633
634        if (true === $recursive) {
635            foreach ($handle as $child) {
636                $this->mergeTreeRecursive($last, $child);
637            }
638
639            return true;
640        }
641
642        foreach ($handle as $child) {
643            $last->appendChild($child);
644            $child->setParent($last);
645        }
646
647        return true;
648    }
649
650    /**
651     * Merge recursively.
652     * Please, see self::mergeTree() to know the context.
653     *
654     * @param   \Hoa\Compiler\Llk\TreeNode  $node       Node that receives.
655     * @param   \Hoa\Compiler\Llk\TreeNode  $newNode    Node to merge.
656     * @return  void
657     */
658    protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode)
659    {
660        $nNId = $newNode->getId();
661
662        if ('token' === $nNId) {
663            $node->appendChild($newNode);
664            $newNode->setParent($node);
665
666            return;
667        }
668
669        $children = $node->getChildren();
670        end($children);
671        $last     = current($children);
672
673        if ($last->getId() !== $nNId) {
674            $node->appendChild($newNode);
675            $newNode->setParent($node);
676
677            return;
678        }
679
680        foreach ($newNode->getChildren() as $child) {
681            $this->mergeTreeRecursive($last, $child);
682        }
683
684        return;
685    }
686
687    /**
688     * Get AST.
689     *
690     * @return  \Hoa\Compiler\Llk\TreeNode
691     */
692    public function getTree()
693    {
694        return $this->_tree;
695    }
696
697    /**
698     * Get trace.
699     *
700     * @return  array
701     */
702    public function getTrace()
703    {
704        return $this->_trace;
705    }
706
707    /**
708     * Get pragmas.
709     *
710     * @return  array
711     */
712    public function getPragmas()
713    {
714        return $this->_pragmas;
715    }
716
717    /**
718     * Get tokens.
719     *
720     * @return  array
721     */
722    public function getTokens()
723    {
724        return $this->_tokens;
725    }
726
727    /**
728     * Get the lexer iterator.
729     *
730     * @return  \Hoa\Iterator\Buffer
731     */
732    public function getTokenSequence()
733    {
734        return $this->_tokenSequence;
735    }
736
737    /**
738     * Get rule by name.
739     *
740     * @param   string  $name    Rule name.
741     * @return  \Hoa\Compiler\Llk\Rule
742     */
743    public function getRule($name)
744    {
745        if (!isset($this->_rules[$name])) {
746            return null;
747        }
748
749        return $this->_rules[$name];
750    }
751
752    /**
753     * Get rules.
754     *
755     * @return  array
756     */
757    public function getRules()
758    {
759        return $this->_rules;
760    }
761
762    /**
763     * Get root rule.
764     *
765     * @return  string
766     */
767    public function getRootRule()
768    {
769        foreach ($this->_rules as $rule => $_) {
770            if (!is_int($rule)) {
771                break;
772            }
773        }
774
775        return $rule;
776    }
777}
778