1<?php
2
3namespace AST;
4use \InvalidArgumentException;
5use \LogicException;
6use \RuntimeException;
7
8require_once "tokenizer.php";
9require_once "exceptions.php";
10
11class ParseConfig {
12    public $EXPR_DEPTH_LIMIT = 50;
13}
14
15function parse_config() {
16    static $config = null;
17    if ($config == null) {
18        $config = new ParseConfig();
19    }
20    return $config;
21}
22
23abstract class Fixing {
24    const None = 0;
25    const Prefix = 1;
26    const Postfix = 2;
27    const Infix = 3;
28    const Wrap = 4;
29}
30
31class ElementInstance {
32    private $_definition = null;
33    private $_args = null;
34
35    public function __construct($definition, $args) {
36        $this->_definition = $definition;
37        $this->_args = $args;
38    }
39
40    public function definition() { return $this->_definition; }
41    public function args() { return $this->_args; }
42
43    public function getStringValue() {
44        $pieces = array();
45        foreach ($this->args() as $arg) {
46            if ($arg instanceof ElementInstance) {
47                $pieces[] = $arg->getStringValue();
48            } else {
49                $pieces[] = $arg->match();
50            }
51        }
52        return implode('', $pieces);
53    }
54
55    public function getRepresentation() {
56        static $stack_depth = 0;
57        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
58            throw new RuntimeException('Depth limit exceeded.');
59        }
60        try {
61            if ($this->definition() === null) {
62                $this->ensureWellFormed(false);
63                // The expression is guaranteed to have 0 or 1 arguments because it's well formed.
64                if (count($this->args()) == 0) {
65                    return '';
66                }
67                if ($this->args()[0] instanceof ElementInstance) {
68                    return $this->args()[0]->getRepresentation();
69                } else {
70                    return $this->args()[0]->match();
71                }
72            } else {
73                return $this->definition()->getRepresentation($this);
74            }
75        } finally {
76            --$stack_depth;
77        }
78    }
79
80    public function isExpanded($recursive=true) {
81        static $stack_depth = 0;
82        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
83            throw new RuntimeException('Depth limit exceeded.');
84        }
85        try {
86            if ($this->definition() !== null && $this->definition()->fixing() == Fixing::None) {
87                return true;
88            }
89            foreach ($this->args() as $arg) {
90                if ($arg instanceof TokenInstance || ($recursive && !$arg->isExpanded($recursive))) {
91                    return false;
92                }
93            }
94        } finally {
95            --$stack_depth;
96        }
97        return true;
98    }
99
100    public function expand($elmDef, $recursive=true) {
101        static $stack_depth = 0;
102        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
103            throw new RuntimeException('Depth limit exceeded.');
104        }
105        try {
106            if ($this->isExpanded($recursive)) {
107                return;
108            }
109            $elmDef->spliceInstancesIn($this->_args);
110            if ($recursive) {
111                foreach ($this->args() as $arg) {
112                    if ($arg instanceof ElementInstance && !$arg->isExpanded($recursive)) {
113                        $arg->expand($elmDef);
114                    }
115                }
116            }
117        } finally {
118            --$stack_depth;
119        }
120    }
121
122    public function findUnexpandedToken($recursive=true) {
123        static $stack_depth = 0;
124        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
125            throw new RuntimeException('Depth limit exceeded.');
126        }
127        try {
128            if ($this->isExpanded($recursive)) {
129                return null;
130            }
131            foreach ($this->args() as $arg) {
132                if ($arg instanceof TokenInstance) {
133                    return $arg;
134                } elseif ($recursive) {
135                    $tok = $arg->findUnexpandedToken($recursive);
136                    if ($tok !== null) {
137                        return $tok;
138                    }
139                }
140            }
141            throw LogicException('A fully expanded element instance has no stray token!');
142        } finally {
143            --$stack_depth;
144        }
145    }
146
147    public function printTree($indentStr='', $isLastChild=false) {
148        static $stack_depth = 0;
149        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
150            throw new RuntimeException('Depth limit exceeded.');
151        }
152        try {
153            if ($this->definition() !== null) {
154                echo $indentStr . '+-' . $this->definition()->name() . "\n";
155            }
156            $extraIndentStr = ($isLastChild ? '  ' : '| ');
157            $argIdx = 0;
158            foreach ($this->args() as $arg) {
159                ++$argIdx;
160                if ($arg instanceof TokenInstance) {
161                    echo $indentStr . $extraIndentStr . '+-' . $arg . "\n";
162                } else {
163                    $arg->printTree($indentStr . $extraIndentStr, $argIdx == count($this->args()));
164                }
165            }
166        } finally {
167            --$stack_depth;
168        }
169    }
170
171    public function evaluateArgs() {
172        $values = array();
173        foreach ($this->args() as $arg) {
174            if ($arg instanceof ElementInstance) {
175                $values[] = $arg->evaluate();
176            } else {
177                $values[] = $arg;
178            }
179        }
180        return $values;
181    }
182
183    public function ensureWellFormed($recursive=true) {
184        static $stack_depth = 0;
185        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
186            throw new RuntimeException('Depth limit exceeded.');
187        }
188        try {
189            if ($this->definition() === null) {
190                if (count($this->args()) > 1) {
191                    throw new MalformedExpressionException($this, 'An expression that has more than a single root is not well defined.');
192                }
193            } else {
194                $this->definition()->ensureWellFormed($this);
195            }
196            if ($recursive) {
197                foreach ($this->args() as $arg) {
198                    if ($arg instanceof ElementInstance) {
199                        $arg->ensureWellFormed($recursive);
200                    }
201                }
202            }
203        } finally {
204            --$stack_depth;
205        }
206    }
207
208    public function evaluate() {
209        static $stack_depth = 0;
210        if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) {
211            throw new RuntimeException('Depth limit exceeded.');
212        }
213        try {
214            if ($this->definition() === null) {
215                $this->ensureWellFormed(false);
216                // The expression is guaranteed to have 0 or 1 arguments because it's well formed.
217                if (count($this->args()) == 0) {
218                    return null;
219                }
220                return $this->evaluateArgs()[0];
221            } else {
222                return $this->definition()->evaluate($this);
223            }
224        } finally {
225            --$stack_depth;
226        }
227    }
228}
229
230class ElementDefinition {
231    private $_name = null;
232    private $_arity = null;
233    private $_nested = null;
234    private $_fixing = null;
235    private $_priority = null;
236    private $_tokenDefs = null;
237
238    public function arity() { return $this->_arity; }
239    public function nested() { return $this->_nested; }
240    public function fixing() { return $this->_fixing; }
241    public function priority() { return $this->_priority; }
242    public function tokenDefs() { return $this->_tokenDefs; }
243    public function name() { return $this->_name; }
244
245    public function __construct($name, $fixing, $tokenDefs, $priority, $arity=null, $nested=null) {
246        if ($tokenDefs instanceof TokenDefinition) {
247            $tokenDefs = array($tokenDefs);
248        }
249        if (!is_array($tokenDefs)) {
250            throw new InvalidArgumentException('tokenDefs can only be a TokenDefinition or an array of them.');
251        }
252        if ($arity === null) {
253            switch ($fixing) {
254                case Fixing::None:
255                    $arity = 0;
256                    break;
257                case Fixing::Prefix:
258                case Fixing::Postfix:
259                    $arity = 1;
260                    break;
261                case Fixing::Infix:
262                    $arity = -1;
263                    break;
264                case Fixing::Wrap:
265                    break;
266                default:
267                    throw new InvalidArgumentException('Invalid fixing.');
268                    break;
269            }
270        }
271        $this->_name = $name;
272        $this->_tokenDefs = $tokenDefs;
273        $this->_arity = $arity;
274        $this->_nested = $nested;
275        $this->_fixing = $fixing;
276        $this->_priority = $priority;
277
278        switch ($this->fixing()) {
279            case Fixing::None:
280                if ($this->arity() != 0) {
281                    throw new LogicException('An element with no fixing must be 0-ary.');
282                }
283                if (count($this->tokenDefs()) != 1) {
284                    throw new LogicException('An element with no fixing must have exactly 1 token.');
285                }
286                break;
287            case Fixing::Prefix:
288                if ($this->arity() == 0) {
289                    throw new LogicException('An prefix element must be n-ary with n > 0.');
290                }
291                if (count($this->tokenDefs()) != 1 && count($this->tokenDefs()) != $this->arity()) {
292                    throw new LogicException('A n-ary prefix operator must have either 1 or n tokens.');
293                }
294                break;
295            case Fixing::Postfix:
296                if ($this->arity() == 0) {
297                    throw new LogicException('An postfix element must be n-ary with n > 0.');
298                }
299                if (count($this->tokenDefs()) != 1 && count($this->tokenDefs()) != $this->arity()) {
300                    throw new LogicException('A n-ary postfix operator must have either 1 or n tokens.');
301                }
302                break;
303            case Fixing::Infix:
304                if ($this->arity() == 0 || $this->arity() == 1) {
305                    throw new LogicException('An infix element must be n-ary with n > 1.');
306                }
307                if (count($this->tokenDefs()) != 1 && count($this->tokenDefs()) != $this->arity() - 1) {
308                    throw new LogicException('A n-ary infix operator must have either 1 or n-1 tokens.');
309                }
310                break;
311            case Fixing::Wrap:
312                if ($this->arity() !== null) {
313                    throw new LogicException('Arity does not apply to a wrapping element.');
314                }
315                if (count($this->tokenDefs()) != 2) {
316                    throw new LogicException('Wrapping operators are identified by exactly two tokens.');
317                }
318                break;
319            default:
320                throw new InvalidArgumentException('Invalid fixing specified.');
321                break;
322        }
323
324        if ($this->fixing() == Fixing::Wrap) {
325            if ($this->nested() === null) {
326                throw new LogicException('You must specify whether a wrapping operator is nested.');
327            }
328        } else {
329            if ($this->nested() !== null) {
330                throw new LogicException('Nested applies only to wrapping operators.');
331            }
332        }
333    }
334
335    private static function _getLongestAlternateChain($args, $position, $tokDef, $stopAt=-1) {
336        $nFound = 0;
337        for ($lastFound = $position; $lastFound < count($args); $lastFound += 2) {
338            if ($args[$lastFound]->definition() == $tokDef) {
339                if ($stopAt >= 0 && $nFound >= $stopAt) {
340                    break;
341                }
342                ++$nFound;
343            } else {
344                break;
345            }
346        }
347        return $nFound;
348    }
349
350    private static function _isMatchingAlternateChain($args, $position, $tokDefs) {
351        $tokDefIdx = 0;
352        for ($lastFound = $position; $lastFound < count($args) && $tokDefIdx < count($tokDefs); $lastFound += 2) {
353            if ($args[$lastFound]->definition() == $tokDefs[$tokDefIdx]) {
354                ++$tokDefIdx;
355            } else {
356                return false;
357            }
358        }
359        return ($tokDefIdx == count($tokDefs));
360    }
361
362    private static function _getWrappedSequence($args, $position, $tokDefs, $nested) {
363        if (count($tokDefs) != 2) {
364            throw new LogicException('Wrapping operators must have exactly 2 tokens.');
365        }
366        list($openTokDef, $closeTokDef) = $tokDefs;
367        if ($args[$position]->definition() != $openTokDef) {
368            return 0;
369        }
370        $nest = 1;
371        for ($i = $position + 1; $i < count($args); ++$i) {
372            if ($args[$i]->definition() == $closeTokDef) {
373                if (--$nest <= 0) {
374                    return $i - $position + 1;
375                }
376            } else if ($nested && $args[$i]->definition() == $openTokDef) {
377                ++$nest;
378            }
379        }
380        return 1;  // Which means unmatched sequence
381    }
382
383    private static function _extractAlternateChain($args, $position, $length) {
384        $retval = array();
385        for ($i = $position; $i < count($args) && count($retval) < $length; $i += 2) {
386            $retval[] = $args[$i];
387        }
388        return $retval;
389    }
390
391    private static function _splicePrefix(&$args, $firstTokPosition, $chainLength, $definition) {
392        if ($firstTokPosition < 0) {
393            throw new InvalidArgumentException('Attempt to _splicePrefix with a negative offset.');
394        }
395        $elmArgs = self::_extractAlternateChain($args, $firstTokPosition + 1, $chainLength);
396        $elmInst = new ElementInstance($definition, $elmArgs);
397        if ($firstTokPosition + $chainLength * 2 > count($args)) {
398            throw new NotEnoughArgumentsException($definition);
399        }
400        array_splice($args, $firstTokPosition, $chainLength * 2, array($elmInst));
401        return $firstTokPosition;
402    }
403
404    private static function _splicePostfix(&$args, $firstTokPosition, $chainLength, $definition) {
405        if ($firstTokPosition < 0) {
406            throw new InvalidArgumentException('Attempt to _splicePostfix with a negative offset.');
407        }
408        $elmArgs = self::_extractAlternateChain($args, $firstTokPosition - 1, $chainLength);
409        $elmInst = new ElementInstance($definition, $elmArgs);
410        if ($firstTokPosition == 0) {
411            throw new NotEnoughArgumentsException($definition);
412        } elseif ($firstTokPosition + $chainLength * 2 - 1 > count($args)) {
413            throw new NotEnoughArgumentsException($definition);
414        }
415        array_splice($args, $firstTokPosition - 1, $chainLength * 2, array($elmInst));
416        return $firstTokPosition - 1;
417    }
418
419    private static function _spliceInfix(&$args, $firstTokPosition, $chainLength, $definition) {
420        if ($firstTokPosition < 0) {
421            throw new InvalidArgumentException('Attempt to _spliceInfix with a negative offset.');
422        }
423        $elmArgs = self::_extractAlternateChain($args, $firstTokPosition - 1, $chainLength + 1);
424        $elmInst = new ElementInstance($definition, $elmArgs);
425        if ($firstTokPosition == 0) {
426            throw new NotEnoughArgumentsException($definition);
427        } elseif ($firstTokPosition + $chainLength * 2 > count($args)) {
428            throw new NotEnoughArgumentsException($definition);
429        }
430        array_splice($args, $firstTokPosition - 1, $chainLength * 2 + 1, array($elmInst));
431        return $firstTokPosition - 1;
432    }
433
434    private static function _spliceWrap(&$args, $firstTokPosition, $sequenceLength, $definition) {
435        if ($firstTokPosition < 0) {
436            throw new InvalidArgumentException('Attempt to _spliceWrap with a negative offset.');
437        }
438        if ($sequenceLength < 2) {
439            throw new LogicException('A wrapping sequence must consist of at least the two wrapping tokens.');
440        }
441        if ($firstTokPosition + $sequenceLength > count($args)) {
442            throw new LogicException('You requested to cut a sequence longer than the number of tokens.');
443        }
444        $elmArgs = array_slice($args, $firstTokPosition + 1, $sequenceLength - 2);
445        $elmInst = new ElementInstance($definition, $elmArgs);
446        array_splice($args, $firstTokPosition, $sequenceLength, array($elmInst));
447        return $firstTokPosition;
448    }
449
450    private static function _spliceNone(&$args, $firstTokPosition, $definition) {
451        if ($firstTokPosition < 0) {
452            throw new InvalidArgumentException('Attempt to _spliceNone with a negative offset.');
453        }
454        if ($firstTokPosition + 1 > count($args)) {
455            throw new LogicException('You requested to cut a token at the end of the tokens array.');
456        }
457        array_splice($args, $firstTokPosition, 1, array(new ElementInstance($definition, array($args[$firstTokPosition]))));
458        return $firstTokPosition;
459    }
460
461    public function trySpliceAt(&$args, &$position) {
462        switch ($this->fixing()) {
463            case Fixing::None:
464                if ($args[$position]->definition() == $this->tokenDefs()[0]) {
465                    $position = self::_spliceNone($args, $position, $this);
466                    return true;
467                }
468                break;
469            case Fixing::Prefix:
470                if ($this->arity() < 0) {
471                    $chainLength = self::_getLongestAlternateChain($args, $position, $this->tokenDefs()[0]);
472                    if ($chainLength > 0) {
473                        $position = self::_splicePrefix($args, $position, $chainLength, $this);
474                        return true;
475                    }
476                } else if (count($this->tokenDefs()) == 1) {
477                    $chainLength = self::_getLongestAlternateChain($args, $position, $this->tokenDefs()[0], $this->arity());
478                    if ($chainLength == $this->arity()) {
479                        $position = self::_splicePrefix($args, $position, $chainLength, $this);
480                        return true;
481                    }
482                } else if (self::_isMatchingAlternateChain($args, $position, $this->tokenDefs())) {
483                    $position = self::_splicePrefix($args, $position, $this->arity(), $this);
484                    return true;
485                }
486                break;
487            case Fixing::Postfix:
488                if ($this->arity() < 0) {
489                    $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0]);
490                    if ($chainLength > 0) {
491                        $position = self::_splicePostfix($args, $position + 1, $chainLength, $this);
492                        return true;
493                    }
494                } else if (count($this->tokenDefs()) == 1) {
495                    $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0], $this->arity());
496                    if ($chainLength == $this->arity()) {
497                        $position = self::_splicePostfix($args, $position + 1, $chainLength, $this);
498                        return true;
499                    }
500                } else if (self::_isMatchingAlternateChain($args, $position + 1, $this->tokenDefs())) {
501                    $position = self::_splicePostfix($args, $position + 1, $this->arity(), $this);
502                    return true;
503                }
504                break;
505            case Fixing::Infix:
506                if ($this->arity() < 0) {
507                    $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0]);
508                    if ($chainLength > 0) {
509                        $position = self::_spliceInfix($args, $position + 1, $chainLength, $this);
510                        return true;
511                    }
512                } else if (count($this->tokenDefs()) == 1) {
513                    $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0], $this->arity());
514                    if ($chainLength == $this->arity() - 1) {
515                        $position = self::_spliceInfix($args, $position + 1, $chainLength, $this);
516                        return true;
517                    }
518                } else if (self::_isMatchingAlternateChain($args, $position + 1, $this->tokenDefs())) {
519                    $position = self::_spliceInfix($args, $position + 1, $this->arity() - 1, $this);
520                    return true;
521                }
522                break;
523            case Fixing::Wrap:
524                $sequenceLength = self::_getWrappedSequence($args, $position, $this->tokenDefs(), $this->nested());
525                if ($sequenceLength >= 2) {
526                    $position = self::_spliceWrap($args, $position, $sequenceLength, $this);
527                    return true;
528                } elseif ($sequenceLength == 1) {
529                    throw new UnmatchedWrapperException($this, $args[$position]);
530                }
531                break;
532        }
533    }
534
535    public function spliceInstancesIn(&$args) {
536        $somethingHappened = false;
537        for ($i = 0; $i < count($args); ++$i) {
538            if ($this->trySpliceAt($args, $i)) {
539                $somethingHappened = true;
540            }
541        }
542        return $somethingHappened;
543    }
544
545    public function _evaluateWellFormed($elmInstance) {
546        return null;
547    }
548
549    public function evaluate($elmInstance) {
550        $this->ensureWellFormed($elmInstance);
551        return $this->_evaluateWellFormed($elmInstance);
552    }
553
554    public function getTokenDefRepr($idx) {
555        if ($idx < count($this->tokenDefs())) {
556            return $this->tokenDefs()[$idx]->representation();
557        } elseif (count($this->tokenDefs()) == 1) {
558            return $this->tokenDefs()[0]->representation();
559        }
560        return '';
561    }
562
563    public function getRepresentation($elmInstance) {
564        $this->ensureWellFormed($elmInstance);
565        $argsRepr = array();
566        foreach ($elmInstance->args() as $arg) {
567            if ($arg instanceof TokenInstance) {
568                $argsRepr[] = $arg->match();
569            } else {
570                $argsRepr[] = $arg->getRepresentation();
571            }
572        }
573        $pieces = array();
574        if ($this->fixing() == Fixing::Prefix || $this->fixing() == Fixing::Wrap) {
575            $pieces[] = $this->getTokenDefRepr(0);
576        }
577        $argIdx = ($this->fixing() == Fixing::Postfix || $this->fixing() == Fixing::Infix ? -1 : 0);
578        foreach ($argsRepr as $argRepr) {
579            $pieces[] = $argRepr;
580            ++$argIdx;
581            if ($this->fixing() == Fixing::None && $this->fixing() == Fixing::Wrap) {
582                continue;
583            }
584            if ($argIdx >= count($argsRepr) || ($this->fixing() == Fixing::Infix && $argIdx >= count($argsRepr) - 1)) {
585                continue;
586            }
587            $pieces[] = $this->getTokenDefRepr($argIdx);
588        }
589        if ($this->fixing() == Fixing::Wrap) {
590            $pieces[] = $this->getTokenDefRepr(1);
591        }
592        return implode('', $pieces);
593    }
594
595    static public function extractUsedTokens($elmDefs) {
596        $retval = array();
597        foreach ($elmDefs as $elmDef) {
598            $retval = array_merge($retval, $elmDef->tokenDefs());
599        }
600        return $retval;
601    }
602
603    public function ensureWellFormed($elmInstance) {
604        if ($elmInstance->definition() != $this) {
605            throw new LogicException('This instance is not an instance of the given definition.');
606        }
607        $args = $elmInstance->args();
608        // Check arity
609        switch ($this->fixing()) {
610            case Fixing::None:
611                if (count($args) != 1) {
612                    throw new MalformedExpressionException($elmInstance, 'Expected exactly one token.');
613                } else if (!($args[0] instanceof TokenInstance)) {
614                    throw new MalformedExpressionException($elmInstance, 'Expected a token instance.');
615                } else if ($args[0]->definition() != $this->tokenDefs()[0]) {
616                    throw new MalformedExpressionException($elmInstance, 'Expected a token of type ' . $this->tokenDefs()[0]->name());
617                }
618                break;
619            case Fixing::Prefix:
620            case Fixing::Postfix:
621            case Fixing::Infix:
622                if ($this->arity() < 0) { // Any number of tokens, > 0
623                    if (count($args) < 1 && $this->fixing() == Fixing::Infix) {
624                        throw new MalformedExpressionException($elmInstance, 'Expected at least two arguments.');
625                    } elseif (count($args) == 0) {
626                        throw new MalformedExpressionException($elmInstance, 'Expected at least one argument.');
627                    }
628                } elseif (count($args) != $this->arity()) {
629                    throw new MalformedExpressionException($elmInstance, 'Expected exactly ' . $this->arity() . ' arguments.');
630                }
631                break;
632        }
633    }
634}
635
636function parse(array $tokInsts, array $elmDefs) {
637    usort($elmDefs, function ($a, $b) { return $a->priority() - $b->priority(); });
638    $root = new ElementInstance(null, $tokInsts);
639    foreach ($elmDefs as $elmDef) {
640        $root->expand($elmDef);
641    }
642    if (!$root->isExpanded()) {
643        throw new StrayTokenException($root->findUnexpandedToken());
644    }
645    return $root;
646}
647
648?>