0, it means a link to a sub-automata (sub-automata index + 1). * * When we write our consume() method, it's just a simple switch receiving * an action. It receives only character. It's like a “goto” in our * compiler, and allows us to execute code when skiming through the graph. * * Negative/special actions are used to auto-fill or empty buffers. * E.g: -1 will fill the buffer 0, -2 will empty the buffer 0, * -3 will fill the buffer 1, -4 will empty the buffer 1, * -5 will fill the buffer 2, -6 will empty the buffer 2 etc. * A formula appears: * y = |x| * fill buffer (x - 2) / 2 if x & 1 = 1 * empty buffer (x - 1) / 2 if x & 1 = 0 * * Positive/link actions are used to make an epsilon-transition (or a link) * between two sub-automata. * Sub-automata are indexed from 0, but our links must be index + 1. Example * given: the sub-automata 0 in our example has a link to the sub-automata 1 * through OK[{] = 2. Take attention to this :-). * And another thing which must be carefully studying is the place of the * link. For example, with our sub-automata 1 (the big one), we have an * epsilon-transition to the sub-automata 2 through VA["] = 3. It means: * when we arrived in the state VA from the token ", we go in the * sub-automata 3 (the 2nd one actually). And when the linked sub-automata * has finished, we are back in our state and continue our parsing. Take * care of this :-). * * Finally, it is possible to combine positive and char action, separated by a comma. Thus: 7,f is equivalent to make an epsilon-transition to the * automata 7, then consume the action f. * * @var array */ protected $_actions = []; /** * Names of automata. */ protected $_names = []; /** * Recursive stack. * * @var array */ private $_stack = []; /** * Buffers. * * @var array */ protected $buffers = []; /** * Current token's line. * * @var int */ protected $line = 0; /** * Current token's column. * * @var int */ protected $column = 0; /** * Cache compiling result. * * @var array */ protected static $_cache = []; /** * Whether cache is enabled or not. * * @var bool */ protected static $_cacheEnabled = true; /** * Singleton, and set parameters. * * @param array $skip Skip. * @param array $tokens Tokens. * @param array $states States. * @param array $terminal Terminal states. * @param array $transitions Transitions table. * @param array $actions Actions table. * @param array $names Names of automata. */ public function __construct( array $skip, array $tokens, array $states, array $terminal, array $transitions, array $actions, array $names = [] ) { $this->setSkip($skip); $this->setTokens($tokens); $this->setStates($states); $this->setTerminal($terminal); $this->setTransitions($transitions); $this->setActions($actions); $this->setNames($names); return; } /** * Compile a source code. * * @param string $in Source code. * @return void * @throws \Hoa\Compiler\Exception\FinalStateHasNotBeenReached * @throws \Hoa\Compiler\Exception\IllegalToken */ public function compile($in) { $cacheId = md5($in); if (true === self::$_cacheEnabled && true === array_key_exists($cacheId, self::$_cache)) { return self::$_cache[$cacheId]; } $d = 0; $c = 0; // current automata. $_skip = array_flip($this->_skip); $_tokens = array_flip($this->_tokens[$c]); $_states = array_flip($this->_states[$c]); $_actions = [$c => 0]; $nextChar = null; $nextToken = 0; $nextState = $_states['GO']; $nextAction = $_states['GO']; $this->line = $this->getInitialLine(); $this->column = 0; $this->buffers = []; $line = $this->line; $column = $this->column; $this->pre($in); for ($i = 0, $max = strlen($in); $i <= $max; $i++) { //echo "\n---\n\n"; // End of parsing (not automata). if ($i == $max) { while ($c > 0 && in_array($this->_states[$c][$nextState], $this->_terminal[$c])) { list($c, $nextState, ) = array_pop($this->_stack); } if (in_array($this->_states[$c][$nextState], $this->_terminal[$c]) && 0 === $c && true === $this->end()) { //echo '*********** END REACHED **********' . "\n"; if (true === self::$_cacheEnabled) { self::$_cache[$cacheId] = $this->getResult(); } return true; } throw new Exception\FinalStateHasNotBeenReached( 'End of code has been reached but not correctly; ' . 'maybe your program is not complete?', 0 ); } $nextChar = $in[$i]; // Skip. if (isset($_skip[$nextChar])) { if ("\n" === $nextChar) { $line++; $column = 0; } else { $column++; } continue; } else { $continue = false; $handle = substr($in, $i); foreach ($_skip as $sk => $e) { if ($sk[0] != '#') { continue; } $sk = str_replace('#', '\#', substr($sk, 1)); if (0 != preg_match('#^(' . $sk . ')#u', $handle, $match)) { $strlen = strlen($match[1]); if ($strlen > 0) { if (false !== $offset = strrpos($match[1], "\n")) { $column = $strlen - $offset - 1; } else { $column += $strlen; } $line += substr_count($match[1], "\n"); $i += $strlen - 1; $continue = true; break; } } } if (true === $continue) { continue; } } // Epsilon-transition. $epsilon = false; while (array_key_exists($nextToken, $this->_actions[$c][$nextState]) && ( ( is_array($this->_actions[$c][$nextState][$nextToken]) && 0 < $foo = $this->_actions[$c][$nextState][$nextToken][0] ) || ( is_int($this->_actions[$c][$nextState][$nextToken]) && 0 < $foo = $this->_actions[$c][$nextState][$nextToken] ) ) ) { $epsilon = true; if ($_actions[$c] == 0) { //echo '*** Change automata (up to ' . ($foo - 1) . ')' . "\n"; $this->_stack[$d] = [$c, $nextState, $nextToken]; end($this->_stack); $c = $foo - 1; $_tokens = array_flip($this->_tokens[$c]); $_states = array_flip($this->_states[$c]); $nextState = $_states['GO']; $nextAction = $_states['GO']; $nextToken = 0; $_actions[$c] = 0; $d++; } elseif ($_actions[$c] == 2) { $_actions[$c] = 0; break; } } if (true === $epsilon) { $epsilon = false; $nextToken = false; } // Token. if (isset($_tokens[$nextChar])) { $token = $nextChar; $nextToken = $_tokens[$token]; if ("\n" === $nextChar) { $line++; $column = 0; } else { $column++; } } else { $nextToken = false; $handle = substr($in, $i); foreach ($_tokens as $token => $e) { if ('#' !== $token[0]) { continue; } $ntoken = str_replace('#', '\#', substr($token, 1)); if (0 != preg_match('#^(' . $ntoken . ')#u', $handle, $match)) { $strlen = strlen($match[1]); if ($strlen > 0) { if (false !== $offset = strrpos($match[1], "\n")) { $column = $strlen - $offset - 1; } else { $column += $strlen; } $nextChar = $match[1]; $nextToken = $e; $i += $strlen - 1; $line += substr_count($match[1], "\n"); break; } } } } /* echo '>>> Automata ' . $c . "\n" . '>>> Next state ' . $nextState . "\n" . '>>> Token ' . $token . "\n" . '>>> Next char ' . $nextChar . "\n"; */ // Got it! if (false !== $nextToken) { if (is_array($this->_actions[$c][$nextState][$nextToken])) { $nextAction = $this->_actions[$c][$nextState][$nextToken][1]; } else { $nextAction = $this->_actions[$c][$nextState][$nextToken]; } $nextState = $_states[$this->_transitions[$c][$nextState][$nextToken]]; } // Oh :-(. if (false === $nextToken || $nextState === $_states['__']) { $pop = array_pop($this->_stack); $d--; // Go back to a parent automata. if ((in_array($this->_states[$c][$nextState], $this->_terminal[$c]) && null !== $pop) || ($nextState === $_states['__'] && null !== $pop)) { //echo '!!! Change automata (down)' . "\n"; list($c, $nextState, $nextToken) = $pop; $_actions[$c] = 2; $i -= strlen($nextChar); $_tokens = array_flip($this->_tokens[$c]); $_states = array_flip($this->_states[$c]); /* echo '!!! Automata ' . $c . "\n" . '!!! Next state ' . $nextState . "\n"; */ continue; } $error = explode("\n", $in); $error = $error[$this->line]; throw new Exception\IllegalToken( 'Illegal token at line ' . ($this->line + 1) . ' and column ' . ($this->column + 1) . "\n" . $error . "\n" . str_repeat(' ', $this->column) . '↑', 1, [], $this->line + 1, $this->column + 1 ); } $this->line = $line; $this->column = $column; //echo '<<< Next state ' . $nextState . "\n"; $this->buffers[-1] = $nextChar; // Special actions. if ($nextAction < 0) { $buffer = abs($nextAction); if (($buffer & 1) == 0) { $this->buffers[($buffer - 2) / 2] = null; } else { $buffer = ($buffer - 1) / 2; if (!(isset($this->buffers[$buffer]))) { $this->buffers[$buffer] = null; } $this->buffers[$buffer] .= $nextChar; } continue; } if (0 !== $nextAction) { $this->consume($nextAction); } } return; } /** * Consume actions. * Please, see the actions table definition to learn more. * * @param int $action Action. * @return void */ abstract protected function consume($action); /** * Compute source code before compiling it. * * @param string &$in Source code. * @return void */ protected function pre(&$in) { return; } /** * Verify compiler state when ending the source code. * * @return bool */ protected function end() { return true; } /** * Get the result of the compiling. * * @return mixed */ abstract public function getResult(); /** * Set initial line. * * @param int $line Initial line. * @return int */ public function setInitialLine($line) { $old = $this->_initialLine; $this->_initialLine = $line; return $old; } /** * Set tokens to skip. * * @param array $skip Skip. * @return array */ public function setSkip(array $skip) { $old = $this->_skip; $this->_skip = $skip; return $old; } /** * Set tokens. * * @param array $tokens Tokens. * @return array */ public function setTokens(array $tokens) { $old = $this->_tokens; $this->_tokens = $tokens; return $old; } /** * Set states. * * @param array $states States. * @return array */ public function setStates(array $states) { $old = $this->_states; $this->_states = $states; return $old; } /** * Set terminal states. * * @param array $terminal Terminal states. * @return array */ public function setTerminal(array $terminal) { $old = $this->_terminal; $this->_terminal = $terminal; return $old; } /** * Set transitions table. * * @param array $transitions Transitions table. * @return array */ public function setTransitions(array $transitions) { $old = $this->_transitions; $this->_transitions = $transitions; return $old; } /** * Set actions table. * * @param array $actions Actions table. * @return array */ public function setActions(array $actions) { foreach ($actions as $e => $automata) { foreach ($automata as $i => $state) { foreach ($state as $j => $token) { if (0 != preg_match('#^(\d+),(.*)$#', $token, $matches)) { $actions[$e][$i][$j] = [(int) $matches[1], $matches[2]]; } } } } $old = $this->_actions; $this->_actions = $actions; return $old; } /** * Set names of automata. * * @param array $names Names of automata. * @return array */ public function setNames(array $names) { $old = $this->_names; $this->_names = $names; return $old; } /** * Get initial line. * * @return int */ public function getInitialLine() { return $this->_initialLine; } /** * Get skip tokens. * * @return array */ public function getSkip() { return $this->_skip; } /** * Get tokens. * * @return array */ public function getTokens() { return $this->_tokens; } /** * Get states. * * @return array */ public function getStates() { return $this->_states; } /** * Get terminal states. * * @return array */ public function getTerminal() { return $this->_terminal; } /** * Get transitions table. * * @return array */ public function getTransitions() { return $this->_transitions; } /** * Get actions table. * * @return array */ public function getActions() { return $this->_actions; } /** * Get names of automata. * * @return array */ public function getNames() { return $this->_names; } /** * Enable cache * * @return bool */ public static function enableCache() { $old = self::$_cacheEnabled; self::$_cacheEnabled = true; return $old; } /** * Disable cache * * @return bool */ public static function disableCache() { $old = self::$_cacheEnabled; self::$_cacheEnabled = false; return $old; } /** * Transform automatas into DOT language. * * @return void */ public function __toString() { $out = 'digraph ' . str_replace('\\', '', get_class($this)) . ' {' . "\n" . ' rankdir=LR;' . "\n" . ' label="Automata of ' . str_replace('\\', '\\\\', get_class($this)) . '";'; $transitions = array_reverse($this->_transitions, true); foreach ($transitions as $e => $automata) { $out .= "\n\n" . ' subgraph cluster_' . $e . ' {' . "\n" . ' label="Automata #' . $e . (isset($this->_names[$e]) ? ' (' . str_replace('"', '\\"', $this->_names[$e]) . ')' : '') . '";' . "\n"; if (!empty($this->_terminal[$e])) { $out .= ' node[shape=doublecircle] "' . $e . '_' . implode('" "' . $e . '_', $this->_terminal[$e]) . '";' . "\n"; } $out .= ' node[shape=circle];' . "\n"; foreach ($this->_states[$e] as $i => $state) { $name = []; $label = $state; if (__ != $state) { foreach ($this->_transitions[$e][$i] as $j => $foo) { $ep = $this->_actions[$e][$i][$j]; if (is_array($ep)) { $ep = $ep[0]; } if (is_int($ep)) { $ep--; if (0 < $ep && !isset($name[$ep])) { $name[$ep] = $ep; } } } if (!empty($name)) { $label .= ' (' . implode(', ', $name) . ')'; } $out .= ' "' . $e . '_' . $state . '" ' . '[label="' . $label . '"];' . "\n"; } } foreach ($automata as $i => $transition) { $transition = array_reverse($transition, true); foreach ($transition as $j => $state) { if (__ != $this->_states[$e][$i] && __ != $state) { $label = str_replace('\\', '\\\\', $this->_tokens[$e][$j]); $label = str_replace('"', '\\"', $label); if ('#' === $label[0]) { $label = substr($label, 1); } $out .= ' "' . $e . '_' . $this->_states[$e][$i] . '" -> "' . $e . '_' . $state . '"' . ' [label="' . $label . '"];' . "\n"; } } } $out .= ' node[shape=point,label=""] "' . $e . '_";' . "\n" . ' "' . $e . '_" -> "' . $e . '_GO";' . "\n" . ' }'; } $out .= "\n" . '}' . "\n"; return $out; } }