<?php

/**
 * Hoa
 *
 *
 * @license
 *
 * New BSD License
 *
 * Copyright © 2007-2017, Hoa community. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the Hoa nor the names of its contributors may be
 *       used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

namespace Hoa\Compiler\Llk;

use Hoa\Compiler;
use Hoa\Consistency;
use Hoa\Stream;

/**
 * Class \Hoa\Compiler\Llk.
 *
 * This class provides a set of static helpers to manipulate (load and save) a
 * compiler more easily.
 *
 * @copyright  Copyright © 2007-2017 Hoa community
 * @license    New BSD License
 */
abstract class Llk
{
    /**
     * Load in-memory parser from a grammar description file.
     * The grammar description language is PP. See
     * `hoa://Library/Compiler/Llk/Llk.pp` for an example, or the documentation.
     *
     * The whole stream is read, split into tokens, raw rules and pragmas by
     * `parsePP()`, the raw rules are analyzed, and a parser is built from the
     * result.
     *
     * @param   \Hoa\Stream\IStream\In  $stream    Stream to read the grammar from.
     * @return  \Hoa\Compiler\Llk\Parser
     * @throws  \Hoa\Compiler\Exception  When nothing could be read from the
     *                                   stream, or when the grammar contains
     *                                   an unrecognized instruction.
     */
    public static function load(Stream\IStream\In $stream)
    {
        $pp = $stream->readAll();

        if (empty($pp)) {
            $message = 'The grammar is empty';

            // Give a hint about the most likely cause when the stream is able
            // to report its current position.
            if ($stream instanceof Stream\IStream\Pointable) {
                if (0 < $stream->tell()) {
                    $message .=
                        ': The stream ' . $stream->getStreamName() .
                        ' is pointable and not rewinded, maybe it ' .
                        'could be the reason';
                } else {
                    $message .=
                        ': Nothing to read on the stream ' .
                        $stream->getStreamName();
                }
            }

            throw new Compiler\Exception($message . '.', 0);
        }

        static::parsePP($pp, $tokens, $rawRules, $pragmas, $stream->getStreamName());

        $ruleAnalyzer = new Rule\Analyzer($tokens);
        $rules        = $ruleAnalyzer->analyzeRules($rawRules);

        return new Parser($tokens, $rules, $pragmas);
    }

    /**
     * Save in-memory parser to PHP code.
     * The generated PHP code will load the same in-memory parser. The state
     * will be reset. The parser will be saved as a class, named after
     * `$className`. To retrieve the parser, one must instantiate this class.
     *
     * Every string emitted into the generated code (namespaces, token names
     * and values, rule names, node IDs, pragmas, PP representations) is
     * written as a single-quoted PHP literal with both backslashes and single
     * quotes escaped, so the generated class reproduces the exact same
     * strings when it is loaded.
     *
     * @param   \Hoa\Compiler\Llk\Parser  $parser       Parser to save.
     * @param   string                    $className    Parser classname.
     * @return  string
     */
    public static function save(Parser $parser, $className)
    {
        $outTokens  = '';
        $outRules   = '';
        $outPragmas = '';
        $outExtra   = '';

        // Transitional rules are emitted verbatim (unquoted); named rules are
        // emitted as string literals.
        $escapeRuleName = function ($ruleName) use ($parser) {
            if ($parser->getRule($ruleName)->isTransitional()) {
                return $ruleName;
            }

            return self::quote($ruleName);
        };

        foreach ($parser->getTokens() as $namespace => $tokens) {
            $outTokens .= '                ' . self::quote($namespace) . ' => [' . "\n";

            foreach ($tokens as $tokenName => $tokenValue) {
                $outTokens .=
                    '                    ' . self::quote($tokenName) . ' => ' .
                    self::quote($tokenValue) . ',' . "\n";
            }

            $outTokens .= '                ],' . "\n";
        }

        foreach ($parser->getRules() as $rule) {
            // Constructor arguments of the rule, in declaration order. The
            // keys are only labels; the values are imploded as-is below.
            $arguments = [];

            // Name.
            $arguments['name'] = $escapeRuleName($rule->getName());

            if ($rule instanceof Rule\Token) {
                // Token name.
                $arguments['tokenName'] = self::quote($rule->getTokenName());
            } else {
                if ($rule instanceof Rule\Repetition) {
                    // Minimum.
                    $arguments['min'] = $rule->getMin();

                    // Maximum.
                    $arguments['max'] = $rule->getMax();
                }

                // Children.
                $ruleChildren = $rule->getChildren();

                if (null === $ruleChildren) {
                    $arguments['children'] = 'null';
                } elseif (false === is_array($ruleChildren)) {
                    $arguments['children'] = $escapeRuleName($ruleChildren);
                } else {
                    $arguments['children'] =
                        '[' .
                        implode(', ', array_map($escapeRuleName, $ruleChildren)) .
                        ']';
                }
            }

            // Node ID.
            $nodeId = $rule->getNodeId();

            if (null === $nodeId) {
                $arguments['nodeId'] = 'null';
            } else {
                $arguments['nodeId'] = self::quote($nodeId);
            }

            if ($rule instanceof Rule\Token) {
                // Unification.
                $arguments['unification'] = $rule->getUnificationIndex();

                // Kept.
                $arguments['kept'] = $rule->isKept() ? 'true' : 'false';
            }

            // Default node ID.
            if (null !== $defaultNodeId = $rule->getDefaultId()) {
                $defaultNodeOptions = $rule->getDefaultOptions();

                if (!empty($defaultNodeOptions)) {
                    $defaultNodeId .= ':' . implode('', $defaultNodeOptions);
                }

                $outExtra .=
                    "\n" .
                    '        $this->getRule(' . $arguments['name'] . ')->setDefaultId(' .
                    self::quote($defaultNodeId) .
                    ');';
            }

            // PP representation.
            if (null !== $ppRepresentation = $rule->getPPRepresentation()) {
                $outExtra .=
                    "\n" .
                    '        $this->getRule(' . $arguments['name'] . ')->setPPRepresentation(' .
                    self::quote($ppRepresentation) .
                    ');';
            }

            $outRules .=
                "\n" .
                '                ' . $arguments['name'] . ' => new \\' . get_class($rule) . '(' .
                implode(', ', $arguments) .
                '),';
        }

        foreach ($parser->getPragmas() as $pragmaName => $pragmaValue) {
            // Booleans and integers are emitted as PHP literals, anything
            // else as a quoted string.
            if (is_bool($pragmaValue)) {
                $pragmaLiteral = true === $pragmaValue ? 'true' : 'false';
            } elseif (is_int($pragmaValue)) {
                $pragmaLiteral = $pragmaValue;
            } else {
                $pragmaLiteral = self::quote($pragmaValue);
            }

            $outPragmas .=
                "\n" .
                '                ' . self::quote($pragmaName) . ' => ' .
                $pragmaLiteral .
                ',';
        }

        return
            'class ' . $className . ' extends \Hoa\Compiler\Llk\Parser' . "\n" .
            '{' . "\n" .
            '    public function __construct()' . "\n" .
            '    {' . "\n" .
            '        parent::__construct(' . "\n" .
            '            [' . "\n" .
            $outTokens .
            '            ],' . "\n" .
            '            [' .
            $outRules . "\n" .
            '            ],' . "\n" .
            '            [' .
            $outPragmas . "\n" .
            '            ]' . "\n" .
            '        );' . "\n" .
            $outExtra . "\n" .
            '    }' . "\n" .
            '}' . "\n";
    }

    /**
     * Parse the grammar description language.
     *
     * The description is processed line by line:
     *   * empty lines and lines starting with `//` are ignored;
     *   * lines starting with `%` must be a `%pragma`, `%skip` or `%token`
     *     instruction;
     *   * any other line is a rule declaration: the line minus its last
     *     character is the rule name, and the following lines starting with
     *     a space or a tabulation form the rule body.
     *
     * @param   string  $pp            Grammar description.
     * @param   array   $tokens        Extracted tokens.
     * @param   array   $rules         Extracted raw rules.
     * @param   array   $pragmas       Extracted raw pragmas.
     * @param   string  $streamName    The name of the stream containing the grammar.
     * @return  void
     * @throws  \Hoa\Compiler\Exception  When a `%` instruction is not recognized.
     */
    public static function parsePP($pp, &$tokens, &$rules, &$pragmas, $streamName)
    {
        $lines   = explode("\n", $pp);
        $pragmas = [];
        $tokens  = ['default' => []];
        $rules   = [];

        for ($i = 0, $m = count($lines); $i < $m; ++$i) {
            $line = rtrim($lines[$i]);

            if (0 === strlen($line) || '//' === substr($line, 0, 2)) {
                continue;
            }

            if ('%' === $line[0]) {
                // `1 === preg_match(…)` rather than `0 !== preg_match(…)`: a
                // PCRE failure returns `false`, which must not be treated as
                // a match because `$matches` is not populated in that case.
                if (1 === preg_match('#^%pragma\h+([^\h]+)\h+(.*)$#u', $line, $matches)) {
                    switch ($matches[2]) {
                        case 'true':
                            $pragmaValue = true;

                            break;

                        case 'false':
                            $pragmaValue = false;

                            break;

                        default:
                            if (true === ctype_digit($matches[2])) {
                                $pragmaValue = intval($matches[2]);
                            } else {
                                $pragmaValue = $matches[2];
                            }
                    }

                    $pragmas[$matches[1]] = $pragmaValue;
                } elseif (1 === preg_match('#^%skip\h+(?:([^:]+):)?([^\h]+)\h+(.*)$#u', $line, $matches)) {
                    if (empty($matches[1])) {
                        $matches[1] = 'default';
                    }

                    if (!isset($tokens[$matches[1]])) {
                        $tokens[$matches[1]] = [];
                    }

                    // Several `%skip` of the same namespace are merged into a
                    // single alternation.
                    if (!isset($tokens[$matches[1]]['skip'])) {
                        $tokens[$matches[1]]['skip'] = $matches[3];
                    } else {
                        $tokens[$matches[1]]['skip'] =
                            '(?:' .
                                $tokens[$matches[1]]['skip'] . '|' .
                                $matches[3] .
                            ')';
                    }
                } elseif (1 === preg_match('#^%token\h+(?:([^:]+):)?([^\h]+)\h+(.*?)(?:\h+->\h+(.*))?$#u', $line, $matches)) {
                    if (empty($matches[1])) {
                        $matches[1] = 'default';
                    }

                    // `-> namespace`: the destination namespace is appended
                    // to the token name, separated by a colon.
                    if (isset($matches[4]) && !empty($matches[4])) {
                        $matches[2] = $matches[2] . ':' . $matches[4];
                    }

                    if (!isset($tokens[$matches[1]])) {
                        $tokens[$matches[1]] = [];
                    }

                    $tokens[$matches[1]][$matches[2]] = $matches[3];
                } else {
                    throw new Compiler\Exception(
                        'Unrecognized instructions:' . "\n" .
                        '    %s' . "\n" . 'in file %s at line %d.',
                        1,
                        [
                            $line,
                            $streamName,
                            $i + 1
                        ]
                    );
                }

                continue;
            }

            // Rule declaration: drop the last character of the line to get
            // the rule name.
            $ruleName = substr($line, 0, -1);
            $rule     = null;
            ++$i;

            // Collect the rule body: indented lines, with `//` comment lines
            // skipped.
            while ($i < $m &&
                   isset($lines[$i][0]) &&
                   (' ' === $lines[$i][0] ||
                    "\t" === $lines[$i][0] ||
                    '//' === substr($lines[$i], 0, 2))) {
                if ('//' === substr($lines[$i], 0, 2)) {
                    ++$i;

                    continue;
                }

                $rule .= ' ' . trim($lines[$i++]);
            }

            // The line that ended the body is not empty: step back so the
            // enclosing `for` processes it on its next iteration.
            if (isset($lines[$i][0])) {
                --$i;
            }

            $rules[$ruleName] = $rule;
        }
    }

    /**
     * Render a value as a single-quoted PHP string literal.
     * Backslashes and single quotes are escaped in one pass, so the literal
     * evaluates back to exactly the given string.
     *
     * @param   string  $string    Value to quote.
     * @return  string
     */
    private static function quote($string)
    {
        return
            '\'' .
            strtr((string) $string, ['\\' => '\\\\', '\'' => '\\\'']) .
            '\'';
    }
}

/**
 * Flex entity.
 */
Consistency::flexEntity('Hoa\Compiler\Llk\Llk');