<?php

/**
 * Hoa
 *
 *
 * @license
 *
 * New BSD License
 *
 * Copyright © 2007-2017, Hoa community. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the Hoa nor the names of its contributors may be
 *       used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

namespace Hoa\Compiler\Test\Unit\Llk;

use Hoa\Compiler as LUT;
use Hoa\Compiler\Llk\Lexer as SUT;
use Hoa\Test;

/**
 * Class \Hoa\Compiler\Test\Unit\Llk\Lexer.
 *
 * Test suite of the lexer class.
 *
 * Every case builds a lexer (`SUT`), feeds it a datum and a token table of
 * the form `[namespace => [tokenName => regex]]` through `lexMe()`, and
 * asserts on the tokens produced by the returned generator.
 *
 * @copyright  Copyright © 2007-2017 Hoa community
 * @license    New BSD License
 */
class Lexer extends Test\Unit\Suite
{
    /**
     * `lexMe()` must hand back a `\Generator` instance.
     */
    public function case_is_a_generator()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abc',
                $tokens = [
                    'default' => [
                        'abc' => 'abc'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->object($result)
                    ->isInstanceOf(\Generator::class);
    }

    /**
     * The token sequence must end with an `EOF` token of length 0 located at
     * the end of the datum, after which the generator yields nothing more.
     *
     * The first lexeme (`ghi`) is expected under the `tail` token: neither
     * `abc` nor `def` matches at offset 0, only `\w{3}` does.
     */
    public function case_last_token_is_EOF()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'ghidefabc',
                $tokens = [
                    'default' => [
                        'abc'  => 'abc',
                        'def'  => 'def',
                        'tail' => '\w{3}'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->object($result)
                    ->isInstanceOf(\Generator::class)
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'tail',
                        'value'     => 'ghi',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 0
                    ])
                ->let($result->next())
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'def',
                        'value'     => 'def',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 3
                    ])
                ->let($result->next())
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'abc',
                        'value'     => 'abc',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 6
                    ])
                ->let($result->next())
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'EOF',
                        'value'     => 'EOF',
                        'length'    => 0,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 9
                    ])
                // Advancing past EOF must exhaust the generator.
                ->let($result->next())
                ->variable($result->current())
                    ->isNull();
    }

    /**
     * A character that no token matches must raise an `UnrecognizedToken`
     * exception whose message shows the datum and points at the offender.
     */
    public function case_unrecognized_token()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abczdef',
                $tokens = [
                    'default' => [
                        'abc' => 'abc',
                        'def' => 'def'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'abc',
                        'value'     => 'abc',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 0
                    ])
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\UnrecognizedToken::class)
                    ->hasMessage(
                        'Unrecognized token "z" at line 1 and column 4:' . "\n" .
                        'abczdef' . "\n" .
                        // Three leading spaces so that the arrow sits under
                        // column 4, i.e. under the unrecognized `z`.
                        '   ↑'
                    );
    }

    /**
     * A token named `name:namespace` must be emitted as `name` in the current
     * namespace, the following token being lexed in `namespace`.
     *
     * Here the datum walks `default` → `one` → `two` → `default`.
     */
    public function case_namespace()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdefghiabc',
                $tokens = [
                    'default' => ['abc:one'     => 'abc'],
                    'one'     => ['def:two'     => 'def'],
                    'two'     => ['ghi:default' => 'ghi']
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'def',
                            'value'     => 'def',
                            'length'    => 3,
                            'namespace' => 'one',
                            'keep'      => true,
                            'offset'    => 3
                        ],
                        [
                            'token'     => 'ghi',
                            'value'     => 'ghi',
                            'length'    => 3,
                            'namespace' => 'two',
                            'keep'      => true,
                            'offset'    => 6
                        ],
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 9
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 12
                        ]
                    ]);
    }

    /**
     * Same walk as `case_namespace`, except that the way back from `two` to
     * `default` is expressed with `__shift__ * 2` instead of naming the
     * target namespace.
     */
    public function case_namespace_with_shift()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdefghiabc',
                $tokens = [
                    'default' => ['abc:one'           => 'abc'],
                    'one'     => ['def:two'           => 'def'],
                    'two'     => ['ghi:__shift__ * 2' => 'ghi']
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'def',
                            'value'     => 'def',
                            'length'    => 3,
                            'namespace' => 'one',
                            'keep'      => true,
                            'offset'    => 3
                        ],
                        [
                            'token'     => 'ghi',
                            'value'     => 'ghi',
                            'length'    => 3,
                            'namespace' => 'two',
                            'keep'      => true,
                            'offset'    => 6
                        ],
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 9
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 12
                        ]
                    ]);
    }

    /**
     * Using `__shift__` while no namespace has been entered yet must raise a
     * lexer exception stating that the stack contains 0 namespaces.
     */
    public function case_namespace_shift_too_much()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdefghiabc',
                $tokens = [
                    'default' => ['abc:__shift__' => 'abc']
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'Cannot shift namespace 1-times, from token abc ' .
                        'in namespace default, because the stack contains ' .
                        'only 0 namespaces.'
                    );
    }

    /**
     * A token that targets a namespace absent from the token table (`foo`)
     * must raise a lexer exception naming both the namespace and the token.
     */
    public function case_namespace_does_not_exist()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdef',
                $tokens = [
                    'default' => [
                        'abc:foo' => 'abc',
                        'def'     => 'def'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'Namespace foo does not exist, called by token abc ' .
                        'in namespace default.'
                    );
    }

    /**
     * Lexemes matched by the `skip` token must not appear in the output, but
     * they must still move the offset of the tokens that follow.
     */
    public function case_skip()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                // The runs of 1, 3 and 2 spaces are significant: they are
                // what places `def`, `ghi` and the second `abc` at offsets
                // 4, 10 and 15, and `EOF` at 18.
                $datum  = 'abc def   ghi  abc',
                $tokens = [
                    'default' => [
                        'skip' => '\s+',
                        'abc'  => 'abc',
                        'def'  => 'def',
                        'ghi'  => 'ghi'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'def',
                            'value'     => 'def',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 4
                        ],
                        [
                            'token'     => 'ghi',
                            'value'     => 'ghi',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 10
                        ],
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 15
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 18
                        ]
                    ]);
    }

    /**
     * A token whose regular expression can match the empty string (`\d?`)
     * must be rejected with a lexer exception instead of being emitted.
     */
    public function case_match_empty_lexeme()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdef',
                $tokens = [
                    'default' => [
                        'abc' => '\d?',
                        'def' => 'def'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'A lexeme must not match an empty value, which is ' .
                        'the case of "abc" (\d?).'
                    );
    }

    /**
     * With default options, `\w` must match the non-ASCII letter `ß`.
     *
     * The expected tokens report a length of 1 for each multi-byte character
     * while offsets go 0, 3, 5 — presumably lengths count characters and
     * offsets count bytes; confirm against the lexer implementation.
     */
    public function case_unicode_enabled_by_default()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = '…ß',
                $tokens = [
                    'default' => [
                        'foo' => '…',
                        'bar' => '\w'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'foo',
                            'value'     => '…',
                            'length'    => 1,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'bar',
                            'value'     => 'ß',
                            'length'    => 1,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 3
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 5
                        ]
                    ]);
    }

    /**
     * With the `lexer.unicode` option set to `false`, the literal `…` must
     * still be lexed, but `\w` must no longer match `ß`, which results in an
     * `UnrecognizedToken` exception.
     */
    public function case_unicode_disabled()
    {
        $this
            ->given(
                $lexer  = new SUT(['lexer.unicode' => false]),
                $datum  = '…ß',
                $tokens = [
                    'default' => [
                        'foo' => '…',
                        'bar' => '\w'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'foo',
                        'value'     => '…',
                        'length'    => 1,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 0
                    ])
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\UnrecognizedToken::class)
                    ->hasMessage(
                        'Unrecognized token "ß" at line 1 and column 4:' . "\n" .
                        '…ß' . "\n" .
                        // NOTE(review): a single leading space here, although
                        // the reported column is 4 — presumably the column is
                        // byte-based while the pointer is placed by character
                        // count (one character precedes `ß`). Confirm against
                        // the lexer's message format.
                        ' ↑'
                    );
    }
}