given(
                $lexer  = new SUT(),
                $datum  = 'abc',
                $tokens = [
                    'default' => [
                        'abc' => 'abc'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->object($result)
                    ->isInstanceOf(\Generator::class);
    }

    /**
     * Lexes `ghidefabc` with three lexemes and walks the generator by hand.
     *
     * Expects the tokens `tail`, `def` and `abc` (offsets 0, 3, 6), then an
     * `EOF` token of length 0 at offset 9, after which `current()` is null.
     */
    public function case_last_token_is_EOF()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'ghidefabc',
                $tokens = [
                    'default' => [
                        'abc'  => 'abc',
                        'def'  => 'def',
                        'tail' => '\w{3}'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->object($result)
                    ->isInstanceOf(\Generator::class)
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'tail',
                        'value'     => 'ghi',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 0
                    ])
                ->let($result->next())
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'def',
                        'value'     => 'def',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 3
                    ])
                ->let($result->next())
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'abc',
                        'value'     => 'abc',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 6
                    ])
                ->let($result->next())
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'EOF',
                        'value'     => 'EOF',
                        'length'    => 0,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 9
                    ])
                // Past EOF the generator has nothing left to yield.
                ->let($result->next())
                ->variable($result->current())
                    ->isNull();
    }

    /**
     * Lexes `abczdef` where `z` matches no lexeme.
     *
     * The first token (`abc`) is yielded normally; advancing the generator
     * must then throw `UnrecognizedToken` with a message pointing at `z`.
     */
    public function case_unrecognized_token()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abczdef',
                $tokens = [
                    'default' => [
                        'abc' => 'abc',
                        'def' => 'def'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'abc',
                        'value'     => 'abc',
                        'length'    => 3,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 0
                    ])
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\UnrecognizedToken::class)
                    ->hasMessage(
                        'Unrecognized token "z" at line 1 and column 4:' . "\n" .
                        'abczdef' . "\n" .
                        // Three characters precede `z` (column 4), so the
                        // marker needs three leading spaces; this matches
                        // case_unicode_disabled, where one preceding
                        // character yields one space.
                        '   ↑'
                    );
    }

    /**
     * Checks namespace switching through the `token:namespace` key syntax.
     *
     * `abc` moves to `one`, `def` moves to `two` and `ghi` moves back to
     * `default`; each token is reported with the namespace it was matched in.
     */
    public function case_namespace()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdefghiabc',
                $tokens = [
                    'default' => ['abc:one'     => 'abc'],
                    'one'     => ['def:two'     => 'def'],
                    'two'     => ['ghi:default' => 'ghi']
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'def',
                            'value'     => 'def',
                            'length'    => 3,
                            'namespace' => 'one',
                            'keep'      => true,
                            'offset'    => 3
                        ],
                        [
                            'token'     => 'ghi',
                            'value'     => 'ghi',
                            'length'    => 3,
                            'namespace' => 'two',
                            'keep'      => true,
                            'offset'    => 6
                        ],
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 9
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 12
                        ]
                    ]);
    }

    /**
     * Same input as case_namespace(), but `ghi` uses `__shift__ * 2`.
     *
     * The expected token stream is identical: after `ghi` the trailing `abc`
     * is again matched in the `default` namespace.
     */
    public function case_namespace_with_shift()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdefghiabc',
                $tokens = [
                    'default' => ['abc:one'           => 'abc'],
                    'one'     => ['def:two'           => 'def'],
                    'two'     => ['ghi:__shift__ * 2' => 'ghi']
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'def',
                            'value'     => 'def',
                            'length'    => 3,
                            'namespace' => 'one',
                            'keep'      => true,
                            'offset'    => 3
                        ],
                        [
                            'token'     => 'ghi',
                            'value'     => 'ghi',
                            'length'    => 3,
                            'namespace' => 'two',
                            'keep'      => true,
                            'offset'    => 6
                        ],
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 9
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 12
                        ]
                    ]);
    }

    /**
     * A `__shift__` from the initial namespace must fail.
     *
     * Advancing the generator is expected to throw a `Lexer` exception
     * stating that the stack contains only 0 namespaces.
     */
    public function case_namespace_shift_too_much()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdefghiabc',
                $tokens = [
                    'default' => ['abc:__shift__' => 'abc']
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'Cannot shift namespace 1-times, from token abc ' .
                        'in namespace default, because the stack contains ' .
                        'only 0 namespaces.'
                    );
    }

    /**
     * Switching to an undeclared namespace (`foo`) must fail.
     *
     * Advancing the generator is expected to throw a `Lexer` exception that
     * names the missing namespace, the calling token and its namespace.
     */
    public function case_namespace_does_not_exist()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdef',
                $tokens = [
                    'default' => [
                        'abc:foo' => 'abc',
                        'def'     => 'def'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'Namespace foo does not exist, called by token abc ' .
                        'in namespace default.'
                    );
    }

    /**
     * Checks that text matched by the `skip` lexeme is not yielded.
     *
     * The whitespace runs in the datum are 1, 3 and 2 characters long, so
     * the kept tokens sit at offsets 0, 4, 10 and 15, with EOF at 18 (the
     * datum length). No `skip` token appears in the expected output.
     */
    public function case_skip()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                // Whitespace runs of differing length exercise `\s+` and
                // make the expected offsets below line up with the input.
                $datum  = 'abc def   ghi  abc',
                $tokens = [
                    'default' => [
                        'skip' => '\s+',
                        'abc'  => 'abc',
                        'def'  => 'def',
                        'ghi'  => 'ghi'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'def',
                            'value'     => 'def',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 4
                        ],
                        [
                            'token'     => 'ghi',
                            'value'     => 'ghi',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 10
                        ],
                        [
                            'token'     => 'abc',
                            'value'     => 'abc',
                            'length'    => 3,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 15
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 18
                        ]
                    ]);
    }

    /**
     * A lexeme whose pattern can match the empty string must be rejected.
     *
     * `\d?` matches nothing at the start of `abcdef`; advancing the
     * generator is expected to throw a `Lexer` exception naming the lexeme.
     */
    public function case_match_empty_lexeme()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = 'abcdef',
                $tokens = [
                    'default' => [
                        'abc' => '\d?',
                        'def' => 'def'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'A lexeme must not match an empty value, which is ' .
                        'the case of "abc" (\d?).'
                    );
    }

    /**
     * With a default-constructed lexer, `\w` matches the non-ASCII `ß`.
     *
     * Token lengths are 1 for both multi-byte characters, while the
     * expected offsets (0, 3, 5) advance by more than 1 per character.
     */
    public function case_unicode_enabled_by_default()
    {
        $this
            ->given(
                $lexer  = new SUT(),
                $datum  = '…ß',
                $tokens = [
                    'default' => [
                        'foo' => '…',
                        'bar' => '\w'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array(iterator_to_array($result))
                    ->isEqualTo([
                        [
                            'token'     => 'foo',
                            'value'     => '…',
                            'length'    => 1,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 0
                        ],
                        [
                            'token'     => 'bar',
                            'value'     => 'ß',
                            'length'    => 1,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 3
                        ],
                        [
                            'token'     => 'EOF',
                            'value'     => 'EOF',
                            'length'    => 0,
                            'namespace' => 'default',
                            'keep'      => true,
                            'offset'    => 5
                        ]
                    ]);
    }

    /**
     * With `lexer.unicode` set to false, `\w` no longer matches `ß`.
     *
     * The literal `…` lexeme still matches; advancing the generator must
     * then throw `UnrecognizedToken` for `ß`.
     */
    public function case_unicode_disabled()
    {
        $this
            ->given(
                $lexer  = new SUT(['lexer.unicode' => false]),
                $datum  = '…ß',
                $tokens = [
                    'default' => [
                        'foo' => '…',
                        'bar' => '\w'
                    ]
                ]
            )
            ->when($result = $lexer->lexMe($datum, $tokens))
            ->then
                ->array($result->current())
                    ->isEqualTo([
                        'token'     => 'foo',
                        'value'     => '…',
                        'length'    => 1,
                        'namespace' => 'default',
                        'keep'      => true,
                        'offset'    => 0
                    ])
                ->exception(function () use ($result) {
                    $result->next();
                })
                    ->isInstanceOf(LUT\Exception\UnrecognizedToken::class)
                    ->hasMessage(
                        'Unrecognized token "ß" at line 1 and column 4:' . "\n" .
                        '…ß' . "\n" .
                        // One character precedes `ß`, hence a single space.
                        ' ↑'
                    );
    }
}