1<?php
2
3/**
4 * Hoa
5 *
6 *
7 * @license
8 *
9 * New BSD License
10 *
11 * Copyright © 2007-2017, Hoa community. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
15 *     * Redistributions of source code must retain the above copyright
16 *       notice, this list of conditions and the following disclaimer.
17 *     * Redistributions in binary form must reproduce the above copyright
18 *       notice, this list of conditions and the following disclaimer in the
19 *       documentation and/or other materials provided with the distribution.
20 *     * Neither the name of the Hoa nor the names of its contributors may be
21 *       used to endorse or promote products derived from this software without
22 *       specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37namespace Hoa\Compiler\Test\Unit\Llk;
38
39use Hoa\Compiler as LUT;
40use Hoa\Compiler\Llk\Lexer as SUT;
41use Hoa\Test;
42
/**
 * Class \Hoa\Compiler\Test\Unit\Llk\Lexer.
 *
 * Test suite of the LL(k) lexer: every case feeds a small token table and an
 * input string to `lexMe` and checks the stream of tokens (or the exception)
 * that comes out of it.
 *
 * @copyright  Copyright © 2007-2017 Hoa community
 * @license    New BSD License
 */
class Lexer extends Test\Unit\Suite
{
    /**
     * Build the array a kept token is expected to look like.
     *
     * The keys are emitted in the same order as the literal arrays this
     * helper stands for: token, value, length, namespace, keep, offset.
     *
     * @param   string  $token        Token name.
     * @param   string  $value        Matched lexeme.
     * @param   int     $length       Expected `length` entry.
     * @param   string  $namespace    Namespace the token is expected in.
     * @param   int     $offset       Expected `offset` entry.
     * @return  array
     */
    private static function expectedToken(
        $token,
        $value,
        $length,
        $namespace,
        $offset
    ) {
        return [
            'token'     => $token,
            'value'     => $value,
            'length'    => $length,
            'namespace' => $namespace,
            'keep'      => true,
            'offset'    => $offset
        ];
    }

    /**
     * Build the expected `EOF` token: zero length, `default` namespace.
     *
     * @param   int  $offset    Expected `offset` entry.
     * @return  array
     */
    private static function expectedEof($offset)
    {
        return self::expectedToken('EOF', 'EOF', 0, 'default', $offset);
    }

    /**
     * The value returned by `lexMe` is a `\Generator`.
     */
    public function case_is_a_generator()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abc',
                $grammar = ['default' => ['abc' => 'abc']]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->object($stream)
                    ->isInstanceOf(\Generator::class);
    }

    /**
     * Walk the generator step by step: three regular tokens, then `EOF`,
     * then nothing (`current()` is null once the stream is exhausted).
     */
    public function case_last_token_is_EOF()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'ghidefabc',
                $grammar = [
                    'default' => [
                        'abc'  => 'abc',
                        'def'  => 'def',
                        'tail' => '\w{3}'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->object($stream)
                    ->isInstanceOf(\Generator::class)
                ->array($stream->current())
                    ->isEqualTo(
                        self::expectedToken('tail', 'ghi', 3, 'default', 0)
                    )
                ->let($stream->next())
                ->array($stream->current())
                    ->isEqualTo(
                        self::expectedToken('def', 'def', 3, 'default', 3)
                    )
                ->let($stream->next())
                ->array($stream->current())
                    ->isEqualTo(
                        self::expectedToken('abc', 'abc', 3, 'default', 6)
                    )
                ->let($stream->next())
                ->array($stream->current())
                    ->isEqualTo(self::expectedEof(9))
                ->let($stream->next())
                ->variable($stream->current())
                    ->isNull();
    }

    /**
     * `abc` is lexed normally, then advancing onto `z` (which no token
     * describes) must raise `UnrecognizedToken` with a message pointing at
     * column 4.
     */
    public function case_unrecognized_token()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abczdef',
                $grammar = [
                    'default' => [
                        'abc' => 'abc',
                        'def' => 'def'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->array($stream->current())
                    ->isEqualTo(
                        self::expectedToken('abc', 'abc', 3, 'default', 0)
                    )
                ->exception(function () use ($stream) {
                    $stream->next();
                })
                    ->isInstanceOf(LUT\Exception\UnrecognizedToken::class)
                    ->hasMessage(
                        implode(
                            "\n",
                            [
                                'Unrecognized token "z" at line 1 and column 4:',
                                'abczdef',
                                '   ↑'
                            ]
                        )
                    );
    }

    /**
     * Tokens declared as `name:namespace` move the lexer from `default` to
     * `one`, to `two`, and back to `default`; each yielded token reports the
     * namespace it was matched in.
     */
    public function case_namespace()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abcdefghiabc',
                $grammar = [
                    'default' => [
                        'abc:one' => 'abc'
                    ],
                    'one' => [
                        'def:two' => 'def'
                    ],
                    'two' => [
                        'ghi:default' => 'ghi'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->array(iterator_to_array($stream))
                    ->isEqualTo([
                        self::expectedToken('abc', 'abc', 3, 'default', 0),
                        self::expectedToken('def', 'def', 3, 'one', 3),
                        self::expectedToken('ghi', 'ghi', 3, 'two', 6),
                        self::expectedToken('abc', 'abc', 3, 'default', 9),
                        self::expectedEof(12)
                    ]);
    }

    /**
     * Same walk as the plain namespace case, except that `ghi` uses
     * `__shift__ * 2`; the token that follows is expected in `default`
     * again.
     */
    public function case_namespace_with_shift()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abcdefghiabc',
                $grammar = [
                    'default' => [
                        'abc:one' => 'abc'
                    ],
                    'one' => [
                        'def:two' => 'def'
                    ],
                    'two' => [
                        'ghi:__shift__ * 2' => 'ghi'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->array(iterator_to_array($stream))
                    ->isEqualTo([
                        self::expectedToken('abc', 'abc', 3, 'default', 0),
                        self::expectedToken('def', 'def', 3, 'one', 3),
                        self::expectedToken('ghi', 'ghi', 3, 'two', 6),
                        self::expectedToken('abc', 'abc', 3, 'default', 9),
                        self::expectedEof(12)
                    ]);
    }

    /**
     * A `__shift__` requested while no namespace has been stacked yet must
     * fail with a `Lexer` exception.
     */
    public function case_namespace_shift_too_much()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abcdefghiabc',
                $grammar = [
                    'default' => [
                        'abc:__shift__' => 'abc'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->exception(function () use ($stream) {
                    $stream->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'Cannot shift namespace 1-times, from token abc ' .
                        'in namespace default, ' .
                        'because the stack contains only 0 namespaces.'
                    );
    }

    /**
     * A token that targets a namespace absent from the token table (`foo`)
     * must fail with a `Lexer` exception.
     */
    public function case_namespace_does_not_exist()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abcdef',
                $grammar = [
                    'default' => [
                        'abc:foo' => 'abc',
                        'def'     => 'def'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->exception(function () use ($stream) {
                    $stream->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'Namespace foo does not exist, ' .
                        'called by token abc in namespace default.'
                    );
    }

    /**
     * Lexemes matched by the `skip` token do not show up in the stream, yet
     * the offsets of the tokens around them still account for the skipped
     * characters.
     */
    public function case_skip()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abc def   ghi  abc',
                $grammar = [
                    'default' => [
                        'skip' => '\s+',
                        'abc'  => 'abc',
                        'def'  => 'def',
                        'ghi'  => 'ghi'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->array(iterator_to_array($stream))
                    ->isEqualTo([
                        self::expectedToken('abc', 'abc', 3, 'default', 0),
                        self::expectedToken('def', 'def', 3, 'default', 4),
                        self::expectedToken('ghi', 'ghi', 3, 'default', 10),
                        self::expectedToken('abc', 'abc', 3, 'default', 15),
                        self::expectedEof(18)
                    ]);
    }

    /**
     * A token whose pattern matches the empty string (`\d?` in front of
     * `abcdef`) must be rejected with a `Lexer` exception.
     */
    public function case_match_empty_lexeme()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = 'abcdef',
                $grammar = [
                    'default' => [
                        'abc' => '\d?',
                        'def' => 'def'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->exception(function () use ($stream) {
                    $stream->next();
                })
                    ->isInstanceOf(LUT\Exception\Lexer::class)
                    ->hasMessage(
                        'A lexeme must not match an empty value, ' .
                        'which is the case of "abc" (\d?).'
                    );
    }

    /**
     * With a lexer built without options, `\w` is expected to match `ß`.
     * Both lexemes are expected with a length of 1, while the expected
     * offsets advance by 3 and then by 2.
     */
    public function case_unicode_enabled_by_default()
    {
        $this
            ->given(
                $sut     = new SUT(),
                $input   = '…ß',
                $grammar = [
                    'default' => [
                        'foo' => '…',
                        'bar' => '\w'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->array(iterator_to_array($stream))
                    ->isEqualTo([
                        self::expectedToken('foo', '…', 1, 'default', 0),
                        self::expectedToken('bar', 'ß', 1, 'default', 3),
                        self::expectedEof(5)
                    ]);
    }

    /**
     * With `lexer.unicode` set to false, `…` is still expected as the first
     * token, but advancing onto `ß` must raise `UnrecognizedToken`.
     */
    public function case_unicode_disabled()
    {
        $this
            ->given(
                $sut     = new SUT(['lexer.unicode' => false]),
                $input   = '…ß',
                $grammar = [
                    'default' => [
                        'foo' => '…',
                        'bar' => '\w'
                    ]
                ]
            )
            ->when($stream = $sut->lexMe($input, $grammar))
            ->then
                ->array($stream->current())
                    ->isEqualTo(
                        self::expectedToken('foo', '…', 1, 'default', 0)
                    )
                ->exception(function () use ($stream) {
                    $stream->next();
                })
                    ->isInstanceOf(LUT\Exception\UnrecognizedToken::class)
                    ->hasMessage(
                        implode(
                            "\n",
                            [
                                'Unrecognized token "ß" at line 1 and column 4:',
                                '…ß',
                                ' ↑'
                            ]
                        )
                    );
    }
}
500