xref: /dokuwiki/_test/tests/Parsing/Markdown/SpecReader.php (revision 9af82229f03804fb3198cbdf48d60d34d8afb191)
1<?php
2
3namespace dokuwiki\test\Parsing\Markdown;
4
/**
 * Reader for the fenced-example format used by cmark-gfm's `test/spec.txt`.
 *
 * An example in the spec file looks like this:
 *
 *     ```````````````` example [optional label]
 *     markdown input
 *     .
 *     expected html output
 *     ````````````````
 *
 * - The opening fence is a run of 10 or more backticks followed by the word
 *   `example` and, optionally, a whitespace-separated label (the GFM
 *   extensions use this, e.g. `example table`, `example disallowed_raw_html`).
 * - The first line consisting of exactly `.` switches from markdown input to
 *   expected HTML; later `.` lines belong to the HTML.
 * - The closing fence is a line made of exactly as many backticks as the
 *   opening fence (trailing whitespace is tolerated).
 *
 * Examples are numbered sequentially from 1 in document order — the same
 * numbers shown in the rendered spec ("Example 42").
 *
 * The text of the most recent ATX heading (`#` through `######`) seen outside
 * an example is carried along as section context, to make test names
 * informative.
 */
class SpecReader
{
    /** Filesystem path of the spec file to read. */
    private string $path;

    /**
     * @param string $path Path to the spec file. The file is not touched
     *                     until examples() is called.
     */
    public function __construct(string $path)
    {
        $this->path = $path;
    }

    /**
     * Yield one record per example found in the spec file.
     *
     * The file is checked and read when this method is called, so a missing
     * or unreadable file is reported at the call site rather than on the
     * first iteration. Parsing itself is lazy: a malformed (unterminated)
     * example is only reported once iteration reaches the end of the file.
     *
     * @return iterable<array{
     *     number: int,
     *     section: string,
     *     extension: ?string,
     *     markdown: string,
     *     html: string
     * }>
     * @throws \RuntimeException if the spec file does not exist or cannot be
     *                           read; during iteration, if the file ends
     *                           inside an example
     */
    public function examples(): iterable
    {
        // This method is deliberately NOT a generator: any `yield` here would
        // postpone the checks below until the first iteration.
        if (!is_file($this->path)) {
            throw new \RuntimeException("spec file not found: {$this->path}");
        }
        $lines = file($this->path, FILE_IGNORE_NEW_LINES);
        if ($lines === false) {
            throw new \RuntimeException("cannot read spec file: {$this->path}");
        }

        return $this->parse($lines);
    }

    /**
     * Walk the spec lines with a three-state machine (body | md | html) and
     * yield a record each time an example's closing fence is reached.
     *
     * @param string[] $lines Spec file content, one entry per line, without
     *                        line terminators
     * @return \Generator<int, array{
     *     number: int,
     *     section: string,
     *     extension: ?string,
     *     markdown: string,
     *     html: string
     * }>
     * @throws \RuntimeException if the lines end while an example is open
     */
    private function parse(array $lines): \Generator
    {
        $section   = '';
        $number    = 0;
        $state     = 'body';     // body | md | html
        $fenceLen  = 0;
        $extension = null;
        $mdLines   = [];
        $htmlLines = [];

        foreach ($lines as $raw) {
            if ($state === 'body') {
                // ATX heading: remember its text (minus any closing #'s) as
                // the section for the examples that follow.
                if (preg_match('/^#{1,6}\s+(.*?)\s*#*\s*$/', $raw, $m)) {
                    $section = $m[1];
                    continue;
                }
                // Opening fence: start collecting a new example.
                if (preg_match('/^(`{10,})\s+example(?:\s+(\S.*?))?\s*$/', $raw, $m)) {
                    $number++;
                    $fenceLen  = strlen($m[1]);
                    $extension = isset($m[2]) && $m[2] !== '' ? $m[2] : null;
                    $state     = 'md';
                    $mdLines   = [];
                    $htmlLines = [];
                }
                // Every other line outside an example is plain spec prose.
                continue;
            }

            // Inside an example. A line of exactly $fenceLen backticks
            // (optionally followed by whitespace) closes it; longer or
            // shorter backtick runs are ordinary content.
            if (preg_match('/^`{' . $fenceLen . '}\s*$/', $raw) === 1) {
                yield [
                    'number'    => $number,
                    'section'   => $section,
                    'extension' => $extension,
                    'markdown'  => implode("\n", $mdLines),
                    'html'      => implode("\n", $htmlLines),
                ];
                $state = 'body';
                continue;
            }

            if ($state === 'md') {
                // The first lone "." ends the markdown part; it is not
                // itself part of either side.
                if ($raw === '.') {
                    $state = 'html';
                    continue;
                }
                $mdLines[] = $raw;
                continue;
            }

            // state === 'html'
            $htmlLines[] = $raw;
        }

        if ($state !== 'body') {
            throw new \RuntimeException(
                "spec file ended mid-example (#$number); opening fence of length $fenceLen was not closed"
            );
        }
    }
}
117