xref: /dokuwiki/_test/tests/Parsing/Markdown/SpecReader.php (revision 72b2703b4f922ca520e7cb2e7765a252175f30d3)
1*72b2703bSAndreas Gohr<?php
2*72b2703bSAndreas Gohr
3*72b2703bSAndreas Gohrnamespace dokuwiki\test\Parsing\Markdown;
4*72b2703bSAndreas Gohr
/**
 * Reader for the fenced-example format of cmark-gfm's `test/spec.txt`.
 *
 * An example in the spec file has this shape:
 *
 *     ```````````````` example [optional label]
 *     markdown input
 *     .
 *     expected html output
 *     ````````````````
 *
 * The opening fence is a run of at least ten backticks followed by the word
 * `example` and, optionally, a label (used by the GFM extensions, e.g.
 * `example table`, `example disallowed_raw_html`). The closing fence is a
 * line made of exactly as many backticks as the opening one. The first line
 * consisting of a lone `.` splits the markdown input from the expected HTML.
 *
 * Examples are counted from 1 in the order they appear in the file, which is
 * meant to line up with the numbering of the rendered spec ("Example 42").
 *
 * The text of the last heading line (one to six `#`) seen outside an example
 * is attached to every following example as its section, so that test names
 * can be made informative.
 */
class SpecReader
{
    /** Location of the spec file on disk. */
    private string $path;

    /**
     * @param string $path path of the spec file to read
     */
    public function __construct(string $path)
    {
        $this->path = $path;
    }

    /**
     * Walk the spec file and produce one record for every example in it.
     *
     * This is a generator: nothing is read until iteration starts, so the
     * exceptions below surface while iterating, not when calling this method.
     *
     * The lines of each part are joined with "\n"; no trailing newline is
     * appended to `markdown` or `html`.
     *
     * @return iterable<array{
     *     number: int,
     *     section: string,
     *     extension: ?string,
     *     markdown: string,
     *     html: string
     * }>
     * @throws \RuntimeException if the file is missing or unreadable, or if
     *                           the file ends while an example is still open
     */
    public function examples(): iterable
    {
        $heading = '';
        $count   = 0;
        $openLen = 0;
        $label   = null;
        // Name of the part currently being collected ('md' or 'html');
        // null means we are in regular spec prose, outside any example.
        $part    = null;
        $parts   = ['md' => [], 'html' => []];

        foreach ($this->readLines() as $line) {
            if ($part === null) {
                if (preg_match('/^#{1,6}\s+(.*?)\s*#*\s*$/', $line, $hit)) {
                    // remember the heading text as context for later examples
                    $heading = $hit[1];
                } elseif (preg_match('/^(`{10,})\s+example(?:\s+(\S.*?))?\s*$/', $line, $hit)) {
                    // an example starts here
                    $count++;
                    $openLen = strlen($hit[1]);
                    $found   = $hit[2] ?? '';
                    $label   = ($found === '') ? null : $found;
                    $parts   = ['md' => [], 'html' => []];
                    $part    = 'md';
                }
                continue;
            }

            // Only a backtick run of exactly the opening length ends the example;
            // longer or shorter runs are ordinary content.
            $isClosing = preg_match('/^(`{' . $openLen . ',})\s*$/', $line, $hit)
                && strlen($hit[1]) === $openLen;

            if ($isClosing) {
                yield [
                    'number'    => $count,
                    'section'   => $heading,
                    'extension' => $label,
                    'markdown'  => implode("\n", $parts['md']),
                    'html'      => implode("\n", $parts['html']),
                ];
                $part = null;
                continue;
            }

            // Only the first lone dot is a separator; later ones belong to the HTML.
            if ($part === 'md' && $line === '.') {
                $part = 'html';
                continue;
            }

            $parts[$part][] = $line;
        }

        if ($part !== null) {
            throw new \RuntimeException(
                "spec file ended mid-example (#$count); opening fence of length $openLen was not closed"
            );
        }
    }

    /**
     * Load the spec file as a list of lines without their line endings.
     *
     * @return string[]
     * @throws \RuntimeException if the file does not exist or cannot be read
     */
    private function readLines(): array
    {
        if (!is_file($this->path)) {
            throw new \RuntimeException("spec file not found: {$this->path}");
        }

        $content = file($this->path, FILE_IGNORE_NEW_LINES);
        if ($content === false) {
            throw new \RuntimeException("cannot read spec file: {$this->path}");
        }

        return $content;
    }
}
117