<?php

namespace dokuwiki\test\Parsing\Markdown;

/**
 * Parses cmark-gfm's `test/spec.txt` fenced-example format.
 *
 * Each example is a block like:
 *
 *     ```````````````` example [optional label]
 *     markdown input
 *     .
 *     expected html output
 *     ````````````````
 *
 * Fences are 10+ backticks; the opening line includes the word `example`,
 * optionally followed by a whitespace-separated label (used by the GFM
 * extensions, e.g. `example table`, `example disallowed_raw_html`).
 * A single `.` on its own line separates markdown from HTML.
 *
 * Examples are numbered sequentially from 1 in document order — the same
 * numbers shown in the rendered spec ("Example 42").
 *
 * The most recent heading line (`#` through `######`) outside an example is
 * carried as section context for each example, to make test names informative.
 */
class SpecReader
{
    /** ATX heading line; group 1 is the heading text without any closing `#` run. */
    private const HEADING_PATTERN = '/^#{1,6}\s+(.*?)\s*#*\s*$/';

    /** Example opening fence; group 1 is the backtick run, group 2 the optional label. */
    private const OPEN_FENCE_PATTERN = '/^(`{10,})\s+example(?:\s+(\S.*?))?\s*$/';

    private string $path;

    /**
     * @param string $path Filesystem path of the spec file to read.
     */
    public function __construct(string $path)
    {
        $this->path = $path;
    }

    /**
     * Yield one record per example found in the spec file.
     *
     * This method is a generator, so the file is only checked and read once
     * iteration starts; exceptions surface at that point, not at call time.
     *
     * Lines inside an example are joined with "\n" and carry no trailing
     * newline. An example with no HTML lines after the separator yields
     * `html === ''`.
     *
     * @return iterable<array{
     *     number: int,
     *     section: string,
     *     extension: ?string,
     *     markdown: string,
     *     html: string
     * }>
     *
     * @throws \RuntimeException If the file is missing or unreadable, if an
     *                           example is closed without a `.` separator line,
     *                           or if the file ends inside an example.
     */
    public function examples(): iterable
    {
        if (!is_file($this->path)) {
            throw new \RuntimeException("spec file not found: {$this->path}");
        }
        $lines = file($this->path, FILE_IGNORE_NEW_LINES);
        if ($lines === false) {
            throw new \RuntimeException("cannot read spec file: {$this->path}");
        }

        $section = '';
        $number = 0;
        $state = 'body'; // body | md | html
        $fenceLen = 0;
        $closeFencePattern = '';
        $extension = null;
        $mdLines = [];
        $htmlLines = [];

        foreach ($lines as $line) {
            if ($state === 'body') {
                if (preg_match(self::HEADING_PATTERN, $line, $m)) {
                    $section = $m[1];
                } elseif (preg_match(self::OPEN_FENCE_PATTERN, $line, $m)) {
                    $number++;
                    $fenceLen = strlen($m[1]);
                    // The closing fence must be exactly as long as the opening
                    // one, so longer backtick runs inside an example stay content.
                    $closeFencePattern = '/^`{' . $fenceLen . '}\s*$/';
                    $extension = isset($m[2]) && $m[2] !== '' ? $m[2] : null;
                    $mdLines = [];
                    $htmlLines = [];
                    $state = 'md';
                }
                continue;
            }

            if (preg_match($closeFencePattern, $line)) {
                if ($state === 'md') {
                    // Closing fence reached without ever seeing the separator:
                    // the expected HTML would have been folded into the markdown
                    // and the record would look like a valid empty-HTML example.
                    throw new \RuntimeException(sprintf(
                        'example #%d has no "." separator line between markdown and html',
                        $number
                    ));
                }
                yield [
                    'number' => $number,
                    'section' => $section,
                    'extension' => $extension,
                    'markdown' => implode("\n", $mdLines),
                    'html' => implode("\n", $htmlLines),
                ];
                $state = 'body';
                continue;
            }

            if ($state === 'md') {
                if ($line === '.') {
                    // Only the first lone dot is the separator; later ones are HTML.
                    $state = 'html';
                } else {
                    $mdLines[] = $line;
                }
                continue;
            }

            // state === 'html'
            $htmlLines[] = $line;
        }

        if ($state !== 'body') {
            throw new \RuntimeException(
                "spec file ended mid-example (#$number); opening fence of length $fenceLen was not closed"
            );
        }
    }
}