<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmLists;
use dokuwiki\Parsing\Handler\Nest;
use dokuwiki\Parsing\ModeRegistry;

/**
 * GFM list block.
 *
 * Captures an entire list block atomically (one addSpecialPattern match) and
 * walks the captured text in handle(), grouping lines into items. The per-item
 * loop runs between ModeRegistry::acquireSubParser() and
 * ModeRegistry::releaseSubParser() so each item's body is dedented to its
 * content column and parsed by a pooled sub-parser, and block content -
 * paragraphs, fenced code, blockquotes, plugin blocks - works inside items
 * uniformly without depending on column-0 anchoring of nested mode patterns.
 * If any nested mode requests a sub-parser with the same exclusion key while
 * ours is in use, the registry's pool hands them a different slot so their
 * reset() does not corrupt our state. The sub-parser is released in a
 * `finally` clause so a throwing nested mode cannot leave the slot acquired.
 *
 * Sub-parser mode set: every active mode except CATEGORY_BASEONLY (i.e. no
 * Header inside list items, since `<h1>`-`<h6>` inside `<li>` is never
 * desirable and section nesting must not span into items) and gfm_listblock
 * itself (defensive guard against lexer re-entry on pathological inputs;
 * normal nested lists are caught by the outer pattern instead).
 *
 * Each item's sub-parsed calls are wrapped in a `nest` instruction (see
 * Handler\Nest) before they reach the outer handler. This is essential:
 * the sub-parser's Block rewriter has already wrapped multi-paragraph
 * content in `p_open`/`p_close`, and without nest-wrapping the main
 * handler's Block rewriter would see those paragraphs and add another
 * `<p>` around the entire replayed range, producing nested `<p>` tags.
 * Block treats `nest` as opaque and the renderer base class unwraps it
 * transparently — the same pattern Footnote uses.
 *
 * Indentation rule: depth = (indent / 2) + 1. Tabs become two spaces. 1- and
 * 3-space indents round down. Marker characters: -, *, + (unordered) and
 * digits followed by . or ) (ordered). Nested lists are caught by the
 * outer pattern (each marker at any 2-space-multiple indent is its own
 * item at the corresponding depth) and stitched back into nested HTML by
 * the GfmLists rewriter.
 */
class GfmListblock extends AbstractMode
{
    /**
     * Regex fragment matching one list marker.
     *
     * Either an unordered marker (`-`, `*`, `+`) or an ordered marker
     * (1-9 digits followed by `.` or `)`). Used by the entry pattern in
     * connectTo() and by the per-line classifier in parseItems().
     */
    protected const MARKER = '(?:[-*+]|\d{1,9}[.)])';

    /**
     * Replacement for a tab character when measuring indentation.
     *
     * The indentation rule counts one nesting level per two columns, so a
     * tab is expanded to exactly two spaces before indents are measured.
     */
    protected const TAB_EXPANSION = '  ';

    /** @inheritdoc */
    public function getSort()
    {
        return 10;
    }

    /** @inheritdoc */
    public function preConnect()
    {
        ModeRegistry::getInstance()->registerBlockEolMode('gfm_listblock');
    }

    /**
     * Register the special pattern that captures a whole list block.
     *
     * The pattern starts on a marker line (any indent) and then loops over
     * four alternatives until none matches:
     *
     *   1. A subsequent marker line at any indent.
     *   2. An indented continuation line (>= 2 leading spaces with content).
     *   3. A blank line followed by indented content (any number of
     *      intervening blank lines tolerated via the lookahead).
     *   4. A blank line followed by a next marker (same multi-blank
     *      tolerance as alt 3).
     *
     * The block ends naturally when none of the alternatives match — for
     * example a column-0 non-marker line, or two-or-more blank lines
     * followed by non-list content.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // A marker must be followed by whitespace + text, or sit alone on its line.
        $markerLine = '\n[ \t]*' . self::MARKER . '(?:[ \t][^\n]*|(?=\n))';

        $pattern =
            $markerLine .
            '(?:' .
                $markerLine .
                '|' . '\n[ \t]{2,}\S[^\n]*' .
                '|' . '\n[ \t]*(?=(?:\n[ \t]*)*\n[ \t]{2,}\S)' .
                '|' . '\n[ \t]*(?=(?:\n[ \t]*)*\n[ \t]*' . self::MARKER . ')' .
            ')*';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'gfm_listblock');
    }

    /**
     * Convert the captured block into handler calls.
     *
     * Sequence:
     *   1. parseItems() splits the captured text into per-item records.
     *   2. Install GfmLists as a CallWriter rewriter on the main handler.
     *   3. Emit list_open carrying the first item's marker — the rewriter's
     *      handleListOpen opens the `<ul>`/`<ol>` and the first `<li>`.
     *   4. For each item:
     *        - If not the first, emit list_item (closes the previous `<li>`
     *          and opens a new one in the rewriter).
     *        - Sub-parse the dedented item body via the pooled sub-parser.
     *        - Filter document_start/end and the outer p_open/p_close pair
     *          for tight items (single paragraph).
     *        - Wrap the filtered calls in a Nest so the main handler's
     *          Block rewriter treats them as opaque.
     *   5. Release the sub-parser (always, even if sub-parsing throws).
     *   6. Emit list_close and finalise the GfmLists rewriter.
     *
     * If parseItems() yields no items the raw match is emitted as cdata.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $items = $this->parseItems($match);
        if (empty($items)) {
            $handler->addCall('cdata', [$match], $pos);
            return true;
        }

        $handler->setCallWriter(new GfmLists($handler->getCallWriter()));
        $handler->addCall('list_open', [$items[0]['markerMatch']], $pos);

        $registry = ModeRegistry::getInstance();
        $excludeCats = [ModeRegistry::CATEGORY_BASEONLY];
        $excludeModes = ['gfm_listblock'];
        $subParser = $registry->acquireSubParser($excludeCats, $excludeModes);

        try {
            $subHandler = $subParser->getHandler();

            foreach ($items as $i => $item) {
                $itemPos = $pos + $item['offset'];
                if ($i > 0) {
                    $handler->addCall('list_item', [$item['markerMatch']], $itemPos);
                }

                // The sub-parser is reused for every item; clear the calls
                // left over from the previous one before parsing.
                $subHandler->reset();
                $subParser->parse($item['body']);
                $itemCalls = $this->filterSubCalls($subHandler->calls);
                if (empty($itemCalls)) {
                    continue; // empty item — nothing to emit
                }

                // Wrap the item content in a Nest so the main handler's Block
                // rewriter does not double-wrap our already-paragraphed content.
                // Block treats `nest` as opaque and the renderer base class
                // unwraps it transparently, the same pattern Footnote uses.
                $outer = $handler->getCallWriter();
                $nest = new Nest($outer);
                $handler->setCallWriter($nest);
                foreach ($itemCalls as $call) {
                    // sub-handler positions are relative to the item body; offset
                    // them back into the source so section-edit anchors work.
                    $handler->addCall($call[0], $call[1], $itemPos + $call[2]);
                }
                $handler->setCallWriter($nest->process());
            }
        } finally {
            // Hand the sub-parser back even when a nested mode throws, so
            // the acquired slot is not left marked as in use.
            $registry->releaseSubParser($excludeCats, $excludeModes);
        }

        $handler->addCall('list_close', [], $pos + strlen($match));
        $reWriter = $handler->getCallWriter();
        $handler->setCallWriter($reWriter->process());

        return true;
    }

    /**
     * Walk the captured block, grouping lines into items.
     *
     * Each returned item describes one list_item: its marker (in the form
     * "\n{indent}{marker}" so GfmLists::interpretSyntax can parse it), the
     * dedented body, dedent column, and offset within $match.
     *
     * Lines are classified as marker / continuation / blank. A marker line
     * starts a new item; continuation and blank lines accumulate into the
     * current item's body. Continuation lines are dedented by up to
     * indent + marker_width + 1 leading spaces (the item's content column
     * for single-space-after-marker cases). Blank lines are kept as empty
     * body lines while they're in the middle of the body and stripped
     * from the trailing edge by joinBody() so single-paragraph items
     * parse tight. Tabs are expanded via TAB_EXPANSION before any indent
     * is measured; offsets are still counted on the raw, unexpanded lines.
     *
     * @param string $match the raw special-pattern match (starts with \n)
     * @return array<int, array{markerMatch: string, dedent: int, body: string, offset: int}>
     */
    protected function parseItems($match)
    {
        $stripped = ltrim($match, "\n");
        $offsetBase = strlen($match) - strlen($stripped);
        $lines = explode("\n", $stripped);

        $items = [];
        $current = null;
        $bodyLines = [];
        $cursor = $offsetBase;

        foreach ($lines as $line) {
            $isMarker = preg_match(
                '/^([ \t]*)(' . self::MARKER . ')(?:[ \t](.*)|$)/',
                $line,
                $m
            );

            if ($isMarker) {
                // A new marker closes the item collected so far.
                if ($current !== null) {
                    $current['body'] = $this->joinBody($bodyLines);
                    $items[] = $current;
                }
                $indent = str_replace("\t", self::TAB_EXPANSION, $m[1]);
                $marker = $m[2];
                // Group 3 is absent when the marker stands alone on its line.
                $firstLine = $m[3] ?? '';
                $current = [
                    'markerMatch' => "\n" . $indent . $marker,
                    'dedent' => strlen($indent) + strlen($marker) + 1,
                    'offset' => $cursor,
                ];
                $bodyLines = [$firstLine];
            } elseif ($current !== null) {
                if (trim($line) === '') {
                    $bodyLines[] = '';
                } else {
                    // Strip at most the item's content column, never more
                    // leading whitespace than the line actually has.
                    $expanded = str_replace("\t", self::TAB_EXPANSION, $line);
                    $available = strlen($expanded) - strlen(ltrim($expanded, ' '));
                    $strip = min($current['dedent'], $available);
                    $bodyLines[] = substr($expanded, $strip);
                }
            }

            $cursor += strlen($line) + 1; // +1 for the \n consumed by explode
        }

        if ($current !== null) {
            $current['body'] = $this->joinBody($bodyLines);
            $items[] = $current;
        }

        return $items;
    }

    /**
     * Join body lines into a string, trimming trailing blank lines.
     *
     * Trailing blanks would reach the sub-parser and cause Block to wrap
     * the otherwise-single paragraph content in `p_open`/`p_close`,
     * forcing a tight item into loose-item shape. Stripping them here
     * preserves the tight rendering for items that look tight in source.
     *
     * @param string[] $lines
     * @return string the lines joined with "\n", without trailing newlines
     */
    protected function joinBody(array $lines): string
    {
        return rtrim(implode("\n", $lines), "\n");
    }

    /**
     * Filter the sub-parser's flat call list before nest-wrapping it.
     *
     * Drops a leading `document_start` and a trailing `document_end`, and
     * strips the outer `p_open` / `p_close` pair for tight items so their
     * content sits inline inside `<li>`. An item counts as tight when it
     * contains exactly one `p_open`, and that paragraph spans the whole
     * remaining call list. Loose items (more than one `p_open`) keep
     * their inner pairs untouched. The filtered calls are then wrapped in
     * a Nest by handle() before they reach the GfmLists rewriter.
     *
     * @param array $calls calls as collected by the sub-handler
     * @return array the filtered calls, re-indexed from 0
     */
    protected function filterSubCalls(array $calls)
    {
        if ($calls && $calls[0][0] === 'document_start') {
            array_shift($calls);
        }
        if ($calls && end($calls)[0] === 'document_end') {
            array_pop($calls);
        }

        // Number of paragraphs the sub-parser's Block rewriter opened.
        $pCount = count(array_keys(array_column($calls, 0), 'p_open', true));

        if (
            $pCount === 1
            && $calls[0][0] === 'p_open'
            && end($calls)[0] === 'p_close'
        ) {
            array_shift($calls);
            array_pop($calls);
        }

        return $calls;
    }
}