<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmLists;
use dokuwiki\Parsing\Handler\Nest;
use dokuwiki\Parsing\ModeRegistry;

/**
 * GFM list block.
 *
 * Captures an entire list block atomically (one addSpecialPattern match) and
 * walks the captured text in handle(), grouping lines into items. Each item's
 * body is dedented to its content column and parsed by a cached sub-parser
 * (ModeRegistry::getSubParser) so block content - paragraphs, fenced code,
 * blockquotes, plugin blocks - works inside items uniformly without depending
 * on column-0 anchoring of nested mode patterns.
 *
 * Sub-parser mode set: every active mode except CATEGORY_BASEONLY (i.e. no
 * Header inside list items, since `<h1>`-`<h6>` inside `<li>` is never
 * desirable and section nesting must not span into items) and gfm_listblock
 * itself (defensive guard against lexer re-entry on pathological inputs;
 * normal nested lists are caught by the outer pattern instead).
 *
 * Each item's sub-parsed calls are wrapped in a `nest` instruction (see
 * Handler\Nest) before they reach the outer handler. This is essential:
 * the sub-parser's Block rewriter has already wrapped multi-paragraph
 * content in `p_open`/`p_close`, and without nest-wrapping the main
 * handler's Block rewriter would see those paragraphs and add another
 * `<p>` around the entire replayed range, producing nested `<p>` tags.
 * Block treats `nest` as opaque and the renderer base class unwraps it
 * transparently — the same pattern Footnote uses.
 *
 * Indentation rule: depth = (indent / 2) + 1. Tabs become two spaces. 1- and
 * 3-space indents round down. Marker characters: -, *, + (unordered) and
 * digits followed by . or ) (ordered). Nested lists are caught by the
 * outer pattern (each marker at any 2-space-multiple indent is its own
 * item at the corresponding depth) and stitched back into nested HTML by
 * the GfmLists rewriter.
 */
class GfmListblock extends AbstractMode
{
    /**
     * Regex fragment matching one list marker.
     *
     * Either an unordered marker (`-`, `*`, `+`) or an ordered marker
     * (1-9 digits followed by `.` or `)`). Used by the entry pattern in
     * connectTo() and by the per-line classifier in parseItems().
     */
    protected const MARKER = '(?:[-*+]|\d{1,9}[.)])';

    /**
     * Replacement for one tab character in leading indentation.
     *
     * Two spaces, so that a single tab equals exactly one nesting level
     * under the "depth = (indent / 2) + 1" rule.
     */
    protected const TAB_EXPANSION = '  ';

    /** @inheritdoc */
    public function getSort()
    {
        return 10;
    }

    /** @inheritdoc */
    public function preConnect()
    {
        ModeRegistry::getInstance()->registerBlockEolMode('gfm_listblock');
    }

    /**
     * Register the special pattern that captures a whole list block.
     *
     * The pattern starts on a marker line (any indent) and then loops over
     * four alternatives until none matches:
     *
     *   1. A subsequent marker line at any indent.
     *   2. An indented continuation line (>= 2 leading spaces with content).
     *   3. A blank line followed by indented content (any number of
     *      intervening blank lines tolerated via the lookahead).
     *   4. A blank line followed by a next marker (same multi-blank
     *      tolerance as alt 3).
     *
     * The block ends naturally when none of the alternatives match — for
     * example a column-0 non-marker line, or two-or-more blank lines
     * followed by non-list content.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // One marker line: optional indent, marker, then either
        // whitespace + rest of line or an immediate end of line.
        $markerLine = '\n[ \t]*' . self::MARKER . '(?:[ \t][^\n]*|(?=\n))';

        $pattern =
            $markerLine .
            '(?:' .
            $markerLine .
            '|' . '\n[ \t]{2,}\S[^\n]*' .
            '|' . '\n[ \t]*(?=(?:\n[ \t]*)*\n[ \t]{2,}\S)' .
            '|' . '\n[ \t]*(?=(?:\n[ \t]*)*\n[ \t]*' . self::MARKER . ')' .
            ')*';

        $this->Lexer->addSpecialPattern($pattern, $mode, 'gfm_listblock');
    }

    /**
     * Convert the captured block into handler calls.
     *
     * Sequence:
     *   1. parseItems() splits the captured text into per-item records.
     *   2. Install GfmLists as a CallWriter rewriter on the main handler.
     *   3. Emit list_open carrying the first item's marker — the rewriter's
     *      handleListOpen opens the `<ul>`/`<ol>` and the first `<li>`.
     *   4. For each item:
     *      - If not the first, emit list_item (closes the previous `<li>`
     *        and opens a new one in the rewriter).
     *      - Sub-parse the dedented item body via the cached sub-parser.
     *      - Filter document_start/end and the outer p_open/p_close pair
     *        for tight items (single paragraph).
     *      - Wrap the filtered calls in a Nest so the main handler's
     *        Block rewriter treats them as opaque.
     *   5. Emit list_close and finalise the GfmLists rewriter.
     *
     * If parseItems() finds no item at all, the raw match is emitted as
     * plain cdata so no source text is lost.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $items = $this->parseItems($match);
        if ($items === []) {
            $handler->addCall('cdata', [$match], $pos);
            return true;
        }

        $handler->setCallWriter(new GfmLists($handler->getCallWriter()));
        $handler->addCall('list_open', [$items[0]['markerMatch']], $pos);

        $subParser = ModeRegistry::getInstance()
            ->getSubParser([ModeRegistry::CATEGORY_BASEONLY], ['gfm_listblock']);
        $subHandler = $subParser->getHandler();

        foreach ($items as $i => $item) {
            $itemPos = $pos + $item['offset'];
            if ($i > 0) {
                $handler->addCall('list_item', [$item['markerMatch']], $itemPos);
            }

            // The sub-parser is cached and shared, so clear out whatever the
            // previous item left behind before parsing this one.
            $subHandler->reset();
            $subParser->parse($item['body']);
            $itemCalls = $this->filterSubCalls($subHandler->calls);
            if ($itemCalls === []) {
                continue; // empty item — nothing to emit
            }

            // Wrap the item content in a Nest so the main handler's Block
            // rewriter does not double-wrap our already-paragraphed content.
            // Block treats `nest` as opaque and the renderer base class
            // unwraps it transparently, the same pattern Footnote uses.
            $nest = new Nest($handler->getCallWriter());
            $handler->setCallWriter($nest);
            foreach ($itemCalls as $call) {
                // sub-handler positions are relative to the item body; offset
                // them back into the source so section-edit anchors work.
                $handler->addCall($call[0], $call[1], $itemPos + $call[2]);
            }
            $handler->setCallWriter($nest->process());
        }

        $handler->addCall('list_close', [], $pos + strlen($match));
        $reWriter = $handler->getCallWriter();
        $handler->setCallWriter($reWriter->process());

        return true;
    }

    /**
     * Walk the captured block, grouping lines into items.
     *
     * Each returned item describes one list_item: its marker (in the form
     * "\n{indent}{marker}" so GfmLists::interpretSyntax can parse it), the
     * dedented body, dedent column, and byte offset within $match.
     *
     * Lines are classified as marker / continuation / blank. A marker line
     * starts a new item; continuation and blank lines accumulate into the
     * current item's body. Continuation lines are dedented by up to
     * indent + marker_width + 1 leading spaces (the item's content column
     * for single-space-after-marker cases). Blank lines are kept as empty
     * body lines while they're in the middle of the body and stripped
     * from the trailing edge by joinBody() so single-paragraph items
     * parse tight.
     *
     * Tabs are expanded to two spaces in leading indentation only; tabs
     * that are part of a line's content are passed through unchanged, so
     * the first line and the continuation lines of an item are treated
     * the same way.
     *
     * @param string $match the raw special-pattern match (starts with \n)
     * @return array<int, array{markerMatch: string, dedent: int, body: string, offset: int}>
     */
    protected function parseItems($match)
    {
        $stripped = ltrim($match, "\n");
        // Byte offset of the current line within $match; starts after the
        // leading newline(s) removed above.
        $cursor = strlen($match) - strlen($stripped);

        $items = [];
        $current = null;
        $bodyLines = [];

        foreach (explode("\n", $stripped) as $line) {
            $isMarker = preg_match(
                '/^([ \t]*)(' . self::MARKER . ')(?:[ \t](.*)|$)/',
                $line,
                $m
            );

            if ($isMarker) {
                // A new marker closes the item collected so far.
                if ($current !== null) {
                    $current['body'] = $this->joinBody($bodyLines);
                    $items[] = $current;
                }

                $indent = str_replace("\t", self::TAB_EXPANSION, $m[1]);
                $marker = $m[2];
                $current = [
                    'markerMatch' => "\n" . $indent . $marker,
                    'dedent' => strlen($indent) + strlen($marker) + 1,
                    'offset' => $cursor,
                ];
                // Group 3 is unset for a bare marker with no content.
                $bodyLines = [$m[3] ?? ''];
            } elseif ($current !== null) {
                if (trim($line) === '') {
                    $bodyLines[] = '';
                } else {
                    // Expand tabs in the indentation only, then strip at most
                    // the item's dedent width from it.
                    preg_match('/^[ \t]*/', $line, $lead);
                    $leading = str_replace("\t", self::TAB_EXPANSION, $lead[0]);
                    $strip = min($current['dedent'], strlen($leading));
                    $bodyLines[] = substr($leading, $strip) . substr($line, strlen($lead[0]));
                }
            }

            $cursor += strlen($line) + 1; // +1 for the \n consumed by explode
        }

        if ($current !== null) {
            $current['body'] = $this->joinBody($bodyLines);
            $items[] = $current;
        }

        return $items;
    }

    /**
     * Join body lines into a string, trimming trailing blank lines.
     *
     * Trailing blanks would reach the sub-parser and cause Block to wrap
     * the otherwise-single paragraph content in `p_open`/`p_close`,
     * forcing a tight item into loose-item shape. Stripping them here
     * preserves the tight rendering for items that look tight in source.
     *
     * @param string[] $lines
     * @return string the lines joined by "\n" without trailing newlines
     */
    protected function joinBody(array $lines): string
    {
        return rtrim(implode("\n", $lines), "\n");
    }

    /**
     * Filter the sub-parser's flat call list before nest-wrapping it.
     *
     * Drops `document_start` / `document_end` (always emitted by
     * Handler::finalize), and strips the outer `p_open` / `p_close` pair
     * for tight items so their content sits inline inside `<li>`. Loose
     * items (multiple paragraphs, more than one `p_open`) keep their
     * inner pairs untouched. The filtered calls are then wrapped in a
     * Nest by handle() before they reach the GfmLists rewriter.
     *
     * @param array $calls
     * @return array
     */
    protected function filterSubCalls(array $calls)
    {
        if ($calls && $calls[0][0] === 'document_start') {
            array_shift($calls);
        }
        if ($calls && end($calls)[0] === 'document_end') {
            array_pop($calls);
        }

        $pCount = 0;
        foreach ($calls as $c) {
            if ($c[0] === 'p_open') {
                $pCount++;
            }
        }

        // Exactly one paragraph spanning the whole item: unwrap it.
        // $pCount === 1 guarantees $calls is non-empty here.
        if (
            $pCount === 1
            && $calls[0][0] === 'p_open'
            && end($calls)[0] === 'p_close'
        ) {
            array_shift($calls);
            array_pop($calls);
        }

        return $calls;
    }
}