<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\Nest;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Block quotes — single mode covering both DokuWiki and GFM dialects.
 *
 * Captures one or more consecutive column-0 `>`-prefixed lines via
 * addSpecialPattern. Nesting is resolved at this level by counting
 * leading `>` markers per line and emitting `quote_open` / `quote_close`
 * pairs around per-depth body segments — sub-parser recursion is
 * deliberately not used because each sub-parser invocation needs its
 * own Handler instance and threading the nesting through the registry
 * pool would only buy us back what depth-walking already provides.
 *
 * Each per-depth segment's body is sub-parsed via
 * ModeRegistry::withSubParser() so block content (lists, fenced code,
 * tables) works inside the body. The sub-parser excludes BASEONLY so
 * headers do not fire inside a blockquote — same rationale as
 * GfmListblock: header instructions drive TOC entries, section-edit
 * anchors, and section_open/section_close ranges that don't compose
 * with a `<blockquote>` container. The sub-parser also excludes
 * gfm_quote itself; nesting is handled at this level, not via
 * sub-parser recursion. When a list inside a quote re-fires gfm_quote
 * during the list-item sub-parse, the registry's pool hands the
 * inner call a different parser instance for the same exclusion key,
 * so the outer parse state is not corrupted.
 *
 * Lazy continuation is deliberately not supported. Every quote line
 * must begin with `>` at column 0; the first non-`>` line ends the
 * quote. This matches the policy GfmListblock enforces for lists —
 * markers required on every line. Trade-off: a few CommonMark
 * blockquote spec examples that rely on lazy continuation stay red,
 * but the parser stays single-pass and predictable.
 *
 * Rendering shape depends on syntax preference. Under MD-preferred
 * (`markdown`, `md+dw`) the sub-parser's paragraph wrapping survives:
 * a quote with one paragraph emits `<blockquote><p>...</p></blockquote>`.
 * Under DW-preferred (`dokuwiki`, `dw+md`) a post-pass flattens
 * paragraph wrapping into explicit `linebreak` calls so existing DW
 * pages keep their `<blockquote>...line1<br/>line2...</blockquote>`
 * rendering. Same `quote_open` / `quote_close` instructions in both
 * modes — no renderer change required.
 */
class GfmQuote extends AbstractMode
{
    /** @inheritdoc */
    public function getSort()
    {
        return 220;
    }

    /** @inheritdoc */
    public function preConnect()
    {
        ModeRegistry::getInstance()->registerBlockEolMode('gfm_quote');
    }

    /**
     * Capture an entire blockquote in one match.
     *
     * The pattern requires a column-0 `>` on every line. The first
     * non-`>` line ends the capture (no lazy continuation). A bare `>`
     * with no body is valid — it represents an empty paragraph break
     * inside the quote (spec 240) or an empty quote (spec 239).
     *
     * @param string $mode the lexer state name to wire the pattern into
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern('\n>[^\n]*(?:\n>[^\n]*)*', $mode, 'gfm_quote');
    }

    /**
     * Turn a captured blockquote into quote_open / body / quote_close calls.
     *
     * The match is split into lines and each line is reduced to its depth
     * (number of leading `>` markers) and its remaining content.
     * Consecutive lines of equal depth are buffered and handed to
     * emitBody() as one segment. Whenever the depth changes, the buffered
     * segment is flushed first and then the required number of
     * `quote_open` or `quote_close` calls is emitted.
     *
     * Depth-change calls are stamped with the position of the newline
     * that introduces the line causing the change (for the first line
     * that is `$pos` itself). This keeps call positions in source order:
     * the body flushed just before a depth change carries positions of
     * earlier lines, so stamping the following open/close with the start
     * of the whole match would make positions run backwards.
     *
     * @inheritdoc
     * @param string $match the captured blockquote, including its leading newline
     * @param int $state lexer state (not used here)
     * @param int $pos byte position of the match within the source
     * @param Handler $handler handler to emit calls on
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $stripped = ltrim($match, "\n");
        $cursor = strlen($match) - strlen($stripped);

        $parsed = [];
        // The first line is introduced by the match's own leading newline.
        $markerPos = $pos;
        foreach (explode("\n", $stripped) as $line) {
            $parsed[] = ['marker' => $markerPos] + $this->parseLine($line, $pos + $cursor);
            $cursor += strlen($line) + 1; // +1 for the \n consumed by explode
            $markerPos = $pos + $cursor - 1; // the \n that introduces the next line
        }

        $currentDepth = 0;
        $buffer = [];
        $segmentStart = $pos;

        foreach ($parsed as $p) {
            if ($p['depth'] !== $currentDepth) {
                // Flush what was collected at the previous depth before
                // opening or closing any quote levels.
                if ($buffer) {
                    $this->emitBody($handler, $segmentStart, implode("\n", $buffer));
                    $buffer = [];
                }
                while ($currentDepth < $p['depth']) {
                    $handler->addCall('quote_open', [], $p['marker']);
                    $currentDepth++;
                }
                while ($currentDepth > $p['depth']) {
                    $handler->addCall('quote_close', [], $p['marker']);
                    $currentDepth--;
                }
            }
            // The first buffered line fixes where the segment starts in the source.
            if (!$buffer) $segmentStart = $p['offset'];
            $buffer[] = $p['content'];
        }

        if ($buffer) {
            $this->emitBody($handler, $segmentStart, implode("\n", $buffer));
        }
        // Close every level still open at the end of the captured text.
        while ($currentDepth > 0) {
            $handler->addCall('quote_close', [], $pos + strlen($match));
            $currentDepth--;
        }

        return true;
    }

    /**
     * Parse one captured line into depth, content, and content offset.
     *
     * Counts leading `>` characters (each consuming one optional
     * trailing space) to compute the depth. The remainder of the line
     * is the content for that depth. The returned `offset` is the
     * absolute byte position of the content's first character within
     * the source (`$lineStart` plus the length of the consumed marker
     * prefix).
     *
     * `> > foo` → depth 2, content `foo`. `>>foo` → depth 2, content
     * `foo`. `>` alone → depth 1, content empty.
     *
     * @param string $line one line of captured blockquote text, with
     *                     no surrounding newlines
     * @param int $lineStart absolute byte offset of the line's first
     *                       character within the source
     * @return array{depth: int, content: string, offset: int}
     */
    protected function parseLine(string $line, int $lineStart): array
    {
        $depth = 0;
        $i = 0;
        $len = strlen($line);
        while ($i < $len && $line[$i] === '>') {
            $depth++;
            $i++;
            // a single space directly after a marker belongs to the marker
            if ($i < $len && $line[$i] === ' ') $i++;
        }
        return [
            'depth' => $depth,
            'content' => substr($line, $i),
            'offset' => $lineStart + $i,
        ];
    }

    /**
     * Sub-parse a body segment and emit its calls inside a Nest.
     *
     * Drops `document_start` / `document_end` from the sub-parser
     * output. Under DW-preferred syntax, also runs the linebreak
     * post-pass so paragraph wrapping is flattened into explicit
     * `linebreak` calls. Empty bodies emit nothing.
     *
     * `$segmentStart` is the absolute byte offset of the segment's
     * first content character within the source. Sub-handler positions
     * are relative to the sub-parsed body, which begins at the first
     * line of the segment, so adding `$segmentStart` to each
     * sub-handler position lands the call back on the right byte in
     * the source. Lines after the first drift slightly because the
     * `>[ ]?` prefix between source lines collapses to a single `\n`
     * in the sub-parsed body — drift is bounded by the prefix length
     * (one or two bytes per line skipped).
     *
     * @param Handler $handler outer handler to emit calls on
     * @param int $segmentStart absolute byte offset of the segment's
     *                          first content character within the source
     * @param string $body concatenated content of the buffered lines,
     *                     separated by `\n`
     */
    protected function emitBody(Handler $handler, int $segmentStart, string $body): void
    {
        $registry = ModeRegistry::getInstance();
        $calls = $registry->withSubParser(
            [ModeRegistry::CATEGORY_BASEONLY],
            ['gfm_quote'],
            static function ($subParser) use ($body) {
                $subParser->getHandler()->reset();
                $subParser->parse($body);
                return $subParser->getHandler()->calls;
            }
        );

        // Strip the document wrapper; only the inner calls belong in the quote.
        if ($calls && $calls[0][0] === 'document_start') array_shift($calls);
        if ($calls && end($calls)[0] === 'document_end') array_pop($calls);

        if ($registry->isDwPreferred()) {
            $calls = $this->flattenForDwRendering($calls);
        }

        if (!$calls) return;

        // Route the translated calls through a Nest, then hand the call
        // writer returned by Nest::process() back to the handler.
        $outer = $handler->getCallWriter();
        $nest = new Nest($outer);
        $handler->setCallWriter($nest);
        foreach ($calls as $call) {
            $handler->addCall($call[0], $call[1], $segmentStart + $call[2]);
        }
        $handler->setCallWriter($nest->process());
    }

    /**
     * Flatten paragraph structure into linebreak-separated cdata.
     *
     * DW Quote historically rendered each `>`-line as a separate visible
     * line via an explicit `<br/>` between same-depth markers. To
     * preserve that rendering for DW-preferred installs, this pass:
     *
     *  1. Replaces every `p_open` and `p_close` with a `linebreak`
     *     call. After this, paragraph boundaries become two adjacent
     *     linebreaks (the close-of-prev plus the open-of-next), which
     *     matches the DW two-`<br/>`-for-blank-line shape.
     *  2. Drops all leading and trailing `linebreak` calls so the run
     *     starts and ends with content, not break markers.
     *  3. Splits any `cdata` containing `\n` into multiple `cdata`
     *     calls separated by `linebreak` — sub-parsed paragraphs may
     *     contain soft breaks that a renderer would otherwise collapse
     *     to a single space. Empty parts produce no `cdata` call; every
     *     piece keeps the position of the call it was split from.
     *
     * Block-level calls inside the body (list_open from a list inside
     * a quote, code, etc.) are passed through unchanged.
     *
     * @param array $calls sub-parsed call list to flatten
     * @return array the flattened call list
     */
    protected function flattenForDwRendering(array $calls): array
    {
        $stage = [];
        foreach ($calls as $call) {
            if ($call[0] === 'p_open' || $call[0] === 'p_close') {
                $stage[] = ['linebreak', [], $call[2]];
            } else {
                $stage[] = $call;
            }
        }

        while ($stage && $stage[0][0] === 'linebreak') array_shift($stage);
        while ($stage && end($stage)[0] === 'linebreak') array_pop($stage);

        $out = [];
        foreach ($stage as $call) {
            if ($call[0] === 'cdata' && str_contains($call[1][0], "\n")) {
                $parts = explode("\n", $call[1][0]);
                foreach ($parts as $i => $part) {
                    if ($i > 0) $out[] = ['linebreak', [], $call[2]];
                    if ($part !== '') $out[] = ['cdata', [$part], $call[2]];
                }
            } else {
                $out[] = $call;
            }
        }

        return $out;
    }
}