<?php

declare(strict_types=1);

/**
 * Converts Markdown content to DokuWiki syntax.
 *
 * The input is processed line by line while state is kept for fenced code
 * blocks, tables, nested lists and the current paragraph. Supported constructs:
 * - Headers (Markdown levels 1-6, mapped onto DokuWiki's five levels)
 * - Bold, italic, inline code
 * - Links and images
 * - Unordered and ordered lists (nesting derived from indentation)
 * - Tables (column alignment taken from the delimiter row)
 * - Fenced code blocks (```)
 * - Blockquotes (nesting depth preserved)
 * - Horizontal rules
 *
 * @license GPL 3 http://www.gnu.org/licenses/gpl-3.0.html
 * @author  sioc-de-narf
 */
class MarkdownToDokuWikiConverter
{
    /** Pattern for an ATX header: 1-6 '#', whitespace, then the title text. */
    private const TITLE_PATTERN = '/^(#{1,6})\s+(.+)$/';

    /** Pattern for a list marker (bullet, "1." or "1)") including its leading indentation. */
    private const LIST_MARKER_PATTERN = '/^\s*(?:[*\-+]|\d+[.)])\s+/';

    /** @var bool Whether we are currently inside a fenced code block */
    private bool $inCodeBlock = false;

    /** @var array<int, array<int, string>> Rows of the current table (row 0 is the header) */
    private array $tableRows = [];

    /** @var array<int, string> Alignment ('left', 'right' or 'center') per column of the current table */
    private array $tableAlignments = [];

    /** @var array<int, array{indent: int, type: string}> Stack tracking list nesting (indentation and type) */
    private array $listStack = [];

    /** @var array<int, string> Already converted paragraph lines waiting to be flushed */
    private array $paragraphBuffer = [];

    /**
     * Remove YAML front matter from the beginning of the document.
     *
     * Leading blank lines are skipped. If the first non-blank line is '---'
     * and a later line is '---' or '...', everything up to and including that
     * closing line is dropped. Otherwise the input is returned unchanged.
     *
     * @param string $markdown The raw Markdown (expected to use "\n" line endings).
     * @return string Markdown without the front matter.
     */
    private function stripYamlFrontMatter(string $markdown): string
    {
        $lines = explode("\n", $markdown);
        $lineCount = count($lines);

        $start = 0;
        while ($start < $lineCount && trim($lines[$start]) === '') {
            $start++;
        }

        if ($start >= $lineCount || trim($lines[$start]) !== '---') {
            return $markdown;
        }

        for ($i = $start + 1; $i < $lineCount; $i++) {
            $candidate = trim($lines[$i]);
            if ($candidate === '---' || $candidate === '...') {
                return implode("\n", array_slice($lines, $i + 1));
            }
        }

        // Opening delimiter without a closing one: not front matter.
        return $markdown;
    }

    /**
     * Convert Markdown to DokuWiki syntax.
     *
     * @param string $markdown The input Markdown text.
     * @return string The converted DokuWiki text.
     */
    public function convert(string $markdown): string
    {
        $this->reset();

        // Normalise line endings first so front-matter detection also works for CR-only input.
        $markdown = str_replace(["\r\n", "\r"], "\n", $markdown);
        $markdown = $this->stripYamlFrontMatter($markdown);

        // Tabs become 4 spaces so indentation can be measured in spaces.
        $lines = explode("\n", str_replace("\t", '    ', $markdown));
        $lineCount = count($lines);
        $output = [];

        $i = 0;
        while ($i < $lineCount) {
            $line = $lines[$i];
            $nextLine = $lines[$i + 1] ?? null;

            // Code fences are checked first: nothing inside a code block is interpreted.
            if (str_starts_with(trim($line), '```')) {
                $this->handleCodeBlock($line, $output);
                $i++;
                continue;
            }
            if ($this->inCodeBlock) {
                $output[] = $line;
                $i++;
                continue;
            }

            // Table (parseTable advances $i past the whole table)
            if ($this->isTableStart($line, $nextLine)) {
                $this->flushParagraph($output);
                $this->closeLists($output);
                $this->parseTable($lines, $i);
                $output[] = $this->renderTable();
                continue;
            }

            // Horizontal rule
            if ($this->isHorizontalRule($line)) {
                $this->flushParagraph($output);
                $this->closeLists($output);
                $output[] = '----';
                $i++;
                continue;
            }

            // Blockquote
            if ($this->isBlockquote($line)) {
                $this->flushParagraph($output);
                $output[] = $this->renderBlockquote($line);
                $i++;
                continue;
            }

            // List item
            if ($this->isListItem($line)) {
                $this->handleList($line, $output);
                $i++;
                continue;
            }

            // Header
            if ($this->isTitle($line)) {
                $this->flushParagraph($output);
                $this->closeLists($output);
                $output[] = $this->renderTitle($line);
                $i++;
                continue;
            }

            // Empty line: ends the current paragraph but keeps list nesting (loose lists).
            if (trim($line) === '') {
                $this->flushParagraph($output);
                $output[] = '';
                $i++;
                continue;
            }

            // Normal paragraph line. An unindented one ends any list, so a later
            // indented list item is not nested under a stale parent.
            if ($this->calculateIndent($line) === 0) {
                $this->closeLists($output);
            }
            $this->paragraphBuffer[] = $this->convertInline($line);
            $i++;
        }

        $this->flushParagraph($output);

        // Do not leave an unterminated fence open in the DokuWiki output.
        if ($this->inCodeBlock) {
            $output[] = '</code>';
            $this->inCodeBlock = false;
        }
        $this->closeLists($output);

        return implode("\n", $output);
    }

    /**
     * Reset internal state so the instance can be reused for another document.
     */
    private function reset(): void
    {
        $this->inCodeBlock = false;
        $this->tableRows = [];
        $this->tableAlignments = [];
        $this->listStack = [];
        $this->paragraphBuffer = [];
    }

    /**
     * Handle a code fence line (```), toggling the code block state.
     *
     * An opening fence flushes the pending paragraph and emits "<code>" or
     * "<code LANG>"; a closing fence emits "</code>".
     *
     * @param string   $line   The current line.
     * @param string[] $output The output array being built.
     */
    private function handleCodeBlock(string $line, array &$output): void
    {
        if ($this->inCodeBlock) {
            $output[] = '</code>';
            $this->inCodeBlock = false;
            return;
        }

        // Text buffered before the fence must appear before the code block.
        $this->flushParagraph($output);

        // Strip all fence backticks (fences may be longer than three) to get the info string.
        $lang = trim(ltrim(trim($line), '`'));
        $output[] = $lang !== '' ? "<code {$lang}>" : '<code>';
        $this->inCodeBlock = true;
    }

    /**
     * Determine if a line starts a Markdown table.
     *
     * The line must contain a pipe and be followed by a delimiter row. A
     * delimiter row without any pipe (e.g. "---") is only accepted for a
     * single-column header, so "a | b" followed by "---" is not a table.
     *
     * @param string      $line     The current line.
     * @param string|null $nextLine The next line (if any).
     * @return bool True if a table starts here.
     */
    private function isTableStart(string $line, ?string $nextLine): bool
    {
        if ($nextLine === null || !str_contains($line, '|') || !$this->isTableSeparator($nextLine)) {
            return false;
        }

        return str_contains($nextLine, '|') || count($this->parseTableRow($line)) === 1;
    }

    /**
     * Check whether a line is a table delimiter row such as "|---|:-:|".
     *
     * Every cell must consist of dashes with optional leading/trailing colon.
     *
     * @param string $line The line.
     * @return bool True if it is a delimiter row.
     */
    private function isTableSeparator(string $line): bool
    {
        return preg_match('/^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?\s*$/', $line) === 1;
    }

    /**
     * Parse a Markdown table starting at the current position.
     *
     * Fills $tableRows (header first) and $tableAlignments.
     *
     * @param string[] $lines The whole array of lines.
     * @param int      $i     Current index (advanced to the first line after the table).
     */
    private function parseTable(array $lines, int &$i): void
    {
        $headerLine = $lines[$i++];
        $separatorLine = $lines[$i++];

        // ":--:" => center, "--:" => right, everything else => left
        $this->tableAlignments = array_map(
            static fn (string $part): string => match (true) {
                str_starts_with($part, ':') && str_ends_with($part, ':') => 'center',
                str_ends_with($part, ':') => 'right',
                default => 'left',
            },
            $this->parseTableRow($separatorLine)
        );

        $this->tableRows = [$this->parseTableRow($headerLine)];

        $lineCount = count($lines);
        while ($i < $lineCount && str_contains($lines[$i], '|') && !$this->isTableSeparator($lines[$i])) {
            $this->tableRows[] = $this->parseTableRow($lines[$i]);
            $i++;
        }
    }

    /**
     * Parse a single Markdown table row into an array of cells.
     *
     * Surrounding whitespace and at most one leading and one trailing pipe
     * are removed before splitting, so "  | a | b |  " yields ['a', 'b'].
     *
     * @param string $line The table row line.
     * @return string[] Trimmed cell contents.
     */
    private function parseTableRow(string $line): array
    {
        $line = trim($line);
        if (str_starts_with($line, '|')) {
            $line = substr($line, 1);
        }
        if (str_ends_with($line, '|')) {
            $line = substr($line, 0, -1);
        }

        return array_map('trim', explode('|', $line));
    }

    /**
     * Render the parsed table as DokuWiki syntax.
     *
     * Header cells are delimited by '^', body cells by '|'. Cells share a
     * single delimiter, because a doubled delimiter means "colspan" in
     * DokuWiki. Every row is padded/truncated to the header's column count.
     *
     * @return string DokuWiki table representation.
     */
    private function renderTable(): string
    {
        if ($this->tableRows === []) {
            return '';
        }

        $columnCount = count($this->tableRows[0]);
        $rendered = [];

        foreach ($this->tableRows as $rowIndex => $row) {
            $delimiter = $rowIndex === 0 ? '^' : '|';
            $row = array_slice(array_pad($row, $columnCount, ''), 0, $columnCount);

            $cells = [];
            foreach ($row as $colIndex => $cell) {
                $cells[] = $this->alignCell(
                    $this->convertInline($cell),
                    $this->tableAlignments[$colIndex] ?? 'left'
                );
            }

            $rendered[] = $delimiter . implode($delimiter, $cells) . $delimiter;
        }

        return implode("\n", $rendered);
    }

    /**
     * Pad a table cell so DokuWiki applies the requested alignment.
     *
     * DokuWiki aligns by whitespace: two or more spaces on the left align
     * right, two or more on both sides center the text.
     *
     * @param string $cell      Converted cell content.
     * @param string $alignment 'left', 'right' or 'center'.
     * @return string Padded cell content (without delimiters).
     */
    private function alignCell(string $cell, string $alignment): string
    {
        return match ($alignment) {
            'right' => "  {$cell} ",
            'center' => "  {$cell}  ",
            default => " {$cell} ",
        };
    }

    /**
     * Check if a line is a Markdown list item.
     *
     * @param string $line The line.
     * @return bool True if it's a list item.
     */
    private function isListItem(string $line): bool
    {
        return preg_match('/^\s*(?:[*\-+]|\d+[.)])\s/', $line) === 1;
    }

    /**
     * Handle a list item line, managing nesting via indentation.
     *
     * DokuWiki list items need at least two spaces of indentation, so an item
     * at nesting depth N is emitted with 2 * N leading spaces.
     *
     * @param string   $line   The list item line.
     * @param string[] $output The output array.
     */
    private function handleList(string $line, array &$output): void
    {
        $this->flushParagraph($output);

        $indent = $this->calculateIndent($line);
        $type = preg_match('/^\s*\d+[.)]\s/', $line) === 1 ? 'ordered' : 'unordered';

        // Drop every level that is at least as deep as this item (siblings and deeper lists).
        while (
            $this->listStack !== []
            && $indent <= $this->listStack[array_key_last($this->listStack)]['indent']
        ) {
            array_pop($this->listStack);
        }
        $this->listStack[] = ['indent' => $indent, 'type' => $type];

        $dokuIndent = str_repeat('  ', count($this->listStack));
        $content = preg_replace(self::LIST_MARKER_PATTERN, '', $line) ?? $line;

        $output[] = $dokuIndent . ($type === 'ordered' ? '- ' : '* ') . $this->convertInline($content);
    }

    /**
     * Calculate the indentation of a line.
     *
     * @param string $line The line.
     * @return int Number of leading whitespace characters.
     */
    private function calculateIndent(string $line): int
    {
        return strlen($line) - strlen(ltrim($line));
    }

    /**
     * Close any open lists by clearing the nesting stack.
     *
     * @param string[] $output The output array (unused, kept for consistency).
     */
    private function closeLists(array &$output): void
    {
        $this->listStack = [];
    }

    /**
     * Check if a line is a Markdown ATX header (1-6 '#' followed by text).
     *
     * @param string $line The line.
     * @return bool True if it's a header.
     */
    private function isTitle(string $line): bool
    {
        return preg_match(self::TITLE_PATTERN, trim($line)) === 1;
    }

    /**
     * Render a Markdown header as a DokuWiki header.
     *
     * Level 1 maps to six '=' signs. DokuWiki needs at least two, so Markdown
     * levels 5 and 6 both map to "== title ==". An optional closing sequence
     * of '#' ("## Title ##") is removed.
     *
     * @param string $line The header line.
     * @return string DokuWiki header, or the line unchanged if it is not a header.
     */
    private function renderTitle(string $line): string
    {
        if (preg_match(self::TITLE_PATTERN, trim($line), $matches) !== 1) {
            return $line;
        }

        $level = strlen($matches[1]);
        $title = trim(preg_replace('/\s+#+\s*$/', '', $matches[2]) ?? $matches[2]);
        $equals = str_repeat('=', max(2, 7 - $level));

        return "{$equals} {$title} {$equals}";
    }

    /**
     * Check if a line is a horizontal rule.
     *
     * A rule is three or more of the same character (-, * or _), optionally
     * separated by spaces, and nothing else on the line.
     *
     * @param string $line The line.
     * @return bool True if it's a horizontal rule.
     */
    private function isHorizontalRule(string $line): bool
    {
        return preg_match('/^([-*_])(?:[ ]*\1){2,}$/', trim($line)) === 1;
    }

    /**
     * Check if a line is a blockquote (first non-blank character is '>').
     *
     * @param string $line The line.
     * @return bool True if it's a blockquote.
     */
    private function isBlockquote(string $line): bool
    {
        return str_starts_with(ltrim($line), '>');
    }

    /**
     * Render a blockquote line.
     *
     * The number of leading '>' markers ("> > text" or ">> text") becomes the
     * DokuWiki quote depth, so a plain "> text" stays a first-level quote.
     *
     * @param string $line The blockquote line.
     * @return string DokuWiki quote line ("> ...", ">> ...", ...).
     */
    private function renderBlockquote(string $line): string
    {
        $trimmed = ltrim($line);
        if (preg_match('/^((?:>[ ]?)+)(.*)$/', $trimmed, $matches) !== 1) {
            return $this->convertInline($trimmed);
        }

        $depth = substr_count($matches[1], '>');

        return rtrim(str_repeat('>', $depth) . ' ' . $this->convertInline(ltrim($matches[2])));
    }

    /**
     * Convert inline Markdown formatting to DokuWiki.
     *
     * Inline code, images and links are converted first and swapped for
     * placeholders, so the emphasis rules cannot corrupt code or URLs
     * (e.g. underscores in "a_b_c"). Emphasis is then converted on the
     * remaining text and the placeholders are restored.
     *
     * @param string $text The text to convert.
     * @return string Converted text.
     */
    private function convertInline(string $text): string
    {
        // NUL bytes delimit the placeholders below, so they must not occur in the input.
        $text = str_replace("\x00", '', $text);

        $protected = [];
        $protect = static function (string $dokuWiki) use (&$protected): string {
            $protected[] = $dokuWiki;
            return "\x00" . (count($protected) - 1) . "\x00";
        };

        // Inline code: `code` → ''code''
        $text = preg_replace_callback(
            '/`(.+?)`/',
            static fn (array $m): string => $protect("''" . $m[1] . "''"),
            $text
        ) ?? $text;

        // Images: ![alt](url) → {{url|alt}}
        $text = preg_replace_callback(
            '/!\[([^\]]*)\]\(([^)]+)\)/',
            fn (array $m): string => $protect('{{' . $this->cleanLinkTarget($m[2]) . '|' . $m[1] . '}}'),
            $text
        ) ?? $text;

        // Links: [text](url) → [[url|text]] (emphasis inside the link text is still converted)
        $text = preg_replace_callback(
            '/\[([^\]]+)\]\(([^)]+)\)/',
            fn (array $m): string => $protect(
                '[[' . $this->cleanLinkTarget($m[2]) . '|' . $this->convertEmphasis($m[1]) . ']]'
            ),
            $text
        ) ?? $text;

        $text = $this->convertEmphasis($text);

        // Restore newest first: a later placeholder (link) may contain an earlier one (code, image).
        for ($index = count($protected) - 1; $index >= 0; $index--) {
            $text = str_replace("\x00" . $index . "\x00", $protected[$index], $text);
        }

        return $text;
    }

    /**
     * Convert Markdown bold/italic markers to DokuWiki.
     *
     * "**bold**" is already valid DokuWiki and is left untouched. The italic
     * rules only match single markers, so they never re-match inside bold
     * text, and underscore rules require non-word characters around the
     * markers so identifiers such as snake_case_names are preserved.
     *
     * @param string $text Text without code spans or link targets.
     * @return string Text with emphasis converted.
     */
    private function convertEmphasis(string $text): string
    {
        $rules = [
            // ***text*** → **//text//**
            '/\*\*\*(?=\S)(.+?)(?<=\S)\*\*\*/' => '**//$1//**',
            // __text__ → **text**
            '/(?<![A-Za-z0-9_])__(?=\S)(.+?)(?<=\S)__(?![A-Za-z0-9_])/' => '**$1**',
            // *text* → //text//
            '/(?<!\*)\*(?![\s*])(.+?)(?<![\s*])\*(?!\*)/' => '//$1//',
            // _text_ → //text//
            '/(?<![A-Za-z0-9_])_(?![\s_])(.+?)(?<![\s_])_(?![A-Za-z0-9_])/' => '//$1//',
        ];

        foreach ($rules as $pattern => $replacement) {
            // preg_replace returns null on a PCRE error; keep the text as is in that case.
            $text = preg_replace($pattern, $replacement, $text) ?? $text;
        }

        return $text;
    }

    /**
     * Normalise the target of a Markdown link or image.
     *
     * Trims whitespace and drops an optional trailing title
     * (`url "title"` or `url 'title'`).
     *
     * @param string $target Raw content of the parentheses.
     * @return string The bare URL / page reference.
     */
    private function cleanLinkTarget(string $target): string
    {
        $target = trim($target);
        $target = preg_replace('/\s+(?:"[^"]*"|\'[^\']*\')$/', '', $target) ?? $target;

        return trim($target);
    }

    /**
     * Flush any buffered paragraph lines to the output as a single line.
     *
     * @param string[] $output The output array.
     */
    private function flushParagraph(array &$output): void
    {
        if ($this->paragraphBuffer === []) {
            return;
        }

        $output[] = implode(' ', $this->paragraphBuffer);
        $this->paragraphBuffer = [];
    }
}