<?php

namespace dokuwiki\Parsing\Helpers;

/**
 * Pure helpers for parsing code / file attribute blocks.
 *
 * Side-effect-free: returns data and leaves handler emission to the
 * caller. Shared by DokuWiki's Code / File modes and GfmCode / GfmFile.
 */
class Code
{
    /**
     * Parse the attribute block of a code / file tag or fence opener.
     *
     * Accepts the text between <code and > (DokuWiki) or the info
     * string after a fence opener (GFM). The grammar is the same in both
     * places: an optional [key=value,...] bracket block appears
     * anywhere in the string and contains highlight options; what
     * remains, whitespace-split, is language then filename.
     *
     * Conventions carried over from DokuWiki's Code mode:
     *  - "-" as the language means "no language" (returned as null);
     *  - "html" is aliased to GeSHi's "html4strict" identifier.
     *
     * The bracket match is greedy: everything from the first "[" to the
     * last "]" is treated as the options block and removed from the text.
     *
     * @param string $attr raw attribute text (no <code/> or fence chars)
     * @return array{0: ?string, 1: ?string, 2: ?array} [language, filename, options]
     */
    public static function parseAttributes(string $attr): array
    {
        $options = null;
        if (preg_match('/\[.*\]/', $attr, $optMatch)) {
            // cut the options block out so it cannot be mistaken for language/filename
            $attr = str_replace($optMatch[0], '', $attr);
            $options = self::parseHighlightOptions($optMatch[0]);
        }

        // limit 2: the first token is the language, the rest (spaces included) is the filename
        $parts = preg_split('/\s+/', trim($attr), 2, PREG_SPLIT_NO_EMPTY);
        $language = $parts[0] ?? null;
        $filename = $parts[1] ?? null;

        if ($language === 'html') {
            $language = 'html4strict';
        }
        if ($language === '-') {
            $language = null;
        }

        return [$language, $filename, $options];
    }

    /**
     * Parse a [key=value,...] block of highlight options.
     *
     * Keys without a value are treated as booleans (1). Values may be
     * bare or "quoted"; quoted values may contain commas and whitespace.
     * A bare value runs up to the next whitespace; a "]" or "," that ends
     * up at its tail is a delimiter and is stripped, so
     * [enable_line_numbers=false] really yields false. Bare options must
     * therefore be separated by whitespace (optionally after a comma).
     *
     * Only a fixed whitelist of keys is retained; unknown keys are
     * silently dropped. Retained keys are normalised as follows:
     *  - enable_line_numbers, enable_keyword_links: bool (the literal
     *    string "false" and PHP-falsy values give false);
     *  - start_line_numbers_at: int;
     *  - highlight_lines_extra: list of unique, non-zero ints.
     *
     * @param string $options the [...] string including the brackets
     * @return array|null key/value map, or null if nothing recognised
     */
    public static function parseHighlightOptions(string $options): ?array
    {
        $result = [];
        preg_match_all(
            '/(\w+(?:="[^"]*"))|(\w+(?:=[^\s]*))|(\w+[^=\s\]])(?:\s*)/',
            $options,
            $matches,
            PREG_SET_ORDER
        );
        foreach ($matches as $match) {
            $equalSign = strpos($match[0], '=');
            if ($equalSign === false) {
                // bare key: acts as a boolean switch
                $result[trim($match[0])] = 1;
                continue;
            }

            $key = substr($match[0], 0, $equalSign);
            $value = substr($match[0], $equalSign + 1);

            // Group 1 is only non-empty when the key="value" alternative matched.
            $isQuoted = ($match[1] ?? '') !== '';
            if (!$isQuoted) {
                // The bare-value pattern swallows everything up to whitespace,
                // including the closing bracket or a separating comma.
                $value = rtrim($value, '],');
            }
            $value = trim($value, '"');

            $result[$key] = ($value !== '') ? $value : 1;
        }

        $result = array_intersect_key(
            $result,
            array_flip([
                'enable_line_numbers',
                'start_line_numbers_at',
                'highlight_lines_extra',
                'enable_keyword_links',
            ])
        );

        foreach (['enable_line_numbers', 'enable_keyword_links'] as $flag) {
            if (isset($result[$flag])) {
                // the literal word "false" must switch the feature off
                $result[$flag] = $result[$flag] !== 'false' && (bool)$result[$flag];
            }
        }
        if (isset($result['highlight_lines_extra'])) {
            $lines = array_map(intval(...), explode(',', (string)$result['highlight_lines_extra']));
            // drop zeros / unparsable entries and duplicates, then re-index
            // because array_filter and array_unique both preserve keys
            $result['highlight_lines_extra'] = array_values(array_unique(array_filter($lines)));
        }
        if (isset($result['start_line_numbers_at'])) {
            $result['start_line_numbers_at'] = (int)$result['start_line_numbers_at'];
        }

        return $result === [] ? null : $result;
    }
}