xref: /dokuwiki/inc/Parsing/Helpers/Code.php (revision 1e28e406b358f79221c515b2a56520d5dbbfb6c8)
1*1e28e406SAndreas Gohr<?php
2*1e28e406SAndreas Gohr
3*1e28e406SAndreas Gohrnamespace dokuwiki\Parsing\Helpers;
4*1e28e406SAndreas Gohr
5*1e28e406SAndreas Gohr/**
6*1e28e406SAndreas Gohr * Pure helpers for parsing code / file attribute blocks.
7*1e28e406SAndreas Gohr *
8*1e28e406SAndreas Gohr * Side-effect-free: returns data and leaves handler emission to the
9*1e28e406SAndreas Gohr * caller. Shared by DokuWiki's Code / File modes and GfmCode / GfmFile.
10*1e28e406SAndreas Gohr */
class Code
{
    /**
     * Parse the attribute block of a code / file tag or fence opener.
     *
     * Accepts the text between <code and > (DokuWiki) or the info
     * string after a fence opener (GFM). The grammar is the same in both
     * places: an optional [key=value,...] bracket block may appear
     * anywhere in the string and contains highlight options; what
     * remains, whitespace-split, is language then filename.
     *
     * The bracket block is matched greedily, i.e. from the first "[" to
     * the last "]" in the string, and every occurrence of that exact
     * text is removed before language and filename are extracted. The
     * remainder is split into at most two parts, so the filename keeps
     * any inner whitespace.
     *
     * Conventions carried over from DokuWiki's Code mode:
     *   - "-" as the language means "no language" (returned as null);
     *   - "html" is aliased to GeSHi's "html4strict" identifier.
     *
     * @param string $attr raw attribute text (no <code/> or fence chars)
     * @return array{0: ?string, 1: ?string, 2: ?array} [language, filename, options]
     */
    public static function parseAttributes(string $attr): array
    {
        $options = null;
        if (preg_match('/\[.*\]/', $attr, $optMatch)) {
            // Cut the option block out so it cannot be mistaken for language / filename.
            $attr = str_replace($optMatch[0], '', $attr);
            $options = self::parseHighlightOptions($optMatch[0]);
        }

        // At most two parts: the first word is the language, the rest the filename.
        $parts = preg_split('/\s+/', trim($attr), 2, PREG_SPLIT_NO_EMPTY);
        $language = $parts[0] ?? null;
        $filename = $parts[1] ?? null;

        if ($language === 'html') $language = 'html4strict';
        if ($language === '-') $language = null;

        return [$language, $filename, $options];
    }

    /**
     * Parse a [key=value,...] block of highlight options.
     *
     * Keys without a value are treated as booleans (1). Values may be
     * bare or "quoted"; quoted values may contain commas and spaces.
     * A bare value runs up to the next whitespace, so bare options must
     * be separated by whitespace (optionally preceded by a comma); a
     * trailing "," or the closing "]" is not part of a bare key or value.
     *
     * Only the keys enable_line_numbers, start_line_numbers_at,
     * highlight_lines_extra and enable_keyword_links are retained;
     * unknown keys are silently dropped. Retained values are normalised:
     *   - enable_line_numbers / enable_keyword_links: bool, where the
     *     literal string "false" counts as false;
     *   - start_line_numbers_at: int;
     *   - highlight_lines_extra: array of unique non-zero ints (original
     *     positions are kept as array keys).
     *
     * @param string $options the [...] string including the brackets
     * @return array|null key/value map, or null if nothing recognised
     */
    public static function parseHighlightOptions(string $options): ?array
    {
        $result = [];
        preg_match_all('/(\w+(?:="[^"]*"))|(\w+(?:=[^\s]*))|(\w+[^=\s\]])(?:\s*)/', $options, $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            $token = trim($match[0]);

            // Capture group 1 is only filled for key="quoted" tokens. The two
            // bare alternatives also swallow a separating "," and the closing
            // "]" of the block (e.g. "false]"), which would defeat the
            // 'false' check and the key whitelist below, so strip them here.
            $isQuoted = ($match[1] ?? '') !== '';
            if (!$isQuoted) {
                $token = rtrim($token, ',]');
            }

            $equalSign = strpos($token, '=');
            if ($equalSign === false) {
                // bare key: acts as a boolean flag
                $result[$token] = 1;
                continue;
            }

            $key = substr($token, 0, $equalSign);
            $value = trim(substr($token, $equalSign + 1), '"');
            // "key=" and key="" behave like a bare key
            $result[$key] = ($value !== '') ? $value : 1;
        }

        // Drop everything that is not a known highlight option.
        $result = array_intersect_key(
            $result,
            array_flip([
                'enable_line_numbers',
                'start_line_numbers_at',
                'highlight_lines_extra',
                'enable_keyword_links'
            ])
        );

        if (isset($result['enable_line_numbers'])) {
            $result['enable_line_numbers'] = self::toBool($result['enable_line_numbers']);
        }
        if (isset($result['highlight_lines_extra'])) {
            // comma separated list of line numbers; zero / non-numeric entries and duplicates are removed
            $lines = array_map(intval(...), explode(',', $result['highlight_lines_extra']));
            $result['highlight_lines_extra'] = array_unique(array_filter($lines));
        }
        if (isset($result['start_line_numbers_at'])) {
            $result['start_line_numbers_at'] = (int)$result['start_line_numbers_at'];
        }
        if (isset($result['enable_keyword_links'])) {
            $result['enable_keyword_links'] = self::toBool($result['enable_keyword_links']);
        }
        if (count($result) == 0) {
            return null;
        }

        return $result;
    }

    /**
     * Convert a raw option value into a boolean.
     *
     * The literal string "false" is false; everything else follows PHP's
     * regular boolean cast (so "0" is false, 1 and "true" are true).
     *
     * @param int|string $value raw option value as collected by the tokenizer
     * @return bool
     */
    private static function toBool(int|string $value): bool
    {
        if ($value === 'false') {
            return false;
        }
        return (bool)$value;
    }
}
116