1<?php
2
3use dokuwiki\Extension\SyntaxPlugin;
4
5/**
6 * DokuWiki Plugin character (Syntax Component)
7 *
8 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
9 * @author Vincent Tscherter <vincent@tscherter.net>
10 */
class syntax_plugin_character extends SyntaxPlugin {

    /** Highest code point of the Unicode code space (U+10FFFF). */
    const MAX_CODEPOINT = 0x10FFFF;

    /**
     * Single-character escape sequences, keyed by code point.
     *
     * Each entry is [escape sequence exactly as the user types it, character name].
     */
    const ES = [
        0 => ['\\0', 'NULL'],
        8 => ['\\b', 'BACKSPACE'],
        9 => ['\\t', 'CHARACTER TABULATION'],
        10 => ['\\n', 'LINE FEED'],
        11 => ['\\v', 'LINE TABULATION'],
        12 => ['\\f', 'FORM FEED'],
        13 => ['\\r', 'CARRIAGE RETURN'],
        92 => ['\\\\', 'REVERSE SOLIDUS']
    ];

    /**
     * Syntax mode type of this plugin.
     *
     * NOTE: 'substition' is the spelling used for this mode type by DokuWiki;
     * it must not be "corrected".
     *
     * @return string
     */
    public function getType() {
        return 'substition';
    }

    /**
     * Paragraph handling type of this plugin.
     *
     * @return string
     */
    public function getPType() {
        return 'normal';
    }

    /**
     * Sort position used when registering the mode in the parser.
     *
     * @return int
     */
    public function getSort() {
        return 100;
    }

    /**
     * Registers the {{c>...}} pattern with the lexer.
     *
     * The optional extra '}' before the closing '}}' lets the payload itself
     * end in a closing brace, e.g. {{c>\u{41}}} or {{c>}}}.
     *
     * @param string $mode parser mode the pattern is attached to
     */
    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('{{c>[^}]*}?}}', $mode, 'plugin_character');
    }

    /**
     * Parses the payload of a {{c>...}} match into a code point.
     *
     * Recognised notations: U+XXXX, \xHH, \u{X...}, decimal and hexadecimal
     * HTML entities, URL encoding, a single literal character and the escape
     * sequences listed in self::ES.
     *
     * @param string       $match   the text matched by the pattern
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler handler instance (unused)
     * @return array keys 'raw', 'type', 'codepoint' and, for unrecognised input, 'msg'
     */
    public function handle($match, $state, $pos, Doku_Handler $handler) {
        // Strip the leading '{{c>' and the trailing '}}'. The cast guards
        // against substr() returning false for an empty payload on PHP 7.
        $raw = (string) substr($match, 4, -2);
        $data = ['raw' => $raw];

        if (preg_match('/^u\+([0-9a-f]{1,7})$/i', $raw)) {
            // U+0000 to U+10FFFF notation; leading zeros are tolerated and
            // the range itself is validated when rendering.
            $data['type'] = 'codepoint';
            $data['codepoint'] = hexdec(substr($raw, 2));
        } elseif (preg_match('/^\\\\x[0-9a-f]{2}$/i', $raw)) {
            $data['type'] = 'hex_esc_seq';
            $data['codepoint'] = hexdec(substr($raw, 2));
        } elseif (preg_match('/^\\\\u\{[0-9a-f]{1,6}\}$/i', $raw)) {
            $data['type'] = 'codepoint_escape';
            $data['codepoint'] = hexdec(substr($raw, 3, -1));
        } elseif (preg_match('/^&#[0-9]+;$/i', $raw)) {
            $data['type'] = 'dec_html_entity';
            $data['codepoint'] = intval(substr($raw, 2, -1));
        } elseif (preg_match('/^&#x[0-9a-f]+;$/i', $raw)) {
            $data['type'] = 'hex_html_entity';
            $data['codepoint'] = hexdec(substr($raw, 3, -1));
        } elseif (preg_match('/^(%[0-9a-f]{2}){1,4}/i', $raw)) {
            // The decoded bytes must form exactly one valid UTF-8 character;
            // mb_ord() returns false for malformed sequences such as %FF.
            $decoded = urldecode($raw);
            $ord = mb_strlen($decoded, 'UTF-8') === 1 ? mb_ord($decoded, 'UTF-8') : false;
            $data['type'] = $ord === false ? 'error' : 'url_encoding';
            $data['codepoint'] = $ord === false ? -1 : $ord;
        } elseif (mb_strlen($raw, 'UTF-8') === 1) {
            $ord = mb_ord($raw, 'UTF-8');
            $data['type'] = $ord === false ? 'error' : 'character_literal';
            $data['codepoint'] = $ord === false ? -1 : $ord;
        } else {
            // Last resort: one of the known backslash escape sequences.
            $data['type'] = 'error';
            $data['msg'] = mb_strlen($raw, 'UTF-8');
            $data['codepoint'] = -1;
            foreach (self::ES as $key => $entry) {
                if ($raw === $entry[0]) {
                    $data['type'] = 'escape_sequence';
                    $data['codepoint'] = $key;
                    break;
                }
            }
        }
        return $data;
    }

    /**
     * Renders the character in the notation the user typed, wrapped in a
     * tooltip listing the alternative notations of the same code point.
     *
     * @param string        $mode     output format; only 'xhtml' is handled
     * @param Doku_Renderer $renderer renderer whose ->doc is appended to
     * @param array         $data     result of handle()
     * @return bool false for unsupported formats, true otherwise
     */
    public function render($mode, Doku_Renderer $renderer, $data) {
        if ($mode !== 'xhtml') return false;

        $type = isset($data['type']) ? $data['type'] : 'error';
        $codepoint = isset($data['codepoint']) ? $data['codepoint'] : -1;
        $raw = isset($data['raw']) ? (string) $data['raw'] : '';

        // mb_chr() additionally rejects anything it cannot encode as UTF-8.
        $char = ($type !== 'error' && self::isScalarValue($codepoint))
            ? mb_chr($codepoint, 'UTF-8')
            : false;

        if ($char === false) {
            $msg = empty($data['msg']) ? '-' : $data['msg'];
            $renderer->doc .= '<code style="color: red">⚠️ invalid input ['
                . htmlentities($raw) . ']: ' . htmlentities((string) $msg) . '</code>';
            return true;
        }

        $hex = strtoupper(dechex($codepoint));
        $charname = extension_loaded('intl') ? ' ' . IntlChar::charName($codepoint) : '';
        if (isset(self::ES[$codepoint])) {
            // Control characters get their conventional name from the table.
            $charname = ' ' . self::ES[$codepoint][1];
        }

        // urlencode() encodes a space as '+'; show the percent form instead.
        $urlencoding = urlencode($char);
        if ($urlencoding === '+') $urlencoding = '%20';

        $orange = ' style="color: goldenrod"';
        $dred = ' style="color: darkred"';
        $green = ' style="color: green"';
        $blue = ' style="color: blue"';
        $literalStyle = 'white-space: pre; padding: 0 0.25ex; color: ';

        $rendered = 'invalid';
        $tip = '';
        switch ($type) {
            case 'codepoint':
                $rendered = '<code' . $dred . '><span' . $orange . '>U+</span>'
                    . str_pad($hex, 4, '0', STR_PAD_LEFT) . '</code>';
                $tip = 'unicode code point';
                break;
            case 'hex_esc_seq':
                $rendered = '<code' . $dred . '><span' . $orange . '>\x</span>'
                    . str_pad($hex, 2, '0', STR_PAD_LEFT) . '</code>';
                $tip = 'hexadecimal escape sequence';
                break;
            case 'codepoint_escape':
                $rendered = '<code' . $dred . '><span' . $orange . '>\u&#x7B;</span>'
                    . $hex . '<span' . $orange . '>}</span></code>';
                $tip = 'code point escape sequence';
                break;
            case 'dec_html_entity':
                $rendered = '<code' . $blue . '><span' . $orange . '>&amp;#</span>'
                    . $codepoint . '<span' . $orange . '>;</span></code>';
                $tip = 'decimal HTML entity';
                break;
            case 'hex_html_entity':
                $rendered = '<code' . $dred . '><span' . $orange . '>&amp;#x</span>'
                    . $hex . '<span' . $orange . '>;</span></code>';
                $tip = 'hexadecimal HTML entity';
                break;
            case 'url_encoding':
                $rendered = '<code' . $green . '>' . htmlentities($raw) . '</code>';
                $tip = 'URL encoding';
                break;
            case 'character_literal':
                $rendered = '<code style="' . $literalStyle . 'green">' . htmlentities($raw) . '</code>';
                $tip = 'character literal';
                break;
            case 'escape_sequence':
                $rendered = '<code style="' . $literalStyle . 'hotpink">' . htmlentities($raw) . '</code>';
                $tip = 'escape sequence';
                break;
        }

        $doc = '<span class="plugin-character"><span class="plugin-character-tooltip" >'
            . '<span style="display:inline-grid; grid-template-columns: auto 1fr auto; grid-gap: 0.5ex">'
            . '<span style="padding-bottom: 0.5ex; border-bottom: 1px solid silver;grid-column: 1 / span 3; text-align: center; font-weight: bold">'
            . $tip . '</span>'
            . '<strong>character</strong><span style="grid-column: 2 / span 2;"><code style="padding: 0 0.5ex;margin-right: 0.5ex;color:green;">'
            . htmlentities($char) . '</code> ' . $charname . '</span>'
            . self::tooltipRow('code point', (string) $codepoint, 'DEC', 'blue');

        // Each of the following rows is omitted when it would merely repeat
        // the notation the user typed.
        if ($type !== 'escape_sequence' && isset(self::ES[$codepoint])) {
            $doc .= self::tooltipRow('escape sequence', self::ES[$codepoint][0], '');
        }
        if ($type !== 'codepoint_escape') {
            $doc .= self::tooltipRow('escape sequence', '\u{' . dechex($codepoint) . '}', 'HEX');
        }
        if ($type !== 'dec_html_entity') {
            $doc .= self::tooltipRow('HTML entity', '&amp;#' . $codepoint . ';', 'DEC');
        }
        if ($type !== 'hex_html_entity') {
            $doc .= self::tooltipRow('HTML entity', '&amp;#x' . $hex . ';', 'HEX');
        }
        // Unreserved characters encode to themselves; no row is shown then.
        if ($type !== 'url_encoding' && strlen($urlencoding) > 1) {
            $doc .= self::tooltipRow('URL encoding', $urlencoding, 'HEX');
        }

        // $char is already UTF-8, so its bytes are the UTF-8 code units.
        $bits = array_map(
            static function ($byte) {
                return str_pad(decbin($byte), 8, '0', STR_PAD_LEFT);
            },
            unpack('C*', $char)
        );
        $doc .= self::tooltipRow('UTF-8 code units', implode(' ', $bits), 'BIN', 'purple');

        $renderer->doc .= $doc . '</span></span>' . $rendered . '</span>';

        return true;
    }

    /**
     * Tells whether a value is an integer code point that denotes a Unicode
     * scalar value, i.e. lies in 0..U+10FFFF and is not a surrogate.
     *
     * @param mixed $codepoint
     * @return bool
     */
    private static function isScalarValue($codepoint) {
        return is_int($codepoint)
            && $codepoint >= 0
            && $codepoint <= self::MAX_CODEPOINT
            && ($codepoint < 0xD800 || $codepoint > 0xDFFF);
    }

    /**
     * Builds one three-cell row (label, value, unit) of the tooltip grid.
     *
     * @param string $label     row caption
     * @param string $valueHtml value cell content, already HTML-safe
     * @param string $unit      unit/base shown in the third cell, may be empty
     * @param string $color     CSS colour of the value cell
     * @return string HTML fragment
     */
    private static function tooltipRow($label, $valueHtml, $unit, $color = 'green') {
        return '<strong>' . $label . '</strong>'
            . '<code style="text-align: center; color: ' . $color . '">' . $valueHtml . '</code>'
            . '<span>' . $unit . '</span>';
    }

}