1 <?php
2 
3 use dokuwiki\Extension\SyntaxPlugin;
4 
5 /**
6  * DokuWiki Plugin character (Syntax Component)
7  *
8  * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
9  * @author Vincent Tscherter <vincent@tscherter.net>
10  */
class syntax_plugin_character extends SyntaxPlugin {

    /** Highest code point defined by Unicode (U+10FFFF). */
    const MAX_CODEPOINT = 0x10FFFF;

    /**
     * Single-character escape sequences, keyed by code point.
     *
     * Each entry is [escape sequence as typed in the wiki text, character name].
     */
    const ES = [
        0 => ['\\0', 'NULL'],
        8 => ['\\b', 'BACKSPACE'],
        9 => ['\\t', 'CHARACTER TABULATION'],
        10 => ['\\n', 'LINE FEED'],
        11 => ['\\v', 'LINE TABULATION'],
        12 => ['\\f', 'FORM FEED'],
        13 => ['\\r', 'CARRIAGE RETURN'],
        92 => ['\\\\', 'REVERSE SOLIDUS'],
    ];

    /**
     * Syntax mode type of this plugin.
     *
     * @return string
     */
    public function getType() {
        // "substition" (sic) is the spelling DokuWiki uses for this mode type.
        return 'substition';
    }

    /**
     * Paragraph handling type of this plugin.
     *
     * @return string
     */
    public function getPType() {
        return 'normal';
    }

    /**
     * Sort value used to order this plugin's pattern among the other modes.
     *
     * @return int
     */
    public function getSort() {
        return 100;
    }

    /**
     * Register the `{{c>...}}` pattern with the lexer.
     *
     * The optional `}` before the closing braces lets `{{c>}}}` describe the
     * closing curly bracket itself.
     *
     * @param string $mode the mode the pattern is registered for
     */
    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('{{c>[^}]*}?}}', $mode, 'plugin_character');
    }

    /**
     * Parse the text between `{{c>` and `}}` into a code point.
     *
     * Recognised notations: `U+41`, `\x41`, `\u{41}`, `&#65;`, `&#x41;`,
     * URL encoding (`%41`), a single literal character and the escape
     * sequences listed in self::ES.
     *
     * @param string       $match   the text matched by the pattern
     * @param int          $state   lexer state (unused)
     * @param int          $pos     position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array keys 'raw', 'type', 'codepoint' and, for unrecognised input, 'msg';
     *               'type' is 'error' and 'codepoint' is -1 when the input cannot be
     *               resolved to exactly one character
     */
    public function handle($match, $state, $pos, Doku_Handler $handler) {
        // Drop the leading '{{c>' (4 bytes) and the trailing '}}' (2 bytes).
        // The cast covers PHP 7, where substr() may return false for '{{c>}}'.
        $raw = (string) substr($match, 4, -2);
        $data = ['raw' => $raw];

        if (preg_match('/^u\+[0-9a-f]{1,7}$/i', $raw)) {
            // U+0000 to U+10FFFF notation; the range itself is validated in render().
            $data['type'] = 'codepoint';
            $data['codepoint'] = hexdec(substr($raw, 2));
        } elseif (preg_match('/^\\\\x[0-9a-f]{2}$/i', $raw)) {
            $data['type'] = 'hex_esc_seq';
            $data['codepoint'] = hexdec(substr($raw, 2));
        } elseif (preg_match('/^\\\\u\{[0-9a-f]{1,6}\}$/i', $raw)) {
            $data['type'] = 'codepoint_escape';
            $data['codepoint'] = hexdec(substr($raw, 3, -1));
        } elseif (preg_match('/^&#[0-9]+;$/i', $raw)) {
            $data['type'] = 'dec_html_entity';
            $data['codepoint'] = intval(substr($raw, 2, -1));
        } elseif (preg_match('/^&#x[0-9a-f]+;$/i', $raw)) {
            $data['type'] = 'hex_html_entity';
            $data['codepoint'] = hexdec(substr($raw, 3, -1));
        } elseif (preg_match('/^(%[0-9a-f]{2}){1,4}/i', $raw)) {
            // The decoded bytes must form exactly one valid UTF-8 character;
            // otherwise mb_ord() would yield false and be mistaken for NUL later.
            $codepoint = $this->singleCodepoint(urldecode($raw));
            $data['type'] = ($codepoint === false) ? 'error' : 'url_encoding';
            $data['codepoint'] = ($codepoint === false) ? -1 : $codepoint;
        } else {
            $codepoint = $this->singleCodepoint($raw);
            if ($codepoint !== false) {
                $data['type'] = 'character_literal';
                $data['codepoint'] = $codepoint;
            } else {
                // Unrecognised unless it is one of the known escape sequences.
                $data['type'] = 'error';
                $data['msg'] = mb_strlen($raw, 'UTF-8');
                $data['codepoint'] = -1;
                foreach (self::ES as $key => $entry) {
                    if ($raw === $entry[0]) {
                        $data['type'] = 'escape_sequence';
                        $data['codepoint'] = $key;
                        break;
                    }
                }
            }
        }
        return $data;
    }

    /**
     * Render the character in its original notation with a tooltip listing
     * the alternative notations.
     *
     * @param string        $mode     output format; only 'xhtml' is handled
     * @param Doku_Renderer $renderer the renderer whose document is appended to
     * @param array         $data     the array returned by handle()
     * @return bool false for modes other than 'xhtml', true otherwise
     */
    public function render($mode, Doku_Renderer $renderer, $data) {
        if ($mode !== 'xhtml') return false;

        $type = $data['type'];
        $codepoint = $data['codepoint'];

        // Only integers within the Unicode range can be turned into a character.
        // mb_chr() additionally returns false for code points it cannot encode.
        $char = false;
        if ($type !== 'error' && is_int($codepoint)
            && $codepoint >= 0 && $codepoint <= self::MAX_CODEPOINT) {
            $char = mb_chr($codepoint, 'UTF-8');
        }
        if ($char === false) {
            $msg = empty($data['msg']) ? '-' : $data['msg'];
            $renderer->doc .= '<code style="color: red">⚠️ invalid input ['
                . htmlentities((string) $data['raw']) . ']: ' . $msg . '</code>';
            return true;
        }

        $hex = strtoupper(dechex($codepoint));
        $escape = self::ES[$codepoint] ?? null;

        if ($escape !== null) {
            $charname = ' ' . $escape[1];
        } elseif (extension_loaded('intl')) {
            $charname = ' ' . IntlChar::charName($codepoint);
        } else {
            $charname = '';
        }

        $escapesequence = '\u{' . dechex($codepoint) . '}';

        // urlencode() turns a space into '+'; show the percent form instead.
        $urlencoding = urlencode($char);
        if ($urlencoding === '+') $urlencoding = '%20';

        $orange = ' style="color: goldenrod"';
        $dred = ' style="color: darkred"';
        $green = ' style="color: green"';
        $blue = ' style="color: blue"';
        $literal = ' style="white-space: pre; padding: 0 0.25ex; color: ';

        // Inline representation: the character in the notation the author used.
        $rendered = 'invalid';
        $tip = '';
        switch ($type) {
            case 'codepoint':
                $rendered = '<code' . $dred . '><span' . $orange . '>U+</span>'
                    . str_pad($hex, 4, '0', STR_PAD_LEFT) . '</code>';
                $tip = 'unicode code point';
                break;
            case 'hex_esc_seq':
                $rendered = '<code' . $dred . '><span' . $orange . '>\x</span>'
                    . str_pad($hex, 2, '0', STR_PAD_LEFT) . '</code>';
                $tip = 'hexadecimal escape sequence';
                break;
            case 'codepoint_escape':
                $rendered = '<code' . $dred . '><span' . $orange . '>\u&#x7B;</span>'
                    . $hex . '<span' . $orange . '>}</span></code>';
                $tip = 'code point escape sequence';
                break;
            case 'dec_html_entity':
                // The ampersand is escaped so the markup shows the entity text itself.
                $rendered = '<code' . $blue . '><span' . $orange . '>&amp;#</span>'
                    . $codepoint . '<span' . $orange . '>;</span></code>';
                $tip = 'decimal HTML entity';
                break;
            case 'hex_html_entity':
                $rendered = '<code' . $dred . '><span' . $orange . '>&amp;#x</span>'
                    . $hex . '<span' . $orange . '>;</span></code>';
                $tip = 'hexadecimal HTML entity';
                break;
            case 'url_encoding':
                $rendered = '<code' . $green . '>' . htmlentities($data['raw']) . '</code>';
                $tip = 'URL encoding';
                break;
            case 'character_literal':
                $rendered = '<code' . $literal . 'green">' . htmlentities($data['raw']) . '</code>';
                $tip = 'character literal';
                break;
            case 'escape_sequence':
                $rendered = '<code' . $literal . 'hotpink">' . htmlentities($data['raw']) . '</code>';
                $tip = 'escape sequence';
                break;
        }

        // Tooltip: a three-column grid (label, value, unit). A row is left out
        // when it would merely repeat the notation already shown inline.
        $renderer->doc .= '<span class="plugin-character"><span class="plugin-character-tooltip" >'
            . '<span style="display:inline-grid; grid-template-columns: auto 1fr auto; grid-gap: 0.5ex">'
            . '<span style="padding-bottom: 0.5ex; border-bottom: 1px solid silver;grid-column: 1 / span 3; text-align: center; font-weight: bold">'
            . $tip . '</span>'
            . '<strong>character</strong><span style="grid-column: 2 / span 2;"><code style="padding: 0 0.5ex;margin-right: 0.5ex;color:green;">'
            . htmlentities($char) . "</code> $charname</span>"
            . "<strong>code point</strong><code style='text-align: center; color: blue'>$codepoint</code><span>DEC</span>";

        if ($type !== 'escape_sequence' && $escape !== null) {
            $renderer->doc .= '<strong>escape sequence</strong><code style="text-align: center; color: green">'
                . $escape[0] . '</code><span></span>';
        }
        if ($type !== 'codepoint_escape') {
            $renderer->doc .= "<strong>escape sequence</strong><code style='text-align: center; color: green'>$escapesequence</code><span>HEX</span>";
        }
        if ($type !== 'dec_html_entity') {
            $renderer->doc .= "<strong>HTML entity</strong><code style='text-align: center; color: green'>&amp;#$codepoint;</code><span>DEC</span>";
        }
        if ($type !== 'hex_html_entity') {
            $renderer->doc .= "<strong>HTML entity</strong><code style='text-align: center; color: green'>&amp;#x$hex;</code><span>HEX</span>";
        }
        // Characters that urlencode() leaves untouched have no percent form worth showing.
        if ($type !== 'url_encoding' && strlen($urlencoding) > 1) {
            $renderer->doc .= "<strong>URL encoding</strong><code style='text-align: center; color: green'>$urlencoding</code><span>HEX</span>";
        }

        // One 8-digit binary group per byte of the UTF-8 encoding.
        $bits = array_map(function ($byte) {
            return str_pad(decbin($byte), 8, '0', STR_PAD_LEFT);
        }, unpack('C*', $char));
        $renderer->doc .= '<strong>UTF-8 code units</strong><code style="text-align: center; color: purple">'
            . implode(' ', $bits)
            . '</code><span>BIN</span>';

        $renderer->doc .= "</span></span>$rendered</span>";

        return true;
    }

    /**
     * Code point of a string that consists of exactly one valid UTF-8 character.
     *
     * @param string $text the candidate string
     * @return int|false the code point, or false if $text is not a single valid character
     */
    private function singleCodepoint($text) {
        if (!mb_check_encoding($text, 'UTF-8') || mb_strlen($text, 'UTF-8') !== 1) {
            return false;
        }
        return mb_ord($text, 'UTF-8');
    }

}