<?php

use dokuwiki\Extension\SyntaxPlugin;

/**
 * DokuWiki Plugin character (Syntax Component)
 *
 * Recognises {{c>...}} markup containing one character written in one of
 * several notations (U+0041, \x41, \u{41}, &#65;, &#x41;, %41, a literal
 * character, or a short escape sequence such as \n) and renders it in the
 * notation it was written in, with a tooltip listing the other notations.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author Vincent Tscherter <vincent@tscherter.net>
 */
class syntax_plugin_character extends SyntaxPlugin {

    /** Highest code point of the Unicode code space (U+10FFFF). */
    private const MAX_CODEPOINT = 0x10FFFF;

    /**
     * Short escape sequences, keyed by code point.
     *
     * Each entry is [escape notation, character name].
     */
    public const ES = [
        0 => ['\\0', 'NULL'],
        8 => ['\\b', 'BACKSPACE'],
        9 => ['\\t', 'CHARACTER TABULATION'],
        10 => ['\\n', 'LINE FEED'],
        11 => ['\\v', 'LINE TABULATION'],
        12 => ['\\f', 'FORM FEED'],
        13 => ['\\r', 'CARRIAGE RETURN'],
        92 => ['\\\\', 'REVERSE SOLIDUS'],
    ];

    /**
     * Syntax mode type of this plugin.
     *
     * NOTE(review): 'substition' looks misspelled but is presumably the
     * identifier DokuWiki expects for this mode type - do not "correct" it
     * without checking the DokuWiki syntax plugin documentation.
     *
     * @return string
     */
    public function getType() {
        return 'substition';
    }

    /**
     * Paragraph handling type of this plugin.
     *
     * @return string
     */
    public function getPType() {
        return 'normal';
    }

    /**
     * Sort value used when this mode is added to the lexer.
     *
     * @return int
     */
    public function getSort() {
        return 100;
    }

    /**
     * Register the {{c>...}} pattern with the lexer.
     *
     * The optional '}' before the closing '}}' lets the content itself end
     * in a brace, as the \u{...} notation does.
     *
     * @param string $mode lexer mode the pattern is connected to
     */
    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('{{c>[^}]*}?}}', $mode, 'plugin_character');
    }

    /**
     * Parse the matched markup into the data array consumed by render().
     *
     * The result always has the keys 'raw' (text between '{{c>' and '}}'),
     * 'type' (detected notation, or 'error') and 'codepoint' (int, -1 on
     * error). 'msg' (character count of the raw text) is set only when no
     * notation matched at all.
     *
     * @param string $match the text matched by the pattern
     * @param int $state lexer state (unused)
     * @param int $pos position of the match (unused)
     * @param Doku_Handler $handler the handler (unused)
     * @return array
     */
    public function handle($match, $state, $pos, Doku_Handler $handler) {
        // drop the leading '{{c>' (4 bytes) and the trailing '}}' (2 bytes)
        $raw = substr($match, 4, -2);
        $data = ['raw' => $raw, 'type' => 'error', 'codepoint' => -1];

        // Digits are taken from capture groups so that nothing but the
        // validated digits ever reaches hexdec()/intval().
        if (preg_match('/^u\+([0-9a-f]{1,7})$/i', $raw, $m)) {
            // U+0000 to U+10FFFF notation; a 7th digit is tolerated for
            // leading zeros, the range itself is checked further down
            $data['type'] = 'codepoint';
            $data['codepoint'] = hexdec($m[1]);
        } elseif (preg_match('/^\\\\x([0-9a-f]{2})$/i', $raw, $m)) {
            $data['type'] = 'hex_esc_seq';
            $data['codepoint'] = hexdec($m[1]);
        } elseif (preg_match('/^\\\\u\{([0-9a-f]{1,6})\}$/i', $raw, $m)) {
            $data['type'] = 'codepoint_escape';
            $data['codepoint'] = hexdec($m[1]);
        } elseif (preg_match('/^&#([0-9]+);$/i', $raw, $m)) {
            $data['type'] = 'dec_html_entity';
            $data['codepoint'] = intval($m[1]);
        } elseif (preg_match('/^&#x([0-9a-f]+);$/i', $raw, $m)) {
            $data['type'] = 'hex_html_entity';
            $data['codepoint'] = hexdec($m[1]);
        } elseif (preg_match('/^(%[0-9a-f]{2}){1,4}/i', $raw)) {
            $char = urldecode($raw);
            // The decoded bytes must form exactly one well-formed UTF-8
            // character; otherwise mb_ord() would return false.
            if (mb_check_encoding($char, 'UTF-8') && mb_strlen($char, 'UTF-8') === 1) {
                $data['type'] = 'url_encoding';
                $data['codepoint'] = mb_ord($char, 'UTF-8');
            }
        } elseif (mb_check_encoding($raw, 'UTF-8') && mb_strlen($raw, 'UTF-8') === 1) {
            $data['type'] = 'character_literal';
            $data['codepoint'] = mb_ord($raw, 'UTF-8');
        } else {
            $data['msg'] = mb_strlen($raw, 'UTF-8');
            foreach (self::ES as $codepoint => $entry) {
                if ($raw === $entry[0]) {
                    $data['type'] = 'escape_sequence';
                    $data['codepoint'] = $codepoint;
                    break;
                }
            }
        }

        // Reject values that cannot be encoded as a character (out of range,
        // surrogates, overflowed numbers).
        if ($data['type'] !== 'error' && !self::isScalarValue($data['codepoint'])) {
            $data['type'] = 'error';
            $data['codepoint'] = -1;
        }

        return $data;
    }

    /**
     * Render the character and its tooltip as XHTML.
     *
     * @param string $mode output format; only 'xhtml' is handled
     * @param Doku_Renderer $renderer renderer whose ->doc is appended to
     * @param array $data data produced by handle()
     * @return bool false for formats other than 'xhtml', true otherwise
     */
    public function render($mode, Doku_Renderer $renderer, $data) {
        if ($mode !== 'xhtml') return false;

        $type = $data['type'] ?? 'error';
        $codepoint = $data['codepoint'] ?? -1;

        // Validate again here: $data may have been produced by an earlier
        // version of handle() that did not check the range.
        if ($type === 'error' || !self::isScalarValue($codepoint)) {
            $msg = empty($data['msg']) ? '-' : $data['msg'];
            $renderer->doc .= '<code style="color: red">⚠️ invalid input ['
                . htmlentities($data['raw'] ?? '') . ']: ' . htmlentities((string) $msg) . '</code>';
            return true;
        }

        $char = mb_chr($codepoint, 'UTF-8');
        $hex = strtoupper(dechex($codepoint));
        $paddedHex = str_pad($hex, 4, '0', STR_PAD_LEFT);
        $escapesequence = '\u{' . dechex($codepoint) . '}';

        $charname = extension_loaded('intl') ? ' ' . IntlChar::charName($codepoint) : '';
        if (isset(self::ES[$codepoint])) {
            // the names in the ES table take precedence for these characters
            $charname = ' ' . self::ES[$codepoint][1];
        }

        // urlencode() writes a space as '+'; show the percent form instead
        $urlencoding = urlencode($char);
        if ($urlencoding === '+') $urlencoding = '%20';

        $orange = ' style="color: goldenrod"';
        $dred = ' style="color: darkred"';
        $green = ' style="color: green"';
        $blue = ' style="color: blue"';

        // Inline representation, in the notation the author used.
        $rendered = 'invalid';
        $tip = '';
        switch ($type) {
            case 'codepoint':
                $rendered = "<code$dred><span$orange>U+</span>$paddedHex</code>";
                $tip = 'unicode code point';
                break;
            case 'hex_esc_seq':
                $twoDigitHex = str_pad($hex, 2, '0', STR_PAD_LEFT);
                $rendered = "<code$dred><span$orange>\\x</span>$twoDigitHex</code>";
                $tip = 'hexadecimal escape sequence';
                break;
            case 'codepoint_escape':
                // the backslash must be escaped: "\u{" would start a PHP
                // code point escape inside a double-quoted string
                $rendered = "<code$dred><span$orange>\\u{</span>$hex<span$orange>}</span></code>";
                $tip = 'code point escape sequence';
                break;
            case 'dec_html_entity':
                $rendered = "<code$blue><span$orange>&amp;#</span>$codepoint<span$orange>;</span></code>";
                $tip = 'decimal HTML entity';
                break;
            case 'hex_html_entity':
                $rendered = "<code$dred><span$orange>&amp;#x</span>$hex<span$orange>;</span></code>";
                $tip = 'hexadecimal HTML entity';
                break;
            case 'url_encoding':
                $rendered = "<code$green>" . htmlentities($data['raw']) . '</code>';
                $tip = 'URL encoding';
                break;
            case 'character_literal':
                $rendered = '<code style="white-space: pre; padding: 0 0.25ex; color: green">'
                    . htmlentities($data['raw']) . '</code>';
                $tip = 'character literal';
                break;
            case 'escape_sequence':
                $rendered = '<code style="white-space: pre; padding: 0 0.25ex; color: hotpink">'
                    . htmlentities($data['raw']) . '</code>';
                $tip = 'escape sequence';
                break;
        }

        // Tooltip: a three-column grid (label, value, unit).
        $doc = '<span class="plugin-character"><span class="plugin-character-tooltip" >'
            . '<span style="display:inline-grid; grid-template-columns: auto 1fr auto; grid-gap: 0.5ex">'
            . '<span style="padding-bottom: 0.5ex; border-bottom: 1px solid silver;grid-column: 1 / span 3; text-align: center; font-weight: bold">'
            . $tip . '</span>'
            . '<strong>character</strong><span style="grid-column: 2 / span 2;"><code style="padding: 0 0.5ex;margin-right: 0.5ex;color:green;">'
            . htmlentities($char) . "</code> $charname</span>"
            . "<strong>code point</strong><code style='text-align: center; color: blue'>$codepoint</code><span>DEC</span>";

        // Each row below is left out when it would only repeat the notation
        // already shown inline.
        if ($type !== 'escape_sequence' && isset(self::ES[$codepoint])) {
            $doc .= '<strong>escape sequence</strong><code style="text-align: center; color: green">'
                . self::ES[$codepoint][0] . '</code><span></span>';
        }
        if ($type !== 'codepoint_escape') {
            $doc .= "<strong>escape sequence</strong><code style='text-align: center; color: green'>$escapesequence</code><span>HEX</span>";
        }
        // '&' is written as '&amp;' so the browser shows the entity notation
        // itself rather than resolving it to the character.
        if ($type !== 'dec_html_entity') {
            $doc .= "<strong>HTML entity</strong><code style='text-align: center; color: green'>&amp;#$codepoint;</code><span>DEC</span>";
        }
        if ($type !== 'hex_html_entity') {
            $doc .= "<strong>HTML entity</strong><code style='text-align: center; color: green'>&amp;#x$hex;</code><span>HEX</span>";
        }
        // characters that urlencode() leaves untouched have no URL encoding row
        if ($type !== 'url_encoding' && strlen($urlencoding) > 1) {
            $doc .= "<strong>URL encoding</strong><code style='text-align: center; color: green'>$urlencoding</code><span>HEX</span>";
        }

        $codeUnits = array_map(
            static function ($byte) {
                return sprintf('%08b', $byte);
            },
            unpack('C*', $char)
        );
        $doc .= '<strong>UTF-8 code units</strong><code style="text-align: center; color: purple">'
            . implode(' ', $codeUnits)
            . '</code><span>BIN</span>';

        $renderer->doc .= $doc . "</span></span>$rendered</span>";

        return true;
    }

    /**
     * Check that a value is an integer code point that can be encoded as a
     * character: within U+0000..U+10FFFF and not a surrogate.
     *
     * @param mixed $codepoint value to check
     * @return bool
     */
    private static function isScalarValue($codepoint) {
        return is_int($codepoint)
            && $codepoint >= 0
            && $codepoint <= self::MAX_CODEPOINT
            && ($codepoint < 0xD800 || $codepoint > 0xDFFF);
    }

}