*/
class syntax_plugin_character extends SyntaxPlugin
{
    /** Highest code point defined by Unicode (U+10FFFF). */
    private const MAX_CODEPOINT = 0x10FFFF;

    /**
     * Short escape sequences known to the plugin.
     *
     * Keyed by code point; each entry is [escape sequence as typed, character name].
     */
    const ES = [
        0  => ['\\0', 'NULL'],
        8  => ['\\b', 'BACKSPACE'],
        9  => ['\\t', 'CHARACTER TABULATION'],
        10 => ['\\n', 'LINE FEED'],
        11 => ['\\v', 'LINE TABULATION'],
        12 => ['\\f', 'FORM FEED'],
        13 => ['\\r', 'CARRIAGE RETURN'],
        92 => ['\\\\', 'REVERSE SOLIDUS'],
    ];

    /** @return string the syntax mode type reported by this plugin */
    public function getType()
    {
        return 'substition';
    }

    /** @return string the paragraph type reported by this plugin */
    public function getPType()
    {
        return 'normal';
    }

    /** @return int the sort value reported by this plugin */
    public function getSort()
    {
        return 100;
    }

    /**
     * Registers the `{{c>...}}` pattern with the lexer for the given mode.
     *
     * @param string $mode lexer mode the pattern is attached to
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern('{{c>[^}]*}?}}', $mode, 'plugin_character');
    }

    /**
     * Classifies the text between `{{c>` and `}}` and resolves it to a code point.
     *
     * Recognised notations: `U+XXXX`, `\xHH`, `\u{X...}`, decimal and hexadecimal
     * HTML entities, URL encoding, a single literal character, and the short
     * escape sequences listed in self::ES.
     *
     * @param string       $match   the full matched text, including the delimiters
     * @param int          $state   unused here
     * @param int          $pos     unused here
     * @param Doku_Handler $handler unused here
     * @return array keys: 'raw', 'type', 'codepoint' (-1 on error) and, for some
     *               errors, 'msg' (the character length of the raw input)
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        // Drop the leading "{{c>" (4 bytes) and the trailing "}}" (2 bytes).
        $raw = substr($match, 4, -2);
        $data = ['raw' => $raw];

        if (preg_match('/^u\+([0-9a-f]{1,7})$/i', $raw)) {
            // Up to seven digits are accepted on purpose: values beyond
            // U+10FFFF are reported as invalid input when rendering.
            $data['type'] = 'codepoint';
            $data['codepoint'] = hexdec(substr($raw, 2));
        } elseif (preg_match('/^\\\\x[0-9a-f]{2}$/i', $raw)) {
            $data['type'] = 'hex_esc_seq';
            $data['codepoint'] = hexdec(substr($raw, 2));
        } elseif (preg_match('/^\\\\u\{[0-9a-f]{1,6}\}$/i', $raw)) {
            $data['type'] = 'codepoint_escape';
            $data['codepoint'] = hexdec(substr($raw, 3, -1));
        } elseif (preg_match('/^&#[0-9]+;$/i', $raw)) {
            $data['type'] = 'dec_html_entity';
            $data['codepoint'] = intval(substr($raw, 2, -1));
        } elseif (preg_match('/^&#x[0-9a-f]+;$/i', $raw)) {
            $data['type'] = 'hex_html_entity';
            $data['codepoint'] = hexdec(substr($raw, 3, -1));
        } elseif (preg_match('/^(%[0-9a-f]{2}){1,4}/i', $raw)) {
            // The pattern is not anchored at the end; trailing text makes the
            // decoded string longer than one character and is rejected below.
            $codepoint = self::codepointOfSingleChar(urldecode($raw));
            $data['type'] = $codepoint === null ? 'error' : 'url_encoding';
            $data['codepoint'] = $codepoint === null ? -1 : $codepoint;
        } elseif (mb_strlen($raw, 'UTF-8') === 1) {
            $codepoint = self::codepointOfSingleChar($raw);
            $data['type'] = $codepoint === null ? 'error' : 'character_literal';
            $data['codepoint'] = $codepoint === null ? -1 : $codepoint;
        } else {
            $data['type'] = 'error';
            $data['codepoint'] = -1;
            foreach (self::ES as $key => $entry) {
                if ($raw === $entry[0]) {
                    $data['type'] = 'escape_sequence';
                    $data['codepoint'] = $key;
                    break;
                }
            }
            if ($data['type'] === 'error') {
                $data['msg'] = mb_strlen($raw, 'UTF-8');
            }
        }

        return $data;
    }

    /**
     * Emits the description of the character produced by handle().
     *
     * Only the 'xhtml' mode is handled. Invalid input (an error from handle(),
     * a code point outside 0..U+10FFFF, or one that cannot be encoded, such as
     * a surrogate) produces a warning line instead of the description.
     *
     * NOTE(review): the emitted text contains no markup between labels and
     * values; confirm this against the intended layout.
     *
     * @param string        $mode     renderer mode
     * @param Doku_Renderer $renderer renderer whose `doc` is appended to
     * @param array         $data     the array returned by handle()
     * @return bool false for modes other than 'xhtml', true otherwise
     */
    public function render($mode, Doku_Renderer $renderer, $data)
    {
        if ($mode !== 'xhtml') {
            return false;
        }

        $type = $data['type'];
        $codepoint = $data['codepoint'];

        // mb_chr() returns false for values it cannot encode (e.g. surrogates),
        // so a false $char covers every kind of invalid input.
        $char = false;
        if (
            $type !== 'error'
            && is_int($codepoint)
            && $codepoint >= 0
            && $codepoint <= self::MAX_CODEPOINT
        ) {
            $char = mb_chr($codepoint, 'UTF-8');
        }

        if ($char === false) {
            $msg = empty($data['msg']) ? '-' : $data['msg'];
            $renderer->doc .= '⚠️ invalid input [' . htmlentities($data['raw']) . ']: ' . $msg;
            return true;
        }

        $hex = strtoupper(dechex($codepoint));
        $paddedHex = str_pad($hex, 4, '0', STR_PAD_LEFT);

        // Names from self::ES take precedence over the intl-provided name.
        $known = self::ES[$codepoint] ?? null;
        if ($known !== null) {
            $charname = ' ' . $known[1];
        } elseif (extension_loaded('intl')) {
            $charname = ' ' . IntlChar::charName($codepoint);
        } else {
            $charname = '';
        }

        // Single quotes: in a double-quoted string "\u{" starts PHP's own
        // code point escape, so the literal text has to be assembled by hand.
        $escapesequence = '\u{' . dechex($codepoint) . '}';

        $urlencoding = urlencode($char);
        if ($urlencoding === '+') {
            // urlencode() encodes a space as "+"; show the percent form instead.
            $urlencoding = '%20';
        }

        $bytes = unpack('C*', $char);

        $rendered = 'invalid';
        $tip = '';
        switch ($type) {
            case 'codepoint':
                $rendered = "U+$paddedHex";
                $tip = 'unicode code point';
                break;
            case 'hex_esc_seq':
                $rendered = '\x' . str_pad($hex, 2, '0', STR_PAD_LEFT);
                $tip = 'hexadecimal escape sequence';
                break;
            case 'codepoint_escape':
                $rendered = '\u{' . $hex . '}';
                $tip = 'code point escape sequence';
                break;
            case 'dec_html_entity':
                // The ampersand is escaped so the notation itself is displayed.
                $rendered = "&amp;#$codepoint;";
                $tip = 'decimal HTML entity';
                break;
            case 'hex_html_entity':
                $rendered = "&amp;#x$hex;";
                $tip = 'hexadecimal HTML entity';
                break;
            case 'url_encoding':
                $rendered = htmlentities($data['raw']);
                $tip = 'URL encoding';
                break;
            case 'character_literal':
                $rendered = htmlentities($data['raw']);
                $tip = 'character literal';
                break;
            case 'escape_sequence':
                $rendered = htmlentities($data['raw']);
                $tip = 'escape sequence';
                break;
        }

        $out = $tip
            . 'character'
            . htmlentities($char) . " \n" . $charname
            . "code point{$codepoint}DEC";

        // Each notation below is skipped when it is the one the input used.
        if ($type !== 'escape_sequence' && $known !== null) {
            $out .= 'escape sequence' . $known[0];
        }
        if ($type !== 'codepoint_escape') {
            $out .= "escape sequence{$escapesequence}HEX";
        }
        if ($type !== 'dec_html_entity') {
            $out .= "HTML entity&amp;#{$codepoint};DEC";
        }
        if ($type !== 'hex_html_entity') {
            $out .= "HTML entity&amp;#x{$hex};HEX";
        }
        // A one-byte result means urlencode() left the character unchanged.
        if ($type !== 'url_encoding' && strlen($urlencoding) > 1) {
            $out .= "URL encoding{$urlencoding}HEX";
        }

        $out .= 'UTF-8 code units'
            . join(' ', array_map(static function ($byte) {
                return str_pad(decbin($byte), 8, '0', STR_PAD_LEFT);
            }, $bytes))
            . 'BIN';

        $renderer->doc .= $out . $rendered;

        return true;
    }

    /**
     * Returns the code point of a string that is exactly one valid UTF-8 character.
     *
     * @param string $text candidate string
     * @return int|null the code point, or null when $text is not valid UTF-8
     *                  or is not exactly one character long
     */
    private static function codepointOfSingleChar($text)
    {
        if (!mb_check_encoding($text, 'UTF-8') || mb_strlen($text, 'UTF-8') !== 1) {
            return null;
        }

        $codepoint = mb_ord($text, 'UTF-8');

        return $codepoint === false ? null : $codepoint;
    }
}