<?php
/**
 * Deepl Autotranslate Plugin
 *
 * @author Jennifer Graul <me@netali.de>
 */

if(!defined('DOKU_INC')) die();

use \dokuwiki\HTTP\DokuHTTPClient;
use \dokuwiki\plugin\deeplautotranslate\MenuItem;

/**
 * Action component that translates wiki pages through the DeepL API.
 *
 * Pages are expected to be organised in language namespaces (the first part of the
 * page ID, e.g. "de:some:page"). Two workflows are implemented:
 *  - translating the original page into the language namespace that is currently
 *    viewed (either saved directly or offered as editor template), and
 *  - "push translation": translating the currently viewed original page into all
 *    languages configured in the 'push_langs' option.
 */
class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {

    // manual mapping of ISO-languages to DeepL-languages to fix inconsistent naming
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];

    /**
     * Register its handlers with the DokuWiki's event controller
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'preprocess');
        $controller->register_hook('COMMON_PAGETPL_LOAD', 'AFTER', $this, 'autotrans_editor');
        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
    }

    /**
     * Add the translate button to the page menu if a translation is possible.
     *
     * The button is only added for the 'show' action, for the 'page' menu view, when
     * the 'show_button' option is enabled and the translation checks for the current
     * page pass.
     *
     * @param Doku_Event $event MENU_ITEMS_ASSEMBLY event; $event->data['items'] is modified
     */
    public function add_menu_button(Doku_Event $event): void {
        global $ID;
        global $ACT;

        if ($ACT != 'show') return;

        if ($event->data['view'] != 'page') return;

        if (!$this->getConf('show_button')) return;

        $split_id = explode(':', $ID);
        $lang_ns = array_shift($split_id);
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if ($this->getConf('default_lang_in_ns') && $lang_ns === $this->get_default_lang()) {
                // if the default lang is in a namespace and we are in that namespace --> check for push translation
                if (!$this->check_do_push_translate()) return;
            } else {
                // in language namespace --> check if we should translate
                if (!$this->check_do_translation(true)) return;
            }
        } else {
            // do not show the button if we are not in a language namespace and the default language is in a namespace
            if ($this->getConf('default_lang_in_ns')) return;
            // not in language namespace and default language is not in a namespace
            // --> check if we should show the push translate button
            if (!$this->check_do_push_translate()) return;
        }

        // insert the button before the last menu item
        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
    }

    /**
     * Dispatch the 'show' and 'translate' actions to the matching translation workflow.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event; $event->data holds the action name
     * @param mixed      $param unused
     */
    public function preprocess(Doku_Event $event, $param): void {
        global $ID;

        // check if action is show or translate
        if ($event->data != 'show' && $event->data != 'translate') return;

        $split_id = explode(':', $ID);
        $lang_ns = array_shift($split_id);
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if ($this->getConf('default_lang_in_ns') && $lang_ns === $this->get_default_lang()) {
                // if the default lang is in a namespace and we are in that namespace --> push translate
                $this->push_translate($event);
            } else {
                // in language namespace --> autotrans direct
                $this->autotrans_direct($event);
            }
        } else {
            // not in language namespace --> push translate
            $this->push_translate($event);
        }
    }

    /**
     * Translate the original page and save the result as the current page.
     *
     * Runs on the 'translate' action (button must be enabled) or on 'show' when the
     * mode for the current page is 'direct'. The action is always reset to 'show'.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event
     */
    private function autotrans_direct(Doku_Event $event): void {
        global $ID;

        // abort if action is translate and the translate button is disabled
        if ($event->data == 'translate' && !$this->getConf('show_button')) return;

        // do nothing on show action when mode is not direct
        if ($event->data == 'show' && $this->get_mode() != 'direct') return;

        // allow translation of existing pages if we are in the translate action
        $allow_existing = ($event->data == 'translate');

        // reset action to show
        $event->data = 'show';

        if (!$this->check_do_translation($allow_existing)) {
            return;
        }

        $org_page_info = $this->get_org_page_info();
        $translated_text = $this->deepl_translate(
            $org_page_info["text"],
            $this->get_target_lang(),
            $org_page_info["ns"]
        );

        // an empty result means the translation failed --> do not touch the page
        if ($translated_text === '') {
            return;
        }

        saveWikiText($ID, $translated_text, 'Automatic translation');

        msg($this->getLang('msg_translation_success'), 1);

        // reload the page after translation
        send_redirect(wl($ID));
    }

    /**
     * Provide the translated original page as template for the editor.
     *
     * Only active when the mode for the current page is 'editor'.
     *
     * @param Doku_Event $event COMMON_PAGETPL_LOAD event; $event->data['tpl'] is overwritten
     * @param mixed      $param unused
     */
    public function autotrans_editor(Doku_Event $event, $param): void {
        if ($this->get_mode() != 'editor') return;

        if (!$this->check_do_translation()) return;

        $org_page_info = $this->get_org_page_info();

        $event->data['tpl'] = $this->deepl_translate(
            $org_page_info["text"],
            $this->get_target_lang(),
            $org_page_info["ns"]
        );
    }

    /**
     * Translate the current page into every configured push language and save the results.
     *
     * Languages that are unknown, not writable for the current user or whose translation
     * failed are skipped. The success message is only shown when at least one translated
     * page was saved.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event
     */
    private function push_translate(Doku_Event $event): void {
        global $ID;

        // check if action is translate
        if ($event->data != 'translate') return;

        // check if button is enabled
        if (!$this->getConf('show_button')) {
            send_redirect(wl($ID));
            return;
        }

        if (!$this->check_do_push_translate()) {
            send_redirect(wl($ID));
            return;
        }

        // the page ID without language namespace is the same for all target languages
        if ($this->getConf('default_lang_in_ns')) {
            // if default lang is in ns: drop the language namespace from the ID
            $split_id = explode(':', $ID);
            array_shift($split_id);
            $base_id = implode(':', $split_id);
        } else {
            // if default lang is not in ns: the ID is used as it is
            $base_id = $ID;
        }

        // push translate
        $push_langs = $this->get_push_langs();
        $org_page_text = rawWiki($ID);
        $saved_any = false;
        foreach ($push_langs as $lang) {
            // skip invalid languages
            if (!array_key_exists($lang, $this->langs)) {
                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
                continue;
            }

            $lang_id = $lang . ':' . $base_id;

            // check permissions
            $perm = auth_quickaclcheck($lang_id);
            $exists = page_exists($lang_id);
            if (($exists && $perm < AUTH_EDIT) || (!$exists && $perm < AUTH_CREATE)) {
                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
                continue;
            }

            $translated_text = $this->deepl_translate($org_page_text, $lang, getNS($ID));

            // an empty result means the translation failed; saving it would wipe an
            // already existing translated page, so skip this language instead
            if ($translated_text === '') {
                continue;
            }

            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
            $saved_any = true;
        }

        if ($saved_any) {
            msg($this->getLang('msg_translation_success'), 1);
        }

        // reload the page after translation to clear the action
        send_redirect(wl($ID));
    }

    /**
     * Determine the translation mode for the current page.
     *
     * The 'editor_regex' and 'direct_regex' options (matched against the page ID, in
     * that order) override the general 'mode' option.
     *
     * @return string the mode, e.g. 'editor' or 'direct'
     */
    private function get_mode(): string {
        global $ID;
        if ($this->getConf('editor_regex')) {
            if (preg_match('/' . $this->getConf('editor_regex') . '/', $ID) === 1) return 'editor';
        }
        if ($this->getConf('direct_regex')) {
            if (preg_match('/' . $this->getConf('direct_regex') . '/', $ID) === 1) return 'direct';
        }
        return $this->getConf('mode');
    }

    /**
     * Get the first namespace part of the current page ID, which is used as target language.
     *
     * @return string
     */
    private function get_target_lang(): string {
        global $ID;
        $split_id = explode(':', $ID);
        return array_shift($split_id);
    }

    /**
     * Get the default language of the wiki.
     *
     * $conf['lang_before_translation'] is preferred over $conf['lang'] when it is set.
     *
     * @return string
     */
    private function get_default_lang(): string {
        global $conf;

        if (empty($conf['lang_before_translation'])) {
            $default_lang = $conf['lang'];
        } else {
            $default_lang = $conf['lang_before_translation'];
        }

        return $default_lang;
    }

    /**
     * Get namespace and raw wiki text of the original page belonging to the current page.
     *
     * The original page ID is the current ID without its first (language) namespace,
     * prefixed with the default language when 'default_lang_in_ns' is enabled.
     *
     * @return array with the keys "ns" and "text"
     */
    private function get_org_page_info(): array {
        global $ID;

        $split_id = explode(':', $ID);
        array_shift($split_id);
        $org_id = implode(':', $split_id);

        // if default lang is in ns: add default ns in front of org id
        if ($this->getConf('default_lang_in_ns')) {
            $org_id = $this->get_default_lang() . ':' . $org_id;
        }

        return ["ns" => getNS($org_id), "text" => rawWiki($org_id)];
    }

    /**
     * Check whether the current page may be created/overwritten by a translation.
     *
     * @param bool $allow_existing allow the translation of an already existing page
     * @return bool true if the translation should be done
     */
    private function check_do_translation($allow_existing = false): bool {
        global $INFO;
        global $ID;

        // only translate if the current page does not exist
        if ($INFO['exists'] && !$allow_existing) return false;

        // permission check
        $perm = auth_quickaclcheck($ID);
        if (($INFO['exists'] && $perm < AUTH_EDIT) || (!$INFO['exists'] && $perm < AUTH_CREATE)) return false;

        // skip blacklisted namespaces and pages
        if ($this->getConf('blacklist_regex')) {
            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
        }

        $split_id = explode(':', $ID);
        $lang_ns = array_shift($split_id);
        // only translate if the current page is in a language namespace
        if (!array_key_exists($lang_ns, $this->langs)) return false;

        $org_id = implode(':', $split_id);

        // if default lang is in ns: add default ns in front of org id
        if ($this->getConf('default_lang_in_ns')) {
            $org_id = $this->get_default_lang() . ':' . $org_id;
        }

        // check if the original page exists
        if (!page_exists($org_id)) return false;

        return true;
    }

    /**
     * Check whether a push translation may be started from the current page.
     *
     * @return bool true if the push translation should be done
     */
    private function check_do_push_translate(): bool {
        global $ID;
        global $INFO;

        if (!$INFO['exists']) return false;

        // only allow push translation if the user can edit this page
        $perm = auth_quickaclcheck($ID);
        if ($perm < AUTH_EDIT) return false;

        // if default language is in namespace: only allow push translation from that namespace
        if ($this->getConf('default_lang_in_ns')) {
            $split_id = explode(':', $ID);
            $lang_ns = array_shift($split_id);

            if ($lang_ns !== $this->get_default_lang()) return false;
        }

        $push_langs = $this->get_push_langs();
        // push_langs empty --> push_translate disabled --> abort
        if (empty($push_langs)) return false;

        // skip blacklisted namespaces and pages
        if ($this->getConf('blacklist_regex')) {
            // blacklist regex match --> abort
            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
        }

        return true;
    }

    /**
     * Translate wiki text with the DeepL API.
     *
     * Links and media are pointed to the target language first and wiki syntax is
     * wrapped in <ignore> tags, which are removed again from the response.
     *
     * @param string       $text        raw wiki text to translate
     * @param string       $target_lang language namespace, has to be a key of $this->langs
     * @param string|false $org_ns      namespace of the original page, used to resolve relative IDs
     * @return string the translated text, or an empty string if no API key is configured
     *                or the request failed (a message is shown for failed requests)
     */
    private function deepl_translate($text, $target_lang, $org_ns): string {
        if (!trim($this->getConf('api_key'))) return '';

        $text = $this->patch_links($text, $target_lang, $org_ns);

        $text = $this->insert_ignore_tags($text);

        $data = [
            'auth_key' => $this->getConf('api_key'),
            'target_lang' => $this->langs[$target_lang],
            'tag_handling' => 'xml',
            'ignore_tags' => 'ignore',
            'text' => $text
        ];

        if ($this->getConf('api') == 'free') {
            $url = 'https://api-free.deepl.com/v2/translate';
        } else {
            $url = 'https://api.deepl.com/v2/translate';
        }

        $http = new DokuHTTPClient();
        $raw_response = $http->post($url, $data);

        // a false response covers failures that do not result in a status >= 400
        if ($raw_response === false || $http->status >= 400) {
            // add error messages
            switch ($http->status) {
                case 403:
                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
                    break;
                case 456:
                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
                    break;
                default:
                    msg($this->getLang('msg_translation_fail'), -1);
                    break;
            }

            // if any error occurred return an empty string
            return '';
        }

        $json_response = json_decode($raw_response, true);

        // treat malformed or unexpected responses like a failed request
        if (!is_array($json_response) || !isset($json_response['translations'][0]['text'])) {
            msg($this->getLang('msg_translation_fail'), -1);
            return '';
        }

        $translated_text = (string) $json_response['translations'][0]['text'];

        $translated_text = $this->remove_ignore_tags($translated_text);

        return $translated_text;
    }

    /**
     * Get the languages configured for push translation.
     *
     * The 'push_langs' option is a whitespace separated list; empty entries caused by
     * repeated whitespace are dropped.
     *
     * @return array list of language codes, empty if push translation is disabled
     */
    private function get_push_langs(): array {
        $push_langs = trim($this->getConf('push_langs'));

        if ($push_langs === '') return [];

        $langs = preg_split('/\s+/', $push_langs, -1, PREG_SPLIT_NO_EMPTY);

        return $langs === false ? [] : $langs;
    }

    /**
     * Point internal links and media to the target language where a translated target exists.
     *
     * @param string       $text        raw wiki text
     * @param string       $target_lang language namespace to point the links to
     * @param string|false $ns          namespace used to resolve relative IDs
     * @return string the wiki text with patched links and media
     */
    private function patch_links($text, $target_lang, $ns): string {
        /*
         * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]]
         * 2. Extract aa:bb
         * 3. Check if lang:aa:bb exists
         *   3.1. --> Yes --> replace
         *   3.2. --> No --> leave it as it is
         */


        /*
         * LINKS
         */

        preg_match_all('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            // external link --> skip
            if (strpos($match[1], '://') !== false) continue;

            // skip interwiki links
            if (strpos($match[1], '>') !== false) continue;

            // skip windows share links
            if (strpos($match[1], '\\\\') !== false) continue;

            // trailing optional groups are missing from the match if they did not participate
            $anchor = $match[2] ?? '';
            $title = $match[3] ?? '';

            $resolved_id = trim($match[1]);

            resolve_pageid($ns, $resolved_id, $exists);

            $resolved_id_full = $resolved_id;

            // if the link already points to a target in a language namespace drop it and add the new language namespace
            $split_id = explode(':', $resolved_id);
            $lang_ns = array_shift($split_id);
            if (array_key_exists($lang_ns, $this->langs)) {
                $resolved_id = implode(':', $split_id);
            }

            $lang_id = $target_lang . ':' . $resolved_id;

            if (!page_exists($lang_id)) {
                // Page in target lang does not exist --> replace with absolute ID in case it was a relative ID
                $new_link = '[[' . $resolved_id_full . $anchor . $title . ']]';
            } else {
                // Page in target lang exists --> replace link
                $new_link = '[[' . $lang_id . $anchor . $title . ']]';
            }

            $text = str_replace($match[0], $new_link, $text);

        }

        /*
         * MEDIA
         */

        preg_match_all('/\{\{(([\s\S]*?)(\?[\s\S]*?)?)(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            // external image --> skip
            if (strpos($match[1], '://') !== false) continue;

            // skip things like {{tag>...}}
            if (strpos($match[1], '>') !== false) continue;

            // keep alignment
            $align_left = "";
            $align_right = "";

            // align left --> space in front of ID
            if (substr($match[1], 0, 1) == " ") $align_left = " ";
            // align right --> space behind id
            if (substr($match[1], -1) == " ") $align_right = " ";

            // trailing optional groups are missing from the match if they did not participate
            $resolved_id = trim($match[2]);
            $params = trim($match[3] ?? '');
            $title = $match[4] ?? '';

            resolve_mediaid($ns, $resolved_id, $exists);

            $resolved_id_full = $resolved_id;

            // if the link already points to a target in a language namespace drop it and add the new language namespace
            $split_id = explode(':', $resolved_id);
            $lang_ns = array_shift($split_id);
            if (array_key_exists($lang_ns, $this->langs)) {
                $resolved_id = implode(':', $split_id);
            }

            $lang_id = $target_lang . ':' . $resolved_id;

            $lang_id_fn = mediaFN($lang_id);

            if (!file_exists($lang_id_fn)) {
                // media in target lang does not exist --> replace with absolute ID in case it was a relative ID
                $new_link = '{{' . $align_left . $resolved_id_full . $params . $align_right . $title . '}}';
            } else {
                // media in target lang exists --> replace it
                $new_link = '{{' . $align_left . $lang_id . $params . $align_right . $title . '}}';
            }

            $text = str_replace($match[0], $new_link, $text);

        }

        return $text;
    }

    /**
     * Wrap wiki syntax in <ignore> tags so it is not translated.
     *
     * The order of the replacements matters: remove_ignore_tags() expects exactly the
     * nesting that is produced here.
     *
     * @param string $text raw wiki text
     * @return string the text with <ignore> tags
     */
    private function insert_ignore_tags($text): string {
        // ignore every other xml-like tags (the tags themselves, not their content),
        // otherwise deepl would break the formatting
        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);

        // prevent deepl from breaking headings
        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);

        // fix for plugins like tag or template
        $text = preg_replace('/\{\{[\s\w]+?>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);

        // ignore links in wikitext (outside of dokuwiki-links)
        $text = preg_replace('/\S+:\/\/\S+/', '<ignore>${0}</ignore>', $text);

        // ignore link/media ids but translate the text (if existing)
        $text = preg_replace('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${2}${4}</ignore>${5}<ignore>]]</ignore>', $text);
        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);

        // prevent deepl from messing with tables
        $text = str_replace("^", "<ignore>^</ignore>", $text);
        $text = str_replace("|", "<ignore>|</ignore>", $text);

        // prevent deepl from doing strange things with dokuwiki syntax
        $text = str_replace("''", "<ignore>''</ignore>", $text);
        $text = str_replace("//", "<ignore>//</ignore>", $text);
        $text = str_replace("**", "<ignore>**</ignore>", $text);
        $text = str_replace("__", "<ignore>__</ignore>", $text);
        $text = str_replace("\\\\", "<ignore>\\\\</ignore>", $text);

        // prevent deepl from messing with smileys
        $smileys = array_keys(getSmileys());
        foreach ($smileys as $smiley) {
            $text = str_replace($smiley, "<ignore>" . $smiley . "</ignore>", $text);
        }

        // ignore code tags
        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);

        // ignore the expressions from the ignore list
        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));

        foreach ($ignored_expressions as $expression) {
            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
        }

        return $text;
    }

    /**
     * Remove the <ignore> tags added by insert_ignore_tags() from the translated text.
     *
     * The tags are removed in the reverse order of their insertion. Afterwards the
     * XML entities for <, > and & are converted back to plain characters.
     *
     * @param string $text translated text containing <ignore> tags
     * @return string the translated wiki text
     */
    private function remove_ignore_tags($text): string {
        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));

        foreach ($ignored_expressions as $expression) {
            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
        }

        // prevent deepl from messing with tables
        $text = str_replace("<ignore>^</ignore>", "^", $text);
        $text = str_replace("<ignore>|</ignore>", "|", $text);

        $text = str_replace("<ignore>''</ignore>", "''", $text);
        $text = str_replace("<ignore>//</ignore>", "//", $text);
        $text = str_replace("<ignore>**</ignore>", "**", $text);
        $text = str_replace("<ignore>__</ignore>", "__", $text);
        $text = str_replace("<ignore>\\\\</ignore>", "\\\\", $text);

        // ignore links in wikitext (outside of dokuwiki-links)
        $text = preg_replace('/<ignore>(\S+:\/\/\S+)<\/ignore>/', '${1}', $text);

        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);

        // prevent deepl from messing with smileys
        $smileys = array_keys(getSmileys());
        foreach ($smileys as $smiley) {
            $text = str_replace("<ignore>" . $smiley . "</ignore>", $smiley, $text);
        }

        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);

        // fix for plugins like tag or template
        $text = preg_replace('/<ignore>(\{\{[\s\w]+?>[\s\S]*?}})<\/ignore>/', '${1}', $text);

        // prevent deepl from breaking headings
        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/', '${1}', $text);

        // ignore every other xml-like tags (the tags themselves, not their content),
        // otherwise deepl would break the formatting
        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);

        // restore < and > for example from arrows (-->) in wikitext
        $text = str_replace('&gt;', '>', $text);
        $text = str_replace('&lt;', '<', $text);

        // restore & in wikitext (has to be done last to not create new entities)
        $text = str_replace('&amp;', '&', $text);

        return $text;
    }
}