<?php
/**
 * Deepl Autotranslate Plugin
 *
 * @author Jennifer Graul <me@netali.de>
 */

if(!defined('DOKU_INC')) die();

use \dokuwiki\HTTP\DokuHTTPClient;
use \dokuwiki\plugin\deeplautotranslate\MenuItem;

/**
 * Action component that translates wiki pages with the DeepL API.
 *
 * Pages are identified as translations by the first namespace segment of their
 * ID (a key of $langs). Two directions are supported:
 *  - "direct"/"editor" translation: a page inside a language namespace is filled
 *    with the translation of the corresponding original page;
 *  - "push" translation: an original page is translated into every language
 *    listed in the 'push_langs' setting.
 */
class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {

    // manual mapping of ISO-languages to DeepL-languages to fix inconsistent naming
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];

    /**
     * Register its handlers with the DokuWiki's event controller
     *
     * @param Doku_Event_Handler $controller event controller to register the hooks on
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'preprocess');
        $controller->register_hook('COMMON_PAGETPL_LOAD', 'AFTER', $this, 'autotrans_editor');
        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
    }

    /**
     * Add the translate button to the page menu when a translation is possible.
     *
     * The button is only added in the 'show' action, for the 'page' menu view,
     * when the 'show_button' setting is enabled and either a translation of the
     * current page or a push translation from it is allowed.
     *
     * @param Doku_Event $event MENU_ITEMS_ASSEMBLY event; its data['items'] is modified
     */
    public function add_menu_button(Doku_Event $event): void {
        global $ACT;

        if ($ACT != 'show') return;

        if ($event->data['view'] != 'page') return;

        if (!$this->getConf('show_button')) return;

        $lang_ns = $this->get_target_lang();
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if ($this->getConf('default_lang_in_ns') && $lang_ns === $this->get_default_lang()) {
                // if the default lang is in a namespace and we are in that namespace --> check for push translation
                if (!$this->check_do_push_translate()) return;
            } else {
                // in language namespace --> check if we should translate
                if (!$this->check_do_translation(true)) return;
            }
        } else {
            // do not show the button if we are not in a language namespace and the default language is in a namespace
            if ($this->getConf('default_lang_in_ns')) return;
            // not in language namespace and default language is not in a namespace
            // --> check if we should show the push translate button
            if (!$this->check_do_push_translate()) return;
        }

        // insert the button directly before the last menu item
        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
    }

    /**
     * Dispatch the 'show' and 'translate' actions to direct or push translation.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event; its data holds the action
     * @param mixed      $param unused
     */
    public function preprocess(Doku_Event $event, $param): void {
        // check if action is show or translate
        if ($event->data != 'show' && $event->data != 'translate') return;

        $lang_ns = $this->get_target_lang();
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if ($this->getConf('default_lang_in_ns') && $lang_ns === $this->get_default_lang()) {
                // if the default lang is in a namespace and we are in that namespace --> push translate
                $this->push_translate($event);
            } else {
                // in language namespace --> autotrans direct
                $this->autotrans_direct($event);
            }
        } else {
            // not in language namespace --> push translate
            $this->push_translate($event);
        }
    }

    /**
     * Translate the original page into the current (language namespace) page and save it.
     *
     * Runs on the 'translate' action (if the button is enabled) or on 'show' when
     * the mode for this page is 'direct'. The action is always reset to 'show'.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event
     */
    private function autotrans_direct(Doku_Event $event): void {
        global $ID;

        // abort if action is translate and the translate button is disabled
        if ($event->data == 'translate' && !$this->getConf('show_button')) return;

        // do nothing on show action when mode is not direct
        if ($event->data == 'show' && $this->get_mode() != 'direct') return;

        // allow translation of existing pages if we are in the translate action
        $allow_existing = ($event->data == 'translate');

        // reset action to show
        $event->data = 'show';

        if (!$this->check_do_translation($allow_existing)) {
            return;
        }

        $org_page_info = $this->get_org_page_info();
        $translated_text = $this->deepl_translate(
            $org_page_info["text"],
            $this->get_target_lang(),
            $org_page_info["ns"]
        );

        // an empty string signals a failed translation --> keep the page as it is
        if ($translated_text === '') {
            return;
        }

        saveWikiText($ID, $translated_text, 'Automatic translation');

        msg($this->getLang('msg_translation_success'), 1);

        // reload the page after translation
        send_redirect(wl($ID));
    }

    /**
     * Pre-fill the editor of a new page with the translation of the original page.
     *
     * Only active when the mode for this page is 'editor'.
     *
     * @param Doku_Event $event COMMON_PAGETPL_LOAD event; its data['tpl'] is replaced
     * @param mixed      $param unused
     */
    public function autotrans_editor(Doku_Event $event, $param): void {
        if ($this->get_mode() != 'editor') return;

        if (!$this->check_do_translation()) return;

        $org_page_info = $this->get_org_page_info();

        $translated_text = $this->deepl_translate(
            $org_page_info["text"],
            $this->get_target_lang(),
            $org_page_info["ns"]
        );

        // a failed translation returns '' --> do not wipe the template that was already loaded
        if ($translated_text === '') return;

        $event->data['tpl'] = $translated_text;
    }

    /**
     * Translate the current page into every language configured in 'push_langs'.
     *
     * Only reacts to the 'translate' action and always ends with a redirect back
     * to the current page once the action was accepted.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event
     */
    private function push_translate(Doku_Event $event): void {
        global $ID;

        // check if action is translate
        if ($event->data != 'translate') return;

        // check if button is enabled and push translation is allowed for this page
        if (!$this->getConf('show_button') || !$this->check_do_push_translate()) {
            send_redirect(wl($ID));
            return;
        }

        // push translate
        $push_langs = $this->get_push_langs();
        $org_page_text = rawWiki($ID);
        $translated_any = false;

        foreach ($push_langs as $lang) {
            // skip invalid languages
            if (!array_key_exists($lang, $this->langs)) {
                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
                continue;
            }

            if ($this->getConf('default_lang_in_ns')) {
                // if default lang is in ns: replace language namespace in ID
                $split_id = explode(':', $ID);
                array_shift($split_id);
                $lang_id = $lang . ':' . implode(':', $split_id);
            } else {
                // if default lang is not in ns: add language namespace to ID
                $lang_id = $lang . ':' . $ID;
            }

            // check permissions on the page that is going to be written, not on the source page
            $perm = auth_quickaclcheck($lang_id);
            $exists = page_exists($lang_id);
            if (($exists && $perm < AUTH_EDIT) || (!$exists && $perm < AUTH_CREATE)) {
                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
                continue;
            }

            $translated_text = $this->deepl_translate($org_page_text, $lang, getNS($ID));

            // '' signals a failed translation; saving empty text would delete an existing target page
            if ($translated_text === '') continue;

            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
            $translated_any = true;
        }

        // only report success if at least one page was actually written
        if ($translated_any) {
            msg($this->getLang('msg_translation_success'), 1);
        }

        // reload the page after translation to clear the action
        send_redirect(wl($ID));
    }

    /**
     * Check the current page ID against a regex stored in the plugin configuration.
     *
     * The configured value is wrapped in '/' delimiters as-is. An unset or empty
     * setting never matches.
     *
     * @param string $conf_key name of the configuration setting holding the regex
     * @return bool true if the setting is non-empty and matches $ID
     */
    private function id_matches_conf_regex($conf_key): bool {
        global $ID;

        $regex = $this->getConf($conf_key);
        if (!$regex) return false;

        return preg_match('/' . $regex . '/', $ID) === 1;
    }

    /**
     * Determine the translation mode for the current page.
     *
     * 'editor_regex' takes precedence over 'direct_regex'; if neither matches the
     * general 'mode' setting is returned.
     *
     * @return string the mode for the current page
     */
    private function get_mode(): string {
        if ($this->id_matches_conf_regex('editor_regex')) return 'editor';
        if ($this->id_matches_conf_regex('direct_regex')) return 'direct';
        return $this->getConf('mode');
    }

    /**
     * Get the first namespace segment of the current page ID.
     *
     * @return string the part of $ID before the first ':' (the whole ID if there is none)
     */
    private function get_target_lang(): string {
        global $ID;
        $split_id = explode(':', $ID);
        return array_shift($split_id);
    }

    /**
     * Get the default language of the wiki.
     *
     * @return string $conf['lang_before_translation'] if set and non-empty, otherwise $conf['lang']
     */
    private function get_default_lang(): string {
        global $conf;

        if (empty($conf['lang_before_translation'])) {
            return $conf['lang'];
        }

        return $conf['lang_before_translation'];
    }

    /**
     * Remove a leading language namespace from a page or media ID.
     *
     * @param string $id ID to inspect
     * @return string the ID without its first segment if that segment is a known
     *                language, otherwise the unchanged ID
     */
    private function strip_lang_ns($id): string {
        $split_id = explode(':', $id);
        $lang_ns = array_shift($split_id);

        if (array_key_exists($lang_ns, $this->langs)) {
            return implode(':', $split_id);
        }

        return $id;
    }

    /**
     * Get namespace and raw text of the original page belonging to the current page.
     *
     * The original ID is the current ID without its first segment, prefixed with
     * the default language if 'default_lang_in_ns' is enabled.
     *
     * @return array with the keys "ns" (namespace of the original page) and "text" (its raw wiki text)
     */
    private function get_org_page_info(): array {
        global $ID;

        $split_id = explode(':', $ID);
        array_shift($split_id);
        $org_id = implode(':', $split_id);

        // if default lang is in ns: add default ns in front of org id
        if ($this->getConf('default_lang_in_ns')) {
            $org_id = $this->get_default_lang() . ':' . $org_id;
        }

        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
    }

    /**
     * Check whether the current page may be filled with a translation.
     *
     * @param bool $allow_existing whether an already existing page may be overwritten
     * @return bool true if the page is in a language namespace, is not blacklisted,
     *              the user has sufficient permissions and the original page exists
     */
    private function check_do_translation($allow_existing = false): bool {
        global $INFO;
        global $ID;

        // only translate if the current page does not exist
        if ($INFO['exists'] && !$allow_existing) return false;

        // permission check
        $perm = auth_quickaclcheck($ID);
        if (($INFO['exists'] && $perm < AUTH_EDIT) || (!$INFO['exists'] && $perm < AUTH_CREATE)) return false;

        // skip blacklisted namespaces and pages
        if ($this->id_matches_conf_regex('blacklist_regex')) return false;

        $split_id = explode(':', $ID);
        $lang_ns = array_shift($split_id);
        // only translate if the current page is in a language namespace
        if (!array_key_exists($lang_ns, $this->langs)) return false;

        $org_id = implode(':', $split_id);

        // if default lang is in ns: add default ns in front of org id
        if ($this->getConf('default_lang_in_ns')) {
            $org_id = $this->get_default_lang() . ':' . $org_id;
        }

        // check if the original page exists
        return page_exists($org_id);
    }

    /**
     * Check whether a push translation may be started from the current page.
     *
     * @return bool true if the page exists, lies in the default language namespace
     *              (when 'default_lang_in_ns' is enabled), push languages are
     *              configured and the page is not blacklisted
     */
    private function check_do_push_translate(): bool {
        global $INFO;

        if (!$INFO['exists']) return false;

        // if default language is in namespace: only allow push translation from that namespace
        if ($this->getConf('default_lang_in_ns')) {
            if ($this->get_target_lang() !== $this->get_default_lang()) return false;
        }

        // push_langs empty --> push_translate disabled --> abort
        if (empty($this->get_push_langs())) return false;

        // skip blacklisted namespaces and pages
        if ($this->id_matches_conf_regex('blacklist_regex')) return false;

        return true;
    }

    /**
     * Translate wiki text with the DeepL API.
     *
     * Links are patched to the target language and wiki syntax is wrapped in
     * <ignore> tags before the request; the tags are removed from the response.
     *
     * @param string $text        raw wiki text to translate
     * @param string $target_lang language code, must be a key of $langs
     * @param string $org_ns      namespace of the original page, used to resolve relative links
     * @return string the translated text, or '' if no API key is configured, the
     *                language is unknown or the request failed
     */
    private function deepl_translate($text, $target_lang, $org_ns): string {
        $api_key = trim($this->getConf('api_key'));
        if (!$api_key) return '';

        // callers validate the language, but never index the map with an unknown key
        if (!array_key_exists($target_lang, $this->langs)) return '';

        $text = $this->patch_links($text, $target_lang, $org_ns);

        $text = $this->insert_ignore_tags($text);

        $data = [
            'auth_key' => $api_key,
            'target_lang' => $this->langs[$target_lang],
            'tag_handling' => 'xml',
            'ignore_tags' => 'ignore',
            'text' => $text
        ];

        if ($this->getConf('api') == 'free') {
            $url = 'https://api-free.deepl.com/v2/translate';
        } else {
            $url = 'https://api.deepl.com/v2/translate';
        }

        $http = new DokuHTTPClient();
        $raw_response = $http->post($url, $data);

        // a false response covers transport failures that do not produce a status >= 400
        if ($raw_response === false || $http->status >= 400) {
            // add error messages
            switch ($http->status) {
                case 403:
                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
                    break;
                case 456:
                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
                    break;
                default:
                    msg($this->getLang('msg_translation_fail'), -1);
                    break;
            }

            // if any error occurred return an empty string
            return '';
        }

        $json_response = json_decode($raw_response, true);

        // malformed or unexpected payload --> treat as failed translation instead of returning null
        if (
            !isset($json_response['translations'][0]['text'])
            || !is_string($json_response['translations'][0]['text'])
        ) {
            msg($this->getLang('msg_translation_fail'), -1);
            return '';
        }

        return $this->remove_ignore_tags($json_response['translations'][0]['text']);
    }

    /**
     * Get the languages configured for push translation.
     *
     * @return array list of language codes from the whitespace separated 'push_langs'
     *               setting; empty if the setting is empty
     */
    private function get_push_langs(): array {
        $push_langs = trim($this->getConf('push_langs'));

        if ($push_langs === '') return array();

        // split on any run of whitespace so repeated spaces do not yield empty language codes
        return preg_split('/\s+/', $push_langs, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Point internal links and media to their counterpart in the target language.
     *
     * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]] (and media in {{ ... }})
     * 2. Extract aa:bb
     * 3. Check if lang:aa:bb exists
     * 3.1. --> Yes --> replace
     * 3.2. --> No --> leave it as it is
     *
     * @param string $text        raw wiki text
     * @param string $target_lang language namespace the links should point to
     * @param string $ns          namespace used to resolve relative IDs
     * @return string the text with patched links
     */
    private function patch_links($text, $target_lang, $ns): string {
        /*
         * LINKS
         */

        preg_match_all('/\[\[([\s\S]*?)(\|([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            if (strpos($match[1], '://') !== false) {
                // external link --> skip
                continue;
            }

            $resolved_id = $match[1];

            resolve_pageid($ns, $resolved_id, $exists);

            if (!$exists) {
                // redlink --> skip
                continue;
            }

            // if the link already points to a target in a language namespace
            // drop it and add the new language namespace
            $lang_id = $target_lang . ':' . $this->strip_lang_ns($resolved_id);

            if (!page_exists($lang_id)) {
                // Page in target lang does not exist --> skip
                continue;
            }

            // group 2 ("|title") is absent from the match array when the link has no title
            $new_link = '[[' . $lang_id . ($match[2] ?? '') . ']]';

            $text = str_replace($match[0], $new_link, $text);

        }

        /*
         * MEDIA
         */

        preg_match_all('/\{\{([\s\S]*?)(\?[\s\S]*?)?(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            if (strpos($match[1], '://') !== false) {
                // external image --> skip
                continue;
            }

            $resolved_id = $match[1];

            resolve_mediaid($ns, $resolved_id, $exists);

            if (!$exists) {
                // redlink --> skip
                continue;
            }

            // if the link already points to a target in a language namespace
            // drop it and add the new language namespace
            $lang_id = $target_lang . ':' . $this->strip_lang_ns($resolved_id);

            if (!file_exists(mediaFN($lang_id))) {
                // media in target lang does not exist --> skip
                continue;
            }

            // groups 2 ("?params") and 3 ("|title") are absent from the match array when not present
            $new_link = '{{' . $lang_id . ($match[2] ?? '') . ($match[3] ?? '') . '}}';

            $text = str_replace($match[0], $new_link, $text);

        }

        return $text;
    }

    /**
     * Get the expressions that must not be translated.
     *
     * @return array non-empty entries of the ':' separated 'ignored_expressions' setting
     */
    private function get_ignored_expressions(): array {
        $expressions = explode(':', (string) $this->getConf('ignored_expressions'));

        // an empty setting yields one empty string, which is not a usable expression
        return array_values(array_filter($expressions, function ($expression) {
            return $expression !== '';
        }));
    }

    /**
     * Wrap wiki syntax in <ignore> tags so DeepL leaves it untouched.
     *
     * The order of the replacements matters; remove_ignore_tags() undoes them.
     *
     * @param string $text raw wiki text
     * @return string the text with <ignore> tags inserted
     */
    private function insert_ignore_tags($text): string {
        // ignore every other xml-like tags (the tags themselves, not their content),
        // otherwise deepl would break the formatting
        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);

        // prevent deepl from breaking headings
        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);

        // fix for the template plugin
        $text = preg_replace('/\{\{template>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);

        // ignore link/media ids but translate the text (if existing)
        $text = preg_replace(
            '/\[\[([\s\S]*?)((\|)([\s\S]*?))?]]/',
            '<ignore>[[${1}${3}</ignore>${4}<ignore>]]</ignore>',
            $text
        );
        $text = preg_replace(
            '/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/',
            '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>',
            $text
        );

        // prevent deepl from doing strange things with dokuwiki syntax
        $text = str_replace("''", "<ignore>''</ignore>", $text);
        $text = str_replace("\\\\", "<ignore>\\\\</ignore>", $text);

        // ignore code tags
        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);

        // ignore the expressions from the ignore list
        foreach ($this->get_ignored_expressions() as $expression) {
            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
        }

        return $text;
    }

    /**
     * Remove the <ignore> tags added by insert_ignore_tags() from translated text.
     *
     * @param string $text translated text as returned by DeepL
     * @return string the text without <ignore> tags and with &lt;/&gt; restored
     */
    private function remove_ignore_tags($text): string {
        foreach ($this->get_ignored_expressions() as $expression) {
            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
        }

        $text = preg_replace(
            '/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/',
            '[[${1}${2}${4}]]',
            $text
        );
        $text = preg_replace(
            '/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/',
            '{{${1}${2}${4}}}',
            $text
        );

        $text = str_replace("<ignore>''</ignore>", "''", $text);
        $text = str_replace("<ignore>\\\\</ignore>", "\\\\", $text);

        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);

        // fix for the template plugin
        $text = preg_replace('/<ignore>(\{\{template>[\s\S]*?}})<\/ignore>/', '${1}', $text);

        // prevent deepl from breaking headings
        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/', '${1}', $text);

        // ignore every other xml-like tags (the tags themselves, not their content),
        // otherwise deepl would break the formatting
        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);

        // restore < and > for example from arrows (-->) in wikitext,
        // which come back entity-encoded because of the xml tag handling
        $text = str_replace('&gt;', '>', $text);
        $text = str_replace('&lt;', '<', $text);

        return $text;
    }
}