xref: /plugin/deeplautotranslate/action.php (revision 84cda41fb0c9f0aa3a42a9cd8c476c04c815e89d)
1<?php
2/**
3 * Deepl Autotranslate Plugin
4 *
5 * @author     Jennifer Graul <me@netali.de>
6 */
7
// refuse to run when this file is not loaded from within DokuWiki
if (!defined('DOKU_INC')) {
    die();
}
9
10use \dokuwiki\HTTP\DokuHTTPClient;
11use \dokuwiki\plugin\deeplautotranslate\MenuItem;
12
class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {

    /**
     * Manual mapping of ISO-languages to DeepL-languages to fix inconsistent naming.
     *
     * The keys double as the list of namespaces that are treated as language namespaces.
     *
     * @var array<string, string>
     */
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];

    /**
     * Register its handlers with the DokuWiki's event controller
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'preprocess');
        $controller->register_hook('COMMON_PAGETPL_LOAD', 'AFTER', $this, 'autotrans_editor');
        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
    }

    /**
     * Handler for MENU_ITEMS_ASSEMBLY: adds the translate button to the page menu.
     *
     * The button is only added while a page is shown, the button is enabled in the
     * configuration and a (push) translation is actually possible for the current page.
     * It is inserted right before the last menu item.
     *
     * @param Doku_Event $event
     */
    public function add_menu_button(Doku_Event $event): void {
        global $ID;
        global $ACT;

        if ($ACT != 'show') return;

        if ($event->data['view'] != 'page') return;

        if (!$this->getConf('show_button')) return;

        $split_id = explode(':', $ID);
        $lang_ns = array_shift($split_id);
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if ($this->getConf('default_lang_in_ns') && $lang_ns === $this->get_default_lang()) {
                // if the default lang is in a namespace and we are in that namespace --> check for push translation
                if (!$this->check_do_push_translate()) return;
            } else {
                // in language namespace --> check if we should translate
                if (!$this->check_do_translation(true)) return;
            }
        } else {
            // do not show the button if we are not in a language namespace and the default language is in a namespace
            if ($this->getConf('default_lang_in_ns')) return;
            // not in language namespace and default language is not in a namespace --> check if we should show the push translate button
            if (!$this->check_do_push_translate()) return;
        }

        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
    }

    /**
     * Handler for ACTION_ACT_PREPROCESS: dispatches the 'show' and 'translate' actions.
     *
     * Pages inside a (non-default) language namespace are translated directly, every
     * other page is handled by the push translation.
     *
     * @param Doku_Event $event
     * @param mixed $param unused
     */
    public function preprocess(Doku_Event $event, $param): void {
        global $ID;

        // check if action is show or translate
        if ($event->data != 'show' && $event->data != 'translate') return;

        $split_id = explode(':', $ID);
        $lang_ns = array_shift($split_id);
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if ($this->getConf('default_lang_in_ns') && $lang_ns === $this->get_default_lang()) {
                // if the default lang is in a namespace and we are in that namespace --> push translate
                $this->push_translate($event);
            } else {
                // in language namespace --> autotrans direct
                $this->autotrans_direct($event);
            }
        } else {
            // not in language namespace --> push translate
            $this->push_translate($event);
        }
    }

    /**
     * Translate the original page into the current (language namespace) page and save it.
     *
     * The event action is always reset to 'show'. On a successful translation the
     * page is saved and the request is redirected to the freshly saved page.
     *
     * @param Doku_Event $event
     */
    private function autotrans_direct(Doku_Event $event): void {
        global $ID;

        // abort if action is translate and the translate button is disabled
        if ($event->data == 'translate' && !$this->getConf('show_button')) return;

        // do nothing on show action when mode is not direct
        if ($event->data == 'show' && $this->get_mode() != 'direct') return;

        // allow translation of existing pages if we are in the translate action
        $allow_existing = ($event->data == 'translate');

        // reset action to show
        $event->data = 'show';

        if (!$this->check_do_translation($allow_existing)) {
            return;
        }

        $org_page_info = $this->get_org_page_info();
        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);

        // an empty result means the translation failed --> keep the page untouched
        if ($translated_text === '') {
            return;
        }

        saveWikiText($ID, $translated_text, 'Automatic translation');

        msg($this->getLang('msg_translation_success'), 1);

        // reload the page after translation
        send_redirect(wl($ID));
    }

    /**
     * Handler for COMMON_PAGETPL_LOAD: pre-fills the editor with the translated original page.
     *
     * Only active in 'editor' mode. When the translation fails the already loaded
     * page template is left untouched instead of being replaced by an empty string.
     *
     * @param Doku_Event $event
     * @param mixed $param unused
     */
    public function autotrans_editor(Doku_Event $event, $param): void {
        if ($this->get_mode() != 'editor') return;

        if (!$this->check_do_translation()) return;

        $org_page_info = $this->get_org_page_info();

        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);

        // translation failed --> do not wipe the template that was loaded before
        if ($translated_text === '') return;

        $event->data['tpl'] = $translated_text;
    }

    /**
     * Translate the current page into all configured push languages.
     *
     * Languages that are unknown, not writable for the current user or whose
     * translation failed are skipped. A failed translation is never saved, because
     * saving an empty text would overwrite an existing translation with nothing.
     *
     * @param Doku_Event $event
     */
    private function push_translate(Doku_Event $event): void {
        global $ID;

        // check if action is translate
        if ($event->data != 'translate') return;

        // button disabled or push translation not possible --> just show the page again
        if (!$this->getConf('show_button') || !$this->check_do_push_translate()) {
            send_redirect(wl($ID));
            return;
        }

        $org_page_text = rawWiki($ID);
        $org_ns = getNS($ID);

        if ($this->getConf('default_lang_in_ns')) {
            // if default lang is in ns: the language namespace in the ID gets replaced
            $split_id = explode(':', $ID);
            array_shift($split_id);
            $base_id = implode(':', $split_id);
        } else {
            // if default lang is not in ns: the language namespace gets added to the ID
            $base_id = $ID;
        }

        $saved_pages = 0;
        foreach ($this->get_push_langs() as $lang) {
            // skip invalid languages
            if (!array_key_exists($lang, $this->langs)) {
                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
                continue;
            }

            $lang_id = $lang . ':' . $base_id;

            // check permissions
            $perm = auth_quickaclcheck($lang_id);
            $required_perm = page_exists($lang_id) ? AUTH_EDIT : AUTH_CREATE;
            if ($perm < $required_perm) {
                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
                continue;
            }

            $translated_text = $this->deepl_translate($org_page_text, $lang, $org_ns);

            // translation failed --> never save the empty result over the target page
            if ($translated_text === '') continue;

            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
            $saved_pages++;
        }

        // only report success if something was actually translated
        if ($saved_pages > 0) {
            msg($this->getLang('msg_translation_success'), 1);
        }

        // reload the page after translation to clear the action
        send_redirect(wl($ID));
    }

    /**
     * Determine the translation mode for the current page.
     *
     * The per-page regex overrides ('editor_regex' first, then 'direct_regex') win
     * over the globally configured 'mode'.
     *
     * @return string 'editor', 'direct' or the value of the 'mode' setting
     */
    private function get_mode(): string {
        global $ID;

        $editor_regex = $this->getConf('editor_regex');
        if ($editor_regex && preg_match('/' . $editor_regex . '/', $ID) === 1) {
            return 'editor';
        }

        $direct_regex = $this->getConf('direct_regex');
        if ($direct_regex && preg_match('/' . $direct_regex . '/', $ID) === 1) {
            return 'direct';
        }

        return $this->getConf('mode');
    }

    /**
     * Get the target language, i.e. the first namespace part of the current page ID.
     *
     * @return string
     */
    private function get_target_lang(): string {
        global $ID;
        $split_id = explode(':', $ID);
        return array_shift($split_id);
    }

    /**
     * Get the default language of the wiki.
     *
     * Prefers $conf['lang_before_translation'] when it is set and falls back to $conf['lang'].
     *
     * @return string
     */
    private function get_default_lang(): string {
        global $conf;

        return empty($conf['lang_before_translation']) ? $conf['lang'] : $conf['lang_before_translation'];
    }

    /**
     * Build the ID of the original page that belongs to the current page.
     *
     * The first namespace part of the current ID is dropped; if the default language
     * lives in its own namespace, that namespace is put in front instead.
     *
     * @return string
     */
    private function get_org_id(): string {
        global $ID;

        $split_id = explode(':', $ID);
        array_shift($split_id);
        $org_id = implode(':', $split_id);

        // if default lang is in ns: add default ns in front of org id
        if ($this->getConf('default_lang_in_ns')) {
            $org_id = $this->get_default_lang() . ':' . $org_id;
        }

        return $org_id;
    }

    /**
     * Get namespace and raw wiki text of the original page.
     *
     * @return array with the keys "ns" and "text"
     */
    private function get_org_page_info(): array {
        $org_id = $this->get_org_id();

        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
    }

    /**
     * Check whether the current page may be translated from its original page.
     *
     * @param bool $allow_existing also allow the translation if the current page already exists
     * @return bool
     */
    private function check_do_translation($allow_existing = false): bool {
        global $INFO;
        global $ID;

        // only translate if the current page does not exist
        if ($INFO['exists'] && !$allow_existing) return false;

        // permission check: editing an existing page needs edit rights, a new page create rights
        $perm = auth_quickaclcheck($ID);
        $required_perm = $INFO['exists'] ? AUTH_EDIT : AUTH_CREATE;
        if ($perm < $required_perm) return false;

        // skip blacklisted namespaces and pages
        if ($this->getConf('blacklist_regex')) {
            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
        }

        // only translate if the current page is in a language namespace
        if (!array_key_exists($this->get_target_lang(), $this->langs)) return false;

        // check if the original page exists
        if (!page_exists($this->get_org_id())) return false;

        return true;
    }

    /**
     * Check whether the current page may be push translated.
     *
     * @return bool
     */
    private function check_do_push_translate(): bool {
        global $ID;
        global $INFO;

        if (!$INFO['exists']) return false;

        // only allow push translation if the user can edit this page
        $perm = auth_quickaclcheck($ID);
        if ($perm < AUTH_EDIT) return false;

        // if default language is in namespace: only allow push translation from that namespace
        if ($this->getConf('default_lang_in_ns')) {
            $split_id = explode(':', $ID);
            $lang_ns = array_shift($split_id);

            if ($lang_ns !== $this->get_default_lang()) return false;
        }

        $push_langs = $this->get_push_langs();
        // push_langs empty --> push_translate disabled --> abort
        if (empty($push_langs)) return false;

        // skip blacklisted namespaces and pages
        if ($this->getConf('blacklist_regex')) {
            // blacklist regex match --> abort
            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
        }

        return true;
    }

    /**
     * Translate wiki text with the DeepL API.
     *
     * Links are patched to the target language and wiki syntax is protected with
     * ignore tags before the text is sent; the tags are removed from the result again.
     *
     * @param string $text wiki text to translate
     * @param string $target_lang language namespace name (key of $this->langs)
     * @param string|false $org_ns namespace of the original page, used to resolve relative links
     * @return string the translated text or an empty string if the translation was not possible
     */
    private function deepl_translate($text, $target_lang, $org_ns): string {
        $api_key = trim((string) $this->getConf('api_key'));
        if ($api_key === '') return '';

        // unknown language --> there is no DeepL language code to send
        if (!array_key_exists($target_lang, $this->langs)) return '';

        $text = $this->patch_links($text, $target_lang, $org_ns);

        $text = $this->insert_ignore_tags($text);

        $data = [
            'target_lang' => $this->langs[$target_lang],
            'tag_handling' => 'xml',
            'ignore_tags' => 'ignore',
            'text' => $text
        ];

        if ($this->getConf('api') == 'free') {
            $url = 'https://api-free.deepl.com/v2/translate';
        } else {
            $url = 'https://api.deepl.com/v2/translate';
        }

        $http = new DokuHTTPClient();
        // authenticate via header as documented by DeepL instead of sending the key in the request body
        $http->headers['Authorization'] = 'DeepL-Auth-Key ' . $api_key;
        $raw_response = $http->post($url, $data);

        // a false response covers failures that do not show up as a 4xx/5xx status
        if ($raw_response === false || $http->status >= 400) {
            // add error messages
            switch ($http->status) {
                case 403:
                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
                    break;
                case 456:
                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
                    break;
                default:
                    msg($this->getLang('msg_translation_fail'), -1);
                    break;
            }

            // if any error occurred return an empty string
            return '';
        }

        $json_response = json_decode($raw_response, true);
        $translated_text = $json_response['translations'][0]['text'] ?? null;

        // invalid JSON or unexpected response structure
        if (!is_string($translated_text)) {
            msg($this->getLang('msg_translation_fail'), -1);
            return '';
        }

        return $this->remove_ignore_tags($translated_text);
    }

    /**
     * Get the configured push languages.
     *
     * The 'push_langs' setting is a whitespace separated list; repeated whitespace
     * does not produce empty entries.
     *
     * @return string[] empty if push translation is disabled
     */
    private function get_push_langs(): array {
        $push_langs = trim((string) $this->getConf('push_langs'));

        if ($push_langs === '') return array();

        return preg_split('/\s+/', $push_langs, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Drop the leading namespace of an ID if it is a known language namespace.
     *
     * @param string $id
     * @return string the ID without language namespace, or the unchanged ID
     */
    private function strip_lang_ns($id): string {
        $split_id = explode(':', $id);
        $lang_ns = array_shift($split_id);

        if (array_key_exists($lang_ns, $this->langs)) {
            return implode(':', $split_id);
        }

        return $id;
    }

    /**
     * Point internal links and media to their counterparts in the target language.
     *
     * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]] (and media in {{ ... }})
     * 2. Extract aa:bb and resolve it relative to the namespace of the original page
     * 3. Check if lang:aa:bb exists
     * 3.1. --> Yes --> replace
     * 3.2. --> No --> replace with the absolute ID in case it was a relative ID
     *
     * @param string $text wiki text
     * @param string $target_lang language namespace of the translation
     * @param string|false $ns namespace of the original page
     * @return string
     */
    private function patch_links($text, $target_lang, $ns): string {
        /*
         * LINKS
         */

        preg_match_all('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            // external link --> skip
            if (strpos($match[1], '://') !== false) continue;

            // skip interwiki links
            if (strpos($match[1], '>') !== false) continue;

            // skip windows share links
            if (strpos($match[1], '\\\\') !== false) continue;

            // trailing optional groups are missing from the match if they did not participate
            $anchor = $match[2] ?? '';
            $title = $match[3] ?? '';

            $resolved_id = trim($match[1]);

            resolve_pageid($ns, $resolved_id, $exists);

            $resolved_id_full = $resolved_id;

            // if the link already points to a target in a language namespace drop it and add the new language namespace
            $lang_id = $target_lang . ':' . $this->strip_lang_ns($resolved_id);

            if (!page_exists($lang_id)) {
                // Page in target lang does not exist --> replace with absolute ID in case it was a relative ID
                $new_link = '[[' . $resolved_id_full . $anchor . $title . ']]';
            } else {
                // Page in target lang exists --> replace link
                $new_link = '[[' . $lang_id . $anchor . $title . ']]';
            }

            $text = str_replace($match[0], $new_link, $text);

        }

        /*
         * MEDIA
         */

        preg_match_all('/\{\{(([\s\S]*?)(\?[\s\S]*?)?)(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            // external image --> skip
            if (strpos($match[1], '://') !== false) continue;

            // skip things like {{tag>...}}
            if (strpos($match[1], '>') !== false) continue;

            // keep alignment
            $align_left = "";
            $align_right = "";

            // align left --> space in front of ID
            if (substr($match[1], 0, 1) == " ") $align_left = " ";
            // align right --> space behind id
            if (substr($match[1], -1) == " ") $align_right = " ";

            // trailing optional groups are missing from the match if they did not participate
            $resolved_id = trim($match[2]);
            $params = trim($match[3] ?? '');
            $title = $match[4] ?? '';

            resolve_mediaid($ns, $resolved_id, $exists);

            $resolved_id_full = $resolved_id;

            // if the link already points to a target in a language namespace drop it and add the new language namespace
            $lang_id = $target_lang . ':' . $this->strip_lang_ns($resolved_id);

            $lang_id_fn = mediaFN($lang_id);

            if (!file_exists($lang_id_fn)) {
                // media in target lang does not exist --> replace with absolute ID in case it was a relative ID
                $new_link = '{{' . $align_left . $resolved_id_full . $params . $align_right . $title . '}}';
            } else {
                // media in target lang exists --> replace it
                $new_link = '{{' . $align_left . $lang_id . $params . $align_right . $title . '}}';
            }

            $text = str_replace($match[0], $new_link, $text);

        }

        return $text;
    }

    /**
     * Wrap wiki syntax into <ignore> tags so DeepL leaves it untouched.
     *
     * The order of the replacements matters: remove_ignore_tags() undoes them in
     * reverse order.
     *
     * @param string $text wiki text
     * @return string
     */
    private function insert_ignore_tags($text): string {
        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);

        // prevent deepl from breaking headings
        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);

        // fix for plugins like tag or template
        $text = preg_replace('/\{\{[\s\w]+?>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);

        // ignore links in wikitext (outside of dokuwiki-links)
        $text = preg_replace('/\S+:\/\/\S+/', '<ignore>${0}</ignore>', $text);

        // ignore link/media ids but translate the text (if existing)
        $text = preg_replace('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${2}${4}</ignore>${5}<ignore>]]</ignore>', $text);
        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);

        // prevent deepl from messing with tables (^ and |)
        // and from doing strange things with dokuwiki syntax ('', //, **, __, \\)
        foreach (['^', '|', "''", '//', '**', '__', '\\\\'] as $syntax) {
            $text = str_replace($syntax, '<ignore>' . $syntax . '</ignore>', $text);
        }

        // prevent deepl from messing with smileys
        $smileys = array_keys(getSmileys());
        foreach ($smileys as $smiley) {
            $text = str_replace($smiley, "<ignore>" . $smiley . "</ignore>", $text);
        }

        // ignore code tags
        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);

        // ignore the expressions from the ignore list
        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));

        foreach ($ignored_expressions as $expression) {
            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
        }

        return $text;
    }

    /**
     * Remove the <ignore> tags added by insert_ignore_tags() from the translated text.
     *
     * The replacements are undone in reverse order; finally the XML entities
     * &gt;, &lt; and &amp; are turned back into plain characters.
     *
     * @param string $text translated text
     * @return string
     */
    private function remove_ignore_tags($text): string {
        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));

        foreach ($ignored_expressions as $expression) {
            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
        }

        // restore table syntax (^ and |) and dokuwiki formatting syntax ('', //, **, __, \\)
        foreach (['^', '|', "''", '//', '**', '__', '\\\\'] as $syntax) {
            $text = str_replace('<ignore>' . $syntax . '</ignore>', $syntax, $text);
        }

        // ignore links in wikitext (outside of dokuwiki-links)
        $text = preg_replace('/<ignore>(\S+:\/\/\S+)<\/ignore>/', '${1}', $text);

        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);

        // prevent deepl from messing with smileys
        $smileys = array_keys(getSmileys());
        foreach ($smileys as $smiley) {
            $text = str_replace("<ignore>" . $smiley . "</ignore>", $smiley, $text);
        }

        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);

        // fix for plugins like tag or template
        $text = preg_replace('/<ignore>(\{\{[\s\w]+?>[\s\S]*?}})<\/ignore>/', '${1}', $text);

        // prevent deepl from breaking headings
        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/', '${1}', $text);

        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);

        // restore < and > for example from arrows (-->) in wikitext
        $text = str_replace('&gt;', '>', $text);
        $text = str_replace('&lt;', '<', $text);

        // restore & in wikitext (done last so that escaped entities are not decoded twice)
        $text = str_replace('&amp;', '&', $text);

        return $text;
    }
}
586
587