xref: /plugin/deeplautotranslate/action.php (revision a3a5150738607b40da94c9daba8a30867ee9d5df)
1<?php
2/**
3 * Deepl Autotranslate Plugin
4 *
5 * @author     Jennifer Graul <me@netali.de>
6 */
7
// refuse to run when this file is not loaded from within DokuWiki (DOKU_INC undefined)
if (!defined('DOKU_INC')) {
    die();
}
9
10use \dokuwiki\HTTP\DokuHTTPClient;
11use \dokuwiki\plugin\deeplautotranslate\MenuItem;
12
13class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {
14
    /**
     * Manual mapping of ISO-languages to DeepL-languages to fix inconsistent naming.
     *
     * The keys are the names accepted as language namespaces (the first segment of a
     * page ID is looked up here); the values are the codes sent to DeepL as
     * 'target_lang'. Several keys may share one DeepL code, e.g. 'de' and
     * 'de-informal' both map to 'DE'.
     */
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];
44
45    /**
46     * Register its handlers with the DokuWiki's event controller
47     */
48    public function register(Doku_Event_Handler $controller) {
49        $controller->register_hook('ACTION_ACT_PREPROCESS','BEFORE', $this, 'preprocess');
50        $controller->register_hook('COMMON_PAGETPL_LOAD','AFTER', $this, 'autotrans_editor');
51        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
52    }
53
54    public function add_menu_button(Doku_Event $event): void {
55        global $ID;
56        global $ACT;
57
58        if ($ACT != 'show') return;
59
60        if ($event->data['view'] != 'page') return;
61
62        if (!$this->getConf('show_button')) return;
63
64        $split_id = explode(':', $ID);
65        $lang_ns = array_shift($split_id);
66        // check if we are in a language namespace
67        if (array_key_exists($lang_ns, $this->langs)) {
68            if($this->getConf('default_lang_in_ns') and $lang_ns === $this->get_default_lang()) {
69                // if the default lang is in a namespace and we are in that namespace --> check for push translation
70                if (!$this->check_do_push_translate()) return;
71            } else {
72                // in language namespace --> check if we should translate
73                if (!$this->check_do_translation(true)) return;
74            }
75        } else {
76            // do not show the button if we are not in a language namespace and the default language is in a namespace
77            if($this->getConf('default_lang_in_ns')) return;
78            // not in language namespace and default language is npt in a namespace --> check if we should show the push translate button
79            if (!$this->check_do_push_translate()) return;
80        }
81
82        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
83    }
84
85    public function preprocess(Doku_Event  $event, $param): void {
86        global $ID;
87
88        // check if action is show or translate
89        if ($event->data != 'show' and $event->data != 'translate') return;
90
91        $split_id = explode(':', $ID);
92        $lang_ns = array_shift($split_id);
93        // check if we are in a language namespace
94        if (array_key_exists($lang_ns, $this->langs)) {
95            if($this->getConf('default_lang_in_ns') and $lang_ns === $this->get_default_lang()) {
96                // if the default lang is in a namespace and we are in that namespace --> push translate
97                $this->push_translate($event);
98            } else {
99                // in language namespace --> autotrans direct
100                $this->autotrans_direct($event);
101            }
102        } else {
103            // not in language namespace --> push translate
104            $this->push_translate($event);
105        }
106    }
107
108    private function autotrans_direct(Doku_Event $event): void {
109        global $ID;
110
111        // abort if action is translate and the translate button is disabled
112        if ($event->data == 'translate' and !$this->getConf('show_button')) return;
113
114        // do nothing on show action when mode is not direct
115        if ($event->data == 'show' and $this->get_mode() != 'direct') return;
116
117        // allow translation of existing pages is we are in the translate action
118        $allow_existing = ($event->data == 'translate');
119
120        // reset action to show
121        $event->data = 'show';
122
123        if (!$this->check_do_translation($allow_existing)) {
124            return;
125        }
126
127        $org_page_info = $this->get_org_page_info();
128        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
129
130        if ($translated_text === '') {
131            return;
132        }
133
134        saveWikiText($ID, $translated_text, 'Automatic translation');
135
136        msg($this->getLang('msg_translation_success'), 1);
137
138        // reload the page after translation
139        send_redirect(wl($ID));
140    }
141
142    public function autotrans_editor(Doku_Event $event, $param): void {
143        if ($this->get_mode() != 'editor') return;
144
145        if (!$this->check_do_translation()) return;
146
147        $org_page_info = $this->get_org_page_info();
148
149        $event->data['tpl'] = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
150    }
151
152    private function push_translate(Doku_Event $event): void {
153        global $ID;
154
155        // check if action is translate
156        if ($event->data != 'translate') return;
157
158        // check if button is enabled
159        if (!$this->getConf('show_button')) {
160            send_redirect(wl($ID));
161            return;
162        }
163
164        if (!$this->check_do_push_translate()) {
165            send_redirect(wl($ID));
166            return;
167        }
168
169        // push translate
170        $push_langs = $this->get_push_langs();
171        $org_page_text = rawWiki($ID);
172        foreach ($push_langs as $lang) {
173            // skip invalid languages
174            if (!array_key_exists($lang, $this->langs)) {
175                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
176                continue;
177            }
178
179            if ($this->getConf('default_lang_in_ns')) {
180                // if default lang is in ns: replace language namespace in ID
181                $split_id = explode(':', $ID);
182                array_shift($split_id);
183                $lang_id = implode(':', $split_id);
184                $lang_id = $lang . ':' . $lang_id;
185            } else {
186                // if default lang is not in ns: add language namespace to ID
187                $lang_id = $lang . ':' . $ID;
188            }
189
190            // check permissions
191            $perm = auth_quickaclcheck($lang_id);
192            $exists = page_exists($lang_id);
193            if (($exists and $perm < AUTH_EDIT) or (!$exists and $perm < AUTH_CREATE)) {
194                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
195                continue;
196            }
197
198            $translated_text = $this->deepl_translate($org_page_text, $lang, getNS($ID));
199            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
200        }
201
202        msg($this->getLang('msg_translation_success'), 1);
203
204        // reload the page after translation to clear the action
205        send_redirect(wl($ID));
206    }
207
208    private function get_mode(): string {
209        global $ID;
210        if ($this->getConf('editor_regex')) {
211            if (preg_match('/' . $this->getConf('editor_regex') . '/', $ID) === 1) return 'editor';
212        }
213        if ($this->getConf('direct_regex')) {
214            if (preg_match('/' . $this->getConf('direct_regex') . '/', $ID) === 1) return 'direct';
215        }
216        return $this->getConf('mode');
217    }
218
219    private function get_target_lang(): string {
220        global $ID;
221        $split_id = explode(':', $ID);
222        return array_shift($split_id);
223    }
224
225    private function get_default_lang(): string {
226        global $conf;
227
228        if (empty($conf['lang_before_translation'])) {
229            $default_lang = $conf['lang'];
230        } else {
231            $default_lang = $conf['lang_before_translation'];
232        }
233
234        return $default_lang;
235    }
236
237    private function get_org_page_info(): array {
238        global $ID;
239
240        $split_id = explode(':', $ID);
241        array_shift($split_id);
242        $org_id = implode(':', $split_id);
243
244        // if default lang is in ns: add default ns in front of org id
245        if ($this->getConf('default_lang_in_ns')) {
246            $org_id = $this->get_default_lang() . ':' . $org_id;
247        }
248
249        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
250    }
251
252    private function check_do_translation($allow_existing = false): bool {
253        global $INFO;
254        global $ID;
255
256        // only translate if the current page does not exist
257        if ($INFO['exists'] and !$allow_existing) return false;
258
259        // permission check
260        $perm = auth_quickaclcheck($ID);
261        if (($INFO['exists'] and $perm < AUTH_EDIT) or (!$INFO['exists'] and $perm < AUTH_CREATE)) return false;
262
263        // skip blacklisted namespaces and pages
264        if ($this->getConf('blacklist_regex')) {
265            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
266        }
267
268        $split_id = explode(':', $ID);
269        $lang_ns = array_shift($split_id);
270        // only translate if the current page is in a language namespace
271        if (!array_key_exists($lang_ns, $this->langs)) return false;
272
273        $org_id = implode(':', $split_id);
274
275        // if default lang is in ns: add default ns in front of org id
276        if ($this->getConf('default_lang_in_ns')) {
277            $org_id = $this->get_default_lang() . ':' . $org_id;
278        }
279
280        // check if the original page exists
281        if (!page_exists($org_id)) return false;
282
283        return true;
284    }
285
286    private function check_do_push_translate(): bool {
287        global $ID;
288        global $INFO;
289
290        if (!$INFO['exists']) return false;
291
292        // only allow push translation if the user can edit this page
293        $perm = auth_quickaclcheck($ID);
294        if ($perm < AUTH_EDIT) return false;
295
296        // if default language is in namespace: only allow push translation from that namespace
297        if($this->getConf('default_lang_in_ns')) {
298            $split_id = explode(':', $ID);
299            $lang_ns = array_shift($split_id);
300
301            if ($lang_ns !== $this->get_default_lang()) return false;
302        }
303
304        $push_langs = $this->get_push_langs();
305        // push_langs empty --> push_translate disabled --> abort
306        if (empty($push_langs)) return false;
307
308        // skip blacklisted namespaces and pages
309        if ($this->getConf('blacklist_regex')) {
310            // blacklist regex match --> abort
311            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
312        }
313
314        return true;
315    }
316
317    private function deepl_translate($text, $target_lang, $org_ns): string {
318        if (!trim($this->getConf('api_key'))) return '';
319
320        $text = $this->patch_links($text, $target_lang, $org_ns);
321
322        $text = $this->insert_ignore_tags($text);
323
324        $data = [
325            'auth_key' => $this->getConf('api_key'),
326            'target_lang' => $this->langs[$target_lang],
327            'tag_handling' => 'xml',
328            'ignore_tags' => 'ignore',
329            'text' => $text
330        ];
331
332        if ($this->getConf('api') == 'free') {
333            $url = 'https://api-free.deepl.com/v2/translate';
334        } else {
335            $url = 'https://api.deepl.com/v2/translate';
336        }
337
338        $http = new DokuHTTPClient();
339        $raw_response = $http->post($url, $data);
340
341        if ($http->status >= 400) {
342            // add error messages
343            switch ($http->status) {
344                case 403:
345                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
346                    break;
347                case 456:
348                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
349                    break;
350                default:
351                    msg($this->getLang('msg_translation_fail'), -1);
352                    break;
353            }
354
355            // if any error occurred return an empty string
356            return '';
357        }
358
359        $json_response = json_decode($raw_response, true);
360        $translated_text = $json_response['translations'][0]['text'];
361
362        $translated_text = $this->remove_ignore_tags($translated_text);
363
364        return $translated_text;
365    }
366
367    private function get_push_langs(): array {
368        $push_langs = trim($this->getConf('push_langs'));
369
370        if ($push_langs === '') return array();
371
372        return explode(' ', $push_langs);
373    }
374
375    private function patch_links($text, $target_lang, $ns): string {
376        /*
377         * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]]
378         * 2. Extract aa:bb
379         * 3. Check if lang:aa:bb exists
380         * 3.1. --> Yes --> replace
381         * 3.2. --> No --> leave it as it is
382         */
383
384
385        /*
386         * LINKS
387         */
388
389        preg_match_all('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);
390
391        foreach ($matches as $match) {
392
393            // external link --> skip
394            if (strpos($match[1], '://') !== false) continue;
395
396            $resolved_id = $match[1];
397
398            resolve_pageid($ns, $resolved_id, $exists);
399
400            $resolved_id_full = $resolved_id;
401
402            // if the link already points to a target in a language namespace drop it and add the new language namespace
403            $split_id = explode(':', $resolved_id);
404            $lang_ns = array_shift($split_id);
405            if (array_key_exists($lang_ns, $this->langs)) {
406                $resolved_id = implode(':', $split_id);
407            }
408
409            $lang_id = $target_lang . ':' . $resolved_id;
410
411            if (!page_exists($lang_id)) {
412                // Page in target lang does not exist --> replace with absolute ID in case it was a relative ID
413                $new_link = '[[' . $resolved_id_full . $match[2] . $match[3] . ']]';
414            } else {
415                // Page in target lang exists --> replace link
416                $new_link = '[[' . $lang_id . $match[2] . $match[3] . ']]';
417            }
418
419            $text = str_replace($match[0], $new_link, $text);
420
421        }
422
423        /*
424         * MEDIA
425         */
426
427        preg_match_all('/\{\{([\s\S]*?)(\?[\s\S]*?)?(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);
428
429        foreach ($matches as $match) {
430
431            // external image --> skip
432            if (strpos($match[1], '://') !== false) continue;
433
434            // skip things like {{tag>...}}
435            if (strpos($match[1], '>') !== false) continue;
436
437            $resolved_id = $match[1];
438
439            resolve_mediaid($ns, $resolved_id, $exists);
440
441            $resolved_id_full = $resolved_id;
442
443            // if the link already points to a target in a language namespace drop it and add the new language namespace
444            $split_id = explode(':', $resolved_id);
445            $lang_ns = array_shift($split_id);
446            if (array_key_exists($lang_ns, $this->langs)) {
447                $resolved_id = implode(':', $split_id);
448            }
449
450            $lang_id = $target_lang . ':' . $resolved_id;
451
452            $lang_id_fn = mediaFN($lang_id);
453
454            if (!file_exists($lang_id_fn)) {
455                // media in target lang does not exist --> replace with absolute ID in case it was a relative ID
456                $new_link = '{{' . $resolved_id_full . $match[2] . $match[3] . '}}';
457            } else {
458                // media in target lang exists --> replace it
459                $new_link = '{{' . $lang_id . $match[2] . $match[3] . '}}';
460            }
461
462            $text = str_replace($match[0], $new_link, $text);
463
464        }
465
466        return $text;
467    }
468
469    private function insert_ignore_tags($text): string {
470        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
471        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);
472
473        // prevent deepl from breaking headings
474        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);
475
476        // fix for plugins like tag or template
477        $text = preg_replace('/\{\{[\s\w]+?>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);
478
479        // ignore links in wikitext (outside of dokuwiki-links)
480        $text = preg_replace('/\S+:\/\/\S+/', '<ignore>${0}</ignore>', $text);
481
482        // ignore link/media ids but translate the text (if existing)
483        $text = preg_replace('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${2}${4}</ignore>${5}<ignore>]]</ignore>', $text);
484        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);
485
486        // prevent deepl from messing with tables
487        $text = str_replace("^", "<ignore>^</ignore>", $text);
488        $text = str_replace("|", "<ignore>|</ignore>", $text);
489
490        // prevent deepl from doing strange things with dokuwiki syntax
491        $text = str_replace("''", "<ignore>''</ignore>", $text);
492        $text = str_replace("//", "<ignore>//</ignore>", $text);
493        $text = str_replace("**", "<ignore>**</ignore>", $text);
494        $text = str_replace("__", "<ignore>__</ignore>", $text);
495        $text = str_replace("\\\\", "<ignore>\\\\</ignore>", $text);
496
497        // prevent deepl from messing with smileys
498        $smileys = array_keys(getSmileys());
499        foreach ($smileys as $smiley) {
500            $text = str_replace($smiley, "<ignore>" . $smiley . "</ignore>", $text);
501        }
502
503        // ignore code tags
504        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
505        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
506        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);
507
508        // ignore the expressions from the ignore list
509        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
510
511        foreach ($ignored_expressions as $expression) {
512            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
513        }
514
515        return $text;
516    }
517
518    private function remove_ignore_tags($text): string {
519        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
520
521        foreach ($ignored_expressions as $expression) {
522            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
523        }
524
525        // prevent deepl from messing with tables
526        $text = str_replace("<ignore>^</ignore>", "^", $text);
527        $text = str_replace("<ignore>|</ignore>", "|", $text);
528
529        $text = str_replace("<ignore>''</ignore>", "''", $text);
530        $text = str_replace("<ignore>//</ignore>", "//", $text);
531        $text = str_replace("<ignore>**</ignore>", "**", $text);
532        $text = str_replace("<ignore>__</ignore>", "__", $text);
533        $text = str_replace("<ignore>\\\\</ignore>", "\\\\", $text);
534
535        // ignore links in wikitext (outside of dokuwiki-links)
536        $text = preg_replace('/<ignore>(\S+:\/\/\S+)<\/ignore>/', '${1}', $text);
537
538        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
539        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);
540
541        // prevent deepl from messing with smileys
542        $smileys = array_keys(getSmileys());
543        foreach ($smileys as $smiley) {
544            $text = str_replace("<ignore>" . $smiley . "</ignore>", $smiley, $text);
545        }
546
547        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
548        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
549        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);
550
551        // fix for plugins like tag or template
552        $text = preg_replace('/<ignore>(\{\{[\s\w]+?>[\s\S]*?}})<\/ignore>/', '${1}', $text);
553
554        // prevent deepl from breaking headings
555        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/','${1}', $text);
556
557        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
558        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);
559
560        // restore < and > for example from arrows (-->) in wikitext
561        $text = str_replace('&gt;', '>', $text);
562        $text = str_replace('&lt;', '<', $text);
563
564        // restore & in wikitext
565        $text = str_replace('&amp;', '&', $text);
566
567        return $text;
568    }
569}
570
571