xref: /plugin/deeplautotranslate/action.php (revision 53f3766c993b4a647bb3d728337580c326e5c1af)
1<?php
2/**
3 * Deepl Autotranslate Plugin
4 *
5 * @author     Jennifer Graul <me@netali.de>
6 */
7
// this file must only be executed from within DokuWiki
if (!defined('DOKU_INC')) {
    die();
}
9
10use \dokuwiki\HTTP\DokuHTTPClient;
11use \dokuwiki\plugin\deeplautotranslate\MenuItem;
12
13class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {
14
    /**
     * Manual mapping of ISO language codes to DeepL language codes to fix inconsistent naming.
     *
     * The keys are compared against the first namespace of a page ID to decide
     * whether the page lives in a language namespace; the values are sent to
     * DeepL as the "target_lang" request parameter.
     */
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];
44
45    /**
46     * Register its handlers with the DokuWiki's event controller
47     */
48    public function register(Doku_Event_Handler $controller) {
49        $controller->register_hook('ACTION_ACT_PREPROCESS','BEFORE', $this, 'preprocess');
50        $controller->register_hook('COMMON_PAGETPL_LOAD','AFTER', $this, 'autotrans_editor');
51        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
52    }
53
54    public function add_menu_button(Doku_Event $event): void {
55        global $ID;
56        global $ACT;
57
58        if ($ACT != 'show') return;
59
60        if ($event->data['view'] != 'page') return;
61
62        if (!$this->getConf('show_button')) return;
63
64        $split_id = explode(':', $ID);
65        $lang_ns = array_shift($split_id);
66        // check if we are in a language namespace
67        if (array_key_exists($lang_ns, $this->langs)) {
68            if($this->getConf('default_lang_in_ns') and $lang_ns === $this->get_default_lang()) {
69                // if the default lang is in a namespace and we are in that namespace --> check for push translation
70                if (!$this->check_do_push_translate()) return;
71            } else {
72                // in language namespace --> check if we should translate
73                if (!$this->check_do_translation(true)) return;
74            }
75        } else {
76            // do not show the button if we are not in a language namespace and the default language is in a namespace
77            if($this->getConf('default_lang_in_ns')) return;
78            // not in language namespace and default language is npt in a namespace --> check if we should show the push translate button
79            if (!$this->check_do_push_translate()) return;
80        }
81
82        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
83    }
84
85    public function preprocess(Doku_Event  $event, $param): void {
86        global $ID;
87
88        // check if action is show or translate
89        if ($event->data != 'show' and $event->data != 'translate') return;
90
91        $split_id = explode(':', $ID);
92        $lang_ns = array_shift($split_id);
93        // check if we are in a language namespace
94        if (array_key_exists($lang_ns, $this->langs)) {
95            if($this->getConf('default_lang_in_ns') and $lang_ns === $this->get_default_lang()) {
96                // if the default lang is in a namespace and we are in that namespace --> push translate
97                $this->push_translate($event);
98            } else {
99                // in language namespace --> autotrans direct
100                $this->autotrans_direct($event);
101            }
102        } else {
103            // not in language namespace --> push translate
104            $this->push_translate($event);
105        }
106    }
107
108    private function autotrans_direct(Doku_Event $event): void {
109        global $ID;
110
111        // abort if action is translate and the translate button is disabled
112        if ($event->data == 'translate' and !$this->getConf('show_button')) return;
113
114        // do nothing on show action when mode is not direct
115        if ($event->data == 'show' and $this->get_mode() != 'direct') return;
116
117        // allow translation of existing pages is we are in the translate action
118        $allow_existing = ($event->data == 'translate');
119
120        // reset action to show
121        $event->data = 'show';
122
123        if (!$this->check_do_translation($allow_existing)) {
124            return;
125        }
126
127        $org_page_info = $this->get_org_page_info();
128        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
129
130        if ($translated_text === '') {
131            return;
132        }
133
134        saveWikiText($ID, $translated_text, 'Automatic translation');
135
136        msg($this->getLang('msg_translation_success'), 1);
137
138        // reload the page after translation
139        send_redirect(wl($ID));
140    }
141
142    public function autotrans_editor(Doku_Event $event, $param): void {
143        if ($this->get_mode() != 'editor') return;
144
145        if (!$this->check_do_translation()) return;
146
147        $org_page_info = $this->get_org_page_info();
148
149        $event->data['tpl'] = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
150    }
151
152    private function push_translate(Doku_Event $event): void {
153        global $ID;
154
155        // check if action is translate
156        if ($event->data != 'translate') return;
157
158        // check if button is enabled
159        if (!$this->getConf('show_button')) {
160            send_redirect(wl($ID));
161            return;
162        }
163
164        if (!$this->check_do_push_translate()) {
165            send_redirect(wl($ID));
166            return;
167        }
168
169        // push translate
170        $push_langs = $this->get_push_langs();
171        $org_page_text = rawWiki($ID);
172        foreach ($push_langs as $lang) {
173            // skip invalid languages
174            if (!array_key_exists($lang, $this->langs)) {
175                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
176                continue;
177            }
178
179            if ($this->getConf('default_lang_in_ns')) {
180                // if default lang is in ns: replace language namespace in ID
181                $split_id = explode(':', $ID);
182                array_shift($split_id);
183                $lang_id = implode(':', $split_id);
184                $lang_id = $lang . ':' . $lang_id;
185            } else {
186                // if default lang is not in ns: add language namespace to ID
187                $lang_id = $lang . ':' . $ID;
188            }
189
190            // check permissions
191            $perm = auth_quickaclcheck($ID);
192            $exists = page_exists($lang_id);
193            if (($exists and $perm < AUTH_EDIT) or (!$exists and $perm < AUTH_CREATE)) {
194                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
195                continue;
196            }
197
198            $translated_text = $this->deepl_translate($org_page_text, $lang, getNS($ID));
199            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
200        }
201
202        msg($this->getLang('msg_translation_success'), 1);
203
204        // reload the page after translation to clear the action
205        send_redirect(wl($ID));
206    }
207
208    private function get_mode(): string {
209        global $ID;
210        if ($this->getConf('editor_regex')) {
211            if (preg_match('/' . $this->getConf('editor_regex') . '/', $ID) === 1) return 'editor';
212        }
213        if ($this->getConf('direct_regex')) {
214            if (preg_match('/' . $this->getConf('direct_regex') . '/', $ID) === 1) return 'direct';
215        }
216        return $this->getConf('mode');
217    }
218
219    private function get_target_lang(): string {
220        global $ID;
221        $split_id = explode(':', $ID);
222        return array_shift($split_id);
223    }
224
225    private function get_default_lang(): string {
226        global $conf;
227
228        if (empty($conf['lang_before_translation'])) {
229            $default_lang = $conf['lang'];
230        } else {
231            $default_lang = $conf['lang_before_translation'];
232        }
233
234        return $default_lang;
235    }
236
237    private function get_org_page_info(): array {
238        global $ID;
239
240        $split_id = explode(':', $ID);
241        array_shift($split_id);
242        $org_id = implode(':', $split_id);
243
244        // if default lang is in ns: add default ns in front of org id
245        if ($this->getConf('default_lang_in_ns')) {
246            $org_id = $this->get_default_lang() . ':' . $org_id;
247        }
248
249        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
250    }
251
252    private function check_do_translation($allow_existing = false): bool {
253        global $INFO;
254        global $ID;
255
256        // only translate if the current page does not exist
257        if ($INFO['exists'] and !$allow_existing) return false;
258
259        // permission check
260        $perm = auth_quickaclcheck($ID);
261        if (($INFO['exists'] and $perm < AUTH_EDIT) or (!$INFO['exists'] and $perm < AUTH_CREATE)) return false;
262
263        // skip blacklisted namespaces and pages
264        if ($this->getConf('blacklist_regex')) {
265            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
266        }
267
268        $split_id = explode(':', $ID);
269        $lang_ns = array_shift($split_id);
270        // only translate if the current page is in a language namespace
271        if (!array_key_exists($lang_ns, $this->langs)) return false;
272
273        $org_id = implode(':', $split_id);
274
275        // if default lang is in ns: add default ns in front of org id
276        if ($this->getConf('default_lang_in_ns')) {
277            $org_id = $this->get_default_lang() . ':' . $org_id;
278        }
279
280        // check if the original page exists
281        if (!page_exists($org_id)) return false;
282
283        return true;
284    }
285
286    private function check_do_push_translate(): bool {
287        global $ID;
288        global $INFO;
289
290        if (!$INFO['exists']) return false;
291
292        // if default language is in namespace: only allow push translation from that namespace
293        if($this->getConf('default_lang_in_ns')) {
294            $split_id = explode(':', $ID);
295            $lang_ns = array_shift($split_id);
296
297            if ($lang_ns !== $this->get_default_lang()) return false;
298        }
299
300        $push_langs = $this->get_push_langs();
301        // push_langs empty --> push_translate disabled --> abort
302        if (empty($push_langs)) return false;
303
304        // skip blacklisted namespaces and pages
305        if ($this->getConf('blacklist_regex')) {
306            // blacklist regex match --> abort
307            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
308        }
309
310        return true;
311    }
312
313    private function deepl_translate($text, $target_lang, $org_ns): string {
314        if (!trim($this->getConf('api_key'))) return '';
315
316        $text = $this->patch_links($text, $target_lang, $org_ns);
317
318        $text = $this->insert_ignore_tags($text);
319
320        $data = [
321            'auth_key' => $this->getConf('api_key'),
322            'target_lang' => $this->langs[$target_lang],
323            'tag_handling' => 'xml',
324            'ignore_tags' => 'ignore',
325            'text' => $text
326        ];
327
328        if ($this->getConf('api') == 'free') {
329            $url = 'https://api-free.deepl.com/v2/translate';
330        } else {
331            $url = 'https://api.deepl.com/v2/translate';
332        }
333
334        $http = new DokuHTTPClient();
335        $raw_response = $http->post($url, $data);
336
337        if ($http->status >= 400) {
338            // add error messages
339            switch ($http->status) {
340                case 403:
341                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
342                    break;
343                case 456:
344                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
345                    break;
346                default:
347                    msg($this->getLang('msg_translation_fail'), -1);
348                    break;
349            }
350
351            // if any error occurred return an empty string
352            return '';
353        }
354
355        $json_response = json_decode($raw_response, true);
356        $translated_text = $json_response['translations'][0]['text'];
357
358        $translated_text = $this->remove_ignore_tags($translated_text);
359
360        return $translated_text;
361    }
362
363    private function get_push_langs(): array {
364        $push_langs = trim($this->getConf('push_langs'));
365
366        if ($push_langs === '') return array();
367
368        return explode(' ', $push_langs);
369    }
370
371    private function patch_links($text, $target_lang, $ns): string {
372        /*
373         * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]]
374         * 2. Extract aa:bb
375         * 3. Check if lang:aa:bb exists
376         * 3.1. --> Yes --> replace
377         * 3.2. --> No --> leave it as it is
378         */
379
380
381        /*
382         * LINKS
383         */
384
385        preg_match_all('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);
386
387        foreach ($matches as $match) {
388
389            if (strpos($match[1], '://') !== false) {
390                // external link --> skip
391                continue;
392            }
393
394            $resolved_id = $match[1];
395
396            resolve_pageid($ns, $resolved_id, $exists);
397
398            $resolved_id_full = $resolved_id;
399
400            // if the link already points to a target in a language namespace drop it and add the new language namespace
401            $split_id = explode(':', $resolved_id);
402            $lang_ns = array_shift($split_id);
403            if (array_key_exists($lang_ns, $this->langs)) {
404                $resolved_id = implode(':', $split_id);
405            }
406
407            $lang_id = $target_lang . ':' . $resolved_id;
408
409            if (!page_exists($lang_id)) {
410                // Page in target lang does not exist --> replace with absolute ID in case it was a relative ID
411                $new_link = '[[' . $resolved_id_full . $match[2] . $match[3] . ']]';
412            } else {
413                // Page in target lang exists --> replace link
414                $new_link = '[[' . $lang_id . $match[2] . $match[3] . ']]';
415            }
416
417            $text = str_replace($match[0], $new_link, $text);
418
419        }
420
421        /*
422         * MEDIA
423         */
424
425        preg_match_all('/\{\{([\s\S]*?)(\?[\s\S]*?)?(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);
426
427        foreach ($matches as $match) {
428
429            if (strpos($match[1], '://') !== false) {
430                // external image --> skip
431                continue;
432            }
433
434            $resolved_id = $match[1];
435
436            resolve_mediaid($ns, $resolved_id, $exists);
437
438            $resolved_id_full = $resolved_id;
439
440            // if the link already points to a target in a language namespace drop it and add the new language namespace
441            $split_id = explode(':', $resolved_id);
442            $lang_ns = array_shift($split_id);
443            if (array_key_exists($lang_ns, $this->langs)) {
444                $resolved_id = implode(':', $split_id);
445            }
446
447            $lang_id = $target_lang . ':' . $resolved_id;
448
449            $lang_id_fn = mediaFN($lang_id);
450
451            if (!file_exists($lang_id_fn)) {
452                // media in target lang does not exist --> replace with absolute ID in case it was a relative ID
453                $new_link = '{{' . $resolved_id_full . $match[2] . $match[3] . '}}';
454            } else {
455                // media in target lang exists --> replace it
456                $new_link = '{{' . $lang_id . $match[2] . $match[3] . '}}';
457            }
458
459            $text = str_replace($match[0], $new_link, $text);
460
461        }
462
463        return $text;
464    }
465
466    private function insert_ignore_tags($text): string {
467        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
468        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);
469
470        // prevent deepl from breaking headings
471        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);
472
473        // fix for the template plugin
474        $text = preg_replace('/\{\{template>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);
475
476        // ignore links in wikitext (outside of dokuwiki-links)
477        $text = preg_replace('/\S+:\/\/\S+/', '<ignore>${0}</ignore>', $text);
478
479        // ignore link/media ids but translate the text (if existing)
480        $text = preg_replace('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${2}${4}</ignore>${5}<ignore>]]</ignore>', $text);
481        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);
482
483        // prevent deepl from messing with tables
484        $text = str_replace("^", "<ignore>^</ignore>", $text);
485        $text = str_replace("|", "<ignore>|</ignore>", $text);
486
487        // prevent deepl from doing strange things with dokuwiki syntax
488        $text = str_replace("''", "<ignore>''</ignore>", $text);
489        $text = str_replace("//", "<ignore>//</ignore>", $text);
490        $text = str_replace("**", "<ignore>**</ignore>", $text);
491        $text = str_replace("__", "<ignore>__</ignore>", $text);
492        $text = str_replace("\\\\", "<ignore>\\\\</ignore>", $text);
493
494        // prevent deepl from messing with smileys
495        $smileys = array_keys(getSmileys());
496        foreach ($smileys as $smiley) {
497            $text = str_replace($smiley, "<ignore>" . $smiley . "</ignore>", $text);
498        }
499
500        // ignore code tags
501        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
502        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
503        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);
504
505        // ignore the expressions from the ignore list
506        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
507
508        foreach ($ignored_expressions as $expression) {
509            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
510        }
511
512        return $text;
513    }
514
515    private function remove_ignore_tags($text): string {
516        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
517
518        foreach ($ignored_expressions as $expression) {
519            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
520        }
521
522        // prevent deepl from messing with tables
523        $text = str_replace("<ignore>^</ignore>", "^", $text);
524        $text = str_replace("<ignore>|</ignore>", "|", $text);
525
526        $text = str_replace("<ignore>''</ignore>", "''", $text);
527        $text = str_replace("<ignore>//</ignore>", "//", $text);
528        $text = str_replace("<ignore>**</ignore>", "**", $text);
529        $text = str_replace("<ignore>__</ignore>", "__", $text);
530        $text = str_replace("<ignore>\\\\</ignore>", "\\\\", $text);
531
532        // ignore links in wikitext (outside of dokuwiki-links)
533        $text = preg_replace('/<ignore>(\S+:\/\/\S+)<\/ignore>/', '${1}', $text);
534
535        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
536        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);
537
538        // prevent deepl from messing with smileys
539        $smileys = array_keys(getSmileys());
540        foreach ($smileys as $smiley) {
541            $text = str_replace("<ignore>" . $smiley . "</ignore>", $smiley, $text);
542        }
543
544        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
545        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
546        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);
547
548        // fix for the template plugin
549        $text = preg_replace('/<ignore>(\{\{template>[\s\S]*?}})<\/ignore>/', '${1}', $text);
550
551        // prevent deepl from breaking headings
552        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/','${1}', $text);
553
554        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
555        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);
556
557        // restore < and > for example from arrows (-->) in wikitext
558        $text = str_replace('&gt;', '>', $text);
559        $text = str_replace('&lt;', '<', $text);
560
561        // restore & in wikitext
562        $text = str_replace('&amp;', '&', $text);
563
564        return $text;
565    }
566}
567
568