xref: /plugin/deeplautotranslate/action.php (revision 0180404ccea277c5f70ae1505724568a0f2b8976)
1<?php
2/**
3 * Deepl Autotranslate Plugin
4 *
5 * @author     Jennifer Graul <me@netali.de>
6 */
7
8if(!defined('DOKU_INC')) die();
9
10use \dokuwiki\HTTP\DokuHTTPClient;
11use \dokuwiki\plugin\deeplautotranslate\MenuItem;
12
class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {

    /**
     * Manual mapping of the language codes used as language namespaces to the
     * DeepL target language codes (fixes inconsistent naming between both).
     *
     * @var array<string, string>
     */
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];

    /**
     * Register its handlers with the DokuWiki's event controller
     *
     * @param Doku_Event_Handler $controller the event controller to register the hooks with
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('ACTION_ACT_PREPROCESS','BEFORE', $this, 'preprocess');
        $controller->register_hook('COMMON_PAGETPL_LOAD','AFTER', $this, 'autotrans_editor');
        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
    }

    /**
     * Add the translate button to the page menu when a (push) translation is possible
     * for the current page.
     *
     * @param Doku_Event $event MENU_ITEMS_ASSEMBLY event; the item is inserted before the last menu entry
     */
    public function add_menu_button(Doku_Event $event): void {
        global $ID;
        global $ACT;
        global $conf;

        if ($ACT != 'show') return;

        if ($event->data['view'] != 'page') return;

        if (!$this->getConf('show_button')) return;

        [$lang_ns] = $this->split_first_ns($ID);
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if($this->getConf('default_lang_in_ns') and $lang_ns === $conf['lang']) {
                // if the default lang is in a namespace and we are in that namespace --> check for push translation
                if (!$this->check_do_push_translate()) return;
            } else {
                // in language namespace --> check if we should translate
                if (!$this->check_do_translation(true)) return;
            }
        } else {
            // do not show the button if we are not in a language namespace and the default language is in a namespace
            if($this->getConf('default_lang_in_ns')) return;
            // not in language namespace and default language is not in a namespace --> check if we should show the push translate button
            if (!$this->check_do_push_translate()) return;
        }

        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
    }

    /**
     * Dispatch the 'show' and 'translate' actions to the direct translation
     * or the push translation, depending on the namespace of the current page.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event; its data is the requested action
     * @param mixed $param unused
     */
    public function preprocess(Doku_Event  $event, $param): void {
        global $ID;
        global $conf;

        // check if action is show or translate
        if ($event->data != 'show' and $event->data != 'translate') return;

        [$lang_ns] = $this->split_first_ns($ID);
        // check if we are in a language namespace
        if (array_key_exists($lang_ns, $this->langs)) {
            if($this->getConf('default_lang_in_ns') and $lang_ns === $conf['lang']) {
                // if the default lang is in a namespace and we are in that namespace --> push translate
                $this->push_translate($event);
            } else {
                // in language namespace --> autotrans direct
                $this->autotrans_direct($event);
            }
        } else {
            // not in language namespace --> push translate
            $this->push_translate($event);
        }
    }

    /**
     * Translate the original page into the current language namespace page and save it directly.
     *
     * The action is always reset to 'show'. On every outcome except the silent early
     * returns at the top, the browser is redirected back to the current page.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event ('show' or 'translate')
     */
    private function autotrans_direct(Doku_Event $event): void {
        global $ID;

        // abort if action is translate and the translate button is disabled
        if ($event->data == 'translate' and !$this->getConf('show_button')) return;

        // do nothing on show action when mode is not direct
        if ($event->data == 'show' and $this->get_mode() != 'direct') return;

        // allow translation of existing pages if we are in the translate action
        $allow_existing = ($event->data == 'translate');

        // reset action to show
        $event->data = 'show';

        if (!$this->check_do_translation($allow_existing)) {
            send_redirect(wl($ID));
            return;
        }

        $org_page_info = $this->get_org_page_info();
        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);

        // an empty result means the translation failed --> do not save anything
        if ($translated_text === '') {
            send_redirect(wl($ID));
            return;
        }

        saveWikiText($ID, $translated_text, 'Automatic translation');

        msg($this->getLang('msg_translation_success'), 1);

        // reload the page after translation
        send_redirect(wl($ID));
    }

    /**
     * Pre-fill the editor of a new language namespace page with the translated original page.
     *
     * Only active when the mode for the current page is 'editor'.
     *
     * @param Doku_Event $event COMMON_PAGETPL_LOAD event; data['tpl'] receives the translation
     * @param mixed $param unused
     */
    public function autotrans_editor(Doku_Event $event, $param): void {
        if ($this->get_mode() != 'editor') return;

        if (!$this->check_do_translation()) return;

        $org_page_info = $this->get_org_page_info();

        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);

        // a failed translation yields an empty string --> keep the template that was loaded so far
        if ($translated_text === '') return;

        $event->data['tpl'] = $translated_text;
    }

    /**
     * Translate the current page into all configured push languages and save the results
     * in the corresponding language namespaces.
     *
     * Only handles the 'translate' action. Languages that are invalid, not permitted or
     * whose translation failed are skipped; the success message is only shown when at
     * least one page was written.
     *
     * @param Doku_Event $event ACTION_ACT_PREPROCESS event
     */
    private function push_translate(Doku_Event $event): void {
        global $ID;

        // check if action is translate
        if ($event->data != 'translate') return;

        // check if button is enabled
        if (!$this->getConf('show_button')) {
            send_redirect(wl($ID));
            return;
        }

        if (!$this->check_do_push_translate()) {
            send_redirect(wl($ID));
            return;
        }

        // push translate
        $push_langs = $this->get_push_langs();
        $org_page_text = rawWiki($ID);
        $org_ns = getNS($ID);

        if ($this->getConf('default_lang_in_ns')) {
            // if default lang is in ns: the language namespace in the ID gets replaced
            [, $base_id] = $this->split_first_ns($ID);
        } else {
            // if default lang is not in ns: the language namespace gets added to the ID
            $base_id = $ID;
        }

        $translated_pages = 0;
        foreach ($push_langs as $lang) {
            // skip invalid languages
            if (!array_key_exists($lang, $this->langs)) {
                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
                continue;
            }

            $lang_id = $lang . ':' . $base_id;

            // never overwrite the source page with a translation of itself
            if ($lang_id === $ID) continue;

            // check permissions on the page that is going to be written
            $perm = auth_quickaclcheck($lang_id);
            $exists = page_exists($lang_id);
            if (($exists and $perm < AUTH_EDIT) or (!$exists and $perm < AUTH_CREATE)) {
                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
                continue;
            }

            $translated_text = $this->deepl_translate($org_page_text, $lang, $org_ns);

            // an empty result means the translation failed; saving it would wipe an existing translation
            if ($translated_text === '') continue;

            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
            $translated_pages++;
        }

        if ($translated_pages > 0) {
            msg($this->getLang('msg_translation_success'), 1);
        }

        // reload the page after translation to clear the action
        send_redirect(wl($ID));
    }

    /**
     * Determine the translation mode for the current page.
     *
     * The 'editor_regex' and 'direct_regex' settings override the configured 'mode'
     * when they match the current page id (editor takes precedence).
     *
     * @return string the mode, 'editor' or 'direct' on a regex match, else the 'mode' setting
     */
    private function get_mode(): string {
        if ($this->id_matches_conf_regex('editor_regex')) return 'editor';
        if ($this->id_matches_conf_regex('direct_regex')) return 'direct';
        return $this->getConf('mode');
    }

    /**
     * Get the target language of the current page, i.e. the first namespace segment of its id.
     *
     * @return string the first segment of the current page id
     */
    private function get_target_lang(): string {
        global $ID;
        [$lang_ns] = $this->split_first_ns($ID);
        return $lang_ns;
    }

    /**
     * Get namespace and raw wiki text of the original page belonging to the current page.
     *
     * @return array with the keys "ns" (result of getNS() for the original id) and "text" (raw wiki text)
     */
    private function get_org_page_info(): array {
        $org_id = $this->get_org_id();

        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
    }

    /**
     * Check whether the current page should be translated from its original page.
     *
     * @param bool $allow_existing whether an already existing page may be translated (overwritten)
     * @return bool true if the translation should be done
     */
    private function check_do_translation($allow_existing = false): bool {
        global $INFO;
        global $ID;

        // only translate if the current page does not exist
        if ($INFO['exists'] and !$allow_existing) return false;

        // permission check
        $perm = auth_quickaclcheck($ID);
        if (($INFO['exists'] and $perm < AUTH_EDIT) or (!$INFO['exists'] and $perm < AUTH_CREATE)) return false;

        // skip blacklisted namespaces and pages
        if ($this->id_matches_conf_regex('blacklist_regex')) return false;

        [$lang_ns] = $this->split_first_ns($ID);
        // only translate if the current page is in a language namespace
        if (!array_key_exists($lang_ns, $this->langs)) return false;

        // check if the original page exists
        if (!page_exists($this->get_org_id())) return false;

        return true;
    }

    /**
     * Check whether the current page may be push translated.
     *
     * @return bool true if the push translation should be done
     */
    private function check_do_push_translate(): bool {
        global $ID;
        global $INFO;
        global $conf;

        if (!$INFO['exists']) return false;

        // if default language is in namespace: only allow push translation from that namespace
        if($this->getConf('default_lang_in_ns')) {
            [$lang_ns] = $this->split_first_ns($ID);

            if ($lang_ns !== $conf['lang']) return false;
        }

        $push_langs = $this->get_push_langs();
        // push_langs empty --> push_translate disabled --> abort
        if (empty($push_langs)) return false;

        // skip blacklisted namespaces and pages
        if ($this->id_matches_conf_regex('blacklist_regex')) return false;

        return true;
    }

    /**
     * Translate wiki text with the DeepL API.
     *
     * Links and media are pointed to existing pages/files in the target language and
     * wiki syntax is wrapped in ignore tags before the request is sent.
     *
     * @param string $text the wiki text to translate
     * @param string $target_lang key of $this->langs
     * @param string|false $org_ns namespace of the original page, used to resolve relative links
     * @return string the translated text or an empty string if the translation was not possible
     */
    private function deepl_translate($text, $target_lang, $org_ns): string {
        $api_key = trim($this->getConf('api_key'));
        if (!$api_key) return '';

        // guard against languages without a DeepL mapping
        if (!array_key_exists($target_lang, $this->langs)) {
            msg($this->getLang('msg_translation_fail_invalid_lang') . $target_lang, -1);
            return '';
        }

        $text = $this->patch_links($text, $target_lang, $org_ns);

        $text = $this->insert_ignore_tags($text);

        $data = [
            'auth_key' => $api_key,
            'target_lang' => $this->langs[$target_lang],
            'tag_handling' => 'xml',
            'ignore_tags' => 'ignore',
            'text' => $text
        ];

        if ($this->getConf('api') == 'free') {
            $url = 'https://api-free.deepl.com/v2/translate';
        } else {
            $url = 'https://api.deepl.com/v2/translate';
        }

        $http = new DokuHTTPClient();
        $raw_response = $http->post($url, $data);

        // a false response covers failures that do not carry an HTTP error status (e.g. transport errors)
        if ($raw_response === false or $http->status >= 400) {
            // add error messages
            switch ($http->status) {
                case 403:
                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
                    break;
                case 456:
                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
                    break;
                default:
                    msg($this->getLang('msg_translation_fail'), -1);
                    break;
            }

            // if any error occurred return an empty string
            return '';
        }

        $json_response = json_decode($raw_response, true);
        $translated_text = $json_response['translations'][0]['text'] ?? null;

        // malformed or unexpected response body --> treat as failed translation
        if (!is_string($translated_text)) {
            msg($this->getLang('msg_translation_fail'), -1);
            return '';
        }

        return $this->remove_ignore_tags($translated_text);
    }

    /**
     * Get the configured push languages.
     *
     * @return array list of language codes from the whitespace separated 'push_langs' setting
     */
    private function get_push_langs(): array {
        $push_langs = trim($this->getConf('push_langs'));

        if ($push_langs === '') return array();

        // tolerate repeated whitespace between the entries
        $langs = preg_split('/\s+/', $push_langs, -1, PREG_SPLIT_NO_EMPTY);

        return $langs === false ? array() : $langs;
    }

    /**
     * Point internal links and media to their counterparts in the target language namespace.
     *
     * A link or media reference is only replaced if its target exists and the
     * corresponding page/file in the target language namespace exists as well.
     *
     * @param string $text the wiki text
     * @param string $target_lang language namespace the links should point to
     * @param string|false $ns namespace used to resolve relative ids
     * @return string the text with patched links
     */
    private function patch_links($text, $target_lang, $ns): string {
        /*
         * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]]
         * 2. Extract aa:bb
         * 3. Check if lang:aa:bb exists
         * 3.1. --> Yes --> replace
         * 3.2. --> No --> leave it as it is
         */


        /*
         * LINKS
         */

        preg_match_all('/\[\[([\s\S]*?)(\|([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            if (strpos($match[1], '://') !== false) {
                // external link --> skip
                continue;
            }

            $resolved_id = $match[1];

            resolve_pageid($ns, $resolved_id, $exists);

            if (!$exists) {
                // redlink --> skip
                continue;
            }

            $lang_id = $this->to_target_lang_id($resolved_id, $target_lang);

            if (!page_exists($lang_id)) {
                // Page in target lang does not exist --> skip
                continue;
            }

            // trailing optional groups are missing from the match if they did not participate
            $new_link = '[[' . $lang_id . ($match[2] ?? '') . ']]';

            $text = str_replace($match[0], $new_link, $text);

        }

        /*
         * MEDIA
         */

        preg_match_all('/\{\{([\s\S]*?)(\?[\s\S]*?)?(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {

            if (strpos($match[1], '://') !== false) {
                // external image --> skip
                continue;
            }

            $resolved_id = $match[1];

            resolve_mediaid($ns, $resolved_id, $exists);

            if (!$exists) {
                // redlink --> skip
                continue;
            }

            $lang_id = $this->to_target_lang_id($resolved_id, $target_lang);

            $lang_id_fn = mediaFN($lang_id);

            if (!file_exists($lang_id_fn)) {
                // media in target lang does not exist --> skip
                continue;
            }

            // trailing optional groups are missing from the match if they did not participate
            $new_link = '{{' . $lang_id . ($match[2] ?? '') . ($match[3] ?? '') . '}}';

            $text = str_replace($match[0], $new_link, $text);

        }

        return $text;
    }

    /**
     * Wrap wiki syntax that must not be translated in <ignore> tags.
     *
     * The order of the replacements matters because later patterns also match
     * the output of earlier ones; remove_ignore_tags() is the counterpart.
     *
     * @param string $text the wiki text
     * @return string the text with ignore tags
     */
    private function insert_ignore_tags($text): string {
        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);

        // fix for the template plugin
        $text = preg_replace('/\{\{template>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);

        // ignore link/media ids but translate the text (if existing)
        $text = preg_replace('/\[\[([\s\S]*?)((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${3}</ignore>${4}<ignore>]]</ignore>', $text);
        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);

        // prevent deepl from doing strange things with dokuwiki syntax
        $text = str_replace("''", "<ignore>''</ignore>", $text);
        $text = str_replace("\\\\", "<ignore>\\\\</ignore>", $text);

        // ignore code tags
        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);

        // ignore the expressions from the ignore list
        foreach ($this->get_ignored_expressions() as $expression) {
            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
        }

        return $text;
    }

    /**
     * Remove the <ignore> tags that were added by insert_ignore_tags() from the translated text.
     *
     * @param string $text the translated text
     * @return string the text without ignore tags
     */
    private function remove_ignore_tags($text): string {
        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);

        foreach ($this->get_ignored_expressions() as $expression) {
            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
        }

        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);

        $text = str_replace("<ignore>''</ignore>", "''", $text);
        $text = str_replace("<ignore>\\\\</ignore>", "\\\\", $text);

        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);

        // fix for the template plugin
        $text = preg_replace('/<ignore>(\{\{template>[\s\S]*?}})<\/ignore>/', '${1}', $text);

        // restore < and > for example from arrows (-->) in wikitext
        $text = str_replace('&gt;', '>', $text);
        $text = str_replace('&lt;', '<', $text);

        return $text;
    }

    /**
     * Split a page id into its first namespace segment and the remaining id.
     *
     * @param string $id page id, e.g. "de:wiki:syntax"
     * @return array two elements: the first segment and the rest, e.g. ["de", "wiki:syntax"]
     */
    private function split_first_ns($id): array {
        $split_id = explode(':', $id);
        $first = array_shift($split_id);

        return [$first, implode(':', $split_id)];
    }

    /**
     * Get the id of the original page for the current page: the current id without its
     * first segment, prefixed with the default language if the default language is in a namespace.
     *
     * @return string id of the original page
     */
    private function get_org_id(): string {
        global $ID;
        global $conf;

        [, $org_id] = $this->split_first_ns($ID);

        // if default lang is in ns: add default ns in front of org id
        if ($this->getConf('default_lang_in_ns')) {
            $org_id = $conf['lang'] . ':' . $org_id;
        }

        return $org_id;
    }

    /**
     * Check whether the regex stored in the given setting matches the current page id.
     *
     * @param string $conf_key name of the setting holding the regex (without delimiters)
     * @return bool true on a match, false if the setting is empty or the regex does not match
     */
    private function id_matches_conf_regex(string $conf_key): bool {
        global $ID;

        $regex = $this->getConf($conf_key);
        if (!$regex) return false;

        return preg_match('/' . $regex . '/', $ID) === 1;
    }

    /**
     * Build the id of the target language counterpart of a resolved page or media id.
     *
     * If the default language is in a namespace, that leading namespace is replaced by the
     * target language instead of being nested below it.
     *
     * @param string $resolved_id resolved id of the original page or media file
     * @param string $target_lang target language namespace
     * @return string id in the target language namespace
     */
    private function to_target_lang_id($resolved_id, $target_lang): string {
        global $conf;

        if ($this->getConf('default_lang_in_ns')) {
            $default_prefix = $conf['lang'] . ':';
            if (strpos($resolved_id, $default_prefix) === 0) {
                $resolved_id = substr($resolved_id, strlen($default_prefix));
            }
        }

        return $target_lang . ':' . $resolved_id;
    }

    /**
     * Get the non-empty entries of the colon separated 'ignored_expressions' setting.
     *
     * @return array list of expressions that must not be translated
     */
    private function get_ignored_expressions(): array {
        $expressions = [];

        foreach (explode(':', $this->getConf('ignored_expressions')) as $expression) {
            // empty entries (empty setting, doubled or trailing colon) carry no expression
            if ($expression !== '') $expressions[] = $expression;
        }

        return $expressions;
    }
}
511
512