xref: /plugin/deeplautotranslate/action.php (revision 1cd781c4b20267480f888b9e4ddbbbcb7f3b3e81)
1<?php
2/**
3 * Deepl Autotranslate Plugin
4 *
5 * @author     Jennifer Graul <me@netali.de>
6 */
7
8if(!defined('DOKU_INC')) die();
9
10use \dokuwiki\HTTP\DokuHTTPClient;
11use \dokuwiki\plugin\deeplautotranslate\MenuItem;
12
13class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {
14
    /**
     * Language lookup table used throughout this plugin.
     *
     * Keys are the names of the language namespaces (the first segment of a page ID is
     * looked up here to decide whether a page lives in a language namespace); values are
     * the codes sent to DeepL as "target_lang". Several keys may share one DeepL code
     * (e.g. 'de' and 'de-informal').
     */
    // manual mapping of ISO-languages to DeepL-languages to fix inconsistent naming
    private $langs = [
        'bg' => 'BG',
        'cs' => 'CS',
        'da' => 'DA',
        'de' => 'DE',
        'de-informal' => 'DE',
        'el' => 'EL',
        'en' => 'EN-GB',
        'es' => 'ES',
        'et' => 'ET',
        'fi' => 'FI',
        'fr' => 'FR',
        'hu' => 'HU',
        'hu-formal' => 'HU',
        'it' => 'IT',
        'ja' => 'JA',
        'lt' => 'LT',
        'lv' => 'LV',
        'nl' => 'NL',
        'pl' => 'PL',
        'pt' => 'PT-PT',
        'ro' => 'RO',
        'ru' => 'RU',
        'sk' => 'SK',
        'sl' => 'SL',
        'sv' => 'SV',
        'zh' => 'ZH'
    ];
44
45    /**
46     * Register its handlers with the DokuWiki's event controller
47     */
48    public function register(Doku_Event_Handler $controller) {
49        $controller->register_hook('ACTION_ACT_PREPROCESS','BEFORE', $this, 'preprocess');
50        $controller->register_hook('COMMON_PAGETPL_LOAD','AFTER', $this, 'autotrans_editor');
51        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
52    }
53
54    public function add_menu_button(Doku_Event $event): void {
55        global $ID;
56        global $ACT;
57        global $conf;
58
59        if ($ACT != 'show') return;
60
61        if ($event->data['view'] != 'page') return;
62
63        if (!$this->getConf('show_button')) return;
64
65        $split_id = explode(':', $ID);
66        $lang_ns = array_shift($split_id);
67        // check if we are in a language namespace
68        if (array_key_exists($lang_ns, $this->langs)) {
69            if($this->getConf('default_lang_in_ns') and $lang_ns === $conf['lang']) {
70                // if the default lang is in a namespace and we are in that namespace --> check for push translation
71                if (!$this->check_do_push_translate()) return;
72            } else {
73                // in language namespace --> check if we should translate
74                if (!$this->check_do_translation(true)) return;
75            }
76        } else {
77            // do not show the button if we are not in a language namespace and the default language is in a namespace
78            if($this->getConf('default_lang_in_ns')) return;
79            // not in language namespace and default language is npt in a namespace --> check if we should show the push translate button
80            if (!$this->check_do_push_translate()) return;
81        }
82
83        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
84    }
85
86    public function preprocess(Doku_Event  $event, $param): void {
87        global $ID;
88        global $conf;
89
90        // check if action is show or translate
91        if ($event->data != 'show' and $event->data != 'translate') return;
92
93        $split_id = explode(':', $ID);
94        $lang_ns = array_shift($split_id);
95        // check if we are in a language namespace
96        if (array_key_exists($lang_ns, $this->langs)) {
97            if($this->getConf('default_lang_in_ns') and $lang_ns === $conf['lang']) {
98                // if the default lang is in a namespace and we are in that namespace --> push translate
99                $this->push_translate($event);
100            } else {
101                // in language namespace --> autotrans direct
102                $this->autotrans_direct($event);
103            }
104        } else {
105            // not in language namespace --> push translate
106            $this->push_translate($event);
107        }
108    }
109
110    private function autotrans_direct(Doku_Event $event): void {
111        global $ID;
112
113        // abort if action is translate and the translate button is disabled
114        if ($event->data == 'translate' and !$this->getConf('show_button')) return;
115
116        // do nothing on show action when mode is not direct
117        if ($event->data == 'show' and $this->get_mode() != 'direct') return;
118
119        // allow translation of existing pages is we are in the translate action
120        $allow_existing = ($event->data == 'translate');
121
122        // reset action to show
123        $event->data = 'show';
124
125        if (!$this->check_do_translation($allow_existing)) {
126            return;
127        }
128
129        $org_page_info = $this->get_org_page_info();
130        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
131
132        if ($translated_text === '') {
133            return;
134        }
135
136        saveWikiText($ID, $translated_text, 'Automatic translation');
137
138        msg($this->getLang('msg_translation_success'), 1);
139
140        // reload the page after translation
141        send_redirect(wl($ID));
142    }
143
144    public function autotrans_editor(Doku_Event $event, $param): void {
145        if ($this->get_mode() != 'editor') return;
146
147        if (!$this->check_do_translation()) return;
148
149        $org_page_info = $this->get_org_page_info();
150
151        $event->data['tpl'] = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
152    }
153
154    private function push_translate(Doku_Event $event): void {
155        global $ID;
156
157        // check if action is translate
158        if ($event->data != 'translate') return;
159
160        // check if button is enabled
161        if (!$this->getConf('show_button')) {
162            send_redirect(wl($ID));
163            return;
164        }
165
166        if (!$this->check_do_push_translate()) {
167            send_redirect(wl($ID));
168            return;
169        }
170
171        // push translate
172        $push_langs = $this->get_push_langs();
173        $org_page_text = rawWiki($ID);
174        foreach ($push_langs as $lang) {
175            // skip invalid languages
176            if (!array_key_exists($lang, $this->langs)) {
177                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
178                continue;
179            }
180
181            if ($this->getConf('default_lang_in_ns')) {
182                // if default lang is in ns: replace language namespace in ID
183                $split_id = explode(':', $ID);
184                array_shift($split_id);
185                $lang_id = implode(':', $split_id);
186                $lang_id = $lang . ':' . $lang_id;
187            } else {
188                // if default lang is not in ns: add language namespace to ID
189                $lang_id = $lang . ':' . $ID;
190            }
191
192            // check permissions
193            $perm = auth_quickaclcheck($ID);
194            $exists = page_exists($lang_id);
195            if (($exists and $perm < AUTH_EDIT) or (!$exists and $perm < AUTH_CREATE)) {
196                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
197                continue;
198            }
199
200            $translated_text = $this->deepl_translate($org_page_text, $lang, getNS($ID));
201            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
202        }
203
204        msg($this->getLang('msg_translation_success'), 1);
205
206        // reload the page after translation to clear the action
207        send_redirect(wl($ID));
208    }
209
210    private function get_mode(): string {
211        global $ID;
212        if ($this->getConf('editor_regex')) {
213            if (preg_match('/' . $this->getConf('editor_regex') . '/', $ID) === 1) return 'editor';
214        }
215        if ($this->getConf('direct_regex')) {
216            if (preg_match('/' . $this->getConf('direct_regex') . '/', $ID) === 1) return 'direct';
217        }
218        return $this->getConf('mode');
219    }
220
221    private function get_target_lang(): string {
222        global $ID;
223        $split_id = explode(':', $ID);
224        return array_shift($split_id);
225    }
226
227    private function get_org_page_info(): array {
228        global $ID;
229        global $conf;
230
231        $split_id = explode(':', $ID);
232        array_shift($split_id);
233        $org_id = implode(':', $split_id);
234
235        // if default lang is in ns: add default ns in front of org id
236        if ($this->getConf('default_lang_in_ns')) {
237            $org_id = $conf['lang'] . ':' . $org_id;
238        }
239
240        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
241    }
242
243    private function check_do_translation($allow_existing = false): bool {
244        global $INFO;
245        global $ID;
246        global $conf;
247
248        // only translate if the current page does not exist
249        if ($INFO['exists'] and !$allow_existing) return false;
250
251        // permission check
252        $perm = auth_quickaclcheck($ID);
253        if (($INFO['exists'] and $perm < AUTH_EDIT) or (!$INFO['exists'] and $perm < AUTH_CREATE)) return false;
254
255        // skip blacklisted namespaces and pages
256        if ($this->getConf('blacklist_regex')) {
257            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
258        }
259
260        $split_id = explode(':', $ID);
261        $lang_ns = array_shift($split_id);
262        // only translate if the current page is in a language namespace
263        if (!array_key_exists($lang_ns, $this->langs)) return false;
264
265        $org_id = implode(':', $split_id);
266
267        // if default lang is in ns: add default ns in front of org id
268        if ($this->getConf('default_lang_in_ns')) {
269            $org_id = $conf['lang'] . ':' . $org_id;
270        }
271
272        // check if the original page exists
273        if (!page_exists($org_id)) return false;
274
275        return true;
276    }
277
278    private function check_do_push_translate(): bool {
279        global $ID;
280        global $INFO;
281        global $conf;
282
283        if (!$INFO['exists']) return false;
284
285        // if default language is in namespace: only allow push translation from that namespace
286        if($this->getConf('default_lang_in_ns')) {
287            $split_id = explode(':', $ID);
288            $lang_ns = array_shift($split_id);
289
290            if ($lang_ns !== $conf['lang']) return false;
291        }
292
293        $push_langs = $this->get_push_langs();
294        // push_langs empty --> push_translate disabled --> abort
295        if (empty($push_langs)) return false;
296
297        // skip blacklisted namespaces and pages
298        if ($this->getConf('blacklist_regex')) {
299            // blacklist regex match --> abort
300            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
301        }
302
303        return true;
304    }
305
306    private function deepl_translate($text, $target_lang, $org_ns): string {
307        if (!trim($this->getConf('api_key'))) return '';
308
309        $text = $this->patch_links($text, $target_lang, $org_ns);
310
311        $text = $this->insert_ignore_tags($text);
312
313        $data = [
314            'auth_key' => $this->getConf('api_key'),
315            'target_lang' => $this->langs[$target_lang],
316            'tag_handling' => 'xml',
317            'ignore_tags' => 'ignore',
318            'text' => $text
319        ];
320
321        if ($this->getConf('api') == 'free') {
322            $url = 'https://api-free.deepl.com/v2/translate';
323        } else {
324            $url = 'https://api.deepl.com/v2/translate';
325        }
326
327        $http = new DokuHTTPClient();
328        $raw_response = $http->post($url, $data);
329
330        if ($http->status >= 400) {
331            // add error messages
332            switch ($http->status) {
333                case 403:
334                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
335                    break;
336                case 456:
337                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
338                    break;
339                default:
340                    msg($this->getLang('msg_translation_fail'), -1);
341                    break;
342            }
343
344            // if any error occurred return an empty string
345            return '';
346        }
347
348        $json_response = json_decode($raw_response, true);
349        $translated_text = $json_response['translations'][0]['text'];
350
351        $translated_text = $this->remove_ignore_tags($translated_text);
352
353        return $translated_text;
354    }
355
356    private function get_push_langs(): array {
357        $push_langs = trim($this->getConf('push_langs'));
358
359        if ($push_langs === '') return array();
360
361        return explode(' ', $push_langs);
362    }
363
364    private function patch_links($text, $target_lang, $ns): string {
365        /*
366         * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]]
367         * 2. Extract aa:bb
368         * 3. Check if lang:aa:bb exists
369         * 3.1. --> Yes --> replace
370         * 3.2. --> No --> leave it as it is
371         */
372
373
374        /*
375         * LINKS
376         */
377
378        preg_match_all('/\[\[([\s\S]*?)(\|([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);
379
380        foreach ($matches as $match) {
381
382            if (strpos($match[1], '://') !== false) {
383                // external link --> skip
384                continue;
385            }
386
387            $resolved_id = $match[1];
388
389            resolve_pageid($ns, $resolved_id, $exists);
390
391            if (!$exists) {
392                // redlink --> skip
393                continue;
394            }
395
396            $lang_id = $target_lang . ':' . $resolved_id;
397
398            if (!page_exists($lang_id)) {
399                // Page in target lang does not exist --> skip
400                continue;
401            }
402
403            $new_link = '[[' . $lang_id . $match[2] . ']]';
404
405            $text = str_replace($match[0], $new_link, $text);
406
407        }
408
409        /*
410         * MEDIA
411         */
412
413        preg_match_all('/\{\{([\s\S]*?)(\?[\s\S]*?)?(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);
414
415        foreach ($matches as $match) {
416
417            if (strpos($match[1], '://') !== false) {
418                // external image --> skip
419                continue;
420            }
421
422            $resolved_id = $match[1];
423
424            resolve_mediaid($ns, $resolved_id, $exists);
425
426            if (!$exists) {
427                // redlink --> skip
428                continue;
429            }
430
431            $lang_id = $target_lang . ':' . $resolved_id;
432
433            $lang_id_fn = mediaFN($lang_id);
434
435            if (!file_exists($lang_id_fn)) {
436                // media in target lang does not exist --> skip
437                continue;
438            }
439
440            $new_link = '{{' . $lang_id . $match[2] . $match[3] . '}}';
441
442            $text = str_replace($match[0], $new_link, $text);
443
444        }
445
446        return $text;
447    }
448
449    private function insert_ignore_tags($text): string {
450        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
451        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);
452
453        // prevent deepl from breaking headings
454        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);
455
456        // fix for the template plugin
457        $text = preg_replace('/\{\{template>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);
458
459        // ignore link/media ids but translate the text (if existing)
460        $text = preg_replace('/\[\[([\s\S]*?)((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${3}</ignore>${4}<ignore>]]</ignore>', $text);
461        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);
462
463        // prevent deepl from doing strange things with dokuwiki syntax
464        $text = str_replace("''", "<ignore>''</ignore>", $text);
465        $text = str_replace("\\\\", "<ignore>\\\\</ignore>", $text);
466
467        // ignore code tags
468        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
469        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
470        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);
471
472        // ignore the expressions from the ignore list
473        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
474
475        foreach ($ignored_expressions as $expression) {
476            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
477        }
478
479        return $text;
480    }
481
482    private function remove_ignore_tags($text): string {
483        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
484
485        foreach ($ignored_expressions as $expression) {
486            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
487        }
488
489        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
490        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);
491
492        $text = str_replace("<ignore>''</ignore>", "''", $text);
493        $text = str_replace("<ignore>\\\\</ignore>", "\\\\", $text);
494
495        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
496        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
497        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);
498
499        // fix for the template plugin
500        $text = preg_replace('/<ignore>(\{\{template>[\s\S]*?}})<\/ignore>/', '${1}', $text);
501
502        // prevent deepl from breaking headings
503        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/','${1}', $text);
504
505        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
506        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);
507
508        // restore < and > for example from arrows (-->) in wikitext
509        $text = str_replace('&gt;', '>', $text);
510        $text = str_replace('&lt;', '<', $text);
511
512        return $text;
513    }
514}
515
516