1<?php
2/**
3 * Deepl Autotranslate Plugin
4 *
5 * @author     Jennifer Graul <me@netali.de>
6 */
7
// Refuse to run when this file is requested directly instead of being loaded by DokuWiki.
if (!defined('DOKU_INC')) {
    die();
}
9
10use \dokuwiki\HTTP\DokuHTTPClient;
11use \dokuwiki\plugin\deeplautotranslate\MenuItem;
12
13class action_plugin_deeplautotranslate extends DokuWiki_Action_Plugin {
14
15    // manual mapping of ISO-languages to DeepL-languages to fix inconsistent naming
16    private $langs = array(
17        'bg' => 'BG',
18        'cs' => 'CS',
19        'da' => 'DA',
20        'de' => 'DE',
21        'de-informal' => 'DE',
22        'el' => 'EL',
23        'en' => 'EN-GB',
24        'es' => 'ES',
25        'et' => 'ET',
26        'fi' => 'FI',
27        'fr' => 'FR',
28        'hu' => 'HU',
29        'hu-formal' => 'HU',
30        'it' => 'IT',
31        'ja' => 'JA',
32        'lt' => 'LT',
33        'lv' => 'LV',
34        'nl' => 'NL',
35        'pl' => 'PL',
36        'pt' => 'PT-PT',
37        'ro' => 'RO',
38        'ru' => 'RU',
39        'sk' => 'SK',
40        'sl' => 'SL',
41        'sv' => 'SV',
42        'uk' => 'UK',
43        'zh' => 'ZH'
44    );
45
46    /**
47     * Register its handlers with the DokuWiki's event controller
48     */
49    public function register(Doku_Event_Handler $controller) {
50        $controller->register_hook('ACTION_ACT_PREPROCESS','BEFORE', $this, 'preprocess');
51        $controller->register_hook('COMMON_PAGETPL_LOAD','AFTER', $this, 'pagetpl_load');
52        $controller->register_hook('COMMON_WIKIPAGE_SAVE','AFTER', $this, 'update_glossary');
53        $controller->register_hook('MENU_ITEMS_ASSEMBLY', 'AFTER', $this, 'add_menu_button');
54    }
55
56    public function add_menu_button(Doku_Event $event): void {
57        global $ID;
58        global $ACT;
59
60        if ($ACT != 'show') return;
61
62        if ($event->data['view'] != 'page') return;
63
64        if (!$this->getConf('show_button')) return;
65
66        // no translations for the glossary namespace
67        if ($this->check_in_glossary_ns()) return;
68
69        $split_id = explode(':', $ID);
70        $lang_ns = array_shift($split_id);
71        // check if we are in a language namespace
72        if (array_key_exists($lang_ns, $this->langs)) {
73            if($this->getConf('default_lang_in_ns') and $lang_ns === $this->get_default_lang()) {
74                // if the default lang is in a namespace and we are in that namespace --> check for push translation
75                if (!$this->check_do_push_translate()) return;
76            } else {
77                // in language namespace --> check if we should translate
78                if (!$this->check_do_translation(true)) return;
79            }
80        } else {
81            // do not show the button if we are not in a language namespace and the default language is in a namespace
82            if($this->getConf('default_lang_in_ns')) return;
83            // not in language namespace and default language is not in a namespace --> check if we should show the push translate button
84            if (!$this->check_do_push_translate()) return;
85        }
86
87        array_splice($event->data['items'], -1, 0, [new MenuItem()]);
88    }
89
90    public function preprocess(Doku_Event $event, $param): void {
91        global $ID;
92
93        // check if action is show or translate
94        if ($event->data != 'show' and $event->data != 'translate') return;
95
96        // redirect to glossary ns start if glossary ns is called
97        if ($this->check_in_glossary_ns() and $event->data == 'show' and $ID == $this->get_glossary_ns()) {
98            send_redirect(wl($this->get_glossary_ns() . ':start'));
99        }
100
101        $split_id = explode(':', $ID);
102        $lang_ns = array_shift($split_id);
103        // check if we are in a language namespace
104        if (array_key_exists($lang_ns, $this->langs)) {
105            if($this->getConf('default_lang_in_ns') and $lang_ns === $this->get_default_lang()) {
106                // if the default lang is in a namespace and we are in that namespace --> push translate
107                $this->push_translate($event);
108            } else {
109                // in language namespace --> autotrans direct
110                $this->autotrans_direct($event);
111            }
112        } else {
113            // not in language namespace --> push translate
114            $this->push_translate($event);
115        }
116    }
117
118    public function pagetpl_load(Doku_Event $event, $param): void {
119        // handle glossary namespace init when we are in it
120        if ($this->check_in_glossary_ns()) {
121            $this->handle_glossary_init($event);
122            return;
123        }
124
125        $this->autotrans_editor($event);
126    }
127
128    public function update_glossary(Doku_Event $event, $param): void {
129        global $ID;
130        // this also checks if the glossary feature is enabled
131        if (!$this->check_in_glossary_ns()) return;
132
133        $glossary_ns = $this->get_glossary_ns();
134
135        // check if we are in a glossary definition
136        if(preg_match('/^' . $glossary_ns . ':(\w{2})_(\w{2})$/', $ID, $id_match)) {
137            $old_glossary_id = $this->get_glossary_id($id_match[1], $id_match[2]);
138            if ($event->data['changeType'] == DOKU_CHANGE_TYPE_DELETE) {
139                // page deleted --> delete glossary
140                if ($old_glossary_id) {
141                    $result = $this->delete_glossary($old_glossary_id);
142                    if ($result) {
143                        msg($this->getLang('msg_glossary_delete_success'), 1);
144                        $this->unset_glossary_id($id_match[1], $id_match[2]);
145                    }
146                }
147                return;
148            }
149
150            $entries = '';
151
152            // grep entries from definition table
153            preg_match_all('/[ \t]*\|(.*?)\|(.*?)\|/', $event->data['newContent'], $matches, PREG_SET_ORDER);
154            foreach ($matches as $match) {
155                $src = trim($match[1]);
156                $target = trim($match[2]);
157                if ($src == '' or $target == '') {
158                    msg($this->getLang('msg_glossary_empty_key'), -1);
159                    return;
160                }
161                $entries .=  $src . "\t" . $target . "\n";
162            }
163
164            if (empty($matches)) {
165                // no matches --> delete glossary
166                if ($old_glossary_id) {
167                    $result = $this->delete_glossary($old_glossary_id);
168                    if ($result) {
169                        msg($this->getLang('msg_glossary_delete_success'), 1);
170                        $this->unset_glossary_id($id_match[1], $id_match[2]);
171                    }
172                }
173                return;
174            }
175
176            $new_glossary_id = $this->create_glossary($id_match[1], $id_match[2], $entries);
177
178            if ($new_glossary_id) {
179                msg($this->getLang('msg_glossary_create_success'), 1);
180                $this->set_glossary_id($id_match[1], $id_match[2], $new_glossary_id);
181            } else {
182                return;
183            }
184
185            if ($old_glossary_id) $this->delete_glossary($old_glossary_id);
186        }
187    }
188
189    private function autotrans_direct(Doku_Event $event): void {
190        global $ID;
191
192        // abort if action is translate and the translate button is disabled
193        if ($event->data == 'translate' and !$this->getConf('show_button')) return;
194
195        // do nothing on show action when mode is not direct
196        if ($event->data == 'show' and $this->get_mode() != 'direct') return;
197
198        // allow translation of existing pages is we are in the translate action
199        $allow_existing = ($event->data == 'translate');
200
201        // reset action to show
202        $event->data = 'show';
203
204        if (!$this->check_do_translation($allow_existing)) {
205            return;
206        }
207
208        $org_page_info = $this->get_org_page_info();
209        $translated_text = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
210
211        if ($translated_text === '') {
212            return;
213        }
214
215        saveWikiText($ID, $translated_text, 'Automatic translation');
216
217        msg($this->getLang('msg_translation_success'), 1);
218
219        // reload the page after translation
220        send_redirect(wl($ID));
221    }
222
223    private function autotrans_editor(Doku_Event $event): void {
224        if ($this->get_mode() != 'editor') return;
225
226        if (!$this->check_do_translation()) return;
227
228        $org_page_info = $this->get_org_page_info();
229
230        $event->data['tpl'] = $this->deepl_translate($org_page_info["text"], $this->get_target_lang(), $org_page_info["ns"]);
231    }
232
233    private function push_translate(Doku_Event $event): void {
234        global $ID;
235
236        // check if action is translate
237        if ($event->data != 'translate') return;
238
239        // check if button is enabled
240        if (!$this->getConf('show_button')) {
241            send_redirect(wl($ID));
242            return;
243        }
244
245        if (!$this->check_do_push_translate()) {
246            send_redirect(wl($ID));
247            return;
248        }
249
250        // push translate
251        $push_langs = $this->get_push_langs();
252        $org_page_text = rawWiki($ID);
253        foreach ($push_langs as $lang) {
254            // skip invalid languages
255            if (!array_key_exists($lang, $this->langs)) {
256                msg($this->getLang('msg_translation_fail_invalid_lang') . $lang, -1);
257                continue;
258            }
259
260            if ($this->getConf('default_lang_in_ns')) {
261                // if default lang is in ns: replace language namespace in ID
262                $split_id = explode(':', $ID);
263                array_shift($split_id);
264                $lang_id = implode(':', $split_id);
265                $lang_id = $lang . ':' . $lang_id;
266            } else {
267                // if default lang is not in ns: add language namespace to ID
268                $lang_id = $lang . ':' . $ID;
269            }
270
271            // check permissions
272            $perm = auth_quickaclcheck($lang_id);
273            $exists = page_exists($lang_id);
274            if (($exists and $perm < AUTH_EDIT) or (!$exists and $perm < AUTH_CREATE)) {
275                msg($this->getLang('msg_translation_fail_no_permissions') . $lang_id, -1);
276                continue;
277            }
278
279            $translated_text = $this->deepl_translate($org_page_text, $lang, getNS($ID));
280            saveWikiText($lang_id, $translated_text, 'Automatic push translation');
281        }
282
283        msg($this->getLang('msg_translation_success'), 1);
284
285        // reload the page after translation to clear the action
286        send_redirect(wl($ID));
287    }
288
289    private function handle_glossary_init(Doku_Event $event): void {
290        global $ID;
291
292        $glossary_ns = $this->get_glossary_ns();
293
294        // create glossary landing page
295        if ($ID == $glossary_ns . ':start') {
296            $landing_page_text = '====== ' . $this->getLang('glossary_landing_heading') . ' ======' . "\n";
297            $landing_page_text .= $this->getLang('glossary_landing_info_msg') . "\n";
298
299            $src_lang = substr($this->get_default_lang(), 0, 2);
300
301            $available_glossaries = $this->get_available_glossaries();
302            foreach ($available_glossaries as $glossary) {
303                if ($glossary['source_lang'] != $src_lang) continue;
304                // generate links to the available glossary pages
305                $landing_page_text .= '  * [[.:' . $glossary['source_lang'] . '_' . $glossary['target_lang'] . '|' . strtoupper($glossary['source_lang']) . ' -> ' . strtoupper($glossary['target_lang']) . ']]' . "\n";
306            }
307            $event->data['tpl'] = $landing_page_text;
308            return;
309        }
310
311        if (preg_match('/^' . $glossary_ns . ':(\w{2})_(\w{2})$/', $ID, $match)) {
312            // check if glossaries are supported for this language pair
313            if (!$this->check_glossary_supported($match[1], $match[2])) {
314                msg($this->getLang('msg_glossary_unsupported'), -1);
315                return;
316            }
317
318            $page_text = '====== ' . $this->getLang('glossary_definition_heading') . ': ' . strtoupper($match[1]) . ' -> ' . strtoupper($match[2]) . ' ======' . "\n";
319            $page_text .= $this->getLang('glossary_definition_help') . "\n\n";
320            $page_text .= '^ ' . strtoupper($match[1]) . ' ^ ' . strtoupper($match[2]) . ' ^' . "\n";
321
322            $event->data['tpl'] = $page_text;
323            return;
324        }
325    }
326
327    private function get_glossary_ns(): string {
328        return trim(strtolower($this->getConf('glossary_ns')));
329    }
330
331    private function get_mode(): string {
332        global $ID;
333        if ($this->getConf('editor_regex')) {
334            if (preg_match('/' . $this->getConf('editor_regex') . '/', $ID) === 1) return 'editor';
335        }
336        if ($this->getConf('direct_regex')) {
337            if (preg_match('/' . $this->getConf('direct_regex') . '/', $ID) === 1) return 'direct';
338        }
339        return $this->getConf('mode');
340    }
341
342    private function get_target_lang(): string {
343        global $ID;
344        $split_id = explode(':', $ID);
345        return array_shift($split_id);
346    }
347
348    private function get_default_lang(): string {
349        global $conf;
350
351        if (empty($conf['lang_before_translation'])) {
352            $default_lang = $conf['lang'];
353        } else {
354            $default_lang = $conf['lang_before_translation'];
355        }
356
357        return $default_lang;
358    }
359
360    private function get_org_page_info(): array {
361        global $ID;
362
363        $split_id = explode(':', $ID);
364        array_shift($split_id);
365        $org_id = implode(':', $split_id);
366
367        // if default lang is in ns: add default ns in front of org id
368        if ($this->getConf('default_lang_in_ns')) {
369            $org_id = $this->get_default_lang() . ':' . $org_id;
370        }
371
372        return array("ns" => getNS($org_id), "text" => rawWiki($org_id));
373    }
374
375    private function get_available_glossaries(): array {
376        if (!trim($this->getConf('api_key'))) {
377            msg($this->getLang('msg_bad_key'), -1);
378            return array();
379        }
380
381        if ($this->getConf('api') == 'free') {
382            $url = 'https://api-free.deepl.com/v2/glossary-language-pairs';
383        } else {
384            $url = 'https://api.deepl.com/v2/glossary-language-pairs';
385        }
386
387        $http = new DokuHTTPClient();
388
389        $http->headers = array('Authorization' => 'DeepL-Auth-Key ' . $this->getConf('api_key'));
390
391        $raw_response = $http->get($url);
392
393        if ($http->status >= 400) {
394            // add error messages
395            switch ($http->status) {
396                case 403:
397                    msg($this->getLang('msg_bad_key'), -1);
398                    break;
399                default:
400                    msg($this->getLang('msg_glossary_fetch_fail'), -1);
401                    break;
402            }
403
404            // if any error occurred return an empty array
405            return array();
406        }
407
408        $json_response = json_decode($raw_response, true);
409
410        return $json_response['supported_languages'];
411    }
412
413    private function get_glossary_id($src, $target): string {
414        if (!file_exists(DOKU_CONF . 'deepl-glossaries.json')) return '';
415
416        $key = $src . "_" . $target;
417
418        $raw_json = file_get_contents(DOKU_CONF . 'deepl-glossaries.json');
419        $content = json_decode($raw_json, true);
420
421        if (array_key_exists($key, $content)) {
422            return $content[$key];
423        } else {
424            return '';
425        }
426    }
427
428    private function set_glossary_id($src, $target, $glossary_id): void {
429        if (file_exists(DOKU_CONF . 'deepl-glossaries.json')) {
430            $raw_json = file_get_contents(DOKU_CONF . 'deepl-glossaries.json');
431            $content = json_decode($raw_json, true);
432        } else {
433            $content = array();
434        }
435
436        $key = $src . "_" . $target;
437
438        $content[$key] = $glossary_id;
439
440        $raw_json = json_encode($content);
441        file_put_contents(DOKU_CONF . 'deepl-glossaries.json', $raw_json);
442    }
443
444    private function unset_glossary_id($src, $target): void {
445        if (file_exists(DOKU_CONF . 'deepl-glossaries.json')) {
446            $raw_json = file_get_contents(DOKU_CONF . 'deepl-glossaries.json');
447            $content = json_decode($raw_json, true);
448        } else {
449            return;
450        }
451
452        $key = $src . "_" . $target;
453
454        unset($content[$key]);
455
456        $raw_json = json_encode($content);
457        file_put_contents(DOKU_CONF . 'deepl-glossaries.json', $raw_json);
458    }
459
460    private function check_in_glossary_ns(): bool {
461        global $ID;
462
463        $glossary_ns = $this->get_glossary_ns();
464
465        // check if the glossary namespace is defined
466        if (!$glossary_ns) return false;
467
468        // check if we are in the glossary namespace
469        if (substr($ID, 0, strlen($glossary_ns)) == $glossary_ns) {
470            return true;
471        } else {
472            return false;
473        }
474    }
475
476    private function check_glossary_supported($src, $target): bool {
477        if(strlen($src) != 2 or strlen($target) != 2) return false;
478        $available_glossaries = $this->get_available_glossaries();
479        foreach ($available_glossaries as $glossary) {
480            if ($src == $glossary['source_lang'] and $target == $glossary['target_lang']) return true;
481        }
482        return false;
483    }
484
485    private function check_do_translation($allow_existing = false): bool {
486        global $INFO;
487        global $ID;
488
489        // only translate if the current page does not exist
490        if ($INFO['exists'] and !$allow_existing) return false;
491
492        // permission check
493        $perm = auth_quickaclcheck($ID);
494        if (($INFO['exists'] and $perm < AUTH_EDIT) or (!$INFO['exists'] and $perm < AUTH_CREATE)) return false;
495
496        // skip blacklisted namespaces and pages
497        if ($this->getConf('blacklist_regex')) {
498            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
499        }
500
501        $split_id = explode(':', $ID);
502        $lang_ns = array_shift($split_id);
503        // only translate if the current page is in a language namespace
504        if (!array_key_exists($lang_ns, $this->langs)) return false;
505
506        $org_id = implode(':', $split_id);
507
508        // if default lang is in ns: add default ns in front of org id
509        if ($this->getConf('default_lang_in_ns')) {
510            $org_id = $this->get_default_lang() . ':' . $org_id;
511        }
512
513        // no translations for the glossary namespace
514        $glossary_ns = $this->get_glossary_ns();
515        if ($glossary_ns and substr($org_id, 0, strlen($glossary_ns)) == $glossary_ns) return false;
516
517        // check if the original page exists
518        if (!page_exists($org_id)) return false;
519
520        return true;
521    }
522
523    private function check_do_push_translate(): bool {
524        global $ID;
525        global $INFO;
526
527        if (!$INFO['exists']) return false;
528
529        // only allow push translation if the user can edit this page
530        $perm = auth_quickaclcheck($ID);
531        if ($perm < AUTH_EDIT) return false;
532
533        // if default language is in namespace: only allow push translation from that namespace
534        if($this->getConf('default_lang_in_ns')) {
535            $split_id = explode(':', $ID);
536            $lang_ns = array_shift($split_id);
537
538            if ($lang_ns !== $this->get_default_lang()) return false;
539        }
540
541        // no translations for the glossary namespace
542        if ($this->check_in_glossary_ns()) return false;
543
544        $push_langs = $this->get_push_langs();
545        // push_langs empty --> push_translate disabled --> abort
546        if (empty($push_langs)) return false;
547
548        // skip blacklisted namespaces and pages
549        if ($this->getConf('blacklist_regex')) {
550            // blacklist regex match --> abort
551            if (preg_match('/' . $this->getConf('blacklist_regex') . '/', $ID) === 1) return false;
552        }
553
554        return true;
555    }
556
557    private function create_glossary($src, $target, $entries): string {
558        if (!trim($this->getConf('api_key'))) {
559            msg($this->getLang('msg_bad_key'), -1);
560            return '';
561        }
562
563        if ($this->getConf('api') == 'free') {
564            $url = 'https://api-free.deepl.com/v2/glossaries';
565        } else {
566            $url = 'https://api.deepl.com/v2/glossaries';
567        }
568
569        $data = array(
570            'name' => 'DokuWiki-Autotranslate-' . $src . '_' . $target,
571            'source_lang' => $src,
572            'target_lang' => $target,
573            'entries' => $entries,
574            'entries_format' => 'tsv'
575        );
576
577        $http = new DokuHTTPClient();
578
579        $http->headers = array('Authorization' => 'DeepL-Auth-Key ' . $this->getConf('api_key'));
580
581        $raw_response = $http->post($url, $data);
582
583        if ($http->status >= 400) {
584            // add error messages
585            switch ($http->status) {
586                case 403:
587                    msg($this->getLang('msg_bad_key'), -1);
588                    break;
589                case 400:
590                    msg($this->getLang('msg_glossary_content_invalid'), -1);
591                    break;
592                default:
593                    msg($this->getLang('msg_glossary_create_fail'), -1);
594                    break;
595            }
596
597            // if any error occurred return an empty string
598            return '';
599        }
600
601        $json_response = json_decode($raw_response, true);
602
603        return $json_response['glossary_id'];
604    }
605
606    private function delete_glossary($glossary_id): bool {
607        if (!trim($this->getConf('api_key'))) {
608            msg($this->getLang('msg_bad_key'), -1);
609            return false;
610        }
611
612        if ($this->getConf('api') == 'free') {
613            $url = 'https://api-free.deepl.com/v2/glossaries';
614        } else {
615            $url = 'https://api.deepl.com/v2/glossaries';
616        }
617
618        $url .= '/' . $glossary_id;
619
620        $http = new DokuHTTPClient();
621
622        $http->headers = array('Authorization' => 'DeepL-Auth-Key ' . $this->getConf('api_key'));
623
624        $http->sendRequest($url, '', 'DELETE');
625
626        if ($http->status >= 400) {
627            // add error messages
628            switch ($http->status) {
629                case 403:
630                    msg($this->getLang('msg_bad_key'), -1);
631                    break;
632                default:
633                    msg($this->getLang('msg_glossary_delete_fail'), -1);
634                    break;
635            }
636
637            // if any error occurred return false
638            return false;
639        }
640
641        return true;
642    }
643
644    private function deepl_translate($text, $target_lang, $org_ns): string {
645        if (!trim($this->getConf('api_key'))) {
646            msg($this->getLang('msg_translation_fail_bad_key'), -1);
647            return '';
648        }
649
650        $text = $this->patch_links($text, $target_lang, $org_ns);
651
652        $text = $this->insert_ignore_tags($text);
653
654        $data = array(
655            'source_lang' => strtoupper(substr($this->get_default_lang(), 0, 2)), // cut of things like "-informal"
656            'target_lang' => $this->langs[$target_lang],
657            'tag_handling' => 'xml',
658            'ignore_tags' => 'ignore',
659            'text' => $text
660        );
661
662        // check if glossaries are enabled
663        if ($this->get_glossary_ns()) {
664            $src = substr($this->get_default_lang(), 0, 2);
665            $target = substr($target_lang, 0, 2);
666            $glossary_id = $this->get_glossary_id($src, $target);
667            if ($glossary_id) {
668                // use glossary if it is defined
669                $data['glossary_id'] = $glossary_id;
670            }
671        }
672
673        if ($this->getConf('api') == 'free') {
674            $url = 'https://api-free.deepl.com/v2/translate';
675        } else {
676            $url = 'https://api.deepl.com/v2/translate';
677        }
678
679        $http = new DokuHTTPClient();
680
681        $http->headers = array('Authorization' => 'DeepL-Auth-Key ' . $this->getConf('api_key'));
682
683        $raw_response = $http->post($url, $data);
684
685        if ($http->status >= 400) {
686            // add error messages
687            switch ($http->status) {
688                case 403:
689                    msg($this->getLang('msg_translation_fail_bad_key'), -1);
690                    break;
691                case 404:
692                    msg($this->getLang('msg_translation_fail_invalid_glossary'), -1);
693                    break;
694                case 456:
695                    msg($this->getLang('msg_translation_fail_quota_exceeded'), -1);
696                    break;
697                default:
698                    msg($this->getLang('msg_translation_fail'), -1);
699                    break;
700            }
701
702            // if any error occurred return an empty string
703            return '';
704        }
705
706        $json_response = json_decode($raw_response, true);
707        $translated_text = $json_response['translations'][0]['text'];
708
709        $translated_text = $this->remove_ignore_tags($translated_text);
710
711        return $translated_text;
712    }
713
714    private function get_push_langs(): array {
715        $push_langs = trim($this->getConf('push_langs'));
716
717        if ($push_langs === '') return array();
718
719        return explode(' ', $push_langs);
720    }
721
722    /**
723     * Is the given ID a relative path?
724     *
725     * Always returns false if keep_relative is disabled.
726     *
727     * @param string $id
728     * @return bool
729     */
730    private function isRelativeLink($id)
731    {
732        if (!$this->getConf('keep_relative')) return false;
733        if ($id === '') return false;
734        if (strpos($id, ':') === false) return true;
735        if ($id[0] === '.') return true;
736        if ($id[0] === '~') return true;
737        return false;
738    }
739
740    private function patch_links($text, $target_lang, $ns): string {
741        /*
742         * 1. Find links in [[ aa:bb ]] or [[ aa:bb | cc ]]
743         * 2. Extract aa:bb
744         * 3. Check if lang:aa:bb exists
745         * 3.1. --> Yes --> replace
746         * 3.2. --> No --> leave it as it is
747         */
748
749
750        /*
751         * LINKS
752         */
753
754        preg_match_all('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', $text, $matches, PREG_SET_ORDER);
755
756        foreach ($matches as $match) {
757
758            // external link --> skip
759            if (strpos($match[1], '://') !== false) continue;
760
761            // skip interwiki links
762            if (strpos($match[1], '>') !== false) continue;
763
764            // skip mail addresses
765            if (strpos($match[1], '@') !== false) continue;
766
767            // skip windows share links
768            if (strpos($match[1], '\\\\') !== false) continue;
769
770            $resolved_id = trim($match[1]);
771            if($this->isRelativeLink($resolved_id)) continue;
772
773            resolve_pageid($ns, $resolved_id, $exists);
774
775            $resolved_id_full = $resolved_id;
776
777            // if the link already points to a target in a language namespace drop it and add the new language namespace
778            $split_id = explode(':', $resolved_id);
779            $lang_ns = array_shift($split_id);
780            if (array_key_exists($lang_ns, $this->langs)) {
781                $resolved_id = implode(':', $split_id);
782            }
783
784            $lang_id = $target_lang . ':' . $resolved_id;
785
786            if (!page_exists($lang_id)) {
787                // Page in target lang does not exist --> replace with absolute ID in case it was a relative ID
788                $new_link = '[[' . $resolved_id_full . $match[2] . $match[3] . ']]';
789            } else {
790                // Page in target lang exists --> replace link
791                $new_link = '[[' . $lang_id . $match[2] . $match[3] . ']]';
792            }
793
794            $text = str_replace($match[0], $new_link, $text);
795
796        }
797
798        /*
799         * MEDIA
800         */
801
802        preg_match_all('/\{\{(([\s\S]*?)(\?[\s\S]*?)?)(\|([\s\S]*?))?}}/', $text, $matches, PREG_SET_ORDER);
803
804        foreach ($matches as $match) {
805
806            // external image --> skip
807            if (strpos($match[1], '://') !== false) continue;
808
809            // skip things like {{tag>...}}
810            if (strpos($match[1], '>') !== false) continue;
811
812            // keep alignment
813            $align_left = "";
814            $align_right = "";
815
816            // align left --> space in front of ID
817            if (substr($match[1], 0, 1) == " ") $align_left = " ";
818            // align right --> space behind id
819            if (substr($match[1], -1) == " ") $align_right = " ";
820
821            $resolved_id = trim($match[2]);
822            $params = trim($match[3]);
823
824            if($this->isRelativeLink($resolved_id)) continue;
825
826            resolve_mediaid($ns, $resolved_id, $exists);
827
828            $resolved_id_full = $resolved_id;
829
830            // if the link already points to a target in a language namespace drop it and add the new language namespace
831            $split_id = explode(':', $resolved_id);
832            $lang_ns = array_shift($split_id);
833            if (array_key_exists($lang_ns, $this->langs)) {
834                $resolved_id = implode(':', $split_id);
835            }
836
837            $lang_id = $target_lang . ':' . $resolved_id;
838
839            $lang_id_fn = mediaFN($lang_id);
840
841            if (!file_exists($lang_id_fn)) {
842                // media in target lang does not exist --> replace with absolute ID in case it was a relative ID
843                $new_link = '{{' . $align_left . $resolved_id_full . $params . $align_right . $match[4] . '}}';
844            } else {
845                // media in target lang exists --> replace it
846                $new_link = '{{' . $align_left . $lang_id . $params . $align_right . $match[4] . '}}';
847            }
848
849            $text = str_replace($match[0], $new_link, $text);
850
851        }
852
853        return $text;
854    }
855
856    private function insert_ignore_tags($text): string {
857        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
858        $text = preg_replace('/<[\s\S]+?>/', '<ignore>${0}</ignore>', $text);
859
860        // prevent deepl from breaking headings
861        $text = preg_replace('/={1,6}/', '<ignore>${0}</ignore>', $text);
862
863        // prevent deepl from messing with nocache-instructions
864        $text = str_replace("~~NOCACHE~~", "<ignore>~~NOCACHE~~</ignore>", $text);
865
866        // fix for plugins like tag or template
867        $text = preg_replace('/\{\{[\s\w]+?>[\s\S]*?}}/', '<ignore>${0}</ignore>', $text);
868
869        // ignore links in wikitext (outside of dokuwiki-links)
870        $text = preg_replace('/\S+:\/\/\S+/', '<ignore>${0}</ignore>', $text);
871
872        // ignore link/media ids but translate the text (if existing)
873        $text = preg_replace('/\[\[([\s\S]*?)(#[\s\S]*?)?((\|)([\s\S]*?))?]]/', '<ignore>[[${1}${2}${4}</ignore>${5}<ignore>]]</ignore>', $text);
874        $text = preg_replace('/\{\{([\s\S]*?)(\?[\s\S]*?)?((\|)([\s\S]*?))?}}/', '<ignore>{{${1}${2}${4}</ignore>${5}<ignore>}}</ignore>', $text);
875
876        // prevent deepl from messing with tables
877        $text = str_replace("  ^  ", "<ignore>  ^  </ignore>", $text);
878        $text = str_replace("  ^ ", "<ignore>  ^ </ignore>", $text);
879        $text = str_replace(" ^  ", "<ignore> ^  </ignore>", $text);
880        $text = str_replace("^  ", "<ignore>^  </ignore>", $text);
881        $text = str_replace("  ^", "<ignore>  ^</ignore>", $text);
882        $text = str_replace("^", "<ignore>^</ignore>", $text);
883        $text = str_replace("  |  ", "<ignore>  |  </ignore>", $text);
884        $text = str_replace("  | ", "<ignore>  | </ignore>", $text);
885        $text = str_replace(" |  ", "<ignore> |  </ignore>", $text);
886        $text = str_replace("|  ", "<ignore>|  </ignore>", $text);
887        $text = str_replace("  |", "<ignore>  |</ignore>", $text);
888        $text = str_replace("|", "<ignore>|</ignore>", $text);
889
890        // prevent deepl from doing strange things with dokuwiki syntax
891        // if a full line is formatted, we have to double-ignore for some reason
892        $text = str_replace("''", "<ignore><ignore>''</ignore></ignore>", $text);
893        $text = str_replace("//", "<ignore><ignore>//</ignore></ignore>", $text);
894        $text = str_replace("**", "<ignore><ignore>**</ignore></ignore>", $text);
895        $text = str_replace("__", "<ignore><ignore>__</ignore></ignore>", $text);
896        $text = str_replace("\\\\", "<ignore><ignore>\\\\</ignore></ignore>", $text);
897
898        // prevent deepl from messing with smileys
899        $smileys = array_keys(getSmileys());
900        foreach ($smileys as $smiley) {
901            $text = str_replace($smiley, "<ignore>" . $smiley . "</ignore>", $text);
902        }
903
904        // ignore code tags
905        $text = preg_replace('/(<php[\s\S]*?>[\s\S]*?<\/php>)/', '<ignore>${1}</ignore>', $text);
906        $text = preg_replace('/(<file[\s\S]*?>[\s\S]*?<\/file>)/', '<ignore>${1}</ignore>', $text);
907        $text = preg_replace('/(<code[\s\S]*?>[\s\S]*?<\/code>)/', '<ignore>${1}</ignore>', $text);
908
909        // ignore the expressions from the ignore list
910        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
911
912        foreach ($ignored_expressions as $expression) {
913            $text = str_replace($expression, '<ignore>' . $expression . '</ignore>', $text);
914        }
915
916        return $text;
917    }
918
919    private function remove_ignore_tags($text): string {
920        $ignored_expressions = explode(':', $this->getConf('ignored_expressions'));
921
922        foreach ($ignored_expressions as $expression) {
923            $text = str_replace('<ignore>' . $expression . '</ignore>', $expression, $text);
924        }
925
926        // prevent deepl from messing with nocache-instructions
927        $text = str_replace("<ignore>~~NOCACHE~~</ignore>", "~~NOCACHE~~", $text);
928
929        // prevent deepl from messing with tables
930        $text = str_replace("<ignore>^</ignore>", "^", $text);
931        $text = str_replace("<ignore>^  </ignore>", "^  ", $text);
932        $text = str_replace("<ignore>  ^</ignore>", "  ^", $text);
933        $text = str_replace("<ignore> ^  </ignore>", " ^  ", $text);
934        $text = str_replace("<ignore>  ^ </ignore>", "  ^ ", $text);
935        $text = str_replace("<ignore>  ^  </ignore>", "  ^  ", $text);
936        $text = str_replace("<ignore>|</ignore>", "|", $text);
937        $text = str_replace("<ignore>|  </ignore>", "|  ", $text);
938        $text = str_replace("<ignore>  |</ignore>", "  |", $text);
939        $text = str_replace("<ignore> |  </ignore>", " |  ", $text);
940        $text = str_replace("<ignore>  | </ignore>", "  | ", $text);
941        $text = str_replace("<ignore>  |  </ignore>", "  |  ", $text);
942
943        $text = str_replace("<ignore><ignore>''</ignore></ignore>", "''", $text);
944        $text = str_replace("<ignore><ignore>//</ignore></ignore>", "//", $text);
945        $text = str_replace("<ignore><ignore>**</ignore></ignore>", "**", $text);
946        $text = str_replace("<ignore><ignore>__</ignore></ignore>", "__", $text);
947        $text = str_replace("<ignore><ignore>\\\\</ignore></ignore>", "\\\\", $text);
948
949        // ignore links in wikitext (outside of dokuwiki-links)
950        $text = preg_replace('/<ignore>(\S+:\/\/\S+)<\/ignore>/', '${1}', $text);
951
952        $text = preg_replace('/<ignore>\[\[([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>]]<\/ignore>/', '[[${1}${2}${4}]]', $text);
953        $text = preg_replace('/<ignore>\{\{([\s\S]*?)(\|)?(<\/ignore>)([\s\S]*?)?<ignore>}}<\/ignore>/', '{{${1}${2}${4}}}', $text);
954
955        // prevent deepl from messing with smileys
956        $smileys = array_keys(getSmileys());
957        foreach ($smileys as $smiley) {
958            $text = str_replace("<ignore>" . $smiley . "</ignore>", $smiley, $text);
959        }
960
961        $text = preg_replace('/<ignore>(<php[\s\S]*?>[\s\S]*?<\/php>)<\/ignore>/', '${1}', $text);
962        $text = preg_replace('/<ignore>(<file[\s\S]*?>[\s\S]*?<\/file>)<\/ignore>/', '${1}', $text);
963        $text = preg_replace('/<ignore>(<code[\s\S]*?>[\s\S]*?<\/code>)<\/ignore>/', '${1}', $text);
964
965        // fix for plugins like tag or template
966        $text = preg_replace('/<ignore>(\{\{[\s\w]+?>[\s\S]*?}})<\/ignore>/', '${1}', $text);
967
968        // prevent deepl from breaking headings
969        $text = preg_replace('/<ignore>(={1,6})<\/ignore>/','${1}', $text);
970
971        // ignore every other xml-like tags (the tags themselves, not their content), otherwise deepl would break the formatting
972        $text = preg_replace('/<ignore>(<[\s\S]+?>)<\/ignore>/', '${1}', $text);
973
974        // restore < and > for example from arrows (-->) in wikitext
975        $text = str_replace('&gt;', '>', $text);
976        $text = str_replace('&lt;', '<', $text);
977
978        // restore & in wikitext
979        $text = str_replace('&amp;', '&', $text);
980
981        return $text;
982    }
983}
984
985