xref: /plugin/siteexport/action/ajax.php (revision 56c4b32d8c91c103da10f9e5918de8f7bcaa82cd)
1<?php
2/**
3 * Site Export Plugin
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     i-net software <tools@inetsoftware.de>
7 * @author     Gerry Weissbach <gweissbach@inetsoftware.de>
8 */
9
10// must be run within Dokuwiki
11if (!defined('DOKU_INC')) define('DOKU_INC', /** @scrutinizer ignore-type */ realpath(dirname(__FILE__) . '/../../../../') . '/');
12if (!defined('DOKU_PLUGIN')) {
13    // Just for sanity
14    require_once(DOKU_INC . 'inc/plugin.php');
15    define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
16}
17
18require_once(DOKU_INC . '/inc/search.php');
19
20require_once(DOKU_PLUGIN . 'siteexport/inc/functions.php');
21require_once(DOKU_PLUGIN . 'siteexport/inc/httpproxy.php');
22require_once(DOKU_PLUGIN . 'siteexport/inc/filewriter.php');
23require_once(DOKU_PLUGIN . 'siteexport/inc/toc.php');
24require_once(DOKU_PLUGIN . 'siteexport/inc/javahelp.php');
25
26use dokuwiki\File\PageResolver;
27use dokuwiki\File\MediaResolver;
28
29class action_plugin_siteexport_ajax extends DokuWiki_Action_Plugin
30{
31    /**
32     * New internal variables for better structure
33     */
34    private $filewriter = null;
35    public $functions = null;
36
37    // List of files that have already been checked
38    private $fileChecked = array();
39
40    // Namespace of the page to export
41    private $namespace = '';
42
43    /**
44     * Register Plugin in DW
45     **/
46    public function register(Doku_Event_Handler $controller) {
47        $controller->register_hook('AJAX_CALL_UNKNOWN', 'BEFORE', $this, 'ajax_siteexport_provider');
48        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'siteexport_action');
49    }
50
51    /**
52     * AJAX Provider - check what is going to be done
53     * @param $event
54     * @param $args
55     */
56    public function ajax_siteexport_provider(Doku_Event &$event, $args) {
57
58        // If this is not a siteexport call, ignore it.
59        if (!strstr($event->data, '__siteexport'))
60        {
61            return;
62        }
63
64        $this->__init_functions(true);
65
66        switch ($event->data) {
67            case '__siteexport_getsitelist': $this->ajax_siteexport_getsitelist($event); break;
68            case '__siteexport_addsite': $this->ajax_siteexport_addsite($event); break;
69            case '__siteexport_generateurl': $this->ajax_siteexport_generateurl($event); break;
70            case '__siteexport_aggregate': $this->ajax_siteexport_aggregate($event); break;
71        }
72    }
73
74    /**
75     * Export from a URL - action
76     * @param $event
77     */
78    public function siteexport_action( Doku_Event &$event ) {
79        global $ID;
80
81        // Check if the 'do' was siteexport
82        $keys = is_array($event->data) ? array_keys($event->data) : null;
83        $command = is_array($keys) ? array_shift($keys) : $event->data;
84        if ( $command != 'siteexport' ) { return false; }
85        $event->data = act_clean($event->data);
86
87        if ( headers_sent() ) {
88            msg("The siteexport function has to be called prior to any header output.", -1);
89        }
90
91        $this->__init_functions();
92
93        $this->functions->debug->message("========================================", null, 1);
94        $this->functions->debug->message("Starting export from URL call", null, 1);
95        $this->functions->debug->message("----------------------------------------", null, 1);
96
97        $event->preventDefault();
98        $event->stopPropagation();
99
100        // Fake security Token if none given
101        if (empty($_REQUEST['sectok'])) {
102            $_REQUEST['sectok'] = $this->functions->getSecurityToken();
103        }
104
105        // The timer will be used to do redirects if needed to prevent timeouts
106        $starttimer = time();
107        $timerdiff = $this->getConf('max_execution_time');
108
109        $data = $this->__get_siteexport_list_and_init_tocs($ID, !empty($_REQUEST['startcounter']));
110
111        if ($data === false) {
112            header("HTTP/1.0 401 Unauthorized");
113            print 'Unauthorized';
114            exit;
115        }
116
117        $counter = 0;
118
119        if ( count($data) == 0 && !$this->functions->settings->hasValidCacheFile ) {
120            exit( "No Data to export" );
121        }
122
123        foreach ( $data as $site ) {
124
125            if ( intval($site['exists']) == 1 || !isset($site['exists']) ) {
126
127                // Skip over the amount of urls that have been exported already
128                if ( empty($_REQUEST['startcounter']) || $counter >= intval($_REQUEST['startcounter']) ) {
129                    $status = $this->__siteexport_add_site($site['id']);
130
131                    if ( $status === false ) {
132                        $this->functions->debug->message("----------------------------------------", null, 1);
133                        $this->functions->debug->message("Errors during export from URL call", null, 1);
134                        $this->functions->debug->message("========================================", null, 1);
135                        print $this->functions->debug->runtimeErrors;
136                        exit(0); // We need to stop
137                    }
138                }
139            }
140
141            $counter++;
142            if (time()-$starttimer >= $timerdiff) {
143                $this->functions->debug->message("Will Redirect", null, 1);
144                $this->handleRuntimeErrorOutput();
145                $this->functions->startRedirctProcess($counter);
146            }
147        }
148
149        $this->functions->debug->message("----------------------------------------", null, 1);
150        $this->functions->debug->message("Finishing export from URL call", null, 1);
151        $this->functions->debug->message("========================================", null, 1);
152
153        $this->cleanCacheFiles();
154
155        $URL = ml($this->functions->settings->origZipFile, array('cache' => 'nocache', 'siteexport' => $this->functions->settings->pattern, 'sectok' => $this->functions->getSecurityToken()), true, '&');
156        $this->functions->debug->message("Redirecting to final file", $URL, 2);
157
158        $this->handleRuntimeErrorOutput();
159        send_redirect($URL);
160        exit(0); // Should not be reached, but anyways
161    }
162
163    private function handleRuntimeErrorOutput()
164    {
165        if (!empty($this->functions->debug->runtimeErrors))
166        {
167            $this->filewriter->__moveDataToZip($this->functions->debug->runtimeErrors, '_runtime_error/' . time() . '.html');
168        }
169    }
170
171    public function __init_functions($isAJAX = false)
172    {
173        global $conf;
174
175        $conf['useslash'] = 1;
176
177        $this->functions = new siteexport_functions(true, $isAJAX);
178        $this->filewriter = new siteexport_zipfilewriter($this->functions);
179
180        // Check for PDF Capabilities
181        if ($this->filewriter->canDoPDF()) {
182            $this->functions->settings->fileType = 'pdf';
183        }
184    }
185
186    /**
187     * Prepares the generated URL for direct download access
188     * Also gives back the parameters for this URL
189     * @param $event init event of the ajax request
190     */
191    private function ajax_siteexport_prepareURL_and_POSTData(Doku_Event &$event) {
192
193        $event->preventDefault();
194        $event->stopPropagation();
195
196        // Retrieve Information for download URL
197        $this->functions->debug->message("Prepared URL and POST from Request:", $_REQUEST, 2);
198        $url = $this->functions->prepare_POSTData($_REQUEST);
199        $combined = $this->functions->urlToPathAndParams($url);
200        list($path, $query) = explode('?', $combined, 2);
201        $return = array($url, $combined, $path, $query);
202
203        $this->functions->debug->message("Prepared URL and POST data:", $return, 2);
204        return $return;
205    }
206
207    /**
208     * generate direct access URL
209     **/
210    private function ajax_siteexport_generateurl(Doku_Event &$event) {
211
212        global $INPUT;
213
214        list($url, $combined, $path, $POSTData) = $this->ajax_siteexport_prepareURL_and_POSTData($event);
215
216        // WGET Redirects - this is an option for wget only.
217        // Calculate the maximum redirects that we want to allow. A Problem is that we don't know how long it will take to fetch one page
218        // Therefore we assume it takes about 5s for each page - that gives the freedom to have anough time for redirect.
219        $maxRedirectNumber = ceil((count($this->__get_siteexport_list($INPUT->str('ns'), true))*5)/$this->getConf('max_execution_time'));
220        $maxRedirect = $maxRedirectNumber > 0 ? '--max-redirect=' . ($maxRedirectNumber+3) . ' ' : '';
221        $maxRedirs = $maxRedirectNumber > 0 ? '--max-redirs ' . ($maxRedirectNumber+3) . ' ' : '';
222
223        $this->functions->debug->message("Generating Direct Download URL", $url, 2);
224
225        // If there was a Runtime Exception
226        if (!$this->functions->debug->firstRE()) {
227            $this->functions->debug->message("There have been errors while generating the download URLs.", null, 4);
228            return;
229        }
230
231        $zipFile = explode(":", ($this->getConf('zipfilename')));
232        $zipFile = array_pop($zipFile);
233
234        echo $url;
235        echo "\n";
236        echo 'wget ' . $maxRedirect . '--output-document=' . $zipFile . ' --post-data="' . $POSTData . '" ' . wl(cleanID($path), null, true) . ' --http-user=USER --http-passwd=PASSWD';
237        echo "\n";
238        echo 'curl -L ' . $maxRedirs . '-o ' . $zipFile . ' -d "' . $POSTData . '" ' . wl(cleanID($path), null, true) . ' --anyauth --user USER:PASSWD';
239        echo "\n";
240
241        $this->functions->debug->message("Checking for Cron parameters: ", $combined, 1);
242
243        return;
244    }
245
246    /**
247     * Get List of sites to be exported for AJAX (wrapper)
248     **/
249    private function ajax_siteexport_getsitelist(Doku_Event &$event) {
250
251        global $INPUT;
252
253        $event->preventDefault();
254        $event->stopPropagation();
255
256        $data = $this->__get_siteexport_list_and_init_tocs($INPUT->str('ns'));
257
258        // Important for reconaisance of the session
259
260        if ($data === false)
261        {
262            $this->functions->debug->runtimeException("No data generated. List of Files is 'false'.");
263            return;
264        }
265
266        if (empty($data) && !$this->functions->settings->hasValidCacheFile)
267        {
268            $this->functions->debug->runtimeException("Generated list is empty.");
269            return;
270        }
271
272        // If there was a Runtime Exception
273        if (!$this->functions->debug->firstRE())
274        {
275            $this->functions->debug->message("There have been errors while generating site list.", null, 4);
276            return;
277        }
278
279        echo "{$this->functions->settings->pattern}\n";
280        echo $this->functions->downloadURL() . "\n";
281        foreach ($data as $line) {
282            echo $line['id'] . "\n";
283        }
284
285        return;
286    }
287
288    private function ajax_siteexport_aggregate(Doku_Event &$event) {
289
290        // Quick preparations for one page only
291        if ($this->filewriter->hasValidCacheFile($_REQUEST)) {
292            $this->functions->debug->message("Had a valid cache file and will use it.", null, 2);
293            print $this->functions->downloadURL();
294
295            $event->preventDefault();
296            $event->stopPropagation();
297        } else {
298            // Then go for it!
299            $this->functions->debug->message("Will create a new cache thing.", null, 2);
300            $this->ajax_siteexport_addsite($event);
301        }
302
303    }
304
305    /**
306     * Add a page to the package (for AJAX calls - Wrapper)
307     **/
308    private function ajax_siteexport_addsite(Doku_Event &$event) {
309
310        global $INPUT;
311
312        $event->preventDefault();
313        $event->stopPropagation();
314
315        $this->functions->debug->message("========================================", null, 1);
316        $this->functions->debug->message("Starting export from AJAX call", null, 1);
317        $this->functions->debug->message("----------------------------------------", null, 1);
318
319        $status = $this->__siteexport_add_site($INPUT->str('site'));
320        if ( $status === false ) {
321            $this->functions->debug->message("----------------------------------------", null, 1);
322            $this->functions->debug->message("Errors during export from AJAX call", null, 1);
323            $this->functions->debug->message("========================================", null, 1);
324            return;
325        }
326
327        $this->functions->debug->message("----------------------------------------", null, 1);
328        $this->functions->debug->message("Finishing export from AJAX call", null, 1);
329        $this->functions->debug->message("========================================", null, 1);
330
331        // Print the download zip-File
332        $this->cleanCacheFiles();
333
334        // If there was a Runtime Exception
335        if (!$this->functions->debug->firstRE()) {
336            $this->functions->debug->message("There have been errors during the export.", null, 4);
337            return;
338        }
339
340        print $this->functions->downloadURL();
341        return;
342    }
343
344    /**
345     * Fetch the list of pages to be exported
346     **/
347    private function __get_siteexport_list($NS, $overrideCache = false) {
348        global $conf, $INPUT;
349
350        $PAGE = "";
351        $NS = $this->namespace = $this->functions->getNamespaceFromID($NS, $PAGE);
352        $this->functions->debug->message("ROOT Namespace to export from: '{$NS}' / {$this->namespace}", null, 2);
353
354        $depth = $this->getConf('depth');
355        $query = '';
356        $doSearch = 'search_allpages';
357
358        switch ($INPUT->int('depthType')) {
359            case 0:
360                $PAGE = $this->functions->cleanID(str_replace(":", "/", $NS . ':' . $PAGE));
361                $NS = (new PageResolver($NS))->resolveId($PAGE);
362                $exists = page_exists($NS);
363
364                if ($exists) {
365                    $data = array(array('id' => $PAGE));
366
367                    $this->functions->debug->message("Checking for Cache, depthType:0", null, 2);
368                    if (!$overrideCache && $this->filewriter->hasValidCacheFile($_REQUEST, $data))
369                    {
370                        return array();
371                    }
372
373                    $this->functions->debug->message("Exporting the following sites: ", $data, 2);
374                    return $data;
375                } else {
376                    // Does not exist, try next case
377                }
378            case 1:    $depth = 0;
379            break;
380            case 2:    $depth = $INPUT->int('depth');
381            break;
382        }
383
384        $opts = array('depth' => $depth, 'skipacl' => $this->getConf('skipacl'), 'query' => $query);
385        $this->functions->debug->message("Options", $opts, 2);
386
387        $data = array();
388        require_once (DOKU_INC . 'inc/search.php');
389
390        // Check, which TOC to take
391        if (!$this->functions->settings->useTOCFile) {
392            search($data, $conf['datadir'], $doSearch, $opts, $this->namespace);
393        } else {
394            $this->functions->debug->message("Using TOC for data", null, 2);
395
396            $doSearch = 'search_pagename';
397
398            // Create Data of the TOC File should be used instead
399            $opts['query'] = 'toc.txt';
400
401            $RAWdata = array();
402            search($RAWdata, $conf['datadir'], $doSearch, $opts, $this->namespace);
403
404            // There may be more than one toc and all of them have to be merged.
405            $data = array();
406            foreach ($RAWdata as $entry)
407            {
408                $tmpData = p_get_metadata($entry['id'], 'sitetoc siteexportTOC');
409
410                if (is_array($tmpData))
411                {
412                    $data = array_merge($data, $tmpData);
413                }
414            }
415        }
416
417        $this->functions->debug->message("Checking for Cache after lookup of pages", null, 2);
418        if (!$overrideCache && $this->filewriter->hasValidCacheFile($_REQUEST, $data))
419        {
420            return array();
421        }
422
423        $this->functions->debug->message("Exporting the following sites: ", $data, 2);
424        return $data;
425    }
426
427    private function __get_siteexport_list_and_init_tocs($NS, $isRedirected = false) {
428
429        // Clean up if not redirected
430        if (!$isRedirected && !$this->__removeOldZip()) {
431            $this->functions->debug->runtimeException("Can't remove old files.");
432            return false;
433        }
434
435        $data = $this->__get_siteexport_list($NS, $isRedirected);
436        if ($isRedirected || empty($data))
437        {
438            // if we have been redirected, simply return the data
439            $this->functions->debug->message("List is empty I guess. Used NS: '{$NS}' ", null, 1);
440            return $data;
441        }
442
443        // Create Eclipse Documentation Pages - TOC.xml, Context.xml
444        if (!empty($_REQUEST['absolutePath'])) $this->namespace = "";
445//        $this->__removeOldZip( $this->functions->settings->eclipseZipFile );
446
447        if (!empty($_REQUEST['eclipseDocZip']))
448        {
449            $toc = new siteexport_toc($this->functions, $NS);
450            $this->functions->debug->message("Generating eclipseDocZip", null, 2);
451            $this->filewriter->__moveDataToZip($toc->__getTOCXML($data), 'toc.xml');
452            $this->filewriter->__moveDataToZip($toc->__getContextXML($data), 'context.xml');
453        } else  if (!empty($_REQUEST['JavaHelpDocZip']))
454        {
455            $toc = new siteexport_javahelp($this->functions, $this->filewriter, $NS);
456            $toc->createTOCFiles($data);
457
458/*            $toc = new siteexport_toc($this->functions);
459            list($tocData, $mapData) = $toc->__getJavaHelpTOCXML($data);
460            $this->functions->debug->message("Generating JavaHelpDocZip", null, 2);
461            $this->filewriter->__moveDataToZip($tocData, 'toc.xml');
462            $this->filewriter->__moveDataToZip($mapData, 'map.xml');
463*/        }
464
465        return $data;
466    }
467
468    /**
469     * Add page with ID to the package
470     **/
471    private function __siteexport_add_site($ID) {
472        global $conf, $CURRENT_ID, $CURRENT_PARENT, $INPUT;
473
474        // Which is the current ID?
475        $CURRENT_ID = $ID;
476
477        $this->functions->debug->message("========================================", null, 2);
478        $this->functions->debug->message("Adding Site: '$ID'", null, 2);
479        $this->functions->debug->message("----------------------------------------", $_REQUEST, 2);
480
481        $request = $this->functions->settings->additionalParameters;
482        unset($request['diPlu']); // This will not be needed for the first request.
483        unset($request['diInv']); // This will not be needed for the first request.
484
485        // say, what to export and Build URL
486        // http://documentation:81/helpdesk/de/hds/getting-started?depthType=0&do=siteexport&ens=helpdesk%3Ade%3Ahds%3Agetting-started&pdfExport=1&renderer=siteexport_siteexportpdf&template=helpdesk
487
488        $do = $INPUT->int('exportbody', 0, true) == 1 ? $INPUT->str('renderer', $conf['renderer_xhtml'], true) : '';
489
490        if ($do == 'pdf' && $this->filewriter->canDoPDF())
491        {
492            $do = 'export_siteexport_pdf';
493            $INPUT->set('origRenderer', $INPUT->str('renderer', $conf['renderer_xhtml'], true));
494        } else if ($INPUT->str('renderer') == 'dw2pdf') {
495            $do = 'pdf';
496        }
497
498        $do = ($do == $conf['renderer_xhtml'] && $INPUT->int('exportbody', 0, true) != 1) ? '' : 'export_' . $do;
499
500        if ($do != 'export_' && !empty($do) )
501        {
502            $request['do'] = $do;
503        }
504
505        // set Template
506        if (!empty($INPUT->str('template'))) {
507            $request['template'] = $INPUT->str('template');
508        }
509
510        $this->functions->debug->message("REQUEST for add_site:", $request, 2);
511
512        $ID = $this->functions->cleanID($ID);
513        $url = $this->functions->wl($ID, $request, true, '&');
514
515        // Parse URI PATH and add "html"
516        $CURRENT_PARENT = $fileName = $this->functions->getSiteName($ID, true);
517        $this->functions->debug->message("Filename could be:", $fileName, 2);
518
519        $this->fileChecked[$url] = $fileName; // 2010-09-03 - One URL to one FileName
520        $this->functions->settings->depth = str_repeat('../', count(explode('/', $fileName))-1);
521
522        // fetch URL and save it in temp file
523        $tmpFile = $this->__getHTTPFile($url);
524        if ( $tmpFile === false ) {
525            $this->functions->debug->runtimeException("Creating temporary download file failed for '$url'. See log for more information.");
526            return false;
527        }
528
529        $dirname = dirname($fileName);
530        // If a Filename was given that does not comply to the original name, use this one!
531        if ( $this->filewriter->canDoPDF() ) {
532
533            $this->functions->debug->message("Will replace old filename '{$fileName}' with {$ID}", null, 1);
534            $extension = explode('.', $fileName);
535            $extension = array_pop($extension);
536
537            // 2014-04-29 added cleanID to ensure that links are generated consistently when using [[this>...]] or another local, relativ linking
538            $fileName = $dirname . '/' . $this->functions->cleanID($this->functions->getSiteTitle($ID)) . '.' . $extension;
539        } else if ( !empty($tmpFile[1]) /*&& !strstr($DATA[2], $tmpFile[1])*/ ) { // 2017-11-30: $DATA is never defined
540
541            $this->functions->debug->message("Will replace old filename '{$fileName}' with {$dirname}/{$tmpFile[1]}", null, 1);
542            $fileName = $dirname . '/' . $tmpFile[1];
543        }
544
545        // Add to zip
546        $this->fileChecked[$url] = $fileName;
547        $status = $this->filewriter->__addFileToZip($tmpFile[0], $fileName);
548        if (@unlink($tmpFile[0]) === false) {
549            $this->functions->debug->message("Could not remove temporary file", $tmpFile[0], 1);
550        }
551
552        return $status;
553    }
554
555    /**
556     * Download the file via HTTP URL + recurse if this is not an image
557     * The file will be saved as temporary file. The filename is the result.
558     **/
559    private function __getHTTPFile($URL, $RECURSE=false, $newAdditionalParameters=null) {
560        global $conf;
561
562        $EXCLUDE = $this->functions->settings->excludePattern;
563        if ( !empty($EXCLUDE) ) {
564            $PATTERN = "/(" . implode('|', explode(' ', preg_quote($EXCLUDE, '/'))) . ")/i";
565
566            $this->functions->debug->message("Checking for exclude: ", array(
567                "pattern" => $PATTERN,
568                "file" => $URL,
569                "matches" => preg_match($PATTERN, $URL) ? 'match' : 'no match'
570            ), 2);
571
572            if ( preg_match($PATTERN, $URL) ) { return false; }
573        }
574
575        $http = new HTTPProxy($this->functions);
576        $http->max_bodysize = $conf['fetchsize'];
577
578        // Add additional Params
579        $this->functions->addAdditionalParametersToURL($URL, $newAdditionalParameters);
580
581        $this->functions->debug->message("Fetching URL: '$URL'", null, 2);
582        $getData = $http->get($URL, true); // true == sloopy, get 304 body as well.
583
584        if( $getData === false ) { // || ($http->status != 200 && !$this->functions->settings->ignoreNon200) ) {
585
586            if ( $http->status != 200 && $this->functions->settings->ignoreNon200 ) {
587                $this->functions->debug->message("HTTP status was '{$http->status}' - but I was told to ignore it by the settings.", $URL, 3);
588                return true;
589            }
590
591            $this->functions->debug->message("Sending request failed with error, HTTP status was '{$http->status}'.", $URL, 4);
592            return false;
593        }
594
595        if( empty($getData) ) {
596            $this->functions->debug->message("No data fetched", $URL, 4);
597            return false;
598        }
599
600        $this->functions->debug->message("Headers received", $http->resp_headers, 2);
601
602        if ( !$RECURSE ) {
603            // Parse URI PATH and add "html"
604            $this->functions->debug->message("========================================", null, 1);
605            $this->functions->debug->message("Starting to recurse file '$URL'", null , 1);
606            $this->functions->debug->message("----------------------------------------", null, 1);
607            $this->__getInternalLinks($getData);
608            $this->functions->debug->message("----------------------------------------", null, 1);
609            $this->functions->debug->message("Finished to recurse file '$URL'", null , 1);
610            $this->functions->debug->message("========================================", null, 1);
611        }
612
613        $tmpFile = tempnam($this->functions->settings->tmpDir , 'siteexport__') ?: $this->functions->settings->tmpDir . "siteexport__";
614        $this->functions->debug->message("Temporary filename", $tmpFile, 1);
615
616        $fp = fopen( $tmpFile, "w");
617        if(!$fp) {
618            $this->functions->debug->message("Can't open temporary File '$tmpFile'.", null , 4);
619            return false;
620        }
621
622        fwrite($fp,$getData);
623        fclose($fp);
624
625        // plain/text; ...
626        $extension = explode(';', $http->resp_headers['content-type'], 2);
627        $extension = array_shift($extension);
628        $extension = explode('/', $extension, 2);
629        if ( $extension[0] == 'image' && preg_match("/^[a-zA-Z0-9]{3,}$/", $extension[1]) ) {
630            $extension = strtolower($extension[1]);
631            $this->functions->debug->message("Found new image extension:", $extension, 2);
632        } else {
633            unset($extension);
634        }
635
636        return array($tmpFile, preg_replace("/.*?filename=\"?(.*?)\"?;?$/", "$1", $http->resp_headers['content-disposition'] ?? ''), $extension ?? '');
637    }
638
639    /**
640     * Find internal links in the currently downloaded file. This also matches inside CSS files
641     **/
642    private function __getInternalLinks(&$DATA) {
643
644        $PATTERN = '(href|src|action)="([^"]*)"';
645        if (!$this->functions->settings->exportLinkedPages) {
646            // no links or forms
647            $PATTERN = '((?<!<a )href|src|action)="([^"]*)"';
648        }
649
650        $CALLBACK = array($this, '__fetchAndReplaceLink');
651        $DATA = preg_replace_callback("/$PATTERN/i", $CALLBACK, $DATA);
652
653        // Match CSS url(...) but avoid plain text like "API-URL (foo)"
654        $PATTERNCSS = '(?<![A-Za-z0-9_-])(url\s*?)\(([^\)]*)\)';
655        $DATA = preg_replace_callback("/$PATTERNCSS/i", $CALLBACK, $DATA);
656
657        $PATTERNSRCSET = '(srcset)="([^"]*)"';
658        $CALLBACK = array($this, '__fetchAndReplaceSrcset');
659        $DATA = preg_replace_callback("/$PATTERNSRCSET/i", $CALLBACK, $DATA);
660    }
661
662    /**
663     * Support for 'srcset' image atributes
664     */
665    private function __fetchAndReplaceSrcset($DATA) {
666
667        $this->functions->debug->message("SRCSET", $DATA, 2);
668        if ( strtolower($DATA[1]) != 'srcset' ) { return $DATA[0]; }
669
670        $SRCSETS = array_map( 'trim', explode( ",", $DATA[2] ) );
671        $NEWSRCSETS = array();
672        foreach( $SRCSETS as $SRCSET ) {
673
674            // tehre should be no other unencoded spaces in here
675            list($url, $size) = explode( " ", $SRCSET, 2 );
676
677            $this->functions->debug->message("SRCSET: URL before: '$url'", null, 2);
678            $url = $this->__fetchAndReplaceLink(array( $url, '', $url ));
679            $url = substr( $url, 2, -1 );
680            $this->functions->debug->message("SRCSET: URL after: '$url'", null, 2);
681
682            $NEWSRCSETS[]=$url . ' ' . $size;
683        }
684
685        return 'srcset="' . implode( ',', $NEWSRCSETS )  . '"';
686    }
687
688    /**
689     * Deep Fetch and replace of links inside the texts matched by __getInternalLinks
690     **/
691    private function __fetchAndReplaceLink($DATA) {
692        global $conf, $CURRENT_ID, $CURRENT_PARENT;
693
694        $noDeepReplace = true;
695        $newAdditionalParameters = $this->functions->settings->additionalParameters;
696        $newDepth = $this->functions->settings->depth;
697        $hadBase = false;
698
699        // Clean data[2], remote ' and "
700        $DATA[2] = preg_replace("/^\s*?['\"]?(.*?)['\"]?\s*?$/", '\1', trim($DATA[2]));
701
702        $this->functions->debug->message("Starting Link Replacement", array('data' => $DATA, 'additional Params' => $newAdditionalParameters, 'newDepth' => $newDepth, 'currentID' => $CURRENT_ID, 'currentParent' => $CURRENT_PARENT), 2);
703
704        // STEP 1: check for well known links that can be returned
705        if ( $this->__fetchAndReplaceWellKnownLinks( $DATA ) ) {
706            return $this->__rebuildLink($DATA, "");
707        }
708
709        // 2014-07-21: Origdata before anything else - or it will be missing some things.
710        $ORIGDATA2 = $DATA;
711        //        $ORIGDATA2 = $DATA[2]; // 08/10/2010 - this line required a $this->functions->wl which may mess up with the base URL
712        $this->functions->debug->message("OrigDATA is:", $ORIGDATA2, 1);
713
714        // strip all things out
715        // changed Data
716        $PARAMS = @parse_url($DATA[2], PHP_URL_QUERY);
717        $ANCHOR = @parse_url($DATA[2], PHP_URL_FRAGMENT);
718        $DATA[2] = @parse_url($DATA[2], PHP_URL_PATH);
719
720        // 2014-05-12 - fix problem with URLs starting with a ./ or ../ ... they seem to need the current IDs root
721        if (preg_match("#^\.\.?/#", $DATA[2])) {
722            $DATA[2] = getNS($CURRENT_ID) . ':' . $DATA[2];
723        }
724
725        // 2010-08-25 - fix problem with relative movement in links ( "test/../test2" )
726        // 2014-06-30 - what? to what will this end relatively?
727        $tmpData2 = '';
728        while ($tmpData2 != $DATA[2]) {
729            $tmpData2 = $DATA[2];
730            $DATA[2] = preg_replace("#/(?!\.\.)[^\/]*?/\.\./#", '/', $DATA[2]);
731        }
732
733        $temp = preg_replace("%^" . preg_quote(DOKU_BASE, '%') . "%", "", $DATA[2]);
734        if ($temp != $DATA[2]) {
735            $DATA[2] = $temp;
736            $hadBase = true; // 2010-08-23 Check if there has been a rewrite here that will have to be considered later on
737        }
738
739        $this->functions->debug->message("URL before rewriting option for others than 1", array($DATA, $PARAMS, $hadBase), 1);
740
741
742        // Handle rewrites other than 1 - just for non-lib-files
743        $this->__fetchAndReplaceLinkHandleRewrite( $DATA, $PARAMS );
744
745        $this->functions->debug->message("URL before rewriting option", array($DATA, $PARAMS), 2);
746
747        // Generate ID
748        $DATA[2] = str_replace('/', ':', $DATA[2]);
749
750        // If Data was empty this must be the same file!;
751        if (empty($DATA[2])) {
752            $DATA[2] = $CURRENT_ID;
753        }
754
755        $ID = $DATA[2];
756        $MEDIAMATCHER = "#(_media(/|:)|media=|_detail(/|:)|_export(/|:)|do=export_)#i"; // 2010-10-23 added "(/|:)" for the ID may not contain slashes anymore
757        $ISMEDIA = preg_match($MEDIAMATCHER, $DATA[2]) === 1;
758        if ($ISMEDIA !== false && $conf['userewrite'] == 1) {
759            $ID = preg_replace("#^_(detail|media)(/|:)#", "", $ID);
760        }
761
762        $ID = $this->functions->cleanID($DATA[2], null, $ISMEDIA);
763        //        $ID = $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'media') ); // Export anpassung nun weiter unten
764
765        //        $IDexists = page_exists($ID); // 08/10/2010 - Not needed. This will be done in the next block.
766        //        $this->functions->debug->message("Current ID: '$ID' exists: '" . ($IDexists ? 'true' : 'false') . "' (will be set to 'false' anyway)", null, 1);
767
768        $IDifIDnotExists = $ID; // 08/10/2010 - Save ID - with possible upper cases to preserve them
769        $IDexists = false;
770
771        $this->functions->debug->message("Resolving ID: '$ID'", null, 2);
772        if ($ISMEDIA !== false) {
773            $ID = (new MediaResolver('root'))->resolveId($ID);
774            $IDexists = media_exists($ID);
775            $this->functions->debug->message("Current mediaID to filename: '" . mediaFN($ID) . "'", null, 2);
776        } else {
777            $ID = (new PageResolver('root'))->resolveId($ID);
778            $IDexists = page_exists($ID);
779            $this->functions->debug->message("Current ID to filename: '" . wikiFN($ID) . "'", null, 2);
780        }
781
782        $this->functions->debug->message("Current ID after resolvement: '$ID' the ID does exist: '" . ($IDexists ? 'true' : 'false') . "'", null, 2);
783        //        $ORIGDATA2 = @parse_url($this->functions->wl($ORIGDATA2, null, true)); // What was the next 2 line for? It did mess up with links from {{jdoc>}}
784        //        $this->functions->debug->message("OrigData ID after parse:", $ORIGDATA2, 1); // 08/10/2010 - The lines are obsolete when the $ORIGDATA2 = $DATA. $ORIGDATA is only for fallback
785
786        // 08/10/2010 - If the ID does not exist, we may have a problem here with upper cases - they will all be lower by now!
787        if (!$IDexists) {
788            $ID = $IDifIDnotExists; // there may have been presevered Upper cases. We will need them!
789        }
790
791        // $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'media') || strstr($DATA[2], 'export') );
792        if (substr($ID, -1) == ':' || empty($ID)) $ID .= $conf['start'];
793
794        // Generate Download URL
795        // $PARAMS = trim(str_replace('&amp;', '&', $PARAMS));
796        $PARAMS = trim($PARAMS);
797        $this->functions->removeWikiVariables($PARAMS, false, true);
798
799        $url = $this->functions->wl($ID, null, true, null, null, true, $hadBase) . (!empty($ANCHOR) ? '#' . $ANCHOR : '') . (!empty($PARAMS) ? '?' . $PARAMS : '');
800        $this->functions->debug->message("URL from ID: '$url'", null, 2);
801
802        // Parse URI PATH and add "html"
803        $uri = @parse_url($url);
804        $DATA[2] = $uri['path'];
805
806        $this->functions->debug->message("DATA after parsing.", $DATA, 2);
807
808        // Second Rewrite for UseRewrite = 2
809        if ($conf['userewrite'] == 2 && preg_match("%((/lib/exe/(fetch|detail|indexer)|feed|doku)\.php)/?(.*?)$%", $DATA[2], $matches)) {
810
811            // The actual file in lib
812            $DATA[2] = $matches[1];
813            $PARAMS .= '&' . (in_array($matches[3], array('fetch', 'detail')) ? 'media' : 'id') . '=' . cleanID(str_replace('/', ':', $matches[4]));
814
815            $this->functions->debug->message("DATA after second rewrite with UseRewrite = 2", array($DATA, $matches, $PARAMS), 1);
816        }
817
818        $DATA['ANCHOR'] = $ANCHOR;
819        $DATA['PARAMS'] = $PARAMS;
820        $elements = explode('/', $DATA[2]);
821
822        // Main Switch to check the link
823        $result = $this->__fetchAndReplaceLinkMainSwitch( $elements, $DATA, $url, $newAdditionalParameters, $PARAMS, $noDeepReplace, $fileName, $newDepth, $ID, $IDexists );
824        if ( $result !== null ) {
825            return $result;
826        }
827
828        $this->functions->debug->message("DATA after SWITCH CASE decision", array($DATA, $noDeepReplace, $fileName, $newDepth), 1);
829
830        if ($this->filewriter->canDoPDF()) {
831            $this->functions->addAdditionalParametersToURL($url, $newAdditionalParameters);
832            $DATA[2] = $url;
833            unset($DATA['PARAMS']);
834            $rebuilt = $this->__rebuildLink($DATA, '');
835            // __rebuildLink returns a full href|src|action="…" fragment; log the inner URL for readability.
836            if (preg_match('/^\\w+\\s*=\\s*"([^"]*)"/', $rebuilt, $m)) {
837                $this->functions->debug->message("Creating PDF: fetching '{$m[1]}' (replacement {$rebuilt})", null, 2);
838            } else {
839                $this->functions->debug->message("Creating PDF with replacement '$rebuilt'", null, 2);
840            }
841
842            return $rebuilt;
843        }
844
845        // Finalize
846        return $this->__fetchAndReplaceLinkFinish( $DATA, $url, $noDeepReplace, $newAdditionalParameters, $ORIGDATA2, $newDepth, $IDexists, $fileName );
847    }
848
849    private function __fetchAndReplaceLinkMainSwitch( &$elements, &$DATA, &$url, &$newAdditionalParameters, &$PARAMS, &$noDeepReplace, &$fileName, &$newDepth, &$ID, &$IDexists ) {
850        switch (array_pop($elements)) {
851            // CSS Extra Handling with extra rewrites
852            case 'css.php'    :    // $DATA[2] .=  ( !$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS))) . '.css';
853                $DATA[2] .= '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS)) . '.css'; // allways put parameters behind
854                // No paramters needed since they are rewritten.
855                $DATA['PARAMS'] = "";
856                $noDeepReplace = false;
857                $fileName = $this->functions->getSiteName($ID, true);
858
859                // NewDepth has to be relative to the css file itself ...
860                $newDepth = './' . str_repeat('../', count(explode('/', $fileName))-1); // it is an ID at this point.
861                $newAdditionalParameters['do'] = 'siteexport';
862
863                $this->functions->debug->message("This is CSS file", array($DATA, $noDeepReplace, $fileName, $newDepth, $newAdditionalParameters), 2);
864
865                break;
866            case 'jquery.php'   :
867            case 'js.php'    :    // $DATA[2] .= ( !$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS))) . '.js';
868                $DATA[2] .= '.t.' . $this->functions->cleanID($_REQUEST['template']) . '.js'; // allways put parameters behind
869                // set Template
870                if (!empty($_REQUEST['template'])) {
871                    $url .= (strstr($url, '?') ? '&' : '?') . 'template=' . $_REQUEST['template'];
872                }
873                // No paramters needed since they are rewritten.
874                $DATA['PARAMS'] = "";
875                $newAdditionalParameters['do'] = 'siteexport';
876
877                $this->functions->debug->message("This is JS file", array($DATA, $url, $newAdditionalParameters), 2);
878
879                break;
880                // Detail Handling with extra Rewrites if Paramaters are available - otherwise this is just the fetch
881            case 'indexer.php' :
882                $this->functions->debug->message("Skipping indexer", null, 2);
883                return "";
884            case 'detail.php' :
885                $noDeepReplace = false;
886
887                $this->__getParamsAndDataRewritten($DATA, $PARAMS, 'media');
888                $ID = $this->functions->cleanID(str_replace('/', ':', $DATA[2]), null, strstr($DATA[2], 'media'));
889                $fileName = $this->functions->getSiteName($ID, true); // 2010-09-03 - rewrite with override enabled
890
891                $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
892                $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
893                $DATA[2] .= '.detail.html';
894
895                $this->functions->debug->message("This is detail.php file with addParams", array($DATA, $ID, $fileName, $newDepth, $newAdditionalParameters), 2);
896                break;
897            case 'doku.php' :
898
899                $noDeepReplace = false;
900                $this->__getParamsAndDataRewritten($DATA, $PARAMS, 'id');
901                $ID = $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'id'));
902
903                $this->functions->debug->message("Current ID to filename (doku.php): '" . wikiFN($ID) . "'", null, 2);
904
905                $fileName = $this->functions->getSiteName($ID); // 2010-09-03 - rewrite with override enabled
906
907                $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
908                $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
909                $DATA2Name = explode('/', $fileName);
910                $DATA[2] .= '.' . array_pop($DATA2Name);
911
912                $this->functions->debug->message("This is doku.php file with addParams", array($DATA, $ID, $fileName, $newDepth, $newAdditionalParameters), 2);
913                return $this->__rebuildLink($DATA);
914
915                // Fetch Handling for media - rewriting everything
916            case 'fetch.php':
917                $this->__getParamsAndDataRewritten($DATA, $PARAMS, 'media');
918
919                $DATA[2] = str_replace('/', ':', $DATA[2]);
920                $ID = $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'media'));
921                $ID = (new MediaResolver('root'))->resolveId($ID);
922                $IDexists = media_exists($ID);
923
924                $DATA[2] = $this->functions->wl($ID, null, null, null, $IDexists, true);
925                $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
926
927                $DATA['PARAMS'] = "";
928                $newAdditionalParameters = array();
929
930                $this->functions->debug->message("This is fetch.php file", array($DATA, $ID, $PARAMS), 2);
931                break;
932
933                // default Handling for Pages
934            case 'feed.php':
935                return ""; // Ignore. Has no sense to export.
936            default:
937                if (preg_match("%" . preg_quote(DOKU_BASE, '%') . "_detail/%", $DATA[2])) {
938
939                    // GET ID Param from origdata2
940                    preg_match("#id=(.*?)(&|\")#i", $DATA[0], $backlinkID);
941                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
942
943                    $fileIDPart = isset($backlinkID[1]) && !empty($backlinkID[1]) ? $this->functions->cleanID(urldecode($backlinkID[1])) : 'detail';
944
945                    $ID = preg_replace("#^_detail(/|:)#", "", $ID);
946                    $DATA[2] .= ':' . $fileIDPart . '.' . $this->functions->settings->fileType; // add namespace and subpage for back button and add filetype
947
948                    $noDeepReplace = false;
949                    $fileName = $this->functions->shortenName($DATA[2]);
950                    $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
951                    $url .= (strstr($url, '?') ? '&' : '?') . 'id=' . $fileIDPart; // add id-part to URL for backlinks
952
953                    $DATA['PARAMS'] = "";
954
955                    $this->functions->debug->message("This is something with '_detail' file", array($DATA, $backlinkID, $newDepth, $url, $ID), 2);
956                } else if (preg_match("%" . preg_quote(DOKU_BASE, '%') . "_export/(.*?)/%", $DATA[2], $fileType)) {
957
958                    // Fixes multiple codeblocks in one file
959                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
960
961                    // add the Params no matter what they are. This is export. We don't mess with other files
962                    // adding the "/" fixes the usage of multiple codeblocks in the same namespace
963                    $DATA[2] .= (empty($PARAMS) ? '' : '/' . $PARAMS) . '.' . $fileType[1];
964
965                    $DATA['PARAMS'] = "";
966                    $this->functions->debug->message("This is something with '_export' file", $DATA, 2);
967
968                } else if ($IDexists) { // 08/10/2010 - was page_exists($ID) - but this should do as well.
969                    // If this is a page ... skip it!
970                    $DATA[2] .= (!$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS))) . '.' . $this->functions->settings->fileType;
971
972                    $DATA[2] = $this->functions->shortenName($DATA[2]);
973
974                    // If Parameters are to be included in the filename - they must not be added twice
975                    if ($this->functions->settings->addParams) $DATA['PARAMS'] = "";
976
977                    $this->functions->debug->message("This page really exists", $DATA, 1);
978
979                    return $this->__rebuildLink($DATA, null, $ID);
980                } else {
981                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS, true);
982                    $newAdditionalParameters = null; // 2014-06-27 - when using the "normal" files way we will not need any additional stuff.
983                    // This would make problems with e.g. ditaa plugin
984                }
985
986                unset($newAdditionalParameters['diPlu']);
987        }
988
989        return null;
990    }
991
992    private function __fetchAndReplaceLinkFinish( $DATA, $url, $noDeepReplace, $newAdditionalParameters, $ORIGDATA2, $newDepth, $IDexists, $fileName ) {
993        global $conf, $CURRENT_ID, $CURRENT_PARENT;
994
995        // Create Name to save the file at
996        $DATA[2] = str_replace(':', '_', $DATA[2]);
997        $DATA[2] = $this->functions->shortenName($DATA[2]);
998
999
1000        // File already loaded?
1001        // 2010-10-23 - changes in_array from DATA[2] to $url - to check real URLs, the DATA[2] file will be checked with fileExistsInZip
1002        if (in_array($url, array_keys($this->fileChecked))) {
1003            $DATA[2] = $this->fileChecked[$url];
1004            $this->functions->debug->message("File has been checked before.", array($DATA, $url), 2);
1005            return $this->__rebuildLink($DATA);
1006        }
1007
1008        // 2010-09-03 - second check if the file is in the ZIP already.
1009        if ($this->filewriter->fileExistsInZip($DATA[2])) {
1010            $this->functions->debug->message("File with DATA exists in ZIP.", $DATA, 3);
1011            return $this->__rebuildLink($DATA);
1012        }
1013
1014        // 2010-10-23 - What if this is a fetch.php? than we produced an error.
1015        //        $this->fileChecked[] = $DATA[2];
1016
1017        // get tempFile and save it
1018        $origDepth = $this->functions->settings->depth;
1019        $this->functions->settings->depth = $newDepth;
1020
1021        $tmpID = $CURRENT_ID;
1022        $tmpParent = $CURRENT_PARENT;
1023
1024        $CURRENT_PARENT = $fileName;
1025        $this->functions->debug->message("Going to get the file", array($url, $noDeepReplace, $newAdditionalParameters), 2);
1026        $tmpFile = $this->__getHTTPFile($url, $noDeepReplace, $newAdditionalParameters);
1027        $this->functions->debug->message("The getHTTPFile result is still empty", $tmpFile === false ? 'YES' : 'NO', 2);
1028
1029        $CURRENT_PARENT = $tmpParent;
1030        $CURRENT_ID = $tmpID;
1031        $this->functions->settings->depth = $origDepth; // 2010-09-03 - Reset depth at the very end
1032
1033        if ($tmpFile === false) {
1034            // Keep an potentially extra link intact
1035
1036            $this->functions->debug->message("The fetched file '$url' is 'false'", null, 3);
1037            if ($IDexists === false) {
1038                $this->functions->debug->message("The file does not exist, fallback to ORIGDATA", $ORIGDATA2, 2);
1039                $DATA[2] = $this->functions->shortenName($ORIGDATA2[2]); // get Origdata Path
1040            }
1041
1042            $this->fileChecked[$url] = $DATA[2]; // 2010-09-03 - One URL to one FileName
1043            $link = $this->__rebuildLink($DATA);
1044            $this->functions->debug->message("Final Link after empty file from '$url'", null, 2);
1045
1046            return $link;
1047        } elseif ($tmpFile === true) {
1048            // Non-200 ignored; keep original link intact.
1049            return is_array($ORIGDATA2) && isset($ORIGDATA2[0]) ? $ORIGDATA2[0] : "file_not_found_and_ignored.html";
1050        }
1051
1052        $this->functions->debug->message("The fetched file looks good.", $tmpFile, 2);
1053        $dirname = dirname($DATA[2]);
1054
1055        // If a Filename was given that does not comply to the original name, us this one!
1056        // 2014-02-28 But only if we are on PDF Mode. Does this produce any other Problems?
1057        if ( $this->filewriter->canDoPDF() && !empty($tmpFile[1]) && !strstr($DATA[2], $tmpFile[1]) ) {
1058            $DATA[2] = $dirname . '/' . $tmpFile[1];
1059            $this->functions->debug->message("Changed filename.", $DATA[2], 2);
1060        }
1061
1062        // Custom extension if not set already - 2014-07-02
1063        if ( !empty($tmpFile[2]) && !preg_match("#\.{$tmpFile[2]}$#", $DATA[2]) ) {
1064            $DATA[2] = preg_match("#(\.[^\.]+)$#", $DATA[2]) ? preg_replace("#(\.[^\.]+)$#", '.' . $tmpFile[2], $DATA[2]) : $DATA[2] . '.' . $tmpFile[2];
1065            $this->functions->debug->message("Added extension provided from Server.", $DATA[2], 2);
1066        }
1067
1068        // Add to zip
1069        $this->fileChecked[$url] = $DATA[2]; // 2010-09-03 - One URL to one FileName
1070
1071        $this->filewriter->__addFileToZip($tmpFile[0], $DATA[2]);
1072        if (@unlink($tmpFile[0]) === false) {
1073            $this->functions->debug->message("Could not delete temporary file.", $tmpFile[0], 2);
1074        }
1075
1076        $newURL = $this->__rebuildLink($DATA);
1077        $this->functions->debug->message("Returning final Link to document: '$newURL'", null, 2);
1078
1079        return $newURL;
1080    }
1081
1082    private function __fetchAndReplaceWellKnownLinks( $DATA ) {
1083        // $DATA[2] = urldecode($DATA[2]); // Leads to problems because it does not re-encode the url
1084        // External and mailto links
1085        if (preg_match("%^(https?://|mailto:|javascript:|data:)%", $DATA[2])) {
1086            $this->functions->debug->message("Don't like http, mailto, data or javascript links here", null, 1);
1087            return true;
1088        }
1089        //if ( preg_match("%^(https?://|mailto:|" . DOKU_BASE . "/_export/)%", $DATA[2]) ) { return $this->__rebuildLink($DATA, ""); }
1090        // External media - this is deep down in the link, so we have to grep it out
1091        if (preg_match("%media=(https?://.*?$)%", $DATA[2], $matches)) {
1092            $DATA[2] = $matches[1];
1093            $this->functions->debug->message("This is an HTTP like somewhere else", $DATA, 1);
1094            return true;
1095        }
1096        // reference only links won't have to be rewritten
1097        if (preg_match("%^#.*?$%", $DATA[2])) {
1098            $this->functions->debug->message("This is a refercence only", null, 1);
1099            return true;
1100        }
1101
1102        return false;
1103    }
1104
1105    // Handle rewrites other than 1 - just for non-lib-files
1106    private function __fetchAndReplaceLinkHandleRewrite( &$DATA, &$PARAMS ) {
1107        global $conf;
1108        if ( !preg_match('$^(' . DOKU_BASE . ')?lib/$', $DATA[2]) ) {
1109            $this->functions->debug->message("Did not match '$^(" . DOKU_BASE . ")?lib/$' userewrite == {$conf['userewrite']}", null, 2);
1110            if ( $conf['userewrite'] == 2 ) {
1111                $DATA[2] = $this->__getInternalRewriteURL($DATA[2]);
1112            } elseif ( $conf['userewrite'] == 0 ) {
1113                $this->__getParamsAndDataRewritten($DATA, $PARAMS);
1114            }
1115        } else {
1116            $this->functions->debug->message("This file must be inside lib ...", null, 2);
1117        }
1118    }
1119
1120    /**
1121     * build the new link to be put in place for the donwloaded site
1122     **/
1123    private function __rebuildLink($DATA, $DEPTH = null, $existingPageID = null) {
1124        global $CURRENT_ID, $CURRENT_PARENT;
1125
1126        // depth is set, skip this one
1127        if (is_null($DEPTH)) $DEPTH = $this->functions->settings->depth;
1128        $DATA[2] .= (!empty($DATA['PARAMS']) && $this->functions->settings->addParams ? '?' . $DATA['PARAMS'] : '') . (!empty($DATA['ANCHOR']) ? '#' . $DATA['ANCHOR'] : '');
1129
1130        $intermediateURL = $DEPTH . $DATA[2];
1131
1132        // 2012-06-15 originally has an absolute path ... we might need a relative one if not in our namespace
1133        if (empty($_REQUEST['absolutePath']) && preg_match("#^(\.\./)+#", $intermediateURL)) {
1134
1135            $this->functions->debug->message("OK, this is not to be absolute: ", array($intermediateURL, $CURRENT_PARENT), 1);
1136            $intermediateURL = $this->functions->getRelativeURL($intermediateURL, $CURRENT_PARENT, $existingPageID);
1137        }
1138
1139        $newURL = $DATA[1] == 'url' ? $DATA[1] . '(' . $intermediateURL . ')' : $DATA[1] . '="' . $intermediateURL . '"';
1140        $this->functions->debug->message("Re-created URL: '$newURL'", $DEPTH, 2);
1141
1142        return $newURL;
1143    }
1144
1145
1146    /**
1147     * remove an old zip file
1148     **/
1149    private function __removeOldZip($FILENAMEID = null, $checkForMore = true, $reauthenticated = false) {
1150        global $INFO;
1151        global $conf;
1152
1153        $returnValue = true;
1154
1155        if (empty($FILENAMEID)) {
1156            $FILENAMEID = $this->functions->settings->origZipFile;
1157        }
1158
1159        if (!file_exists(mediaFN($FILENAMEID))) {
1160            $returnValue = true;
1161        } else {
1162
1163            require_once(DOKU_INC . 'inc/media.php');
1164            if (!media_delete($FILENAMEID, $INFO['perm'])) {
1165
1166                if (!$reauthenticated) {
1167                    $this->functions->authenticate();
1168                    return $this->__removeOldZip($FILENAMEID, $checkForMore, true);
1169                }
1170
1171                $returnValue = false;
1172            }
1173        }
1174
1175        if ($checkForMore) {
1176            // Try to remove more files.
1177            $ns = getNS($FILENAMEID);
1178            $fn = $this->functions->getSpecialExportFileName(noNS($FILENAMEID), '.+');
1179
1180            $data = array();
1181            search($data, $conf['mediadir'], 'search_media', array('pattern' => "/$fn$/i"), $ns);
1182
1183            if (count($data) > 0) {
1184
1185                // 30 Minuten Cache Zeit
1186                $cache = $this->functions->settings->cachetime;
1187                foreach ($data as $media) {
1188
1189                    //decide if has to be deleted needed:
1190                    if ($media['mtime'] < time()-$cache) {
1191                        $this->__removeOldZip($media['id'], false, $reauthenticated);
1192                    }
1193                }
1194            }
1195
1196        }
1197
1198        return $returnValue;
1199    }
1200
1201    /**
1202     * if confrewrite is set to internal rewrite, use this function - taken from a DW renderer
1203     **/
1204    private function __getInternalRewriteURL($url) {
1205        global $conf;
1206
1207        //construct page id from request URI
1208        if ($conf['userewrite'] != 2) { return $url; }
1209
1210        //get the script URL
1211        if ($conf['basedir']) {
1212            $relpath = '';
1213            $script = $conf['basedir'] . $relpath . basename($_SERVER['SCRIPT_FILENAME']);
1214        } elseif ($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']) {
1215            $script = preg_replace('/^' . preg_quote($_SERVER['DOCUMENT_ROOT'], '/') . '/', '',
1216            $_SERVER['SCRIPT_FILENAME']);
1217            $script = '/' . $script;
1218        } else {
1219            $script = $_SERVER['SCRIPT_NAME'];
1220        }
1221
1222        //clean script and request (fixes a windows problem)
1223        $script  = preg_replace('/\/\/+/', '/', $script);
1224        $request = preg_replace('/\/\/+/', '/', $url);
1225
1226        //remove script URL and Querystring to gain the id
1227        $id = $request;
1228        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
1229            $id = preg_replace('/\?.*/', '', $match[1]);
1230        }
1231        $id = urldecode($id);
1232        //strip leading slashes
1233        $id = preg_replace('!^/+!', '', $id);
1234
1235        return $id;
1236    }
1237
1238    /**
1239     * rewrite parameter calls
1240     **/
1241    private function __getParamsAndDataRewritten(&$DATA, &$PARAMS, $IDKEY = 'id') {
1242
1243        if (empty($PARAMS))
1244            return array();
1245
1246        $PARRAY = explode('&', str_replace('&amp;', '&', $PARAMS));
1247        $PARAMS = array();
1248
1249        foreach ($PARRAY as $item) {
1250            list($key, $value) = array_pad( explode('=', $item, 2), 2, null );
1251            if (empty($key) || empty($value))
1252                continue;
1253
1254            if (strtolower(trim($key)) == $IDKEY) {
1255                $DATA[2] = preg_replace("%^" . preg_quote(DOKU_BASE, '%') . "%", "", str_replace(':', '/', $value));
1256                continue;
1257            }
1258
1259            $PARAMS[] = "$key=$value";
1260        }
1261
1262        sort($PARAMS);
1263
1264        $PARAMS = implode('&', $PARAMS);
1265    }
1266
1267    /**
1268     * rewrite detail.php calls
1269     **/
1270    private function __rebuildDataForNormalFiles(&$DATA, &$PARAMS, $addHash = false) {
1271        $PARTS = explode('.', $DATA[2]);
1272        $EXT = '';
1273        if (count($PARTS) > 1) {
1274            $EXT = '.' . array_pop($PARTS);
1275        }
1276
1277        $internalParams = $PARAMS = preg_replace("/(=|\?|&amp;)/", ".", $PARAMS);
1278
1279        // Do not map *.php → export fileType for real PHP endpoints (doku.php, lib/exe/*, …).
1280        // Otherwise PDF export turns form actions into e.g. /doku.pdf and breaks crawled HTML.
1281        $trimPath = ltrim($DATA[2], '/');
1282        $skipPhpRemap = (strpos($trimPath, 'lib/exe/') === 0)
1283            || (bool) preg_match('#(^|/)(doku|index)\.php$#', $trimPath);
1284
1285        // add anyways - if on overridde
1286        if (!$this->functions->settings->addParams && !empty($PARAMS) && $addHash) {
1287            $internalParams = md5($PARAMS);
1288        } else if (!$this->functions->settings->addParams) {
1289            $internalParams = null;
1290        }
1291
1292        $tailExt = ($EXT == '.php' && !$skipPhpRemap) ? '.' . $this->functions->settings->fileType : $EXT;
1293        $DATA[2] = implode('.', $PARTS) . (empty($internalParams) ? '' : '.' . $this->functions->cleanID($internalParams)) . $tailExt;
1294        $DATA[2] = preg_replace("/\.+/", ".", $DATA[2]);
1295        $this->functions->debug->message("Rebuilding Data for normal file.", $DATA[2], 1);
1296    }
1297
1298    /*
1299     * Clean JS and CSS cache files
1300     */
1301    private function cleanCacheFiles() {
1302
1303        $_SERVER['HTTP_HOST'] = preg_replace("/:?\d+$/", '', $_SERVER['HTTP_HOST']);
1304        $cache = getCacheName('scripts' . $_SERVER['HTTP_HOST'] . '-siteexport-js-' . $_SERVER['SERVER_PORT'], '.js');
1305        $this->unlinkIfExists($cache);
1306
1307        $tpl = trim(preg_replace('/[^\w-]+/', '', $_REQUEST['template']));
1308        if ($tpl)
1309        {
1310            $tplinc = DOKU_INC . 'lib/tpl/' . $tpl . '/';
1311        } else {
1312            $tplinc = DOKU_TPLINC;
1313        }
1314
1315        // The generated script depends on some dynamic options
1316        $cache = getCacheName('styles' . $_SERVER['HTTP_HOST'] . '-siteexport-js-' . $_SERVER['SERVER_PORT'] . DOKU_BASE . $tplinc , '.css');
1317        $this->unlinkIfExists($cache);
1318    }
1319
1320    /**
1321     * Clear Cache
1322     */
1323    private function unlinkIfExists($cache) {
1324        if (file_exists($cache) && @unlink($cache) === false) {
1325            $this->functions->debug->message('Could not remove file', $cache, 1 );
1326        }
1327
1328        if (function_exists('gzopen') && @unlink("{$cache}.gz") === false ) {
1329            $this->functions->debug->message('Could not remove file', $cache . '.gz', 1 );
1330        }
1331    }
1332}
1333