xref: /plugin/siteexport/action/ajax.php (revision 7d101cc131696cb3a0de345d8044a69fb2ef70e9)
1<?php
2/**
3 * Site Export Plugin
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     i-net software <tools@inetsoftware.de>
7 * @author     Gerry Weissbach <gweissbach@inetsoftware.de>
8 */
9
// This file only works inside DokuWiki: make sure the base constants exist.
defined('DOKU_INC') || define('DOKU_INC',realpath(dirname(__FILE__).'/../../../../').'/');

if ( defined('DOKU_PLUGIN') === false ) {
    // Just for sanity: pull in the plugin base before declaring the plugin directory
    require_once(DOKU_INC.'inc/plugin.php');
    define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
}
17
18require_once(DOKU_PLUGIN.'action.php');
19require_once(DOKU_INC.'/inc/search.php');
20
21require_once(DOKU_PLUGIN.'siteexport/inc/functions.php');
22require_once(DOKU_PLUGIN.'siteexport/inc/httpproxy.php');
23require_once(DOKU_PLUGIN.'siteexport/inc/filewriter.php');
24require_once(DOKU_PLUGIN.'siteexport/inc/toc.php');
25require_once(DOKU_PLUGIN.'siteexport/inc/javahelp.php');
26
27class action_plugin_siteexport_ajax extends DokuWiki_Action_Plugin
28{
29    /**
30     * New internal variables for better structure
31     */
32    private $filewriter = null;
33    public $functions = null;
34
35    // List of files that have already been checked
36    private $fileChecked = array();
37
38    // Namespace of the page to export
39    private $namespace = '';
40
/**
 * Return the plugin information (kept for backward compatibility).
 *
 * The parent class is asked first when it offers a getInfo() method. If that
 * does not produce an array, the information is read from the
 * plugin.info.txt file one directory above this file.
 *
 * @see inc/DokuWiki_Plugin#getInfo()
 * @return mixed the parent's info array, or the result of confToHash()
 */
function getInfo(){
    $info = null;

    // "parent" is a keyword, not a class name; resolve the real parent class
    // before probing it so method_exists() receives a valid class string.
    $parentClass = get_parent_class($this);
    if ( $parentClass !== false && method_exists($parentClass, 'getInfo') ) {
        $info = parent::getInfo();
    }

    return is_array($info) ? $info : confToHash(dirname(__FILE__).'/../plugin.info.txt');
}
51
/**
 * Register the handlers of this plugin with the DokuWiki event controller.
 *
 * Both handlers are attached to the BEFORE phase of their event.
 *
 * @param $controller controller object offering register_hook()
 */
function register(&$controller) {
    // event name => handler method of this class
    $hooks = array(
        'AJAX_CALL_UNKNOWN'     => 'ajax_siteexport_provider',
        'ACTION_ACT_PREPROCESS' => 'siteexport_action',
    );

    foreach ( $hooks as $eventName => $handler ) {
        $controller->register_hook($eventName, 'BEFORE', $this, $handler);
    }
}
59
/**
 * AJAX provider - decides which siteexport AJAX handler has to run.
 *
 * Calls whose name does not contain '__siteexport' are left untouched.
 * For all others the helper objects are initialised and the matching
 * handler is invoked; unknown siteexport calls do nothing further.
 *
 * @param $event event whose data holds the name of the AJAX call
 * @param $args  not used by this handler
 */
function ajax_siteexport_provider(&$event, $args) {

    // Not one of ours - let somebody else deal with it.
    if ( !strstr($event->data, '__siteexport' ) ) {
        return;
    }

    $this->__init_functions();

    $call = $event->data;
    if ( $call == '__siteexport_getsitelist' ) {
        $this->ajax_siteexport_getsitelist( $event );
    } elseif ( $call == '__siteexport_addsite' ) {
        $this->ajax_siteexport_addsite( $event );
    } elseif ( $call == '__siteexport_generateurl' ) {
        $this->ajax_siteexport_generateurl( $event );
    }
}
81
/**
 * Export from a URL - handler for the 'siteexport' action.
 *
 * Runs only when the requested action is 'siteexport'. It collects the list
 * of pages for the current $ID, adds each page to the export package and
 * finally redirects to the generated file. Once the configured
 * 'max_execution_time' has elapsed, the redirect process of the helper is
 * started with the number of list entries handled so far (presumably that
 * number returns as the 'startcounter' request parameter - TODO confirm).
 *
 * @param $event the action event; its default handling is prevented
 * @return false when the event is not a siteexport call; every other path
 *         ends the script via exit or a redirect
 */
function siteexport_action( &$event ) {
    global $ID;

    // Check if the 'do' was siteexport
    if ( $event->data != 'siteexport' ) { return false; }
    if ( headers_sent() ) {
        msg("The siteexport function has to be called prior to any header output.", -1);
    }

    $this->__init_functions();

    $this->functions->debug->message("========================================", null, 1);
    $this->functions->debug->message("Starting export from URL call", null, 1);

    $event->preventDefault();
    $event->stopPropagation();

    // Fake security Token if none given
    // NOTE(review): this supplies a valid token for requests that sent none - confirm this is intended.
    if ( empty( $_REQUEST['sectok'] ) ) {
        $_REQUEST['sectok'] = getSecurityToken();
    }

    // The timer will be used to do redirects if needed to prevent timeouts
    $starttimer = time();
    $timerdiff = $this->getConf('max_execution_time');

    // A non-empty startcounter marks a continued (redirected) run
    $isRedirected = !empty($_REQUEST['startcounter']);
    $startCounter = $isRedirected ? intval($_REQUEST['startcounter']) : 0;

    $data = $this->__get_siteexport_list_and_init_tocs($ID, $isRedirected);

    if ( $data === false ) {
        header("HTTP/1.0 401 Unauthorized");
        print 'Unauthorized';
        exit;
    }

    $counter = 0;

    if ( count($data) == 0 && !$this->functions->settings->hasValidCacheFile ) {
        exit();
    }

    foreach ( $data as $site ) {

        // Entries without an 'exists' flag count as existing. The isset() test
        // has to come first, otherwise a missing key is read and raises a warning.
        $siteExists = !isset($site['exists']) || intval($site['exists']) == 1;

        // Skip over the amount of urls that have been exported already
        if ( $siteExists && $counter >= $startCounter ) {
            $this->__siteexport_add_site($site['id']);
        }

        $counter ++;
        if ( time() - $starttimer >= $timerdiff ) {
            $this->functions->debug->message("Will Redirect", null, 1);
            $this->handleRuntimeErrorOutput();
            $this->functions->startRedirctProcess($counter);
        }
    }

    $this->functions->debug->message("Finishing export from URL call", null, 1);
    $this->functions->debug->message("========================================", null, 1);

    $this->cleanCacheFiles();

    $URL = ml($this->functions->settings->origZipFile, array('cache' => 'nocache', 'siteexport' => $this->functions->settings->pattern, 'sectok' => getSecurityToken()), true, '&');
    $this->functions->debug->message("Redirecting to final file", $URL, 2);

    $this->handleRuntimeErrorOutput();
    send_redirect($URL);
    exit(0); // Should not be reached, but anyways
}
156
/**
 * Store collected runtime errors inside the package.
 *
 * Does nothing when the debug helper holds no runtime errors. Otherwise the
 * errors are handed to the file writer under '_runtime_error/<timestamp>.html'.
 */
private function handleRuntimeErrorOutput()
{
    $debug = $this->functions->debug;

    if ( empty($debug->runtimeErrors) ) {
        return;
    }

    $target = '_runtime_error/' . time() . '.html';
    $this->filewriter->__moveDataToZip($debug->runtimeErrors, $target);
}
164
/**
 * Create the helper objects used by all handlers of this class.
 *
 * Sets up a fresh siteexport_functions instance (flagged as AJAX in its
 * debug object) and a zip file writer built on top of it. When the writer
 * reports PDF capabilities, the file type setting is switched to 'pdf'.
 */
public function __init_functions()
{
    $this->functions = new siteexport_functions();
    $helper = $this->functions;
    $helper->debug->isAJAX = true;

    $this->filewriter = new siteexport_zipfilewriter($helper);

    // Without PDF capabilities the file type setting stays untouched
    if ( !$this->filewriter->canDoPDF() ) {
        return;
    }

    $helper->settings->fileType = 'pdf';
}
176
/**
 * Prepares the generated URL for direct download access
 * Also gives back the parameters for this URL
 *
 * Default handling and propagation of the event are stopped.
 *
 * @param $event init event of the ajax request
 * @return array list of: the prepared URL, the combined path-and-parameters
 *         string, its path part (before the first '?') and its query part
 *         (after the first '?', null when there is none)
 */
function ajax_siteexport_prepareURL_and_POSTData( &$event ) {

    $event->preventDefault();
    $event->stopPropagation();

    // Retrieve Information for download URL
    $url = $this->functions->prepare_POSTData($_REQUEST);
    $combined = $this->functions->urlToPathAndParams($url);

    // Pad to two entries: without a '?' there is no query part, and list()
    // would otherwise read a missing offset.
    list($path, $query) = array_pad(explode('?', $combined, 2), 2, null);
    $return = array($url, $combined, $path, $query);

    $this->functions->debug->message("Prepared URL and POST data:", $return, 2);
    return $return;
}
196
/**
 * Executes a Cron Job Action
 *
 * Depending on the event data the parameters of the current request are
 * saved to ('__siteexport_savecron') or deleted from
 * ('__siteexport_deletecron') the siteexport cron plugin. A non-empty status
 * returned by the cron plugin is logged as a failure.
 *
 * @param $event
 */
function ajax_siteexport_cronaction( &$event )
{
    $cronOverwriteExisting = isset($_REQUEST['cronOverwriteExisting']) && intval($_REQUEST['cronOverwriteExisting']) == 1;

    $prepared = $this->ajax_siteexport_prepareURL_and_POSTData($event);
    $combined = $prepared[1];

    $function = plugin_load('cron', 'siteexport');
    if ( !$function )
    {
        $this->functions->debug->message("Tried to do an action with siteexport/cron, but the cron plugin is missing.", null, 4);
        // Without the plugin there is nothing to call; continuing would end in a fatal error.
        return;
    }

    $status = null;
    switch( $event->data ) {
        case '__siteexport_savecron': $status = $function->saveCronDataWithParameters($combined, $cronOverwriteExisting); break;
        case '__siteexport_deletecron': $status = $function->deleteCronDataWithParameters($combined); break;
    }

    if ( !empty($status) )
    {
        $this->functions->debug->message("Tried to do an action with siteexport/cron, but failed.", $status, 4);
    }
}
222
/**
 * generate direct access URL
 *
 * Prints, one per line: the direct download URL, a wget command line, a curl
 * command line and finally "true" or "false" depending on whether the cron
 * plugin has a job for the current parameters. Nothing is printed when the
 * debug helper reports errors.
 *
 * NOTE(review): the POST data is echoed unescaped inside double quotes of the
 * generated shell commands - confirm this is safe for the way the output is used.
 *
 * @param $event init event of the ajax request
 */
function ajax_siteexport_generateurl( &$event ) {

    list($url, $combined, $path, $POSTData) = $this->ajax_siteexport_prepareURL_and_POSTData($event);

    // Namespace of the request, as used by ajax_siteexport_getsitelist();
    // null (the previous effective value) when the request carries none.
    $namespace = isset($_REQUEST['ns']) ? $_REQUEST['ns'] : null;

    // WGET Redirects - this is an option for wget only.
    // Calculate the maximum redirects that we want to allow. A problem is that we don't know how long it will take to fetch one page.
    // Therefore we assume it takes about 5s for each page - that gives the freedom to have enough time for redirects.
    $maxExecutionTime = $this->getConf('max_execution_time');
    $siteCount = count($this->__get_siteexport_list($namespace, true));

    // Guard against an unset or zero time limit, which would divide by zero.
    $maxRedirectNumber = 0;
    if ( is_numeric($maxExecutionTime) && $maxExecutionTime > 0 ) {
        $maxRedirectNumber = ceil( ( $siteCount * 5 ) / $maxExecutionTime );
    }

    $maxRedirect = $maxRedirectNumber > 0 ? '--max-redirect=' . ($maxRedirectNumber+3) . ' ' : '';
    $maxRedirs = $maxRedirectNumber > 0 ? '--max-redirs ' . ($maxRedirectNumber+3) . ' ' : '';

    $this->functions->debug->message("Generating Direct Download URL", $url, 2);

    // If there was a Runtime Exception
    if ( !$this->functions->debug->firstRE() ) {
        $this->functions->debug->message("There have been errors while generating the download URLs.", null, 4);
        return;
    }

    // array_pop() takes its argument by reference, so it needs a real variable.
    $zipNameParts = explode(":", ($this->getConf('zipfilename')));
    $zipName = array_pop($zipNameParts);
    $target = wl(cleanID($path), null, true);

    echo $url;
    echo "\n";
    echo 'wget ' . $maxRedirect . '--output-document=' . $zipName . ' --post-data="' . $POSTData . '" ' . $target . ' --http-user=USER --http-passwd=PASSWD';
    echo "\n";
    echo 'curl -L ' . $maxRedirs . '-o ' . $zipName . ' -d "' . $POSTData . '" ' . $target . ' --anyauth --user USER:PASSWD';
    echo "\n";

    $this->functions->debug->message("Checking for Cron parameters: ", $combined, 1);
    $cron = plugin_load('cron', 'siteexport');
    if ( $cron && $cron->hasCronJobForParameters($combined) ) {
        echo "true";
    } else {
        echo "false";
    }

    return;
}
263
/**
 * AJAX wrapper: print the list of sites that are going to be exported.
 *
 * Output, one entry per line: the pattern from the settings, the download
 * URL and then the id of every list entry. Nothing is printed when the list
 * could not be built, when it is empty without a valid cache file, or when
 * the debug helper reports errors.
 *
 * @param $event init event of the ajax request; default handling is stopped
 */
function ajax_siteexport_getsitelist( &$event ) {

    $event->preventDefault();
    $event->stopPropagation();

    $sites = $this->__get_siteexport_list_and_init_tocs($_REQUEST['ns']);
    $debug = $this->functions->debug;
    $settings = $this->functions->settings;

    // false is the explicit failure marker of the list builder
    if ( $sites === false ) {
        $debug->runtimeException("No data generated. List of Files is 'false'.");
        return;
    }

    // An empty list is acceptable only if a valid cache file can stand in for it
    if ( empty($sites) && !$settings->hasValidCacheFile ) {
        $debug->runtimeException("Generated list is empty.");
        return;
    }

    // If there was a Runtime Exception
    if ( !$debug->firstRE() ) {
        $debug->message("There have been errors while generating site list.", null, 4);
        return;
    }

    echo $settings->pattern . "\n";
    echo $this->functions->downloadURL() . "\n";
    foreach ( $sites as $entry ) {
        echo $entry['id'], "\n";
    }

    return;
}
303
/**
 * AJAX wrapper: add the page named in the 'site' request parameter to the package.
 *
 * Prints the download URL when the page was added and the debug helper
 * reports no errors. Stops silently when adding the page returned false.
 *
 * @param $event init event of the ajax request; default handling is stopped
 */
function ajax_siteexport_addsite( &$event ) {

    $event->preventDefault();
    $event->stopPropagation();

    $debug = $this->functions->debug;
    $separator = "========================================";

    $debug->message($separator, null, 1);
    $debug->message("Starting export from AJAX call", null, 1);

    if ( $this->__siteexport_add_site($_REQUEST['site']) === false ) {
        return;
    }

    $debug->message("Finishing export from AJAX call", null, 1);
    $debug->message($separator, null, 1);

    // Clean up before the download URL is handed out
    $this->cleanCacheFiles();

    // Only print the URL when no runtime exception was recorded
    if ( $debug->firstRE() ) {
        print $this->functions->downloadURL();
        return;
    }

    $debug->message("There have been errors during the export.", null, 4);
    return;
}
333
/**
 * Fetch the list of pages to be exported.
 *
 * The 'depthType' request parameter selects the mode:
 *   0 - only the requested page if it exists; otherwise continue like 1
 *   1 - search with depth 0
 *   2 - search with the depth given in the 'depth' request parameter
 * Any other value searches with the configured 'depth'.
 *
 * When the settings ask for the TOC file, the list is merged from the
 * 'sitetoc siteexportTOC' metadata of all pages found by searching for
 * 'toc.txt'; otherwise all pages found by the search are used.
 *
 * @param $NS            id or namespace the export starts from
 * @param $overrideCache when true, the cache check is skipped
 * @return array list of entries; empty if the file writer reports a valid cache file
 */
function __get_siteexport_list($NS, $overrideCache=false) {
    global $conf;

    // $PAGE is not set before this call; presumably getNamespaceFromID()
    // fills it by reference - TODO confirm against the helper.
    $this->namespace = $this->functions->getNamespaceFromID($NS, $PAGE);
    $NS = $this->namespace;

    $depth = $this->getConf('depth');
    $query = '';
    $depthType = intval($_REQUEST['depthType']);

    if ( $depthType === 0 ) {
        $query = $this->functions->cleanID(str_replace(":", "/", $NS.':'.$PAGE));
        resolve_pageid($NS, $PAGE, $exists);

        if ( $exists ) {
            $singlePage = array( array( 'id' => $PAGE) );

            $this->functions->debug->message("Checking for Cache", null, 2);
            if ( !$overrideCache && $this->filewriter->hasValidCacheFile($_REQUEST, $singlePage) ) {
                return array();
            }

            return $singlePage;
        }
    }

    // A page that does not exist (type 0) is treated like type 1
    if ( $depthType === 0 || $depthType === 1 ) {
        $depth = 0;
    } elseif ( $depthType === 2 ) {
        $depth = intval($_REQUEST['depth']);
    }

    $opts = array( 'depth' => $depth, 'skipacl' => $this->getConf('skipacl'), 'query' => $query);
    $data = array();
    require_once (DOKU_INC.'inc/search.php');

    // Check, which TOC to take
    if ( $this->functions->settings->useTOCFile ) {
        $this->functions->debug->message("Using TOC for data", null, 2);

        // Look for the TOC pages instead of all pages
        $opts['query'] = 'toc.txt';

        $tocPages = array();
        search($tocPages, $conf['datadir'], 'search_pagename', $opts, $this->namespace);

        // There may be more than one toc and all of them have to be merged.
        foreach ( $tocPages as $tocPage ) {
            $tocEntries = p_get_metadata($tocPage['id'], 'sitetoc siteexportTOC', true);

            if ( is_array($tocEntries) ) {
                $data = array_merge($data, $tocEntries);
            }
        }
    } else {
        search($data, $conf['datadir'], 'search_allpages', $opts, $this->namespace);
    }

    $this->functions->debug->message("Checking for Cache", null, 2);
    if ( !$overrideCache && $this->filewriter->hasValidCacheFile($_REQUEST, $data) ) {
        return array();
    }

    $this->functions->debug->message("Exporting the following sites: ", $data, 2);
    return $data;
}
408
/**
 * Build the export list and, on a fresh run, create the TOC files.
 *
 * On a fresh (not redirected) run the old zip is removed first. For a
 * non-empty list the TOC files requested via 'eclipseDocZip' or
 * 'JavaHelpDocZip' are then written to the package.
 *
 * @param $NS           id or namespace the export starts from
 * @param $isRedirected true when this request continues an earlier run
 * @return array|false the export list, or false if old files could not be removed
 */
function __get_siteexport_list_and_init_tocs($NS, $isRedirected=false ) {

    // Clean up if not redirected
    if ( !$isRedirected ) {
        $removed = $this->__removeOldZip();
        if ( !$removed ) {
            $this->functions->debug->runtimeException("Can't remove old files.");
            return false;
        }
    }

    $data = $this->__get_siteexport_list($NS, $isRedirected);

    // if we have been redirected (or there is nothing to do), simply return the data
    if ( $isRedirected || empty($data) ) {
        return $data;
    }

    // Absolute paths are generated without a namespace
    if ( !empty($_REQUEST['absolutePath']) ) {
        $this->namespace = "";
    }

    // Create Eclipse Documentation Pages - TOC.xml, Context.xml
    if ( !empty($_REQUEST['eclipseDocZip']) ) {
        $eclipseToc = new siteexport_toc($this->functions);
        $this->functions->debug->message("Generating eclipseDocZip", null, 2);
        $this->filewriter->__moveDataToZip($eclipseToc->__getTOCXML($data), 'toc.xml');
        $this->filewriter->__moveDataToZip($eclipseToc->__getContextXML($data), 'context.xml');

        return $data;
    }

    // JavaHelp only applies when no Eclipse documentation was requested
    if ( !empty($_REQUEST['JavaHelpDocZip']) ) {
        $javaHelp = new siteexport_javahelp($this->functions, $this->filewriter);
        $javaHelp->createTOCFiles($data);
    }

    return $data;
}
448
/**
 * Add page with ID to the package
 *
 * Builds the request URL for the page (export action, template and the
 * additional parameters from the settings), downloads it into a temporary
 * file and adds that file to the zip. The temporary file is removed afterwards.
 *
 * @param $ID id of the page to add
 * @return mixed the status returned by the file writer; if the download
 *         failed, whatever the debug helper's message() returns
 */
function __siteexport_add_site( $ID ) {
    global $conf, $currentID;

    // Which is the current ID?
    $currentID = $ID;

    $this->functions->debug->message("========================================", null, 2);
    $this->functions->debug->message("Adding Site: '$ID'", null, 2);

    $request = $this->functions->settings->additionalParameters;
    unset($request['diPlu']); // This will not be needed for the first request.
    unset($request['diInv']); // This will not be needed for the first request.

    // say, what to export and Build URL
    // http://documentation:81/helpdesk/de/hds/getting-started?depthType=0&do=siteexport&ens=helpdesk%3Ade%3Ahds%3Agetting-started&pdfExport=1&renderer=siteexport_siteexportpdf&template=helpdesk

    $exportBody = isset($_REQUEST['exportbody']) && intval($_REQUEST['exportbody']) == 1;
    $renderer = empty($_REQUEST['renderer']) ? $conf['renderer_xhtml'] : $_REQUEST['renderer'];

    $do = $exportBody ? $renderer : '';

    if ( $do == 'pdf' && $this->filewriter->canDoPDF() )
    {
        // This is already a complete action name; it must not receive a
        // second 'export_' prefix below.
        $do = 'export_siteexport_pdf';
        $_REQUEST['origRenderer'] = $renderer;
    }
    else
    {
        $do = ($do == $conf['renderer_xhtml'] && !$exportBody) ? '' : 'export_' . $do;
    }

    // A bare 'export_' means there is no renderer to export with
    if ( $do != 'export_' && !empty($do) )
    {
        $request['do'] = $do;
    }

    // set Template
    if ( !empty( $_REQUEST['template'] ) ) {
        $request['template'] = $_REQUEST['template'];
    }

    $this->functions->debug->message("REQUEST for add_site:", $request, 2);

    $ID = $this->functions->cleanID($ID);
    $url = $this->functions->wl($ID, $request, true, '&');

    // Parse URI PATH and add "html"
    $fileName = $this->functions->getSiteName($ID, true);

    $this->fileChecked[$url] = $fileName; // 2010-09-03 - One URL to one FileName
    // One '../' for every directory level of the file name
    $this->functions->settings->depth = str_repeat('../', count(explode('/', $fileName))-1);

    // fetch URL and save it in temp file
    $tmpFile = $this->__getHTTPFile($url);
    if ( $tmpFile === false ) {
        // Only logged, no runtime exception is raised for a failed download.
        // NOTE(review): callers compare the result against false - confirm what message() returns.
        return $this->functions->debug->message("Creating temporary download file failed for '$url'. See log for more information.");
    }

    // If a Filename was given that does not comply to the original name, use this one!
    if ( !empty($tmpFile[1]) && !strstr($fileName, $tmpFile[1]) ) {

        $dParts = explode('/', $fileName);
        array_pop($dParts);
        $dParts[] = $tmpFile[1];

        $fileName = implode('/', $dParts);
        $this->fileChecked[$url] = $fileName;
    }

    // Add to zip
    $status = $this->filewriter->__addFileToZip($tmpFile[0], $fileName);
    @unlink($tmpFile[0]);

    return $status;
}
523
/**
 * Download the file via HTTP URL + recurse if this is not an image
 * The file will be saved as temporary file.
 *
 * @param $URL                     URL to fetch; the additional parameters are added to it
 * @param $RECURSE                 when false, links inside the fetched data are processed via __getInternalLinks()
 * @param $newAdditionalParameters parameters handed to addAdditionalParametersToURL()
 * @return array|false array(temporary file name, file name from the
 *         Content-Disposition header or '' if none was sent); false if the
 *         URL is excluded, nothing could be fetched or the temporary file
 *         could not be written
 */
function __getHTTPFile($URL, $RECURSE=false, $newAdditionalParameters=null) {
    global $conf;

    // The exclude setting is matched literally. preg_quote() already escapes
    // the delimiter, so escaping '/' beforehand would require a backslash in the URL.
    $EXCLUDE = $this->getConf('exclude');

    if ( !empty($EXCLUDE) && preg_match("/(".preg_quote($EXCLUDE,"/").")/i", $URL) ) { return false; }

    require_once( DOKU_INC . 'inc/HTTPClient.php');

    $http = new HTTPProxy($this->functions->debug);
    $http->max_bodysize = $conf['fetchsize'];
    // $http->user = $_SERVER['PHP_AUTH_USER']; // Must not be set, or the files will be authenticated and have the edit thingies
    // $http->pass = $_SERVER['PHP_AUTH_PW']; // Must not be set, or the files will be authenticated and have the edit thingies

    // Add additional Params
    $this->functions->addAdditionalParametersToURL($URL, $newAdditionalParameters);

    $this->functions->debug->message("Fetching URL: '$URL'", null, 2);
    $getData = $http->get($URL);

    if( $getData === false ) {
        $this->functions->debug->message("Sending request failed with error, HTTP status was '{$http->status}'.", $URL, 4);
        return false;
    }

    if( empty($getData) ) {
        $this->functions->debug->message("No data fetched.", null , 4);
        return false;
    }

    $tmpFile = tempnam($this->functions->settings->tmpDir , 'siteexport__');
    $this->functions->debug->message("Temporary filename", $tmpFile, 1);

    $fp = ( $tmpFile === false ) ? false : fopen( $tmpFile, "w");
    if(!$fp) {
        $this->functions->debug->message("Can't open temporary File '$tmpFile'.", null , 4);
        // tempnam() has already created the file - do not leave it behind
        if ( $tmpFile !== false ) { @unlink($tmpFile); }
        return false;
    }

    if ( !$RECURSE ) {
        // Parse URI PATH and add "html"
        $this->functions->debug->message("========================================", null, 1);
        $this->functions->debug->message("Starting to recurse file '$URL'", null , 1);
        $this->functions->debug->message("========================================", null, 1);
        $this->__getInternalLinks($getData);
        $this->functions->debug->message("========================================", null, 1);
        $this->functions->debug->message("Finished to recurse file '$URL'", null , 1);
        $this->functions->debug->message("========================================", null, 1);
    }

    $written = fwrite($fp,$getData);
    fclose($fp);

    if ( $written === false ) {
        $this->functions->debug->message("Can't write to temporary File '$tmpFile'.", null , 4);
        @unlink($tmpFile);
        return false;
    }

    // Only report a file name if the header really carries one. A header
    // without "filename=" must not end up being used as the file name itself.
    $dispositionName = '';
    if ( isset($http->resp_headers['content-disposition'])
        && preg_match("/filename=\"?(.*?)\"?;?$/", $http->resp_headers['content-disposition'], $matches) ) {
        $dispositionName = $matches[1];
    }

    return array($tmpFile, $dispositionName);
}
583
/**
 * Rewrite the links inside the downloaded data in place.
 *
 * Every href/src/action attribute and every CSS url(...) expression is
 * passed to __fetchAndReplaceLink() and replaced by its return value.
 *
 * @param $DATA downloaded content, modified by reference
 */
function __getInternalLinks(&$DATA) {

    $replacer = array($this, '__fetchAndReplaceLink');

    // Attribute links first, CSS url() expressions afterwards
    $linkPatterns = array(
        '(href|src|action)="([^"]*)"',
        '(url\s*?)\([^\)]*\)',
    );

    foreach ( $linkPatterns as $linkPattern ) {
        $DATA = preg_replace_callback("/$linkPattern/i", $replacer, $DATA);
    }
}
596
597    /**
598     * Deep Fetch and replace of links inside the texts matched by __getInternalLinks
599     **/
600    function __fetchAndReplaceLink($DATA) {
601        global $conf, $currentID;
602
603        $noDeepReplace = true;
604        $newAdditionalParameters = $this->functions->settings->additionalParameters;
605        $newDepth = $this->functions->settings->depth;
606        $hadBase = false;
607
608        $this->functions->debug->message("Starting Link Replacement", $DATA, 2);
609
610        // $DATA[2] = urldecode($DATA[2]); // Leads to problems because it does not re-encode the url
611        // External and mailto links
612        if ( preg_match("%^(https?://|mailto:|javascript:|data:)%", $DATA[2]) ) {
613            $this->functions->debug->message("Don't like http, mailto, data or javascript links here", null, 1);
614            return $this->__rebuildLink($DATA, "");
615        }
616        //if ( preg_match("%^(https?://|mailto:|" . DOKU_BASE . "/_export/)%", $DATA[2]) ) { return $this->__rebuildLink($DATA, ""); }
617        // External media - this is deep down in the link, so we have to grep it out
618        if ( preg_match("%media=(https?://.*?$)%", $DATA[2], $matches) ) {
619            $DATA[2] = $matches[1];
620            $this->functions->debug->message("This is an HTTP like somewhere else", $DATA, 1);
621            return $this->__rebuildLink($DATA, "");
622        }
623        // reference only links won't have to be rewritten
624        if ( preg_match("%^#.*?$%", $DATA[2]) ) {
625            $this->functions->debug->message("This is a refercence only", null, 1);
626            return $this->__rebuildLink($DATA, "");
627        }
628
629        // strip all things out
630        // changed Data
631        $PARAMS = @parse_url($DATA[2], PHP_URL_QUERY);
632        $ANCHOR = @parse_url($DATA[2], PHP_URL_FRAGMENT);
633        $DATA[2] = @parse_url($DATA[2], PHP_URL_PATH);
634
635        // 2010-08-25 - fix problem with relative movement in links ( "test/../test2" )
636        $tmpData2 = '';
637        while( $tmpData2 != $DATA[2] ) {
638            $tmpData2 = $DATA[2];
639            $DATA[2] = preg_replace("#/(?!\.\.)[^\/]*?/\.\./#", '/', $DATA[2]);
640        }
641
642        $temp = preg_replace("%^" . DOKU_BASE . "%", "", $DATA[2]);
643        if ( $temp != $DATA[2] ) {
644            $DATA[2] = $temp;
645            $hadBase = true; // 2010-08-23 Check if there has been a rewrite here that will have to be considered later on
646        }
647
648        $this->functions->debug->message("URL before rewriting option for others than 1", array($DATA, $PARAMS, $hadBase), 1);
649
650        // Handle rewrites other than 1
651        if ( !preg_match('$^/?lib/$', $DATA[2]) ) {
652            if ( $conf['userewrite'] == 2 ) {
653                $DATA[2] = $this->__getInternalRewriteURL($DATA[2]);
654            } elseif ( $conf['userewrite'] == 0 ) {
655                $this->__getParamsAndDataRewritten($DATA, $PARAMS);
656            }
657        }
658
659        $this->functions->debug->message("URL before rewriting option", array($DATA, $PARAMS), 2);
660
661        $ORIGDATA2 = $DATA;
662        //        $ORIGDATA2 = $DATA[2]; // 08/10/2010 - this line required a $this->functions->wl which may mess up with the base URL
663        $this->functions->debug->message("OrigDATA is:", $ORIGDATA2, 1);
664
665        // Generate ID
666        $DATA[2] = str_replace('/', ':', $DATA[2]);
667
668        // If Data was empty this must be the same file!;
669        if ( empty( $DATA[2] ) ) {
670            $DATA[2] = $currentID;
671        }
672
673        $ID = $DATA[2];
674        $MEDIAMATCHER = "#(_media(/|:)|media=|_detail(/|:)|_export(/|:)|do=export_)#i"; // 2010-10-23 added "(/|:)" for the ID may not contain slashes anymore
675        $ID = $this->functions->cleanID($DATA[2], null, preg_match($MEDIAMATCHER, $DATA[2]) );
676        //        $ID = $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'media') ); // Export anpassung nun weiter unten
677
678        //        $IDexists = page_exists($ID); // 08/10/2010 - Not needed. This will be done in the next block.
679        //        $this->functions->debug->message("Current ID: '$ID' exists: '" . ($IDexists ? 'true' : 'false') . "' (will be set to 'false' anyway)", null, 1);
680
681        $IDifIDnotExists = $ID; // 08/10/2010 - Save ID - with possible upper cases to preserve them
682        $IDexists = false;
683
684        $this->functions->debug->message("Resolving ID: '$ID'", null, 2);
685        if ( preg_match($MEDIAMATCHER, $DATA[2]) ) {
686            resolve_mediaid(null, $ID, $IDexists);
687
688            $this->functions->debug->message("Current mediaID to filename: '" . mediaFN($ID) . "'", null, 2);
689        } else {
690            resolve_pageid(null, $ID, $IDexists);
691            $this->functions->debug->message("Current ID to filename: '" . wikiFN($ID) . "'", null, 2);
692        }
693
694        $this->functions->debug->message("Current ID after resolvement: '$ID' the ID does exist: '" . ($IDexists ? 'true' : 'false') . "'", null, 2);
695        //        $ORIGDATA2 = @parse_url($this->functions->wl($ORIGDATA2, null, true)); // What was the next 2 line for? It did mess up with links from {{jdoc>}}
696        //        $this->functions->debug->message("OrigData ID after parse:", $ORIGDATA2, 1); // 08/10/2010 - The lines are obsolete when the $ORIGDATA2 = $DATA. $ORIGDATA is only for fallback
697
698        // 08/10/2010 - If the ID does not exist, we may have a problem here with upper cases - they will all be lower by now!
699        if ( !$IDexists ) {
700            $ID = $IDifIDnotExists; // there may have been presevered Upper cases. We will need them!
701        }
702
703        // $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'media') || strstr($DATA[2], 'export') );
704        if ( substr($ID, -1) == ':' || empty($ID) ) $ID .= $conf['start'];
705
706        // Generate Download URL
707        // $PARAMS = trim(str_replace('&amp;', '&', $PARAMS));
708        $PARAMS = trim($PARAMS);
709        $this->functions->removeWikiVariables($PARAMS, false, true);
710
711        $url = $this->functions->wl($ID, null, true, null, null, true, $hadBase) . ( !empty( $ANCHOR) ? '#' . $ANCHOR : '' ) . ( !empty( $PARAMS) ? '?' . $PARAMS : '' );
712        $this->functions->debug->message("URL from ID: '$url'", null, 2);
713
714        // Parse URI PATH and add "html"
715        $uri = @parse_url($url);
716        $DATA[2] = $uri['path'];
717        $DATA['ANCHOR'] = $ANCHOR;
718        $DATA['PARAMS'] = $PARAMS;
719
720        $this->functions->debug->message("DATA after parsing.", $DATA, 2);
721
722        // Second Rewrite for UseRewrite = 2
723        if ( $conf['userewrite'] == 2 ) {
724            $DATA[2] = preg_replace( '$/lib/.*?fetch\.php$', '', $DATA[2]);
725            $DATA[2] = preg_replace( '%(/lib/.*?detail\.php.*$)%', '\1' . '.' . $this->functions->settings->fileType, $DATA[2]);
726
727            if ( preg_match( '%/(lib/.*?detail|doku)\.php%', $DATA[2])) {
728                $noDeepReplace = false;
729                $fileName = $this->functions->getSiteName($ID);
730                $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
731            }
732
733            $this->functions->debug->message("DATA after second rewrite with UseRewrite = 2", array($DATA, $noDeepReplace, $fileName, $newDepth), 1);
734        }
735
736        switch ( array_pop(explode('/', $DATA[2])) ) {
737            // CSS Extra Handling with extra rewrites
738            case 'css.php'	:	// $DATA[2] .=  ( !$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS))) . '.css';
739                $DATA[2] .=  '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS)) . '.css'; // allways put parameters behind
740                // No paramters needed since they are rewritten.
741                $DATA['PARAMS'] = "";
742                $noDeepReplace = false;
743                $fileName = $this->functions->getSiteName($ID);
744                $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
745                $newAdditionalParameters['do'] = 'siteexport';
746
747                $this->functions->debug->message("This is CSS file", array($DATA, $noDeepReplace, $fileName, $newDepth, $newAdditionalParameters), 2);
748
749                break;
750            case 'js.php'	:	// $DATA[2] .= ( !$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS))) . '.js';
751                $DATA[2] .=  '.t.' . $this->functions->cleanID($_REQUEST['template']) . '.js'; // allways put parameters behind
752                // set Template
753                if ( !empty( $_REQUEST['template'] ) ) {
754                    $url .= ( strstr($url, '?') ? '&' : '?' ) . 'template=' . $_REQUEST['template'];
755                }
756                // No paramters needed since they are rewritten.
757                $DATA['PARAMS'] = "";
758                $newAdditionalParameters['do'] = 'siteexport';
759
760                $this->functions->debug->message("This is JS file", array($DATA, $url, $fileName, $newAdditionalParameters), 2);
761
762                break;
763                // Detail Handling with extra Rewrites if Paramaters are available - otherwise this is just the fetch
764            case 'indexer.php' :
765                $this->functions->debug->message("Skipping indexer", null, 2);
766                return "";
767                break;
768            case 'detail.php' :
769                $fileName = $this->functions->getSiteName($ID, true); // 2010-09-03 - rewrite with override enabled
770            case 'doku.php' :
771                if ( $this->functions->settings->addParams ) {
772                    $noDeepReplace = false;
773
774                    if ( empty($fileName) ) {
775                        $fileName = $this->functions->getSiteName($ID); // 2010-09-03 - rewrite with override enabled
776                    }
777
778                    $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
779                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
780
781                    $this->functions->debug->message("This is doku.php or detail.php file with addParams", array($DATA, $fileName, $newDepth, $newAdditionalParameters), 2);
782                    break;
783                }
784
785                $url = str_replace('detail.php', 'fetch.php', $url);
786                $this->functions->debug->message("This is doku.php or detail.php file '$url'", null, 2);
787                // Fetch Handling for media - rewriting everything
788            case 'fetch.php':
789                $this->__getParamsAndDataRewritten($DATA, $PARAMS, 'media');
790
791                $DATA[2] = str_replace('/', ':', $DATA[2]);
792                $ID = $this->functions->cleanID($DATA[2], null, strstr($DATA[2], 'media'));
793
794                $urlM = ml($ID, null, true);
795                $uriM = @parse_url($urlM);
796                $DATA[2] = $uriM['path'] . ( !empty( $ANCHOR) ? '#' . $ANCHOR : '' ) . ( !empty( $PARAMS) ? '?' . $PARAMS : '' );
797
798                $DATA['PARAMS'] = "";
799                $newAdditionalParameters = array();
800
801                $this->functions->debug->message("This is fetch.php file", array($DATA, $ID), 2);
802                break;
803
804                // default Handling for Pages
805            default			:
806                if ( preg_match("%" . DOKU_BASE . "_detail/%", $DATA[2]) ) {
807
808                    // GET ID Param from origdata2
809                    preg_match("#id=(.*?)(&|\")#i", $DATA[0], $backlinkID);
810                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
811
812                    $fileIDPart = isset($backlinkID[1]) && !empty($backlinkID[1]) ? $this->functions->cleanID(urldecode($backlinkID[1])) : 'detail';
813
814                    $DATA[2] .= '/' . $fileIDPart . '.' . $this->functions->settings->fileType; // add namespace and subpage for back button and add filetype
815
816                    $noDeepReplace = false;
817                    $fileName = $this->functions->shortenName($DATA[2]);
818                    $newDepth = str_repeat('../', count(explode('/', $fileName))-1);
819                    $url .= ( strstr($url, '?') ? '&' : '?' ) . 'id=' . $fileIDPart; // add id-part to URL for backlinks
820
821                    $DATA['PARAMS'] = "";
822
823                    $this->functions->debug->message("This is something with '_detail' file", array($DATA, $backlinkID, $newDepth, $url), 2);
824                } else if ( preg_match("%" . DOKU_BASE . "_export/(.*?)/%", $DATA[2], $fileType) ) {
825
826                    // Fixes multiple codeblocks in one file
827                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
828
829                    // add the Params no matter what they are. This is export. We don't mess with other files
830                    // adding the "/" fixes the usage of multiple codeblocks in the same namespace
831                    $DATA[2] .= (empty( $PARAMS ) ? '' : '/' . $PARAMS) . '.'. $fileType[1];
832
833                    $DATA['PARAMS'] = "";
834                    $this->functions->debug->message("This is something with '_export' file", $DATA, 2);
835
836                } else if ( $IDexists ) { // 08/10/2010 - was page_exists($ID) - but this should do as well.
837                    // If this is a page ... skip it!
838                    $DATA[2] .= ( !$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID(preg_replace("/(=|\?|&amp;)/", ".", $PARAMS)))  . '.' . $this->functions->settings->fileType;
839
840                    // 2012-06-15 originally has an absolute path ... we might need a relative one if not in our namespace
841                    $this->functions->debug->message("OK, this is to be absolute: " . (empty($_REQUEST['absolutePath'])?'false':'true'), null, 1);
842                    if ( empty($_REQUEST['absolutePath']) )
843                    {
844                        $DATA[2] = $this->functions->getRelativeURL($DATA[2], $currentID);
845                    }
846
847                    $DATA[2] = $this->functions->shortenName($DATA[2]);
848
849                    // If Parameters are to be included in the filename - they must not be added twice
850                    if ( $this->functions->settings->addParams ) $DATA['PARAMS'] = "";
851
852                    $this->functions->debug->message("This page really exists", $DATA, 1);
853
854                    return $this->__rebuildLink($DATA);
855                } else {
856                    $this->__rebuildDataForNormalFiles($DATA, $PARAMS);
857                }
858
859                unset($newAdditionalParameters['diPlu']);
860        }
861
862
863        $this->functions->debug->message("DATA after SWITCH CASE decision", array($DATA, $noDeepReplace, $fileName, $newDepth), 1);
864
865        if ( $this->filewriter->canDoPDF() ) {
866            $this->functions->addAdditionalParametersToURL($url, $newAdditionalParameters);
867            $DATA[2] = $url;
868            unset($DATA['PARAMS']);
869            $url = $this->__rebuildLink($DATA, '');
870
871            $this->functions->debug->message("Creating PDF with URL '$url'", null, 2);
872
873            return $url;
874        }
875
876        // Create Name to save the file at
877        $DATA[2] = str_replace(':', '_', $DATA[2]);
878        $DATA[2] = $this->functions->shortenName($DATA[2]);
879
880
881        // File already loaded?
882        // 2010-10-23 - changes in_array from DATA[2] to $url - to check real URLs, the DATA[2] file will be checked with fileExistsInZip
883        if ( in_array($url, array_keys($this->fileChecked)) ) {
884            $DATA[2] = $this->fileChecked[$url];
885            $this->functions->debug->message("File has been checked before.", array($DATA, $url), 2);
886            return $this->__rebuildLink($DATA);
887        }
888
889        // 2010-09-03 - second check if the file is in the ZIP already.
890        if ( $this->filewriter->fileExistsInZip($DATA[2]) ) {
891            $this->functions->debug->message("File with DATA exists in ZIP.", $DATA, 3);
892            return $this->__rebuildLink($DATA);
893        }
894
895        // 2010-10-23 - What if this is a fetch.php? than we produced an error.
896        //        $this->fileChecked[] = $DATA[2];
897        $this->fileChecked[$url] = $DATA[2]; // 2010-09-03 - One URL to one FileName
898
899        // get tempFile and save it
900        $origDepth = $this->functions->settings->depth;
901        $this->functions->settings->depth = $newDepth;
902
903        $tmpID = $currentID;
904        $tmpFile === false;
905
906        $this->functions->debug->message("Going to get the file", array($url, $noDeepReplace, $newAdditionalParameters), 2);
907        $tmpFile = $this->__getHTTPFile($url, $noDeepReplace, $newAdditionalParameters);
908        $this->functions->debug->message("This is the getHTTPFile result", $tmpFile, 2);
909
910        $currentID = $tmpID;
911        $this->functions->settings->depth = $origDepth; // 2010-09-03 - Reset depth at the very end
912
913        if ( $tmpFile === false ) {
914            // Keep an potentially extra link intact
915
916            $this->functions->debug->message("The fetched file '$url' is 'false'", null, 3);
917            if ( $IDexists === false ) {
918                $this->functions->debug->message("The file does not exist, fallback to ORIGDATA", $ORIGDATA2, 2);
919                $DATA[2] = $this->functions->shortenName($ORIGDATA2[2]); // get Origdata Path
920            }
921
922            $this->fileChecked[$url] = $DATA[2]; // 2010-09-03 - One URL to one FileName
923            $link = $this->__rebuildLink($DATA);
924            $this->functions->debug->message("Final Link after empty file from '$url'", null, 2);
925
926            return $link;
927        }
928
929        $this->functions->debug->message("The fetched file looks good.", $tmpFile, 1);
930
931        // If a Filename was given that does not comply to the original name, us this one!
932        if ( !empty($tmpFile[1]) && !strstr($DATA[2], $tmpFile[1]) ) {
933
934            $dParts = explode('/', $DATA[2]);
935            array_pop($dParts);
936            $dParts[] = $tmpFile[1];
937
938            $DATA[2] = implode('/', $dParts);
939        }
940
941        // Add to zip
942        $this->fileChecked[$url] = $DATA[2]; // 2010-09-03 - One URL to one FileName
943
944        $status = $this->filewriter->__addFileToZip($tmpFile[0], $DATA[2]);
945        @unlink($tmpFile[0]);
946
947        $newURL = $this->__rebuildLink($DATA);
948        $this->functions->debug->message("Returning final Link to document: '$newURL'", null, 2);
949
950        return $newURL;
951    }
952
953    /**
954     * build the new link to be put in place for the donwloaded site
955     **/
956    function __rebuildLink($DATA, $DEPTH = null) {
957
958        // depth is set, skip this one
959        if ( is_null( $DEPTH ) ) $DEPTH = $this->functions->settings->depth;
960        $DATA[2] .= ( !empty( $DATA['PARAMS']) ? '?' . $DATA['PARAMS'] : '' ) . ( !empty( $DATA['ANCHOR'] ) ? '#' . $DATA['ANCHOR'] : '' );
961
962        $newURL = $DATA[1] == 'url' ? $DATA[1] . '(' . $DEPTH . $DATA[2] . ')' : $DATA[1] . '="' . $DEPTH . $DATA[2] . '"';
963        $this->functions->debug->message("Re-created URL: '$newURL'", null, 2);
964
965        return $newURL;
966    }
967
968
969    /**
970     * remove an old zip file
971     **/
972    function __removeOldZip( $FILENAMEID=null, $checkForMore=true ) {
973        global $INFO;
974        global $conf;
975
976        $returnValue = true;
977
978        if ( empty($FILENAMEID) ) {
979            $FILENAMEID = $this->functions->settings->origZipFile;
980        }
981
982        if ( !file_exists(mediaFN($FILENAMEID)) ) {
983            $returnValue = true;
984        } else {
985
986            if ( !$this->functions->settings->isCLI )
987            {
988                $INFO = pageinfo();
989                if ( $INFO['perm'] < AUTH_DELETE && !$this->functions->settings->isAuthed ) {
990                    list ( $USER, $PASS) = $this->functions->basic_authentication();
991                    auth_login($USER, $PASS);
992                    $this->functions->settings->isAuthed = true;
993                }
994            }
995
996            require_once( DOKU_INC . 'inc/media.php');
997            if ( !media_delete($FILENAMEID, $INFO['perm']) ) {
998                $returnValue = false;
999            }
1000        }
1001
1002        if ( $checkForMore ) {
1003            // Try to remove more files.
1004            $ns = getNS($FILENAMEID);
1005            $fn = $this->functions->getSpecialExportFileName(noNS($FILENAMEID), '.+');
1006
1007            $data = array();
1008            search($data, $conf['mediadir'], 'search_media', array('pattern' => "/$fn$/i"), $ns);
1009
1010            if ( count($data > 0) ) {
1011
1012                // 30 Minuten Cache Zeit
1013                $cache = $this->getConf('cachetime');
1014                foreach ( $data as $media ) {
1015
1016                    //decide if has to be deleted needed:
1017                    if( $media['mtime'] < time()-$cache) {
1018                        $this->__removeOldZip($media['id'], false);
1019                    }
1020                }
1021            }
1022
1023        }
1024
1025        return $returnValue;
1026    }
1027
1028    /**
1029     * if confrewrite is set to internal rewrite, use this function - taken from a DW renderer
1030     **/
1031    function __getInternalRewriteURL($url) {
1032        global $conf;
1033
1034        //construct page id from request URI
1035        if( $conf['userewrite'] != 2) { return $url; }
1036
1037        //get the script URL
1038        if($conf['basedir']) {
1039            $relpath = '';
1040            $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
1041        } elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
1042            $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
1043            $_SERVER['SCRIPT_FILENAME']);
1044            $script = '/'.$script;
1045        }else{
1046            $script = $_SERVER['SCRIPT_NAME'];
1047        }
1048
1049        //clean script and request (fixes a windows problem)
1050        $script  = preg_replace('/\/\/+/','/',$script);
1051        $request = preg_replace('/\/\/+/','/',$url);
1052
1053        //remove script URL and Querystring to gain the id
1054        if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
1055            $id = preg_replace ('/\?.*/','',$match[1]);
1056        }
1057        $id = urldecode($id);
1058        //strip leading slashes
1059        $id = preg_replace('!^/+!','',$id);
1060
1061        return $id;
1062    }
1063
1064    /**
1065     * rewrite parameter calls
1066     **/
1067    function __getParamsAndDataRewritten(&$DATA, &$PARAMS, $IDKEY='id') {
1068
1069        $PARRAY = explode('&', str_replace('&amp;', '&', $PARAMS) );
1070        $PARAMS = "";
1071
1072        foreach ( $PARRAY as $item ) {
1073            list($key, $value) = explode('=', $item, 2);
1074            if ( empty($key) || empty($value) )
1075            continue;
1076
1077            if ( strtolower(trim($key)) == $IDKEY ) {
1078                $DATA[2] = preg_replace("%^" . DOKU_BASE . "%", "", $value);
1079                continue;
1080            }
1081
1082            if ( !empty( $PARAMS) ) {
1083                $PARAMS .= '&amp;';
1084            }
1085
1086            $PARAMS .= "$key=$value";
1087        }
1088    }
1089
1090    /**
1091     * rewrite detail.php calls
1092     **/
1093    function __rebuildDataForNormalFiles(&$DATA, &$PARAMS) {
1094        $PARTS = explode('.', $DATA[2]);
1095        if ( count($PARTS) > 1 ) {
1096            $EXT = '.' . array_pop($PARTS);
1097        }
1098
1099        $PARAMS = preg_replace("/(=|\?|&amp;)/", ".", $PARAMS);
1100        $DATA[2] = implode('.', $PARTS) . ( !$this->functions->settings->addParams || empty($PARAMS) ? '' : '.' . $this->functions->cleanID($PARAMS)) . ( $EXT == '.php' ? '.' . $this->functions->settings->fileType : $EXT );
1101        $DATA[2] = preg_replace("/\.+/", ".", $DATA[2]);
1102    }
1103
1104
1105
1106
1107    /*
1108     * Clean JS and CSS cache files
1109     */
1110    function cleanCacheFiles() {
1111
1112        $_SERVER['HTTP_HOST'] = preg_replace("/:?\d+$/", '', $_SERVER['HTTP_HOST']);
1113        $cache = getCacheName('scripts'.$_SERVER['HTTP_HOST'].'-siteexport-js-'.$_SERVER['SERVER_PORT'],'.js');
1114        $this->unlinkIfExists($cache);
1115
1116        $tpl = trim(preg_replace('/[^\w-]+/','',$_REQUEST['template']));
1117        if($tpl)
1118        {
1119            $tplinc = DOKU_INC.'lib/tpl/'.$tpl.'/';
1120            $tpldir = DOKU_BASE.'lib/tpl/'.$tpl.'/';
1121        } else {
1122            $tplinc = DOKU_TPLINC;
1123            $tpldir = DOKU_TPL;
1124        }
1125
1126        // The generated script depends on some dynamic options
1127        $cache = getCacheName('styles'.$_SERVER['HTTP_HOST'].'-siteexport-js-'.$_SERVER['SERVER_PORT'].DOKU_BASE.$tplinc.$style,'.css');
1128        $this->unlinkIfExists($cache);
1129    }
1130
1131    function unlinkIfExists($cache) {
1132        if ( file_exists($cache) ) {
1133            @unlink($cache);
1134            if(function_exists('gzopen')) @unlink("$cache.gz");
1135        }
1136    }
1137
1138    // Private unset function
1139    private function clear(&$variable)
1140    {
1141        if ( isset($variable) )
1142        {
1143            unset($variable);
1144        }
1145    }
1146}