xref: /plugin/siteexport/inc/pdfgenerator.php (revision 7d101cc131696cb3a0de345d8044a69fb2ef70e9)
1<?php
2
3if(!defined('DOKU_PLUGIN')) die('meh');
4
5if ( intval($_REQUEST['pdfExport']) == 1 && file_exists(DOKU_PLUGIN . 'dw2pdf/mpdf/mpdf.php') ) {
6
7    require_once(DOKU_PLUGIN . 'siteexport/inc/mpdf.php');
class siteexport_pdfgenerator
{
    /**
     * Helper bundle handed in by the caller. This class reads
     * ->settings->fileType, ->settings->depth and ->debug->message() from it.
     *
     * @var object|null
     */
    private $functions;

    /**
     * @param object|null $functions helper bundle exposing ->settings and ->debug
     */
    public function __construct($functions = null)
    {
        $this->functions = $functions;
    }

    /**
     * Legacy PHP 4 style constructor name, kept as a plain method for
     * backward compatibility. PHP 8 no longer treats it as a constructor,
     * which is why __construct() above exists.
     *
     * @deprecated use the regular constructor
     * @param object|null $functions helper bundle exposing ->settings and ->debug
     */
    public function siteexport_pdfgenerator($functions = null)
    {
        $this->functions = $functions;
    }

    /**
     * Rewrites the HTML file at $filename for mPDF and then overwrites the
     * same path with the rendered PDF.
     *
     * The arranged HTML is written back to $filename first; mPDF output then
     * replaces it.
     *
     * @param string $filename path of the HTML file; overwritten in place
     * @param string $NAME     export file name, checked against the configured
     *                         file type (taken by reference for signature
     *                         compatibility; it is not modified here)
     * @return bool true when the PDF was written, false when the file was
     *              rejected or could not be read/written
     */
    public function createPDFFromFile($filename, &$NAME)
    {
        $debug    = $this->functions->debug;
        $fileType = $this->functions->settings->fileType;

        // The file type is matched literally as a suffix of the name.
        if (!preg_match('/' . preg_quote($fileType, '/') . '$/', $NAME)) {
            $debug->message("Filetype {$fileType} did not match filename '$NAME'", null, 4);
            return false;
        }

        // siteexportPDF is expected to come from siteexport/inc/mpdf.php,
        // which is required before this class is declared.
        $mpdf = new siteexportPDF('UTF-8-s', $debug);

        $html = file_get_contents($filename);
        if ($html === false) {
            $debug->message("Could not read file '$filename'", null, 4);
            return false;
        }

        if (strpos($html, '<html') === false) {
            $debug->message("Filecontent had no HTML starting tag", null, 4);
            return false;
        }

        // Save HTML too
        $debug->message("Arranging HTML", null, 2);
        $this->arrangeHtml($html, 'bl,acronym');
        $debug->message("Done arranging HTML:", $html, 1);

        if (file_put_contents($filename, $html) === false) {
            $debug->message("Could not write arranged HTML to '$filename'", null, 4);
            return false;
        }

        $mpdf->debug = true;
        $mpdf->list_indent_first_level = 1; // Indents the first level of lists.
        $mpdf->basepath = $this->functions->settings->depth;
        $mpdf->usepre = false;
        $mpdf->margin_bottom_collapse = true;
        $mpdf->SetDisplayMode('fullpage');
        $mpdf->restoreBlockPageBreaks = true;
        // NOTE(review): the original assigned img_dpi = 300 to $this (the
        // generator), which never reached mPDF. It probably meant
        // $mpdf->img_dpi. Not redirected here because that would change how
        // images are sized in existing exports - confirm before enabling.

        $mpdf->SetAutoFont(AUTOFONT_ALL);

        $mpdf->ignore_invalid_utf8 = true;
        $mpdf->mirrorMargins = 0; // don't mirror margins
        $mpdf->WriteHTML($html);
        $mpdf->Output($filename, "F");

        return true;
    }

    /**
     * Rewrites $html in place so that mPDF renders it as intended.
     *
     * Steps, in order: add tocentry/bookmark tags before headings, insert
     * page breaks before configured heading levels, centre "mediacenter"
     * images in a table, unwrap tables from paragraphs, rewrite <pre>
     * blocks, de-obfuscate mailto links, apply fixed string replacements,
     * strip the tags named in $norendertags and decode &#039;.
     *
     * @param string $html          document to rewrite (modified by reference)
     * @param string $norendertags  comma separated tag names to remove; the
     *                              default '' removes nothing
     * @return void
     */
    public function arrangeHtml(&$html, $norendertags = '')
    {
        global $conf;

        // add bookmark links
        $html = preg_replace_callback("/<h(\d)(.*?)>(.+?)<\/h\\1>/s", array($this, '__pdfHeaderCallback'), $html);

        // NOTE(review): the bookmark pass above puts <tocentry/><bookmark/>
        // directly before every heading that has content, so "</div><hN"
        // no longer occurs for those headings and this pass looks unable to
        // match them. Order kept as in the original - confirm the intent.
        $level = isset($conf['plugin']['siteexport']['PDFHeaderPagebreak'])
            ? (string) $conf['plugin']['siteexport']['PDFHeaderPagebreak']
            : '';
        if ($level !== '') {
            $html = preg_replace_callback("/<\/div>\s*?<h({$level})(.*?)>/s", array($this, '__pdfHeaderCallbackPagebreak'), $html);
        }

        $html = preg_replace("/(<img.*?mediacenter.*?\/>)/", "<table style=\"width:100%; border: 0px solid #000;\"><tr><td style=\"text-align: center\">$1</td></tr></table>", $html);
        $html = preg_replace("/<p>(\s*?<table.*?<\/table>\s*?)<\/p>/s", "$1", $html);
        $html = preg_replace_callback("/<pre(.*?)>(.*?)<\/pre>/s", array($this, '__pdfPreCodeCallback'), $html);
        $html = preg_replace_callback("/<a href=\"mailto:(.*?)\".*?>(.*?)<\/a>/s", array($this, '__pdfMailtoCallback'), $html);

        $standardReplacer = array(
            // insert a pagebreak for support of WRAP and PAGEBREAK plugins
            '<br style="page-break-after:always;">' => '<pagebreak />',
            '<div class="wrap_pagebreak"></div>' => '<pagebreak />',
            '<sup>' => '<sup class="sup">',
            '<sub>' => '<sub class="sub">',
            '<code>' => '<code class="code">',
        );
        $html = str_replace(array_keys($standardReplacer), array_values($standardReplacer), $html);

        // thanks to Jared Ong
        // Strip the requested tags (their content is kept) so that e.g. wiki
        // <code> blocks display properly.
        $norender = explode(',', $norendertags);
        $html = $this->strip_only($html, $norender);
        $html = $this->strip_htmlencodedchars($html);
    }

    /**
     * Rebuilds a mailto link with a de-obfuscated address.
     *
     * @param array $DATA regex match: [1] address from href, [2] link text
     * @return string plain <a href="mailto:..."> element
     */
    private function __pdfMailtoCallback($DATA)
    {
        // Only de-obfuscate the link text when it is the address itself.
        if ($DATA[1] == $DATA[2]) {
            $DATA[2] = $this->deobfuscate($DATA[2]);
        }
        $DATA[1] = $this->deobfuscate($DATA[1]);

        return "<a href=\"mailto:{$DATA[1]}\">{$DATA[2]}</a>";
    }

    /**
     * Rewrites a <pre> block: adds <br /> at line ends and turns leading
     * whitespace of each line into &nbsp; entities.
     *
     * @param array $DATA regex match: [1] attributes of <pre>, [2] content
     * @return string rewritten <pre> element
     */
    private function __pdfPreCodeCallback($DATA)
    {
        $code = nl2br($DATA[2]);
        $code = preg_replace_callback("/(^|<br \/>)(\s+)(\S)/s", array($this, '__pdfPreWhitespacesCallback'), $code);

        return "\n<pre" . $DATA[1] . ">\n" . $code . "\n</pre>\n";
    }

    /**
     * Replaces a run of leading whitespace with one &nbsp; per character.
     *
     * @param array $DATA regex match: [1] '' or '<br />', [2] whitespace run,
     *                    [3] first non-whitespace character
     * @return string
     */
    private function __pdfPreWhitespacesCallback($DATA)
    {
        $run = $DATA[2];
        // The newline that follows <br /> is re-emitted literally, so it is
        // not counted as indentation.
        $count = strlen($run) - ($run[0] == "\n" ? 1 : 0);

        return $DATA[1] . "\n" . str_repeat("&nbsp;", $count) . $DATA[3];
    }

    /**
     * Prefixes a heading with <tocentry> and <bookmark> tags.
     *
     * @param array $DATA regex match: [1] heading level digit, [2] heading
     *                    attributes, [3] heading content
     * @return string
     */
    private function __pdfHeaderCallback($DATA)
    {
        // Tags are removed from the title, then it is entity-encoded.
        // Double encoding - has to be decoded in mpdf once more.
        $contentText = $this->xmlEntities(preg_replace("/<\/?.*?>/s", '', $DATA[3]));
        $level = $DATA[1] - 1;

        return '<tocentry content="' . $contentText . '" level="' . $level . '" />'
            . '<bookmark content="' . $contentText . '" level="' . $level . '" />'
            . '<h' . $DATA[1] . $DATA[2] . '>' . $DATA[3] . '</h' . $DATA[1] . '>';
    }

    /**
     * Inserts a <pagebreak /> between a closing div and the heading after it.
     *
     * @param array $DATA regex match: [1] heading level, [2] heading attributes
     * @return string
     */
    private function __pdfHeaderCallbackPagebreak($DATA)
    {
        return '</div>' . "\r\n" . '<pagebreak />' . "\r\n\r\n" . '<h' . $DATA[1] . $DATA[2] . '>';
    }

    /**
     * Removes the opening and closing tags named in $tags, keeping content.
     * (thanks to Jared Ong)
     *
     * Names match as a prefix of the tag name, case-insensitively: 'bl' also
     * removes <blockquote> tags. Empty names are ignored, because an empty
     * name would otherwise match, and remove, every tag.
     *
     * @param string       $str  HTML to clean
     * @param array|string $tags list of tag names, or a string such as
     *                           '<span><a>' or a single name
     * @return string
     */
    private function strip_only($str, $tags)
    {
        if (!is_array($tags)) {
            $tags = (strpos($tags, '>') !== false ? explode('>', str_replace('<', '', $tags)) : array($tags));
            if (end($tags) == '') {
                array_pop($tags);
            }
        }

        foreach ($tags as $tag) {
            $tag = trim($tag);
            if ($tag === '') {
                continue;
            }
            $str = preg_replace('#</?' . preg_quote($tag, '#') . '[^>]*>#is', '', $str);
        }

        return $str;
    }

    /**
     * Replaces the &#039; entity with a literal apostrophe. Other entities
     * are left untouched.
     *
     * @param string $str
     * @return string
     */
    private function strip_htmlencodedchars($str)
    {
        return str_replace('&#039;', '\'', $str);
    }

    /**
     * Returns a de-obfuscated email address in line with the
     * $conf['mailguard'] setting.
     *
     * @param string $email possibly obfuscated address
     * @return string
     */
    private function deobfuscate($email)
    {
        global $conf;

        switch ($conf['mailguard']) {
            case 'visible':
                $obfuscate = array(' [at] ' => '@', ' [dot] ' => '.', ' [dash] ' => '-');
                return strtr($email, $obfuscate);

            case 'hex':
                // Input is read in 6 character groups; characters 3 and 4
                // of each group are taken as the hex digits of one byte
                // (i.e. the "&#xHH;" form). An incomplete trailing group
                // is skipped.
                $decoded = '';
                $len = strlen($email);
                for ($x = 0; $x + 4 < $len; $x += 6) {
                    $decoded .= chr(hexdec($email[$x + 3] . $email[$x + 4]));
                }
                return $decoded;

            case 'none':
            default:
                return $email;
        }
    }

    /**
     * Entity-encodes a string for use in an attribute value (taken from DW,
     * but without needing the renderer).
     *
     * @param string $string
     * @return string
     */
    private function xmlEntities($string)
    {
        return htmlspecialchars($string, ENT_QUOTES, 'UTF-8');
    }
}
185}
186?>