xref: /plugin/siteexport/inc/pdfgenerator.php (revision b324a1904db918260ecf6c6ae2a8ee1c5f7c9512)
1<?php
2
3if (!defined('DOKU_PLUGIN')) die('meh');
4
5if (!empty($_REQUEST['pdfExport']) && intval($_REQUEST['pdfExport']) == 1 && file_exists(DOKU_PLUGIN . 'dw2pdf/mpdf/mpdf.php')) {
6
7    require_once(DOKU_PLUGIN . 'siteexport/inc/mpdf.php');
8    class siteexport_pdfgenerator
9    {
10        private $functions;
11
12        public function __construct($functions = null)
13        {
14            $this->functions = $functions;
15        }
16
17        function createPDFFromFile($filename, &$NAME) {
18
19            if (!preg_match("/" . $this->settings->fileType . "$/", $NAME)) {
20                $this->functions->debug->message("Filetype {$this->settings->fileType} did not match filename '$NAME'", null, 4);
21                return false;
22            }
23
24            $mpdf = new siteexportPDF($this->functions->debug);
25
26            if (!$mpdf) {
27                $this->functions->debug->message("Could not instantiate MPDF", null, 4);
28                return false;
29            }
30
31            $html = @file_get_contents($filename);
32
33            if (!strstr($html, "<html")) {
34                $this->functions->debug->message("Filecontent had no HTML starting tag", null, 4);
35                return false;
36            }
37
38            // Save HTML too
39            $this->functions->debug->message("Arranging HTML", null, 2);
40            $this->arrangeHtml($html, 'bl,acronym');
41            $this->functions->debug->message("Done arranging HTML:", $html, 1);
42
43            $mpdf->debug = false;
44            $mpdf->list_indent_first_level = 1; // Indents the first level of lists.
45            //$mpdf->SetBasePath("/");
46            $mpdf->usepre = false;
47            $mpdf->margin_bottom_collapse = true;
48            $mpdf->SetDisplayMode('fullpage');
49            $mpdf->restoreBlockPageBreaks = true;
50            $this->img_dpi = 300;
51
52            $mpdf->setBasePath(empty($this->functions->settings->depth) ? './' : $this->functions->settings->depth);
53
54            $mpdf->ignore_invalid_utf8 = true;
55            $mpdf->mirrorMargins = $this->functions->getConf('useOddEven'); // don't mirror margins
56            $mpdf->setAutoTopMargin = 'pad';
57            $mpdf->setAutoBottomMargin = 'pad';
58
59            $mpdf->WriteHTML($html);
60            $mpdf->Output($filename, "F");
61/*
62            $this->functions->debug->message("Used images:", $mpdf->images, 1);
63            $this->functions->debug->message("Failed images:", $mpdf->failedimages, 1);
64/*/
65//*/
66            return $html;
67        }
68
69        function arrangeHtml(&$html, $norendertags = '')
70        {
71            global $conf;
72
73            // add bookmark links
74            $html = preg_replace_callback("/<h(\d)(.*?)>(.*?)<\/h\\1>/s", array($this, '__pdfHeaderCallback'), $html);
75            $html = preg_replace_callback("/<\/div>\s*?<h({$conf['plugin']['siteexport']['PDFHeaderPagebreak']})(.*?)>/s", array($this, '__pdfHeaderCallbackPagebreak'), $html);
76            $html = preg_replace("/(<img.*?mediacenter.*?\/>)/", "<table style=\"width:100%; border: 0px solid #000;\"><tr><td style=\"text-align: center\">$1</td></tr></table>", $html);
77
78            // Remove p arround img and table
79            $html = preg_replace("/<p[^>]*?>(\s*?<img[^>]*?\/?>\s*?)<\/p>/s", "$1", $html);
80            $html = preg_replace("/<p[^>]*?>(\s*?<table.*?<\/table>\s*?)<\/p>/s", "$1", $html);
81            $html = preg_replace_callback("/<pre(.*?)>(.*?)<\/pre>/s", array($this, '__pdfPreCodeCallback'), $html);
82            $html = preg_replace_callback("/<a href=\"mailto:(.*?)\".*?>(.*?)<\/a>/s", array($this, '__pdfMailtoCallback'), $html);
83            /**/
84
85            $standardReplacer = array(
86            // insert a pagebreak for support of WRAP and PAGEBREAK plugins
87                                    '<br style="page-break-after:always;">' => '<pagebreak />',
88                                    '<div class="wrap_pagebreak"></div>' => '<pagebreak />',
89                                    '<sup>' => '<sup class="sup">',
90                                    '<sub>' => '<sub class="sub">',
91                                    '<code>' => '<code class="code">'
92            );
93            $html = str_replace(array_keys($standardReplacer), array_values($standardReplacer), $html);
94
95            // thanks to Jared Ong
96            // Customized to strip all span tags so that the wiki <code> SQL would display properly
97            $norender = explode(',', $norendertags);
98            $html = $this->strip_only($html, $norender); //array('span','acronym'));
99            $html = $this->strip_htmlencodedchars($html);
100            // Customized to strip all span tags so that the wiki <code> SQL would display properly
101        }
102
103        private function __pdfMailtoCallback($DATA) {
104            if ($DATA[1] == $DATA[2]) {
105                $DATA[2] = $this->deobfuscate($DATA[2]);
106            }
107            $DATA[1] = $this->deobfuscate($DATA[1]);
108            return "<a href=\"mailto:{$DATA[1]}\">{$DATA[2]}</a>";
109        }
110
111        private function __pdfPreCodeCallback($DATA) {
112
113            $code = nl2br($DATA[2]);
114            $code = preg_replace_callback("/(^|<br \/>)(\s+)(\S)/s", array($this, '__pdfPreWhitespacesCallback'), $code);
115
116            return "\n<pre" . $DATA[1] . ">\n" . $code . "\n</pre>\n";
117        }
118
119        private function __pdfPreWhitespacesCallback($DATA) {
120            return $DATA[1] . "\n" . str_repeat("&nbsp;", strlen($DATA[2])-($DATA[2]{0} == "\n" ? 1 : 0)) . $DATA[3];
121        }
122
123        private function __pdfHeaderCallback($DATA) {
124            //*
125            $contentText = htmlspecialchars_decode(preg_replace("/<\/?.*?>/s", '', $DATA[3]), ENT_NOQUOTES); // 2014-07-23 Do not encode again. or &auml; -> &amp;auml;
126            /*/
127            $contentText = $this->xmlEntities(preg_replace("/<\/?.*?>/s", '', $DATA[3])); // Double encoding - has to be decoded in mpdf once more.
128            //*/
129            return '<h' . $DATA[1] . $DATA[2] . '><tocentry content="' . $contentText . '" level="' . ($DATA[1]-1) . '" /><bookmark content="' . $contentText . '" level="' . ($DATA[1]-1) . '" />' . $DATA[3] . '</h' . $DATA[1] . '>';
130        }
131
132        private function __pdfHeaderCallbackPagebreak($DATA) {
133            return '</div>' . "\r\n" . '<pagebreak />' . "\r\n\r\n" . '<h' . $DATA[1] . $DATA[2] . '>';
134        }
135        // thanks to Jared Ong
136        // Custom function for help in stripping span tags
137        private function strip_only($str, $tags) {
138            if (!is_array($tags)) {
139                $tags = (strpos($str, '>') !== false ? explode('>', str_replace('<', '', $tags)) : array($tags));
140                if (end($tags) == '') array_pop($tags);
141            }
142
143            foreach ($tags as $tag) $str = preg_replace('#</?' . $tag . '[^>]*>#is', '', $str);
144            return $str;
145        }
146        // Custom function for help in stripping span tags
147
148        // Custom function for help in replacing &#039; &quot; &gt; &lt; &amp;
149        private function strip_htmlencodedchars($str) {
150            $str = str_replace('&#039;', '\'', $str);
151            //        $str = str_replace('&quot;', '"', $str);
152            //        $str = str_replace('&gt;', '>', $str);
153            //        $str = str_replace('&lt;', '<', $str);
154            //        $str = str_replace('&amp;', '&', $str);
155            return $str;
156        }
157        // Custom function for help in replacing &#039; &quot; &gt; &lt; &amp;
158
159        /**
160         * return an de-obfuscated email address in line with $conf['mailguard'] setting
161         */
162        private function deobfuscate($email) {
163            global $conf;
164
165            switch ($conf['mailguard']) {
166                case 'visible' :
167                    $obfuscate = array(' [at] ' => '@', ' [dot] ' => '.', ' [dash] ' => '-');
168                    return strtr($email, $obfuscate);
169
170                case 'hex' :
171                    $encode = '';
172                    $len = strlen($email);
173                    for ($x = 0; $x < $len; $x += 6) {
174                        $encode .= chr(hexdec($email{$x+3} . $email{($x+4)}));
175                    }
176                    return $encode;
177
178                case 'none' :
179                default :
180                    return $email;
181            }
182        }
183
184        /**
185         * Encoding ()taken from DW - but without needing the renderer
186         **/
187        private function xmlEntities($string) {
188            return htmlspecialchars($string, ENT_QUOTES, 'UTF-8');
189        }
190    }
191}
192