xref: /plugin/siteexport/inc/pdfgenerator.php (revision 56c4b32d8c91c103da10f9e5918de8f7bcaa82cd)
1<?php
2
3if (!defined('DOKU_PLUGIN')) die('meh');
4
if (!function_exists('siteexport_dw2pdf_has_mpdf_engine')) {
    /**
     * Check whether the dw2pdf plugin directory contains an mPDF engine.
     *
     * Two layouts are accepted:
     *  - the legacy bundled library (dw2pdf/mpdf/mpdf.php), or
     *  - a Composer install, which needs both dw2pdf/vendor/autoload.php
     *    and dw2pdf/DokuPDF.class.php to be present.
     *
     * @return bool true when one of the two layouts is found below DOKU_PLUGIN
     */
    function siteexport_dw2pdf_has_mpdf_engine()
    {
        // Legacy layout: a single file is enough.
        $legacyEngine = DOKU_PLUGIN . 'dw2pdf/mpdf/mpdf.php';
        if (file_exists($legacyEngine)) {
            return true;
        }

        // Composer layout: every listed file has to exist.
        $composerFiles = array(
            DOKU_PLUGIN . 'dw2pdf/vendor/autoload.php',
            DOKU_PLUGIN . 'dw2pdf/DokuPDF.class.php',
        );
        foreach ($composerFiles as $requiredFile) {
            if (!file_exists($requiredFile)) {
                return false;
            }
        }

        return true;
    }
}
18
19if (!empty($_REQUEST['pdfExport']) && intval($_REQUEST['pdfExport']) == 1 && siteexport_dw2pdf_has_mpdf_engine()) {
20
21    require_once(DOKU_PLUGIN . 'siteexport/inc/mpdf.php');
22    class siteexport_pdfgenerator
23    {
24        private $functions;
25
26        public function __construct($functions = null)
27        {
28            $this->functions = $functions;
29        }
30
31        public function createPDFFromFile($filename, &$NAME) {
32
33            global $INPUT;
34
35            if (!preg_match("/" . $this->functions->settings->fileType . "$/", $NAME)) {
36                $this->functions->debug->message("Filetype " . $this->functions->settings->fileType . " did not match filename '$NAME'", null, 4);
37                return false;
38            }
39
40            $mpdf = new siteexportPDF($this->functions->debug);
41
42            if (!$mpdf) {
43                $this->functions->debug->message("Could not instantiate MPDF", null, 4);
44                return false;
45            }
46
47            $html = @file_get_contents($filename);
48
49            if (!strstr($html, "<html")) {
50                $this->functions->debug->message("Filecontent had no HTML starting tag", null, 4);
51                return false;
52            }
53
54            // Save HTML too
55            $this->functions->debug->message("Arranging HTML", null, 2);
56            $this->arrangeHtml($html, 'bl,acronym');
57            $this->functions->debug->message("Done arranging HTML:", $html, 1);
58
59            $mpdf->debug = false;
60            $mpdf->list_indent_first_level = 1; // Indents the first level of lists.
61
62            // Legacy mPDF only; omitted with mPDF 8 / DokuPDF
63            if (property_exists($mpdf, 'usepre')) {
64                $mpdf->usepre = false;
65            }
66            $mpdf->margin_bottom_collapse = true;
67            $mpdf->SetDisplayMode('fullpage');
68            if (property_exists($mpdf, 'restoreBlockPageBreaks')) {
69                $mpdf->restoreBlockPageBreaks = true;
70            }
71
72            $mpdf->dpi = $INPUT->int('dpi', 96, true);
73            $mpdf->img_dpi = $INPUT->int('dpi', 96, true);
74
75            $mpdf->setBasePath(empty($this->functions->settings->depth) ? './' : $this->functions->settings->depth);
76
77            $mpdf->ignore_invalid_utf8 = true;
78            $mpdf->mirrorMargins = $this->functions->getConf('useOddEven'); // don't mirror margins
79            $mpdf->setAutoTopMargin = 'pad';
80            $mpdf->setAutoBottomMargin = 'pad';
81
82            $mpdf->WriteHTML($html);
83            $mpdf->Output($filename, "F");
84
85            return $html;
86        }
87
88        private function arrangeHtml(&$html, $norendertags = '')
89        {
90            global $conf;
91
92            // add bookmark links
93            $html = preg_replace_callback("/<h(\d)(.*?)>(.*?)<\/h\\1>/s", array($this, '__pdfHeaderCallback'), $html);
94            $html = preg_replace_callback("/<\/div>\s*?<h({$conf['plugin']['siteexport']['PDFHeaderPagebreak']})(.*?)>/s", array($this, '__pdfHeaderCallbackPagebreak'), $html);
95            $html = preg_replace("/(<img.*?mediacenter.*?\/>)/", "<table style=\"width:100%; border: 0px solid #000;\"><tr><td style=\"text-align: center\">$1</td></tr></table>", $html);
96
97            // Remove p arround img and table
98            $html = preg_replace("/<p[^>]*?>(\s*?<img[^>]*?\/?>\s*?)<\/p>/s", "$1", $html);
99            $html = preg_replace("/<p[^>]*?>(\s*?<table.*?<\/table>\s*?)<\/p>/s", "$1", $html);
100            $html = preg_replace_callback("/<pre(.*?)>(.*?)<\/pre>/s", array($this, '__pdfPreCodeCallback'), $html);
101            $html = preg_replace_callback("/<a href=\"mailto:(.*?)\".*?>(.*?)<\/a>/s", array($this, '__pdfMailtoCallback'), $html);
102            /**/
103
104            $standardReplacer = array(
105            // insert a pagebreak for support of WRAP and PAGEBREAK plugins
106                                    '<br style="page-break-after:always;">' => '<pagebreak />',
107                                    '<div class="wrap_pagebreak"></div>' => '<pagebreak />',
108                                    '<sup>' => '<sup class="sup">',
109                                    '<sub>' => '<sub class="sub">',
110                                    '<code>' => '<code class="code">'
111            );
112            $html = str_replace(array_keys($standardReplacer), array_values($standardReplacer), $html);
113
114            // thanks to Jared Ong
115            // Customized to strip all span tags so that the wiki <code> SQL would display properly
116            $norender = explode(',', $norendertags);
117            $html = $this->strip_only($html, $norender); //array('span','acronym'));
118            $html = $this->strip_htmlencodedchars($html);
119            // Customized to strip all span tags so that the wiki <code> SQL would display properly
120        }
121
122        private function __pdfMailtoCallback($DATA) {
123            if ($DATA[1] == $DATA[2]) {
124                $DATA[2] = $this->deobfuscate($DATA[2]);
125            }
126            $DATA[1] = $this->deobfuscate($DATA[1]);
127            return "<a href=\"mailto:{$DATA[1]}\">{$DATA[2]}</a>";
128        }
129
130        private function __pdfPreCodeCallback($DATA) {
131
132            $code = nl2br($DATA[2]);
133            $code = preg_replace_callback("/(^|<br \/>)(\s+)(\S)/s", array($this, '__pdfPreWhitespacesCallback'), $code);
134
135            return "\n<pre" . $DATA[1] . ">\n" . $code . "\n</pre>\n";
136        }
137
138        private function __pdfPreWhitespacesCallback($DATA) {
139            return $DATA[1] . "\n" . str_repeat("&nbsp;", strlen($DATA[2])-($DATA[2][0] == "\n" ? 1 : 0)) . $DATA[3];
140        }
141
142        private function __pdfHeaderCallback($DATA) {
143            $contentText = htmlspecialchars_decode(preg_replace("/<\/?.*?>/s", '', $DATA[3]), ENT_NOQUOTES); // 2014-07-23 Do not encode again. or &auml; -> &amp;auml;
144            return '<h' . $DATA[1] . $DATA[2] . '><tocentry content="' . $contentText . '" level="' . ($DATA[1]-1) . '" /><bookmark content="' . $contentText . '" level="' . ($DATA[1]-1) . '" />' . $DATA[3] . '</h' . $DATA[1] . '>';
145        }
146
147        private function __pdfHeaderCallbackPagebreak($DATA) {
148            return '</div>' . "\r\n" . '<pagebreak />' . "\r\n\r\n" . '<h' . $DATA[1] . $DATA[2] . '>';
149        }
150        // thanks to Jared Ong
151        // Custom function for help in stripping span tags
152        private function strip_only($str, $tags) {
153            if (!is_array($tags)) {
154                $tags = (strpos($str, '>') !== false ? explode('>', str_replace('<', '', $tags)) : array($tags));
155                if (end($tags) == '') array_pop($tags);
156            }
157
158            foreach ($tags as $tag) $str = preg_replace('#</?' . $tag . '[^>]*>#is', '', $str);
159            return $str;
160        }
161        // Custom function for help in stripping span tags
162
163        // Custom function for help in replacing &#039; &quot; &gt; &lt; &amp;
164        private function strip_htmlencodedchars($str) {
165            $str = str_replace('&#039;', '\'', $str);
166            return $str;
167        }
168        // Custom function for help in replacing &#039; &quot; &gt; &lt; &amp;
169
170        /**
171         * return an de-obfuscated email address in line with $conf['mailguard'] setting
172         */
173        private function deobfuscate($email) {
174            global $conf;
175
176            switch ($conf['mailguard']) {
177                case 'visible' :
178                    return /** @scrutinizer ignore-call */ strtr($email, array(' [at] ' => '@', ' [dot] ' => '.', ' [dash] ' => '-'));
179
180                case 'hex' :
181                    $encode = '';
182                    $len = strlen($email);
183                    for ($x = 0; $x < $len; $x += 6) {
184                        $encode .= chr((int)hexdec($email[$x+3] . $email[$x+4]));
185                    }
186                    return $encode;
187
188                case 'none' :
189                default :
190                    return $email;
191            }
192        }
193    }
194}
195