xref: /plugin/siteexport/inc/pdfgenerator.php (revision 7d101cc131696cb3a0de345d8044a69fb2ef70e9)
1*7d101cc1SGerry Weißbach<?php
2*7d101cc1SGerry Weißbach
3*7d101cc1SGerry Weißbachif(!defined('DOKU_PLUGIN')) die('meh');
4*7d101cc1SGerry Weißbach
5*7d101cc1SGerry Weißbachif ( intval($_REQUEST['pdfExport']) == 1 && file_exists(DOKU_PLUGIN . 'dw2pdf/mpdf/mpdf.php') ) {
6*7d101cc1SGerry Weißbach
7*7d101cc1SGerry Weißbach    require_once(DOKU_PLUGIN . 'siteexport/inc/mpdf.php');
8*7d101cc1SGerry Weißbach    class siteexport_pdfgenerator
9*7d101cc1SGerry Weißbach    {
10*7d101cc1SGerry Weißbach        private $functions;
11*7d101cc1SGerry Weißbach
12*7d101cc1SGerry Weißbach        public function siteexport_pdfgenerator( $functions=null )
13*7d101cc1SGerry Weißbach        {
14*7d101cc1SGerry Weißbach            $this->functions = $functions;
15*7d101cc1SGerry Weißbach        }
16*7d101cc1SGerry Weißbach
17*7d101cc1SGerry Weißbach        function createPDFFromFile($filename, &$NAME) {
18*7d101cc1SGerry Weißbach
19*7d101cc1SGerry Weißbach            if ( !preg_match("/" . $this->settings->fileType . "$/", $NAME) ) {
20*7d101cc1SGerry Weißbach                $this->functions->debug->message("Filetype {$this->settings->fileType} did not match filename '$NAME'", null, 4);
21*7d101cc1SGerry Weißbach                return false;
22*7d101cc1SGerry Weißbach            }
23*7d101cc1SGerry Weißbach
24*7d101cc1SGerry Weißbach            $mpdf = new siteexportPDF('UTF-8-s', $this->functions->debug);
25*7d101cc1SGerry Weißbach
26*7d101cc1SGerry Weißbach            if ( !$mpdf ) {
27*7d101cc1SGerry Weißbach                $this->functions->debug->message("Could not instantiate MPDF", null, 4);
28*7d101cc1SGerry Weißbach                return false;
29*7d101cc1SGerry Weißbach            }
30*7d101cc1SGerry Weißbach
31*7d101cc1SGerry Weißbach
32*7d101cc1SGerry Weißbach            $html = file_get_contents($filename);
33*7d101cc1SGerry Weißbach
34*7d101cc1SGerry Weißbach            if ( !strstr($html, "<html") ) {
35*7d101cc1SGerry Weißbach                $this->functions->debug->message("Filecontent had no HTML starting tag", null, 4);
36*7d101cc1SGerry Weißbach                return false;
37*7d101cc1SGerry Weißbach            }
38*7d101cc1SGerry Weißbach
39*7d101cc1SGerry Weißbach            // Save HTML too
40*7d101cc1SGerry Weißbach            $this->functions->debug->message("Arranging HTML", null, 2);
41*7d101cc1SGerry Weißbach            $this->arrangeHtml($html, 'bl,acronym');
42*7d101cc1SGerry Weißbach            $this->functions->debug->message("Done arranging HTML:", $html, 1);
43*7d101cc1SGerry Weißbach
44*7d101cc1SGerry Weißbach            $fp = fopen($filename, "w");
45*7d101cc1SGerry Weißbach            fwrite($fp, $html);
46*7d101cc1SGerry Weißbach            fclose($fp);
47*7d101cc1SGerry Weißbach
48*7d101cc1SGerry Weißbach            $mpdf->debug = true;
49*7d101cc1SGerry Weißbach            $mpdf->list_indent_first_level = 1; // Indents the first level of lists.
50*7d101cc1SGerry Weißbach            $mpdf->basepath = $this->functions->settings->depth;
51*7d101cc1SGerry Weißbach            $mpdf->usepre = false;
52*7d101cc1SGerry Weißbach            $mpdf->margin_bottom_collapse = true;
53*7d101cc1SGerry Weißbach            $mpdf->SetDisplayMode('fullpage');
54*7d101cc1SGerry Weißbach            $mpdf->restoreBlockPageBreaks = true;
55*7d101cc1SGerry Weißbach            $this->img_dpi = 300;
56*7d101cc1SGerry Weißbach
57*7d101cc1SGerry Weißbach            // $mpdf->setBasePath(empty($this->functions->settings->depth) ? './' : $this->functions->settings->depth);
58*7d101cc1SGerry Weißbach            $mpdf->SetAutoFont(AUTOFONT_ALL);
59*7d101cc1SGerry Weißbach
60*7d101cc1SGerry Weißbach            // Temp dir
61*7d101cc1SGerry Weißbach
62*7d101cc1SGerry Weißbach            $mpdf->ignore_invalid_utf8 = true;
63*7d101cc1SGerry Weißbach            $mpdf->mirrorMargins = 0;	// don't mirror margins
64*7d101cc1SGerry Weißbach            $mpdf->WriteHTML($html);
65*7d101cc1SGerry Weißbach            $mpdf->Output($filename, "F");
66*7d101cc1SGerry Weißbach
67*7d101cc1SGerry Weißbach            return true;
68*7d101cc1SGerry Weißbach        }
69*7d101cc1SGerry Weißbach
70*7d101cc1SGerry Weißbach        function arrangeHtml(&$html, $norendertags = '' )
71*7d101cc1SGerry Weißbach        {
72*7d101cc1SGerry Weißbach            global $conf;
73*7d101cc1SGerry Weißbach
74*7d101cc1SGerry Weißbach            // add bookmark links
75*7d101cc1SGerry Weißbach            $html = preg_replace_callback("/<h(\d)(.*?)>(.+?)<\/h\\1>/s", array($this, '__pdfHeaderCallback'), $html);
76*7d101cc1SGerry Weißbach            $html = preg_replace_callback("/<\/div>\s*?<h({$conf['plugin']['siteexport']['PDFHeaderPagebreak']})(.*?)>/s", array($this, '__pdfHeaderCallbackPagebreak'), $html);
77*7d101cc1SGerry Weißbach            $html = preg_replace("/(<img.*?mediacenter.*?\/>)/", "<table style=\"width:100%; border: 0px solid #000;\"><tr><td style=\"text-align: center\">$1</td></tr></table>", $html);
78*7d101cc1SGerry Weißbach            $html = preg_replace("/<p>(\s*?<table.*?<\/table>\s*?)<\/p>/s", "$1", $html);
79*7d101cc1SGerry Weißbach            $html = preg_replace_callback("/<pre(.*?)>(.*?)<\/pre>/s", array($this, '__pdfPreCodeCallback'), $html);
80*7d101cc1SGerry Weißbach            $html = preg_replace_callback("/<a href=\"mailto:(.*?)\".*?>(.*?)<\/a>/s", array($this, '__pdfMailtoCallback'), $html);
81*7d101cc1SGerry Weißbach            /**/
82*7d101cc1SGerry Weißbach
83*7d101cc1SGerry Weißbach            $standardReplacer = array (
84*7d101cc1SGerry Weißbach            // insert a pagebreak for support of WRAP and PAGEBREAK plugins
85*7d101cc1SGerry Weißbach        							'<br style="page-break-after:always;">' => '<pagebreak />',
86*7d101cc1SGerry Weißbach                                    '<div class="wrap_pagebreak"></div>' => '<pagebreak />',
87*7d101cc1SGerry Weißbach                                    '<sup>' => '<sup class="sup">',
88*7d101cc1SGerry Weißbach                                    '<sub>' => '<sub class="sub">',
89*7d101cc1SGerry Weißbach                                    '<code>' => '<code class="code">',
90*7d101cc1SGerry Weißbach            );
91*7d101cc1SGerry Weißbach            $html = str_replace(array_keys($standardReplacer), array_values($standardReplacer), $html);
92*7d101cc1SGerry Weißbach
93*7d101cc1SGerry Weißbach            // thanks to Jared Ong
94*7d101cc1SGerry Weißbach            // Customized to strip all span tags so that the wiki <code> SQL would display properly
95*7d101cc1SGerry Weißbach            $norender = explode(',',$norendertags);
96*7d101cc1SGerry Weißbach            $html = $this->strip_only($html, $norender ); //array('span','acronym'));
97*7d101cc1SGerry Weißbach            $html = $this->strip_htmlencodedchars($html);
98*7d101cc1SGerry Weißbach            // Customized to strip all span tags so that the wiki <code> SQL would display properly
99*7d101cc1SGerry Weißbach        }
100*7d101cc1SGerry Weißbach
101*7d101cc1SGerry Weißbach        private function __pdfMailtoCallback($DATA) {
102*7d101cc1SGerry Weißbach            if ( $DATA[1] == $DATA[2] ) {
103*7d101cc1SGerry Weißbach                $DATA[2] = $this->deobfuscate($DATA[2]);
104*7d101cc1SGerry Weißbach            }
105*7d101cc1SGerry Weißbach            $DATA[1] = $this->deobfuscate($DATA[1]);
106*7d101cc1SGerry Weißbach            return "<a href=\"mailto:{$DATA[1]}\">{$DATA[2]}</a>";
107*7d101cc1SGerry Weißbach        }
108*7d101cc1SGerry Weißbach
109*7d101cc1SGerry Weißbach        private function __pdfPreCodeCallback($DATA) {
110*7d101cc1SGerry Weißbach
111*7d101cc1SGerry Weißbach            $code = nl2br($DATA[2]);
112*7d101cc1SGerry Weißbach            $code = preg_replace_callback("/(^|<br \/>)(\s+)(\S)/s", array($this, '__pdfPreWhitespacesCallback'), $code);
113*7d101cc1SGerry Weißbach
114*7d101cc1SGerry Weißbach            return "\n<pre" . $DATA[1] . ">\n" . $code . "\n</pre>\n";
115*7d101cc1SGerry Weißbach        }
116*7d101cc1SGerry Weißbach
117*7d101cc1SGerry Weißbach        private function __pdfPreWhitespacesCallback( $DATA ) {
118*7d101cc1SGerry Weißbach            return $DATA[1] . "\n" . str_repeat("&nbsp;", strlen($DATA[2])-($DATA[2]{0}=="\n"?1:0) ) . $DATA[3];
119*7d101cc1SGerry Weißbach        }
120*7d101cc1SGerry Weißbach
121*7d101cc1SGerry Weißbach        private function __pdfHeaderCallback($DATA) {
122*7d101cc1SGerry Weißbach            $contentText = $this->xmlEntities(preg_replace("/<\/?.*?>/s", '', $DATA[3])); // Double encoding - has to be decoded in mpdf once more.
123*7d101cc1SGerry Weißbach            return '<tocentry content="' . $contentText . '" level="' . ($DATA[1]-1) . '" /><bookmark content="' . $contentText . '" level="' . ($DATA[1]-1) . '" /><h' . $DATA[1] . $DATA[2] . '>' . $DATA[3] . '</h' . $DATA[1] . '>';
124*7d101cc1SGerry Weißbach        }
125*7d101cc1SGerry Weißbach
126*7d101cc1SGerry Weißbach        private function __pdfHeaderCallbackPagebreak($DATA) {
127*7d101cc1SGerry Weißbach            return '</div>' . "\r\n" . '<pagebreak />' . "\r\n\r\n" . '<h' . $DATA[1] . $DATA[2] . '>';
128*7d101cc1SGerry Weißbach        }
129*7d101cc1SGerry Weißbach        // thanks to Jared Ong
130*7d101cc1SGerry Weißbach        // Custom function for help in stripping span tags
131*7d101cc1SGerry Weißbach        private function strip_only($str, $tags) {
132*7d101cc1SGerry Weißbach            if(!is_array($tags)) {
133*7d101cc1SGerry Weißbach                $tags = (strpos($str, '>') !== false ? explode('>', str_replace('<', '', $tags)) : array($tags));
134*7d101cc1SGerry Weißbach                if(end($tags) == '') array_pop($tags);
135*7d101cc1SGerry Weißbach            }
136*7d101cc1SGerry Weißbach
137*7d101cc1SGerry Weißbach            foreach($tags as $tag) $str = preg_replace('#</?'.$tag.'[^>]*>#is', '', $str);
138*7d101cc1SGerry Weißbach            return $str;
139*7d101cc1SGerry Weißbach        }
140*7d101cc1SGerry Weißbach        // Custom function for help in stripping span tags
141*7d101cc1SGerry Weißbach
142*7d101cc1SGerry Weißbach        // Custom function for help in replacing &#039; &quot; &gt; &lt; &amp;
143*7d101cc1SGerry Weißbach        private function strip_htmlencodedchars($str) {
144*7d101cc1SGerry Weißbach            $str = str_replace('&#039;', '\'', $str);
145*7d101cc1SGerry Weißbach            //        $str = str_replace('&quot;', '"', $str);
146*7d101cc1SGerry Weißbach            //        $str = str_replace('&gt;', '>', $str);
147*7d101cc1SGerry Weißbach            //        $str = str_replace('&lt;', '<', $str);
148*7d101cc1SGerry Weißbach            //        $str = str_replace('&amp;', '&', $str);
149*7d101cc1SGerry Weißbach            return $str;
150*7d101cc1SGerry Weißbach        }
151*7d101cc1SGerry Weißbach        // Custom function for help in replacing &#039; &quot; &gt; &lt; &amp;
152*7d101cc1SGerry Weißbach
153*7d101cc1SGerry Weißbach        /**
154*7d101cc1SGerry Weißbach         * return an de-obfuscated email address in line with $conf['mailguard'] setting
155*7d101cc1SGerry Weißbach         */
156*7d101cc1SGerry Weißbach        private function deobfuscate($email) {
157*7d101cc1SGerry Weißbach            global $conf;
158*7d101cc1SGerry Weißbach
159*7d101cc1SGerry Weißbach            switch ($conf['mailguard']) {
160*7d101cc1SGerry Weißbach                case 'visible' :
161*7d101cc1SGerry Weißbach                    $obfuscate = array(' [at] ' => '@', ' [dot] ' => '.', ' [dash] ' => '-');
162*7d101cc1SGerry Weißbach                    return strtr($email, $obfuscate);
163*7d101cc1SGerry Weißbach
164*7d101cc1SGerry Weißbach                case 'hex' :
165*7d101cc1SGerry Weißbach                    $encode = '';
166*7d101cc1SGerry Weißbach                    $len = strlen($email);
167*7d101cc1SGerry Weißbach                    for ($x=0; $x < $len; $x+=6){
168*7d101cc1SGerry Weißbach                        $encode .= chr(hexdec($email{$x+3}.$email{($x+4)}));
169*7d101cc1SGerry Weißbach                    }
170*7d101cc1SGerry Weißbach                    return $encode;
171*7d101cc1SGerry Weißbach
172*7d101cc1SGerry Weißbach                case 'none' :
173*7d101cc1SGerry Weißbach                default :
174*7d101cc1SGerry Weißbach                    return $email;
175*7d101cc1SGerry Weißbach            }
176*7d101cc1SGerry Weißbach        }
177*7d101cc1SGerry Weißbach
178*7d101cc1SGerry Weißbach        /**
179*7d101cc1SGerry Weißbach         * Encoding ()taken from DW - but without needing the renderer
180*7d101cc1SGerry Weißbach         **/
181*7d101cc1SGerry Weißbach        private function xmlEntities($string) {
182*7d101cc1SGerry Weißbach            return htmlspecialchars($string,ENT_QUOTES,'UTF-8');
183*7d101cc1SGerry Weißbach        }
184*7d101cc1SGerry Weißbach    }
185*7d101cc1SGerry Weißbach}
186*7d101cc1SGerry Weißbach?>