xref: /plugin/siteexport/inc/readCSS.patch (revision ed50730582ed6baa071dc0d2c327c7b4a387d58b)
/**
 * Collects the CSS referenced by, or embedded in, an HTML document, parses
 * the rules and stores them on this object.
 *
 * Processing order:
 *  1. <style> and <link> elements carrying a media attribute that matches
 *     neither $this->mpdf->CSSselectMedia nor "all" are removed.
 *  2. HTML and CSS comments are stripped from the content of <style> elements;
 *     the <!--mpdf / mpdf--> markers are removed (their content is kept) and
 *     every remaining HTML comment is replaced by a space.
 *  3. External stylesheets (<link rel="stylesheet">, @import) are fetched via
 *     $this->mpdf->_get_file(); nested @import rules are queued as well and
 *     background url() references are passed through GetFullPath() together
 *     with the stylesheet's directory.
 *  4. The content of inline <style> elements is appended.
 *  5. @media blocks are filtered/unwrapped, base64 data-URI images are written
 *     to files below _MPDF_TEMP_PATH, and the rules are parsed.
 *
 * Single-token selectors and @page selectors are stored in $this->CSS;
 * multi-token (descendant) selectors are stored as a nested array in
 * $this->cascadeCSS. $this->mpdf->mirrorMargins is set to true when an
 * @page :left / :right pseudo selector is encountered.
 *
 * @param string $html HTML document or fragment to scan.
 * @return string The HTML with all <style> elements removed (in addition to
 *                the media-filtered elements and the comments removed above).
 */
function ReadCSS($html) {
	// Drop <style> elements whose media attribute matches neither the selected medium nor "all".
	preg_match_all('/<style[^>]*media=["\']([^"\'>]*)["\'].*?<\/style>/is',$html,$m);
	for($i=0; $i<count($m[0]); $i++) {
		if ($this->mpdf->CSSselectMedia && !preg_match('/('.trim($this->mpdf->CSSselectMedia).'|all)/i',$m[1][$i])) {
			$html = str_replace($m[0][$i],'',$html);
		}
	}
	// Same media filter for <link> elements.
	preg_match_all('/<link[^>]*media=["\']([^"\'>]*)["\'].*?>/is',$html,$m);
	for($i=0; $i<count($m[0]); $i++) {
		if ($this->mpdf->CSSselectMedia && !preg_match('/('.trim($this->mpdf->CSSselectMedia).'|all)/i',$m[1][$i])) {
			$html = str_replace($m[0][$i],'',$html);
		}
	}

	// mPDF 5.5.02
	// Remove Comment tags <!-- ... --> inside CSS as <style> in HTML document
	// Remove Comment tags /* ...  */ inside CSS as <style> in HTML document
	// But first, we replace upper and mixed case closing style tag with lower
	// case so we can use str_replace later.
	// The result must be assigned back: preg_replace() returns the new string
	// and leaves its subject untouched, so without the assignment the
	// case-sensitive str_replace() below never matches </STYLE>, </Style>, ...
	$html = preg_replace('/<\/style>/i', '</style>', $html);
	preg_match_all('/<style.*?>(.*?)<\/style>/si',$html,$m);
	if (count($m[1])) {
		for($i=0;$i<count($m[1]);$i++) {
			// Remove comment tags
			$sub = preg_replace('/(<\!\-\-|\-\->)/s',' ',$m[1][$i]);
			$sub = '>'.preg_replace('|/\*.*?\*/|s',' ',$sub).'</style>';
			$html = str_replace('>'.$m[1][$i].'</style>', $sub, $html);
		}
	}


	// Strip the mpdf comment markers (keeping what they wrap), then blank out
	// every remaining HTML comment.
	$html = preg_replace('/<!--mpdf/i','',$html);
	$html = preg_replace('/mpdf-->/i','',$html);
	$html = preg_replace('/<\!\-\-.*?\-\->/s',' ',$html);

	$match = 0; // number of stylesheet references still to be processed
	$regexp = ''; // This helps debugging: showing what is the REAL string being processed
	$CSSext = array(); // queue of stylesheet paths; kept in step with $match

	//CSS inside external files (rel attribute before href)
	$regexp = '/<link[^>]*rel=["\']stylesheet["\'][^>]*href=["\']([^>"\']*)["\'].*?>/si';
	$x = preg_match_all($regexp,$html,$cxt);
	if ($x) {
		$match += $x;
		$CSSext = $cxt[1];
	}

	// ... and with href before rel
	$regexp = '/<link[^>]*href=["\']([^>"\']*)["\'][^>]*?rel=["\']stylesheet["\'].*?>/si';
	$x = preg_match_all($regexp,$html,$cxt);
	if ($x) {
		$match += $x;
		$CSSext = array_merge($CSSext,$cxt[1]);
	}

	// look for @import stylesheets
	$regexp = '/@import url\([\'\"]{0,1}([^\)]*?\.css(\?\S+)?)[\'\"]{0,1}\)/si';
	$x = preg_match_all($regexp,$html,$cxt);
	if ($x) {
		$match += $x;
		$CSSext = array_merge($CSSext,$cxt[1]);
	}

	// look for @import without the url()
	$regexp = '/@import [\'\"]{0,1}([^;]*?\.css(\?\S+)?)[\'\"]{0,1}/si';
	$x = preg_match_all($regexp,$html,$cxt);
	if ($x) {
		$match += $x;
		$CSSext = array_merge($CSSext,$cxt[1]);
	}

	$ind = 0;
	$CSSstr = '';

	if (!is_array($this->cascadeCSS)) $this->cascadeCSS = array();

	// Work through the queue; nested @import rules found on the way are
	// appended to $CSSext and counted in $match so they get processed too.
	while($match){
		$path = $CSSext[$ind];
		// NOTE(review): GetFullPath() is called for its effect on the variable
		// passed in (its return value is never used) - presumably by reference.
		$this->mpdf->GetFullPath($path);
		$CSSextblock = $this->mpdf->_get_file($path);
		if ($CSSextblock) {
			// look for embedded @import stylesheets in other stylesheets
			// and fix url paths (including background-images) relative to stylesheet
			$regexpem = '/@import url\([\'\"]{0,1}(.*?\.css(\?\S+)?)[\'\"]{0,1}\)/si';
			$xem = preg_match_all($regexpem,$CSSextblock,$cxtem);
			// Directory part of the stylesheet path, with trailing slash.
			$cssBasePath = preg_replace('/\/[^\/]*$/','',$path) . '/';
			if ($xem) {
				foreach($cxtem[1] AS $cxtembedded) {
					// path is relative to original stylesheet!!
					$this->mpdf->GetFullPath($cxtembedded, $cssBasePath );
					$match++;
					$CSSext[] = $cxtembedded;
				}
			}
			$regexpem = '/(background[^;]*url\s*\(\s*[\'\"]{0,1})([^\)\'\"]*)([\'\"]{0,1}\s*\))/si';
			$xem = preg_match_all($regexpem,$CSSextblock,$cxtem);
			if ($xem) {
				for ($i=0;$i<count($cxtem[0]);$i++) {
					// path is relative to original stylesheet!!
					$embedded = $cxtem[2][$i];
					if (!preg_match('/^data:image/i', $embedded)) {	// mPDF 5.5.13
						$this->mpdf->GetFullPath($embedded, $cssBasePath );
						$CSSextblock = str_replace($cxtem[0][$i], ($cxtem[1][$i].$embedded.$cxtem[3][$i]), $CSSextblock);
					}
				}
			}
			$CSSstr .= ' '.$CSSextblock;
		}
		$match--;
		$ind++;
	} //end of match

	$match = 0; // reset value, if needed
	// CSS as <style> in HTML document
	$regexp = '/<style.*?>(.*?)<\/style>/si';
	$match = preg_match_all($regexp,$html,$CSSblock);
	if ($match) {
		$tmpCSSstr = implode(' ',$CSSblock[1]);
		$regexpem = '/(background[^;]*url\s*\(\s*[\'\"]{0,1})([^\)\'\"]*)([\'\"]{0,1}\s*\))/si';
		$xem = preg_match_all($regexpem,$tmpCSSstr ,$cxtem);
		if ($xem) {
			for ($i=0;$i<count($cxtem[0]);$i++) {
				$embedded = $cxtem[2][$i];
				if (!preg_match('/^data:image/i', $embedded)) {	// mPDF 5.5.13
					$this->mpdf->GetFullPath($embedded);
					$tmpCSSstr = str_replace($cxtem[0][$i], ($cxtem[1][$i].$embedded.$cxtem[3][$i]), $tmpCSSstr );
				}
			}
		}
		$CSSstr .= ' '.$tmpCSSstr;
	}
	// Remove comments
	$CSSstr = preg_replace('|/\*.*?\*/|s',' ',$CSSstr);
	// Turn every whitespace character into a plain space (puts the CSS on one line).
	$CSSstr = preg_replace('/[\s\n\r\t\f]/s',' ',$CSSstr);

	if (preg_match('/@media/',$CSSstr)) {
		preg_match_all('/@media(.*?)\{(([^\{\}]*\{[^\{\}]*\})+)\s*\}/is',$CSSstr,$m);
		for($i=0; $i<count($m[0]); $i++) {
			if ($this->mpdf->CSSselectMedia && !preg_match('/('.trim($this->mpdf->CSSselectMedia).'|all)/is',$m[1][$i])) {
				// Medium does not apply: drop the whole @media block.
				$CSSstr = str_replace($m[0][$i], '', $CSSstr);
			}
			else {
				// Medium applies: keep the inner rules, discard the @media wrapper.
				$CSSstr = str_replace($m[0][$i],' '.$m[2][$i].' ',$CSSstr);
			}
		}
	}

	// mPDF 5.5.13
	// Replace any background: url(data:image... with temporary image file reference
	// The payload is matched lazily: the CSS is a single line at this point, so
	// a greedy (.*) would run up to the LAST ')' of the whole stylesheet and
	// swallow every rule after the first data URI.
	preg_match_all("/(url\(data:image\/(jpeg|gif|png);base64,(.*?)\))/si", $CSSstr, $idata);
	if (count($idata[0])) {
		for($i=0;$i<count($idata[0]);$i++) {
			$file = _MPDF_TEMP_PATH.'_tempCSSidata'.RAND(1,10000).'_'.$i.'.'.$idata[2][$i];
			//Save to local file
			file_put_contents($file, base64_decode($idata[3][$i]));
			// $this->mpdf->GetFullPath($file);	// ? is this needed - NO  mPDF 5.6.03
			$CSSstr = str_replace($idata[0][$i], 'url("'.$file.'")', $CSSstr); 	// mPDF 5.5.17
		}
	}

	$CSSstr = preg_replace('/(<\!\-\-|\-\->)/s',' ',$CSSstr);
	if ($CSSstr ) {
		// Start from an empty property set; it is reset again after every rule.
		// Without this the first rule handed an undefined variable to fixCSS()
		// whenever it contained no usable declaration.
		$classproperties = array();
		preg_match_all('/(.*?)\{(.*?)\}/',$CSSstr,$styles);
		for($i=0; $i < count($styles[1]) ; $i++)  {
			// SET array e.g. $classproperties['COLOR'] = '#ffffff';
			$stylestr= trim($styles[2][$i]);
			$stylearr = explode(';',$stylestr);
			foreach($stylearr AS $sta) {
				if (trim($sta)) {
					// A fragment without a colon is not a "property: value" pair.
					// It was ignored before as well, but only after an undefined
					// offset warning from list(); skip it explicitly.
					if (strpos($sta, ':') === false) {
						continue;
					}
					// Changed to allow style="background: url('http://www.bpm1.com/bg.jpg')"
					list($property,$value) = explode(':',$sta,2);
					$property = trim($property);
					$value = preg_replace('/\s*!important/i','',$value);
					$value = trim($value);
					if ($property && ($value || $value==='0')) {
						// Ignores -webkit-gradient so doesn't override -moz-
						if ((strtoupper($property)=='BACKGROUND-IMAGE' || strtoupper($property)=='BACKGROUND') && preg_match('/-webkit-gradient/i',$value)) {
							continue;
						}
						$classproperties[strtoupper($property)] = $value;
					}
				}
			}
			$classproperties = $this->fixCSS($classproperties);
			// The selector list is upper-cased and split on commas; each selector
			// is then split on whitespace into its individual tokens.
			$tagstr = strtoupper(trim($styles[1][$i]));
			$tagarr = explode(',',$tagstr);
			$pageselectors = false;	// used to turn on $this->mpdf->mirrorMargins
			foreach($tagarr AS $tg) {
				$tags = preg_split('/\s+/',trim($tg));
				$level = count($tags);
				$t = '';
				$t2 = '';
				$t3 = '';
				if (trim($tags[0])=='@PAGE') {
					// @page, @page :pseudo, @page name, @page name :pseudo
					if (isset($tags[0])) { $t = trim($tags[0]); }
					if (isset($tags[1])) { $t2 = trim($tags[1]); }
					if (isset($tags[2])) { $t3 = trim($tags[2]); }
					$tag = '';
					if ($level==1) { $tag = $t; }
					else if ($level==2 && preg_match('/^[:](.*)$/',$t2,$m)) {
						$tag = $t.'>>PSEUDO>>'.$m[1];
						if ($m[1]=='LEFT' || $m[1]=='RIGHT') { $pageselectors = true; }	// used to turn on $this->mpdf->mirrorMargins
					}
					else if ($level==2) { $tag = $t.'>>NAMED>>'.$t2; }
					else if ($level==3 && preg_match('/^[:](.*)$/',$t3,$m)) {
						$tag = $t.'>>NAMED>>'.$t2.'>>PSEUDO>>'.$m[1];
						if ($m[1]=='LEFT' || $m[1]=='RIGHT') { $pageselectors = true; }	// used to turn on $this->mpdf->mirrorMargins
					}
					if (isset($this->CSS[$tag]) && $tag) { $this->CSS[$tag] = $this->array_merge_recursive_unique($this->CSS[$tag], $classproperties); }
					else if ($tag) { $this->CSS[$tag] = $classproperties; }
				}

				else if ($level == 1) {		// e.g. p or .class or #id or p.class or p#id
					if (isset($tags[0])) { $t = trim($tags[0]); }
					if ($t) {
						$tag = '';
						if (preg_match('/^[.](.*)$/',$t,$m)) { $tag = 'CLASS>>'.$m[1]; }
						else if (preg_match('/^[#](.*)$/',$t,$m)) { $tag = 'ID>>'.$m[1]; }
						else if (preg_match('/^('.$this->mpdf->allowedCSStags.')[.](.*)$/',$t,$m)) { $tag = $m[1].'>>CLASS>>'.$m[2]; }
						else if (preg_match('/^('.$this->mpdf->allowedCSStags.')\s*:NTH-CHILD\((.*)\)$/',$t,$m)) { $tag = $m[1].'>>SELECTORNTHCHILD>>'.$m[2]; }
						else if (preg_match('/^('.$this->mpdf->allowedCSStags.')[#](.*)$/',$t,$m)) { $tag = $m[1].'>>ID>>'.$m[2]; }
						else if (preg_match('/^('.$this->mpdf->allowedCSStags.')$/',$t)) { $tag= $t; }
						if (isset($this->CSS[$tag]) && $tag) { $this->CSS[$tag] = $this->array_merge_recursive_unique($this->CSS[$tag], $classproperties); }
						else if ($tag) { $this->CSS[$tag] = $classproperties; }
					}
				}
				else {
					// Descendant selector: translate every token; stop at the
					// first token that cannot be translated.
					$tmp = array();
					for($n=0;$n<$level;$n++) {
						if (isset($tags[$n])) { $t = trim($tags[$n]); }
						else { $t = ''; }
						if ($t) {
							$tag = '';
							if (preg_match('/^[.](.*)$/',$t,$m)) { $tag = 'CLASS>>'.$m[1]; }
							else if (preg_match('/^[#](.*)$/',$t,$m)) { $tag = 'ID>>'.$m[1]; }
							else if (preg_match('/^('.$this->mpdf->allowedCSStags.')[.](.*)$/',$t,$m)) { $tag = $m[1].'>>CLASS>>'.$m[2]; }
							else if (preg_match('/^('.$this->mpdf->allowedCSStags.')\s*:NTH-CHILD\((.*)\)$/',$t,$m)) { $tag = $m[1].'>>SELECTORNTHCHILD>>'.$m[2]; }
							else if (preg_match('/^('.$this->mpdf->allowedCSStags.')[#](.*)$/',$t,$m)) { $tag = $m[1].'>>ID>>'.$m[2]; }
							else if (preg_match('/^('.$this->mpdf->allowedCSStags.')$/',$t)) { $tag= $t; }

							if ($tag) $tmp[] = $tag;
							else { break; }
						}
					}

					// $tag is empty here when the loop above stopped early, so
					// only fully translated selectors are stored.
					if ($tag) {
						// Walk (creating as needed) one nesting level of
						// cascadeCSS per selector token.
						$x = &$this->cascadeCSS;
						foreach($tmp AS $tp) { $x = &$x[$tp]; }
						$x = $this->array_merge_recursive_unique($x, $classproperties);
						$x['depth'] = $level;
						// Break the reference so $x no longer aliases cascadeCSS.
						unset($x);
					}
				}
			}
			if ($pageselectors) { $this->mpdf->mirrorMargins = true; }
			$classproperties = array();
		}
	} // end of if
	//Remove CSS (tags and content), if any
	$regexp = '/<style.*?>(.*?)<\/style>/si'; // it can be <style> or <style type="txt/css">
	$html = preg_replace($regexp,'',$html);
	return $html;
}
269