xref: /plugin/siteexport/inc/readCSS.patch (revision 7181180dce7fe3c0813ea41b412f45fd461096af)
/**
 * Collect the CSS that applies to an HTML document, register it, and return
 * the document with its <style> blocks removed.
 *
 * Steps, in order:
 *  - when $this->mpdf->CSSselectMedia is set, drop <style>/<link> elements whose
 *    media attribute matches neither that value nor "all";
 *  - strip comment markers from inline <style> blocks, unwrap
 *    "<!--mpdf ... mpdf-->" sections and remove all other HTML comments;
 *  - load external stylesheets named by <link rel="stylesheet"> and @import
 *    (including @import rules found inside the loaded stylesheets), passing
 *    background url() paths through $this->mpdf->GetFullPath();
 *  - unwrap / discard @media sections and write data: URI images to files
 *    under _MPDF_TEMP_PATH;
 *  - parse the resulting rules into $this->CSS (single selectors, @page) and
 *    $this->cascadeCSS (descendant selectors).
 *
 * Side effects: updates $this->CSS and $this->cascadeCSS, may set
 * $this->mpdf->mirrorMargins, and may create files under _MPDF_TEMP_PATH.
 *
 * @param string $html HTML source to scan.
 * @return string The HTML with comments and <style> elements removed.
 */
function ReadCSS($html) {
	// Remove <style> blocks and <link> elements aimed at another medium.
	$mediaPatterns = array(
		'/<style[^>]*media=["\']([^"\'>]*)["\'].*?<\/style>/is',
		'/<link[^>]*media=["\']([^"\'>]*)["\'].*?>/is',
	);
	foreach ($mediaPatterns as $mediaPattern) {
		preg_match_all($mediaPattern,$html,$m);
		for($i=0; $i<count($m[0]); $i++) {
			if ($this->mpdf->CSSselectMedia && !preg_match('/('.trim($this->mpdf->CSSselectMedia).'|all)/i',$m[1][$i])) {
				$html = str_replace($m[0][$i],'',$html);
			}
		}
	}

	// mPDF 5.5.02
	// Remove comment tags <!-- ... --> and /* ... */ inside <style> blocks.
	// The closing tag is first normalised to lower case, because the
	// str_replace() below searches for a literal '</style>'. The result has to
	// be assigned back to $html; otherwise blocks closed with </STYLE> keep
	// their comments.
	$normalised = preg_replace('/<\/style>/i', '</style>', $html);
	if ($normalised !== null) { $html = $normalised; }	// null signals a PCRE error: keep the input untouched
	preg_match_all('/<style.*?>(.*?)<\/style>/si',$html,$m);
	for($i=0;$i<count($m[1]);$i++) {
		$sub = preg_replace('/(<\!\-\-|\-\->)/s',' ',$m[1][$i]);
		$sub = '>'.preg_replace('|/\*.*?\*/|s',' ',$sub).'</style>';
		$html = str_replace('>'.$m[1][$i].'</style>', $sub, $html);
	}

	// Unwrap mPDF-only sections, then drop every remaining HTML comment.
	$html = preg_replace('/<!--mpdf/i','',$html);
	$html = preg_replace('/mpdf-->/i','',$html);
	$html = preg_replace('/<\!\-\-.*?\-\->/s',' ',$html);

	// Gather the paths of external stylesheets, in this order:
	// <link rel=... href=...>, <link href=... rel=...>, @import url(...), @import "...".
	// The .css suffix may be followed by a query string.
	// The last pattern skips the url( form via a look-ahead; without it the
	// pattern would also match "@import url('a.css')" and capture the bogus
	// path "url('a.css".
	$externalPatterns = array(
		'/<link[^>]*rel=["\']stylesheet["\'][^>]*href=["\']([^>"\']*)["\'].*?>/si',
		'/<link[^>]*href=["\']([^>"\']*)["\'][^>]*?rel=["\']stylesheet["\'].*?>/si',
		'/@import url\([\'\"]{0,1}([^\)]*?\.css(\?\S+)?)[\'\"]{0,1}\)/si',
		'/@import (?!url\()[\'\"]{0,1}([^;]*?\.css(\?\S+)?)[\'\"]{0,1}/si',
	);
	$CSSext = array();
	foreach ($externalPatterns as $externalPattern) {
		if (preg_match_all($externalPattern,$html,$cxt)) {
			$CSSext = array_merge($CSSext,$cxt[1]);
		}
	}

	$CSSstr = '';
	$backgroundUrlPattern = '/(background[^;]*url\s*\(\s*[\'\"]{0,1})([^\)\'\"]*)([\'\"]{0,1}\s*\))/si';

	if (!is_array($this->cascadeCSS)) $this->cascadeCSS = array();

	// $CSSext is used as a queue: stylesheets imported by a loaded stylesheet
	// are appended and processed in turn. $importChain[$ind] holds the resolved
	// paths of the stylesheets through which entry $ind was imported, so that a
	// circular @import (a.css -> b.css -> a.css) is skipped instead of looping
	// forever.
	$importChain = array();
	for ($ind = 0; $ind < count($CSSext); $ind++) {
		$path = htmlspecialchars_decode($CSSext[$ind]);	// mPDF 6

		// GetFullPath() is called for its effect on $path; its return value is
		// not used (presumably it resolves the argument in place).
		$this->mpdf->GetFullPath($path);
		$ancestors = isset($importChain[$ind]) ? $importChain[$ind] : array();
		$ancestors[] = $path;

		$CSSextblock = $this->mpdf->_get_file($path);
		if (!$CSSextblock) { continue; }

		// Paths inside a stylesheet are relative to that stylesheet.
		$cssBasePath = preg_replace('/\/[^\/]*$/','',$path) . '/';

		// Queue @import stylesheets embedded in this stylesheet.
		$regexpem = '/@import url\([\'\"]{0,1}(.*?\.css(\?\S+)?)[\'\"]{0,1}\)/si';
		if (preg_match_all($regexpem,$CSSextblock,$cxtem)) {
			foreach($cxtem[1] as $cxtembedded) {
				$this->mpdf->GetFullPath($cxtembedded, $cssBasePath );
				// Cycle guard; relies on GetFullPath() yielding the same string
				// for the same stylesheet.
				if (in_array($cxtembedded, $ancestors, true)) { continue; }
				$CSSext[] = $cxtembedded;
				$importChain[count($CSSext) - 1] = $ancestors;
			}
		}

		// Pass background url() paths through GetFullPath() (data: URIs are left alone).
		if (preg_match_all($backgroundUrlPattern,$CSSextblock,$cxtem)) {
			for ($i=0;$i<count($cxtem[0]);$i++) {
				$embedded = $cxtem[2][$i];
				if (!preg_match('/^data:image/i', $embedded)) {	// mPDF 5.5.13
					$this->mpdf->GetFullPath($embedded, $cssBasePath );
					$CSSextblock = str_replace($cxtem[0][$i], ($cxtem[1][$i].$embedded.$cxtem[3][$i]), $CSSextblock);
				}
			}
		}
		$CSSstr .= ' '.$CSSextblock;
	}

	// CSS as <style> in the HTML document (it can be <style> or <style type="text/css">).
	if (preg_match_all('/<style.*?>(.*?)<\/style>/si',$html,$CSSblock)) {
		$tmpCSSstr = implode(' ',$CSSblock[1]);
		if (preg_match_all($backgroundUrlPattern,$tmpCSSstr,$cxtem)) {
			for ($i=0;$i<count($cxtem[0]);$i++) {
				$embedded = $cxtem[2][$i];
				if (!preg_match('/^data:image/i', $embedded)) {	// mPDF 5.5.13
					$this->mpdf->GetFullPath($embedded);
					$tmpCSSstr = str_replace($cxtem[0][$i], ($cxtem[1][$i].$embedded.$cxtem[3][$i]), $tmpCSSstr );
				}
			}
		}
		$CSSstr .= ' '.$tmpCSSstr;
	}

	// Remove comments and turn every whitespace character into a plain space.
	$CSSstr = preg_replace('|/\*.*?\*/|s',' ',$CSSstr);
	$CSSstr = preg_replace('/[\s\n\r\t\f]/s',' ',$CSSstr);

	// @media sections: drop those for other media, unwrap the rest.
	if (preg_match('/@media/',$CSSstr)) {
		preg_match_all('/@media(.*?)\{(([^\{\}]*\{[^\{\}]*\})+)\s*\}/is',$CSSstr,$m);
		for($i=0; $i<count($m[0]); $i++) {
			if ($this->mpdf->CSSselectMedia && !preg_match('/('.trim($this->mpdf->CSSselectMedia).'|all)/is',$m[1][$i])) {
				$CSSstr = str_replace($m[0][$i], '', $CSSstr);
			}
			else {
				$CSSstr = str_replace($m[0][$i],' '.$m[2][$i].' ',$CSSstr);
			}
		}
	}

	// mPDF 5.5.13
	// Replace any url(data:image...) with a reference to a temporary image file.
	preg_match_all("/(url\(data:image\/(jpeg|gif|png);base64,(.*?)\))/si", $CSSstr, $idata);	// mPDF 5.7.2
	for($i=0;$i<count($idata[0]);$i++) {
		$file = _MPDF_TEMP_PATH.'_tempCSSidata'.rand(1,10000).'_'.$i.'.'.$idata[2][$i];
		file_put_contents($file, base64_decode($idata[3][$i]));
		$CSSstr = str_replace($idata[0][$i], 'url("'.$file.'")', $CSSstr); 	// mPDF 5.5.17
	}

	$CSSstr = preg_replace('/(<\!\-\-|\-\->)/s',' ',$CSSstr);

	// mPDF 5.7.4 URLs
	// "(" ")" and ";" inside url() confuse the rule parser below. Encode the
	// brackets and swap ";" for a marker that is converted back once the
	// declarations have been split. All three notations (double-quoted,
	// single-quoted, unquoted) end up as url('...').
	// str_replace() is used on purpose: with preg_replace() a "$1" or "\1" in
	// the URL would be read as a back-reference in the replacement string.
	$tempmarker = '%ZZ';
	if (strpos($CSSstr,'url(')!==false) {
		$urlPatterns = array(
			'/url\(\"(.*?)\"\)/',
			'/url\(\'(.*?)\'\)/',
			'/url\(([^\'\"].*?[^\'\"])\)/',
		);
		foreach ($urlPatterns as $urlPattern) {
			preg_match_all($urlPattern, $CSSstr, $m);
			for($i = 0; $i < count($m[1]) ; $i++) {
				$tmp = str_replace(array('(',')',';'),array('%28','%29',$tempmarker),$m[1][$i]);
				$CSSstr = str_replace($m[0][$i], 'url(\''.$tmp.'\')', $CSSstr);
			}
		}
	}

	if ($CSSstr ) {
		preg_match_all('/(.*?)\{(.*?)\}/',$CSSstr,$styles);
		for($i=0; $i < count($styles[1]) ; $i++)  {
			// Build the declaration map, e.g. $classproperties['COLOR'] = '#ffffff';
			$classproperties = array();	// mPDF 6
			$stylearr = explode(';',trim($styles[2][$i]));
			foreach($stylearr as $sta) {
				if (!trim($sta)) { continue; }
				// Split on the first ":" only, to allow
				// style="background: url('http://www.bpm1.com/bg.jpg')"
				$tmp = explode(':',$sta,2);
				$property = trim($tmp[0]);
				$value = isset($tmp[1]) ? $tmp[1] : '';
				$value = str_replace($tempmarker,';',$value);	// mPDF 5.7.4 URLs
				$value = preg_replace('/\s*!important/i','',$value);
				$value = trim($value);
				if ($property && ($value || $value==='0')) {
					// Ignores -webkit-gradient so doesn't override -moz-
					if ((strtoupper($property)=='BACKGROUND-IMAGE' || strtoupper($property)=='BACKGROUND') && preg_match('/-webkit-gradient/i',$value)) {
						continue;
					}
					$classproperties[strtoupper($property)] = $value;
				}
			}
			$classproperties = $this->fixCSS($classproperties);

			$tagarr = explode(',',strtoupper(trim($styles[1][$i])));
			$pageselectors = false;	// used to turn on $this->mpdf->mirrorMargins
			foreach($tagarr as $tg) {
				// mPDF 5.7.4: remove the spaces inside NTH-CHILD( ... ) so that the
				// whitespace split below keeps the selector in one piece.
				if (preg_match('/NTH-CHILD\((\s*(([\-+]?\d*)N(\s*[\-+]\s*\d+)?|[\-+]?\d+|ODD|EVEN)\s*)\)/',$tg,$m) ) {
					$tg = preg_replace('/NTH-CHILD\(.*\)/', 'NTH-CHILD('.str_replace(' ','',$m[1]).')', $tg);
				}
				$tags = preg_split('/\s+/',trim($tg));
				$level = count($tags);

				if (trim($tags[0])=='@PAGE') {
					$t = trim($tags[0]);
					$t2 = isset($tags[1]) ? trim($tags[1]) : '';
					$t3 = isset($tags[2]) ? trim($tags[2]) : '';
					$tag = '';
					if ($level==1) { $tag = $t; }
					else if ($level==2 && preg_match('/^[:](.*)$/',$t2,$m)) {
						$tag = $t.'>>PSEUDO>>'.$m[1];
						if ($m[1]=='LEFT' || $m[1]=='RIGHT') { $pageselectors = true; }
					}
					else if ($level==2) { $tag = $t.'>>NAMED>>'.$t2; }
					else if ($level==3 && preg_match('/^[:](.*)$/',$t3,$m)) {
						$tag = $t.'>>NAMED>>'.$t2.'>>PSEUDO>>'.$m[1];
						if ($m[1]=='LEFT' || $m[1]=='RIGHT') { $pageselectors = true; }
					}
					if ($tag) {
						if (isset($this->CSS[$tag])) { $this->CSS[$tag] = $this->array_merge_recursive_unique($this->CSS[$tag], $classproperties); }
						else { $this->CSS[$tag] = $classproperties; }
					}
					continue;
				}

				// Translate each whitespace-separated part of the selector
				// (e.g. p or .class or #id or p.class or p#id) into an internal key.
				// Stop at the first part that is not recognised; $tag is then
				// empty and the whole selector is ignored.
				$tmp = array();
				$tag = '';	// reset per selector so a value from an earlier selector cannot leak in
				for($n=0;$n<$level;$n++) {
					$t = isset($tags[$n]) ? trim($tags[$n]) : '';
					if (!$t) { continue; }
					$tag = '';
					if (preg_match('/^[.](.*)$/',$t,$m)) { $tag = 'CLASS>>'.$m[1]; }
					else if (preg_match('/^[#](.*)$/',$t,$m)) { $tag = 'ID>>'.$m[1]; }
					else if (preg_match('/^\[LANG=[\'\"]{0,1}([A-Z\-]{2,11})[\'\"]{0,1}\]$/',$t,$m)) { $tag = 'LANG>>'.strtolower($m[1]); }	// mPDF 6  Special case for lang as attribute selector
					else if (preg_match('/^:LANG\([\'\"]{0,1}([A-Z\-]{2,11})[\'\"]{0,1}\)$/',$t,$m)) { $tag = 'LANG>>'.strtolower($m[1]); }	// mPDF 6  Special case for lang as attribute selector
					else if (preg_match('/^('.$this->mpdf->allowedCSStags.')[.](.*)$/',$t,$m)) { $tag = $m[1].'>>CLASS>>'.$m[2]; }
					else if (preg_match('/^('.$this->mpdf->allowedCSStags.')\s*:NTH-CHILD\((.*)\)$/',$t,$m)) { $tag = $m[1].'>>SELECTORNTHCHILD>>'.$m[2]; }
					else if (preg_match('/^('.$this->mpdf->allowedCSStags.')[#](.*)$/',$t,$m)) { $tag = $m[1].'>>ID>>'.$m[2]; }
					else if (preg_match('/^('.$this->mpdf->allowedCSStags.')\[LANG=[\'\"]{0,1}([A-Z\-]{2,11})[\'\"]{0,1}\]$/',$t,$m)) { $tag = $m[1].'>>LANG>>'.strtolower($m[2]); }	// mPDF 6  Special case for lang as attribute selector
					else if (preg_match('/^('.$this->mpdf->allowedCSStags.'):LANG\([\'\"]{0,1}([A-Z\-]{2,11})[\'\"]{0,1}\)$/',$t,$m)) { $tag = $m[1].'>>LANG>>'.strtolower($m[2]); }	// mPDF 6  Special case for lang as attribute selector
					else if (preg_match('/^('.$this->mpdf->allowedCSStags.')$/',$t)) { $tag= $t; }

					if ($tag) { $tmp[] = $tag; }
					else { break; }
				}
				if (!$tag) { continue; }

				if ($level == 1) {
					// Single selector: stored flat in $this->CSS.
					if (isset($this->CSS[$tag])) { $this->CSS[$tag] = $this->array_merge_recursive_unique($this->CSS[$tag], $classproperties); }
					else { $this->CSS[$tag] = $classproperties; }
				}
				else {
					// Descendant selector: stored as a nested path in $this->cascadeCSS.
					$node = &$this->cascadeCSS;
					foreach($tmp as $tp) { $node = &$node[$tp]; }
					$node = $this->array_merge_recursive_unique($node, $classproperties);
					$node['depth'] = $level;
					unset($node);	// break the reference before the next selector
				}
			}
			if ($pageselectors) { $this->mpdf->mirrorMargins = true; }
		}
	}

	// Remove CSS (tags and content), if any; it can be <style> or <style type="txt/css">
	$html = preg_replace('/<style.*?>(.*?)<\/style>/si','',$html);
	return $html;
}
315