<?php
// $Header: /cvsroot/html2ps/css.parse.inc.php,v 1.28 2007/03/15 18:37:31 Konstantin Exp $

require_once(HTML2PS_DIR.'css.rules.page.inc.php');
require_once(HTML2PS_DIR.'css.property.collection.php');
require_once(HTML2PS_DIR.'css.parse.properties.php');

// Regular expression fragments (without delimiters) used by parse_css_selector().
// The two *_VALUE_* fragments each contain two capture groups: attribute name and value.
define("SELECTOR_CLASS_REGEXP","[\w\d_-]+");
define("SELECTOR_ID_REGEXP","[\w\d_-]+");
define("SELECTOR_ATTR_REGEXP","[\w]+");
define("SELECTOR_ATTR_VALUE_REGEXP","([\w]+)=['\"]?([\w]+)['\"]?");
define("SELECTOR_ATTR_VALUE_WORD_REGEXP" ,"([\w]+)~=['\"]?([\w]+)['\"]?");

/**
 * Parse the 'style' attribute value of the given node and apply the resulting
 * rule to that node.
 *
 * Some documents wrap the inline style in curly braces:
 *
 *   <input maxLength=256 size=45 name=searchfor value="" style="{width:350px}">
 *
 * This is non-standard; HTML 4.01 says:
 *
 *   The syntax of the value of the style attribute is determined by the default
 *   style sheet language. For example, for [[CSS2]] inline style, use the
 *   declaration block syntax described in section 4.1.8
 *   *(without curly brace delimiters)*
 *
 * Many browsers accept it anyway, so for compatibility one leading "{" and one
 * trailing "}" (with surrounding whitespace) are stripped before parsing.
 *
 * @param object $root     node providing get_attribute(); the rule is applied to it
 * @param mixed  $state    passed through (by reference) to CSSRule::apply()
 * @param object $pipeline passed to parse_css_properties(), CSSRule and used for get_base_url()
 */
function parse_style_attr($root, &$state, &$pipeline) {
  $style = $root->get_attribute("style");

  // Tolerate the non-standard "{...}" wrapper described above
  $style = preg_replace("/^\s*{/","",$style);
  $style = preg_replace("/}\s*$/","",$style);

  $properties = parse_css_properties($style, $pipeline);

  // Inline style is modelled as a rule with the universal selector,
  // bound to the node it was declared on
  $rule = new CSSRule(array(
                            array(SELECTOR_ANY),
                            $properties,
                            $pipeline->get_base_url(),
                            $root
                            ),
                      $pipeline);

  $rule->apply($root, $state, $pipeline);
}

// TODO: make a real parser instead of if-then-else mess
//
// Selector grammar (according to CSS 2.1, paragraph 5.1 & 5.2):
// Note that this particular grammar is not LL1, but still can be converted to
// that form
//
// COMPOSITE_SELECTOR  ::= SELECTOR ("," SELECTOR)*
//
// SELECTOR            ::= SIMPLE_SELECTOR (COMBINATOR SIMPLE_SELECTOR)*
//
// COMBINATOR          ::= WHITESPACE* COMBINATOR_SYMBOL WHITESPACE*
// COMBINATOR_SYMBOL   ::= " " | ">" | "+"
//
// SIMPLE_SELECTOR     ::= TYPE_SELECTOR (ADDITIONAL_SELECTOR)*
// SIMPLE_SELECTOR     ::= UNIVERSAL_SELECTOR (ADDITIONAL_SELECTOR)*
//
// CSS 2.1, p. 5.3: if the universal selector is not the only component of a
// simple selector, the "*" may be omitted
// SIMPLE_SELECTOR     ::= (ADDITIONAL_SELECTOR)*
//
// TYPE_SELECTOR       ::= TAG_NAME
//
// UNIVERSAL_SELECTOR  ::= "*"
//
// ADDITIONAL_SELECTOR ::= ATTRIBUTE_SELECTOR | ID_SELECTOR | PSEUDOCLASS | CLASS_SELECTOR | PSEUDOELEMENT
//
// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "]"
// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "="  ATTR_VALUE "]"
// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "~=" ATTR_VALUE "]"
// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "|=" ATTR_VALUE "]"
//
// CLASS_SELECTOR      ::= "." CLASS_NAME
//
// ID_SELECTOR         ::= "#" ID_VALUE
//
// PSEUDOCLASS         ::= ":first-child"    |
//                         ":link"           |
//                         ":visited"        | // ignored in our case
//                         ":hover"          | // dynamic - ignored in our case
//                         ":active"         | // dynamic - ignored in our case
//                         ":focus"          | // dynamic - ignored in our case
//                         ":lang(" LANG ")" | // dynamic - ignored in our case
//
// PSEUDOELEMENT       ::= ":first-line"   |
//                         ":first-letter" |
//                         ":before"       |
//                         ":after"        |
//
// ATTR_VALUE          ::= IDENTIFIER | STRING
// CLASS_NAME          ::= IDENTIFIER
// ID_VALUE            ::= IDENTIFIER
//

/**
 * Convert one (comma-free) selector string into a nested selector array.
 *
 * The string is trimmed and lower-cased, then tested against a fixed sequence
 * of patterns; the first one that matches wins, and the function recurses on
 * the remaining part(s). The order of the checks matters: combinators are
 * split off first (right-most simple selector first), then attribute
 * selectors, pseudo-classes/elements, :lang(), chained classes, and finally
 * the plain "#id", ".class", "tag#id", "*" and tag forms.
 *
 * Only part of the grammar above is implemented. Of the pseudo selectors only
 * ":lowlink", ":link", ":before", ":after" and ":lang(x)" are recognized, and
 * "|=" and the "+" combinator are not handled. Attribute selectors require a
 * leading simple selector (the patterns use "(.+)" before the bracket).
 * Anything unrecognized falls through to the last line and is returned as a
 * literal tag name.
 *
 * @param string $raw_selector single selector, e.g. "table > tr", "a.ext:link"
 * @return array array(SELECTOR_<TYPE>, ...operands); composite results have the
 *               form array(SELECTOR_SEQUENCE, array(<selector>, <selector>))
 */
function parse_css_selector($raw_selector) {
  // Note the 'trim' call. It is required as there could be leading/trailing spaces in $raw_selector
  //
  $raw_selector = strtolower(trim($raw_selector));

  // Direct Parent/child selectors (for example 'table > tr')
  if (preg_match("/^(\S.*)\s*>\s*([^\s]+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[2]),
                                          array(SELECTOR_DIRECT_PARENT,
                                                parse_css_selector($matches[1]))));
  }

  // Parent/child selectors (for example 'table td')
  if (preg_match("/^(\S.*)\s+([^\s]+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[2]),
                                          array(SELECTOR_PARENT,
                                                parse_css_selector($matches[1]))));
  }

  // Attribute presence: 'input[disabled]'
  if (preg_match("/^(.+)\[(".SELECTOR_ATTR_REGEXP.")\]$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[1]),
                                          array(SELECTOR_ATTR, $matches[2])));
  }

  // Attribute equality: 'input[type="text"]'
  // ($matches[2] is the attribute name, $matches[3] the value - both groups come from the constant)
  if (preg_match("/^(.+)\[".SELECTOR_ATTR_VALUE_REGEXP."\]$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[1]),
                                          array(SELECTOR_ATTR_VALUE, $matches[2], css_remove_value_quotes($matches[3]))));
  }

  // Attribute word match: 'a[rel~="next"]'
  if (preg_match("/^(.+)\[".SELECTOR_ATTR_VALUE_WORD_REGEXP."\]$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[1]),
                                          array(SELECTOR_ATTR_VALUE_WORD, $matches[2], css_remove_value_quotes($matches[3]))));
  }

  // pseudoclasses & pseudoelements
  if (preg_match("/^([#\.\s\w_-]*):(\w+)$/", $raw_selector, $matches)) {
    // A bare ':link' means '*:link'
    if ($matches[1] === "") { $matches[1] = "*"; }

    // Unrecognized pseudo names intentionally fall out of the switch
    // and continue with the checks below
    switch ($matches[2]) {
    case "lowlink":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOCLASS_LINK_LOW_PRIORITY)));
    case "link":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOCLASS_LINK)));
    case "before":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOELEMENT_BEFORE)));
    case "after":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOELEMENT_AFTER)));
    }
  }

  // :lang() pseudoclass
  if (preg_match("/^([#\.\s\w_-]+):lang\((\w+)\)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_LANGUAGE, $matches[2])));
  }

  // Something followed by a class: 'div.note', '.a.b', '#id.cls'
  if (preg_match("/^(\S+)(\.\S+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), parse_css_selector($matches[2])));
  }

  // Plain '#id' / '.class'.
  // Uses [] instead of the {} string offset syntax (a compile error since PHP 8.0);
  // the emptiness guard avoids an "uninitialized string offset" warning while
  // keeping the historical result for an empty selector (an empty tag name).
  if ($raw_selector !== '') {
    $first_char = $raw_selector[0];

    if ($first_char === '#') {
      return array(SELECTOR_ID, substr($raw_selector,1));
    }

    if ($first_char === '.') {
      return array(SELECTOR_CLASS, substr($raw_selector,1));
    }
  }

  // 'tag#id'
  if (preg_match("/^(\w+)#(".SELECTOR_ID_REGEXP.")$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(array(SELECTOR_ID, $matches[2]), array(SELECTOR_TAG, $matches[1])));
  }

  if ($raw_selector === "*") {
    return array(SELECTOR_ANY);
  }

  // Fallback: treat whatever is left as a tag name
  return array(SELECTOR_TAG,$raw_selector);
}

/**
 * Split a comma-separated selector list and parse every non-empty item.
 *
 * @param string $raw_selectors selector list, e.g. "h1, h2 > a, .note"
 * @return array list of selector arrays as returned by parse_css_selector()
 */
function parse_css_selectors($raw_selectors) {
  $selectors = array();

  $selector_strings = explode(",",$raw_selectors);

  foreach ($selector_strings as $selector_string) {
    // See comment on SELECTOR_ANY regarding why this code is commented
    // Remove the '* html' string from the selector
    // $selector_string = preg_replace('/^\s*\*\s+html/','',$selector_string);

    $selector_string = trim($selector_string);

    // Support for non-valid CSS similar to: "selector1,selector2, {rules}"
    // In this case we'll get three selectors; last will be empty string.
    // An explicit comparison is used because empty() would also drop "0".
    if ($selector_string !== '') {
      $selectors[] = parse_css_selector($selector_string);
    }
  }

  return $selectors;
}

// Legacy implementation, kept for reference only (disabled in the original file).
//
// function &parse_css_property($property, &$pipeline) {
//   if (preg_match("/^(.*?)\s*:\s*(.*)/",$property, $matches)) {
//     $name = strtolower(trim($matches[1]));
//     $code = CSS::name2code($name);
//     if (is_null($code)) {
//       error_log(sprintf("Unsupported CSS property: '%s'", $name));
//       $null = null;
//       return $null;
//     };

//     $collection =& new CSSPropertyCollection();
//     $collection->add_property(CSSPropertyDeclaration::create($code, trim($matches[2]), $pipeline));
//     return $collection;
//   } elseif (preg_match("/@import\s+\"(.*)\";/",$property, $matches)) {
//     // @import "<url>"
//     $collection =& css_import(trim($matches[1]), $pipeline);
//     return $collection;
//   } elseif (preg_match("/@import\s+url\((.*)\);/",$property, $matches)) {
//     // @import url()
//     $collection =& css_import(trim($matches[1]), $pipeline);
//     return $collection;
//   } elseif (preg_match("/@import\s+(.*);/",$property, $matches)) {
//     // @import <url>
//     $collection =& css_import(trim($matches[1]), $pipeline);
//     return $collection;
//   } else {
//     $collection =& new CSSPropertyCollection();
//     return $collection;
//   };
// }