1<?php
2// $Header: /cvsroot/html2ps/css.parse.inc.php,v 1.28 2007/03/15 18:37:31 Konstantin Exp $
3
4require_once(HTML2PS_DIR.'css.rules.page.inc.php');
5require_once(HTML2PS_DIR.'css.property.collection.php');
6require_once(HTML2PS_DIR.'css.parse.properties.php');
7
// Regular-expression fragments (no delimiters, no anchors) that are spliced
// into larger PCRE patterns by the selector parser.
//
// Class name / id value: one or more word characters or hyphens. Note that \w
// already covers digits and the underscore, so the "\d" and "_" are redundant.
define("SELECTOR_CLASS_REGEXP","[\w\d_-]+");
define("SELECTOR_ID_REGEXP","[\w\d_-]+");
// Attribute name: word characters only.
define("SELECTOR_ATTR_REGEXP","[\w]+");
// name=value and name~=value forms. Each fragment contributes two capturing
// groups: the attribute name and the attribute value. The value is limited to
// word characters and may be preceded and/or followed by a single or double
// quote (the quotes are optional and are not required to be balanced).
define("SELECTOR_ATTR_VALUE_REGEXP","([\w]+)=['\"]?([\w]+)['\"]?");
define("SELECTOR_ATTR_VALUE_WORD_REGEXP" ,"([\w]+)~=['\"]?([\w]+)['\"]?");
13
// Parse the 'style' attribute of the given node and apply the resulting
// declarations to it.
//
// $root     - node whose 'style' attribute is read; also the node the rule is applied to
// $state    - passed by reference straight through to CSSRule::apply
// $pipeline - passed to the property parser and the rule; also supplies the base URL
//
// Returns nothing; the effect is whatever CSSRule::apply does.
//
function parse_style_attr($root, &$state, &$pipeline) {
  // HTML 4.01 states that an inline style uses the CSS declaration block
  // syntax *without* the curly brace delimiters. Real-world markup such as
  //
  // <input maxLength=256 size=45 name=searchfor value="" style="{width:350px}">
  //
  // nevertheless wraps the declarations in braces, and many browsers accept
  // it. For compatibility, strip one leading "{" and one trailing "}"
  // (together with the whitespace around them) before parsing.
  $declarations = preg_replace(
    array("/^\s*{/", "/}\s*$/"),
    "",
    $root->get_attribute("style")
  );

  $parsed = parse_css_properties($declarations, $pipeline);

  // The inline style is modelled as a rule with the universal selector,
  // carrying the base URL and the node it originates from.
  $rule_data = array(
    array(SELECTOR_ANY),
    $parsed,
    $pipeline->get_base_url(),
    $root
  );

  $inline_rule = new CSSRule($rule_data, $pipeline);
  $inline_rule->apply($root, $state, $pipeline);
}
46
47// TODO: make a real parser instead of if-then-else mess
48//
49// Selector grammar (according to CSS 2.1, paragraph 5.1 & 5.2):
// Note that this particular grammar is not LL(1), but it can still be converted to
// that form
52//
53// COMPOSITE_SELECTOR  ::= SELECTOR ("," SELECTOR)*
54//
55// SELECTOR            ::= SIMPLE_SELECTOR (COMBINATOR SIMPLE_SELECTOR)*
56//
57// COMBINATOR          ::= WHITESPACE* COMBINATOR_SYMBOL WHITESPACE*
58// COMBINATOR_SYMBOL   ::= " " | ">" | "+"
59//
60// SIMPLE_SELECTOR     ::= TYPE_SELECTOR (ADDITIONAL_SELECTOR)*
61// SIMPLE_SELECTOR     ::= UNIVERSAL_SELECTOR (ADDITIONAL_SELECTOR)*
62// SIMPLE_SELECTOR     ::= (ADDITIONAL_SELECTOR)*
63//
64// CSS 2.1, p. 5.3: if the universal selector is not the only component of a simple selector, the "*" may be omitted
65// SIMPLE_SELECTOR     ::= (ADDITIONAL_SELECTOR)*
66//
67// TYPE_SELECTOR       ::= TAG_NAME
68//
69// UNIVERSAL_SELECTOR  ::= "*"
70//
71// ADDITIONAL_SELECTOR ::= ATTRIBUTE_SELECTOR | ID_SELECTOR | PSEUDOCLASS | CLASS_SELECTOR | PSEUDOELEMENT
72//
73// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "]"
74// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "="  ATTR_VALUE "]"
75// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "~=" ATTR_VALUE "]"
76// ATTRIBUTE_SELECTOR  ::= "[" ATTRIBUTE_NAME "|=" ATTR_VALUE "]"
77//
78// CLASS_SELECTOR      ::= "." CLASS_NAME
79//
80// ID_SELECTOR         ::= "#" ID_VALUE
81//
82// PSEUDOCLASS         ::= ":first-child"    |
83//                         ":link"           |
84//                         ":visited"        | // ignored in our case
85//                         ":hover"          | // dynamic - ignored in our case
86//                         ":active"         | // dynamic - ignored in our case
87//                         ":focus"          | // dynamic - ignored in our case
88//                         ":lang(" LANG ")" | // dynamic - ignored in our case
89//
90// PSEUDOELEMENT       ::= ":first-line"     |
91//                         ":first-letter"   |
92//                         ":before"         |
93//                         ":after"          |
94//
95// ATTR_VALUE          ::= IDENTIFIER | STRING
// CLASS_NAME          ::= IDENTIFIER
97// ID_VALUE            ::= IDENTIFIER
98//
// Parse a single selector (no commas) into a nested-array description.
//
// $raw_selector - selector text, e.g. "table > tr", "a:link", "div#main", ".note"
//
// Returns an array whose first element is a SELECTOR_* constant:
//   array(SELECTOR_TAG, name)      - type selector; also the fallback for any
//                                    text that none of the patterns recognise
//   array(SELECTOR_ID, id)         - "#id"
//   array(SELECTOR_CLASS, name)    - ".class"
//   array(SELECTOR_ANY)            - "*"
//   array(SELECTOR_SEQUENCE, array(part, part))
//                                  - compound selector; each part is itself
//                                    one of these arrays or a condition such as
//                                    array(SELECTOR_ATTR, name) or
//                                    array(SELECTOR_PARENT, selector)
//
// The patterns are tried in a fixed order (combinators first, then attribute
// selectors, pseudo-classes, classes, ids); the first one that matches wins and
// the remaining text is parsed recursively.
//
function parse_css_selector($raw_selector) {
  // The patterns below are anchored, so leading/trailing whitespace has to go;
  // the selector is also normalised to lower case.
  $raw_selector = strtolower(trim($raw_selector));

  // Direct parent/child selectors (for example 'table > tr')
  if (preg_match("/^(\S.*)\s*>\s*([^\s]+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[2]),
                                          array(SELECTOR_DIRECT_PARENT,
                                                parse_css_selector($matches[1]))));
  }

  // Parent/child selectors (for example 'table td')
  if (preg_match("/^(\S.*)\s+([^\s]+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($matches[2]),
                                          array(SELECTOR_PARENT,
                                                parse_css_selector($matches[1]))));
  }

  // Attribute selectors. The part in front of the bracket may be empty
  // ("[href]"): CSS 2.1 allows the universal selector to be omitted, so an
  // empty prefix is treated as "*", exactly as the pseudo-class branch does.

  // [attr]
  if (preg_match("/^(.*)\[(".SELECTOR_ATTR_REGEXP.")\]$/", $raw_selector, $matches)) {
    $subject = ($matches[1] === "") ? "*" : $matches[1];
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($subject),
                                          array(SELECTOR_ATTR, $matches[2])));
  }

  // [attr=value]
  if (preg_match("/^(.*)\[".SELECTOR_ATTR_VALUE_REGEXP."\]$/", $raw_selector, $matches)) {
    $subject = ($matches[1] === "") ? "*" : $matches[1];
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($subject),
                                          array(SELECTOR_ATTR_VALUE, $matches[2], css_remove_value_quotes($matches[3]))));
  }

  // [attr~=value]
  if (preg_match("/^(.*)\[".SELECTOR_ATTR_VALUE_WORD_REGEXP."\]$/", $raw_selector, $matches)) {
    $subject = ($matches[1] === "") ? "*" : $matches[1];
    return array(SELECTOR_SEQUENCE, array(
                                          parse_css_selector($subject),
                                          array(SELECTOR_ATTR_VALUE_WORD, $matches[2], css_remove_value_quotes($matches[3]))));
  }

  // Pseudo-classes & pseudo-elements. Only the names listed in the switch are
  // handled here; any other name falls through to the checks below.
  if (preg_match("/^([#\.\s\w_-]*):(\w+)$/", $raw_selector, $matches)) {
    if ($matches[1] === "") { $matches[1] = "*"; }

    switch ($matches[2]) {
     case "lowlink":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOCLASS_LINK_LOW_PRIORITY)));
     case "link":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOCLASS_LINK)));
     case "before":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOELEMENT_BEFORE)));
     case "after":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOELEMENT_AFTER)));
    }
  }

  // :lang() pseudo-class; an empty prefix again stands for "*"
  if (preg_match("/^([#\.\s\w_-]*):lang\((\w+)\)$/", $raw_selector, $matches)) {
    $subject = ($matches[1] === "") ? "*" : $matches[1];
    return array(SELECTOR_SEQUENCE, array(parse_css_selector($subject), array(SELECTOR_LANGUAGE, $matches[2])));
  }

  // "something.class": split off the last class and parse both halves
  if (preg_match("/^(\S+)(\.\S+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), parse_css_selector($matches[2])));
  }

  // Bare "#id" or ".class". Square-bracket offsets are used because the
  // curly-brace form ($s{0}) is a parse error since PHP 8.0; the emptiness
  // check avoids reading offset 0 of an empty string.
  if ($raw_selector !== "") {
    switch ($raw_selector[0]) {
    case '#':
      return array(SELECTOR_ID,    substr($raw_selector,1));
    case '.':
      return array(SELECTOR_CLASS, substr($raw_selector,1));
    }
  }

  // "tag#id"
  if (preg_match("/^(\w+)#(".SELECTOR_ID_REGEXP.")$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(array(SELECTOR_ID, $matches[2]), array(SELECTOR_TAG, $matches[1])));
  }

  if ($raw_selector === "*") {
    return array(SELECTOR_ANY);
  }

  // Anything else is taken to be a tag name
  return array(SELECTOR_TAG,$raw_selector);
}
180
// Split a comma-separated selector group and parse every member.
//
// $raw_selectors - text such as "h1, h2 , .title"
//
// Returns a list with one parse_css_selector() result per non-empty member,
// in source order.
//
function parse_css_selectors($raw_selectors) {
  $parsed = array();

  foreach (explode(",", $raw_selectors) as $piece) {
    // The '* html' prefix is deliberately NOT stripped here; see the comment
    // on SELECTOR_ANY for the reasoning.
    // $piece = preg_replace('/^\s*\*\s+html/','',$piece);

    $piece = trim($piece);

    // Tolerate invalid CSS like "selector1,selector2, {rules}": the trailing
    // comma produces an empty member, which is simply skipped.
    if (empty($piece)) {
      continue;
    }

    $parsed[] = parse_css_selector($piece);
  }

  return $parsed;
}
204
205// function &parse_css_property($property, &$pipeline) {
206//   if (preg_match("/^(.*?)\s*:\s*(.*)/",$property, $matches)) {
207//     $name = strtolower(trim($matches[1]));
208//     $code = CSS::name2code($name);
209//     if (is_null($code)) {
210//       error_log(sprintf("Unsupported CSS property: '%s'", $name));
211//       $null = null;
212//       return $null;
213//     };
214
215//     $collection =& new CSSPropertyCollection();
216//     $collection->add_property(CSSPropertyDeclaration::create($code, trim($matches[2]), $pipeline));
217//     return $collection;
218//   } elseif (preg_match("/@import\s+\"(.*)\";/",$property, $matches)) {
219//     // @import "<url>"
220//     $collection =& css_import(trim($matches[1]), $pipeline);
221//     return $collection;
222//   } elseif (preg_match("/@import\s+url\((.*)\);/",$property, $matches)) {
223//     // @import url()
224//     $collection =& css_import(trim($matches[1]), $pipeline);
225//     return $collection;
226//   } elseif (preg_match("/@import\s+(.*);/",$property, $matches)) {
227//     // @import <url>
228//     $collection =& css_import(trim($matches[1]), $pipeline);
229//     return $collection;
230//   } else {
231//     $collection =& new CSSPropertyCollection();
232//     return $collection;
233//   };
234// }
235
236?>