1<?php
2// $Header: /cvsroot/html2ps/css.selectors.inc.php,v 1.12 2006/01/07 19:38:06 Konstantin Exp $
3
// Selector type codes.
//
// Every selector-describing array keeps one of these values in its first
// element (see selector_get_type()); the matching and specificity routines
// dispatch on it.

// Simple selectors
define('SELECTOR_ID',        1);
define('SELECTOR_CLASS',     2);
define('SELECTOR_TAG',       3);
define('SELECTOR_TAG_CLASS', 4);

// Note on SELECTOR_ANY:
// normally we should not process rules like
// * html <some other selector>
// as they are IE-specific and (according to the CSS standard)
// should never be matched
define('SELECTOR_ANY', 16);

// Several selectors which must all match the same node
define('SELECTOR_SEQUENCE', 5);

// Combinators
define('SELECTOR_PARENT',        6);  // TAG1 TAG2
define('SELECTOR_DIRECT_PARENT', 10); // TAG1 > TAG2

// Attribute selectors
define('SELECTOR_ATTR',            9);  // attribute is present
define('SELECTOR_ATTR_VALUE',      7);  // attribute equals the given value
define('SELECTOR_ATTR_VALUE_WORD', 17); // attribute contains the given whitespace-separated word

// Pseudo-classes
define('SELECTOR_PSEUDOCLASS_LINK', 8);
define('SELECTOR_LANGUAGE',         11); // SELECTOR:lang(..)

// Pseudo-elements
define('SELECTOR_PSEUDOELEMENT_BEFORE', 14);
define('SELECTOR_PSEUDOELEMENT_AFTER',  15);

// Used for handling the body 'link' attribute; this selector has no specificity at all.
// We need this selector type because some designers use constructs like:
//
// <html>
// <head><style type="text/css">a { color: red; }</style></head>
// <body link="#000000"><a href="test">test</a>
//
// In this case the CSS rule should have the higher priority; nevertheless, using the default
// selector rules we would find that the 'link'-generated CSS rule is more important.
//
define('SELECTOR_PSEUDOCLASS_LINK_LOW_PRIORITY', 12);

// Used for handling the following case:
//
// <head>
// <style>img { border: 0; }</style>
// </head>
// <body><a href=""><img height="10" width="10" src=""></a>
//
define('SELECTOR_PARENT_LOW_PRIORITY', 13);
47
// CSS 2.1:
// In CSS2, identifiers (including element names, classes, and IDs in selectors) can contain only the
// characters [A-Za-z0-9] and ISO 10646 characters 161 and higher, plus the hyphen (-); they cannot start
// with a hyphen or a digit. They can also contain escaped characters and any ISO 10646 character as a
// numeric code. For instance, the identifier "B&W?" may be written as "B\&W\?" or "B\26 W\3F".
//
// Any node can be marked by several whitespace-separated class names.
//
// Tells whether $target_class is one of the class names listed in the 'class' attribute of $root.
//
// $root         - node object providing has_attribute() and get_attribute()
// $target_class - class name to look for; it is compared as-is against the lower-cased
//                 class names of the node
//
// Returns true when the class is present; false otherwise (including the case when the node
// has no 'class' attribute at all).
//
function node_have_class($root, $target_class) {
  if (!$root->has_attribute('class')) { return false; }

  // PREG_SPLIT_NO_EMPTY: leading/trailing whitespace in the attribute value
  // must not produce empty "class names"
  $classes = preg_split("/\s+/",
                        strtolower($root->get_attribute('class')),
                        -1,
                        PREG_SPLIT_NO_EMPTY);

  // Strict search: a loose comparison would treat numeric-looking strings
  // such as "1e1" and "10" as the same class name
  return in_array((string)$target_class, $classes, true);
}
69
// Checks whether a parsed CSS selector applies to a document node.
//
// $selector - selector-describing array; item [0] is one of the SELECTOR_* type codes,
//             the remaining items depend on the type:
//               SELECTOR_TAG, SELECTOR_ID, SELECTOR_CLASS, SELECTOR_ATTR:
//                 [1] tag name / id / class name / attribute name
//               SELECTOR_TAG_CLASS:
//                 [1] tag name, [2] class name
//               SELECTOR_ATTR_VALUE, SELECTOR_ATTR_VALUE_WORD:
//                 [1] attribute name, [2] expected value / word
//               SELECTOR_SEQUENCE:
//                 [1] list of sub-selectors which must all match $root
//               SELECTOR_PARENT, SELECTOR_PARENT_LOW_PRIORITY, SELECTOR_DIRECT_PARENT:
//                 [1] selector to be matched by some ancestor / by the direct parent
// $root     - node object providing tagname(), parent(), has_attribute() and get_attribute();
//             nodes returned by parent() must provide node_type() too
//
// Tag names, ids and class names taken from the document are lower-cased before the comparison;
// the selector side is compared as-is. All string comparisons are strict, so numeric-looking
// values ("1e1" vs "10") are never mixed up.
//
// Returns true if the selector matches, false otherwise (unknown selector types never match).
//
function match_selector($selector, $root) {
  switch ($selector[0]) {
  case SELECTOR_TAG:
    return (string)$selector[1] === strtolower($root->tagname());

  case SELECTOR_ID:
    // A node without an 'id' attribute cannot be matched by any id selector
    if (!$root->has_attribute('id')) { return false; }
    return (string)$selector[1] === strtolower($root->get_attribute('id'));

  case SELECTOR_CLASS:
    return node_have_class($root, $selector[1]);

  case SELECTOR_TAG_CLASS:
    return
      (string)$selector[1] === strtolower($root->tagname()) &&
      node_have_class($root, $selector[2]);

  case SELECTOR_SEQUENCE:
    // Every sub-selector must match the very same node
    foreach ($selector[1] as $subselector) {
      if (!match_selector($subselector, $root)) { return false; }
    }
    return true;

  case SELECTOR_PARENT:
  case SELECTOR_PARENT_LOW_PRIORITY:
    // Any ancestor may match; stop as soon as we leave the chain of element nodes
    $node = $root->parent();
    while ($node && $node->node_type() == XML_ELEMENT_NODE) {
      if (match_selector($selector[1], $node)) { return true; }
      $node = $node->parent();
    }
    return false;

  case SELECTOR_DIRECT_PARENT:
    // Only the immediate parent is checked
    $node = $root->parent();
    if ($node && $node->node_type() == XML_ELEMENT_NODE) {
      return match_selector($selector[1], $node) ? true : false;
    }
    return false;

  case SELECTOR_ATTR:
    return $root->has_attribute($selector[1]);

  case SELECTOR_ATTR_VALUE:
  case SELECTOR_ATTR_VALUE_WORD:
    // Note that the CSS 2.1 standard does not say strictly whether attribute case
    // is significant:
    // """
    // Attribute values must be identifiers or strings. The case-sensitivity of attribute names and
    // values in selectors depends on the document language.
    // """
    // As we have met several problems with pages having INPUT type attributes in upper (or even
    // worse - mixed!) case, the following decision has been made: attribute values are not
    // case-sensitive.
    $attr_name = $selector[1];
    if (!$root->has_attribute($attr_name)) { return false; }

    $expected = strtolower($selector[2]);
    $actual   = strtolower($root->get_attribute($attr_name));

    if ($selector[0] == SELECTOR_ATTR_VALUE) {
      return $actual === $expected;
    }

    // Word match: the value is treated as a whitespace-separated list. Empty items produced by
    // leading/trailing whitespace are dropped, so an empty word never matches.
    $words = preg_split("/\s+/", $actual, -1, PREG_SPLIT_NO_EMPTY);
    return in_array($expected, $words, true);

  case SELECTOR_PSEUDOCLASS_LINK:
  case SELECTOR_PSEUDOCLASS_LINK_LOW_PRIORITY:
    // The tag name is lower-cased here just like in the SELECTOR_TAG case,
    // so that <A HREF="..."> is recognized as a link too
    return strtolower($root->tagname()) === "a" && $root->has_attribute('href');

    // Note that :before and :after always match
  case SELECTOR_PSEUDOELEMENT_BEFORE:
  case SELECTOR_PSEUDOELEMENT_AFTER:
    return true;

  case SELECTOR_LANGUAGE:
    // FIXME: determine the document language
    return true;

  case SELECTOR_ANY:
    return true;
  }

  return false;
}
168
// Calculates the specificity of a selector-describing array.
//
// The result is a three-item array. Judging by the weights assigned below, the
// first item counts id selectors, the second one counts class / attribute /
// pseudo-class selectors and the third one counts tag names.
//
// Composite selectors are handled recursively: a sequence sums up the weights
// of its parts, parent and direct-parent selectors take the weight of the
// selector they wrap. The two "low priority" types get fixed minimal weights.
//
// NOTE(review): SELECTOR_ANY is weighted like a class selector here, while CSS 2.1
// gives the universal selector no weight; this looks intentional - confirm before changing.
//
// Terminates the script with die() on an unknown selector type.
//
function css_selector_specificity($selector) {
  switch ($selector[0]) {
  // Simple selectors with their own weight
  case SELECTOR_ID:
    return array(1,0,0);
  case SELECTOR_TAG:
    return array(0,0,1);
  case SELECTOR_TAG_CLASS:
    return array(0,1,1);

  // Everything weighted as a single class selector
  case SELECTOR_CLASS:
  case SELECTOR_ATTR:
  case SELECTOR_ATTR_VALUE:
  case SELECTOR_ATTR_VALUE_WORD:
  case SELECTOR_PSEUDOCLASS_LINK:
  case SELECTOR_LANGUAGE:
  case SELECTOR_ANY:
    return array(0,1,0);

  // Selectors which add nothing to the specificity
  case SELECTOR_PSEUDOCLASS_LINK_LOW_PRIORITY:
  case SELECTOR_PSEUDOELEMENT_BEFORE:
  case SELECTOR_PSEUDOELEMENT_AFTER:
    return array(0,0,0);

  // Must lose against any other selector
  case SELECTOR_PARENT_LOW_PRIORITY:
    return array(-1,-1,-1);

  // Wrappers: the weight is the weight of the wrapped selector
  case SELECTOR_PARENT:
  case SELECTOR_DIRECT_PARENT:
    return css_selector_specificity($selector[1]);

  // Sequence: component-wise sum over all parts
  case SELECTOR_SEQUENCE:
    $total = array(0,0,0);
    foreach ($selector[1] as $part) {
      $weight = css_selector_specificity($part);
      for ($i = 0; $i < 3; $i++) {
        $total[$i] = $total[$i] + $weight[$i];
      }
    }
    return $total;
  }

  die("Bad selector while calculating selector specificity:".$selector[0]);
}
216
// Accessor hiding the layout of the selector-describing structure from the callers:
// the selector type code is stored in the first element of the array.
//
// Returns the type code of $selector.
//
function selector_get_type($selector) {
  $type = $selector[0];
  return $type;
}
223
224?>