<?php

namespace Sabre\Xml;

use XMLReader;

/**
 * The Reader class expands upon PHP's built-in XMLReader.
 *
 * The intended usage is to assign certain XML elements to PHP classes. These
 * need to be registered using the $elementMap public property.
 *
 * After this is done, a single call to parse() will parse the entire document,
 * and delegate sub-sections of the document to element classes.
 *
 * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class Reader extends XMLReader {

    use ContextStackTrait;

    /**
     * Returns the current nodename in clark-notation.
     *
     * For example: "{http://www.w3.org/2005/Atom}feed".
     * Or if no namespace is defined: "{}feed".
     *
     * This method returns null if the current node has no local name, which
     * is the case when we're not currently on an element (or attribute).
     *
     * @return string|null
     */
    function getClark() {

        if (! $this->localName) {
            return null;
        }

        return '{' . $this->namespaceURI . '}' . $this->localName;

    }

    /**
     * Reads the entire document.
     *
     * This function returns an array with the following three elements:
     *   * name - The root element name.
     *   * value - The value for the root element.
     *   * attributes - An array of attributes.
     *
     * This function will also disable the standard libxml error handler (which
     * usually just results in PHP errors), and throw exceptions instead. The
     * previous libxml settings are restored before this method returns or
     * throws.
     *
     * @throws LibXMLException if libxml reported errors while parsing.
     * @return array
     */
    function parse() {

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and
        // raises E_DEPRECATED when called there, so it is only toggled on
        // older PHP versions. According to the PHP manual, PHP 8 requires
        // libxml >= 2.9.0, which does not load external entities by default.
        // NOTE(review): callers that open the reader with LIBXML_NOENT on
        // PHP 8+ are not covered by this and need their own protection.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;

        $previousEntityState = null;
        if ($toggleEntityLoader) {
            $previousEntityState = libxml_disable_entity_loader(true);
        }
        $previousSetting = libxml_use_internal_errors(true);

        try {

            // Skip forward to the first element node (the document root).
            //
            // Really sorry about the silence operator, seems like I have no
            // choice. See:
            //
            // https://bugs.php.net/bug.php?id=64230
            while ($this->nodeType !== self::ELEMENT && @$this->read()) {
                // noop
            }
            $result = $this->parseCurrentElement();

            // Anything libxml collected while parsing is turned into a
            // single exception.
            $errors = libxml_get_errors();
            libxml_clear_errors();
            if ($errors) {
                throw new LibXMLException($errors);
            }

        } finally {
            libxml_use_internal_errors($previousSetting);
            if ($toggleEntityLoader) {
                libxml_disable_entity_loader($previousEntityState);
            }
        }

        return $result;
    }



    /**
     * parseGetElements parses everything in the current sub-tree,
     * and returns an array of elements.
     *
     * Each element has a 'name', 'value' and 'attributes' key.
     *
     * If the element didn't contain sub-elements, an empty array is always
     * returned. If there was any text inside the element, it will be
     * discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @return array
     */
    function parseGetElements(array $elementMap = null) {

        $result = $this->parseInnerTree($elementMap);
        if (!is_array($result)) {
            // parseInnerTree() gave us text or null; callers only want
            // elements.
            return [];
        }
        return $result;

    }

    /**
     * Parses all elements below the current element.
     *
     * This method will return a string if this was a text-node, or an array if
     * there were sub-elements. If the element was empty (self-closing) or had
     * neither text nor sub-elements, null is returned.
     *
     * If there's both text and sub-elements, the text will be discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @throws LibXMLException if libxml reported errors while reading.
     * @throws ParseException if the document ends before the current element
     *                        was closed.
     * @return array|string|null
     */
    function parseInnerTree(array $elementMap = null) {

        $text = null;
        $elements = [];

        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            // Easy! A self-closing element has nothing inside it; just move
            // past it.
            $this->next();
            return null;
        }

        if (!is_null($elementMap)) {
            $this->pushContext();
            $this->elementMap = $elementMap;
        }

        try {

            // Step from the opening tag to the first child node.
            //
            // Really sorry about the silence operator, seems like I have no
            // choice. See:
            //
            // https://bugs.php.net/bug.php?id=64230
            if (!@$this->read()) {
                $errors = libxml_get_errors();
                libxml_clear_errors();
                if ($errors) {
                    throw new LibXMLException($errors);
                }
                throw new ParseException('This should never happen (famous last words)');
            }

            while (true) {

                if (!$this->isValid()) {

                    $errors = libxml_get_errors();

                    if ($errors) {
                        libxml_clear_errors();
                        throw new LibXMLException($errors);
                    }
                }

                switch ($this->nodeType) {
                    case self::ELEMENT :
                        // The element's deserializer is responsible for
                        // advancing the cursor, so there is no read() here.
                        $elements[] = $this->parseCurrentElement();
                        break;
                    case self::TEXT :
                    case self::CDATA :
                        $text .= $this->value;
                        $this->read();
                        break;
                    case self::END_ELEMENT :
                        // Ensuring we are moving the cursor after the end element.
                        $this->read();
                        break 2;
                    case self::NONE :
                        throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
                    default :
                        // Advance to the next element
                        $this->read();
                        break;
                }

            }

        } finally {

            // Restore the previous element map, even when parsing failed.
            if (!is_null($elementMap)) {
                $this->popContext();
            }

        }
        return ($elements ? $elements : $text);

    }

    /**
     * Reads all text below the current element, and returns this as a string.
     *
     * Text, CDATA and whitespace nodes at any depth below the current element
     * are concatenated. Reading stops at the first node that is back at the
     * starting depth, which for a regular element is its closing tag.
     *
     * If the current element is empty (self-closing), an empty string is
     * returned and the cursor is not moved.
     *
     * @return string
     */
    function readText() {

        $result = '';

        // A self-closing element has no closing tag for the depth check
        // below to stop at, so reading on would consume nodes that belong to
        // siblings or to the parent element.
        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            return $result;
        }

        $previousDepth = $this->depth;

        while ($this->read() && $this->depth !== $previousDepth) {
            if (in_array($this->nodeType, [self::TEXT, self::CDATA, self::WHITESPACE], true)) {
                $result .= $this->value;
            }
        }
        return $result;

    }

    /**
     * Parses the current XML element.
     *
     * This method returns an array with 3 properties:
     *   * name - A clark-notation XML element name.
     *   * value - The parsed value.
     *   * attributes - A key-value list of attributes.
     *
     * @return array
     */
    function parseCurrentElement() {

        $name = $this->getClark();

        // Attributes must be collected before the deserializer runs, because
        // the deserializer is handed the reader itself.
        $attributes = [];

        if ($this->hasAttributes) {
            $attributes = $this->parseAttributes();
        }

        $value = call_user_func(
            $this->getDeserializerForElementName($name),
            $this
        );

        return [
            'name'       => $name,
            'value'      => $value,
            'attributes' => $attributes,
        ];
    }


    /**
     * Grabs all the attributes from the current element, and returns them as a
     * key-value array.
     *
     * Attributes that are not in a namespace are keyed by their local name.
     * Attributes that are in a namespace are keyed by their name in
     * clark-notation. Namespace declarations (xmlns) are skipped.
     *
     * After this call the cursor is moved back to the element that owns the
     * attributes.
     *
     * @return array
     */
    function parseAttributes() {

        $attributes = [];

        while ($this->moveToNextAttribute()) {
            if ($this->namespaceURI) {

                // Ignoring 'xmlns', it doesn't make any sense.
                if ($this->namespaceURI === 'http://www.w3.org/2000/xmlns/') {
                    continue;
                }

                $name = $this->getClark();
                $attributes[$name] = $this->value;

            } else {
                $attributes[$this->localName] = $this->value;
            }
        }
        $this->moveToElement();

        return $attributes;

    }

    /**
     * Returns the function that should be used to parse the element identified
     * by its clark-notation name.
     *
     * The element map is checked for the full clark-notation name first. For
     * elements without a namespace ("{}name") the bare local name is tried as
     * well. If neither is registered, the deserializer of
     * Sabre\Xml\Element\Base is returned.
     *
     * @param string $name
     * @throws \LogicException if the registered value is neither an
     *                         XmlDeserializable class nor a callable.
     * @return callable
     */
    function getDeserializerForElementName($name) {

        if (!array_key_exists($name, $this->elementMap)) {
            if (substr($name, 0, 2) === '{}' && array_key_exists(substr($name, 2), $this->elementMap)) {
                $name = substr($name, 2);
            } else {
                return ['Sabre\\Xml\\Element\\Base', 'xmlDeserialize'];
            }
        }

        $deserializer = $this->elementMap[$name];
        if (is_subclass_of($deserializer, 'Sabre\\Xml\\XmlDeserializable')) {
            return [$deserializer, 'xmlDeserialize'];
        }

        if (is_callable($deserializer)) {
            return $deserializer;
        }

        // Build a descriptive type label for the error message.
        $type = gettype($deserializer);
        if ($type === 'string') {
            $type .= ' (' . $deserializer . ')';
        } elseif ($type === 'object') {
            $type .= ' (' . get_class($deserializer) . ')';
        }
        throw new \LogicException('Could not use this type as a deserializer: ' . $type . ' for element: ' . $name);

    }

}