<?php

namespace Sabre\Xml;

use XMLReader;

/**
 * The Reader class expands upon PHP's built-in XMLReader.
 *
 * The intended usage is to assign certain XML elements to PHP classes. These
 * need to be registered using the $elementMap public property.
 *
 * After this is done, a single call to parse() will parse the entire document,
 * and delegate sub-sections of the document to element classes.
 *
 * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class Reader extends XMLReader {

    use ContextStackTrait;

    /**
     * Returns the current nodename in clark-notation.
     *
     * For example: "{http://www.w3.org/2005/Atom}feed".
     * Or if no namespace is defined: "{}feed".
     *
     * This method returns null if the current node has no local name, which
     * is the case when we're not currently on an element.
     *
     * @return string|null
     */
    function getClark() {

        if (! $this->localName) {
            return null;
        }

        return '{' . $this->namespaceURI . '}' . $this->localName;

    }

    /**
     * Reads the entire document.
     *
     * This function returns an array with the following three elements:
     *   * name - The root element name.
     *   * value - The value for the root element.
     *   * attributes - An array of attributes.
     *
     * This function will also disable the standard libxml error handler (which
     * usually just results in PHP errors), and throw exceptions instead.
     *
     * The libxml settings changed here are process-wide, so they are always
     * restored before this method returns or throws.
     *
     * @throws LibXMLException if libxml reported errors while parsing.
     * @return array
     */
    function parse() {

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and
        // emits a deprecation notice on every call, so it is only toggled on
        // older PHP versions.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $previousEntityState = null;
        if ($toggleEntityLoader) {
            $previousEntityState = libxml_disable_entity_loader(true);
        }
        $previousSetting = libxml_use_internal_errors(true);

        $result = null;
        $failure = null;

        // No 'finally' here: the file otherwise only relies on PHP 5.4
        // syntax. The exception is captured, the global libxml state is
        // restored, and the exception is re-thrown afterwards.
        try {

            // Really sorry about the silence operator, seems like I have no
            // choice. See:
            //
            // https://bugs.php.net/bug.php?id=64230
            while ($this->nodeType !== self::ELEMENT && @$this->read()) {
                // noop
            }
            $result = $this->parseCurrentElement();

        } catch (\Exception $e) {
            $failure = $e;
        }

        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
        if ($toggleEntityLoader) {
            libxml_disable_entity_loader($previousEntityState);
        }

        // An exception raised during parsing takes precedence, so callers
        // keep seeing the same exception type as before.
        if ($failure) {
            throw $failure;
        }

        if ($errors) {
            throw new LibXMLException($errors);
        }

        return $result;
    }

    /**
     * parseGetElements parses everything in the current sub-tree,
     * and returns an array of elements.
     *
     * Each element has a 'name', 'value' and 'attributes' key.
     *
     * If the element didn't contain sub-elements, an empty array is always
     * returned. If there was any text inside the element, it will be
     * discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @return array
     */
    function parseGetElements(array $elementMap = null) {

        $result = $this->parseInnerTree($elementMap);
        if (!is_array($result)) {
            return [];
        }
        return $result;

    }

    /**
     * Parses all elements below the current element.
     *
     * This method will return a string if this was a text-node, or an array if
     * there were sub-elements.
     *
     * If there's both text and sub-elements, the text will be discarded.
     *
     * null is returned for an empty element (<foo />) and for an element with
     * neither text nor sub-elements. false is returned if the underlying
     * reader could not advance into the element.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process,
     * including when parsing stops early or an exception is thrown.
     *
     * @param array $elementMap
     * @throws LibXMLException if the document is invalid and libxml reported errors.
     * @throws ParseException if the end of the document is reached prematurely.
     * @return array|string|null|false
     */
    function parseInnerTree(array $elementMap = null) {

        $text = null;
        $elements = [];

        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            // Easy!
            $this->next();
            return null;
        }

        $hasCustomMap = !is_null($elementMap);
        if ($hasCustomMap) {
            $this->pushContext();
            $this->elementMap = $elementMap;
        }

        // Really sorry about the silence operator, seems like I have no
        // choice. See:
        //
        // https://bugs.php.net/bug.php?id=64230
        if (!@$this->read()) {
            // Undo the pushContext() above so the temporary element map does
            // not leak into whatever the caller parses next.
            if ($hasCustomMap) {
                $this->popContext();
            }
            return false;
        }

        try {

            while (true) {

                if (!$this->isValid()) {

                    $errors = libxml_get_errors();

                    if ($errors) {
                        libxml_clear_errors();
                        throw new LibXMLException($errors);
                    }
                }

                switch ($this->nodeType) {
                    case self::ELEMENT :
                        // parseCurrentElement() is responsible for advancing
                        // the cursor past the element it handled.
                        $elements[] = $this->parseCurrentElement();
                        break;
                    case self::TEXT :
                    case self::CDATA :
                        $text .= $this->value;
                        $this->read();
                        break;
                    case self::END_ELEMENT :
                        // Ensuring we are moving the cursor after the end element.
                        $this->read();
                        break 2;
                    case self::NONE :
                        throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
                    default :
                        // Advance to the next element
                        $this->read();
                        break;
                }

            }

        } catch (\Exception $e) {
            // Restore the previous context before propagating the failure.
            if ($hasCustomMap) {
                $this->popContext();
            }
            throw $e;
        }

        if ($hasCustomMap) {
            $this->popContext();
        }
        return ($elements ? $elements : $text);

    }

    /**
     * Reads all text below the current element, and returns this as a string.
     *
     * The reader is advanced until it is back at the depth it started on (or
     * until it cannot read any further). The values of all text, CDATA and
     * whitespace nodes encountered along the way are concatenated.
     *
     * @return string
     */
    function readText() {

        $result = '';
        $previousDepth = $this->depth;

        while ($this->read() && $this->depth != $previousDepth) {
            if (in_array($this->nodeType, [XMLReader::TEXT, XMLReader::CDATA, XMLReader::WHITESPACE], true)) {
                $result .= $this->value;
            }
        }
        return $result;

    }

    /**
     * Parses the current XML element.
     *
     * This method returns an array with 3 properties:
     *   * name - A clark-notation XML element name.
     *   * value - The parsed value.
     *   * attributes - A key-value list of attributes.
     *
     * The value is produced by the deserializer registered for the element
     * name in $elementMap, or by Element\Base if none is registered.
     *
     * @throws \LogicException if the registered deserializer is neither an
     *                         XmlDeserializable class nor a callable.
     * @return array
     */
    function parseCurrentElement() {

        $name = $this->getClark();

        $attributes = [];

        // Attributes have to be collected before the deserializer runs,
        // because the deserializer moves the cursor.
        if ($this->hasAttributes) {
            $attributes = $this->parseAttributes();
        }

        if (array_key_exists($name, $this->elementMap)) {
            $deserializer = $this->elementMap[$name];
            if (is_subclass_of($deserializer, 'Sabre\\Xml\\XmlDeserializable')) {
                $value = call_user_func([ $deserializer, 'xmlDeserialize' ], $this);
            } elseif (is_callable($deserializer)) {
                $value = call_user_func($deserializer, $this);
            } else {
                // Build a descriptive type name for the error message.
                $type = gettype($deserializer);
                if ($type === 'string') {
                    $type .= ' (' . $deserializer . ')';
                } elseif ($type === 'object') {
                    $type .= ' (' . get_class($deserializer) . ')';
                }
                throw new \LogicException('Could not use this type as a deserializer: ' . $type);
            }
        } else {
            $value = Element\Base::xmlDeserialize($this);
        }

        return [
            'name'       => $name,
            'value'      => $value,
            'attributes' => $attributes,
        ];
    }

    /**
     * Grabs all the attributes from the current element, and returns them as a
     * key-value array.
     *
     * Attributes without a namespace are returned under their short (local)
     * name. Attributes that are defined on a namespace are returned with
     * their name in clark-notation. Namespace declarations ('xmlns') are
     * skipped.
     *
     * After this call the cursor is moved back to the element itself.
     *
     * @return array
     */
    function parseAttributes() {

        $attributes = [];

        while ($this->moveToNextAttribute()) {
            if ($this->namespaceURI) {

                // Ignoring 'xmlns', it doesn't make any sense.
                if ($this->namespaceURI === 'http://www.w3.org/2000/xmlns/') {
                    continue;
                }

                $name = $this->getClark();
                $attributes[$name] = $this->value;

            } else {
                $attributes[$this->localName] = $this->value;
            }
        }
        $this->moveToElement();

        return $attributes;

    }

}