<?php

namespace Sabre\Xml;

use XMLReader;

/**
 * The Reader class expands upon PHP's built-in XMLReader.
 *
 * The intended usage is to assign certain XML elements to PHP classes. These
 * need to be registered using the $elementMap public property.
 *
 * After this is done, a single call to parse() will parse the entire document,
 * and delegate sub-sections of the document to element classes.
 *
 * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class Reader extends XMLReader {

    use ContextStackTrait;

    /**
     * Returns the current nodename in clark-notation.
     *
     * For example: "{http://www.w3.org/2005/Atom}feed".
     * Or if no namespace is defined: "{}feed".
     *
     * This method returns null if the current node has no local name (for
     * instance when the cursor is not on an element or attribute).
     *
     * @return string|null
     */
    function getClark() {

        if (!$this->localName) {
            return null;
        }

        return '{' . $this->namespaceURI . '}' . $this->localName;

    }

    /**
     * Reads the entire document.
     *
     * This function returns an array with the following three elements:
     *   * name - The root element name.
     *   * value - The value for the root element.
     *   * attributes - An array of attributes.
     *
     * This function will also disable the standard libxml error handler (which
     * usually just results in PHP errors), and throw exceptions instead. The
     * previous libxml settings are restored before this method returns or
     * throws.
     *
     * @throws LibXMLException when libxml reported errors while parsing.
     * @return array
     */
    function parse() {

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0 (which
        // requires a libxml version that does not load external entities by
        // default), so it is only toggled on older PHP versions.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $previousEntityState = null;
        if ($toggleEntityLoader) {
            $previousEntityState = libxml_disable_entity_loader(true);
        }
        $previousSetting = libxml_use_internal_errors(true);

        // Puts the global libxml switches back the way we found them.
        $restore = function () use ($previousSetting, $toggleEntityLoader, $previousEntityState) {
            libxml_use_internal_errors($previousSetting);
            if ($toggleEntityLoader) {
                libxml_disable_entity_loader($previousEntityState);
            }
        };

        // No 'finally' here to stay compatible with PHP 5.4. \Throwable only
        // exists on PHP 7+; on older versions that catch clause simply never
        // matches.
        try {

            // Really sorry about the silence operator, seems like I have no
            // choice. See:
            //
            // https://bugs.php.net/bug.php?id=64230
            while ($this->nodeType !== self::ELEMENT && @$this->read()) {
                // noop
            }
            $result = $this->parseCurrentElement();

        } catch (\Exception $e) {
            $restore();
            throw $e;
        } catch (\Throwable $e) {
            $restore();
            throw $e;
        }

        $errors = libxml_get_errors();
        libxml_clear_errors();
        $restore();

        if ($errors) {
            throw new LibXMLException($errors);
        }

        return $result;
    }



    /**
     * parseGetElements parses everything in the current sub-tree,
     * and returns an array of elements.
     *
     * Each element has a 'name', 'value' and 'attributes' key.
     *
     * If the element didn't contain sub-elements, an empty array is always
     * returned. If there was any text inside the element, it will be
     * discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @return array
     */
    function parseGetElements(array $elementMap = null) {

        $result = $this->parseInnerTree($elementMap);
        if (!is_array($result)) {
            return [];
        }
        return $result;

    }

    /**
     * Parses all elements below the current element.
     *
     * This method will return a string if this was a text-node, or an array if
     * there were sub-elements.
     *
     * If there's both text and sub-elements, the text will be discarded.
     *
     * null is returned for an empty element (<foo />) and for an element
     * without text or sub-elements; false is returned when the reader could
     * not advance into the element.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process,
     * including when parsing fails.
     *
     * @param array $elementMap
     * @throws LibXMLException when libxml reported errors for the sub-tree.
     * @throws ParseException when the end of the document is hit prematurely.
     * @return array|string|null|false
     */
    function parseInnerTree(array $elementMap = null) {

        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            // Easy!
            $this->next();
            return null;
        }

        if (is_null($elementMap)) {
            return $this->readInnerNodes();
        }

        $this->pushContext();
        $this->elementMap = $elementMap;

        // Every exit path below must pop the context again, otherwise the
        // caller would be left with the overridden elementMap. No 'finally'
        // to stay compatible with PHP 5.4; the \Throwable clause only ever
        // matches on PHP 7+.
        try {
            $result = $this->readInnerNodes();
        } catch (\Exception $e) {
            $this->popContext();
            throw $e;
        } catch (\Throwable $e) {
            $this->popContext();
            throw $e;
        }
        $this->popContext();

        return $result;

    }

    /**
     * Steps into the current element and collects its child nodes, leaving
     * the cursor after the matching end element.
     *
     * @throws LibXMLException when libxml reported errors for the sub-tree.
     * @throws ParseException when the end of the document is hit prematurely.
     * @return array|string|null|false
     */
    private function readInnerNodes() {

        // Really sorry about the silence operator, seems like I have no
        // choice. See:
        //
        // https://bugs.php.net/bug.php?id=64230
        if (!@$this->read()) {
            return false;
        }

        $text = null;
        $elements = [];

        while (true) {

            if (!$this->isValid()) {

                $errors = libxml_get_errors();

                if ($errors) {
                    libxml_clear_errors();
                    throw new LibXMLException($errors);
                }
            }

            switch ($this->nodeType) {
                case self::ELEMENT :
                    // The element parser is responsible for advancing the
                    // cursor past the element it consumed.
                    $elements[] = $this->parseCurrentElement();
                    break;
                case self::TEXT :
                case self::CDATA :
                    $text .= $this->value;
                    $this->read();
                    break;
                case self::END_ELEMENT :
                    // Ensuring we are moving the cursor after the end element.
                    $this->read();
                    break 2;
                case self::NONE :
                    throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
                default :
                    // Advance to the next element
                    $this->read();
                    break;
            }

        }

        // Sub-elements win over text when both are present.
        return ($elements ? $elements : $text);

    }

    /**
     * Reads all text below the current element, and returns this as a string.
     *
     * Text, CDATA and whitespace nodes are concatenated; reading stops as
     * soon as the cursor is back at the depth it started on.
     *
     * @return string
     */
    function readText() {

        $result = '';
        $previousDepth = $this->depth;

        while ($this->read() && $this->depth != $previousDepth) {
            if (in_array($this->nodeType, [XMLReader::TEXT, XMLReader::CDATA, XMLReader::WHITESPACE], true)) {
                $result .= $this->value;
            }
        }
        return $result;

    }

    /**
     * Parses the current XML element.
     *
     * This method returns an array with 3 properties:
     *   * name - A clark-notation XML element name.
     *   * value - The parsed value.
     *   * attributes - A key-value list of attributes.
     *
     * The value is produced by the deserializer registered for the element
     * name in $elementMap (an XmlDeserializable class or a callable), or by
     * Element\Base when no deserializer is registered.
     *
     * @throws \LogicException when the registered deserializer is neither an
     *                         XmlDeserializable class nor a callable.
     * @return array
     */
    function parseCurrentElement() {

        $name = $this->getClark();

        $attributes = [];

        if ($this->hasAttributes) {
            $attributes = $this->parseAttributes();
        }

        if (array_key_exists($name, $this->elementMap)) {
            $deserializer = $this->elementMap[$name];
            if (is_subclass_of($deserializer, 'Sabre\\Xml\\XmlDeserializable')) {
                $value = call_user_func([ $deserializer, 'xmlDeserialize' ], $this);
            } elseif (is_callable($deserializer)) {
                $value = call_user_func($deserializer, $this);
            } else {
                // Build a descriptive type label for the error message.
                $type = gettype($deserializer);
                if ($type === 'string') {
                    $type .= ' (' . $deserializer . ')';
                } elseif ($type === 'object') {
                    $type .= ' (' . get_class($deserializer) . ')';
                }
                throw new \LogicException('Could not use this type as a deserializer: ' . $type);
            }
        } else {
            $value = Element\Base::xmlDeserialize($this);
        }

        return [
            'name'       => $name,
            'value'      => $value,
            'attributes' => $attributes,
        ];
    }

    /**
     * Grabs all the attributes from the current element, and returns them as a
     * key-value array.
     *
     * Attributes without a namespace are keyed by their local name. Attributes
     * that carry a namespace are keyed by their name in clark-notation.
     * Namespace declarations (xmlns) are skipped.
     *
     * The cursor is moved back to the owning element afterwards.
     *
     * @return array
     */
    function parseAttributes() {

        $attributes = [];

        while ($this->moveToNextAttribute()) {
            if ($this->namespaceURI) {

                // Ignoring 'xmlns', it doesn't make any sense.
                if ($this->namespaceURI === 'http://www.w3.org/2000/xmlns/') {
                    continue;
                }

                $name = $this->getClark();
                $attributes[$name] = $this->value;

            } else {
                $attributes[$this->localName] = $this->value;
            }
        }
        $this->moveToElement();

        return $attributes;

    }

}