1<?php
2
3namespace Sabre\Xml;
4
5use XMLReader;
6
7/**
8 * The Reader class expands upon PHP's built-in XMLReader.
9 *
 * The intended usage is to assign certain XML elements to PHP classes. These
11 * need to be registered using the $elementMap public property.
12 *
13 * After this is done, a single call to parse() will parse the entire document,
14 * and delegate sub-sections of the document to element classes.
15 *
16 * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
17 * @author Evert Pot (http://evertpot.com/)
18 * @license http://sabre.io/license/ Modified BSD License
19 */
class Reader extends XMLReader {

    use ContextStackTrait;

    /**
     * Returns the name of the current node in clark-notation.
     *
     * For example: "{http://www.w3.org/2005/Atom}feed".
     * Or if no namespace is defined: "{}feed".
     *
     * Null is returned when the reader reports no (or an empty) local name
     * for the node it is currently positioned on.
     *
     * @return string|null
     */
    function getClark() {

        if (! $this->localName) {
            return null;
        }

        return '{' . $this->namespaceURI . '}' . $this->localName;

    }

    /**
     * Reads the entire document.
     *
     * The reader is advanced to the first element node, which is then handed
     * to parseCurrentElement(). The result is an array with three keys:
     *    * name - The root element name.
     *    * value - The value for the root element.
     *    * attributes - An array of attributes.
     *
     * While parsing, libxml's internal error collection is switched on and
     * the entity loader is switched off. Both settings are put back to their
     * previous values before this method returns or throws, including when a
     * deserializer fails halfway through the document.
     *
     * @throws LibXMLException If libxml collected errors while parsing.
     * @return array
     */
    function parse() {

        $previousEntityState = libxml_disable_entity_loader(true);
        $previousSetting = libxml_use_internal_errors(true);

        try {

            // Really sorry about the silence operator, seems like I have no
            // choice. See:
            //
            // https://bugs.php.net/bug.php?id=64230
            while ($this->nodeType !== self::ELEMENT && @$this->read()) {
                // noop
            }
            $result = $this->parseCurrentElement();

        } catch (\Exception $failure) {
            $this->restoreLibxmlState($previousSetting, $previousEntityState);
            throw $failure;
        } catch (\Throwable $failure) {
            // Only reachable on PHP 7+, where engine errors are thrown as
            // \Error rather than \Exception. On older versions this clause
            // simply never matches.
            $this->restoreLibxmlState($previousSetting, $previousEntityState);
            throw $failure;
        }

        // The errors must be fetched before the state is restored, because
        // restoring also empties the libxml error buffer.
        $errors = libxml_get_errors();
        $this->restoreLibxmlState($previousSetting, $previousEntityState);

        if ($errors) {
            throw new LibXMLException($errors);
        }

        return $result;
    }

    /**
     * Empties the libxml error buffer and restores the global libxml settings
     * that parse() changed.
     *
     * NOTE(review): libxml_disable_entity_loader() is deprecated as of
     * PHP 8.0; the call is kept so behavior on older versions is unchanged.
     *
     * @param bool $previousSetting Value to hand back to libxml_use_internal_errors().
     * @param bool $previousEntityState Value to hand back to libxml_disable_entity_loader().
     * @return void
     */
    private function restoreLibxmlState($previousSetting, $previousEntityState) {

        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
        libxml_disable_entity_loader($previousEntityState);

    }

    /**
     * parseGetElements parses everything in the current sub-tree,
     * and returns an array of elements.
     *
     * Each element has a 'name', 'value' and 'attributes' key.
     *
     * If the element didn't contain sub-elements, an empty array is always
     * returned. If there was any text inside the element, it will be
     * discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @return array
     */
    function parseGetElements(array $elementMap = null) {

        $result = $this->parseInnerTree($elementMap);

        // parseInnerTree() yields a string, null or false when there were no
        // sub-elements; all of those are normalized to an empty list.
        return is_array($result) ? $result : [];

    }

    /**
     * Parses all elements below the current element.
     *
     * This method will return a string if this was a text-node, or an array if
     * there were sub-elements.
     *
     * If there's both text and sub-elements, the text will be discarded.
     *
     * Null is returned for an empty (self-closing) element and for an element
     * that contained neither text nor sub-elements. False is returned when the
     * reader could not advance into the element at all.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process,
     * regardless of whether parsing succeeded, bailed out early or threw.
     *
     * @param array $elementMap
     * @throws LibXMLException
     * @throws ParseException
     * @return array|string|null|false
     */
    function parseInnerTree(array $elementMap = null) {

        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            // Easy! Nothing to descend into, just step over the element.
            $this->next();
            return null;
        }

        if (is_null($elementMap)) {
            return $this->collectInnerTree();
        }

        $this->pushContext();
        $this->elementMap = $elementMap;

        try {
            $result = $this->collectInnerTree();
        } catch (\Exception $failure) {
            $this->popContext();
            throw $failure;
        } catch (\Throwable $failure) {
            // PHP 7+ only; never matches on older versions.
            $this->popContext();
            throw $failure;
        }

        $this->popContext();
        return $result;

    }

    /**
     * Walks the children of the current element and gathers their values.
     *
     * This holds the actual traversal for parseInnerTree(); it does not touch
     * the context stack.
     *
     * @throws LibXMLException
     * @throws ParseException
     * @return array|string|null|false
     */
    private function collectInnerTree() {

        // Really sorry about the silence operator, seems like I have no
        // choice. See:
        //
        // https://bugs.php.net/bug.php?id=64230
        if (!@$this->read()) {
            return false;
        }

        $text = null;
        $elements = [];

        while (true) {

            if (!$this->isValid()) {

                $errors = libxml_get_errors();

                if ($errors) {
                    libxml_clear_errors();
                    throw new LibXMLException($errors);
                }
            }

            switch ($this->nodeType) {
                case self::ELEMENT :
                    // parseCurrentElement() is responsible for moving the
                    // cursor past the element it consumed.
                    $elements[] = $this->parseCurrentElement();
                    break;
                case self::TEXT :
                case self::CDATA :
                    $text .= $this->value;
                    $this->read();
                    break;
                case self::END_ELEMENT :
                    // Ensuring we are moving the cursor after the end element.
                    $this->read();
                    break 2;
                case self::NONE :
                    throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
                default :
                    // Advance to the next element
                    $this->read();
                    break;
            }

        }

        // Sub-elements win over text when both are present.
        return ($elements ? $elements : $text);

    }

    /**
     * Reads all text below the current element, and returns this as a string.
     *
     * Text, CDATA and whitespace nodes are concatenated. Reading stops as soon
     * as the reader is back at the depth it started from, or when there are no
     * more nodes to read.
     *
     * @return string
     */
    function readText() {

        $result = '';
        $previousDepth = $this->depth;
        $textNodeTypes = [self::TEXT, self::CDATA, self::WHITESPACE];

        while ($this->read() && $this->depth !== $previousDepth) {
            if (in_array($this->nodeType, $textNodeTypes, true)) {
                $result .= $this->value;
            }
        }
        return $result;

    }

    /**
     * Parses the current XML element.
     *
     * This method returns an array with 3 properties:
     *   * name - A clark-notation XML element name.
     *   * value - The parsed value.
     *   * attributes - A key-value list of attributes.
     *
     * The value is produced by the deserializer registered for the element
     * name in $this->elementMap: either a class that is a subclass of
     * XmlDeserializable, or any callable. Elements without a registered
     * deserializer are handed to Element\Base.
     *
     * @throws \LogicException If the registered deserializer is neither an
     *                         XmlDeserializable class nor a callable.
     * @return array
     */
    function parseCurrentElement() {

        $name = $this->getClark();

        $attributes = [];

        if ($this->hasAttributes) {
            $attributes = $this->parseAttributes();
        }

        if (array_key_exists($name, $this->elementMap)) {
            $deserializer = $this->elementMap[$name];
            if (is_subclass_of($deserializer, 'Sabre\\Xml\\XmlDeserializable')) {
                $value = call_user_func([ $deserializer, 'xmlDeserialize' ], $this);
            } elseif (is_callable($deserializer)) {
                $value = call_user_func($deserializer, $this);
            } else {
                // Describe the offending value as precisely as possible to
                // make a misconfigured element map easy to track down.
                $type = gettype($deserializer);
                if ($type === 'string') {
                    $type .= ' (' . $deserializer . ')';
                } elseif ($type === 'object') {
                    $type .= ' (' . get_class($deserializer) . ')';
                }
                throw new \LogicException('Could not use this type as a deserializer: ' . $type);
            }
        } else {
            $value = Element\Base::xmlDeserialize($this);
        }

        return [
            'name'       => $name,
            'value'      => $value,
            'attributes' => $attributes,
        ];
    }

    /**
     * Grabs all the attributes from the current element, and returns them as a
     * key-value array.
     *
     * Attributes without a namespace are keyed by their local name. Attributes
     * that do have a namespace are keyed by their name in clark-notation.
     * Namespace declarations (attributes in the xmlns namespace) are skipped.
     *
     * When done, the cursor is moved back to the element that owns the
     * attributes.
     *
     * @return array
     */
    function parseAttributes() {

        $attributes = [];

        while ($this->moveToNextAttribute()) {
            if ($this->namespaceURI) {

                // Ignoring 'xmlns', it doesn't make any sense.
                if ($this->namespaceURI === 'http://www.w3.org/2000/xmlns/') {
                    continue;
                }

                $name = $this->getClark();
                $attributes[$name] = $this->value;

            } else {
                $attributes[$this->localName] = $this->value;
            }
        }
        $this->moveToElement();

        return $attributes;

    }

}
293