1<?php
2
3namespace Sabre\Xml;
4
5use XMLReader;
6
/**
 * The Reader class expands upon PHP's built-in XMLReader.
 *
 * The intended usage is to assign certain XML elements to PHP classes. These
 * need to be registered using the $elementMap public property.
 *
 * After this is done, a single call to parse() will parse the entire document,
 * and delegate sub-sections of the document to element classes.
 *
 * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class Reader extends XMLReader {

    use ContextStackTrait;

    /**
     * Returns the name of the current node in clark-notation.
     *
     * For example: "{http://www.w3.org/2005/Atom}feed".
     * Or if no namespace is defined: "{}feed".
     *
     * This method returns null when the reader reports no local name for the
     * current node.
     *
     * @return string|null
     */
    function getClark() {

        if (!$this->localName) {
            return null;
        }

        return '{' . $this->namespaceURI . '}' . $this->localName;

    }

    /**
     * Reads the entire document.
     *
     * This function returns an array with the following three elements:
     *    * name - The root element name.
     *    * value - The value for the root element.
     *    * attributes - An array of attributes.
     *
     * This function will also disable the standard libxml error handler (which
     * usually just results in PHP errors), and throw exceptions instead. The
     * previous libxml settings are restored before returning or throwing.
     *
     * @throws LibXMLException if libxml recorded errors while reading.
     * @throws ParseException if the document ends before any element is found.
     * @return array
     */
    function parse() {

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, and
        // libxml 2.9.0+ no longer loads external entities by default. The
        // loader is therefore only switched off on older libxml builds.
        $toggleEntityLoader = \LIBXML_VERSION < 20900;
        $previousEntityState = null;
        if ($toggleEntityLoader) {
            $previousEntityState = libxml_disable_entity_loader(true);
        }
        $previousSetting = libxml_use_internal_errors(true);

        try {

            // Skip everything in front of the root element.
            while ($this->nodeType !== self::ELEMENT) {

                // Really sorry about the silence operator, seems like I have
                // no choice. See:
                //
                // https://bugs.php.net/bug.php?id=64230
                if (!@$this->read()) {
                    // Reading failed before an element was reached. Report
                    // this right away instead of handing a non-element node
                    // to parseCurrentElement().
                    $errors = libxml_get_errors();
                    libxml_clear_errors();
                    if ($errors) {
                        throw new LibXMLException($errors);
                    }
                    throw new ParseException('The document ended before a root element was found.');
                }

            }
            $result = $this->parseCurrentElement();

            // Last line of defense: errors recorded during parsing that did
            // not already result in an exception.
            $errors = libxml_get_errors();
            libxml_clear_errors();
            if ($errors) {
                throw new LibXMLException($errors);
            }

        } finally {
            libxml_use_internal_errors($previousSetting);
            if ($toggleEntityLoader) {
                libxml_disable_entity_loader($previousEntityState);
            }
        }

        return $result;
    }

    /**
     * parseGetElements parses everything in the current sub-tree,
     * and returns an array of elements.
     *
     * Each element has a 'name', 'value' and 'attributes' key.
     *
     * If the element didn't contain sub-elements, an empty array is always
     * returned. If there was any text inside the element, it will be
     * discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @return array
     */
    function parseGetElements(array $elementMap = null) {

        $result = $this->parseInnerTree($elementMap);

        // parseInnerTree() yields a string or null when no sub-elements were
        // found. Both are normalized to an empty list here.
        if (!is_array($result)) {
            return [];
        }
        return $result;

    }

    /**
     * Parses all elements below the current element.
     *
     * This method will return a string if the element only contained text, an
     * array if there were sub-elements, or null if the element was empty.
     *
     * If there's both text and sub-elements, the text will be discarded.
     *
     * If the $elementMap argument is specified, the existing elementMap will
     * be overridden while parsing the tree, and restored after this process.
     *
     * @param array $elementMap
     * @throws LibXMLException if libxml recorded errors while reading.
     * @throws ParseException if the end of the document is hit prematurely.
     * @return array|string|null
     */
    function parseInnerTree(array $elementMap = null) {

        $text = null;
        $elements = [];

        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            // Easy! A self-closing element has nothing inside, so just step
            // over it.
            $this->next();
            return null;
        }

        if (!is_null($elementMap)) {
            // The current context is saved so the finally block below can
            // restore it once this sub-tree has been handled.
            $this->pushContext();
            $this->elementMap = $elementMap;
        }

        try {

            // Really sorry about the silence operator, seems like I have no
            // choice. See:
            //
            // https://bugs.php.net/bug.php?id=64230
            if (!@$this->read()) {
                $errors = libxml_get_errors();
                libxml_clear_errors();
                if ($errors) {
                    throw new LibXMLException($errors);
                }
                throw new ParseException('This should never happen (famous last words)');
            }

            while (true) {

                // isValid() only gates the lookup: an exception is thrown
                // solely when libxml actually recorded errors.
                if (!$this->isValid()) {

                    $errors = libxml_get_errors();

                    if ($errors) {
                        libxml_clear_errors();
                        throw new LibXMLException($errors);
                    }
                }

                switch ($this->nodeType) {
                    case self::ELEMENT :
                        // The cursor is not advanced here; that is left to
                        // the deserializer invoked by parseCurrentElement().
                        $elements[] = $this->parseCurrentElement();
                        break;
                    case self::TEXT :
                    case self::CDATA :
                        $text .= $this->value;
                        $this->read();
                        break;
                    case self::END_ELEMENT :
                        // Ensuring we are moving the cursor after the end element.
                        $this->read();
                        break 2;
                    case self::NONE :
                        throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
                    default :
                        // Advance to the next element
                        $this->read();
                        break;
                }

            }

        } finally {

            if (!is_null($elementMap)) {
                $this->popContext();
            }

        }

        // Sub-elements win over text; text is only returned when no
        // sub-elements were collected.
        return ($elements ? $elements : $text);

    }

    /**
     * Reads all text below the current element, and returns this as a string.
     *
     * Text, CDATA and whitespace nodes at any depth below the current element
     * are concatenated. For a self-closing element an empty string is
     * returned and the cursor is not moved. Otherwise reading stops as soon
     * as the depth is back at the level of the element this method was called
     * on, and the cursor is left on that node.
     *
     * @return string
     */
    function readText() {

        // A self-closing element has no closing tag of its own to stop at.
        // Reading on from it would walk into the following siblings or the
        // parent and pick up text that does not belong to this element.
        if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
            return '';
        }

        $result = '';
        $previousDepth = $this->depth;

        while ($this->read() && $this->depth > $previousDepth) {
            if (in_array($this->nodeType, [self::TEXT, self::CDATA, self::WHITESPACE], true)) {
                $result .= $this->value;
            }
        }
        return $result;

    }

    /**
     * Parses the current XML element.
     *
     * This method returns an array with 3 properties:
     *   * name - A clark-notation XML element name.
     *   * value - The parsed value.
     *   * attributes - A key-value list of attributes.
     *
     * The value is produced by the deserializer that
     * getDeserializerForElementName() returns for the element name; it is
     * called with this reader as its only argument.
     *
     * @return array
     */
    function parseCurrentElement() {

        $name = $this->getClark();

        // Attributes are collected first, before the deserializer gets a
        // chance to move the cursor.
        $attributes = [];

        if ($this->hasAttributes) {
            $attributes = $this->parseAttributes();
        }

        $value = call_user_func(
            $this->getDeserializerForElementName($name),
            $this
        );

        return [
            'name'       => $name,
            'value'      => $value,
            'attributes' => $attributes,
        ];
    }

    /**
     * Grabs all the attributes from the current element, and returns them as a
     * key-value array.
     *
     * Attributes without a namespace get their local name as key. Attributes
     * in a namespace get their clark-notation name as key. Namespace
     * declarations (xmlns) are skipped.
     *
     * @return array
     */
    function parseAttributes() {

        $attributes = [];

        while ($this->moveToNextAttribute()) {
            if ($this->namespaceURI) {

                // Ignoring 'xmlns', it doesn't make any sense.
                if ($this->namespaceURI === 'http://www.w3.org/2000/xmlns/') {
                    continue;
                }

                $name = $this->getClark();
                $attributes[$name] = $this->value;

            } else {
                $attributes[$this->localName] = $this->value;
            }
        }

        // Put the cursor back on the element that owns the attributes.
        $this->moveToElement();

        return $attributes;

    }

    /**
     * Returns the function that should be used to parse the element identified
     * by its clark-notation name.
     *
     * The name is looked up in the elementMap. A name in the empty namespace
     * ("{}name") is also looked up without the "{}" prefix. If nothing is
     * registered, the deserializer of Sabre\Xml\Element\Base is returned.
     *
     * @param string $name
     * @throws \LogicException if the registered value is neither a subclass
     *                         of XmlDeserializable nor callable.
     * @return callable
     */
    function getDeserializerForElementName($name) {

        if (!array_key_exists($name, $this->elementMap)) {
            if (substr($name, 0, 2) === '{}' && array_key_exists(substr($name, 2), $this->elementMap)) {
                $name = substr($name, 2);
            } else {
                return ['Sabre\\Xml\\Element\\Base', 'xmlDeserialize'];
            }
        }

        $deserializer = $this->elementMap[$name];
        if (is_subclass_of($deserializer, 'Sabre\\Xml\\XmlDeserializable')) {
            return [$deserializer, 'xmlDeserialize'];
        }

        if (is_callable($deserializer)) {
            return $deserializer;
        }

        // Nothing usable was registered; describe what was found to make the
        // error message actionable.
        $type = gettype($deserializer);
        if ($type === 'string') {
            $type .= ' (' . $deserializer . ')';
        } elseif ($type === 'object') {
            $type .= ' (' . get_class($deserializer) . ')';
        }
        throw new \LogicException('Could not use this type as a deserializer: ' . $type . ' for element: ' . $name);

    }

}
331