1<?php
2/**
3 * XMLUtil: class with helper functions for simple XML handling
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author LarsDW223
7 */
8
9/**
10 * The XMLUtil class
11 */
class XMLUtil
{
    /**
     * Checks whether a single character may be part of an XML element name.
     *
     * Accepted are ASCII letters and digits plus ':', '-', '_' and '.'.
     * The '.' is accepted because it is a legal XML name character and the
     * attribute pattern used by getAttributes() accepts it as well.
     *
     * @param  string $sign  The character to check
     * @return bool          true if the character is accepted, false otherwise
     */
    public static function isValidXMLName ($sign) {
        return ctype_alnum($sign) || in_array($sign, array(':', '-', '_', '.'), true);
    }

    /**
     * Helper function which returns the opening $element tag
     * if found in $xmlCode. Otherwise it returns NULL.
     *
     * Only tags which carry at least one whitespace character after the
     * element name are matched. The returned string starts at the element
     * name, the leading '<' is not part of the result.
     *
     * @param  string $element  The name of the element
     * @param  string $xmlCode  The XML code to search through
     * @return string|null      Found opening tag or NULL
     */
    public static function getElementOpenTag ($element, $xmlCode) {
        // Quote the name so that characters like '.' are matched literally
        // and a '/' can not terminate the pattern.
        $pattern = '/'.preg_quote($element, '/').'\s[^>]*>/';
        if (preg_match($pattern, $xmlCode, $matches) === 1) {
            return $matches[0];
        }
        return null;
    }

    /**
     * Helper function to find the next element $element and return its
     * complete definition including opening and closing tag.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string   $element  The name of the element
     * @param  string   $xmlCode  The XML code to search through
     * @param  int|null $endPos   Set to the offset directly behind the found
     *                            element. Left untouched if nothing is found.
     * @return string|null        Found element or NULL
     */
    public static function getElement ($element, $xmlCode, &$endPos=NULL) {
        if (empty($element) || empty($xmlCode)) {
            return null;
        }
        $start = self::findOpenTag($element, $xmlCode);
        if ($start === false) {
            return null;
        }
        $span = self::locateElement($element, $xmlCode, $start);
        if ($span === null) {
            return null;
        }

        $endPos = $span['end'];
        return substr($xmlCode, $start, $span['end'] - $start);
    }

    /**
     * Helper function to find the next element $element and return its
     * content only without the opening and closing tag of $element itself.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string   $element  The name of the element
     * @param  string   $xmlCode  The XML code to search through
     * @param  int|null $endPos   Set to the offset directly behind the found
     *                            element (also if the element has no content).
     *                            Left untouched if nothing is found.
     * @return string|null        Content of the element or NULL if the element
     *                            was not found or has no content
     */
    public static function getElementContent ($element, $xmlCode, &$endPos=NULL) {
        if (empty($element) || empty($xmlCode)) {
            return null;
        }
        $start = self::findOpenTag($element, $xmlCode);
        if ($start === false) {
            return null;
        }
        $span = self::locateElement($element, $xmlCode, $start);
        if ($span === null) {
            return null;
        }

        $endPos = $span['end'];
        if ($span['contentLength'] < 1) {
            // Self-closing element or nothing between the tags.
            return null;
        }
        return substr($xmlCode, $span['contentStart'], $span['contentLength']);
    }

    /**
     * Helper function to find the next element and return its
     * content only without the opening and closing tag of the element itself.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string   $element  Whenever an element is found, $element carries
     *                            its name (also if the element has no content)
     * @param  string   $xmlCode  The XML code to search through
     * @param  int|null $endPos   Set to the offset directly behind the found
     *                            element. Left untouched if nothing is found.
     * @return string|null        Content of the element or NULL if no element
     *                            was found or it has no content
     */
    public static function getNextElementContent (&$element, $xmlCode, &$endPos=NULL) {
        if (empty($xmlCode)) {
            return null;
        }
        $name = null;
        $start = self::findNextOpenTag($xmlCode, $name);
        if ($start === false) {
            return null;
        }
        $span = self::locateElement($name, $xmlCode, $start);
        if ($span === null) {
            return null;
        }

        $endPos = $span['end'];
        $element = $name;
        if ($span['contentLength'] < 1) {
            // Self-closing element or nothing between the tags.
            return null;
        }
        return substr($xmlCode, $span['contentStart'], $span['contentLength']);
    }

    /**
     * Helper function to find the next element and return its
     * complete definition including opening and closing tag.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string   $element  On success $element carries the name of the found element
     * @param  string   $xmlCode  The XML code to search through
     * @param  int|null $endPos   Set to the offset directly behind the found
     *                            element. Left untouched if nothing is found.
     * @return string|null        Found element or NULL
     */
    public static function getNextElement (&$element, $xmlCode, &$endPos=NULL) {
        if (empty($xmlCode)) {
            return null;
        }
        $name = null;
        $start = self::findNextOpenTag($xmlCode, $name);
        if ($start === false) {
            return null;
        }
        $span = self::locateElement($name, $xmlCode, $start);
        if ($span === null) {
            return null;
        }

        $endPos = $span['end'];
        $element = $name;
        return substr($xmlCode, $start, $span['end'] - $start);
    }

    /**
     * Helper function to replace an XML element with a string.
     *
     * The first occurrence of the element is located with getElement(), so
     * self-closing elements ('<element/>', with or without attributes) are
     * replaced too and elements whose name merely starts with $element are
     * not touched. If the element is not found (or is not terminated) the
     * XML code is returned unchanged.
     *
     * @param  string $element      Name of the element to be replaced.
     * @param  string $xmlCode      The XML code to search through
     * @param  string $replacement  The string which shall be inserted
     * @return string               $xmlCode with replaced element
     */
    public static function elementReplace ($element, $xmlCode, $replacement) {
        $endPos = null;
        $found = self::getElement($element, $xmlCode, $endPos);
        if ($found === null) {
            return $xmlCode;
        }
        // $endPos points directly behind the element, so its start offset
        // is $endPos minus the length of the element.
        $length = strlen($found);
        return substr_replace($xmlCode, $replacement, $endPos - $length, $length);
    }

    /**
     * Helper function which returns the value of $attribute
     * if found in $xmlCode. Otherwise it returns NULL.
     *
     * The value may be enclosed in double or single quotes and the '='
     * may be surrounded by whitespace.
     *
     * @param  string $attribute  The name of the attribute
     * @param  string $xmlCode    The XML code to search through
     * @return string|null        Found value or NULL
     */
    public static function getAttributeValue ($attribute, $xmlCode) {
        $pattern = '/\s'.preg_quote($attribute, '/').'\s*=\s*(?:"([^"]*)"|\'([^\']*)\')/';
        if (preg_match($pattern, $xmlCode, $matches) === 1) {
            // Group 2 is only filled for a single quoted value.
            if (isset($matches[2]) && $matches[2] !== '') {
                return $matches[2];
            }
            return $matches[1];
        }
        return null;
    }

    /**
     * Helper function which stores all attributes
     * in the array $attributes as name => value pairs.
     *
     * Values may be enclosed in double or single quotes and the '='
     * may be surrounded by whitespace. Entries already present in
     * $attributes are kept (and overwritten if the name is found again).
     *
     * @param  array  $attributes  Array to store the attributes in
     * @param  string $xmlCode     The XML code to search through
     * @return int                 0 if no attribute was found, otherwise the
     *                             number of entries in $attributes afterwards
     */
    public static function getAttributes (&$attributes, $xmlCode) {
        $pattern = '/\s([-:_.a-zA-Z0-9]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')/';
        $found = preg_match_all($pattern, $xmlCode, $matches, PREG_SET_ORDER);
        if (!$found) {
            // No match or regex error.
            return 0;
        }
        foreach ($matches as $match) {
            // Group 3 is only filled for a single quoted value.
            if (isset($match[3]) && $match[3] !== '') {
                $attributes[$match[1]] = $match[3];
            } else {
                $attributes[$match[1]] = $match[2];
            }
        }
        return count($attributes);
    }

    /**
     * Finds the offset of the '<' of the first opening tag named $element.
     *
     * The name must be followed by '/', '>' or whitespace, so elements whose
     * name only starts with $element are skipped.
     *
     * @param  string $element  The name of the element (must not be empty)
     * @param  string $xmlCode  The XML code to search through
     * @return int|false        Offset of the '<' or false if not found
     */
    private static function findOpenTag ($element, $xmlCode) {
        $max = strlen($xmlCode);
        $elementLength = strlen($element);
        $pos = 0;

        while ($pos < $max) {
            $start = strpos($xmlCode, '<'.$element, $pos);
            if ($start === false) {
                return false;
            }

            $after = $start + $elementLength + 1;
            if ($after >= $max) {
                // The name runs up to the end of the input, there is no
                // character left which could terminate the tag.
                return false;
            }

            $next = $xmlCode[$after];
            if ($next == '/' || $next == '>' || ctype_space($next)) {
                return $start;
            }

            $pos = $start + $elementLength;
        }
        return false;
    }

    /**
     * Finds the offset of the '<' of the first opening tag of any element.
     *
     * Constructs like closing tags, comments or processing instructions are
     * skipped because the character following their '<' is not accepted by
     * isValidXMLName().
     *
     * @param  string      $xmlCode  The XML code to search through
     * @param  string|null $name     Set to the name of the found element
     * @return int|false             Offset of the '<' or false if not found
     */
    private static function findNextOpenTag ($xmlCode, &$name) {
        $max = strlen($xmlCode);
        $pos = 0;

        while ($pos < $max) {
            $start = strpos($xmlCode, '<', $pos);
            if ($start === false || $start + 1 >= $max) {
                // No '<' left or the '<' is the last character.
                return false;
            }

            if (!self::isValidXMLName($xmlCode[$start + 1])) {
                // Skip this one.
                $pos = $start + 2;
                continue;
            }

            // Extract element name.
            $read = $start + 1;
            $candidate = '';
            while (self::isValidXMLName($xmlCode[$read])) {
                $candidate .= $xmlCode[$read];
                $read++;
                if ($read >= $max) {
                    return false;
                }
            }

            $next = $xmlCode[$read];
            if ($next == '/' || $next == '>' || ctype_space($next)) {
                $name = $candidate;
                return $start;
            }

            $pos = $start + strlen($candidate);
        }
        return false;
    }

    /**
     * Determines where the element whose opening tag starts at $start ends.
     *
     * The closing tag is searched with a simple string search, which means
     * the element is assumed to not be nested in itself.
     *
     * @param  string $name     The name of the element
     * @param  string $xmlCode  The XML code to search through
     * @param  int    $start    Offset of the '<' of the opening tag
     * @return array|null       NULL if the opening tag is not terminated or
     *                          the closing tag is missing. Otherwise an array
     *                          with the keys 'end' (offset directly behind
     *                          the element), 'contentStart' (offset of the
     *                          content, NULL for a self-closing element) and
     *                          'contentLength' (0 for a self-closing element)
     */
    private static function locateElement ($name, $xmlCode, $start) {
        $nameLength = strlen($name);

        // Search next '>'.
        $angle = strpos($xmlCode, '>', $start + $nameLength);
        if ($angle === false) {
            // Opening tag is not terminated.
            return null;
        }

        // Is this already the end?
        if ($xmlCode[$angle - 1] == '/') {
            return array(
                'end' => $angle + 1,
                'contentStart' => null,
                'contentLength' => 0,
            );
        }

        $close = strpos($xmlCode, '</'.$name.'>', $angle + 1);
        if ($close === false) {
            return null;
        }
        return array(
            // 3 is the length of '</' plus '>'.
            'end' => $close + 3 + $nameLength,
            'contentStart' => $angle + 1,
            'contentLength' => $close - $angle - 1,
        );
    }
}
423