1 <?php
2 /**
3  * XMLUtil: class with helper functions for simple XML handling
4  *
5  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6  * @author LarsDW223
7  */
8 
/**
 * The XMLUtil class.
 *
 * Collection of static helpers which extract or replace elements and
 * attributes in XML code by plain string searching. No XML parser is
 * involved, so the helpers only understand the simple forms described
 * at each function (attribute values enclosed in double quotes,
 * elements which are not nested in themselves).
 */
class XMLUtil
{
    /**
     * Checks if $sign is a character which is accepted as part of an
     * element name: alphanumeric characters, ':', '-' and '_'.
     *
     * @param  string $sign The character to check
     * @return bool         true if $sign is accepted, false otherwise
     */
    public static function isValidXMLName ($sign) {
        return ctype_alnum($sign) || $sign === ':' || $sign === '-' || $sign === '_';
    }

    /**
     * Checks if $char may directly follow an element name in an
     * opening tag: '/', '>' or a whitespace character.
     *
     * @param  string $char The character following the name
     * @return bool         true if $char terminates the name
     */
    private static function isNameTerminator ($char) {
        return $char == '/' || $char == '>' || ctype_space($char);
    }

    /**
     * Searches the first opening tag of $element in $xmlCode.
     *
     * A hit only counts if the name is directly followed by '/', '>' or
     * whitespace, so searching for 'a' does not stop at '<ab>'.
     *
     * @param  string $element The name of the element
     * @param  string $xmlCode The XML code to search through
     * @return int|false       Offset of the '<' or false if not found
     */
    private static function findElementStart ($element, $xmlCode) {
        $needle = '<'.$element;
        $needleLength = strlen ($needle);
        $max = strlen ($xmlCode);
        $pos = 0;

        while ($pos < $max) {
            $start = strpos ($xmlCode, $needle, $pos);
            if ($start === false) {
                return false;
            }

            $after = $start + $needleLength;
            if ($after >= $max) {
                // The input ends right behind the name, so the tag can
                // not be terminated. Do not read behind the string end.
                return false;
            }
            if (self::isNameTerminator ($xmlCode [$after])) {
                return $start;
            }

            // Only a longer name starting with $element. Search on.
            $pos = $start + 1;
        }
        return false;
    }

    /**
     * Searches the first opening tag of any element in $xmlCode.
     *
     * Everything starting with '<' but not followed by a name (closing
     * tags, comments, processing instructions) is skipped.
     *
     * @param  string $xmlCode The XML code to search through
     * @param  string $name    On success set to the name of the found element
     * @return int|false       Offset of the '<' or false if not found
     */
    private static function findNextElementStart ($xmlCode, &$name) {
        $max = strlen ($xmlCode);
        $pos = 0;

        while ($pos < $max) {
            $start = strpos ($xmlCode, '<', $pos);
            if ($start === false) {
                return false;
            }

            // Collect the name characters following the '<'.
            $read = $start + 1;
            $candidate = '';
            while ($read < $max && self::isValidXMLName ($xmlCode [$read])) {
                $candidate .= $xmlCode [$read];
                $read++;
            }
            if ($read >= $max) {
                // The input ends behind '<' or inside the name.
                // There can not be a terminated tag anymore.
                return false;
            }

            if ($candidate !== '' && self::isNameTerminator ($xmlCode [$read])) {
                $name = $candidate;
                return $start;
            }

            // Not an opening tag. Search on behind this '<'.
            $pos = $start + 1;
        }
        return false;
    }

    /**
     * Determines where the content and the element itself end for the
     * element $name whose opening tag starts at offset $start.
     *
     * The closing tag is simply the next occurrence of '</name>', which
     * means the element must not be nested in itself.
     *
     * @param  string $name    The name of the element
     * @param  string $xmlCode The XML code to search through
     * @param  int    $start   Offset of the '<' of the opening tag
     * @return array|null      Array with the keys 'contentStart',
     *                         'contentLength' and 'end' (offset behind
     *                         the element) or NULL if the opening tag
     *                         or the element is not terminated
     */
    private static function findElementBounds ($name, $xmlCode, $start) {
        $angle = strpos ($xmlCode, '>', $start + strlen ($name));
        if ($angle === false) {
            // Opening tag is not terminated.
            return null;
        }
        $contentStart = $angle + 1;

        if ($xmlCode [$angle - 1] == '/') {
            // Form '<element/>': no content, element ends with the tag.
            return array (
                'contentStart'  => $contentStart,
                'contentLength' => 0,
                'end'           => $contentStart
            );
        }

        $closingTag = '</'.$name.'>';
        $close = strpos ($xmlCode, $closingTag, $contentStart);
        if ($close === false) {
            return null;
        }
        return array (
            'contentStart'  => $contentStart,
            'contentLength' => $close - $contentStart,
            'end'           => $close + strlen ($closingTag)
        );
    }

    /**
     * Helper function which returns the opening $element tag
     * if found in $xmlCode. Otherwise it returns NULL.
     *
     * The search pattern starts with the element name, so the returned
     * string does not include the leading '<' (unless $element itself
     * contains it). Only tags in which the name is followed by
     * whitespace are matched, i.e. tags carrying attributes.
     *
     * @param  string $element The name of the element
     * @param  string $xmlCode The XML code to search through
     * @return string|null     Found opening tag or NULL
     */
    public static function getElementOpenTag ($element, $xmlCode) {
        // Quote the name: characters like '.' or '/' are no regex syntax here.
        $pattern = '/'.preg_quote ($element, '/').'\s[^>]*>/';
        if (preg_match ($pattern, $xmlCode, $matches) === 1) {
            return $matches [0];
        }
        return null;
    }

    /**
     * Helper function to find the next element $element and return its
     * complete definition including opening and closing tag.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string $element The name of the element
     * @param  string $xmlCode The XML code to search through
     * @param  int    $endPos  On success set to the offset behind the element
     * @return string|null     Found element or NULL
     */
    public static function getElement ($element, $xmlCode, &$endPos=NULL) {
        if (empty($element) || empty($xmlCode)) {
            return null;
        }

        $start = self::findElementStart ($element, $xmlCode);
        if ($start === false) {
            return null;
        }
        $bounds = self::findElementBounds ($element, $xmlCode, $start);
        if ($bounds === null) {
            return null;
        }

        $endPos = $bounds ['end'];
        return substr ($xmlCode, $start, $bounds ['end'] - $start);
    }

    /**
     * Helper function to find the next element $element and return its
     * content only without the opening and closing tag of $element itself.
     *
     * If the element is found but has no content ('<element/>' or
     * '<element></element>') NULL is returned, but $endPos is set.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string $element The name of the element
     * @param  string $xmlCode The XML code to search through
     * @param  int    $endPos  Set to the offset behind the element if found
     * @return string|null     Found content or NULL
     */
    public static function getElementContent ($element, $xmlCode, &$endPos=NULL) {
        if (empty($element) || empty($xmlCode)) {
            return null;
        }

        $start = self::findElementStart ($element, $xmlCode);
        if ($start === false) {
            return null;
        }
        $bounds = self::findElementBounds ($element, $xmlCode, $start);
        if ($bounds === null) {
            return null;
        }

        $endPos = $bounds ['end'];
        if ($bounds ['contentLength'] <= 0) {
            // Only really empty content yields NULL. Content with a
            // length of one character is valid content.
            return null;
        }
        return substr ($xmlCode, $bounds ['contentStart'], $bounds ['contentLength']);
    }

    /**
     * Helper function to find the next element and return its
     * content only without the opening and closing tag of the element itself.
     *
     * If an element is found but has no content ('<element/>' or
     * '<element></element>') NULL is returned, but $element and
     * $endPos are set.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string $element If an element is found, $element carries its name
     * @param  string $xmlCode The XML code to search through
     * @param  int    $endPos  Set to the offset behind the element if found
     * @return string|null     Found content or NULL
     */
    public static function getNextElementContent (&$element, $xmlCode, &$endPos=NULL) {
        if (empty($xmlCode)) {
            return null;
        }

        $name = '';
        $start = self::findNextElementStart ($xmlCode, $name);
        if ($start === false) {
            return null;
        }
        $bounds = self::findElementBounds ($name, $xmlCode, $start);
        if ($bounds === null) {
            return null;
        }

        $endPos = $bounds ['end'];
        $element = $name;
        if ($bounds ['contentLength'] <= 0) {
            return null;
        }
        return substr ($xmlCode, $bounds ['contentStart'], $bounds ['contentLength']);
    }

    /**
     * Helper function to find the next element and return its
     * complete definition including opening and closing tag.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string $element On success $element carries the name of the found element
     * @param  string $xmlCode The XML code to search through
     * @param  int    $endPos  On success set to the offset behind the element
     * @return string|null     Found element or NULL
     */
    public static function getNextElement (&$element, $xmlCode, &$endPos=NULL) {
        if (empty($xmlCode)) {
            return null;
        }

        $name = '';
        $start = self::findNextElementStart ($xmlCode, $name);
        if ($start === false) {
            return null;
        }
        $bounds = self::findElementBounds ($name, $xmlCode, $start);
        if ($bounds === null) {
            return null;
        }

        $endPos = $bounds ['end'];
        $element = $name;
        return substr ($xmlCode, $start, $bounds ['end'] - $start);
    }

    /**
     * Helper function to replace an XML element with a string.
     *
     * The first occurrence of $element is replaced, including its
     * opening and closing tag. Elements of the form '<element/>' are
     * handled as well. If the element is not found or not terminated,
     * $xmlCode is returned unchanged.
     *
     * THIS FUNCTION DOES NOT HANDLE ELEMENTS WHICH CAN BE NESTED IN THEMSELVES!!!
     *
     * @param  string $element     Name of the element to be replaced.
     * @param  string $xmlCode     The XML code to search through
     * @param  string $replacement The string which shall be inserted
     * @return string              $xmlCode with replaced element
     */
    public static function elementReplace ($element, $xmlCode, $replacement) {
        $endPos = null;
        $found = self::getElement ($element, $xmlCode, $endPos);
        if ($found === null) {
            return $xmlCode;
        }

        // getElement() reports the offset behind the element,
        // so the element starts its own length before that offset.
        $length = strlen ($found);
        return substr_replace ($xmlCode, $replacement, $endPos - $length, $length);
    }

    /**
     * Helper function which returns the value of $attribute
     * if found in $xmlCode. Otherwise it returns NULL.
     *
     * The attribute has to be preceded by whitespace and its value
     * has to be enclosed in double quotes.
     *
     * @param  string $attribute The name of the attribute
     * @param  string $xmlCode   The XML code to search through
     * @return string|null       Found value or NULL
     */
    public static function getAttributeValue ($attribute, $xmlCode) {
        // Quote the name: characters like '.' or '/' are no regex syntax here.
        $pattern = '/\s'.preg_quote ($attribute, '/').'="([^"]*)"/';
        if (preg_match ($pattern, $xmlCode, $matches) === 1) {
            return $matches [1];
        }
        return null;
    }

    /**
     * Helper function which stores all attributes
     * in the array $attributes as name => value pairs.
     *
     * Only attributes preceded by whitespace and with a value enclosed
     * in double quotes are recognized. Existing entries in $attributes
     * are kept (or overwritten if the name is found again) and are
     * included in the returned count.
     *
     * @param  array  $attributes Array to store the attributes in
     * @param  string $xmlCode    The XML code to search through
     * @return int                Number of entries in $attributes if at
     *                            least one attribute was found, otherwise 0
     */
    public static function getAttributes (&$attributes, $xmlCode) {
        $pattern = '/\s([-:_.a-zA-Z0-9]+)="([^"]*)"/';
        if (preg_match_all ($pattern, $xmlCode, $matches, PREG_SET_ORDER) > 0) {
            foreach ($matches as $match) {
                $attributes [$match [1]] = $match [2];
            }
            return count($attributes);
        }
        return 0;
    }
}
423