xref: /plugin/combo/ComboStrap/Xml/XmlDocument.php (revision 04fd306c7c155fa133ebb3669986875d65988276)
1*04fd306cSNickeau<?php
2*04fd306cSNickeau
3*04fd306cSNickeau/**
4*04fd306cSNickeau * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
5*04fd306cSNickeau *
6*04fd306cSNickeau * This source code is licensed under the GPL license found in the
7*04fd306cSNickeau * COPYING  file in the root directory of this source tree.
8*04fd306cSNickeau *
9*04fd306cSNickeau * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
10*04fd306cSNickeau * @author   ComboStrap <support@combostrap.com>
11*04fd306cSNickeau *
12*04fd306cSNickeau */
13*04fd306cSNickeau
14*04fd306cSNickeaunamespace ComboStrap\Xml;
15*04fd306cSNickeau
16*04fd306cSNickeauuse ComboStrap\ExceptionBadState;
17*04fd306cSNickeauuse ComboStrap\ExceptionBadSyntax;
18*04fd306cSNickeauuse ComboStrap\ExceptionNotFound;
19*04fd306cSNickeauuse ComboStrap\FileSystems;
20*04fd306cSNickeauuse ComboStrap\LogUtility;
21*04fd306cSNickeauuse ComboStrap\Path;
22*04fd306cSNickeauuse ComboStrap\PluginUtility;
23*04fd306cSNickeauuse DOMAttr;
24*04fd306cSNickeauuse DOMDocument;
25*04fd306cSNickeauuse DOMElement;
26*04fd306cSNickeauuse DOMNodeList;
27*04fd306cSNickeauuse DOMXPath;
28*04fd306cSNickeauuse LibXMLError;
29*04fd306cSNickeauuse PhpCss;
30*04fd306cSNickeau
31*04fd306cSNickeau
/**
 * An XML document that follows the Web API interface.
 *
 * Note: since the [jack_jackrum](https://www.dokuwiki.org/changes#release_2023-04-04_jack_jackrum) release,
 * DokuWiki uses the [dom-wrapper](https://github.com/scotteh/php-dom-wrapper) library,
 * which follows the jQuery API and uses [css-selector](https://symfony.com/doc/current/components/css_selector.html)
 * to get an XPath expression from a CSS selector.
 *
 */
41*04fd306cSNickeauclass XmlDocument
42*04fd306cSNickeau{
43*04fd306cSNickeau    const HTML_TYPE = "html";
44*04fd306cSNickeau    const XML_TYPE = "xml";
    /**
     * The errors that the HTML loading
     * may return
     */
49*04fd306cSNickeau    const KNOWN_HTML_LOADING_ERRORS = [
50*04fd306cSNickeau        "Tag section invalid\n", // section is HTML5 tag
51*04fd306cSNickeau        "Tag footer invalid\n", // footer is HTML5 tag
52*04fd306cSNickeau        "error parsing attribute name\n", // name is an HTML5 attribute
53*04fd306cSNickeau        "Unexpected end tag : blockquote\n", // name is an HTML5 attribute
54*04fd306cSNickeau        "Tag bdi invalid\n",
55*04fd306cSNickeau        "Tag path invalid\n", // svg
56*04fd306cSNickeau        "Tag svg invalid\n", // svg
57*04fd306cSNickeau        "Unexpected end tag : a\n", // when the document is only a anchor
58*04fd306cSNickeau        "Unexpected end tag : p\n", // when the document is only a p
59*04fd306cSNickeau        "Unexpected end tag : button\n", // when the document is only a button
60*04fd306cSNickeau    ];
61*04fd306cSNickeau
62*04fd306cSNickeau    const CANONICAL = "xml";
63*04fd306cSNickeau
64*04fd306cSNickeau    /**
65*04fd306cSNickeau     * @var DOMDocument
66*04fd306cSNickeau     */
67*04fd306cSNickeau    private DOMDocument $domDocument;
68*04fd306cSNickeau    /**
69*04fd306cSNickeau     * @var DOMXPath
70*04fd306cSNickeau     */
71*04fd306cSNickeau    private DOMXPath $domXpath;
72*04fd306cSNickeau
73*04fd306cSNickeau    /**
74*04fd306cSNickeau     * XmlFile constructor.
75*04fd306cSNickeau     * @param $text
76*04fd306cSNickeau     * @param string $type - HTML or not
77*04fd306cSNickeau     * @throws ExceptionBadSyntax - if the document is not valid or the lib xml is not available
78*04fd306cSNickeau     *
79*04fd306cSNickeau     * Getting the width of an error HTML document if the file was downloaded
80*04fd306cSNickeau     * from a server has no use at all
81*04fd306cSNickeau     */
82*04fd306cSNickeau    public function __construct($text, string $type = self::XML_TYPE)
83*04fd306cSNickeau    {
84*04fd306cSNickeau
85*04fd306cSNickeau        if (!$this->isXmlExtensionLoaded()) {
86*04fd306cSNickeau            /**
87*04fd306cSNickeau             * If the XML module is not present
88*04fd306cSNickeau             */
89*04fd306cSNickeau            throw new ExceptionBadSyntax("The php `libxml` module was not found on your installation, the xml/svg file could not be modified / instantiated", self::CANONICAL);
90*04fd306cSNickeau        }
91*04fd306cSNickeau
92*04fd306cSNickeau        // https://www.php.net/manual/en/libxml.constants.php
93*04fd306cSNickeau        $options = LIBXML_NOCDATA
94*04fd306cSNickeau            // | LIBXML_NOBLANKS // same as preserveWhiteSpace=true, not set to be able to format the output
95*04fd306cSNickeau            | LIBXML_NOXMLDECL // Drop the XML declaration when saving a document
96*04fd306cSNickeau            | LIBXML_NONET // No network during load
97*04fd306cSNickeau            | LIBXML_NSCLEAN // Remove redundant namespace declarations - for whatever reason, the formatting does not work if this is set
98*04fd306cSNickeau        ;
99*04fd306cSNickeau
100*04fd306cSNickeau        // HTML
101*04fd306cSNickeau        if ($type == self::HTML_TYPE) {
102*04fd306cSNickeau
103*04fd306cSNickeau            // Options that cause the process to hang if this is not for a html file
104*04fd306cSNickeau            // Empty tag option may also be used only on save
105*04fd306cSNickeau            //   at https://www.php.net/manual/en/domdocument.save.php
106*04fd306cSNickeau            //   and https://www.php.net/manual/en/domdocument.savexml.php
107*04fd306cSNickeau            $options = $options
108*04fd306cSNickeau                // | LIBXML_NOEMPTYTAG // Expand empty tags (e.g. <br/> to <br></br>)
109*04fd306cSNickeau                | LIBXML_HTML_NODEFDTD // No doctype
110*04fd306cSNickeau                | LIBXML_HTML_NOIMPLIED;
111*04fd306cSNickeau
112*04fd306cSNickeau
113*04fd306cSNickeau        }
114*04fd306cSNickeau
115*04fd306cSNickeau        /**
116*04fd306cSNickeau         * No warning reporting
117*04fd306cSNickeau         * Load XML issue E_STRICT warning seen in the log
118*04fd306cSNickeau         */
119*04fd306cSNickeau        if (!PluginUtility::isTest()) {
120*04fd306cSNickeau            $oldLevel = error_reporting(E_ERROR);
121*04fd306cSNickeau        }
122*04fd306cSNickeau
123*04fd306cSNickeau        $this->domDocument = new DOMDocument('1.0', 'UTF-8');
124*04fd306cSNickeau
125*04fd306cSNickeau        $this->mandatoryFormatConfigBeforeLoading();
126*04fd306cSNickeau
127*04fd306cSNickeau
128*04fd306cSNickeau        $text = $this->processTextBeforeLoading($text);
129*04fd306cSNickeau
130*04fd306cSNickeau        /**
131*04fd306cSNickeau         * Because the load does handle HTML5tag as error
132*04fd306cSNickeau         * (ie section for instance)
133*04fd306cSNickeau         * We take over the errors and handle them after the below load
134*04fd306cSNickeau         *
135*04fd306cSNickeau         * https://www.php.net/manual/en/function.libxml-use-internal-errors.php
136*04fd306cSNickeau         *
137*04fd306cSNickeau         */
138*04fd306cSNickeau        libxml_use_internal_errors(true);
139*04fd306cSNickeau
140*04fd306cSNickeau        if ($type == self::XML_TYPE) {
141*04fd306cSNickeau
142*04fd306cSNickeau            $result = $this->domDocument->loadXML($text, $options);
143*04fd306cSNickeau
144*04fd306cSNickeau        } else {
145*04fd306cSNickeau
146*04fd306cSNickeau            /**
147*04fd306cSNickeau             * Unlike loading XML, HTML does not have to be well-formed to load.
148*04fd306cSNickeau             * While malformed HTML should load successfully, this function may generate E_WARNING errors
149*04fd306cSNickeau             * @deprecated as we try to be XHTML compliantXML but yeah this is not always possible
150*04fd306cSNickeau             */
151*04fd306cSNickeau
152*04fd306cSNickeau            /**
153*04fd306cSNickeau             * Bug: Even if we set that the document is an UTF-8
154*04fd306cSNickeau             * loadHTML treat the string as being in ISO-8859-1 if without any heading
155*04fd306cSNickeau             * (ie <xml encoding="utf-8"..>
156*04fd306cSNickeau             * https://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly
157*04fd306cSNickeau             * Otherwise French and other language are not well loaded
158*04fd306cSNickeau             *
159*04fd306cSNickeau             * We use the trick to transform UTF-8 to HTML
160*04fd306cSNickeau             */
161*04fd306cSNickeau            $htmlEntityEncoded = mb_convert_encoding($text, 'HTML-ENTITIES', 'UTF-8');
162*04fd306cSNickeau            $result = $this->domDocument->loadHTML($htmlEntityEncoded, $options);
163*04fd306cSNickeau
164*04fd306cSNickeau        }
165*04fd306cSNickeau        if ($result === false) {
166*04fd306cSNickeau
167*04fd306cSNickeau            /**
168*04fd306cSNickeau             * Error
169*04fd306cSNickeau             */
170*04fd306cSNickeau            $errors = libxml_get_errors();
171*04fd306cSNickeau
172*04fd306cSNickeau            foreach ($errors as $error) {
173*04fd306cSNickeau
174*04fd306cSNickeau                /* @var LibXMLError
175*04fd306cSNickeau                 * @noinspection PhpComposerExtensionStubsInspection
176*04fd306cSNickeau                 *
177*04fd306cSNickeau                 * Section is an html5 tag (and is invalid for libxml)
178*04fd306cSNickeau                 */
179*04fd306cSNickeau                if (!in_array($error->message, self::KNOWN_HTML_LOADING_ERRORS)) {
180*04fd306cSNickeau                    /**
181*04fd306cSNickeau                     * This error is an XML and HTML error
182*04fd306cSNickeau                     */
183*04fd306cSNickeau                    if (
184*04fd306cSNickeau                        strpos($error->message, "htmlParseEntityRef: expecting ';' in Entity") !== false
185*04fd306cSNickeau                        ||
186*04fd306cSNickeau                        $error->message == "EntityRef: expecting ';'\n"
187*04fd306cSNickeau                    ) {
188*04fd306cSNickeau                        $message = "There is big probability that there is an ampersand alone `&`. ie You forgot to call html/Xml entities in a `src` or `url` attribute.";
189*04fd306cSNickeau                    } else {
190*04fd306cSNickeau                        $message = "Error while loading HTML";
191*04fd306cSNickeau                    }
192*04fd306cSNickeau                    /**
193*04fd306cSNickeau                     * inboolean attribute XML loading error
194*04fd306cSNickeau                     */
195*04fd306cSNickeau                    if (strpos($error->message, "Specification mandates value for attribute") !== false) {
196*04fd306cSNickeau                        $message = "Xml does not allow boolean attribute (ie without any value). If you skip this error, you will get a general attribute constructing error as next error. Load as HTML.";
197*04fd306cSNickeau                    }
198*04fd306cSNickeau
199*04fd306cSNickeau                    $message .= "Error: " . $error->message . ", Loaded text: " . $text;
200*04fd306cSNickeau
201*04fd306cSNickeau                    /**
202*04fd306cSNickeau                     * We clean the errors, otherwise
203*04fd306cSNickeau                     * in a test series, they failed the next test
204*04fd306cSNickeau                     *
205*04fd306cSNickeau                     */
206*04fd306cSNickeau                    libxml_clear_errors();
207*04fd306cSNickeau
208*04fd306cSNickeau                    // The xml dom object is null, we got NULL pointer exception everywhere
209*04fd306cSNickeau                    // just throw, the code will see it
210*04fd306cSNickeau                    throw new ExceptionBadSyntax($message, self::CANONICAL);
211*04fd306cSNickeau
212*04fd306cSNickeau                }
213*04fd306cSNickeau
214*04fd306cSNickeau            }
215*04fd306cSNickeau        }
216*04fd306cSNickeau
217*04fd306cSNickeau        /**
218*04fd306cSNickeau         * We clean the known errors (otherwise they are added in a queue)
219*04fd306cSNickeau         */
220*04fd306cSNickeau        libxml_clear_errors();
221*04fd306cSNickeau
222*04fd306cSNickeau        /**
223*04fd306cSNickeau         * Error reporting back
224*04fd306cSNickeau         */
225*04fd306cSNickeau        if (!PluginUtility::isTest() && isset($oldLevel)) {
226*04fd306cSNickeau            error_reporting($oldLevel);
227*04fd306cSNickeau        }
228*04fd306cSNickeau
229*04fd306cSNickeau        // namespace error : Namespace prefix dc on format is not defined
230*04fd306cSNickeau        // missing the ns declaration in the file. example:
231*04fd306cSNickeau        // xmlns:dc="http://purl.org/dc/elements/1.1/"
232*04fd306cSNickeau
233*04fd306cSNickeau
234*04fd306cSNickeau    }
235*04fd306cSNickeau
236*04fd306cSNickeau    /**
237*04fd306cSNickeau     * To not have a collusion with {@link FetcherSvg::createFetchImageSvgFromPath()}
238*04fd306cSNickeau     * @param Path $path
239*04fd306cSNickeau     * @return XmlDocument
240*04fd306cSNickeau     * @throws ExceptionNotFound - if the file does not exist
241*04fd306cSNickeau     * @throws ExceptionBadSyntax - if the content is not valid
242*04fd306cSNickeau     */
243*04fd306cSNickeau    public
244*04fd306cSNickeau    static function createXmlDocFromPath(Path $path): XmlDocument
245*04fd306cSNickeau    {
246*04fd306cSNickeau        $mime = XmlDocument::XML_TYPE;
247*04fd306cSNickeau        if (in_array($path->getExtension(), ["html", "htm"])) {
248*04fd306cSNickeau            $mime = XmlDocument::HTML_TYPE;
249*04fd306cSNickeau        }
250*04fd306cSNickeau        $content = FileSystems::getContent($path);
251*04fd306cSNickeau        return (new XmlDocument($content, $mime));
252*04fd306cSNickeau    }
253*04fd306cSNickeau
254*04fd306cSNickeau    /**
255*04fd306cSNickeau     *
256*04fd306cSNickeau     * @throws ExceptionBadSyntax
257*04fd306cSNickeau     */
258*04fd306cSNickeau    public
259*04fd306cSNickeau    static function createXmlDocFromMarkup($string, $asHtml = false): XmlDocument
260*04fd306cSNickeau    {
261*04fd306cSNickeau
262*04fd306cSNickeau        $mime = XmlDocument::XML_TYPE;
263*04fd306cSNickeau        if ($asHtml) {
264*04fd306cSNickeau            $mime = XmlDocument::HTML_TYPE;
265*04fd306cSNickeau        }
266*04fd306cSNickeau        return new XmlDocument($string, $mime);
267*04fd306cSNickeau    }
268*04fd306cSNickeau
269*04fd306cSNickeau    /**
270*04fd306cSNickeau     * HTML loading is more permissive
271*04fd306cSNickeau     *
272*04fd306cSNickeau     * For instance, you would not get an error on boolean attribute
273*04fd306cSNickeau     * ```
274*04fd306cSNickeau     * Error while loading HTMLError: Specification mandates value for attribute defer
275*04fd306cSNickeau     * ```
276*04fd306cSNickeau     * In Xml, it's mandatory but not in HTML, they are known as:
277*04fd306cSNickeau     * https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attribute
278*04fd306cSNickeau     *
279*04fd306cSNickeau     *
280*04fd306cSNickeau     * @throws ExceptionBadSyntax
281*04fd306cSNickeau     */
282*04fd306cSNickeau    public static function createHtmlDocFromMarkup($markup): XmlDocument
283*04fd306cSNickeau    {
284*04fd306cSNickeau        return self::createXmlDocFromMarkup($markup, true);
285*04fd306cSNickeau    }
286*04fd306cSNickeau
287*04fd306cSNickeau    public
288*04fd306cSNickeau    function &getDomDocument(): DOMDocument
289*04fd306cSNickeau    {
290*04fd306cSNickeau        return $this->domDocument;
291*04fd306cSNickeau    }
292*04fd306cSNickeau
293*04fd306cSNickeau    /**
294*04fd306cSNickeau     * @param $name
295*04fd306cSNickeau     * @param $value
296*04fd306cSNickeau     * @return void
297*04fd306cSNickeau     * @deprecated use {@link XmlDocument::getElement()} instead
298*04fd306cSNickeau     */
299*04fd306cSNickeau    public function setRootAttribute($name, $value)
300*04fd306cSNickeau    {
301*04fd306cSNickeau        if ($this->isXmlExtensionLoaded()) {
302*04fd306cSNickeau            $this->domDocument->documentElement->setAttribute($name, $value);
303*04fd306cSNickeau        }
304*04fd306cSNickeau    }
305*04fd306cSNickeau
306*04fd306cSNickeau    /**
307*04fd306cSNickeau     * @param $name
308*04fd306cSNickeau     * @return string null if not found
309*04fd306cSNickeau     * @deprecated uses {@link XmlElement::getAttribute()} of {@link self::getElement()}
310*04fd306cSNickeau     */
311*04fd306cSNickeau    public function getRootAttributeValue($name): ?string
312*04fd306cSNickeau    {
313*04fd306cSNickeau        $value = $this->domDocument->documentElement->getAttribute($name);
314*04fd306cSNickeau        if ($value === "") {
315*04fd306cSNickeau            return null;
316*04fd306cSNickeau        }
317*04fd306cSNickeau        return $value;
318*04fd306cSNickeau    }
319*04fd306cSNickeau
320*04fd306cSNickeau    public function toXhtml(DOMElement $element = null): string
321*04fd306cSNickeau    {
322*04fd306cSNickeau        return $this->toXml($element);
323*04fd306cSNickeau    }
324*04fd306cSNickeau
325*04fd306cSNickeau    public function toXml(DOMElement $element = null): string
326*04fd306cSNickeau    {
327*04fd306cSNickeau
328*04fd306cSNickeau        if ($element === null) {
329*04fd306cSNickeau            $element = $this->getDomDocument()->documentElement;
330*04fd306cSNickeau        }
331*04fd306cSNickeau        /**
332*04fd306cSNickeau         * LIBXML_NOXMLDECL (no xml declaration) does not work because only empty tag is recognized
333*04fd306cSNickeau         * https://www.php.net/manual/en/domdocument.savexml.php
334*04fd306cSNickeau         */
335*04fd306cSNickeau        $xmlText = $this->getDomDocument()->saveXML(
336*04fd306cSNickeau            $element,
337*04fd306cSNickeau            LIBXML_NOXMLDECL
338*04fd306cSNickeau        );
339*04fd306cSNickeau        // Delete doctype (for svg optimization)
340*04fd306cSNickeau        // php has only doctype manipulation for HTML
341*04fd306cSNickeau        $xmlText = preg_replace('/^<!DOCTYPE.+?>/', '', $xmlText);
342*04fd306cSNickeau        return trim($xmlText);
343*04fd306cSNickeau
344*04fd306cSNickeau    }
345*04fd306cSNickeau
346*04fd306cSNickeau    /**
347*04fd306cSNickeau     * https://www.php.net/manual/en/dom.installation.php
348*04fd306cSNickeau     *
349*04fd306cSNickeau     * Check it with
350*04fd306cSNickeau     * ```
351*04fd306cSNickeau     * php -m
352*04fd306cSNickeau     * ```
353*04fd306cSNickeau     * Install with
354*04fd306cSNickeau     * ```
355*04fd306cSNickeau     * sudo apt-get install php-xml
356*04fd306cSNickeau     * ```
357*04fd306cSNickeau     * @return bool
358*04fd306cSNickeau     */
359*04fd306cSNickeau    public function isXmlExtensionLoaded(): bool
360*04fd306cSNickeau    {
361*04fd306cSNickeau        // A suffix used in the bad message
362*04fd306cSNickeau        $suffixBadMessage = "php extension is not installed. To install it, you need to install xml. Example: `sudo apt-get install php-xml`, `yum install php-xml`";
363*04fd306cSNickeau
364*04fd306cSNickeau        // https://www.php.net/manual/en/dom.requirements.php
365*04fd306cSNickeau        $loaded = extension_loaded("libxml");
366*04fd306cSNickeau        if ($loaded === false) {
367*04fd306cSNickeau            LogUtility::msg("The libxml {$suffixBadMessage}");
368*04fd306cSNickeau        } else {
369*04fd306cSNickeau            $loaded = extension_loaded("xml");
370*04fd306cSNickeau            if ($loaded === false) {
371*04fd306cSNickeau                LogUtility::msg("The xml {$suffixBadMessage}");
372*04fd306cSNickeau            } else {
373*04fd306cSNickeau                $loaded = extension_loaded("dom");
374*04fd306cSNickeau                if ($loaded === false) {
375*04fd306cSNickeau                    LogUtility::msg("The dom {$suffixBadMessage}");
376*04fd306cSNickeau                }
377*04fd306cSNickeau            }
378*04fd306cSNickeau        }
379*04fd306cSNickeau        return $loaded;
380*04fd306cSNickeau    }
381*04fd306cSNickeau
382*04fd306cSNickeau    /**
383*04fd306cSNickeau     * https://stackoverflow.com/questions/30257438/how-to-completely-remove-a-namespace-using-domdocument
384*04fd306cSNickeau     * @param $namespaceUri
385*04fd306cSNickeau     */
386*04fd306cSNickeau    function removeNamespace($namespaceUri)
387*04fd306cSNickeau    {
388*04fd306cSNickeau        if (empty($namespaceUri)) {
389*04fd306cSNickeau            throw new \RuntimeException("The namespace is empty and should be specified");
390*04fd306cSNickeau        }
391*04fd306cSNickeau
392*04fd306cSNickeau        if (strpos($namespaceUri, "http") === false) {
393*04fd306cSNickeau            LogUtility::msg("Internal warning: The namespaceURI ($namespaceUri) does not seems to be an URI", LogUtility::LVL_MSG_WARNING, "support");
394*04fd306cSNickeau        }
395*04fd306cSNickeau
396*04fd306cSNickeau        /**
397*04fd306cSNickeau         * @var DOMNodeList $nodes
398*04fd306cSNickeau         * finds all nodes that have a namespace node called $ns where their parent node doesn't also have the same namespace.
399*04fd306cSNickeau         * @var DOMNodeList $nodes
400*04fd306cSNickeau         */
401*04fd306cSNickeau        try {
402*04fd306cSNickeau            $nodes = $this->xpath("//*[namespace-uri()='$namespaceUri']");
403*04fd306cSNickeau            foreach ($nodes as $node) {
404*04fd306cSNickeau                /** @var DOMElement $node */
405*04fd306cSNickeau                $node->parentNode->removeChild($node);
406*04fd306cSNickeau            }
407*04fd306cSNickeau        } catch (ExceptionBadSyntax $e) {
408*04fd306cSNickeau            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
409*04fd306cSNickeau        }
410*04fd306cSNickeau
411*04fd306cSNickeau        try {
412*04fd306cSNickeau            $nodes = $this->xpath("//@*[namespace-uri()='$namespaceUri']");
413*04fd306cSNickeau            foreach ($nodes as $node) {
414*04fd306cSNickeau                /** @var DOMAttr $node */
415*04fd306cSNickeau                /** @var DOMElement $DOMNode */
416*04fd306cSNickeau                $DOMNode = $node->parentNode;
417*04fd306cSNickeau                $DOMNode->removeAttributeNode($node);
418*04fd306cSNickeau            }
419*04fd306cSNickeau        } catch (ExceptionBadSyntax $e) {
420*04fd306cSNickeau            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
421*04fd306cSNickeau        }
422*04fd306cSNickeau
423*04fd306cSNickeau
424*04fd306cSNickeau        //Node namespace can be select only from the document
425*04fd306cSNickeau        $xpath = new DOMXPath($this->getDomDocument());
426*04fd306cSNickeau        $DOMNodeList = $xpath->query("namespace::*", $this->getDomDocument()->ownerDocument);
427*04fd306cSNickeau        foreach ($DOMNodeList as $node) {
428*04fd306cSNickeau            $namespaceURI = $node->namespaceURI;
429*04fd306cSNickeau            if ($namespaceURI == $namespaceUri) {
430*04fd306cSNickeau                $parentNode = $node->parentNode;
431*04fd306cSNickeau                $parentNode->removeAttributeNS($namespaceUri, $node->localName);
432*04fd306cSNickeau            }
433*04fd306cSNickeau        }
434*04fd306cSNickeau
435*04fd306cSNickeau
436*04fd306cSNickeau    }
437*04fd306cSNickeau
438*04fd306cSNickeau    public function getNamespaces(): array
439*04fd306cSNickeau    {
440*04fd306cSNickeau        /**
441*04fd306cSNickeau         * We can't query with the library {@link XmlDocument::xpath()} function because
442*04fd306cSNickeau         * we register in the xpath the namespace
443*04fd306cSNickeau         */
444*04fd306cSNickeau        $xpath = new DOMXPath($this->getDomDocument());
445*04fd306cSNickeau        // `namespace::*` means selects all the namespace attribute of the context node
446*04fd306cSNickeau        // namespace is an axes
447*04fd306cSNickeau        // See https://www.w3.org/TR/1999/REC-xpath-19991116/#axes
448*04fd306cSNickeau        // the namespace axis contains the namespace nodes of the context node; the axis will be empty unless the context node is an element
449*04fd306cSNickeau        $DOMNodeList = $xpath->query('namespace::*', $this->getDomDocument()->ownerDocument);
450*04fd306cSNickeau        $nameSpace = array();
451*04fd306cSNickeau        foreach ($DOMNodeList as $node) {
452*04fd306cSNickeau            /** @var DOMElement $node */
453*04fd306cSNickeau
454*04fd306cSNickeau            $namespaceURI = $node->namespaceURI;
455*04fd306cSNickeau            $localName = $node->prefix;
456*04fd306cSNickeau            if ($namespaceURI != null) {
457*04fd306cSNickeau                $nameSpace[$localName] = $namespaceURI;
458*04fd306cSNickeau            }
459*04fd306cSNickeau        }
460*04fd306cSNickeau        return $nameSpace;
461*04fd306cSNickeau    }
462*04fd306cSNickeau
463*04fd306cSNickeau    /**
464*04fd306cSNickeau     * A wrapper that register namespace for the query
465*04fd306cSNickeau     * with the defined prefix
466*04fd306cSNickeau     * See comment:
467*04fd306cSNickeau     * https://www.php.net/manual/en/domxpath.registernamespace.php#51480
468*04fd306cSNickeau     * @param $query
469*04fd306cSNickeau     * @param DOMElement|null $contextNode
470*04fd306cSNickeau     * @return DOMNodeList
471*04fd306cSNickeau     *
472*04fd306cSNickeau     * Note that this is possible to do evaluation to return a string instead
473*04fd306cSNickeau     * https://www.php.net/manual/en/domxpath.evaluate.php
474*04fd306cSNickeau     * @throws ExceptionBadSyntax - if the query is invalid
475*04fd306cSNickeau     */
476*04fd306cSNickeau    public
477*04fd306cSNickeau    function xpath($query, DOMElement $contextNode = null): DOMNodeList
478*04fd306cSNickeau    {
479*04fd306cSNickeau        if (!isset($this->domXpath)) {
480*04fd306cSNickeau
481*04fd306cSNickeau            $this->domXpath = new DOMXPath($this->getDomDocument());
482*04fd306cSNickeau
483*04fd306cSNickeau            /**
484*04fd306cSNickeau             * Prefix mapping
485*04fd306cSNickeau             * It is necessary to use xpath to handle documents which have default namespaces.
486*04fd306cSNickeau             * The xpath expression will search for items with no namespace by default.
487*04fd306cSNickeau             */
488*04fd306cSNickeau            foreach ($this->getNamespaces() as $prefix => $namespaceUri) {
489*04fd306cSNickeau                /**
490*04fd306cSNickeau                 * You can't register an empty prefix
491*04fd306cSNickeau                 * Default namespace (without a prefix) can only be accessed by the local-name() and namespace-uri() attributes.
492*04fd306cSNickeau                 */
493*04fd306cSNickeau                if (!empty($prefix)) {
494*04fd306cSNickeau                    $result = $this->domXpath->registerNamespace($prefix, $namespaceUri);
495*04fd306cSNickeau                    if (!$result) {
496*04fd306cSNickeau                        LogUtility::msg("Not able to register the prefix ($prefix) for the namespace uri ($namespaceUri)");
497*04fd306cSNickeau                    }
498*04fd306cSNickeau                }
499*04fd306cSNickeau            }
500*04fd306cSNickeau        }
501*04fd306cSNickeau
502*04fd306cSNickeau        if ($contextNode === null) {
503*04fd306cSNickeau            $contextNode = $this->domDocument;
504*04fd306cSNickeau        }
505*04fd306cSNickeau        $domList = $this->domXpath->query($query, $contextNode);
506*04fd306cSNickeau        if ($domList === false) {
507*04fd306cSNickeau            throw new ExceptionBadSyntax("The query expression ($query) may be malformed");
508*04fd306cSNickeau        }
509*04fd306cSNickeau        return $domList;
510*04fd306cSNickeau
511*04fd306cSNickeau    }
512*04fd306cSNickeau
513*04fd306cSNickeau
514*04fd306cSNickeau    public
515*04fd306cSNickeau    function removeRootAttribute($attribute)
516*04fd306cSNickeau    {
517*04fd306cSNickeau
518*04fd306cSNickeau        // This function does not work
519*04fd306cSNickeau        // $result = $this->getXmlDom()->documentElement->removeAttribute($attribute);
520*04fd306cSNickeau
521*04fd306cSNickeau        for ($i = 0; $i < $this->getDomDocument()->documentElement->attributes->length; $i++) {
522*04fd306cSNickeau            if ($this->getDomDocument()->documentElement->attributes[$i]->name == $attribute) {
523*04fd306cSNickeau                $result = $this->getDomDocument()->documentElement->removeAttributeNode($this->getDomDocument()->documentElement->attributes[$i]);
524*04fd306cSNickeau                if ($result === false) {
525*04fd306cSNickeau                    throw new \RuntimeException("Not able to delete the $attribute");
526*04fd306cSNickeau                }
527*04fd306cSNickeau                // There is no break here because you may find multiple version attribute for instance
528*04fd306cSNickeau            }
529*04fd306cSNickeau        }
530*04fd306cSNickeau
531*04fd306cSNickeau    }
532*04fd306cSNickeau
533*04fd306cSNickeau    public
534*04fd306cSNickeau    function removeRootChildNode($nodeName)
535*04fd306cSNickeau    {
536*04fd306cSNickeau        for ($i = 0; $i < $this->getDomDocument()->documentElement->childNodes->length; $i++) {
537*04fd306cSNickeau            $childNode = &$this->getDomDocument()->documentElement->childNodes[$i];
538*04fd306cSNickeau            if ($childNode->nodeName == $nodeName) {
539*04fd306cSNickeau                $result = $this->getDomDocument()->documentElement->removeChild($childNode);
540*04fd306cSNickeau                if ($result == false) {
541*04fd306cSNickeau                    throw new \RuntimeException("Not able to delete the child node $nodeName");
542*04fd306cSNickeau                }
543*04fd306cSNickeau                break;
544*04fd306cSNickeau            }
545*04fd306cSNickeau        }
546*04fd306cSNickeau    }
547*04fd306cSNickeau
548*04fd306cSNickeau    /**
549*04fd306cSNickeau     *
550*04fd306cSNickeau     * Add a value to an attribute value
551*04fd306cSNickeau     * Example
552*04fd306cSNickeau     * <a class="actual">
553*04fd306cSNickeau     *
554*04fd306cSNickeau     * if you add "new"
555*04fd306cSNickeau     * <a class="actual new">
556*04fd306cSNickeau     *
557*04fd306cSNickeau     * @param $attName
558*04fd306cSNickeau     * @param $attValue
559*04fd306cSNickeau     * @param DOMElement $xml
560*04fd306cSNickeau     */
561*04fd306cSNickeau    public
562*04fd306cSNickeau    function addAttributeValue($attName, $attValue, $xml)
563*04fd306cSNickeau    {
564*04fd306cSNickeau
565*04fd306cSNickeau        /**
566*04fd306cSNickeau         * Empty condition is better than {@link DOMElement::hasAttribute()}
567*04fd306cSNickeau         * because even if the dom element has the attribute, the value
568*04fd306cSNickeau         * may be empty
569*04fd306cSNickeau         */
570*04fd306cSNickeau        $value = $xml->getAttribute($attName);
571*04fd306cSNickeau        if (empty($value)) {
572*04fd306cSNickeau            $xml->setAttribute($attName, $attValue);
573*04fd306cSNickeau        } else {
574*04fd306cSNickeau            $actualAttValue = $xml->getAttribute($attName);
575*04fd306cSNickeau            $explodeArray = explode(" ", $actualAttValue);
576*04fd306cSNickeau            if (!in_array($attValue, $explodeArray)) {
577*04fd306cSNickeau                $xml->setAttribute($attName, (string)$actualAttValue . " $attValue");
578*04fd306cSNickeau            }
579*04fd306cSNickeau        }
580*04fd306cSNickeau
581*04fd306cSNickeau    }
582*04fd306cSNickeau
583*04fd306cSNickeau    public function diff(XmlDocument $rightDocument): string
584*04fd306cSNickeau    {
585*04fd306cSNickeau        $error = "";
586*04fd306cSNickeau        XmlSystems::diffNode($this->getDomDocument(), $rightDocument->getDomDocument(), $error);
587*04fd306cSNickeau        return $error;
588*04fd306cSNickeau    }
589*04fd306cSNickeau
590*04fd306cSNickeau    /**
591*04fd306cSNickeau     * @return string a XML formatted
592*04fd306cSNickeau     *
593*04fd306cSNickeau     * !!!! The parameter preserveWhiteSpace should have been set to false before loading
594*04fd306cSNickeau     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
595*04fd306cSNickeau     * $this->xmlDom->preserveWhiteSpace = false;
596*04fd306cSNickeau     *
597*04fd306cSNickeau     * We do it with the function {@link XmlDocument::mandatoryFormatConfigBeforeLoading()}
598*04fd306cSNickeau     *
599*04fd306cSNickeau     */
600*04fd306cSNickeau    public function toXmlFormatted(DOMElement $element = null): string
601*04fd306cSNickeau    {
602*04fd306cSNickeau
603*04fd306cSNickeau        $this->domDocument->formatOutput = true;
604*04fd306cSNickeau        return $this->toXml($element);
605*04fd306cSNickeau
606*04fd306cSNickeau    }
607*04fd306cSNickeau
608*04fd306cSNickeau    /**
609*04fd306cSNickeau     * @return string that can be diff
610*04fd306cSNickeau     *   * EOL diff are not seen
611*04fd306cSNickeau     *   * space are
612*04fd306cSNickeau     *
613*04fd306cSNickeau     * See also {@link XmlDocument::processTextBeforeLoading()}
614*04fd306cSNickeau     * that is needed before loading
615*04fd306cSNickeau     */
616*04fd306cSNickeau    public function toXmlNormalized(DOMElement $element = null): string
617*04fd306cSNickeau    {
618*04fd306cSNickeau
619*04fd306cSNickeau        /**
620*04fd306cSNickeau         * If the text was a list
621*04fd306cSNickeau         * of sibling text without parent
622*04fd306cSNickeau         * We may get a body
623*04fd306cSNickeau         * @deprecated letting the code until
624*04fd306cSNickeau         * TODO: delete this code when the test pass
625*04fd306cSNickeau         */
626*04fd306cSNickeau//        $body = $doc->getElementsByTagName("body");
627*04fd306cSNickeau//        if ($body->length != 0) {
628*04fd306cSNickeau//            $DOMNodeList = $body->item(0)->childNodes;
629*04fd306cSNickeau//            $output = "";
630*04fd306cSNickeau//            foreach ($DOMNodeList as $value) {
631*04fd306cSNickeau//                $output .= $doc->saveXML($value) . DOKU_LF;
632*04fd306cSNickeau//            }
633*04fd306cSNickeau//        }
634*04fd306cSNickeau
635*04fd306cSNickeau        if ($element == null) {
636*04fd306cSNickeau            $element = $this->domDocument->documentElement;
637*04fd306cSNickeau        }
638*04fd306cSNickeau        $element->normalize();
639*04fd306cSNickeau        return $this->toXmlFormatted($element);
640*04fd306cSNickeau    }
641*04fd306cSNickeau
642*04fd306cSNickeau    /**
643*04fd306cSNickeau     * Not really conventional but
644*04fd306cSNickeau     * to be able to {@link toXmlNormalized}
645*04fd306cSNickeau     * the EOL should be deleted
646*04fd306cSNickeau     * We do it before loading and not with a XML documentation
647*04fd306cSNickeau     */
648*04fd306cSNickeau    private function processTextBeforeLoading($text)
649*04fd306cSNickeau    {
650*04fd306cSNickeau        $text = str_replace(DOKU_LF, "", $text);
651*04fd306cSNickeau        $text = preg_replace("/\r\n\s*\r\n/", "\r\n", $text);
652*04fd306cSNickeau        $text = preg_replace("/\n\s*\n/", "\n", $text);
653*04fd306cSNickeau        $text = preg_replace("/\n\n/", "\n", $text);
654*04fd306cSNickeau        return $text;
655*04fd306cSNickeau
656*04fd306cSNickeau    }
657*04fd306cSNickeau
658*04fd306cSNickeau
659*04fd306cSNickeau    /**
660*04fd306cSNickeau     * This function is called just before loading
661*04fd306cSNickeau     * in order to be able to {@link XmlDocument::toXmlFormatted() format the output }
662*04fd306cSNickeau     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
663*04fd306cSNickeau     * Mandatory for a a good formatting before loading
664*04fd306cSNickeau     *
665*04fd306cSNickeau     */
666*04fd306cSNickeau    private function mandatoryFormatConfigBeforeLoading()
667*04fd306cSNickeau    {
668*04fd306cSNickeau        // not that
669*04fd306cSNickeau        // the loading option: LIBXML_NOBLANKS
670*04fd306cSNickeau        // is equivalent to $this->xmlDom->preserveWhiteSpace = true;
671*04fd306cSNickeau        $this->domDocument->preserveWhiteSpace = false;
672*04fd306cSNickeau    }
673*04fd306cSNickeau
674*04fd306cSNickeau    /**
675*04fd306cSNickeau     * @param string $attributeName
676*04fd306cSNickeau     * @param DOMElement $nodeElement
677*04fd306cSNickeau     * @return void
678*04fd306cSNickeau     * @deprecated use the {@link XmlElement::removeAttribute()} if possible
679*04fd306cSNickeau     */
680*04fd306cSNickeau    public function removeAttributeValue(string $attributeName, DOMElement $nodeElement)
681*04fd306cSNickeau    {
682*04fd306cSNickeau        $attr = $nodeElement->getAttributeNode($attributeName);
683*04fd306cSNickeau        if (!$attr) {
684*04fd306cSNickeau            return;
685*04fd306cSNickeau        }
686*04fd306cSNickeau        $result = $nodeElement->removeAttributeNode($attr);
687*04fd306cSNickeau        if ($result === false) {
688*04fd306cSNickeau            LogUtility::msg("Not able to delete the attribute $attributeName of the node element $nodeElement->tagName in the Xml document");
689*04fd306cSNickeau        }
690*04fd306cSNickeau    }
691*04fd306cSNickeau
692*04fd306cSNickeau
693*04fd306cSNickeau    /**
694*04fd306cSNickeau     * Query via a CSS selector
695*04fd306cSNickeau     * (not that it will not work with other namespace than the default one, ie xmlns will not work)
696*04fd306cSNickeau     * @throws ExceptionBadSyntax - if the selector is not valid
697*04fd306cSNickeau     * @throws ExceptionNotFound - if the selector selects nothing
698*04fd306cSNickeau     */
699*04fd306cSNickeau    public function querySelector(string $selector): XmlElement
700*04fd306cSNickeau    {
701*04fd306cSNickeau        $domNodeList = $this->querySelectorAll($selector);
702*04fd306cSNickeau        if (sizeof($domNodeList) >= 1) {
703*04fd306cSNickeau            return $domNodeList[0];
704*04fd306cSNickeau        }
705*04fd306cSNickeau        throw new ExceptionNotFound("No element was found with the selector $selector");
706*04fd306cSNickeau
707*04fd306cSNickeau    }
708*04fd306cSNickeau
709*04fd306cSNickeau    /**
710*04fd306cSNickeau     * @return XmlElement[]
711*04fd306cSNickeau     * @throws ExceptionBadSyntax
712*04fd306cSNickeau     */
713*04fd306cSNickeau    public function querySelectorAll(string $selector): array
714*04fd306cSNickeau    {
715*04fd306cSNickeau        $xpath = $this->cssSelectorToXpath($selector);
716*04fd306cSNickeau        $domNodeList = $this->xpath($xpath);
717*04fd306cSNickeau        $domNodes = [];
718*04fd306cSNickeau        foreach ($domNodeList as $domNode) {
719*04fd306cSNickeau            if ($domNode instanceof DOMElement) {
720*04fd306cSNickeau                $domNodes[] = new XmlElement($domNode, $this);
721*04fd306cSNickeau            }
722*04fd306cSNickeau        }
723*04fd306cSNickeau        return $domNodes;
724*04fd306cSNickeau
725*04fd306cSNickeau    }
726*04fd306cSNickeau
727*04fd306cSNickeau    /**
728*04fd306cSNickeau     * @throws ExceptionBadSyntax
729*04fd306cSNickeau     */
730*04fd306cSNickeau    public function cssSelectorToXpath(string $selector): string
731*04fd306cSNickeau    {
732*04fd306cSNickeau        try {
733*04fd306cSNickeau            return PhpCss::toXpath($selector);
734*04fd306cSNickeau        } catch (PhpCss\Exception\ParserException $e) {
735*04fd306cSNickeau            throw new ExceptionBadSyntax("The selector ($selector) is not valid. Error: {$e->getMessage()}");
736*04fd306cSNickeau        }
737*04fd306cSNickeau    }
738*04fd306cSNickeau
739*04fd306cSNickeau    /**
740*04fd306cSNickeau     * An utility function to know how to remove a node
741*04fd306cSNickeau     * @param \DOMNode $nodeElement
742*04fd306cSNickeau     * @deprecated use {@link XmlElement::remove} instead
743*04fd306cSNickeau     */
744*04fd306cSNickeau    public function removeNode(\DOMNode $nodeElement)
745*04fd306cSNickeau    {
746*04fd306cSNickeau
747*04fd306cSNickeau        $nodeElement->parentNode->removeChild($nodeElement);
748*04fd306cSNickeau
749*04fd306cSNickeau    }
750*04fd306cSNickeau
751*04fd306cSNickeau    public function getElement(): XmlElement
752*04fd306cSNickeau    {
753*04fd306cSNickeau        return XmlElement::create($this->getDomDocument()->documentElement, $this);
754*04fd306cSNickeau    }
755*04fd306cSNickeau
756*04fd306cSNickeau    public function toHtml()
757*04fd306cSNickeau    {
758*04fd306cSNickeau        return $this->domDocument->saveHTML();
759*04fd306cSNickeau    }
760*04fd306cSNickeau
761*04fd306cSNickeau    /**
762*04fd306cSNickeau     * @throws \DOMException - if invalid local name
763*04fd306cSNickeau     */
764*04fd306cSNickeau    public function createElement(string $localName): XmlElement
765*04fd306cSNickeau    {
766*04fd306cSNickeau        $element = $this->domDocument->createElement($localName);
767*04fd306cSNickeau        return XmlElement::create($element, $this);
768*04fd306cSNickeau    }
769*04fd306cSNickeau
770*04fd306cSNickeau    /**
771*04fd306cSNickeau     * @throws ExceptionBadSyntax
772*04fd306cSNickeau     * @throws ExceptionBadState
773*04fd306cSNickeau     */
774*04fd306cSNickeau    public function xpathFirstDomElement(string $xpath): DOMElement
775*04fd306cSNickeau    {
776*04fd306cSNickeau        $domList = $this->xpath($xpath);
777*04fd306cSNickeau        $domElement = $domList->item(0);
778*04fd306cSNickeau        if ($domElement instanceof DOMElement) {
779*04fd306cSNickeau            return $domElement;
780*04fd306cSNickeau        } else {
781*04fd306cSNickeau            throw new ExceptionBadState("The first DOM node is not a DOM element");
782*04fd306cSNickeau        }
783*04fd306cSNickeau    }
784*04fd306cSNickeau
785*04fd306cSNickeau
786*04fd306cSNickeau}
787