xref: /plugin/combo/ComboStrap/Xml/XmlDocument.php (revision 70bbd7f1f72440223cc13f3495efdcb2b0a11514)
104fd306cSNickeau<?php
204fd306cSNickeau
304fd306cSNickeau/**
404fd306cSNickeau * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
504fd306cSNickeau *
604fd306cSNickeau * This source code is licensed under the GPL license found in the
704fd306cSNickeau * COPYING  file in the root directory of this source tree.
804fd306cSNickeau *
904fd306cSNickeau * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
1004fd306cSNickeau * @author   ComboStrap <support@combostrap.com>
1104fd306cSNickeau *
1204fd306cSNickeau */
1304fd306cSNickeau
1404fd306cSNickeaunamespace ComboStrap\Xml;
1504fd306cSNickeau
1604fd306cSNickeauuse ComboStrap\ExceptionBadState;
1704fd306cSNickeauuse ComboStrap\ExceptionBadSyntax;
1804fd306cSNickeauuse ComboStrap\ExceptionNotFound;
1904fd306cSNickeauuse ComboStrap\FileSystems;
2004fd306cSNickeauuse ComboStrap\LogUtility;
2104fd306cSNickeauuse ComboStrap\Path;
2204fd306cSNickeauuse ComboStrap\PluginUtility;
2304fd306cSNickeauuse DOMAttr;
2404fd306cSNickeauuse DOMDocument;
2504fd306cSNickeauuse DOMElement;
2604fd306cSNickeauuse DOMNodeList;
2704fd306cSNickeauuse DOMXPath;
2804fd306cSNickeauuse LibXMLError;
2904fd306cSNickeauuse PhpCss;
3004fd306cSNickeau
3104fd306cSNickeau
3204fd306cSNickeau/**
3304fd306cSNickeau * A xml document that follows the Web Api interface.
3404fd306cSNickeau *
3504fd306cSNickeau * Note Dokuwiki now uses since [jack_jackrum](https://www.dokuwiki.org/changes#release_2023-04-04_jack_jackrum):
3604fd306cSNickeau * the [dom-wrapper](https://github.com/scotteh/php-dom-wrapper)
3704fd306cSNickeau * that follow the Jquery API and uses [css-selector](https://symfony.com/doc/current/components/css_selector.html)
3804fd306cSNickeau * to get Xpath expression from Css selector
3904fd306cSNickeau *
4004fd306cSNickeau */
4104fd306cSNickeauclass XmlDocument
4204fd306cSNickeau{
4304fd306cSNickeau    const HTML_TYPE = "html";
4404fd306cSNickeau    const XML_TYPE = "xml";
4504fd306cSNickeau    /**
4604fd306cSNickeau     * The error that the HTML loading
4704fd306cSNickeau     * may returns
4804fd306cSNickeau     */
4904fd306cSNickeau    const KNOWN_HTML_LOADING_ERRORS = [
5004fd306cSNickeau        "Tag section invalid\n", // section is HTML5 tag
5104fd306cSNickeau        "Tag footer invalid\n", // footer is HTML5 tag
5204fd306cSNickeau        "error parsing attribute name\n", // name is an HTML5 attribute
5304fd306cSNickeau        "Unexpected end tag : blockquote\n", // name is an HTML5 attribute
5404fd306cSNickeau        "Tag bdi invalid\n",
5504fd306cSNickeau        "Tag path invalid\n", // svg
5604fd306cSNickeau        "Tag svg invalid\n", // svg
5704fd306cSNickeau        "Unexpected end tag : a\n", // when the document is only a anchor
5804fd306cSNickeau        "Unexpected end tag : p\n", // when the document is only a p
5904fd306cSNickeau        "Unexpected end tag : button\n", // when the document is only a button
6004fd306cSNickeau    ];
6104fd306cSNickeau
6204fd306cSNickeau    const CANONICAL = "xml";
6304fd306cSNickeau
6404fd306cSNickeau    /**
6504fd306cSNickeau     * @var DOMDocument
6604fd306cSNickeau     */
6704fd306cSNickeau    private DOMDocument $domDocument;
6804fd306cSNickeau    /**
6904fd306cSNickeau     * @var DOMXPath
7004fd306cSNickeau     */
7104fd306cSNickeau    private DOMXPath $domXpath;
7204fd306cSNickeau
7304fd306cSNickeau    /**
7404fd306cSNickeau     * XmlFile constructor.
7504fd306cSNickeau     * @param $text
7604fd306cSNickeau     * @param string $type - HTML or not
7704fd306cSNickeau     * @throws ExceptionBadSyntax - if the document is not valid or the lib xml is not available
7804fd306cSNickeau     *
7904fd306cSNickeau     * Getting the width of an error HTML document if the file was downloaded
8004fd306cSNickeau     * from a server has no use at all
8104fd306cSNickeau     */
8204fd306cSNickeau    public function __construct($text, string $type = self::XML_TYPE)
8304fd306cSNickeau    {
8404fd306cSNickeau
85*70bbd7f1Sgerardnico        if (empty($text)) {
86*70bbd7f1Sgerardnico            throw new ExceptionBadSyntax("The xml text markup should not be empty.", self::CANONICAL);
87*70bbd7f1Sgerardnico        }
8804fd306cSNickeau        if (!$this->isXmlExtensionLoaded()) {
8904fd306cSNickeau            /**
9004fd306cSNickeau             * If the XML module is not present
9104fd306cSNickeau             */
9204fd306cSNickeau            throw new ExceptionBadSyntax("The php `libxml` module was not found on your installation, the xml/svg file could not be modified / instantiated", self::CANONICAL);
9304fd306cSNickeau        }
9404fd306cSNickeau
9504fd306cSNickeau        // https://www.php.net/manual/en/libxml.constants.php
9604fd306cSNickeau        $options = LIBXML_NOCDATA
9704fd306cSNickeau            // | LIBXML_NOBLANKS // same as preserveWhiteSpace=true, not set to be able to format the output
9804fd306cSNickeau            | LIBXML_NOXMLDECL // Drop the XML declaration when saving a document
9904fd306cSNickeau            | LIBXML_NONET // No network during load
10004fd306cSNickeau            | LIBXML_NSCLEAN // Remove redundant namespace declarations - for whatever reason, the formatting does not work if this is set
10104fd306cSNickeau        ;
10204fd306cSNickeau
10304fd306cSNickeau        // HTML
10404fd306cSNickeau        if ($type == self::HTML_TYPE) {
10504fd306cSNickeau
10604fd306cSNickeau            // Options that cause the process to hang if this is not for a html file
10704fd306cSNickeau            // Empty tag option may also be used only on save
10804fd306cSNickeau            //   at https://www.php.net/manual/en/domdocument.save.php
10904fd306cSNickeau            //   and https://www.php.net/manual/en/domdocument.savexml.php
11004fd306cSNickeau            $options = $options
11104fd306cSNickeau                // | LIBXML_NOEMPTYTAG // Expand empty tags (e.g. <br/> to <br></br>)
11204fd306cSNickeau                | LIBXML_HTML_NODEFDTD // No doctype
11304fd306cSNickeau                | LIBXML_HTML_NOIMPLIED;
11404fd306cSNickeau
11504fd306cSNickeau
11604fd306cSNickeau        }
11704fd306cSNickeau
11804fd306cSNickeau        /**
11904fd306cSNickeau         * No warning reporting
12004fd306cSNickeau         * Load XML issue E_STRICT warning seen in the log
12104fd306cSNickeau         */
12204fd306cSNickeau        if (!PluginUtility::isTest()) {
12304fd306cSNickeau            $oldLevel = error_reporting(E_ERROR);
12404fd306cSNickeau        }
12504fd306cSNickeau
12604fd306cSNickeau        $this->domDocument = new DOMDocument('1.0', 'UTF-8');
12704fd306cSNickeau
12804fd306cSNickeau        $this->mandatoryFormatConfigBeforeLoading();
12904fd306cSNickeau
13004fd306cSNickeau
13104fd306cSNickeau        $text = $this->processTextBeforeLoading($text);
13204fd306cSNickeau
13304fd306cSNickeau        /**
13404fd306cSNickeau         * Because the load does handle HTML5tag as error
13504fd306cSNickeau         * (ie section for instance)
13604fd306cSNickeau         * We take over the errors and handle them after the below load
13704fd306cSNickeau         *
13804fd306cSNickeau         * https://www.php.net/manual/en/function.libxml-use-internal-errors.php
13904fd306cSNickeau         *
14004fd306cSNickeau         */
14104fd306cSNickeau        libxml_use_internal_errors(true);
14204fd306cSNickeau
14304fd306cSNickeau        if ($type == self::XML_TYPE) {
14404fd306cSNickeau
14504fd306cSNickeau            $result = $this->domDocument->loadXML($text, $options);
14604fd306cSNickeau
14704fd306cSNickeau        } else {
14804fd306cSNickeau
14904fd306cSNickeau            /**
15004fd306cSNickeau             * Unlike loading XML, HTML does not have to be well-formed to load.
15104fd306cSNickeau             * While malformed HTML should load successfully, this function may generate E_WARNING errors
15204fd306cSNickeau             * @deprecated as we try to be XHTML compliantXML but yeah this is not always possible
15304fd306cSNickeau             */
15404fd306cSNickeau
15504fd306cSNickeau            /**
15604fd306cSNickeau             * Bug: Even if we set that the document is an UTF-8
15704fd306cSNickeau             * loadHTML treat the string as being in ISO-8859-1 if without any heading
15804fd306cSNickeau             * (ie <xml encoding="utf-8"..>
15904fd306cSNickeau             * https://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly
16004fd306cSNickeau             * Otherwise French and other language are not well loaded
16104fd306cSNickeau             *
16204fd306cSNickeau             * We use the trick to transform UTF-8 to HTML
16304fd306cSNickeau             */
16404fd306cSNickeau            $htmlEntityEncoded = mb_convert_encoding($text, 'HTML-ENTITIES', 'UTF-8');
16504fd306cSNickeau            $result = $this->domDocument->loadHTML($htmlEntityEncoded, $options);
16604fd306cSNickeau
16704fd306cSNickeau        }
16804fd306cSNickeau        if ($result === false) {
16904fd306cSNickeau
17004fd306cSNickeau            /**
17104fd306cSNickeau             * Error
17204fd306cSNickeau             */
17304fd306cSNickeau            $errors = libxml_get_errors();
17404fd306cSNickeau
17504fd306cSNickeau            foreach ($errors as $error) {
17604fd306cSNickeau
17704fd306cSNickeau                /* @var LibXMLError
17804fd306cSNickeau                 * @noinspection PhpComposerExtensionStubsInspection
17904fd306cSNickeau                 *
18004fd306cSNickeau                 * Section is an html5 tag (and is invalid for libxml)
18104fd306cSNickeau                 */
18204fd306cSNickeau                if (!in_array($error->message, self::KNOWN_HTML_LOADING_ERRORS)) {
18304fd306cSNickeau                    /**
18404fd306cSNickeau                     * This error is an XML and HTML error
18504fd306cSNickeau                     */
18604fd306cSNickeau                    if (
18704fd306cSNickeau                        strpos($error->message, "htmlParseEntityRef: expecting ';' in Entity") !== false
18804fd306cSNickeau                        ||
18904fd306cSNickeau                        $error->message == "EntityRef: expecting ';'\n"
19004fd306cSNickeau                    ) {
19104fd306cSNickeau                        $message = "There is big probability that there is an ampersand alone `&`. ie You forgot to call html/Xml entities in a `src` or `url` attribute.";
19204fd306cSNickeau                    } else {
19304fd306cSNickeau                        $message = "Error while loading HTML";
19404fd306cSNickeau                    }
19504fd306cSNickeau                    /**
19604fd306cSNickeau                     * inboolean attribute XML loading error
19704fd306cSNickeau                     */
19804fd306cSNickeau                    if (strpos($error->message, "Specification mandates value for attribute") !== false) {
19904fd306cSNickeau                        $message = "Xml does not allow boolean attribute (ie without any value). If you skip this error, you will get a general attribute constructing error as next error. Load as HTML.";
20004fd306cSNickeau                    }
20104fd306cSNickeau
20204fd306cSNickeau                    $message .= "Error: " . $error->message . ", Loaded text: " . $text;
20304fd306cSNickeau
20404fd306cSNickeau                    /**
20504fd306cSNickeau                     * We clean the errors, otherwise
20604fd306cSNickeau                     * in a test series, they failed the next test
20704fd306cSNickeau                     *
20804fd306cSNickeau                     */
20904fd306cSNickeau                    libxml_clear_errors();
21004fd306cSNickeau
21104fd306cSNickeau                    // The xml dom object is null, we got NULL pointer exception everywhere
21204fd306cSNickeau                    // just throw, the code will see it
21304fd306cSNickeau                    throw new ExceptionBadSyntax($message, self::CANONICAL);
21404fd306cSNickeau
21504fd306cSNickeau                }
21604fd306cSNickeau
21704fd306cSNickeau            }
21804fd306cSNickeau        }
21904fd306cSNickeau
22004fd306cSNickeau        /**
22104fd306cSNickeau         * We clean the known errors (otherwise they are added in a queue)
22204fd306cSNickeau         */
22304fd306cSNickeau        libxml_clear_errors();
22404fd306cSNickeau
22504fd306cSNickeau        /**
22604fd306cSNickeau         * Error reporting back
22704fd306cSNickeau         */
22804fd306cSNickeau        if (!PluginUtility::isTest() && isset($oldLevel)) {
22904fd306cSNickeau            error_reporting($oldLevel);
23004fd306cSNickeau        }
23104fd306cSNickeau
23204fd306cSNickeau        // namespace error : Namespace prefix dc on format is not defined
23304fd306cSNickeau        // missing the ns declaration in the file. example:
23404fd306cSNickeau        // xmlns:dc="http://purl.org/dc/elements/1.1/"
23504fd306cSNickeau
23604fd306cSNickeau
23704fd306cSNickeau    }
23804fd306cSNickeau
23904fd306cSNickeau    /**
24004fd306cSNickeau     * To not have a collusion with {@link FetcherSvg::createFetchImageSvgFromPath()}
24104fd306cSNickeau     * @param Path $path
24204fd306cSNickeau     * @return XmlDocument
24304fd306cSNickeau     * @throws ExceptionNotFound - if the file does not exist
24404fd306cSNickeau     * @throws ExceptionBadSyntax - if the content is not valid
24504fd306cSNickeau     */
24604fd306cSNickeau    public
24704fd306cSNickeau    static function createXmlDocFromPath(Path $path): XmlDocument
24804fd306cSNickeau    {
24904fd306cSNickeau        $mime = XmlDocument::XML_TYPE;
25004fd306cSNickeau        if (in_array($path->getExtension(), ["html", "htm"])) {
25104fd306cSNickeau            $mime = XmlDocument::HTML_TYPE;
25204fd306cSNickeau        }
25304fd306cSNickeau        $content = FileSystems::getContent($path);
25404fd306cSNickeau        return (new XmlDocument($content, $mime));
25504fd306cSNickeau    }
25604fd306cSNickeau
25704fd306cSNickeau    /**
25804fd306cSNickeau     *
25904fd306cSNickeau     * @throws ExceptionBadSyntax
26004fd306cSNickeau     */
26104fd306cSNickeau    public
26204fd306cSNickeau    static function createXmlDocFromMarkup($string, $asHtml = false): XmlDocument
26304fd306cSNickeau    {
26404fd306cSNickeau
26504fd306cSNickeau        $mime = XmlDocument::XML_TYPE;
26604fd306cSNickeau        if ($asHtml) {
26704fd306cSNickeau            $mime = XmlDocument::HTML_TYPE;
26804fd306cSNickeau        }
26904fd306cSNickeau        return new XmlDocument($string, $mime);
27004fd306cSNickeau    }
27104fd306cSNickeau
27204fd306cSNickeau    /**
27304fd306cSNickeau     * HTML loading is more permissive
27404fd306cSNickeau     *
27504fd306cSNickeau     * For instance, you would not get an error on boolean attribute
27604fd306cSNickeau     * ```
27704fd306cSNickeau     * Error while loading HTMLError: Specification mandates value for attribute defer
27804fd306cSNickeau     * ```
27904fd306cSNickeau     * In Xml, it's mandatory but not in HTML, they are known as:
28004fd306cSNickeau     * https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attribute
28104fd306cSNickeau     *
28204fd306cSNickeau     *
28304fd306cSNickeau     * @throws ExceptionBadSyntax
28404fd306cSNickeau     */
28504fd306cSNickeau    public static function createHtmlDocFromMarkup($markup): XmlDocument
28604fd306cSNickeau    {
28704fd306cSNickeau        return self::createXmlDocFromMarkup($markup, true);
28804fd306cSNickeau    }
28904fd306cSNickeau
29004fd306cSNickeau    public
29104fd306cSNickeau    function &getDomDocument(): DOMDocument
29204fd306cSNickeau    {
29304fd306cSNickeau        return $this->domDocument;
29404fd306cSNickeau    }
29504fd306cSNickeau
29604fd306cSNickeau    /**
29704fd306cSNickeau     * @param $name
29804fd306cSNickeau     * @param $value
29904fd306cSNickeau     * @return void
30004fd306cSNickeau     * @deprecated use {@link XmlDocument::getElement()} instead
30104fd306cSNickeau     */
30204fd306cSNickeau    public function setRootAttribute($name, $value)
30304fd306cSNickeau    {
30404fd306cSNickeau        if ($this->isXmlExtensionLoaded()) {
30504fd306cSNickeau            $this->domDocument->documentElement->setAttribute($name, $value);
30604fd306cSNickeau        }
30704fd306cSNickeau    }
30804fd306cSNickeau
30904fd306cSNickeau    /**
31004fd306cSNickeau     * @param $name
31104fd306cSNickeau     * @return string null if not found
31204fd306cSNickeau     * @deprecated uses {@link XmlElement::getAttribute()} of {@link self::getElement()}
31304fd306cSNickeau     */
31404fd306cSNickeau    public function getRootAttributeValue($name): ?string
31504fd306cSNickeau    {
31604fd306cSNickeau        $value = $this->domDocument->documentElement->getAttribute($name);
31704fd306cSNickeau        if ($value === "") {
31804fd306cSNickeau            return null;
31904fd306cSNickeau        }
32004fd306cSNickeau        return $value;
32104fd306cSNickeau    }
32204fd306cSNickeau
32304fd306cSNickeau    public function toXhtml(DOMElement $element = null): string
32404fd306cSNickeau    {
32504fd306cSNickeau        return $this->toXml($element);
32604fd306cSNickeau    }
32704fd306cSNickeau
32804fd306cSNickeau    public function toXml(DOMElement $element = null): string
32904fd306cSNickeau    {
33004fd306cSNickeau
33104fd306cSNickeau        if ($element === null) {
33204fd306cSNickeau            $element = $this->getDomDocument()->documentElement;
33304fd306cSNickeau        }
33404fd306cSNickeau        /**
33504fd306cSNickeau         * LIBXML_NOXMLDECL (no xml declaration) does not work because only empty tag is recognized
33604fd306cSNickeau         * https://www.php.net/manual/en/domdocument.savexml.php
33704fd306cSNickeau         */
33804fd306cSNickeau        $xmlText = $this->getDomDocument()->saveXML(
33904fd306cSNickeau            $element,
34004fd306cSNickeau            LIBXML_NOXMLDECL
34104fd306cSNickeau        );
34204fd306cSNickeau        // Delete doctype (for svg optimization)
34304fd306cSNickeau        // php has only doctype manipulation for HTML
34404fd306cSNickeau        $xmlText = preg_replace('/^<!DOCTYPE.+?>/', '', $xmlText);
34504fd306cSNickeau        return trim($xmlText);
34604fd306cSNickeau
34704fd306cSNickeau    }
34804fd306cSNickeau
34904fd306cSNickeau    /**
35004fd306cSNickeau     * https://www.php.net/manual/en/dom.installation.php
35104fd306cSNickeau     *
35204fd306cSNickeau     * Check it with
35304fd306cSNickeau     * ```
35404fd306cSNickeau     * php -m
35504fd306cSNickeau     * ```
35604fd306cSNickeau     * Install with
35704fd306cSNickeau     * ```
35804fd306cSNickeau     * sudo apt-get install php-xml
35904fd306cSNickeau     * ```
36004fd306cSNickeau     * @return bool
36104fd306cSNickeau     */
36204fd306cSNickeau    public function isXmlExtensionLoaded(): bool
36304fd306cSNickeau    {
36404fd306cSNickeau        // A suffix used in the bad message
36504fd306cSNickeau        $suffixBadMessage = "php extension is not installed. To install it, you need to install xml. Example: `sudo apt-get install php-xml`, `yum install php-xml`";
36604fd306cSNickeau
36704fd306cSNickeau        // https://www.php.net/manual/en/dom.requirements.php
36804fd306cSNickeau        $loaded = extension_loaded("libxml");
36904fd306cSNickeau        if ($loaded === false) {
37004fd306cSNickeau            LogUtility::msg("The libxml {$suffixBadMessage}");
37104fd306cSNickeau        } else {
37204fd306cSNickeau            $loaded = extension_loaded("xml");
37304fd306cSNickeau            if ($loaded === false) {
37404fd306cSNickeau                LogUtility::msg("The xml {$suffixBadMessage}");
37504fd306cSNickeau            } else {
37604fd306cSNickeau                $loaded = extension_loaded("dom");
37704fd306cSNickeau                if ($loaded === false) {
37804fd306cSNickeau                    LogUtility::msg("The dom {$suffixBadMessage}");
37904fd306cSNickeau                }
38004fd306cSNickeau            }
38104fd306cSNickeau        }
38204fd306cSNickeau        return $loaded;
38304fd306cSNickeau    }
38404fd306cSNickeau
38504fd306cSNickeau    /**
38604fd306cSNickeau     * https://stackoverflow.com/questions/30257438/how-to-completely-remove-a-namespace-using-domdocument
38704fd306cSNickeau     * @param $namespaceUri
38804fd306cSNickeau     */
38904fd306cSNickeau    function removeNamespace($namespaceUri)
39004fd306cSNickeau    {
39104fd306cSNickeau        if (empty($namespaceUri)) {
39204fd306cSNickeau            throw new \RuntimeException("The namespace is empty and should be specified");
39304fd306cSNickeau        }
39404fd306cSNickeau
39504fd306cSNickeau        if (strpos($namespaceUri, "http") === false) {
39604fd306cSNickeau            LogUtility::msg("Internal warning: The namespaceURI ($namespaceUri) does not seems to be an URI", LogUtility::LVL_MSG_WARNING, "support");
39704fd306cSNickeau        }
39804fd306cSNickeau
39904fd306cSNickeau        /**
40004fd306cSNickeau         * @var DOMNodeList $nodes
40104fd306cSNickeau         * finds all nodes that have a namespace node called $ns where their parent node doesn't also have the same namespace.
40204fd306cSNickeau         * @var DOMNodeList $nodes
40304fd306cSNickeau         */
40404fd306cSNickeau        try {
40504fd306cSNickeau            $nodes = $this->xpath("//*[namespace-uri()='$namespaceUri']");
40604fd306cSNickeau            foreach ($nodes as $node) {
40704fd306cSNickeau                /** @var DOMElement $node */
40804fd306cSNickeau                $node->parentNode->removeChild($node);
40904fd306cSNickeau            }
41004fd306cSNickeau        } catch (ExceptionBadSyntax $e) {
41104fd306cSNickeau            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
41204fd306cSNickeau        }
41304fd306cSNickeau
41404fd306cSNickeau        try {
41504fd306cSNickeau            $nodes = $this->xpath("//@*[namespace-uri()='$namespaceUri']");
41604fd306cSNickeau            foreach ($nodes as $node) {
41704fd306cSNickeau                /** @var DOMAttr $node */
41804fd306cSNickeau                /** @var DOMElement $DOMNode */
41904fd306cSNickeau                $DOMNode = $node->parentNode;
42004fd306cSNickeau                $DOMNode->removeAttributeNode($node);
42104fd306cSNickeau            }
42204fd306cSNickeau        } catch (ExceptionBadSyntax $e) {
42304fd306cSNickeau            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
42404fd306cSNickeau        }
42504fd306cSNickeau
42604fd306cSNickeau
42704fd306cSNickeau        //Node namespace can be select only from the document
42804fd306cSNickeau        $xpath = new DOMXPath($this->getDomDocument());
42904fd306cSNickeau        $DOMNodeList = $xpath->query("namespace::*", $this->getDomDocument()->ownerDocument);
43004fd306cSNickeau        foreach ($DOMNodeList as $node) {
43104fd306cSNickeau            $namespaceURI = $node->namespaceURI;
43204fd306cSNickeau            if ($namespaceURI == $namespaceUri) {
43304fd306cSNickeau                $parentNode = $node->parentNode;
43404fd306cSNickeau                $parentNode->removeAttributeNS($namespaceUri, $node->localName);
43504fd306cSNickeau            }
43604fd306cSNickeau        }
43704fd306cSNickeau
43804fd306cSNickeau
43904fd306cSNickeau    }
44004fd306cSNickeau
44104fd306cSNickeau    public function getNamespaces(): array
44204fd306cSNickeau    {
44304fd306cSNickeau        /**
44404fd306cSNickeau         * We can't query with the library {@link XmlDocument::xpath()} function because
44504fd306cSNickeau         * we register in the xpath the namespace
44604fd306cSNickeau         */
44704fd306cSNickeau        $xpath = new DOMXPath($this->getDomDocument());
44804fd306cSNickeau        // `namespace::*` means selects all the namespace attribute of the context node
44904fd306cSNickeau        // namespace is an axes
45004fd306cSNickeau        // See https://www.w3.org/TR/1999/REC-xpath-19991116/#axes
45104fd306cSNickeau        // the namespace axis contains the namespace nodes of the context node; the axis will be empty unless the context node is an element
45204fd306cSNickeau        $DOMNodeList = $xpath->query('namespace::*', $this->getDomDocument()->ownerDocument);
45304fd306cSNickeau        $nameSpace = array();
45404fd306cSNickeau        foreach ($DOMNodeList as $node) {
45504fd306cSNickeau            /** @var DOMElement $node */
45604fd306cSNickeau
45704fd306cSNickeau            $namespaceURI = $node->namespaceURI;
45804fd306cSNickeau            $localName = $node->prefix;
45904fd306cSNickeau            if ($namespaceURI != null) {
46004fd306cSNickeau                $nameSpace[$localName] = $namespaceURI;
46104fd306cSNickeau            }
46204fd306cSNickeau        }
46304fd306cSNickeau        return $nameSpace;
46404fd306cSNickeau    }
46504fd306cSNickeau
46604fd306cSNickeau    /**
46704fd306cSNickeau     * A wrapper that register namespace for the query
46804fd306cSNickeau     * with the defined prefix
46904fd306cSNickeau     * See comment:
47004fd306cSNickeau     * https://www.php.net/manual/en/domxpath.registernamespace.php#51480
47104fd306cSNickeau     * @param $query
47204fd306cSNickeau     * @param DOMElement|null $contextNode
47304fd306cSNickeau     * @return DOMNodeList
47404fd306cSNickeau     *
47504fd306cSNickeau     * Note that this is possible to do evaluation to return a string instead
47604fd306cSNickeau     * https://www.php.net/manual/en/domxpath.evaluate.php
47704fd306cSNickeau     * @throws ExceptionBadSyntax - if the query is invalid
47804fd306cSNickeau     */
47904fd306cSNickeau    public
48004fd306cSNickeau    function xpath($query, DOMElement $contextNode = null): DOMNodeList
48104fd306cSNickeau    {
48204fd306cSNickeau        if (!isset($this->domXpath)) {
48304fd306cSNickeau
48404fd306cSNickeau            $this->domXpath = new DOMXPath($this->getDomDocument());
48504fd306cSNickeau
48604fd306cSNickeau            /**
48704fd306cSNickeau             * Prefix mapping
48804fd306cSNickeau             * It is necessary to use xpath to handle documents which have default namespaces.
48904fd306cSNickeau             * The xpath expression will search for items with no namespace by default.
49004fd306cSNickeau             */
49104fd306cSNickeau            foreach ($this->getNamespaces() as $prefix => $namespaceUri) {
49204fd306cSNickeau                /**
49304fd306cSNickeau                 * You can't register an empty prefix
49404fd306cSNickeau                 * Default namespace (without a prefix) can only be accessed by the local-name() and namespace-uri() attributes.
49504fd306cSNickeau                 */
49604fd306cSNickeau                if (!empty($prefix)) {
49704fd306cSNickeau                    $result = $this->domXpath->registerNamespace($prefix, $namespaceUri);
49804fd306cSNickeau                    if (!$result) {
49904fd306cSNickeau                        LogUtility::msg("Not able to register the prefix ($prefix) for the namespace uri ($namespaceUri)");
50004fd306cSNickeau                    }
50104fd306cSNickeau                }
50204fd306cSNickeau            }
50304fd306cSNickeau        }
50404fd306cSNickeau
50504fd306cSNickeau        if ($contextNode === null) {
50604fd306cSNickeau            $contextNode = $this->domDocument;
50704fd306cSNickeau        }
50804fd306cSNickeau        $domList = $this->domXpath->query($query, $contextNode);
50904fd306cSNickeau        if ($domList === false) {
51004fd306cSNickeau            throw new ExceptionBadSyntax("The query expression ($query) may be malformed");
51104fd306cSNickeau        }
51204fd306cSNickeau        return $domList;
51304fd306cSNickeau
51404fd306cSNickeau    }
51504fd306cSNickeau
51604fd306cSNickeau
51704fd306cSNickeau    public
51804fd306cSNickeau    function removeRootAttribute($attribute)
51904fd306cSNickeau    {
52004fd306cSNickeau
52104fd306cSNickeau        // This function does not work
52204fd306cSNickeau        // $result = $this->getXmlDom()->documentElement->removeAttribute($attribute);
52304fd306cSNickeau
52404fd306cSNickeau        for ($i = 0; $i < $this->getDomDocument()->documentElement->attributes->length; $i++) {
52504fd306cSNickeau            if ($this->getDomDocument()->documentElement->attributes[$i]->name == $attribute) {
52604fd306cSNickeau                $result = $this->getDomDocument()->documentElement->removeAttributeNode($this->getDomDocument()->documentElement->attributes[$i]);
52704fd306cSNickeau                if ($result === false) {
52804fd306cSNickeau                    throw new \RuntimeException("Not able to delete the $attribute");
52904fd306cSNickeau                }
53004fd306cSNickeau                // There is no break here because you may find multiple version attribute for instance
53104fd306cSNickeau            }
53204fd306cSNickeau        }
53304fd306cSNickeau
53404fd306cSNickeau    }
53504fd306cSNickeau
53604fd306cSNickeau    public
53704fd306cSNickeau    function removeRootChildNode($nodeName)
53804fd306cSNickeau    {
53904fd306cSNickeau        for ($i = 0; $i < $this->getDomDocument()->documentElement->childNodes->length; $i++) {
54004fd306cSNickeau            $childNode = &$this->getDomDocument()->documentElement->childNodes[$i];
54104fd306cSNickeau            if ($childNode->nodeName == $nodeName) {
54204fd306cSNickeau                $result = $this->getDomDocument()->documentElement->removeChild($childNode);
54304fd306cSNickeau                if ($result == false) {
54404fd306cSNickeau                    throw new \RuntimeException("Not able to delete the child node $nodeName");
54504fd306cSNickeau                }
54604fd306cSNickeau                break;
54704fd306cSNickeau            }
54804fd306cSNickeau        }
54904fd306cSNickeau    }
55004fd306cSNickeau
55104fd306cSNickeau    /**
55204fd306cSNickeau     *
55304fd306cSNickeau     * Add a value to an attribute value
55404fd306cSNickeau     * Example
55504fd306cSNickeau     * <a class="actual">
55604fd306cSNickeau     *
55704fd306cSNickeau     * if you add "new"
55804fd306cSNickeau     * <a class="actual new">
55904fd306cSNickeau     *
56004fd306cSNickeau     * @param $attName
56104fd306cSNickeau     * @param $attValue
56204fd306cSNickeau     * @param DOMElement $xml
56304fd306cSNickeau     */
56404fd306cSNickeau    public
56504fd306cSNickeau    function addAttributeValue($attName, $attValue, $xml)
56604fd306cSNickeau    {
56704fd306cSNickeau
56804fd306cSNickeau        /**
56904fd306cSNickeau         * Empty condition is better than {@link DOMElement::hasAttribute()}
57004fd306cSNickeau         * because even if the dom element has the attribute, the value
57104fd306cSNickeau         * may be empty
57204fd306cSNickeau         */
57304fd306cSNickeau        $value = $xml->getAttribute($attName);
57404fd306cSNickeau        if (empty($value)) {
57504fd306cSNickeau            $xml->setAttribute($attName, $attValue);
57604fd306cSNickeau        } else {
57704fd306cSNickeau            $actualAttValue = $xml->getAttribute($attName);
57804fd306cSNickeau            $explodeArray = explode(" ", $actualAttValue);
57904fd306cSNickeau            if (!in_array($attValue, $explodeArray)) {
58004fd306cSNickeau                $xml->setAttribute($attName, (string)$actualAttValue . " $attValue");
58104fd306cSNickeau            }
58204fd306cSNickeau        }
58304fd306cSNickeau
58404fd306cSNickeau    }
58504fd306cSNickeau
58604fd306cSNickeau    public function diff(XmlDocument $rightDocument): string
58704fd306cSNickeau    {
58804fd306cSNickeau        $error = "";
58904fd306cSNickeau        XmlSystems::diffNode($this->getDomDocument(), $rightDocument->getDomDocument(), $error);
59004fd306cSNickeau        return $error;
59104fd306cSNickeau    }
59204fd306cSNickeau
59304fd306cSNickeau    /**
59404fd306cSNickeau     * @return string a XML formatted
59504fd306cSNickeau     *
59604fd306cSNickeau     * !!!! The parameter preserveWhiteSpace should have been set to false before loading
59704fd306cSNickeau     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
59804fd306cSNickeau     * $this->xmlDom->preserveWhiteSpace = false;
59904fd306cSNickeau     *
60004fd306cSNickeau     * We do it with the function {@link XmlDocument::mandatoryFormatConfigBeforeLoading()}
60104fd306cSNickeau     *
60204fd306cSNickeau     */
60304fd306cSNickeau    public function toXmlFormatted(DOMElement $element = null): string
60404fd306cSNickeau    {
60504fd306cSNickeau
60604fd306cSNickeau        $this->domDocument->formatOutput = true;
60704fd306cSNickeau        return $this->toXml($element);
60804fd306cSNickeau
60904fd306cSNickeau    }
61004fd306cSNickeau
61104fd306cSNickeau    /**
61204fd306cSNickeau     * @return string that can be diff
61304fd306cSNickeau     *   * EOL diff are not seen
61404fd306cSNickeau     *   * space are
61504fd306cSNickeau     *
61604fd306cSNickeau     * See also {@link XmlDocument::processTextBeforeLoading()}
61704fd306cSNickeau     * that is needed before loading
61804fd306cSNickeau     */
61904fd306cSNickeau    public function toXmlNormalized(DOMElement $element = null): string
62004fd306cSNickeau    {
62104fd306cSNickeau
62204fd306cSNickeau        /**
62304fd306cSNickeau         * If the text was a list
62404fd306cSNickeau         * of sibling text without parent
62504fd306cSNickeau         * We may get a body
62604fd306cSNickeau         * @deprecated letting the code until
62704fd306cSNickeau         * TODO: delete this code when the test pass
62804fd306cSNickeau         */
62904fd306cSNickeau//        $body = $doc->getElementsByTagName("body");
63004fd306cSNickeau//        if ($body->length != 0) {
63104fd306cSNickeau//            $DOMNodeList = $body->item(0)->childNodes;
63204fd306cSNickeau//            $output = "";
63304fd306cSNickeau//            foreach ($DOMNodeList as $value) {
63404fd306cSNickeau//                $output .= $doc->saveXML($value) . DOKU_LF;
63504fd306cSNickeau//            }
63604fd306cSNickeau//        }
63704fd306cSNickeau
63804fd306cSNickeau        if ($element == null) {
63904fd306cSNickeau            $element = $this->domDocument->documentElement;
64004fd306cSNickeau        }
64104fd306cSNickeau        $element->normalize();
64204fd306cSNickeau        return $this->toXmlFormatted($element);
64304fd306cSNickeau    }
64404fd306cSNickeau
64504fd306cSNickeau    /**
64604fd306cSNickeau     * Not really conventional but
64704fd306cSNickeau     * to be able to {@link toXmlNormalized}
64804fd306cSNickeau     * the EOL should be deleted
64904fd306cSNickeau     * We do it before loading and not with a XML documentation
65004fd306cSNickeau     */
65104fd306cSNickeau    private function processTextBeforeLoading($text)
65204fd306cSNickeau    {
65304fd306cSNickeau        $text = str_replace(DOKU_LF, "", $text);
65404fd306cSNickeau        $text = preg_replace("/\r\n\s*\r\n/", "\r\n", $text);
65504fd306cSNickeau        $text = preg_replace("/\n\s*\n/", "\n", $text);
65604fd306cSNickeau        $text = preg_replace("/\n\n/", "\n", $text);
65704fd306cSNickeau        return $text;
65804fd306cSNickeau
65904fd306cSNickeau    }
66004fd306cSNickeau
66104fd306cSNickeau
66204fd306cSNickeau    /**
66304fd306cSNickeau     * This function is called just before loading
66404fd306cSNickeau     * in order to be able to {@link XmlDocument::toXmlFormatted() format the output }
66504fd306cSNickeau     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
66604fd306cSNickeau     * Mandatory for a a good formatting before loading
66704fd306cSNickeau     *
66804fd306cSNickeau     */
66904fd306cSNickeau    private function mandatoryFormatConfigBeforeLoading()
67004fd306cSNickeau    {
67104fd306cSNickeau        // not that
67204fd306cSNickeau        // the loading option: LIBXML_NOBLANKS
67304fd306cSNickeau        // is equivalent to $this->xmlDom->preserveWhiteSpace = true;
67404fd306cSNickeau        $this->domDocument->preserveWhiteSpace = false;
67504fd306cSNickeau    }
67604fd306cSNickeau
67704fd306cSNickeau    /**
67804fd306cSNickeau     * @param string $attributeName
67904fd306cSNickeau     * @param DOMElement $nodeElement
68004fd306cSNickeau     * @return void
68104fd306cSNickeau     * @deprecated use the {@link XmlElement::removeAttribute()} if possible
68204fd306cSNickeau     */
68304fd306cSNickeau    public function removeAttributeValue(string $attributeName, DOMElement $nodeElement)
68404fd306cSNickeau    {
68504fd306cSNickeau        $attr = $nodeElement->getAttributeNode($attributeName);
68604fd306cSNickeau        if (!$attr) {
68704fd306cSNickeau            return;
68804fd306cSNickeau        }
68904fd306cSNickeau        $result = $nodeElement->removeAttributeNode($attr);
69004fd306cSNickeau        if ($result === false) {
69104fd306cSNickeau            LogUtility::msg("Not able to delete the attribute $attributeName of the node element $nodeElement->tagName in the Xml document");
69204fd306cSNickeau        }
69304fd306cSNickeau    }
69404fd306cSNickeau
69504fd306cSNickeau
69604fd306cSNickeau    /**
69704fd306cSNickeau     * Query via a CSS selector
69804fd306cSNickeau     * (not that it will not work with other namespace than the default one, ie xmlns will not work)
69904fd306cSNickeau     * @throws ExceptionBadSyntax - if the selector is not valid
70004fd306cSNickeau     * @throws ExceptionNotFound - if the selector selects nothing
70104fd306cSNickeau     */
70204fd306cSNickeau    public function querySelector(string $selector): XmlElement
70304fd306cSNickeau    {
70404fd306cSNickeau        $domNodeList = $this->querySelectorAll($selector);
70504fd306cSNickeau        if (sizeof($domNodeList) >= 1) {
70604fd306cSNickeau            return $domNodeList[0];
70704fd306cSNickeau        }
70804fd306cSNickeau        throw new ExceptionNotFound("No element was found with the selector $selector");
70904fd306cSNickeau
71004fd306cSNickeau    }
71104fd306cSNickeau
71204fd306cSNickeau    /**
71304fd306cSNickeau     * @return XmlElement[]
71404fd306cSNickeau     * @throws ExceptionBadSyntax
71504fd306cSNickeau     */
71604fd306cSNickeau    public function querySelectorAll(string $selector): array
71704fd306cSNickeau    {
71804fd306cSNickeau        $xpath = $this->cssSelectorToXpath($selector);
71904fd306cSNickeau        $domNodeList = $this->xpath($xpath);
72004fd306cSNickeau        $domNodes = [];
72104fd306cSNickeau        foreach ($domNodeList as $domNode) {
72204fd306cSNickeau            if ($domNode instanceof DOMElement) {
72304fd306cSNickeau                $domNodes[] = new XmlElement($domNode, $this);
72404fd306cSNickeau            }
72504fd306cSNickeau        }
72604fd306cSNickeau        return $domNodes;
72704fd306cSNickeau
72804fd306cSNickeau    }
72904fd306cSNickeau
73004fd306cSNickeau    /**
73104fd306cSNickeau     * @throws ExceptionBadSyntax
73204fd306cSNickeau     */
73304fd306cSNickeau    public function cssSelectorToXpath(string $selector): string
73404fd306cSNickeau    {
73504fd306cSNickeau        try {
73604fd306cSNickeau            return PhpCss::toXpath($selector);
73704fd306cSNickeau        } catch (PhpCss\Exception\ParserException $e) {
73804fd306cSNickeau            throw new ExceptionBadSyntax("The selector ($selector) is not valid. Error: {$e->getMessage()}");
73904fd306cSNickeau        }
74004fd306cSNickeau    }
74104fd306cSNickeau
74204fd306cSNickeau    /**
74304fd306cSNickeau     * An utility function to know how to remove a node
74404fd306cSNickeau     * @param \DOMNode $nodeElement
74504fd306cSNickeau     * @deprecated use {@link XmlElement::remove} instead
74604fd306cSNickeau     */
74704fd306cSNickeau    public function removeNode(\DOMNode $nodeElement)
74804fd306cSNickeau    {
74904fd306cSNickeau
75004fd306cSNickeau        $nodeElement->parentNode->removeChild($nodeElement);
75104fd306cSNickeau
75204fd306cSNickeau    }
75304fd306cSNickeau
75404fd306cSNickeau    public function getElement(): XmlElement
75504fd306cSNickeau    {
75604fd306cSNickeau        return XmlElement::create($this->getDomDocument()->documentElement, $this);
75704fd306cSNickeau    }
75804fd306cSNickeau
75904fd306cSNickeau    public function toHtml()
76004fd306cSNickeau    {
76104fd306cSNickeau        return $this->domDocument->saveHTML();
76204fd306cSNickeau    }
76304fd306cSNickeau
76404fd306cSNickeau    /**
76504fd306cSNickeau     * @throws \DOMException - if invalid local name
76604fd306cSNickeau     */
76704fd306cSNickeau    public function createElement(string $localName): XmlElement
76804fd306cSNickeau    {
76904fd306cSNickeau        $element = $this->domDocument->createElement($localName);
77004fd306cSNickeau        return XmlElement::create($element, $this);
77104fd306cSNickeau    }
77204fd306cSNickeau
77304fd306cSNickeau    /**
77404fd306cSNickeau     * @throws ExceptionBadSyntax
77504fd306cSNickeau     * @throws ExceptionBadState
77604fd306cSNickeau     */
77704fd306cSNickeau    public function xpathFirstDomElement(string $xpath): DOMElement
77804fd306cSNickeau    {
77904fd306cSNickeau        $domList = $this->xpath($xpath);
78004fd306cSNickeau        $domElement = $domList->item(0);
78104fd306cSNickeau        if ($domElement instanceof DOMElement) {
78204fd306cSNickeau            return $domElement;
78304fd306cSNickeau        } else {
78404fd306cSNickeau            throw new ExceptionBadState("The first DOM node is not a DOM element");
78504fd306cSNickeau        }
78604fd306cSNickeau    }
78704fd306cSNickeau
78804fd306cSNickeau
78904fd306cSNickeau}
790