<?php

/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */

namespace ComboStrap\Xml;

use ComboStrap\ExceptionBadState;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionNotFound;
use ComboStrap\FileSystems;
use ComboStrap\LogUtility;
use ComboStrap\Path;
use ComboStrap\PluginUtility;
use DOMAttr;
use DOMDocument;
use DOMElement;
use DOMNodeList;
use DOMXPath;
use LibXMLError;
use PhpCss;


/**
 * An XML document that follows the Web API interface.
 *
 * Note: since the release [jack_jackrum](https://www.dokuwiki.org/changes#release_2023-04-04_jack_jackrum),
 * Dokuwiki uses the [dom-wrapper](https://github.com/scotteh/php-dom-wrapper)
 * that follows the jQuery API and uses [css-selector](https://symfony.com/doc/current/components/css_selector.html)
 * to get an XPath expression from a CSS selector.
 */
class XmlDocument
{
    const HTML_TYPE = "html";
    const XML_TYPE = "xml";

    /**
     * The errors that the HTML loading may return and that are ignored
     * (the messages are compared verbatim, trailing end of line included)
     */
    const KNOWN_HTML_LOADING_ERRORS = [
        "Tag section invalid\n", // section is HTML5 tag
        "Tag footer invalid\n", // footer is HTML5 tag
        "error parsing attribute name\n", // name is an HTML5 attribute
        "Unexpected end tag : blockquote\n", // when the document is only a blockquote
        "Tag bdi invalid\n",
        "Tag path invalid\n", // svg
        "Tag svg invalid\n", // svg
        "Unexpected end tag : a\n", // when the document is only a anchor
        "Unexpected end tag : p\n", // when the document is only a p
        "Unexpected end tag : button\n", // when the document is only a button
    ];

    const CANONICAL = "xml";

    /**
     * @var DOMDocument the wrapped DOM document
     */
    private DOMDocument $domDocument;

    /**
     * @var DOMXPath created lazily by {@link XmlDocument::xpath()}
     */
    private DOMXPath $domXpath;

    /**
     * XmlDocument constructor.
     *
     * @param $text - the markup to load
     * @param string $type - {@link XmlDocument::XML_TYPE} to load as XML, any other value loads the text as HTML
     * @throws ExceptionBadSyntax - if the text is empty, if the document is not valid or if the lib xml is not available
     *
     * Getting the width of an error HTML document if the file was downloaded
     * from a server has no use at all
     */
    public function __construct($text, string $type = self::XML_TYPE)
    {

        if (empty($text)) {
            throw new ExceptionBadSyntax("The xml text markup should not be empty.", self::CANONICAL);
        }
        if (!$this->isXmlExtensionLoaded()) {
            /**
             * If the XML module is not present
             */
            throw new ExceptionBadSyntax("The php `libxml` module was not found on your installation, the xml/svg file could not be modified / instantiated", self::CANONICAL);
        }

        // https://www.php.net/manual/en/libxml.constants.php
        $options = LIBXML_NOCDATA
            // | LIBXML_NOBLANKS // same as preserveWhiteSpace=false, already set via mandatoryFormatConfigBeforeLoading
            | LIBXML_NOXMLDECL // Drop the XML declaration when saving a document
            | LIBXML_NONET // No network during load
            | LIBXML_NSCLEAN // Remove redundant namespace declarations
        ;

        // HTML
        if ($type === self::HTML_TYPE) {

            // Options that cause the process to hang if this is not for a html file
            // Empty tag option may also be used only on save
            // at https://www.php.net/manual/en/domdocument.save.php
            // and https://www.php.net/manual/en/domdocument.savexml.php
            $options = $options
                // | LIBXML_NOEMPTYTAG // Expand empty tags (e.g. <br/> to <br></br>)
                | LIBXML_HTML_NODEFDTD // No doctype
                | LIBXML_HTML_NOIMPLIED;

        }

        /**
         * No warning reporting
         * Load XML issue E_STRICT warning seen in the log
         */
        $oldLevel = null;
        if (!PluginUtility::isTest()) {
            $oldLevel = error_reporting(E_ERROR);
        }

        try {

            $this->domDocument = new DOMDocument('1.0', 'UTF-8');

            $this->mandatoryFormatConfigBeforeLoading();

            $text = $this->processTextBeforeLoading($text);
            if ($text === "" || $text === null) {
                // The pre-processing deletes end of lines, a text made only of them ends up empty
                throw new ExceptionBadSyntax("The xml text markup should not be empty.", self::CANONICAL);
            }

            /**
             * Because the load does handle HTML5 tag as error
             * (ie section for instance)
             * We take over the errors and handle them after the below load
             *
             * https://www.php.net/manual/en/function.libxml-use-internal-errors.php
             *
             * NOTE(review): the previous state is not restored afterwards (same as before),
             * confirm that no other code relies on it before restoring it.
             */
            libxml_use_internal_errors(true);

            if ($type === self::XML_TYPE) {

                $result = $this->domDocument->loadXML($text, $options);

            } else {

                /**
                 * Unlike loading XML, HTML does not have to be well-formed to load.
                 * While malformed HTML should load successfully, this function may generate E_WARNING errors
                 * @deprecated as we try to be XHTML compliant but yeah this is not always possible
                 */

                /**
                 * Bug: Even if we set that the document is an UTF-8
                 * loadHTML treat the string as being in ISO-8859-1 if without any heading
                 * (ie <xml encoding="utf-8"..>
                 * https://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly
                 * Otherwise French and other language are not well loaded
                 *
                 * We use the trick to transform the non-ASCII characters into numeric HTML entities.
                 * (`mb_convert_encoding` with `HTML-ENTITIES` is deprecated since PHP 8.2)
                 */
                $htmlEntityEncoded = mb_encode_numericentity($text, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
                $result = $this->domDocument->loadHTML($htmlEntityEncoded, $options);

            }

            if ($result === false) {

                foreach (libxml_get_errors() as $error) {

                    /* @var LibXMLError $error
                     * @noinspection PhpComposerExtensionStubsInspection
                     *
                     * Section is an html5 tag (and is invalid for libxml)
                     */
                    if (in_array($error->message, self::KNOWN_HTML_LOADING_ERRORS, true)) {
                        continue;
                    }

                    // The xml dom object is null, we got NULL pointer exception everywhere
                    // just throw, the code will see it
                    throw new ExceptionBadSyntax(self::buildLoadingErrorMessage($error->message, $text), self::CANONICAL);

                }
            }

        } finally {

            /**
             * We clean the errors (known or not), otherwise they are added in a queue
             * and in a test series, they failed the next test
             */
            libxml_clear_errors();

            /**
             * Error reporting back, even when the loading has thrown
             */
            if ($oldLevel !== null) {
                error_reporting($oldLevel);
            }

        }

        // namespace error : Namespace prefix dc on format is not defined
        // missing the ns declaration in the file. example:
        // xmlns:dc="http://purl.org/dc/elements/1.1/"

    }

    /**
     * Build the message of the exception thrown when the loading fails
     *
     * @param string $libXmlMessage - the message of the libxml error
     * @param string $text - the text that was loaded
     * @return string
     */
    private static function buildLoadingErrorMessage(string $libXmlMessage, string $text): string
    {
        /**
         * This error is an XML and HTML error
         */
        if (
            strpos($libXmlMessage, "htmlParseEntityRef: expecting ';' in Entity") !== false
            || $libXmlMessage == "EntityRef: expecting ';'\n"
        ) {
            $message = "There is big probability that there is an ampersand alone `&`. ie You forgot to call html/Xml entities in a `src` or `url` attribute.";
        } else {
            $message = "Error while loading HTML.";
        }
        /**
         * Boolean attribute XML loading error
         */
        if (strpos($libXmlMessage, "Specification mandates value for attribute") !== false) {
            $message = "Xml does not allow boolean attribute (ie without any value). If you skip this error, you will get a general attribute constructing error as next error. Load as HTML.";
        }

        return $message . " Error: " . $libXmlMessage . ", Loaded text: " . $text;
    }

    /**
     * To not have a collusion with {@link FetcherSvg::createFetchImageSvgFromPath()}
     * @param Path $path
     * @return XmlDocument - loaded as HTML if the extension is `html` or `htm`, as XML otherwise
     * @throws ExceptionNotFound - if the file does not exist
     * @throws ExceptionBadSyntax - if the content is not valid
     */
    public static function createXmlDocFromPath(Path $path): XmlDocument
    {
        $mime = XmlDocument::XML_TYPE;
        if (in_array($path->getExtension(), ["html", "htm"], true)) {
            $mime = XmlDocument::HTML_TYPE;
        }
        $content = FileSystems::getContent($path);
        return (new XmlDocument($content, $mime));
    }

    /**
     * @param $string - the markup
     * @param bool $asHtml - if true, the markup is loaded as HTML
     * @return XmlDocument
     * @throws ExceptionBadSyntax
     */
    public static function createXmlDocFromMarkup($string, $asHtml = false): XmlDocument
    {

        $mime = XmlDocument::XML_TYPE;
        if ($asHtml) {
            $mime = XmlDocument::HTML_TYPE;
        }
        return new XmlDocument($string, $mime);
    }

    /**
     * HTML loading is more permissive
     *
     * For instance, you would not get an error on boolean attribute
     * ```
     * Error: Specification mandates value for attribute defer
     * ```
     * In Xml, it's mandatory but not in HTML, they are known as:
     * https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attribute
     *
     *
     * @throws ExceptionBadSyntax
     */
    public static function createHtmlDocFromMarkup($markup): XmlDocument
    {
        return self::createXmlDocFromMarkup($markup, true);
    }

    /**
     * @return DOMDocument the wrapped DOM document
     */
    public function &getDomDocument(): DOMDocument
    {
        return $this->domDocument;
    }

    /**
     * @param $name
     * @param $value
     * @return void
     * @deprecated use {@link XmlDocument::getElement()} instead
     */
    public function setRootAttribute($name, $value)
    {
        if ($this->isXmlExtensionLoaded()) {
            $this->domDocument->documentElement->setAttribute($name, $value);
        }
    }

    /**
     * @param $name
     * @return string|null null if not found or if the value is the empty string
     * @deprecated uses {@link XmlElement::getAttribute()} of {@link self::getElement()}
     */
    public function getRootAttributeValue($name): ?string
    {
        $value = $this->domDocument->documentElement->getAttribute($name);
        if ($value === "") {
            return null;
        }
        return $value;
    }

    /**
     * @param DOMElement|null $element - the element to output (the root element if null)
     * @return string - same output as {@link XmlDocument::toXml()}
     */
    public function toXhtml(?DOMElement $element = null): string
    {
        return $this->toXml($element);
    }

    /**
     * @param DOMElement|null $element - the element to output (the root element if null)
     * @return string - the XML markup of the element, trimmed
     */
    public function toXml(?DOMElement $element = null): string
    {

        if ($element === null) {
            $element = $this->domDocument->documentElement;
        }
        /**
         * LIBXML_NOXMLDECL (no xml declaration) does not work because only empty tag is recognized
         * https://www.php.net/manual/en/domdocument.savexml.php
         */
        $xmlText = $this->domDocument->saveXML(
            $element,
            LIBXML_NOXMLDECL
        );
        // Delete doctype (for svg optimization)
        // php has only doctype manipulation for HTML
        $xmlText = preg_replace('/^<!DOCTYPE.+?>/', '', $xmlText);
        return trim($xmlText);

    }

    /**
     * https://www.php.net/manual/en/dom.installation.php
     *
     * Check it with
     * ```
     * php -m
     * ```
     * Install with
     * ```
     * sudo apt-get install php-xml
     * ```
     * @return bool - false (with a logged message) at the first missing extension among `libxml`, `xml` and `dom`
     */
    public function isXmlExtensionLoaded(): bool
    {
        // A suffix used in the bad message
        $suffixBadMessage = "php extension is not installed. To install it, you need to install xml. Example: `sudo apt-get install php-xml`, `yum install php-xml`";

        // https://www.php.net/manual/en/dom.requirements.php
        foreach (["libxml", "xml", "dom"] as $extension) {
            if (!extension_loaded($extension)) {
                LogUtility::msg("The {$extension} {$suffixBadMessage}");
                return false;
            }
        }
        return true;
    }

    /**
     * Remove the elements, the attributes and the declarations of a namespace
     *
     * https://stackoverflow.com/questions/30257438/how-to-completely-remove-a-namespace-using-domdocument
     * @param $namespaceUri
     */
    public function removeNamespace($namespaceUri)
    {
        if (empty($namespaceUri)) {
            throw new \RuntimeException("The namespace is empty and should be specified");
        }

        if (strpos($namespaceUri, "http") === false) {
            LogUtility::msg("Internal warning: The namespaceURI ($namespaceUri) does not seems to be an URI", LogUtility::LVL_MSG_WARNING, "support");
        }

        /**
         * Delete all elements that are in the namespace
         * (the xpath result is not a live list, deleting while looping is safe)
         */
        try {
            $nodes = $this->xpath("//*[namespace-uri()='$namespaceUri']");
            foreach ($nodes as $node) {
                /** @var DOMElement $node */
                $node->parentNode->removeChild($node);
            }
        } catch (ExceptionBadSyntax $e) {
            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
        }

        /**
         * Delete all attributes that are in the namespace
         */
        try {
            $nodes = $this->xpath("//@*[namespace-uri()='$namespaceUri']");
            foreach ($nodes as $node) {
                /** @var DOMAttr $node */
                /** @var DOMElement $DOMNode */
                $DOMNode = $node->parentNode;
                $DOMNode->removeAttributeNode($node);
            }
        } catch (ExceptionBadSyntax $e) {
            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
        }

        // Namespace nodes are selected with a fresh xpath from the root element
        $xpath = new DOMXPath($this->domDocument);
        $DOMNodeList = $xpath->query("namespace::*", $this->domDocument->documentElement);
        foreach ($DOMNodeList as $node) {
            if ($node->namespaceURI == $namespaceUri) {
                $parentNode = $node->parentNode;
                $parentNode->removeAttributeNS($namespaceUri, $node->localName);
            }
        }

    }

    /**
     * @return array - the namespace uris of the root element, keyed by their prefix
     */
    public function getNamespaces(): array
    {
        /**
         * We can't query with the library {@link XmlDocument::xpath()} function because
         * we register in the xpath the namespace
         */
        $xpath = new DOMXPath($this->domDocument);
        // `namespace::*` means selects all the namespace attribute of the context node
        // namespace is an axes
        // See https://www.w3.org/TR/1999/REC-xpath-19991116/#axes
        // the namespace axis contains the namespace nodes of the context node; the axis will be empty unless the context node is an element
        $DOMNodeList = $xpath->query('namespace::*', $this->domDocument->documentElement);
        $nameSpace = [];
        foreach ($DOMNodeList as $node) {
            $namespaceURI = $node->namespaceURI;
            $prefix = $node->prefix;
            if ($namespaceURI != null) {
                $nameSpace[$prefix] = $namespaceURI;
            }
        }
        return $nameSpace;
    }

    /**
     * A wrapper that register namespace for the query
     * with the defined prefix
     * See comment:
     * https://www.php.net/manual/en/domxpath.registernamespace.php#51480
     * @param $query
     * @param DOMElement|null $contextNode - the context of the query (the document if null)
     * @return DOMNodeList
     *
     * Note that this is possible to do evaluation to return a string instead
     * https://www.php.net/manual/en/domxpath.evaluate.php
     * @throws ExceptionBadSyntax - if the query is invalid
     */
    public function xpath($query, ?DOMElement $contextNode = null): DOMNodeList
    {

        if (!isset($this->domXpath)) {

            $this->domXpath = new DOMXPath($this->domDocument);

            /**
             * Prefix mapping
             * It is necessary to use xpath to handle documents which have default namespaces.
             * The xpath expression will search for items with no namespace by default.
             */
            foreach ($this->getNamespaces() as $prefix => $namespaceUri) {
                /**
                 * You can't register an empty prefix
                 * Default namespace (without a prefix) can only be accessed by the local-name() and namespace-uri() attributes.
                 */
                if (empty($prefix)) {
                    continue;
                }
                $result = $this->domXpath->registerNamespace($prefix, $namespaceUri);
                if (!$result) {
                    LogUtility::msg("Not able to register the prefix ($prefix) for the namespace uri ($namespaceUri)");
                }
            }
        }

        $context = $contextNode ?? $this->domDocument;
        $domList = $this->domXpath->query($query, $context);
        if ($domList === false) {
            throw new ExceptionBadSyntax("The query expression ($query) may be malformed");
        }
        return $domList;

    }

    /**
     * Delete all attributes of the root element with the given name
     *
     * @param $attribute - the attribute name
     */
    public function removeRootAttribute($attribute)
    {

        // This function does not work
        // $result = $this->getXmlDom()->documentElement->removeAttribute($attribute);

        $rootElement = $this->domDocument->documentElement;

        // Collect first and delete after: the attribute map is live,
        // deleting while looping by index would skip the attribute that follows a deleted one.
        // There is no break here because you may find multiple version attribute for instance
        $attributesToDelete = [];
        foreach ($rootElement->attributes as $attributeNode) {
            if ($attributeNode->name === (string)$attribute) {
                $attributesToDelete[] = $attributeNode;
            }
        }

        foreach ($attributesToDelete as $attributeNode) {
            $result = $rootElement->removeAttributeNode($attributeNode);
            if ($result === false) {
                throw new \RuntimeException("Not able to delete the $attribute");
            }
        }

    }

    /**
     * Delete the first child node of the root element with the given node name
     *
     * @param $nodeName
     */
    public function removeRootChildNode($nodeName)
    {
        $rootElement = $this->domDocument->documentElement;
        foreach ($rootElement->childNodes as $childNode) {
            if ($childNode->nodeName == $nodeName) {
                $result = $rootElement->removeChild($childNode);
                if ($result === false) {
                    throw new \RuntimeException("Not able to delete the child node $nodeName");
                }
                // only the first one is deleted
                return;
            }
        }
    }

    /**
     *
     * Add a value to an attribute value
     * Example
     * <a class="actual">
     *
     * if you add "new"
     * <a class="actual new">
     *
     * The value is not added if it is already present.
     *
     * @param $attName
     * @param $attValue
     * @param DOMElement $xml
     */
    public function addAttributeValue($attName, $attValue, $xml)
    {

        /**
         * The comparison with the empty string is better than {@link DOMElement::hasAttribute()}
         * because even if the dom element has the attribute, the value
         * may be empty.
         * (`empty` is not used because the value `0` would be seen as empty and overwritten)
         */
        $actualAttValue = $xml->getAttribute($attName);
        if ($actualAttValue === "") {
            $xml->setAttribute($attName, $attValue);
            return;
        }

        $actualValues = explode(" ", $actualAttValue);
        if (!in_array((string)$attValue, $actualValues, true)) {
            $xml->setAttribute($attName, $actualAttValue . " $attValue");
        }

    }

    /**
     * @param XmlDocument $rightDocument - the document to compare with
     * @return string - the text filled by {@link XmlSystems::diffNode()} (empty as initial value)
     */
    public function diff(XmlDocument $rightDocument): string
    {
        $error = "";
        XmlSystems::diffNode($this->domDocument, $rightDocument->getDomDocument(), $error);
        return $error;
    }

    /**
     * @return string a XML formatted
     *
     * !!!! The parameter preserveWhiteSpace should have been set to false before loading
     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
     * $this->xmlDom->preserveWhiteSpace = false;
     *
     * We do it with the function {@link XmlDocument::mandatoryFormatConfigBeforeLoading()}
     *
     * NOTE(review): `formatOutput` stays enabled on the document after this call,
     * the next outputs are then also formatted - confirm that this is wanted.
     */
    public function toXmlFormatted(?DOMElement $element = null): string
    {

        $this->domDocument->formatOutput = true;
        return $this->toXml($element);

    }

    /**
     * @return string that can be diff
     *   * EOL diff are not seen
     *   * space are
     *
     * See also {@link XmlDocument::processTextBeforeLoading()}
     * that is needed before loading
     */
    public function toXmlNormalized(?DOMElement $element = null): string
    {

        if ($element === null) {
            $element = $this->domDocument->documentElement;
        }
        $element->normalize();
        return $this->toXmlFormatted($element);
    }

    /**
     * Not really conventional but
     * to be able to {@link toXmlNormalized}
     * the EOL should be deleted
     * We do it before loading and not with a XML documentation
     *
     * NOTE(review): DOKU_LF is defined outside of this file (presumably `\n`),
     * in this case, the replacements that follow the first one have nothing left to match - to confirm.
     */
    private function processTextBeforeLoading($text)
    {
        $text = str_replace(DOKU_LF, "", $text);
        $text = preg_replace("/\r\n\s*\r\n/", "\r\n", $text);
        $text = preg_replace("/\n\s*\n/", "\n", $text);
        $text = preg_replace("/\n\n/", "\n", $text);
        return $text;

    }


    /**
     * This function is called just before loading
     * in order to be able to {@link XmlDocument::toXmlFormatted() format the output }
     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
     * Mandatory for a good formatting before loading
     *
     */
    private function mandatoryFormatConfigBeforeLoading()
    {
        // note that
        // the loading option: LIBXML_NOBLANKS
        // is equivalent to $this->xmlDom->preserveWhiteSpace = false;
        $this->domDocument->preserveWhiteSpace = false;
    }

    /**
     * Delete the attribute of an element (nothing happens if the element does not have it)
     *
     * @param string $attributeName
     * @param DOMElement $nodeElement
     * @return void
     * @deprecated use the {@link XmlElement::removeAttribute()} if possible
     */
    public function removeAttributeValue(string $attributeName, DOMElement $nodeElement)
    {
        $attr = $nodeElement->getAttributeNode($attributeName);
        if (!$attr) {
            return;
        }
        $result = $nodeElement->removeAttributeNode($attr);
        if ($result === false) {
            LogUtility::msg("Not able to delete the attribute $attributeName of the node element $nodeElement->tagName in the Xml document");
        }
    }


    /**
     * Query via a CSS selector
     * (note that it will not work with other namespace than the default one, ie xmlns will not work)
     * @return XmlElement - the first selected element
     * @throws ExceptionBadSyntax - if the selector is not valid
     * @throws ExceptionNotFound - if the selector selects nothing
     */
    public function querySelector(string $selector): XmlElement
    {
        $xmlElements = $this->querySelectorAll($selector);
        if (count($xmlElements) >= 1) {
            return $xmlElements[0];
        }
        throw new ExceptionNotFound("No element was found with the selector $selector");

    }

    /**
     * @return XmlElement[] - the selected elements (the nodes that are not elements are skipped)
     * @throws ExceptionBadSyntax
     */
    public function querySelectorAll(string $selector): array
    {
        $xpath = $this->cssSelectorToXpath($selector);
        $xmlElements = [];
        foreach ($this->xpath($xpath) as $domNode) {
            if ($domNode instanceof DOMElement) {
                $xmlElements[] = new XmlElement($domNode, $this);
            }
        }
        return $xmlElements;

    }

    /**
     * @return string - the xpath expression of the CSS selector
     * @throws ExceptionBadSyntax - if the selector is not valid
     */
    public function cssSelectorToXpath(string $selector): string
    {
        try {
            return PhpCss::toXpath($selector);
        } catch (PhpCss\Exception\ParserException $e) {
            throw new ExceptionBadSyntax("The selector ($selector) is not valid. Error: {$e->getMessage()}");
        }
    }

    /**
     * An utility function to know how to remove a node
     * @param \DOMNode $nodeElement
     * @deprecated use {@link XmlElement::remove} instead
     */
    public function removeNode(\DOMNode $nodeElement)
    {

        $nodeElement->parentNode->removeChild($nodeElement);

    }

    /**
     * @return XmlElement - the root element
     */
    public function getElement(): XmlElement
    {
        return XmlElement::create($this->domDocument->documentElement, $this);
    }

    /**
     * @return string|false - the output of {@link DOMDocument::saveHTML()} for the whole document
     */
    public function toHtml()
    {
        return $this->domDocument->saveHTML();
    }

    /**
     * @throws \DOMException - if invalid local name
     */
    public function createElement(string $localName): XmlElement
    {
        $element = $this->domDocument->createElement($localName);
        return XmlElement::create($element, $this);
    }

    /**
     * @return DOMElement - the first node selected by the xpath expression
     * @throws ExceptionBadSyntax - if the xpath expression is not valid
     * @throws ExceptionBadState - if the first node is not an element (or if nothing was selected)
     */
    public function xpathFirstDomElement(string $xpath): DOMElement
    {
        $domList = $this->xpath($xpath);
        $domElement = $domList->item(0);
        if ($domElement instanceof DOMElement) {
            return $domElement;
        }
        throw new ExceptionBadState("The first DOM node is not a DOM element");
    }


}