<?php

/**
 * Copyright (c) 2021. ComboStrap, Inc. and its affiliates. All Rights Reserved.
 *
 * This source code is licensed under the GPL license found in the
 * COPYING file in the root directory of this source tree.
 *
 * @license  GPL 3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
 * @author   ComboStrap <support@combostrap.com>
 *
 */

namespace ComboStrap\Xml;

use ComboStrap\ExceptionBadState;
use ComboStrap\ExceptionBadSyntax;
use ComboStrap\ExceptionNotFound;
use ComboStrap\FileSystems;
use ComboStrap\LogUtility;
use ComboStrap\Path;
use ComboStrap\PluginUtility;
use DOMAttr;
use DOMDocument;
use DOMElement;
use DOMNodeList;
use DOMXPath;
use LibXMLError;
use PhpCss;


/**
 * An XML document that follows the Web API interface.
 *
 * Note: since the release [jack_jackrum](https://www.dokuwiki.org/changes#release_2023-04-04_jack_jackrum),
 * Dokuwiki uses the [dom-wrapper](https://github.com/scotteh/php-dom-wrapper)
 * that follows the jQuery API and uses [css-selector](https://symfony.com/doc/current/components/css_selector.html)
 * to get an XPath expression from a CSS selector.
 *
 */
class XmlDocument
{
    const HTML_TYPE = "html";
    const XML_TYPE = "xml";
    /**
     * The error messages that the HTML loading may return
     * and that are ignored (they are compared to {@link LibXMLError::$message})
     */
    const KNOWN_HTML_LOADING_ERRORS = [
        "Tag section invalid\n", // section is HTML5 tag
        "Tag footer invalid\n", // footer is HTML5 tag
        "error parsing attribute name\n", // name is an HTML5 attribute
        "Unexpected end tag : blockquote\n", // when the document is only a blockquote
        "Tag bdi invalid\n",
        "Tag path invalid\n", // svg
        "Tag svg invalid\n", // svg
        "Unexpected end tag : a\n", // when the document is only a anchor
        "Unexpected end tag : p\n", // when the document is only a p
        "Unexpected end tag : button\n", // when the document is only a button
    ];

    const CANONICAL = "xml";

    /**
     * @var DOMDocument the wrapped DOM document
     */
    private DOMDocument $domDocument;
    /**
     * @var DOMXPath the xpath evaluator, lazily created by {@link XmlDocument::xpath()}
     */
    private DOMXPath $domXpath;

    /**
     * XmlDocument constructor.
     *
     * Loads the text as XML (default) or as HTML.
     *
     * Side effects visible in this function:
     *   * outside of test, the error reporting level is lowered to E_ERROR during the load
     *     and is restored afterwards (also when the load throws)
     *   * `libxml_use_internal_errors(true)` is set and is left enabled
     *
     * @param $text - the markup to load
     * @param string $type - {@link XmlDocument::HTML_TYPE} or {@link XmlDocument::XML_TYPE}
     * @throws ExceptionBadSyntax - if the document is not valid or the lib xml is not available
     *
     * Getting the width of an error HTML document if the file was downloaded
     * from a server has no use at all
     */
    public function __construct($text, string $type = self::XML_TYPE)
    {

        if (!$this->isXmlExtensionLoaded()) {
            /**
             * If the XML module is not present
             */
            throw new ExceptionBadSyntax("The php `libxml` module was not found on your installation, the xml/svg file could not be modified / instantiated", self::CANONICAL);
        }

        // https://www.php.net/manual/en/libxml.constants.php
        $options = LIBXML_NOCDATA
            // | LIBXML_NOBLANKS // same as preserveWhiteSpace=false, not set here, see mandatoryFormatConfigBeforeLoading
            | LIBXML_NOXMLDECL // Drop the XML declaration when saving a document
            | LIBXML_NONET // No network during load
            | LIBXML_NSCLEAN // Remove redundant namespace declarations (original note: the formatting does not work if this is set)
        ;

        // HTML
        if ($type === self::HTML_TYPE) {

            // Options that cause the process to hang if this is not for a html file
            // Empty tag option may also be used only on save
            // at https://www.php.net/manual/en/domdocument.save.php
            // and https://www.php.net/manual/en/domdocument.savexml.php
            $options = $options
                // | LIBXML_NOEMPTYTAG // Expand empty tags (e.g. <br/> to <br></br>)
                | LIBXML_HTML_NODEFDTD // No doctype
                | LIBXML_HTML_NOIMPLIED;

        }

        /**
         * No warning reporting
         * Load XML issue E_STRICT warning seen in the log
         */
        $oldLevel = null;
        if (!PluginUtility::isTest()) {
            $oldLevel = error_reporting(E_ERROR);
        }

        try {

            $this->domDocument = new DOMDocument('1.0', 'UTF-8');

            $this->mandatoryFormatConfigBeforeLoading();

            $text = $this->processTextBeforeLoading($text);

            /**
             * Because the load does handle HTML5 tag as error
             * (ie section for instance)
             * We take over the errors and handle them after the below load
             *
             * https://www.php.net/manual/en/function.libxml-use-internal-errors.php
             */
            libxml_use_internal_errors(true);

            if ($type === self::XML_TYPE) {

                $result = $this->domDocument->loadXML($text, $options);

            } else {

                /**
                 * Unlike loading XML, HTML does not have to be well-formed to load.
                 * While malformed HTML should load successfully, this function may generate E_WARNING errors
                 * @deprecated as we try to be XHTML compliant but this is not always possible
                 */

                /**
                 * Bug: Even if we set that the document is an UTF-8
                 * loadHTML treats the string as being in ISO-8859-1 if without any heading
                 * (ie <xml encoding="utf-8"..>
                 * https://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly
                 * Otherwise French and other language are not well loaded
                 *
                 * We use the trick to transform the non-ASCII characters to numeric HTML entities.
                 * (`mb_convert_encoding` with `HTML-ENTITIES` is deprecated since PHP 8.2)
                 */
                $htmlEntityEncoded = mb_encode_numericentity($text, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
                $result = $this->domDocument->loadHTML($htmlEntityEncoded, $options);

            }

            if ($result === false) {

                foreach (libxml_get_errors() as $error) {

                    /* @var LibXMLError $error
                     * @noinspection PhpComposerExtensionStubsInspection
                     *
                     * Section is an html5 tag (and is invalid for libxml)
                     */
                    if (in_array($error->message, self::KNOWN_HTML_LOADING_ERRORS, true)) {
                        continue;
                    }

                    /**
                     * This error is an XML and HTML error
                     */
                    if (
                        strpos($error->message, "htmlParseEntityRef: expecting ';' in Entity") !== false
                        ||
                        $error->message == "EntityRef: expecting ';'\n"
                    ) {
                        $message = "There is big probability that there is an ampersand alone `&`. ie You forgot to call html/Xml entities in a `src` or `url` attribute.";
                    } else {
                        $message = "Error while loading HTML";
                    }
                    /**
                     * Boolean attribute XML loading error
                     */
                    if (strpos($error->message, "Specification mandates value for attribute") !== false) {
                        $message = "Xml does not allow boolean attribute (ie without any value). If you skip this error, you will get a general attribute constructing error as next error. Load as HTML.";
                    }

                    $message .= "Error: " . $error->message . ", Loaded text: " . $text;

                    // The xml dom object is null, we got NULL pointer exception everywhere
                    // just throw, the code will see it
                    // (the libxml error queue and the error level are reset in the finally block)
                    throw new ExceptionBadSyntax($message, self::CANONICAL);

                }
            }

        } finally {

            /**
             * We clean the errors (known or not), otherwise they stay in a queue
             * and, in a test series, they fail the next test
             */
            libxml_clear_errors();

            /**
             * Error reporting back, also when the load has thrown
             */
            if ($oldLevel !== null) {
                error_reporting($oldLevel);
            }

        }

        // namespace error : Namespace prefix dc on format is not defined
        // missing the ns declaration in the file. example:
        // xmlns:dc="http://purl.org/dc/elements/1.1/"


    }

    /**
     * Create a document from a file.
     * The content is loaded as HTML if the extension is `html` or `htm`, as XML otherwise.
     *
     * (Named this way to not have a collision with {@link FetcherSvg::createFetchImageSvgFromPath()})
     * @param Path $path
     * @return XmlDocument
     * @throws ExceptionNotFound - if the file does not exist
     * @throws ExceptionBadSyntax - if the content is not valid
     */
    public
    static function createXmlDocFromPath(Path $path): XmlDocument
    {
        $mime = XmlDocument::XML_TYPE;
        if (in_array($path->getExtension(), ["html", "htm"], true)) {
            $mime = XmlDocument::HTML_TYPE;
        }
        $content = FileSystems::getContent($path);
        return (new XmlDocument($content, $mime));
    }

    /**
     * Create a document from a markup string
     *
     * @param $string - the markup
     * @param bool $asHtml - if true, the markup is loaded as HTML, otherwise as XML
     * @return XmlDocument
     * @throws ExceptionBadSyntax
     */
    public
    static function createXmlDocFromMarkup($string, $asHtml = false): XmlDocument
    {

        $mime = XmlDocument::XML_TYPE;
        if ($asHtml) {
            $mime = XmlDocument::HTML_TYPE;
        }
        return new XmlDocument($string, $mime);
    }

    /**
     * HTML loading is more permissive
     *
     * For instance, you would not get an error on boolean attribute
     * ```
     * Error while loading HTMLError: Specification mandates value for attribute defer
     * ```
     * In Xml, a value is mandatory but not in HTML, they are known as:
     * https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attribute
     *
     *
     * @throws ExceptionBadSyntax
     */
    public static function createHtmlDocFromMarkup($markup): XmlDocument
    {
        return self::createXmlDocFromMarkup($markup, true);
    }

    /**
     * @return DOMDocument the wrapped DOM document
     */
    public
    function &getDomDocument(): DOMDocument
    {
        return $this->domDocument;
    }

    /**
     * Set an attribute on the root element
     *
     * @param $name
     * @param $value
     * @return void
     * @deprecated use {@link XmlDocument::getElement()} instead
     */
    public function setRootAttribute($name, $value)
    {
        if ($this->isXmlExtensionLoaded()) {
            $this->domDocument->documentElement->setAttribute($name, $value);
        }
    }

    /**
     * @param $name
     * @return string|null the value of the root attribute, null if not found or empty
     * @deprecated uses {@link XmlElement::getAttribute()} of {@link self::getElement()}
     */
    public function getRootAttributeValue($name): ?string
    {
        $value = $this->domDocument->documentElement->getAttribute($name);
        if ($value === "") {
            return null;
        }
        return $value;
    }

    /**
     * Alias of {@link XmlDocument::toXml()}
     *
     * @param DOMElement|null $element - the element to serialize, the root element if null
     * @return string
     */
    public function toXhtml(?DOMElement $element = null): string
    {
        return $this->toXml($element);
    }

    /**
     * Serialize an element as XML text, without doctype and trimmed
     *
     * @param DOMElement|null $element - the element to serialize, the root element if null
     * @return string
     */
    public function toXml(?DOMElement $element = null): string
    {

        if ($element === null) {
            $element = $this->getDomDocument()->documentElement;
        }
        /**
         * LIBXML_NOXMLDECL (no xml declaration) does not work because only empty tag is recognized
         * https://www.php.net/manual/en/domdocument.savexml.php
         */
        $xmlText = $this->getDomDocument()->saveXML(
            $element,
            LIBXML_NOXMLDECL
        );
        // Delete doctype (for svg optimization)
        // php has only doctype manipulation for HTML
        $xmlText = preg_replace('/^<!DOCTYPE.+?>/', '', $xmlText);
        return trim($xmlText);

    }

    /**
     * Check that the `libxml`, `xml` and `dom` extensions are loaded (in this order).
     * A message is logged for the first missing one.
     *
     * https://www.php.net/manual/en/dom.installation.php
     *
     * Check it with
     * ```
     * php -m
     * ```
     * Install with
     * ```
     * sudo apt-get install php-xml
     * ```
     * @return bool true if the three extensions are loaded
     */
    public function isXmlExtensionLoaded(): bool
    {
        // A suffix used in the bad message
        $suffixBadMessage = "php extension is not installed. To install it, you need to install xml. Example: `sudo apt-get install php-xml`, `yum install php-xml`";

        // https://www.php.net/manual/en/dom.requirements.php
        foreach (["libxml", "xml", "dom"] as $extension) {
            if (extension_loaded($extension) === false) {
                LogUtility::msg("The {$extension} {$suffixBadMessage}");
                return false;
            }
        }
        return true;
    }

    /**
     * Remove the elements, the attributes and the declarations of a namespace
     *
     * https://stackoverflow.com/questions/30257438/how-to-completely-remove-a-namespace-using-domdocument
     * @param $namespaceUri - the namespace URI (not the prefix)
     * @throws \RuntimeException if the namespace uri is empty
     */
    public function removeNamespace($namespaceUri)
    {
        if (empty($namespaceUri)) {
            throw new \RuntimeException("The namespace is empty and should be specified");
        }

        if (strpos($namespaceUri, "http") === false) {
            LogUtility::msg("Internal warning: The namespaceURI ($namespaceUri) does not seems to be an URI", LogUtility::LVL_MSG_WARNING, "support");
        }

        /**
         * Remove all elements that are in the namespace
         */
        try {
            $nodes = $this->xpath("//*[namespace-uri()='$namespaceUri']");
            foreach ($nodes as $node) {
                /** @var DOMElement $node */
                $node->parentNode->removeChild($node);
            }
        } catch (ExceptionBadSyntax $e) {
            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
        }

        /**
         * Remove all attributes that are in the namespace
         */
        try {
            $nodes = $this->xpath("//@*[namespace-uri()='$namespaceUri']");
            foreach ($nodes as $node) {
                /** @var DOMAttr $node */
                /** @var DOMElement $DOMNode */
                $DOMNode = $node->parentNode;
                $DOMNode->removeAttributeNode($node);
            }
        } catch (ExceptionBadSyntax $e) {
            LogUtility::error("Internal Error on xpath: {$e->getMessage()}");
        }


        // Remove the namespace declarations.
        // The namespace axis is evaluated on the root element
        // (the original code passed `ownerDocument` that is always null for a document;
        // a null context makes DOMXPath fall back on the root element, the result is the same)
        $xpath = new DOMXPath($this->getDomDocument());
        $DOMNodeList = $xpath->query("namespace::*", $this->getDomDocument()->documentElement);
        foreach ($DOMNodeList as $node) {
            $namespaceURI = $node->namespaceURI;
            if ($namespaceURI == $namespaceUri) {
                $parentNode = $node->parentNode;
                $parentNode->removeAttributeNS($namespaceUri, $node->localName);
            }
        }


    }

    /**
     * @return array the namespaces seen on the root element as `prefix => namespace uri`
     */
    public function getNamespaces(): array
    {
        /**
         * We can't query with the library {@link XmlDocument::xpath()} function because
         * we register in the xpath the namespace
         * (xpath() calls this function to know the namespaces to register)
         */
        $xpath = new DOMXPath($this->getDomDocument());
        // `namespace::*` means selects all the namespace attribute of the context node
        // namespace is an axes
        // See https://www.w3.org/TR/1999/REC-xpath-19991116/#axes
        // the namespace axis contains the namespace nodes of the context node; the axis will be empty unless the context node is an element
        // The context is the root element (the original code passed `ownerDocument`, always null
        // for a document, and relied on the fallback of DOMXPath on the root element)
        $DOMNodeList = $xpath->query('namespace::*', $this->getDomDocument()->documentElement);
        $nameSpace = [];
        foreach ($DOMNodeList as $node) {
            // $node is a namespace node: only prefix and namespaceURI are read
            $namespaceURI = $node->namespaceURI;
            $prefix = $node->prefix;
            if ($namespaceURI !== null && $namespaceURI !== '') {
                $nameSpace[$prefix] = $namespaceURI;
            }
        }
        return $nameSpace;
    }

    /**
     * A wrapper that register namespace for the query
     * with the defined prefix
     * See comment:
     * https://www.php.net/manual/en/domxpath.registernamespace.php#51480
     *
     * The xpath evaluator is created on the first call and reused afterwards.
     *
     * @param $query - the xpath expression
     * @param DOMElement|null $contextNode - the context node, the document if null
     * @return DOMNodeList
     *
     * Note that this is possible to do evaluation to return a string instead
     * https://www.php.net/manual/en/domxpath.evaluate.php
     * @throws ExceptionBadSyntax - if the query is invalid
     */
    public
    function xpath($query, ?DOMElement $contextNode = null): DOMNodeList
    {
        if (!isset($this->domXpath)) {

            $this->domXpath = new DOMXPath($this->getDomDocument());

            /**
             * Prefix mapping
             * It is necessary to use xpath to handle documents which have default namespaces.
             * The xpath expression will search for items with no namespace by default.
             */
            foreach ($this->getNamespaces() as $prefix => $namespaceUri) {
                /**
                 * You can't register an empty prefix
                 * Default namespace (without a prefix) can only be accessed by the local-name() and namespace-uri() attributes.
                 */
                if (empty($prefix)) {
                    continue;
                }
                $result = $this->domXpath->registerNamespace($prefix, $namespaceUri);
                if (!$result) {
                    LogUtility::msg("Not able to register the prefix ($prefix) for the namespace uri ($namespaceUri)");
                }
            }
        }

        // Without context node, the query is evaluated against the document
        $domList = $this->domXpath->query($query, $contextNode ?? $this->domDocument);
        if ($domList === false) {
            throw new ExceptionBadSyntax("The query expression ($query) may be malformed");
        }
        return $domList;

    }


    /**
     * Remove all attributes of the root element that have the given name
     *
     * @param $attribute - the attribute name
     * @throws \RuntimeException if an attribute could not be deleted
     */
    public
    function removeRootAttribute($attribute)
    {

        // This function does not work
        // $result = $this->getXmlDom()->documentElement->removeAttribute($attribute);

        $rootElement = $this->getDomDocument()->documentElement;
        $attributes = $rootElement->attributes;
        // Backward iteration: the attribute map is live, a removal shifts the following
        // attributes and a forward loop would skip the attribute just after a deleted one
        for ($i = $attributes->length - 1; $i >= 0; $i--) {
            $attributeNode = $attributes->item($i);
            if ($attributeNode->name !== (string)$attribute) {
                continue;
            }
            $result = $rootElement->removeAttributeNode($attributeNode);
            if ($result === false) {
                throw new \RuntimeException("Not able to delete the $attribute");
            }
            // There is no break here because you may find multiple version attribute for instance
        }

    }

    /**
     * Remove the first child of the root element that has the given node name
     *
     * @param $nodeName - the node name
     * @throws \RuntimeException if the child node could not be deleted
     */
    public
    function removeRootChildNode($nodeName)
    {
        $rootElement = $this->getDomDocument()->documentElement;
        foreach ($rootElement->childNodes as $childNode) {
            if ($childNode->nodeName != $nodeName) {
                continue;
            }
            $result = $rootElement->removeChild($childNode);
            if ($result == false) {
                throw new \RuntimeException("Not able to delete the child node $nodeName");
            }
            // only the first match is deleted
            break;
        }
    }

    /**
     *
     * Add a value to an attribute value
     * Example
     * <a class="actual">
     *
     * if you add "new"
     * <a class="actual new">
     *
     * The value is not added if it's already present in the space separated list.
     *
     * @param $attName
     * @param $attValue
     * @param DOMElement $xml
     */
    public
    function addAttributeValue($attName, $attValue, $xml)
    {

        /**
         * We test the value and not {@link DOMElement::hasAttribute()}
         * because even if the dom element has the attribute, the value
         * may be empty
         *
         * The test is on the empty string and not with `empty`,
         * otherwise an actual value of `0` would be overwritten
         */
        $actualAttValue = $xml->getAttribute($attName);
        if ($actualAttValue === null || $actualAttValue === '') {
            $xml->setAttribute($attName, $attValue);
            return;
        }

        $actualValues = explode(" ", $actualAttValue);
        if (!in_array((string)$attValue, $actualValues, true)) {
            $xml->setAttribute($attName, (string)$actualAttValue . " $attValue");
        }

    }

    /**
     * @param XmlDocument $rightDocument - the document to compare with
     * @return string the error text filled by {@link XmlSystems::diffNode()} (empty at start)
     */
    public function diff(XmlDocument $rightDocument): string
    {
        $error = "";
        XmlSystems::diffNode($this->getDomDocument(), $rightDocument->getDomDocument(), $error);
        return $error;
    }

    /**
     * @param DOMElement|null $element - the element to serialize, the root element if null
     * @return string a XML formatted
     *
     * !!!! The parameter preserveWhiteSpace should have been set to false before loading
     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
     * $this->xmlDom->preserveWhiteSpace = false;
     *
     * We do it with the function {@link XmlDocument::mandatoryFormatConfigBeforeLoading()}
     *
     * Note: `formatOutput` stays enabled on the document after this call
     */
    public function toXmlFormatted(?DOMElement $element = null): string
    {

        $this->domDocument->formatOutput = true;
        return $this->toXml($element);

    }

    /**
     * @param DOMElement|null $element - the element to serialize, the root element if null
     * @return string that can be diff
     *   * EOL diff are not seen
     *   * space are
     *
     * See also {@link XmlDocument::processTextBeforeLoading()}
     * that is needed before loading
     */
    public function toXmlNormalized(?DOMElement $element = null): string
    {

        /**
         * If the text was a list
         * of sibling text without parent
         * We may get a body
         * @deprecated letting the code until
         * TODO: delete this code when the test pass
         */
//        $body = $doc->getElementsByTagName("body");
//        if ($body->length != 0) {
//            $DOMNodeList = $body->item(0)->childNodes;
//            $output = "";
//            foreach ($DOMNodeList as $value) {
//                $output .= $doc->saveXML($value) . DOKU_LF;
//            }
//        }

        if ($element === null) {
            $element = $this->domDocument->documentElement;
        }
        // merge the adjacent text nodes
        $element->normalize();
        return $this->toXmlFormatted($element);
    }

    /**
     * Not really conventional but
     * to be able to {@link toXmlNormalized}
     * the EOL should be deleted
     * We do it before loading and not with a XML documentation
     *
     * NOTE(review): if DOKU_LF is `\n` (defined outside of this file, to confirm),
     * the three regular expressions below never match because no `\n` is left
     * after the first replacement.
     *
     * @param $text - the raw markup
     * @return string|string[]|null the markup without the DOKU_LF end of line
     */
    private function processTextBeforeLoading($text)
    {
        $text = str_replace(DOKU_LF, "", $text);
        $text = preg_replace("/\r\n\s*\r\n/", "\r\n", $text);
        $text = preg_replace("/\n\s*\n/", "\n", $text);
        $text = preg_replace("/\n\n/", "\n", $text);
        return $text;

    }


    /**
     * This function is called just before loading
     * in order to be able to {@link XmlDocument::toXmlFormatted() format the output }
     * https://www.php.net/manual/en/class.domdocument.php#domdocument.props.formatoutput
     * Mandatory for a good formatting before loading
     *
     */
    private function mandatoryFormatConfigBeforeLoading()
    {
        // note that
        // the loading option: LIBXML_NOBLANKS
        // is equivalent to $this->xmlDom->preserveWhiteSpace = false;
        $this->domDocument->preserveWhiteSpace = false;
    }

    /**
     * Remove an attribute from an element (nothing happens if the attribute is not present)
     *
     * @param string $attributeName
     * @param DOMElement $nodeElement
     * @return void
     * @deprecated use the {@link XmlElement::removeAttribute()} if possible
     */
    public function removeAttributeValue(string $attributeName, DOMElement $nodeElement)
    {
        $attr = $nodeElement->getAttributeNode($attributeName);
        if (!$attr) {
            return;
        }
        $result = $nodeElement->removeAttributeNode($attr);
        if ($result === false) {
            LogUtility::msg("Not able to delete the attribute $attributeName of the node element $nodeElement->tagName in the Xml document");
        }
    }


    /**
     * Query via a CSS selector
     * (note that it will not work with other namespace than the default one, ie xmlns will not work)
     *
     * @param string $selector - the css selector
     * @return XmlElement the first selected element
     * @throws ExceptionBadSyntax - if the selector is not valid
     * @throws ExceptionNotFound - if the selector selects nothing
     */
    public function querySelector(string $selector): XmlElement
    {
        $xmlElements = $this->querySelectorAll($selector);
        if (count($xmlElements) >= 1) {
            return $xmlElements[0];
        }
        throw new ExceptionNotFound("No element was found with the selector $selector");

    }

    /**
     * Query via a CSS selector, only the element nodes are returned
     *
     * @param string $selector - the css selector
     * @return XmlElement[]
     * @throws ExceptionBadSyntax
     */
    public function querySelectorAll(string $selector): array
    {
        $xpath = $this->cssSelectorToXpath($selector);
        $xmlElements = [];
        foreach ($this->xpath($xpath) as $domNode) {
            if ($domNode instanceof DOMElement) {
                $xmlElements[] = new XmlElement($domNode, $this);
            }
        }
        return $xmlElements;

    }

    /**
     * @param string $selector - the css selector
     * @return string the xpath expression given by {@link PhpCss::toXpath()}
     * @throws ExceptionBadSyntax - if the selector could not be parsed
     */
    public function cssSelectorToXpath(string $selector): string
    {
        try {
            return PhpCss::toXpath($selector);
        } catch (PhpCss\Exception\ParserException $e) {
            throw new ExceptionBadSyntax("The selector ($selector) is not valid. Error: {$e->getMessage()}");
        }
    }

    /**
     * An utility function to know how to remove a node
     * (nothing happens if the node has no parent, ie is already detached)
     *
     * @param \DOMNode $nodeElement
     * @deprecated use {@link XmlElement::remove} instead
     */
    public function removeNode(\DOMNode $nodeElement)
    {

        $parentNode = $nodeElement->parentNode;
        if ($parentNode === null) {
            return;
        }
        $parentNode->removeChild($nodeElement);

    }

    /**
     * @return XmlElement the root element
     */
    public function getElement(): XmlElement
    {
        return XmlElement::create($this->getDomDocument()->documentElement, $this);
    }

    /**
     * @return string|false the whole document serialized with {@link DOMDocument::saveHTML()}
     */
    public function toHtml()
    {
        return $this->domDocument->saveHTML();
    }

    /**
     * @throws \DOMException - if
invalid local name 763*04fd306cSNickeau */ 764*04fd306cSNickeau public function createElement(string $localName): XmlElement 765*04fd306cSNickeau { 766*04fd306cSNickeau $element = $this->domDocument->createElement($localName); 767*04fd306cSNickeau return XmlElement::create($element, $this); 768*04fd306cSNickeau } 769*04fd306cSNickeau 770*04fd306cSNickeau /** 771*04fd306cSNickeau * @throws ExceptionBadSyntax 772*04fd306cSNickeau * @throws ExceptionBadState 773*04fd306cSNickeau */ 774*04fd306cSNickeau public function xpathFirstDomElement(string $xpath): DOMElement 775*04fd306cSNickeau { 776*04fd306cSNickeau $domList = $this->xpath($xpath); 777*04fd306cSNickeau $domElement = $domList->item(0); 778*04fd306cSNickeau if ($domElement instanceof DOMElement) { 779*04fd306cSNickeau return $domElement; 780*04fd306cSNickeau } else { 781*04fd306cSNickeau throw new ExceptionBadState("The first DOM node is not a DOM element"); 782*04fd306cSNickeau } 783*04fd306cSNickeau } 784*04fd306cSNickeau 785*04fd306cSNickeau 786*04fd306cSNickeau} 787