<?php

namespace ComboStrap;

use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;
use syntax_plugin_combo_variable;

/**
 *
 * Basically, a class that parses a link/media markup reference and returns a URL.
 *
 * In detail, the class parses the reference:
 *   * from a {@link MarkupRef::createMediaFromRef() media markup}
 *   * or a {@link MarkupRef::createLinkFromRef() link markup}
 * and returns a {@link MarkupRef::getUrl() URL}.
 *
 * You may determine the {@link MarkupRef::getSchemeType() type of reference}
 *
 * For a {@link MarkupRef::WIKI_URI}, the URL returned is:
 *   * a {@link UrlEndpoint::createFetchUrl() fetch url} for a media
 *   * a {@link UrlEndpoint::createDokuUrl() doku url} for a link (ie page)
 *
 * If this is a {@link MarkupRef::INTERWIKI_URI}, you may also get the {@link MarkupRef::getInterWiki() interwiki instance}
 * If this is a {@link MarkupRef::WIKI_URI}, you may also get the {@link MarkupRef::getPath() path}
 *
 *
 * Why ?
 * The parsing function {@link Doku_Handler_Parse_Media} has some flaws / problems:
 *   * It keeps the anchor only if there is no query string
 *   * It takes the first digit as the width (ie media.pdf?page=31 would have a width of 31)
 *   * `src` is not only the media path but may have an anchor
 *   * ...
 *
 */
class MarkupRef
{
    public const WINDOWS_SHARE_URI = 'windowsShare';
    public const LOCAL_URI = 'local';
    public const EMAIL_URI = 'email';
    public const WEB_URI = 'external';

    /**
     * Type of Ref
     */
    public const INTERWIKI_URI = 'interwiki';
    public const WIKI_URI = 'internal';
    public const VARIABLE_URI = 'internal_template';
    public const REF_ATTRIBUTE = "ref";


    /**
     * The type of markup ref (ie media or link)
     */
    private string $type;
    public const MEDIA_TYPE = "media";
    public const LINK_TYPE = "link";

    /**
     * The kind of reference that was detected (one of the `*_URI` constants)
     */
    private string $refScheme;
    public const EXTERNAL_MEDIA_CALL_NAME = "external";


    /**
     * The reference as given to the constructor (not trimmed)
     */
    private string $ref;
    private ?Url $url = null;

    /**
     * Set for a local link and for a wiki reference, null otherwise
     */
    private ?Path $path = null;

    /**
     * Set only for an interwiki reference
     */
    private ?InterWiki $interWiki = null;


    /**
     * Parse the reference and build the URL.
     *
     * The checks are applied in this order, the first match wins:
     * email, `scheme://` URL, windows share, fragment only (local link),
     * interwiki, variable (template) and finally wiki path.
     *
     * @param string $ref - the reference found in the markup
     * @param string $type - {@link MarkupRef::MEDIA_TYPE} or {@link MarkupRef::LINK_TYPE}
     * @throws ExceptionBadSyntax - if a `scheme://` reference is not a valid URL or if its scheme is not authorized
     * @throws ExceptionBadArgument - if the type is unknown for a wiki reference
     * @throws ExceptionNotFound
     */
    public function __construct($ref, $type)
    {
        $this->ref = $ref;
        $this->type = $type;

        $this->url = Url::createEmpty();

        $ref = trim($ref);

        /**
         * Email validation pattern
         * E-Mail (pattern below is defined in inc/mail.php)
         *
         * Example:
         * [[support@combostrap.com?subject=hallo world]]
         * [[support@combostrap.com]]
         *
         * NOTE(review): the pattern is not anchored and this test runs before the
         * `scheme://` test below, so any reference that merely contains
         * an email-like substring is classified as an email - confirm that this is wanted.
         */
        $emailRfc2822 = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";
        $emailPattern = '[' . $emailRfc2822 . ']+(?:\.[' . $emailRfc2822 . ']+)*@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';
        if (preg_match('<' . $emailPattern . '>', $ref)) {
            $this->refScheme = self::EMAIL_URI;
            $position = strpos($ref, "?");

            if ($position !== false) {
                // everything before the first `?` is the address, the rest is parsed as query string
                $email = substr($ref, 0, $position);
                $queryStringAndFragment = substr($ref, $position + 1);
                $this->url = Url::createFromString("mailto:$email");
                $this->parseAndAddQueryStringAndFragment($queryStringAndFragment);
            } else {
                $this->url = Url::createFromString("mailto:$ref");
            }
            return;
        }

        /**
         * Case when the URL is just a full conform URL
         *
         * Example: `https://` or `ftp://`
         *
         * A scheme without `//` is not recognized
         * because it can also be a wiki id
         * For instance, `mailto:` is also a valid page
         *
         * The scheme is then checked against the authorized schemes
         */
        if (preg_match('#^([a-z0-9\-.+]+?)://#i', $ref)) {
            try {
                $this->url = Url::createFromString($ref);
                $this->refScheme = self::WEB_URI;

                /**
                 * Authorized scheme only (to not inject code ?)
                 */
                $authorizedSchemes = self::loadAndGetAuthorizedSchemes();
                if (!in_array($this->url->getScheme(), $authorizedSchemes, true)) {
                    // caught and wrapped by the catch clause below
                    throw new ExceptionBadSyntax("The scheme ({$this->url->getScheme()}) of the URL ({$this->url}) is not authorized");
                }
                try {
                    $isImage = FileSystems::getMime($this->url)->isImage();
                } catch (ExceptionNotFound $e) {
                    $isImage = false;
                }
                if ($isImage) {
                    $properties = $this->url->getQueryProperties();
                    if (count($properties) >= 1) {
                        try {
                            /**
                             * The first parameter is the `Width X Height`
                             */
                            $widthAndHeight = array_key_first($properties);
                            $xPosition = strpos($widthAndHeight, "x");
                            if ($xPosition !== false) {
                                $width = DataType::toInteger(substr($widthAndHeight, 0, $xPosition));
                                if ($width !== 0) {
                                    // a width of 0 (ie `0x200`) is not added
                                    $this->url->addQueryParameter(Dimension::WIDTH_KEY, $width);
                                }
                                $height = DataType::toInteger(substr($widthAndHeight, $xPosition + 1));
                                $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $height);
                            } else {
                                $width = DataType::toInteger($widthAndHeight);
                                $this->url->addQueryParameter(Dimension::WIDTH_KEY, $width);
                            }
                            // the raw sizing key is replaced by the width/height parameters
                            $this->url->deleteQueryParameter($widthAndHeight);
                            if ($this->url->hasProperty(MediaMarkup::LINKING_NOLINK_VALUE)) {
                                // the boolean `nolink` property becomes a linking key/value
                                $this->url->addQueryParameter(MediaMarkup::LINKING_KEY, MediaMarkup::LINKING_NOLINK_VALUE);
                                $this->url->deleteQueryParameter(MediaMarkup::LINKING_NOLINK_VALUE);
                            }
                        } catch (ExceptionBadArgument $e) {
                            // not a number/integer: the first property is not a sizing, the URL is left as is
                        }
                    }
                }
                return;
            } catch (ExceptionBadSyntax $e) {
                throw new ExceptionBadSyntax("The url string was not validated as an URL ($ref). Error: {$e->getMessage()}");
            }
        }

        /**
         * Windows share link
         * (two backslashes, a host and a backslash)
         */
        if (preg_match('/^\\\\\\\\[^\\\\]+?\\\\/u', $ref)) {
            $this->refScheme = self::WINDOWS_SHARE_URI;
            $this->url = LocalPath::createFromPathString($ref)->getUrl();
            return;
        }

        /**
         * Only Fragment (also known as local link)
         */
        if (preg_match('/^#.?/', $ref)) {
            $this->refScheme = self::LOCAL_URI;

            $fragment = substr($ref, 1);
            if ($fragment !== "") {
                $fragment = OutlineSection::textToHtmlSectionId($fragment);
            }
            $this->url = Url::createEmpty()->setFragment($fragment);
            // a local link targets the requested page
            $this->path = WikiPath::createRequestedPagePathFromRequest();
            return;
        }

        /**
         * Interwiki ?
         */
        if (preg_match('/^[a-zA-Z0-9.]+>/u', $ref)) {

            $this->refScheme = MarkupRef::INTERWIKI_URI;
            switch ($type) {
                case self::MEDIA_TYPE:
                    $this->interWiki = InterWiki::createMediaInterWikiFromString($ref);
                    break;
                case self::LINK_TYPE:
                    $this->interWiki = InterWiki::createLinkInterWikiFromString($ref);
                    break;
                default:
                    LogUtility::internalError("The type ($type) is unknown, returning a interwiki link ref");
                    $this->interWiki = InterWiki::createLinkInterWikiFromString($ref);
                    break;
            }
            $this->url = $this->interWiki->toUrl();
            return;

        }


        /**
         * It can be a link with a ref template
         * (the URL stays the empty URL created at the beginning)
         */
        if (syntax_plugin_combo_variable::isVariable($ref)) {
            $this->refScheme = MarkupRef::VARIABLE_URI;
            return;
        }

        /**
         * Doku Path
         * We parse it
         */
        $this->refScheme = MarkupRef::WIKI_URI;

        $questionMarkPosition = strpos($ref, "?");
        $wikiPath = $ref;
        $fragment = null;
        $queryStringAndAnchorOriginal = null;
        if ($questionMarkPosition !== false) {
            // the anchor, if any, is extracted later while parsing the query string
            $wikiPath = substr($ref, 0, $questionMarkPosition);
            $queryStringAndAnchorOriginal = substr($ref, $questionMarkPosition + 1);
        } else {
            // We may have only an anchor
            $hashTagPosition = strpos($ref, "#");
            if ($hashTagPosition !== false) {
                $wikiPath = substr($ref, 0, $hashTagPosition);
                $fragment = substr($ref, $hashTagPosition + 1);
            }
        }

        /**
         *
         * Clean it
         */
        $wikiPath = $this->normalizePath($wikiPath);

        /**
         * The URL
         * The path is created at the end because it may have a revision
         */
        switch ($type) {
            case self::MEDIA_TYPE:
                $this->url = UrlEndpoint::createFetchUrl();
                break;
            case self::LINK_TYPE:
                $this->url = UrlEndpoint::createDokuUrl();
                break;
            default:
                throw new ExceptionBadArgument("The ref type ($type) is unknown");
        }


        /**
         * Parsing Query string if any
         */
        if ($queryStringAndAnchorOriginal !== null) {

            $this->parseAndAddQueryStringAndFragment($queryStringAndAnchorOriginal);

        }

        /**
         * The path
         */
        try {
            $rev = $this->url->getQueryPropertyValue(WikiPath::REV_ATTRIBUTE);
        } catch (ExceptionNotFound $e) {
            $rev = null;
        }
        /**
         * The wiki path may be relative
         */
        switch ($type) {
            case self::MEDIA_TYPE:
                $this->path = WikiPath::createMediaPathFromId($wikiPath, $rev);
                $this->url->addQueryParameter(MediaMarkup::$MEDIA_QUERY_PARAMETER, $this->path->getWikiId());
                $this->addRevToUrl($rev);

                if ($fragment !== null) {
                    $this->url->setFragment($fragment);
                }

                break;
            case self::LINK_TYPE:

                /**
                 * The path may be an id if it exists
                 * otherwise it's a relative path
                 * MarkupPath is important because a link to
                 * a namespace (ie wikiPath = `ns:`)
                 * should become `ns:start`)
                 */
                $markupPath = MarkupPath::createMarkupFromStringPath($wikiPath);
                if (!FileSystems::exists($markupPath) && $wikiPath !== "") {
                    // We test for an empty wikiPath string
                    // because if the wiki path is the empty string,
                    // this is the current requested page
                    // An empty id is the root and always exists
                    $idPath = MarkupPath::createMarkupFromId($wikiPath);
                    if (FileSystems::exists($idPath)) {
                        $markupPath = $idPath;
                    }
                }

                /**
                 * The path may be a namespace, in the page system
                 * the path should then be the index page
                 */
                try {
                    $this->path = $markupPath->getPathObject()->toWikiPath();
                } catch (ExceptionCompile $e) {
                    throw new ExceptionRuntimeInternal("Path should be a wiki path");
                }
                $this->url->addQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $this->path->getWikiId());
                $this->addRevToUrl($rev);

                if ($fragment !== null) {
                    // for a page, the anchor text is transformed into a section id
                    $fragment = OutlineSection::textToHtmlSectionId($fragment);
                    $this->url->setFragment($fragment);
                }

                break;
            default:
                // NOTE(review): not reachable, an unknown type is already rejected by the switch above
                throw new ExceptionBadArgument("The ref type ($type) is unknown");
        }

    }

    /**
     * Create a markup ref from the reference of a media markup
     *
     * @param string $refProcessing - the reference
     * @throws ExceptionBadArgument
     * @throws ExceptionBadSyntax
     * @throws ExceptionNotFound
     */
    public static function createMediaFromRef($refProcessing): MarkupRef
    {
        return new MarkupRef($refProcessing, self::MEDIA_TYPE);
    }

    /**
     * Create a markup ref from the reference of a link markup
     *
     * @param string $refProcessing - the reference
     * @throws ExceptionBadSyntax
     * @throws ExceptionBadArgument
     * @throws ExceptionNotFound
     */
    public static function createLinkFromRef($refProcessing): MarkupRef
    {
        return new MarkupRef($refProcessing, self::LINK_TYPE);
    }

    /**
     * The URL schemes that are authorized, read from the configuration
     * of the execution context
     *
     * See https://www.dokuwiki.org/urlschemes
     *
     * @return array
     */
    private static function loadAndGetAuthorizedSchemes(): array
    {

        return ExecutionContext::getActualOrCreateFromEnv()
            ->getConfig()
            ->getAuthorizedUrlSchemes();

    }

    /**
     * In case of manual entry, the function will normalize the path
     *
     * The path is cleaned with the DokuWiki `cleanID` function, and the markers
     * that were detected before the cleaning (leading separator, trailing separator,
     * current and parent prefix) are added back.
     *
     * @param string $wikiPath - a path entered by a user
     * @return string
     */
    public function normalizePath(string $wikiPath): string
    {
        if ($wikiPath === "") {
            return $wikiPath;
        }
        // the slash separator is replaced by the double point separator
        $wikiPath = str_replace(WikiPath::NAMESPACE_SEPARATOR_SLASH, WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $wikiPath);

        // a trailing separator: this is a namespace
        $isNamespacePath = $wikiPath[strlen($wikiPath) - 1] === WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT;

        // a leading separator
        $isPath = $wikiPath[0] === WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT;

        // relative to the current (one character) or to the parent (two characters)
        $pathType = "unknown";
        if ($wikiPath[0] === WikiPath::CURRENT_PATH_CHARACTER) {
            $pathType = "current";
            if (isset($wikiPath[1]) && $wikiPath[1] === WikiPath::CURRENT_PATH_CHARACTER) {
                $pathType = "parent";
            }
        }

        /**
         * Dokuwiki Compliance
         */
        $cleanPath = cleanID($wikiPath);
        if ($isNamespacePath) {
            $cleanPath = "$cleanPath:";
        }
        switch ($pathType) {
            case "current":
                if (!$isNamespacePath) {
                    $cleanPath = WikiPath::CURRENT_PATH_CHARACTER . WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT . $cleanPath;
                } else {
                    $cleanPath = WikiPath::CURRENT_PATH_CHARACTER . $cleanPath;
                }
                break;
            case "parent":
                if (!$isNamespacePath) {
                    $cleanPath = WikiPath::CURRENT_PARENT_PATH_CHARACTER . WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT . $cleanPath;
                } else {
                    $cleanPath = WikiPath::CURRENT_PARENT_PATH_CHARACTER . $cleanPath;
                }
                break;
        }
        if ($isPath) {
            $cleanPath = ":$cleanPath";
        }
        return $cleanPath;
    }


    /**
     * @return Url - the URL built from the reference
     */
    public function getUrl(): Url
    {
        return $this->url;
    }

    /**
     * @return WikiPath - the path of a wiki reference or of a local link
     * @throws ExceptionNotFound - if the reference has no path
     */
    public function getPath(): WikiPath
    {
        if ($this->path === null) {
            throw new ExceptionNotFound("No path was found");
        }
        return $this->path;
    }

    /**
     * @return string - the reference as given at construction
     */
    public function getRef(): string
    {
        return $this->ref;
    }

    /**
     * @return string - the kind of reference (one of the `*_URI` constants)
     */
    public function getSchemeType(): string
    {
        return $this->refScheme;
    }

    /**
     * @return InterWiki
     * @throws ExceptionNotFound - if the reference is not an interwiki
     */
    public function getInterWiki(): InterWiki
    {
        if ($this->interWiki === null) {
            throw new ExceptionNotFound("This ref ($this->ref) is not an interWiki.");
        }
        return $this->interWiki;
    }

    /**
     * Add the revision to the URL if there is one
     *
     * @param mixed $rev - the revision or null
     */
    private function addRevToUrl($rev = null): void
    {
        if ($rev !== null) {
            $this->url->addQueryParameter(WikiPath::REV_ATTRIBUTE, $rev);
        }
    }


    /**
     * @return string - the type given at construction (media or link)
     */
    public function getType(): string
    {
        return $this->type;
    }

    /**
     * Parse a query string that may be followed by a fragment
     * and add the result to the URL.
     *
     * A query parameter value may have a `#` for the definition of a color.
     * This process takes it into account.
     *
     * The string is split on `&` and each token is processed in this order:
     *   * a leading sizing (`w` or `wxh`)
     *   * a leading linking keyword (nolink, direct, linkonly, details)
     *   * a leading no-cache keyword
     *   * a fragment only (`#xxx`)
     *   * a `key=value` or a boolean `key`, where a fragment may follow
     *
     * @param string $queryStringAndFragment - the text after the first `?`
     * @return void
     */
    private function parseAndAddQueryStringAndFragment(string $queryStringAndFragment)
    {
        /**
         * We parse token by token because we allow a hashtag for a hex color
         */
        $remaining = $queryStringAndFragment;
        while (strlen($remaining) > 0) {

            /**
             * Capture the token
             * and reduce the text
             */
            $ampersandPosition = strpos($remaining, "&");
            if ($ampersandPosition !== false) {
                $token = substr($remaining, 0, $ampersandPosition);
                $remaining = substr($remaining, $ampersandPosition + 1);
            } else {
                $token = $remaining;
                $remaining = "";
            }


            /**
             * Sizing (wxh)
             */
            $sizing = [];
            if (preg_match('/^([0-9]+)(?:x([0-9]+))?/', $token, $sizing)) {
                $this->url->addQueryParameter(Dimension::WIDTH_KEY, $sizing[1]);
                if (isset($sizing[2])) {
                    $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $sizing[2]);
                }
                $token = substr($token, strlen($sizing[0]));
                if ($token === "") {
                    // no anchor behind we continue
                    continue;
                }
            }

            /**
             * Linking
             */
            $matches = [];
            if (preg_match('/^(nolink|direct|linkonly|details)/i', $token, $matches)) {
                $linkingValue = $matches[1];
                $this->url->addQueryParameter(MediaMarkup::LINKING_KEY, $linkingValue);
                $token = substr($token, strlen($linkingValue));
                if ($token === "") {
                    // no anchor behind we continue
                    continue;
                }
            }

            /**
             * Cache
             * (the value is quoted to be matched literally)
             */
            $noCacheValue = IFetcherAbs::NOCACHE_VALUE;
            if (preg_match('/^(' . preg_quote($noCacheValue, '/') . ')/i', $token)) {
                $this->url->addQueryParameter(IFetcherAbs::CACHE_KEY, $noCacheValue);
                $token = substr($token, strlen($noCacheValue));
                if ($token === "") {
                    // no anchor behind we continue
                    continue;
                }
            }

            /**
             * Anchor value after a single token case
             */
            if (strpos($token, '#') === 0) {
                $this->url->setFragment(substr($token, 1));
                continue;
            }

            /**
             * Key, value
             * explode to the first `=`
             * in the anchor value, we can have one
             *
             * Ex with media.pdf#page=31
             *
             * The value is null for a boolean attribute (ie without `=`)
             */
            $tokens = explode("=", $token, 2);
            $key = $tokens[0];
            $value = $tokens[1] ?? null;

            /**
             * Case of an anchor after a boolean attribute (ie without =)
             * at the end
             */
            $anchorPosition = strpos($key, '#');
            if ($anchorPosition !== false) {
                $this->url->setFragment(substr($key, $anchorPosition + 1));
                $key = substr($key, 0, $anchorPosition);
            }

            /**
             * Test Anchor on the value
             */
            if ($value !== null && ($countHashTag = substr_count($value, "#")) >= 3) {
                LogUtility::msg("The value ($value) of the key ($key) for the link ($this) has $countHashTag `#` characters and the maximum supported is 2.", LogUtility::LVL_MSG_ERROR);
                continue;
            }

            $lowerCaseKey = strtolower($key);

            /**
             * Anchor in the value
             * A boolean attribute has no value (null), there is nothing to search
             * (and a null should not be passed to the string functions)
             */
            $anchorPosition = false;
            if ($value !== null) {
                if ($lowerCaseKey === TextColor::CSS_ATTRIBUTE && strpos($value, '#') === 0) {
                    /**
                     * Special case when color has one color value as hexadecimal #
                     * and the hashtag: the anchor is then the last one
                     * With only one hashtag, there is no anchor
                     */
                    if (substr_count($value, "#") >= 2) {
                        $anchorPosition = strrpos($value, '#');
                    }
                } else {
                    // general case (a color that is not hexadecimal can also have an anchor)
                    $anchorPosition = strpos($value, "#");
                }
            }
            if ($anchorPosition !== false) {
                $this->url->setFragment(substr($value, $anchorPosition + 1));
                $value = substr($value, 0, $anchorPosition);
            }

            switch ($lowerCaseKey) {
                case Dimension::WIDTH_KEY_SHORT: // used in a link w=xxx
                    $this->url->addQueryParameter(Dimension::WIDTH_KEY, $value);
                    break;
                case Dimension::HEIGHT_KEY_SHORT: // used in a link h=xxxx
                    $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $value);
                    break;
                default:
                    // the key is added with its original case
                    $this->url->addQueryParameter($key, $value);
                    break;
            }

        }

    }

    /**
     * @return string - the reference as given at construction
     */
    public function __toString()
    {
        return $this->getRef();
    }


}