1 <?php
2 
3 namespace ComboStrap;
4 
5 use ComboStrap\Web\Url;
6 use ComboStrap\Web\UrlEndpoint;
7 use syntax_plugin_combo_variable;
8 
9 /**
10  *
 * Basically, a class that parses a link/media markup reference and returns a URL.
12  *
13  * Detailed, the class parses the reference:
14  *   * from a {@link MarkupRef::createMediaFromRef() media markup}
15  *   * or {@link MarkupRef::createLinkFromRef() link markup}
16  * and returns an {@link MarkupRef::getUrl() URL},
17  *
18  * You may determine the {@link MarkupRef::getSchemeType() type of reference}
19  *
20  * For a {@link MarkupRef::WIKI_URI}, the URL returned is:
21  *   * a {@link UrlEndpoint::createFetchUrl() fetch url} for a media
22  *   * a {@link UrlEndpoint::createDokuUrl() doku url} for a link (ie page)
23  *
24  * If this is a {@link MarkupRef::INTERWIKI_URI}, you may also get the {@link MarkupRef::getInterWiki() interwiki instance}
25  * If this is a {@link MarkupRef::WIKI_URI}, you may also get the {@link MarkupRef::getPath() path}
26  *
27  *
28  * Why ?
 * The parsing function {@link Doku_Handler_Parse_Media} has some flaws / problems:
 *    * It keeps the anchor only if there is no query string
 *    * It takes the first digit as the width (ie media.pdf?page=31 would have a width of 31)
 *    * `src` is not only the media path but may also have an anchor
33  *    * ...
34  *
35  */
36 class MarkupRef
37 {
38     public const WINDOWS_SHARE_URI = 'windowsShare';
39     public const LOCAL_URI = 'local';
40     public const EMAIL_URI = 'email';
41     public const WEB_URI = 'external';
42 
43     /**
44      * Type of Ref
45      */
46     public const INTERWIKI_URI = 'interwiki';
47     public const WIKI_URI = 'internal';
48     public const VARIABLE_URI = 'internal_template';
49     public const REF_ATTRIBUTE = "ref";
50 
51 
52     /**
53      * The type of markup ref (ie media or link)
54      */
55     private string $type;
56     const MEDIA_TYPE = "media";
57     const LINK_TYPE = "link";
58 
59     private string $refScheme;
60     public const EXTERNAL_MEDIA_CALL_NAME = "external";
61 
62 
63     private string $ref;
64     private ?Url $url = null;
65 
66     private ?Path $path = null;
67     private ?InterWiki $interWiki = null;
68 
69 
/**
 * Parse a link or media markup reference and build the corresponding URL.
 *
 * The trimmed reference is tested against the following forms, in this order
 * (the first match wins):
 *   * an email (only when the reference is not a `scheme://` URL)
 *   * a full URL (`scheme://`), the scheme should be an authorized one
 *   * a windows share
 *   * a local link (the reference starts with `#`)
 *   * an interwiki (`wiki>ref`)
 *   * a variable (no URL is built, the URL stays empty)
 *   * a wiki path (the default)
 *
 * @param string $ref - the reference as written in the markup
 * @param string $type - {@link MarkupRef::MEDIA_TYPE} or {@link MarkupRef::LINK_TYPE}
 * @throws ExceptionBadSyntax - if a `scheme://` reference is not a valid URL or if its scheme is not authorized
 * @throws ExceptionBadArgument - if the reference is a wiki path and the type is unknown
 * @throws ExceptionNotFound
 */
public function __construct($ref, $type)
{
    $this->ref = $ref;
    $this->type = $type;

    $this->url = Url::createEmpty();

    $ref = trim($ref);

    /**
     * Case when the URL is just a full conform URL
     *
     * Example: `https://` or `ftp://`
     *
     * Other scheme are not yet recognized
     * because it can also be a wiki id
     * For instance, `mailto:` is also a valid page
     *
     * same as {@link media_isexternal()}  check only http / ftp scheme
     *
     * The test is done before the email test because the email pattern
     * is not anchored: a URL that contains an email
     * (ie `https://user@example.com/` or `https://example.com/?mail=foo@bar.com`)
     * would otherwise be seen as an email.
     */
    $isFullUrl = preg_match('#^([a-z0-9\-.+]+?)://#i', $ref) === 1;

    /**
     * Email validation pattern
     * E-Mail (pattern below is defined in inc/mail.php)
     *
     * Example:
     * [[support@combostrap.com?subject=hallo world]]
     * [[support@combostrap.com]]
     */
    $emailRfc2822 = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";
    $emailPattern = '[' . $emailRfc2822 . ']+(?:\.[' . $emailRfc2822 . ']+)*@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';
    if (!$isFullUrl && preg_match('<' . $emailPattern . '>', $ref)) {
        $this->refScheme = self::EMAIL_URI;
        $position = strpos($ref, "?");

        if ($position !== false) {
            // The properties (subject, ...) are located after the first question mark
            $email = substr($ref, 0, $position);
            $queryStringAndFragment = substr($ref, $position + 1);
            $this->url = Url::createFromString("mailto:$email");
            $this->parseAndAddQueryStringAndFragment($queryStringAndFragment);
        } else {
            $this->url = Url::createFromString("mailto:$ref");
        }
        return;
    }

    /**
     * Full URL
     */
    if ($isFullUrl) {
        try {
            $this->url = Url::createFromString($ref);
            $this->refScheme = self::WEB_URI;

            /**
             * Authorized scheme only (to not inject code ?)
             */
            $authorizedSchemes = self::loadAndGetAuthorizedSchemes();
            if (!in_array($this->url->getScheme(), $authorizedSchemes, true)) {
                throw new ExceptionBadSyntax("The scheme ({$this->url->getScheme()}) of the URL ({$this->url}) is not authorized");
            }
            try {
                $isImage = FileSystems::getMime($this->url)->isImage();
            } catch (ExceptionNotFound $e) {
                // No mime found, this is not an image
                $isImage = false;
            }
            if ($isImage) {
                $properties = $this->url->getQueryProperties();
                if (count($properties) >= 1) {
                    try {
                        /**
                         * The first parameter is the `Width X Height`
                         */
                        $widthAndHeight = array_key_first($properties);
                        $xPosition = strpos($widthAndHeight, "x");
                        if ($xPosition !== false) {
                            $width = DataType::toInteger(substr($widthAndHeight, 0, $xPosition));
                            if ($width !== 0) {
                                $this->url->addQueryParameter(Dimension::WIDTH_KEY, $width);
                            }
                            $height = DataType::toInteger(substr($widthAndHeight, $xPosition + 1));
                            $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $height);
                        } else {
                            $width = DataType::toInteger($widthAndHeight);
                            $this->url->addQueryParameter(Dimension::WIDTH_KEY, $width);
                        }
                        // The sizing is now stored in the width/height parameters
                        $this->url->deleteQueryParameter($widthAndHeight);
                        if ($this->url->hasProperty(MediaMarkup::LINKING_NOLINK_VALUE)) {
                            // The boolean `nolink` becomes a linking key/value
                            $this->url->addQueryParameter(MediaMarkup::LINKING_KEY, MediaMarkup::LINKING_NOLINK_VALUE);
                            $this->url->deleteQueryParameter(MediaMarkup::LINKING_NOLINK_VALUE);
                        }
                    } catch (ExceptionBadArgument $e) {
                        // not a number/integer, the first parameter is not a sizing
                    }
                }
            }
            return;
        } catch (ExceptionBadSyntax $e) {
            throw new ExceptionBadSyntax("The url string was not validated as an URL ($ref). Error: {$e->getMessage()}");
        }
    }

    /**
     * Windows share link
     */
    if (preg_match('/^\\\\\\\\[^\\\\]+?\\\\/u', $ref)) {
        $this->refScheme = self::WINDOWS_SHARE_URI;
        $this->url = LocalPath::createFromPathString($ref)->getUrl();
        return;
    }

    /**
     * Only Fragment (also known as local link)
     */
    if (strpos($ref, '#') === 0) {
        $this->refScheme = self::LOCAL_URI;

        $fragment = substr($ref, 1);
        if ($fragment !== "") {
            $fragment = OutlineSection::textToHtmlSectionId($fragment);
        }
        $this->url = Url::createEmpty()->setFragment($fragment);
        // A local link targets the requested page
        $this->path = WikiPath::createRequestedPagePathFromRequest();
        return;
    }

    /**
     * Interwiki ?
     */
    if (preg_match('/^[a-zA-Z0-9.]+>/u', $ref)) {

        $this->refScheme = MarkupRef::INTERWIKI_URI;
        switch ($type) {
            case self::MEDIA_TYPE:
                $this->interWiki = InterWiki::createMediaInterWikiFromString($ref);
                break;
            case self::LINK_TYPE:
                $this->interWiki = InterWiki::createLinkInterWikiFromString($ref);
                break;
            default:
                LogUtility::internalError("The type ($type) is unknown, returning a interwiki link ref");
                $this->interWiki = InterWiki::createLinkInterWikiFromString($ref);
                break;
        }
        $this->url = $this->interWiki->toUrl();
        return;

    }


    /**
     * It can be a link with a ref template
     * (the URL stays empty)
     */
    if (syntax_plugin_combo_variable::isVariable($ref)) {
        $this->refScheme = MarkupRef::VARIABLE_URI;
        return;
    }

    /**
     * Doku Path
     * We parse it
     */
    $this->refScheme = MarkupRef::WIKI_URI;

    $questionMarkPosition = strpos($ref, "?");
    $wikiPath = $ref;
    $fragment = null;
    $queryStringAndAnchorOriginal = null;
    if ($questionMarkPosition !== false) {
        // The fragment, if any, is extracted by the query string parsing
        $wikiPath = substr($ref, 0, $questionMarkPosition);
        $queryStringAndAnchorOriginal = substr($ref, $questionMarkPosition + 1);
    } else {
        // We may have only an anchor
        $hashTagPosition = strpos($ref, "#");
        if ($hashTagPosition !== false) {
            $wikiPath = substr($ref, 0, $hashTagPosition);
            $fragment = substr($ref, $hashTagPosition + 1);
        }
    }

    /**
     *
     * Clean it
     */
    $wikiPath = $this->normalizePath($wikiPath);

    /**
     * The URL
     * The path is created at the end because it may have a revision
     */
    switch ($type) {
        case self::MEDIA_TYPE:
            $this->url = UrlEndpoint::createFetchUrl();
            break;
        case self::LINK_TYPE:
            $this->url = UrlEndpoint::createDokuUrl();
            break;
        default:
            throw new ExceptionBadArgument("The ref type ($type) is unknown");
    }


    /**
     * Parsing Query string if any
     */
    if ($queryStringAndAnchorOriginal !== null) {

        $this->parseAndAddQueryStringAndFragment($queryStringAndAnchorOriginal);

    }

    /**
     * The revision (it may have been set in the query string)
     */
    try {
        $rev = $this->url->getQueryPropertyValue(WikiPath::REV_ATTRIBUTE);
    } catch (ExceptionNotFound $e) {
        $rev = null;
    }
    /**
     * The path
     * The wiki path may be relative
     */
    switch ($type) {
        case self::MEDIA_TYPE:
            $this->path = WikiPath::createMediaPathFromId($wikiPath, $rev);
            $this->url->addQueryParameter(MediaMarkup::$MEDIA_QUERY_PARAMETER, $this->path->getWikiId());
            $this->addRevToUrl($rev);

            if ($fragment !== null) {
                $this->url->setFragment($fragment);
            }

            break;
        case self::LINK_TYPE:

            /**
             * The path may be an id if it exists
             * otherwise it's a relative path
             * MarkupPath is important because a link to
             * a namespace (ie wikiPath = `ns:`)
             * should become `ns:start`)
             */
            $markupPath = MarkupPath::createMarkupFromStringPath($wikiPath);
            if (!FileSystems::exists($markupPath) && $wikiPath !== "") {
                // We test for an empty wikiPath string
                // because if the wiki path is the empty string,
                // this is the current requested page
                // An empty id is the root and always exists
                $idPath = MarkupPath::createMarkupFromId($wikiPath);
                if (FileSystems::exists($idPath)) {
                    $markupPath = $idPath;
                }
            }

            /**
             * The path may be a namespace, in the page system
             * the path should then be the index page
             */
            try {
                $this->path = $markupPath->getPathObject()->toWikiPath();
            } catch (ExceptionCompile $e) {
                throw new ExceptionRuntimeInternal("Path should be a wiki path");
            }
            $this->url->addQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $this->path->getWikiId());
            $this->addRevToUrl($rev);

            if ($fragment !== null) {
                // For a page, the fragment is a section id
                $fragment = OutlineSection::textToHtmlSectionId($fragment);
                $this->url->setFragment($fragment);
            }

            break;
        default:
            throw new ExceptionBadArgument("The ref type ($type) is unknown");
    }

}
348 
/**
 * Factory for a reference found in a media markup.
 *
 * @param $refProcessing - the reference to parse
 * @return MarkupRef - a markup ref of type {@link MarkupRef::MEDIA_TYPE}
 * @throws ExceptionBadArgument
 * @throws ExceptionBadSyntax
 * @throws ExceptionNotFound
 */
public static function createMediaFromRef($refProcessing): MarkupRef
{
    $markupType = self::MEDIA_TYPE;
    return new self($refProcessing, $markupType);
}
359 
/**
 * Factory for a reference found in a link markup.
 *
 * @param $refProcessing - the reference to parse
 * @return MarkupRef - a markup ref of type {@link MarkupRef::LINK_TYPE}
 * @throws ExceptionBadSyntax
 * @throws ExceptionBadArgument
 * @throws ExceptionNotFound
 */
public static function createLinkFromRef($refProcessing): MarkupRef
{
    $markupType = self::LINK_TYPE;
    return new self($refProcessing, $markupType);
}
370 
/**
 * Return the URL schemes authorized by the configuration
 * of the actual execution context.
 *
 * See https://www.dokuwiki.org/urlschemes
 *
 * @return array - the authorized schemes
 */
private static function loadAndGetAuthorizedSchemes(): array
{
    $executionContext = ExecutionContext::getActualOrCreateFromEnv();
    $config = $executionContext->getConfig();
    return $config->getAuthorizedUrlSchemes();
}
381 
/**
 * Normalize a wiki path that was entered manually.
 *
 * The slash separators are replaced by the double point separator,
 * the path is cleaned with the Dokuwiki `cleanID` function and the
 * characteristics lost by the cleaning are restored:
 *   * the trailing separator of a namespace path
 *   * the relative prefix (current or parent)
 *   * the leading separator
 *
 * Note: for a relative namespace path, the relative prefix is prepended
 * to the cleaned path without any separator.
 *
 * @param string $wikiPath - a path entered by a user
 * @return string - the normalized path (the empty string is returned as is)
 */
public function normalizePath(string $wikiPath): string
{
    // Nothing to normalize
    if ($wikiPath === "") {
        return $wikiPath;
    }

    $separator = WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT;
    $currentCharacter = WikiPath::CURRENT_PATH_CHARACTER;

    // slash to double point
    $wikiPath = str_replace(WikiPath::NAMESPACE_SEPARATOR_SLASH, $separator, $wikiPath);

    // Characteristics of the path before cleaning
    $lastCharacter = $wikiPath[strlen($wikiPath) - 1];
    $firstCharacter = $wikiPath[0];
    $endsWithSeparator = $lastCharacter === $separator;
    $startsWithSeparator = $firstCharacter === $separator;

    // Relative prefix if any (null if the path is not relative)
    $relativePrefix = null;
    if ($firstCharacter === $currentCharacter) {
        $isParent = isset($wikiPath[1]) && $wikiPath[1] === $currentCharacter;
        $relativePrefix = $isParent ? WikiPath::CURRENT_PARENT_PATH_CHARACTER : $currentCharacter;
    }

    /**
     * Dokuwiki Compliance
     */
    $normalized = cleanID($wikiPath);
    if ($endsWithSeparator) {
        $normalized = $normalized . ":";
    }

    // Restore the relative prefix
    if ($relativePrefix !== null) {
        if ($endsWithSeparator) {
            $normalized = $relativePrefix . $normalized;
        } else {
            $normalized = $relativePrefix . $separator . $normalized;
        }
    }

    // Restore the leading separator
    if ($startsWithSeparator) {
        $normalized = ":" . $normalized;
    }

    return $normalized;
}
440 
441 
/**
 * @return Url - the URL built by the constructor (it stays empty for a variable reference)
 */
public function getUrl(): Url
{
    return $this->url;
}
447 
/**
 * @return WikiPath - the path set by the constructor (for a local link or a wiki path reference)
 * @throws ExceptionNotFound - if the reference has no path
 */
public function getPath(): WikiPath
{
    if ($this->path !== null) {
        return $this->path;
    }
    throw new ExceptionNotFound("No path was found");
}
459 
/**
 * @return string - the reference as given to the constructor (not trimmed)
 */
public function getRef(): string
{
    return $this->ref;
}
465 
/**
 * @return string - the type of reference found by the constructor (one of the `*_URI` constants)
 */
public function getSchemeType(): string
{
    return $this->refScheme;
}
470 
/**
 * @return InterWiki - the interwiki instance created by the constructor
 * @throws ExceptionNotFound - if the reference is not an interwiki
 */
public function getInterWiki(): InterWiki
{
    if ($this->interWiki !== null) {
        return $this->interWiki;
    }
    throw new ExceptionNotFound("This ref ($this->ref) is not an interWiki.");
}
481 
/**
 * Add the revision to the URL as query parameter.
 *
 * @param $rev - the revision, nothing is added if null
 */
private function addRevToUrl($rev = null): void
{
    if ($rev === null) {
        return;
    }
    $this->url->addQueryParameter(WikiPath::REV_ATTRIBUTE, $rev);
}
488 
489 
/**
 * @return string - the type of markup ref given to the constructor (ie media or link)
 */
public function getType(): string
{
    return $this->type;
}
494 
/**
 * Parse the part of a reference located after the question mark
 * and add the properties found to the URL.
 *
 * The string is split on `&` and each token is processed in this order:
 *   * a leading sizing (`width` or `widthxheight`)
 *   * a leading linking value (`nolink`, `direct`, `linkonly`, `details`)
 *   * a leading no cache value
 *   * a fragment (the token starts with `#`)
 *   * a `key=value` or a boolean `key`, both may be followed by a fragment
 *
 * A query parameters value may have a # for the definition of a color
 * This process takes it into account
 *
 * @param string $queryStringAndFragment - the query string with an optional fragment
 * @return void
 */
private function parseAndAddQueryStringAndFragment(string $queryStringAndFragment): void
{
    /**
     * We parse token by token because we allow a hashtag for a hex color
     */
    $remaining = $queryStringAndFragment;
    while (strlen($remaining) > 0) {

        /**
         * Capture the token
         * and reduce the text
         */
        $ampersandPosition = strpos($remaining, "&");
        if ($ampersandPosition !== false) {
            $token = substr($remaining, 0, $ampersandPosition);
            $remaining = substr($remaining, $ampersandPosition + 1);
        } else {
            $token = $remaining;
            $remaining = "";
        }


        /**
         * Sizing (wxh)
         */
        $sizing = [];
        if (preg_match('/^([0-9]+)(?:x([0-9]+))?/', $token, $sizing)) {
            $this->url->addQueryParameter(Dimension::WIDTH_KEY, $sizing[1]);
            if (isset($sizing[2])) {
                $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $sizing[2]);
            }
            $token = substr($token, strlen($sizing[0]));
            if ($token === "") {
                // no anchor behind we continue
                continue;
            }
        }

        /**
         * Linking
         */
        $matches = [];
        if (preg_match('/^(nolink|direct|linkonly|details)/i', $token, $matches)) {
            $linkingValue = $matches[1];
            $this->url->addQueryParameter(MediaMarkup::LINKING_KEY, $linkingValue);
            $token = substr($token, strlen($linkingValue));
            if ($token === "") {
                // no anchor behind we continue
                continue;
            }
        }

        /**
         * Cache
         */
        $noCacheValue = IFetcherAbs::NOCACHE_VALUE;
        if (preg_match('/^(' . $noCacheValue . ')/i', $token)) {
            $this->url->addQueryParameter(IFetcherAbs::CACHE_KEY, $noCacheValue);
            $token = substr($token, strlen($noCacheValue));
            if ($token === "") {
                // no anchor behind we continue
                continue;
            }
        }

        /**
         * Anchor value after a single token case
         */
        if (strpos($token, '#') === 0) {
            $this->url->setFragment(substr($token, 1));
            continue;
        }

        /**
         * Key, value
         * explode to the first `=`
         * in the anchor value, we can have one
         *
         * Ex with media.pdf#page=31
         */
        $tokens = explode("=", $token, 2);
        $key = $tokens[0];
        $value = $tokens[1] ?? null;

        /**
         * Case of an anchor after a boolean attribute (ie without =)
         * at the end
         */
        $anchorPosition = strpos($key, '#');
        if ($anchorPosition !== false) {
            $fragment = substr($key, $anchorPosition + 1);
            if ($value !== null) {
                /**
                 * The hashtag is before the `=`: the `=` and what follows
                 * are part of the fragment (ie `key#page=31`)
                 * and not the value of the key
                 */
                $fragment .= "=" . $value;
                $value = null;
            }
            $this->url->setFragment($fragment);
            $key = substr($key, 0, $anchorPosition);
        }

        /**
         * Too many hashtag in the value, the token is skipped
         */
        if ($value !== null && ($countHashTag = substr_count($value, "#")) >= 3) {
            LogUtility::msg("The value ($value) of the key ($key) for the link ($this) has $countHashTag `#` characters and the maximum supported is 2.", LogUtility::LVL_MSG_ERROR);
            continue;
        }

        /**
         * Test Anchor on the value
         * (a boolean attribute has no value to search into)
         */
        $lowerCaseKey = strtolower($key);
        if ($value !== null) {
            $anchorPosition = false;
            if ($lowerCaseKey === TextColor::CSS_ATTRIBUTE && strpos($value, '#') === 0) {
                /**
                 * Special case when color has one color value as hexadecimal #
                 * and the hashtag: the anchor is the last one
                 * (with only one hashtag, there is no anchor)
                 */
                if (substr_count($value, "#") >= 2) {
                    $anchorPosition = strrpos($value, '#');
                }
            } else {
                // general case and color that is not hexadecimal
                $anchorPosition = strpos($value, "#");
            }
            if ($anchorPosition !== false) {
                $this->url->setFragment(substr($value, $anchorPosition + 1));
                $value = substr($value, 0, $anchorPosition);
            }
        }

        switch ($lowerCaseKey) {
            case Dimension::WIDTH_KEY_SHORT: // used in a link w=xxx
                $this->url->addQueryParameter(Dimension::WIDTH_KEY, $value);
                break;
            case Dimension::HEIGHT_KEY_SHORT: // used in a link h=xxxx
                $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $value);
                break;
            default:
                // a null value is a boolean attribute
                $this->url->addQueryParameter($key, $value);
                break;
        }

    }

}
664 
/**
 * @return string - the reference as given to the constructor
 */
public function __toString()
{
    return $this->ref;
}
669 
670 
671 }
672