1<?php
2
3namespace ComboStrap;
4
5use ComboStrap\Web\Url;
6use ComboStrap\Web\UrlEndpoint;
7use syntax_plugin_combo_variable;
8
9/**
10 *
 * Basically, a class that parses a link/media markup reference and returns a URL.
12 *
13 * Detailed, the class parses the reference:
14 *   * from a {@link MarkupRef::createMediaFromRef() media markup}
15 *   * or {@link MarkupRef::createLinkFromRef() link markup}
16 * and returns an {@link MarkupRef::getUrl() URL},
17 *
18 * You may determine the {@link MarkupRef::getSchemeType() type of reference}
19 *
20 * For a {@link MarkupRef::WIKI_URI}, the URL returned is:
21 *   * a {@link UrlEndpoint::createFetchUrl() fetch url} for a media
22 *   * a {@link UrlEndpoint::createDokuUrl() doku url} for a link (ie page)
23 *
24 * If this is a {@link MarkupRef::INTERWIKI_URI}, you may also get the {@link MarkupRef::getInterWiki() interwiki instance}
25 * If this is a {@link MarkupRef::WIKI_URI}, you may also get the {@link MarkupRef::getPath() path}
26 *
27 *
28 * Why ?
 * The parsing function {@link Doku_Handler_Parse_Media} has some flaws / problems:
30 *    * It keeps the anchor only if there is no query string
31 *    * It takes the first digit as the width (ie media.pdf?page=31 would have a width of 31)
 *    * `src` is not only the media path but may have an anchor
33 *    * ...
34 *
35 */
36class MarkupRef
37{
38    public const WINDOWS_SHARE_URI = 'windowsShare';
39    public const LOCAL_URI = 'local';
40    public const EMAIL_URI = 'email';
41    public const WEB_URI = 'external';
42
43    /**
44     * Type of Ref
45     */
46    public const INTERWIKI_URI = 'interwiki';
47    public const WIKI_URI = 'internal';
48    public const VARIABLE_URI = 'internal_template';
49    public const REF_ATTRIBUTE = "ref";
50
51
52    /**
53     * The type of markup ref (ie media or link)
54     */
55    private string $type;
56    const MEDIA_TYPE = "media";
57    const LINK_TYPE = "link";
58
59    private string $refScheme;
60    public const EXTERNAL_MEDIA_CALL_NAME = "external";
61
62
63    private string $ref;
64    private ?Url $url = null;
65
66    private ?Path $path = null;
67    private ?InterWiki $interWiki = null;
68
69
70    /**
71     * @throws ExceptionBadSyntax
72     * @throws ExceptionBadArgument
73     * @throws ExceptionNotFound
74     */
75    public function __construct($ref, $type)
76    {
77        $this->ref = $ref;
78        $this->type = $type;
79
80        $this->url = Url::createEmpty();
81
82        $ref = trim($ref);
83
84        /**
85         * Email validation pattern
86         * E-Mail (pattern below is defined in inc/mail.php)
87         *
88         * Example:
89         * [[support@combostrap.com?subject=hallo world]]
90         * [[support@combostrap.com]]
91         */
92        $emailRfc2822 = "0-9a-zA-Z!#$%&'*+/=?^_`{|}~-";
93        $emailPattern = '[' . $emailRfc2822 . ']+(?:\.[' . $emailRfc2822 . ']+)*@(?i:[0-9a-z][0-9a-z-]*\.)+(?i:[a-z]{2,63})';
94        if (preg_match('<' . $emailPattern . '>', $ref)) {
95            $this->refScheme = self::EMAIL_URI;
96            $position = strpos($ref, "?");
97
98            if ($position !== false) {
99                $email = substr($ref, 0, $position);
100                $queryStringAndFragment = substr($ref, $position + 1);
101                $this->url = Url::createFromString("mailto:$email");
102                $this->parseAndAddQueryStringAndFragment($queryStringAndFragment);
103            } else {
104                $this->url = Url::createFromString("mailto:$ref");
105            }
106            return;
107        }
108
109        /**
110         * Case when the URL is just a full conform URL
111         *
112         * Example: `https://` or `ftp://`
113         *
114         * Other scheme are not yet recognized
115         * because it can also be a wiki id
116         * For instance, `mailto:` is also a valid page
117         *
118         * same as {@link media_isexternal()}  check only http / ftp scheme
119         */
120        if (preg_match('#^([a-z0-9\-.+]+?)://#i', $ref)) {
121            try {
122                $this->url = Url::createFromString($ref);
123                $this->refScheme = self::WEB_URI;
124
125                /**
126                 * Authorized scheme only (to not inject code ?)
127                 */
128                $authorizedSchemes = self::loadAndGetAuthorizedSchemes();
129                if (!in_array($this->url->getScheme(), $authorizedSchemes)) {
130                    throw new ExceptionBadSyntax("The scheme ({$this->url->getScheme()}) of the URL ({$this->url}) is not authorized");
131                }
132                try {
133                    $isImage = FileSystems::getMime($this->url)->isImage();
134                } catch (ExceptionNotFound $e) {
135                    $isImage = false;
136                }
137                if ($isImage) {
138                    $properties = $this->url->getQueryProperties();
139                    if (count($properties) >= 1) {
140                        try {
141                            /**
142                             * The first parameter is the `Width X Height`
143                             */
144                            $widthAndHeight = array_key_first($properties);
145                            $xPosition = strpos($widthAndHeight, "x");
146                            if ($xPosition !== false) {
147                                $width = DataType::toInteger(substr($widthAndHeight, 0, $xPosition));
148                                if ($width !== 0) {
149                                    $this->url->addQueryParameter(Dimension::WIDTH_KEY, $width);
150                                }
151                                $height = DataType::toInteger(substr($widthAndHeight, $xPosition + 1));
152                                $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $height);
153                            } else {
154                                $width = DataType::toInteger($widthAndHeight);
155                                $this->url->addQueryParameter(Dimension::WIDTH_KEY, $width);
156                            }
157                            $this->url->deleteQueryParameter($widthAndHeight);
158                            if ($this->url->hasProperty(MediaMarkup::LINKING_NOLINK_VALUE)) {
159                                $this->url->addQueryParameter(MediaMarkup::LINKING_KEY, MediaMarkup::LINKING_NOLINK_VALUE);
160                                $this->url->deleteQueryParameter(MediaMarkup::LINKING_NOLINK_VALUE);
161                            }
162                        } catch (ExceptionBadArgument $e) {
163                            // not a number/integer
164                        }
165                    }
166                }
167                return;
168            } catch (ExceptionBadSyntax $e) {
169                throw new ExceptionBadSyntax("The url string was not validated as an URL ($ref). Error: {$e->getMessage()}");
170            }
171        }
172
173        /**
174         * Windows share link
175         */
176        if (preg_match('/^\\\\\\\\[^\\\\]+?\\\\/u', $ref)) {
177            $this->refScheme = self::WINDOWS_SHARE_URI;
178            $this->url = LocalPath::createFromPathString($ref)->getUrl();
179            return;
180        }
181
182        /**
183         * Only Fragment (also known as local link)
184         */
185        if (preg_match('/^#.?/', $ref)) {
186            $this->refScheme = self::LOCAL_URI;
187
188            $fragment = substr($ref, 1);
189            if ($fragment !== "") {
190                $fragment = OutlineSection::textToHtmlSectionId($fragment);
191            }
192            $this->url = Url::createEmpty()->setFragment($fragment);
193            $this->path = WikiPath::createRequestedPagePathFromRequest();
194            return;
195        }
196
197        /**
198         * Interwiki ?
199         */
200        if (preg_match('/^[a-zA-Z0-9.]+>/u', $ref)) {
201
202            $this->refScheme = MarkupRef::INTERWIKI_URI;
203            switch ($type) {
204                case self::MEDIA_TYPE:
205                    $this->interWiki = InterWiki::createMediaInterWikiFromString($ref);
206                    break;
207                case self::LINK_TYPE:
208                    $this->interWiki = InterWiki::createLinkInterWikiFromString($ref);
209                    break;
210                default:
211                    LogUtility::internalError("The type ($type) is unknown, returning a interwiki link ref");
212                    $this->interWiki = InterWiki::createLinkInterWikiFromString($ref);
213                    break;
214            }
215            $this->url = $this->interWiki->toUrl();
216            return;
217
218        }
219
220
221        /**
222         * It can be a link with a ref template
223         */
224        if (syntax_plugin_combo_variable::isVariable($ref)) {
225            $this->refScheme = MarkupRef::VARIABLE_URI;
226            return;
227        }
228
229        /**
230         * Doku Path
231         * We parse it
232         */
233        $this->refScheme = MarkupRef::WIKI_URI;
234
235        $questionMarkPosition = strpos($ref, "?");
236        $wikiPath = $ref;
237        $fragment = null;
238        $queryStringAndAnchorOriginal = null;
239        if ($questionMarkPosition !== false) {
240            $wikiPath = substr($ref, 0, $questionMarkPosition);
241            $queryStringAndAnchorOriginal = substr($ref, $questionMarkPosition + 1);
242        } else {
243            // We may have only an anchor
244            $hashTagPosition = strpos($ref, "#");
245            if ($hashTagPosition !== false) {
246                $wikiPath = substr($ref, 0, $hashTagPosition);
247                $fragment = substr($ref, $hashTagPosition + 1);
248            }
249        }
250
251        /**
252         *
253         * Clean it
254         */
255        $wikiPath = $this->normalizePath($wikiPath);
256
257        /**
258         * The URL
259         * The path is created at the end because it may have a revision
260         */
261        switch ($type) {
262            case self::MEDIA_TYPE:
263                $this->url = UrlEndpoint::createFetchUrl();
264                break;
265            case self::LINK_TYPE:
266                $this->url = UrlEndpoint::createDokuUrl();
267                break;
268            default:
269                throw new ExceptionBadArgument("The ref type ($type) is unknown");
270        }
271
272
273        /**
274         * Parsing Query string if any
275         */
276        if ($queryStringAndAnchorOriginal !== null) {
277
278            $this->parseAndAddQueryStringAndFragment($queryStringAndAnchorOriginal);
279
280        }
281
282        /**
283         * The path
284         */
285        try {
286            $rev = $this->url->getQueryPropertyValue(WikiPath::REV_ATTRIBUTE);
287        } catch (ExceptionNotFound $e) {
288            $rev = null;
289        }
290        /**
291         * The wiki path may be relative
292         */
293        switch ($type) {
294            case self::MEDIA_TYPE:
295                $this->path = WikiPath::createMediaPathFromId($wikiPath, $rev);
296                $this->url->addQueryParameter(MediaMarkup::$MEDIA_QUERY_PARAMETER, $this->path->getWikiId());
297                $this->addRevToUrl($rev);
298
299                if ($fragment !== null) {
300                    $this->url->setFragment($fragment);
301                }
302
303                break;
304            case self::LINK_TYPE:
305
306                /**
307                 * The path may be an id if it exists
308                 * otherwise it's a relative path
309                 * MarkupPath is important because a link to
310                 * a namespace (ie wikiPath = `ns:`)
311                 * should become `ns:start`)
312                 */
313                $markupPath = MarkupPath::createMarkupFromStringPath($wikiPath);
314                if (!FileSystems::exists($markupPath) && $wikiPath !== "") {
315                    // We test for an empty wikiPath string
316                    // because if the wiki path is the empty string,
317                    // this is the current requested page
318                    // An empty id is the root and always exists
319                    $idPath = MarkupPath::createMarkupFromId($wikiPath);
320                    if (FileSystems::exists($idPath)) {
321                        $markupPath = $idPath;
322                    }
323                }
324
325                /**
326                 * The path may be a namespace, in the page system
327                 * the path should then be the index page
328                 */
329                try {
330                    $this->path = $markupPath->getPathObject()->toWikiPath();
331                } catch (ExceptionCompile $e) {
332                    throw new ExceptionRuntimeInternal("Path should be a wiki path");
333                }
334                $this->url->addQueryParameter(DokuwikiId::DOKUWIKI_ID_ATTRIBUTE, $this->path->getWikiId());
335                $this->addRevToUrl($rev);
336
337                if ($fragment !== null) {
338                    $fragment = OutlineSection::textToHtmlSectionId($fragment);
339                    $this->url->setFragment($fragment);
340                }
341
342                break;
343            default:
344                throw new ExceptionBadArgument("The ref type ($type) is unknown");
345        }
346
347    }
348
349    /**
350     * @throws ExceptionBadArgument
351     * @throws ExceptionBadSyntax
352     * @throws ExceptionNotFound
353     */
354    public
355    static function createMediaFromRef($refProcessing): MarkupRef
356    {
357        return new MarkupRef($refProcessing, self::MEDIA_TYPE);
358    }
359
360    /**
361     * @throws ExceptionBadSyntax
362     * @throws ExceptionBadArgument
363     * @throws ExceptionNotFound
364     */
365    public
366    static function createLinkFromRef($refProcessing): MarkupRef
367    {
368        return new MarkupRef($refProcessing, self::LINK_TYPE);
369    }
370
371    // https://www.dokuwiki.org/urlschemes
372    private static function loadAndGetAuthorizedSchemes(): array
373    {
374
375        return ExecutionContext::getActualOrCreateFromEnv()
376            ->getConfig()
377            ->getAuthorizedUrlSchemes();
378
379
380    }
381
382    /**
383     * In case of manual entry, the function will normalize the path
384     * @param string $wikiPath - a path entered by a user
385     * @return string
386     */
387    public function normalizePath(string $wikiPath): string
388    {
389        if ($wikiPath === "") {
390            return $wikiPath;
391        }
392        // slash to double point
393        $wikiPath = str_replace(WikiPath::NAMESPACE_SEPARATOR_SLASH, WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT, $wikiPath);
394
395        $isNamespacePath = false;
396        if ($wikiPath[strlen($wikiPath) - 1] === WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT) {
397            $isNamespacePath = true;
398        }
399        $isPath = false;
400        if ($wikiPath[0] === WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT) {
401            $isPath = true;
402        }
403        $pathType = "unknown";
404        if ($wikiPath[0] === WikiPath::CURRENT_PATH_CHARACTER) {
405            $pathType = "current";
406            if (isset($wikiPath[1])) {
407                if ($wikiPath[1] === WikiPath::CURRENT_PATH_CHARACTER) {
408                    $pathType = "parent";
409                }
410            }
411        }
412        /**
413         * Dokuwiki Compliance
414         */
415        $cleanPath = cleanID($wikiPath);
416        if ($isNamespacePath) {
417            $cleanPath = "$cleanPath:";
418        }
419        switch ($pathType) {
420            case "current":
421                if (!$isNamespacePath) {
422                    $cleanPath = WikiPath::CURRENT_PATH_CHARACTER . WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT . $cleanPath;
423                } else {
424                    $cleanPath = WikiPath::CURRENT_PATH_CHARACTER . $cleanPath;
425                }
426                break;
427            case "parent":
428                if (!$isNamespacePath) {
429                    $cleanPath = WikiPath::CURRENT_PARENT_PATH_CHARACTER . WikiPath::NAMESPACE_SEPARATOR_DOUBLE_POINT . $cleanPath;
430                } else {
431                    $cleanPath = WikiPath::CURRENT_PARENT_PATH_CHARACTER . $cleanPath;
432                }
433                break;
434        }
435        if ($isPath) {
436            $cleanPath = ":$cleanPath";
437        }
438        return $cleanPath;
439    }
440
441
442    public
443    function getUrl(): Url
444    {
445        return $this->url;
446    }
447
448    /**
449     * @throws ExceptionNotFound
450     */
451    public
452    function getPath(): WikiPath
453    {
454        if ($this->path === null) {
455            throw new ExceptionNotFound("No path was found");
456        }
457        return $this->path;
458    }
459
460    public
461    function getRef(): string
462    {
463        return $this->ref;
464    }
465
466    public function getSchemeType(): string
467    {
468        return $this->refScheme;
469    }
470
471    /**
472     * @throws ExceptionNotFound
473     */
474    public function getInterWiki(): InterWiki
475    {
476        if ($this->interWiki === null) {
477            throw new ExceptionNotFound("This ref ($this->ref) is not an interWiki.");
478        }
479        return $this->interWiki;
480    }
481
482    private function addRevToUrl($rev = null): void
483    {
484        if ($rev !== null) {
485            $this->url->addQueryParameter(WikiPath::REV_ATTRIBUTE, $rev);
486        }
487    }
488
489
490    public function getType(): string
491    {
492        return $this->type;
493    }
494
495    /**
496     * A query parameters value may have a # for the definition of a color
497     * This process takes it into account
498     * @param string $queryStringAndFragment
499     * @return void
500     */
501    private function parseAndAddQueryStringAndFragment(string $queryStringAndFragment)
502    {
503        /**
504         * The value $queryStringAndAnchorOriginal
505         * is kept to create the original queryString
506         * at the end if we found an anchor
507         *
508         * We parse token by token because we allow a hashtag for a hex color
509         */
510        $queryStringAndAnchorProcessing = $queryStringAndFragment;
511        while (strlen($queryStringAndAnchorProcessing) > 0) {
512
513            /**
514             * Capture the token
515             * and reduce the text
516             */
517            $questionMarkPos = strpos($queryStringAndAnchorProcessing, "&");
518            if ($questionMarkPos !== false) {
519                $token = substr($queryStringAndAnchorProcessing, 0, $questionMarkPos);
520                $queryStringAndAnchorProcessing = substr($queryStringAndAnchorProcessing, $questionMarkPos + 1);
521            } else {
522                $token = $queryStringAndAnchorProcessing;
523                $queryStringAndAnchorProcessing = "";
524            }
525
526
527            /**
528             * Sizing (wxh)
529             */
530            $sizing = [];
531            if (preg_match('/^([0-9]+)(?:x([0-9]+))?/', $token, $sizing)) {
532                $this->url->addQueryParameter(Dimension::WIDTH_KEY, $sizing[1]);
533                if (isset($sizing[2])) {
534                    $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $sizing[2]);
535                }
536                $token = substr($token, strlen($sizing[0]));
537                if ($token === "") {
538                    // no anchor behind we continue
539                    continue;
540                }
541            }
542
543            /**
544             * Linking
545             */
546            $found = preg_match('/^(nolink|direct|linkonly|details)/i', $token, $matches);
547            if ($found) {
548                $linkingValue = $matches[1];
549                $this->url->addQueryParameter(MediaMarkup::LINKING_KEY, $linkingValue);
550                $token = substr($token, strlen($linkingValue));
551                if ($token == "") {
552                    // no anchor behind we continue
553                    continue;
554                }
555            }
556
557            /**
558             * Cache
559             */
560            $noCacheValue = IFetcherAbs::NOCACHE_VALUE;
561            $found = preg_match('/^(' . $noCacheValue . ')/i', $token, $matches);
562            if ($found) {
563                $this->url->addQueryParameter(IFetcherAbs::CACHE_KEY, $noCacheValue);
564                $token = substr($token, strlen($noCacheValue));
565                if ($token == "") {
566                    // no anchor behind we continue
567                    continue;
568                }
569            }
570
571            /**
572             * Anchor value after a single token case
573             */
574            if (strpos($token, '#') === 0) {
575                $this->url->setFragment(substr($token, 1));
576                continue;
577            }
578
579            /**
580             * Key, value
581             * explode to the first `=`
582             * in the anchor value, we can have one
583             *
584             * Ex with media.pdf#page=31
585             */
586            $tokens = explode("=", $token, 2);
587            $key = $tokens[0];
588            if (count($tokens) == 2) {
589                $value = $tokens[1];
590            } else {
591                $value = null;
592            }
593
594            /**
595             * Case of an anchor after a boolean attribute (ie without =)
596             * at the end
597             */
598            $anchorPosition = strpos($key, '#');
599            if ($anchorPosition !== false) {
600                $this->url->setFragment(substr($key, $anchorPosition + 1));
601                $key = substr($key, 0, $anchorPosition);
602            }
603
604            /**
605             * Test Anchor on the value
606             */
607            if ($value !== null) {
608                if (($countHashTag = substr_count($value, "#")) >= 3) {
609                    LogUtility::msg("The value ($value) of the key ($key) for the link ($this) has $countHashTag `#` characters and the maximum supported is 2.", LogUtility::LVL_MSG_ERROR);
610                    continue;
611                }
612            } else {
613                /**
614                 * Boolean attribute
615                 * (null does not make it)
616                 */
617                $value = null;
618            }
619
620            $anchorPosition = false;
621            $lowerCaseKey = strtolower($key);
622            if ($lowerCaseKey === TextColor::CSS_ATTRIBUTE) {
623                /**
624                 * Special case when color has one color value as hexadecimal #
625                 * and the hashtag
626                 */
627                if (strpos($value, '#') == 0) {
628                    if (substr_count($value, "#") >= 2) {
629
630                        /**
631                         * The last one
632                         */
633                        $anchorPosition = strrpos($value, '#');
634                    }
635                    // no anchor then
636                } else {
637                    // a color that is not hexadecimal can have an anchor
638                    $anchorPosition = strpos($value, "#");
639                }
640            } else {
641                // general case
642                $anchorPosition = strpos($value, "#");
643            }
644            if ($anchorPosition !== false) {
645                $this->url->setFragment(substr($value, $anchorPosition + 1));
646                $value = substr($value, 0, $anchorPosition);
647            }
648
649            switch ($lowerCaseKey) {
650                case Dimension::WIDTH_KEY_SHORT: // used in a link w=xxx
651                    $this->url->addQueryParameter(Dimension::WIDTH_KEY, $value);
652                    break;
653                case Dimension::HEIGHT_KEY_SHORT: // used in a link h=xxxx
654                    $this->url->addQueryParameter(Dimension::HEIGHT_KEY, $value);
655                    break;
656                default:
657                    $this->url->addQueryParameter($key, $value);
658                    break;
659            }
660
661        }
662
663    }
664
665    public function __toString()
666    {
667        return $this->getRef();
668    }
669
670
671}
672