1<?php
2
3namespace Sabre\Uri;
4
5/**
6 * This file contains all the uri handling functions.
7 *
8 * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
9 * @author Evert Pot (http://evertpot.com/)
10 * @license http://sabre.io/license/
11 */
12
/**
 * Resolves relative urls, like a browser would.
 *
 * This function takes a basePath, which itself _may_ also be relative, and
 * then applies the relative path on top of it.
 *
 * Rules that are applied:
 *
 * - If $newPath has a scheme it is absolute: it is rebuilt and returned, and
 *   $basePath is ignored.
 * - If $newPath has a host, its port and path are used as-is. Only the
 *   scheme is inherited from $basePath.
 * - Otherwise host and port come from $basePath, and a relative path in
 *   $newPath replaces the last segment of the base path.
 * - "." and ".." segments are removed. ".." never climbs above the root of
 *   an absolute path.
 * - The query of $basePath is only kept when $newPath has no host, path or
 *   query of its own. A fragment is only ever taken from $newPath.
 * - User information is not carried over into the result.
 *
 * A component counts as present when it is neither null nor an empty string,
 * so a path, query or fragment of "0" is treated like any other value.
 *
 * @param string $basePath
 * @param string $newPath
 * @return string
 */
function resolve($basePath, $newPath) {

    $base = parse($basePath);
    $delta = parse($newPath);

    // Plain truthiness is not good enough to detect a component: PHP treats
    // the string "0" as false, while "0" is a valid path, query or fragment.
    $has = function(array $parts, $part) {
        return isset($parts[$part]) && (string)$parts[$part] !== '';
    };

    // If the new path defines a scheme, it's absolute and we can just return
    // that.
    if ($has($delta, 'scheme')) {
        return build($delta);
    }

    $pick = function($part) use ($base, $delta, $has) {

        if ($has($delta, $part)) {
            return $delta[$part];
        }
        if ($has($base, $part)) {
            return $base[$part];
        }
        return null;

    };

    $deltaHasHost = $has($delta, 'host');

    $newParts = [];

    $newParts['scheme'] = $pick('scheme');
    $newParts['host'] = $pick('host');
    if ($deltaHasHost) {
        // A reference with its own authority replaces the base authority as
        // a whole; the base port must not leak onto a different host.
        $newParts['port'] = $has($delta, 'port') ? $delta['port'] : null;
    } else {
        $newParts['port'] = $pick('port');
    }

    if ($has($delta, 'path')) {
        if ($delta['path'][0] === '/') {
            // Absolute path, the base path is irrelevant.
            $path = $delta['path'];
        } else {
            // Removing last component from base path.
            $path = (string)$base['path'];
            $lastSlash = strrpos($path, '/');
            if ($lastSlash !== false) {
                $path = substr($path, 0, $lastSlash);
            }
            $path .= '/' . $delta['path'];
        }
    } elseif ($deltaHasHost || !$has($base, 'path')) {
        // Either the reference points at another authority (so the base path
        // does not apply), or there is no base path at all.
        $path = '/';
    } else {
        $path = $base['path'];
    }

    // Removing .. and .
    //
    // For an absolute path the first element produced by explode() is the
    // empty string in front of the leading slash. It marks the root and must
    // survive any number of '..' segments, otherwise the result would lose
    // its leading slash and get glued onto the host name.
    $isAbsolute = strpos($path, '/') === 0;
    $floor = $isAbsolute ? 1 : 0;

    $newPathParts = [];
    foreach (explode('/', $path) as $pathPart) {

        if ($pathPart === '.') {
            continue;
        }
        if ($pathPart === '..') {
            if (count($newPathParts) > $floor) {
                array_pop($newPathParts);
            }
            continue;
        }
        $newPathParts[] = $pathPart;

    }

    $path = implode('/', $newPathParts);
    if ($isAbsolute && $path === '') {
        // Only the root marker was left over.
        $path = '/';
    }

    $newParts['path'] = $path;

    if ($has($delta, 'query')) {
        $newParts['query'] = $delta['query'];
    } elseif ($has($base, 'query') && !$deltaHasHost && !$has($delta, 'path')) {
        // Keep the old query if host and path didn't change
        $newParts['query'] = $base['query'];
    }
    if ($has($delta, 'fragment')) {
        $newParts['fragment'] = $delta['fragment'];
    }
    return build($newParts);

}
101
/**
 * Takes a URI or partial URI as its argument, and normalizes it.
 *
 * After normalizing a URI, you can safely compare it to other URIs.
 * This function will for instance convert a %7E into a tilde, according to
 * rfc3986.
 *
 * It will also change a %3a into a %3A.
 *
 * The following steps are applied:
 *
 * - "." and ".." path segments are collapsed.
 * - Every path segment is percent-decoded and encoded again, which gives a
 *   single canonical encoding.
 * - A non-empty path always ends up with exactly one leading slash.
 * - Scheme and host are lower-cased.
 * - For http and https the default port (80 / 443) is removed and an empty
 *   path becomes "/".
 *
 * @param string $uri
 * @return string
 */
function normalize($uri) {

    $parts = parse($uri);

    // Compared against '' instead of using empty(): the path "0" is falsy in
    // PHP, yet it has to be normalized (and kept) like any other path.
    if (isset($parts['path']) && (string)$parts['path'] !== '') {
        $newPathParts = [];
        foreach (explode('/', ltrim($parts['path'], '/')) as $pathPart) {
            if ($pathPart === '.') {
                // skip
                continue;
            }
            if ($pathPart === '..') {
                // One level up in the hierarchy
                array_pop($newPathParts);
                continue;
            }
            // Ensuring that everything is correctly percent-encoded.
            $newPathParts[] = rawurlencode(rawurldecode($pathPart));
        }
        $parts['path'] = '/' . implode('/', $newPathParts);
    }

    if (!empty($parts['scheme'])) {
        $parts['scheme'] = strtolower($parts['scheme']);
        $defaultPorts = [
            'http'  => '80',
            'https' => '443',
        ];

        if (!empty($parts['port']) && isset($defaultPorts[$parts['scheme']]) && $defaultPorts[$parts['scheme']] == $parts['port']) {
            // Removing default ports.
            unset($parts['port']);
        }

        // A few HTTP specific rules.
        $isHttp = $parts['scheme'] === 'http' || $parts['scheme'] === 'https';
        if ($isHttp && (!isset($parts['path']) || (string)$parts['path'] === '')) {
            // An empty path is equivalent to / in http.
            $parts['path'] = '/';
        }
    }

    if (!empty($parts['host'])) {
        $parts['host'] = strtolower($parts['host']);
    }

    return build($parts);

}
167
/**
 * Parses a URI and returns its individual components.
 *
 * This method largely behaves the same as PHP's parse_url, except that it will
 * return an array with all the array keys, including the ones that are not
 * set by parse_url, which makes it a bit easier to work with. The keys that
 * are always present are: scheme, host, path, port, user, query and fragment;
 * the ones that were not found in the uri are null.
 *
 * Unlike PHP's parse_url, it will also convert any non-ascii characters to
 * percent-encoded strings. PHP's parse_url corrupts these characters on OS X.
 *
 * When parse_url cannot handle the uri at all, _parse_fallback() is used
 * instead.
 *
 * @param string $uri
 * @return array
 */
function parse($uri) {

    // Strictly speaking a URI must be plain ASCII. In practice it often is
    // not, and parse_url might corrupt such strings.
    //
    // For that reason every run of non-ascii bytes is percent-encoded first.
    // The pattern deliberately works on bytes (no /u modifier). For valid
    // UTF-8 the outcome is the same, because rawurlencode() encodes byte by
    // byte anyway. With /u, a string that is not valid UTF-8 would make
    // preg_replace_callback() return null and the whole uri would silently
    // be lost.
    $uri = preg_replace_callback(
        '/[^\x00-\x7F]+/',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = parse_url($uri);
    if (!$result) {
        // parse_url gave up on this uri, try the pure PHP parser.
        $result = _parse_fallback($uri);
    }

    // The + operator keeps everything that was parsed, and only adds the
    // keys that are still missing.
    return
         $result + [
            'scheme'   => null,
            'host'     => null,
            'path'     => null,
            'port'     => null,
            'user'     => null,
            'query'    => null,
            'fragment' => null,
        ];

}
213
/**
 * This function takes the components returned from PHP's parse_url, and uses
 * it to generate a new uri.
 *
 * The keys that are used are: scheme, user, host, port, path, query and
 * fragment. A component is skipped when it is missing, null or an empty
 * string. A 'pass' component is not written to the uri.
 *
 * User and port are only emitted when there is a host. For the file scheme
 * the '//' is emitted even without a host, which yields file:///path.
 *
 * @param array $parts
 * @return string
 */
function build(array $parts) {

    // empty() is deliberately not used to detect a component: it also rejects
    // "0", which is a valid host, path, query or fragment.
    $has = function($key) use ($parts) {
        return isset($parts[$key]) && (string)$parts[$key] !== '';
    };

    $authority = '';
    if ($has('host')) {
        $authority = $parts['host'];
        if ($has('user')) {
            $authority = $parts['user'] . '@' . $authority;
        }
        if ($has('port')) {
            $authority = $authority . ':' . $parts['port'];
        }
    }

    $uri = '';
    $hasScheme = $has('scheme');

    if ($hasScheme) {
        // A scheme does not imply a host, think of mailto: for instance.
        $uri = $parts['scheme'] . ':';
    }
    if ($authority !== '' || ($hasScheme && $parts['scheme'] === 'file')) {
        // Network-path form. The file scheme gets the double slash even if
        // the authority is empty.
        $uri .= '//' . $authority;
    }

    if ($has('path')) {
        $uri .= $parts['path'];
    }
    if ($has('query')) {
        $uri .= '?' . $parts['query'];
    }
    if ($has('fragment')) {
        $uri .= '#' . $parts['fragment'];
    }

    return $uri;

}
260
/**
 * Splits a path into its 'dirname' and its 'basename'.
 *
 * PHP's own basename() is not used for this, because it is locale aware (its
 * behaviour changes between the C locale and a UTF-8 locale), while this
 * function has to operate on UTF-8 characters only.
 *
 * On top of that basename and dirname are platform aware: on windows they
 * treat the backslash (\) as a directory separator.
 *
 * The result is an array with 2 elements: the dirname followed by the
 * basename.
 *
 * The dirname is an empty string if the path has none. A single / at the end
 * of the path is stripped off. For a path without any basename, such as an
 * empty string or just '/', both elements are null.
 *
 * @param string $path
 * @return array
 */
function split($path) {

    $pattern = '/^(?:(?:(.*)(?:\/+))?([^\/]+))(?:\/?)$/u';

    $found = [];
    if (!preg_match($pattern, $path, $found)) {
        // No last segment to take off.
        return [null, null];
    }

    // Index 0 holds the full match, which is of no interest here.
    list(, $dirname, $basename) = $found;

    return [$dirname, $basename];

}
288
/**
 * This function is another implementation of parse_url, except this one is
 * fully written in PHP.
 *
 * It exists because parse_url refuses some uris. parse() only calls this
 * function when parse_url failed. It's pretty crude and probably slow, so the
 * original parse_url is usually preferred.
 *
 * The returned array always has the keys scheme, host, port, user, path,
 * fragment and query (null if not found). A 'pass' key is only added when the
 * uri contains a password. For uris starting with three slashes the host is
 * an empty string.
 *
 * @param string $uri
 * @return array
 * @throws InvalidUriException If the uri has an authority part ('//...')
 *                             that cannot be parsed.
 */
function _parse_fallback($uri) {

    // Strictly speaking a URI must be plain ASCII. In practice it often is
    // not, so every run of non-ascii bytes is percent-encoded first.
    //
    // The pattern works on bytes (no /u modifier). The outcome for valid
    // UTF-8 is the same, but invalid UTF-8 can no longer make
    // preg_replace_callback() return null.
    $uri = preg_replace_callback(
        '/[^\x00-\x7F]+/',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = [
        'scheme'   => null,
        'host'     => null,
        'port'     => null,
        'user'     => null,
        'path'     => null,
        'fragment' => null,
        'query'    => null,
    ];

    // The hyphen is escaped inside the character class. Unescaped, '+-.' is
    // a range that also lets a comma through.
    if (preg_match('% ^([A-Za-z][A-Za-z0-9+\-.]+): %x', $uri, $matches)) {

        $result['scheme'] = $matches[1];
        // Take what's left.
        $uri = substr($uri, strlen($result['scheme']) + 1);

    }

    // Taking off a fragment part
    if (strpos($uri, '#') !== false) {
        list($uri, $result['fragment']) = explode('#', $uri, 2);
    }
    // Taking off the query part
    if (strpos($uri, '?') !== false) {
        list($uri, $result['query']) = explode('?', $uri, 2);
    }

    if (substr($uri, 0, 3) === '///') {
        // The triple slash uris are a bit unusual, but we have special handling
        // for them.
        $result['path'] = substr($uri, 2);
        $result['host'] = '';
    } elseif (substr($uri, 0, 2) === '//') {
        // Uris that have an authority part.
        //
        // The password is optional, so '//user@host' is understood. Neither
        // user nor password may contain a slash: the authority ends at the
        // first one, anything after it belongs to the path.
        $regex = '
          %^
            //
            (?: (?<user> [^:@/]+) (?: : (?<pass> [^@/]+))? @)?
            (?<host> ( [^:/]* | \[ [^\]]+ \] ))
            (?: : (?<port> [0-9]+))?
            (?<path> / .*)?
          $%x
        ';
        if (!preg_match($regex, $uri, $matches)) {
            throw new InvalidUriException('Invalid, or could not parse URI');
        }

        // preg_match leaves trailing groups that did not take part in the
        // match out of $matches, so port and path may not exist at all.
        $group = function($name) use ($matches) {
            return isset($matches[$name]) ? $matches[$name] : '';
        };

        if ($group('host') !== '') $result['host'] = $matches['host'];
        if ($group('port') !== '') $result['port'] = (int)$matches['port'];
        if (isset($matches['path'])) $result['path'] = $matches['path'];
        if ($group('user') !== '') $result['user'] = $matches['user'];
        if ($group('pass') !== '') $result['pass'] = $matches['pass'];
    } else {
        $result['path'] = $uri;
    }

    return $result;
}
374