'80', 'https' => '443', ];

        if (!empty($parts['port']) && isset($defaultPorts[$parts['scheme']]) && $defaultPorts[$parts['scheme']] == $parts['port']) {
            // Removing default ports.
            unset($parts['port']);
        }
        // A few HTTP specific rules.
        switch ($parts['scheme']) {
            case 'http' :
            case 'https' :
                if (empty($parts['path'])) {
                    // An empty path is equivalent to / in http.
                    $parts['path'] = '/';
                }
                break;
        }
    }

    if ($parts['host']) $parts['host'] = strtolower($parts['host']);

    return build($parts);

}

/**
 * Parses a URI and returns its individual components.
 *
 * This function largely behaves the same as PHP's parse_url, except that the
 * returned array always contains the keys scheme, host, path, port, user,
 * query and fragment. Components that were not found are set to null, which
 * makes the result a bit easier to work with. A 'pass' key is only present
 * when the underlying parser produced one.
 *
 * Unlike PHP's parse_url, it will also convert any non-ascii characters to
 * percent-encoded strings. PHP's parse_url corrupts these characters on OS X.
 *
 * If parse_url cannot handle the (encoded) URI, _parse_fallback() is used
 * instead.
 *
 * @param string $uri
 * @return array
 */
function parse($uri) {

    // Strictly speaking a URI must be ASCII. In practice it often is not, and
    // parse_url might corrupt such strings.
    //
    // For that reason we take any non-ascii characters from the uri and
    // percent-encode them first.
    $uri = preg_replace_callback(
        '/[^[:ascii:]]/u',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = parse_url($uri);
    if (!$result) {
        $result = _parse_fallback($uri);
    }

    // The array union keeps every key the parser produced and only fills in
    // the ones that are missing.
    return $result + [
        'scheme'   => null,
        'host'     => null,
        'path'     => null,
        'port'     => null,
        'user'     => null,
        'query'    => null,
        'fragment' => null,
    ];

}

/**
 * This function takes the components returned from PHP's parse_url, and uses
 * it to generate a new uri.
 *
 * Components that are missing, null or an empty string are left out. The
 * string "0" is a legitimate value for a path, query, fragment, host or user
 * and is preserved.
 *
 * The user and port are only emitted when a host is present. A 'pass'
 * component is never written to the result.
 *
 * @param array $parts
 * @return string
 */
function build(array $parts) {

    // A string component counts as present when it is set and is not the
    // empty string. empty() is deliberately not used for these: it treats the
    // string "0" as missing, which would silently drop for example the query
    // of "http://example.org/?0".
    $has = function($key) use ($parts) {
        return isset($parts[$key]) && (string)$parts[$key] !== '';
    };

    $uri = '';

    $authority = '';
    if ($has('host')) {
        $authority = $parts['host'];
        if ($has('user')) {
            $authority = $parts['user'] . '@' . $authority;
        }
        // The port keeps the empty() check, so a port of 0 is still omitted.
        if (!empty($parts['port'])) {
            $authority = $authority . ':' . $parts['port'];
        }
    }

    if (!empty($parts['scheme'])) {
        $uri = $parts['scheme'] . ':';
    }
    if ($authority !== '' || (!empty($parts['scheme']) && $parts['scheme'] === 'file')) {
        // The '//' marker is written when there is an authority, and always
        // for the file scheme, so that file URIs come out as file:///path.
        $uri .= '//' . $authority;
    }

    if ($has('path')) {
        $uri .= $parts['path'];
    }
    if ($has('query')) {
        $uri .= '?' . $parts['query'];
    }
    if ($has('fragment')) {
        $uri .= '#' . $parts['fragment'];
    }

    return $uri;

}

/**
 * Returns the 'dirname' and 'basename' for a path.
 *
 * The reason there is a custom function for this purpose, is because
 * basename() is locale aware (behaviour changes if C locale or a UTF-8 locale
 * is used) and we need a method that just operates on UTF-8 characters.
 *
 * In addition basename and dirname are platform aware, and will treat
 * backslash (\) as a directory separator on windows.
 *
 * This method returns the 2 components as an array: [dirname, basename].
 *
 * If there is no dirname, it will return an empty string for it. A single /
 * at the end of the path is ignored. If the path does not contain a basename
 * at all (for example an empty string or just "/"), [null, null] is returned.
 *
 * @param string $path
 * @return array
 */
function split($path) {

    $matches = [];
    // Group 1 is everything before the last run of slashes, group 2 is the
    // last path segment.
    if (preg_match('/^(?:(?:(.*)(?:\/+))?([^\/]+))(?:\/?)$/u', $path, $matches)) {
        return [$matches[1], $matches[2]];
    }
    return [null,null];

}

/**
 * This function is another implementation of parse_url, except this one is
 * fully written in PHP.
 *
 * It exists because PHP's parse_url fails on some inputs. This function is
 * only called if the main parse method fails. It's pretty crude and probably
 * slow, so the original parse_url is usually preferred.
 *
 * The returned array always has the keys scheme, host, port, user, path,
 * fragment and query (null when absent). A 'pass' key is added only when the
 * URI contains a password.
 *
 * @param string $uri
 * @return array
 * @throws InvalidUriException if the URI has an authority part ("//...")
 *         that cannot be parsed.
 */
function _parse_fallback($uri) {

    // Strictly speaking a URI must be ASCII. In practice it often is not, so
    // any non-ascii characters are percent-encoded first. This is a no-op
    // when the caller (such as parse()) has already done so.
    $uri = preg_replace_callback(
        '/[^[:ascii:]]/u',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = [
        'scheme'   => null,
        'host'     => null,
        'port'     => null,
        'user'     => null,
        'path'     => null,
        'fragment' => null,
        'query'    => null,
    ];

    // The hyphen is escaped so the class contains exactly "+", "." and "-".
    // Written as "+-\." it is a range that also lets "," through.
    if (preg_match('% ^([A-Za-z][A-Za-z0-9+.\-]+): %x', $uri, $matches)) {

        $result['scheme'] = $matches[1];

        // Take what's left.
        $uri = substr($uri, strlen($result['scheme']) + 1);

    }

    // Taking off a fragment part
    if (strpos($uri, '#') !== false) {
        list($uri, $result['fragment']) = explode('#', $uri, 2);
    }
    // Taking off the query part
    if (strpos($uri, '?') !== false) {
        list($uri, $result['query']) = explode('?', $uri, 2);
    }

    if (substr($uri, 0, 3) === '///') {

        // The triple slash uris are a bit unusual, but we have special handling
        // for them: empty host, and the path starts at the third slash.
        $result['path'] = substr($uri, 2);
        $result['host'] = '';

    } elseif (substr($uri, 0, 2) === '//') {

        // Uris that have an authority part.
        //
        // The named groups are what the code below reads from $matches.
        // Userinfo may not contain a slash (the authority ends at the first
        // one) and the password is optional.
        $regex = '
          %^
            //
            (?: (?<user> [^:@/]+) (?: : (?<pass> [^@/]+))? @)?
            (?<host> ( [^:/]* | \[ [^\]]+ \] ))
            (?: : (?<port> [0-9]+))?
            (?<path> / .*)?
          $%x
        ';
        if (!preg_match($regex, $uri, $matches)) {
            throw new InvalidUriException('Invalid, or could not parse URI');
        }

        // preg_match leaves trailing groups that did not take part in the
        // match out of $matches, so every key is checked with empty()/isset()
        // rather than read directly.
        if (!empty($matches['host'])) $result['host'] = $matches['host'];
        if (!empty($matches['port'])) $result['port'] = (int)$matches['port'];
        if (isset($matches['path'])) $result['path'] = $matches['path'];
        if (!empty($matches['user'])) $result['user'] = $matches['user'];
        if (!empty($matches['pass'])) $result['pass'] = $matches['pass'];

    } else {
        $result['path'] = $uri;
    }

    return $result;

}