1<?php
2/**
3 * Functions for dealing with OpenID trust roots
4 *
5 * PHP versions 4 and 5
6 *
7 * LICENSE: See the COPYING file included in this distribution.
8 *
9 * @package OpenID
10 * @author JanRain, Inc. <openid@janrain.com>
11 * @copyright 2005-2008 Janrain, Inc.
12 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache
13 */
14
15require_once 'Auth/OpenID/Discover.php';
16
/**
 * A regular expression that matches a domain ending in a top-level domain.
 * Used in checking trust roots for sanity.
 *
 * The pattern requires a literal dot, one of the listed TLDs and an
 * optional trailing dot, anchored at the end of the subject. The
 * matched TLD (without the dots) is captured in group 1. The pattern
 * carries no case-insensitive modifier, so only lower-case TLDs match.
 *
 * @access private
 */
define('Auth_OpenID___TLDs',
       '/\.(ac|ad|ae|aero|af|ag|ai|al|am|an|ao|aq|ar|arpa|as|asia' .
       '|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|biz|bj|bm|bn|bo|br' .
       '|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co' .
       '|com|coop|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg' .
       '|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl' .
       '|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie' .
       '|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh' .
       '|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly' .
       '|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt' .
       '|mu|museum|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no' .
       '|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pro|ps|pt' .
       '|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl' .
       '|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm' .
       '|tn|to|tp|tr|travel|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve' .
       '|vg|vi|vn|vu|wf|ws|xn--0zwm56d|xn--11b5bs3a9aj6g' .
       '|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad|xn--g6w251d' .
       '|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp' .
       '|xn--kgbechtv|xn--zckzah|ye|yt|yu|za|zm|zw)\.?$/');
42
/**
 * A regular expression used to validate the host part of a trust
 * root before it is examined any further.
 *
 * The whole subject must consist of zero or more of: ASCII letters,
 * digits, the punctuation characters - ! $ & ' ( ) * + , ; = . _ ~
 * or a percent sign followed by exactly two alphanumeric characters.
 * Note that "*" is allowed, so wildcard hosts pass this check.
 *
 * @access private
 */
define('Auth_OpenID___HostSegmentRe',
       "/^(?:[-a-zA-Z0-9!$&'\\(\\)\\*+,;=._~]|%[a-zA-Z0-9]{2})*$/");
45
/**
 * A wrapper for trust-root related functions.
 *
 * All methods are static. A trust root (realm) is a URL pattern whose
 * host may start with a single "*" wildcard; the methods here parse
 * such patterns, check them for sanity and match URLs against them.
 */
class Auth_OpenID_TrustRoot {
    /**
     * Return a discovery URL for this realm.
     *
     * For a wildcard realm the leading "*" is replaced by "www" and
     * the URL is rebuilt from the parsed scheme, host and path only.
     * For any other realm the realm string as returned by
     * Auth_OpenID_urinorm() is returned unchanged.
     *
     * @static
     *
     * @param string $realm The realm (trust root) of the OpenID
     * authentication request
     *
     * @return mixed The URL upon which relying party discovery should
     * be run in order to verify the return_to URL, or false if the
     * realm could not be parsed or was not valid.
     */
    static function buildDiscoveryURL($realm)
    {
        $parsed = Auth_OpenID_TrustRoot::_parse($realm);

        if ($parsed === false) {
            return false;
        }

        if (!$parsed['wildcard']) {
            return $parsed['unparsed'];
        }

        // Use "www." in place of the star. substr() is used instead
        // of $host[0] because the host of a bare "*" realm is the
        // empty string, and indexing it would raise an
        // "uninitialized string offset" warning.
        if (substr($parsed['host'], 0, 1) !== '.') {
            return false;
        }

        $www_domain = 'www' . $parsed['host'];

        return sprintf('%s://%s%s', $parsed['scheme'],
                       $www_domain, $parsed['path']);
    }

    /**
     * Parse a URL into its trust_root parts.
     *
     * The URL is normalized with Auth_OpenID_urinorm() and split
     * with parse_url(). It is rejected unless it has a scheme of
     * http or https, a host made only of characters allowed by
     * Auth_OpenID___HostSegmentRe, no user, password or fragment,
     * and at most one "*" which must be the first character of the
     * host and be followed by nothing or by a dot.
     *
     * On success the array returned by parse_url() is returned with
     * these entries set or overwritten:
     *  - 'scheme':   lower-cased scheme
     *  - 'host':     lower-cased host with the leading "*" removed
     *  - 'wildcard': true if the host started with "*"
     *  - 'path':     lower-cased path, "/" if the URL had none
     *  - 'port':     the port, or false if the URL had none
     *  - 'unparsed': the normalized URL string
     * A 'query' entry is present only if parse_url() produced one.
     *
     * Note that the path is lower-cased here, so the comparisons in
     * match() are case-insensitive with respect to the path.
     *
     * @static
     *
     * @access private
     *
     * @param string $trust_root The url to parse
     *
     * @return mixed $parsed Either an associative array of trust root
     * parts or false if parsing failed.
     */
    static function _parse($trust_root)
    {
        $trust_root = Auth_OpenID_urinorm($trust_root);
        if ($trust_root === null) {
            return false;
        }

        // Reject URLs whose authority carries two or more ":digits"
        // suffixes (e.g. "host:80:80").
        if (preg_match("/:\/\/[^:]+(:\d+){2,}(\/|$)/", $trust_root)) {
            return false;
        }

        // The error suppression is kept from the original code,
        // presumably to silence warnings on malformed input.
        $parts = @parse_url($trust_root);
        if ($parts === false) {
            return false;
        }

        foreach (array('scheme', 'host') as $required) {
            if (!array_key_exists($required, $parts)) {
                return false;
            }
        }

        foreach (array('user', 'pass', 'fragment') as $forbidden) {
            if (array_key_exists($forbidden, $parts)) {
                return false;
            }
        }

        if (!preg_match(Auth_OpenID___HostSegmentRe, $parts['host'])) {
            return false;
        }

        $scheme = strtolower($parts['scheme']);
        if (!in_array($scheme, array('http', 'https'), true)) {
            return false;
        }
        $parts['scheme'] = $scheme;

        $host = strtolower($parts['host']);
        $hostparts = explode('*', $host);
        switch (count($hostparts)) {
        case 1:
            // No star at all
            $parts['wildcard'] = false;
            break;
        case 2:
            // Exactly one star: nothing may precede it, and what
            // follows must be empty or start with a dot. The
            // segments are compared against '' explicitly because
            // the string "0" is falsy in PHP; a truthiness test
            // would accept "0*.example.com" and "*0".
            if ($hostparts[0] !== '' ||
                ($hostparts[1] !== '' &&
                 substr($hostparts[1], 0, 1) !== '.')) {
                return false;
            }
            $host = $hostparts[1];
            $parts['wildcard'] = true;
            break;
        default:
            // More than one star
            return false;
        }

        if (strpos($host, ':') !== false) {
            return false;
        }

        $parts['host'] = $host;

        if (isset($parts['path'])) {
            $path = strtolower($parts['path']);
            if (substr($path, 0, 1) !== '/') {
                return false;
            }
        } else {
            $path = '/';
        }
        $parts['path'] = $path;

        if (!isset($parts['port'])) {
            $parts['port'] = false;
        }

        $parts['unparsed'] = $trust_root;

        return $parts;
    }

    /**
     * Is this trust root sane?
     *
     * A trust root is sane if it is syntactically valid and it has a
     * reasonable domain name. Specifically, the domain name must be
     * more than one level below a standard TLD or more than two
     * levels below a two-letter tld.
     *
     * For example, '*.com' is not a sane trust root, but '*.foo.com'
     * is.  '*.co.uk' is not sane, but '*.bbc.co.uk' is.
     *
     * This check is not always correct, but it attempts to err on the
     * side of marking sane trust roots insane instead of marking
     * insane trust roots sane. For example, 'kink.fm' is marked as
     * insane even though it "should" (for some meaning of should) be
     * marked sane.
     *
     * This function should be used when creating OpenID servers to
     * alert the users of the server when a consumer attempts to get
     * the user to accept a suspicious trust root.
     *
     * @static
     * @param string $trust_root The trust root to check
     * @return bool $sanity Whether the trust root looks OK
     */
    static function isSane($trust_root)
    {
        $parts = Auth_OpenID_TrustRoot::_parse($trust_root);
        if ($parts === false) {
            return false;
        }

        // Localhost is a special case
        if ($parts['host'] === 'localhost') {
            return true;
        }

        $host_parts = explode('.', $parts['host']);
        if ($parts['wildcard']) {
            // Remove the empty string from the beginning of the array
            array_shift($host_parts);
        }

        // Remove the empty string a trailing dot leaves at the end
        if ($host_parts && $host_parts[count($host_parts) - 1] === '') {
            array_pop($host_parts);
        }

        if (!$host_parts) {
            return false;
        }

        // Don't allow adjacent dots
        if (in_array('', $host_parts, true)) {
            return false;
        }

        // Get the top-level domain of the host. If it is not a valid TLD,
        // it's not sane.
        if (!preg_match(Auth_OpenID___TLDs, $parts['host'], $matches)) {
            return false;
        }
        $tld = $matches[1];

        // A bare TLD is never sane
        if (count($host_parts) == 1) {
            return false;
        }

        if ($parts['wildcard']) {
            // It's a 2-letter tld with a short second to last segment
            // so there needs to be more than two segments specified
            // (e.g. *.co.uk is insane)
            $second_level = $host_parts[count($host_parts) - 2];
            if (strlen($tld) == 2 && strlen($second_level) <= 3) {
                return count($host_parts) > 2;
            }
        }

        return true;
    }

    /**
     * Does this URL match the given trust root?
     *
     * Return whether the URL falls under the given trust root. This
     * does not check whether the trust root is sane. If the URL or
     * trust root do not parse, this function will return false.
     *
     * All string comparisons are strict (===). Hosts and query
     * strings may look numeric, and PHP's loose comparison treats
     * numeric strings such as "1000" and "1e3" as equal.
     *
     * @static
     *
     * @param string $trust_root The trust root to match against
     *
     * @param string $url The URL to check
     *
     * @return bool $matches Whether the URL matches against the
     * trust root
     */
    static function match($trust_root, $url)
    {
        $trust_root_parsed = Auth_OpenID_TrustRoot::_parse($trust_root);
        $url_parsed = Auth_OpenID_TrustRoot::_parse($url);
        if ($trust_root_parsed === false || $url_parsed === false) {
            return false;
        }

        // Check hosts matching. The URL itself may not be a wildcard.
        if ($url_parsed['wildcard']) {
            return false;
        }

        $host = $url_parsed['host'];
        if ($trust_root_parsed['wildcard']) {
            // An empty tail (trust root host "*") matches any host.
            // Otherwise the URL host must end with the tail
            // (".example.com") or be equal to the tail without its
            // leading dot ("example.com").
            $host_tail = $trust_root_parsed['host'];
            if ($host_tail !== '' &&
                substr($host, -(strlen($host_tail))) !== $host_tail &&
                substr($host_tail, 1) !== $host) {
                return false;
            }
        } else {
            if ($trust_root_parsed['host'] !== $host) {
                return false;
            }
        }

        // Check path and query matching
        $base_path = $trust_root_parsed['path'];
        $path = $url_parsed['path'];
        if (!isset($trust_root_parsed['query'])) {
            if ($base_path !== $path) {
                $base_len = strlen($base_path);

                // The URL path must start with the trust root path...
                if (substr($path, 0, $base_len) !== $base_path) {
                    return false;
                }

                // ...and the prefix must end on a "/" boundary, so
                // that "/foo" does not match "/foobar".
                if (substr($base_path, -1) !== '/' &&
                    substr($path, $base_len, 1) !== '/') {
                    return false;
                }
            }
        } else {
            // With a query in the trust root the paths must be
            // identical and the URL query must either be identical or
            // extend the trust root query with further "&" arguments.
            $base_query = $trust_root_parsed['query'];
            $query = isset($url_parsed['query']) ? $url_parsed['query'] : '';
            $qplus = substr($query, 0, strlen($base_query) + 1);
            $bqplus = $base_query . '&';
            if ($base_path !== $path ||
                ($base_query !== $query && $qplus !== $bqplus)) {
                return false;
            }
        }

        // The port and scheme need to match exactly
        return ($trust_root_parsed['scheme'] === $url_parsed['scheme'] &&
                $url_parsed['port'] === $trust_root_parsed['port']);
    }
}
327
/**
 * Filter that keeps only relying party return_to endpoints.
 *
 * The endpoint is asked, via its matchTypes() method, whether it
 * matches the Auth_OpenID_RP_RETURN_TO_URL_TYPE service type. If it
 * does, the endpoint object itself is returned; otherwise null is
 * returned.
 *
 * This function is intended to be used as a filter for the Yadis
 * filtering interface.
 *
 * @param object $endpoint A service endpoint object providing a
 * matchTypes() method, as obtained by performing Yadis discovery.
 *
 * @return mixed The endpoint that was passed in, or null if it is
 * not a relying party return_to endpoint.
 */
function filter_extractReturnURL($endpoint)
{
    $wanted_types = array(Auth_OpenID_RP_RETURN_TO_URL_TYPE);

    return $endpoint->matchTypes($wanted_types) ? $endpoint : null;
}
352
/**
 * Collect the endpoints accepted by filter_extractReturnURL().
 *
 * The input is iterated in order; every endpoint for which the
 * filter returns a true value is appended to a new, sequentially
 * indexed array. Keys of the input list are not preserved.
 *
 * @param array $endpoint_list The endpoints to filter (taken by
 * reference; this function does not modify it)
 *
 * @return array The accepted endpoints (returned by reference)
 */
function &Auth_OpenID_extractReturnURL(&$endpoint_list)
{
    $accepted = array();

    foreach ($endpoint_list as $candidate) {
        if (!filter_extractReturnURL($candidate)) {
            continue;
        }

        $accepted[] = $candidate;
    }

    return $accepted;
}
365
/**
 * Is the return_to URL under one of the supplied allowed return_to
 * URLs?
 *
 * A return_to pattern works the same as a realm, except that it is
 * not allowed to use a wildcard. This is modelled by parsing each
 * pattern as a realm and skipping it if it fails to parse or has a
 * wildcard.
 *
 * @param array $allowed_return_to_urls The allowed return_to patterns
 *
 * @param string $return_to The return_to URL to check
 *
 * @return bool true as soon as one pattern matches, false if none do
 */
function Auth_OpenID_returnToMatches($allowed_return_to_urls, $return_to)
{
    foreach ($allowed_return_to_urls as $pattern) {
        $realm = Auth_OpenID_TrustRoot::_parse($pattern);

        // Must parse as a trust root
        if ($realm === false) {
            continue;
        }

        // Must not have a wildcard
        if ($realm['wildcard']) {
            continue;
        }

        // Must match the return_to that was passed in
        if (Auth_OpenID_TrustRoot::match($pattern, $return_to)) {
            return true;
        }
    }

    // No URL in the list matched
    return false;
}
392
/**
 * Given a relying party discovery URL return a list of return_to
 * URLs.
 *
 * Service endpoints are obtained through
 * Auth_Yadis_getServiceEndpoints(), which is handed the discover
 * function and the fetcher. Discovery is therefore not invoked a
 * second time from here: the original code did so and threw the
 * result away, which only cost an extra fetch.
 *
 * @param string $relying_party_url The URL to run discovery on
 *
 * @param mixed $fetcher The fetcher passed through to
 * Auth_Yadis_getServiceEndpoints()
 *
 * @param callback $discover_function The discovery callback; when
 * null, array('Auth_Yadis_Yadis', 'discover') is used
 *
 * @return mixed An array with the server_url of every endpoint
 * accepted by Auth_OpenID_extractReturnURL(), or false if the URL
 * reported after discovery differs from $relying_party_url (that
 * is, verification caused a redirect).
 */
function Auth_OpenID_getAllowedReturnURLs($relying_party_url, $fetcher,
              $discover_function=null)
{
    if ($discover_function === null) {
        $discover_function = array('Auth_Yadis_Yadis', 'discover');
    }

    $xrds_parse_cb = array('Auth_OpenID_ServiceEndpoint', 'consumerFromXRDS');

    list($rp_url_after_redirects, $endpoints) =
        Auth_Yadis_getServiceEndpoints($relying_party_url, $xrds_parse_cb,
                                       $discover_function, $fetcher);

    if ($rp_url_after_redirects !== $relying_party_url) {
        // Verification caused a redirect
        return false;
    }

    $return_to_urls = array();
    $matching_endpoints = Auth_OpenID_extractReturnURL($endpoints);

    foreach ($matching_endpoints as $e) {
        $return_to_urls[] = $e->server_url;
    }

    return $return_to_urls;
}
427
/**
 * Verify that a return_to URL is valid for the given realm.
 *
 * This function builds a discovery URL from the realm, hands it to
 * the $_vrfy callback to obtain the allowed return_to URLs, and
 * finally checks whether the supplied return_to URL matches one of
 * them.
 *
 * @param string $realm_str The realm to verify against
 *
 * @param string $return_to The return_to URL to check
 *
 * @param mixed $fetcher The fetcher handed to the $_vrfy callback
 *
 * @param callback $_vrfy Called with the discovery URL and the
 * fetcher; expected to return the allowed return_to URLs or false.
 * Defaults to 'Auth_OpenID_getAllowedReturnURLs'.
 *
 * @return bool true if the return_to URL is valid for the realm
 */
function Auth_OpenID_verifyReturnTo($realm_str, $return_to, $fetcher,
              $_vrfy='Auth_OpenID_getAllowedReturnURLs')
{
    $discovery_url = Auth_OpenID_TrustRoot::buildDiscoveryURL($realm_str);

    // The realm_str could not be parsed.
    if ($discovery_url === false) {
        return false;
    }

    $allowed = call_user_func($_vrfy, $discovery_url, $fetcher);

    // The callback could not produce a list of allowed URLs.
    if ($allowed === false) {
        return false;
    }

    return Auth_OpenID_returnToMatches($allowed, $return_to) ? true : false;
}
461
462