1<?php
2/**
3 *  OEMBED PLUGIN
4 *
5 *  Version history
6 *    2008-07-31 - release v0.6 by Dwayne Bent <dbb.pub0@liqd.org>
7 *    2019-09-01 - resuscitation & realignment with "Greebo"
8 *
9 *  Licensed under the GPL 2 [http://www.gnu.org/licenses/gpl.html]
10 *
11 **/
12
13if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
14if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
15define('OEMBED_BASE',DOKU_PLUGIN.'oembed/');
16require_once(DOKU_PLUGIN.'syntax.php');
17require_once(DOKU_INC.'inc/HTTP/HTTPClient.php');
18
19class syntax_plugin_oembed extends DokuWiki_Syntax_Plugin {
20    var $errors       = array();
21    var $version      = '1.0';
22    var $regex_master = '/^{{>\s*(?<url>.+?)(?:\s+(?<params>.+?))??\s*}}$/';
23
24    function getType(){
25        return 'substition';
26    }
27
28    function getAllowedTypes() {
29        return array();
30    }
31
32    function getPType(){
33        return 'block';
34    }
35
36    function getSort(){
37        return 285;
38    }
39
40    function connectTo($mode) {
41        $this->Lexer->addSpecialPattern('{{>.+?}}', $mode, 'plugin_oembed');
42    }
43
44    function handle($match, $state, $pos, Doku_Handler $handler){
45        if($state == DOKU_LEXER_SPECIAL){
46            if($parsed_tag = $this->parseTag($match)){
47                $oembed_data = $this->resolve($parsed_tag);
48                return array('oembed_data' => $oembed_data,
49                             'tag'         => $parsed_tag,
50                             'errors'      => $this->errors);
51            }
52        }
53
54        return false;
55    }
56
57    function render($mode, Doku_Renderer $renderer, $data) {
58        if($mode == 'xhtml'){
59            $renderer->doc .= $this->renderXHTML($data);
60        }
61
62        return false;
63    }
64
65    /***************************************************************************
66     * PARSE FUNCTIONS
67     *     Convert input strings to a usable form
68     **************************************************************************/
69
70    /*
71     * Parse the entire matched string
72     *
73     * $tag: The entire matched string
74     *
75     * returns:
76     *     false on error otherwise
77     *     array of parsed data:
78     *         url: the target url
79     *         params: array of parsed parameters, see parseParams()
80     */
81    function parseTag($tag){
82        if(preg_match($this->regex_master, $tag, $matches)){
83            return array('url'    => $matches['url'],
84                         'params' => $this->parseParams($matches['params']));
85        }
86
87        return false;
88    }
89
90    /*
91     * Parse the tag parameters
92     *
93     * $params: whitespace delimited list of parameters (no trailing or leading
94     *          whitespace)
95     *
96     * returns:
97     *     array of parsed parameters:
98     *         provider: array of provider parameters:
99     *             name => value
100     *         plugin: array of plugin parameters
101     *             name => value
102     */
103    function parseParams($params){
104        $parsed_params = array('provider' => array(), 'plugin' => array());
105
106        if($params != null){
107            foreach(preg_split('/\s+/', $params) as $param){
108                if(preg_match('/^(?<type>!|\?)(?<name>\S+?)(?:=(?<value>\S+?))?$/', $param, $matches)){
109                    if($matches['type'] == '?'){
110                        $parsed_params['provider'][$matches['name']] = $matches['value'];
111                    }
112                    else if($matches['type'] == '!'){
113                        $parsed_params['plugin'][$matches['name']] = $matches['value'];
114                    }
115                }
116            }
117        }
118
119        return $parsed_params;
120    }
121
122    /*
123     * Parse an HTTP response containing OEmbed data
124     *
125     * $response: array of HTTP response data
126     *     status: numerical HTTP status code
127     *     headers: array of HTTP headers
128     *         name => value
129     *     body: body of the response
130     *
131     * returns: false on error or array of parsed oembed data:
132     *         name => value
133     */
134    function parseResponse($response){
135        if($response['status'] != 200) return $this->error("Provider returned HTTP Status {$response['status']} for {$tag['url']}");
136        if(!$type = $this->parseContentType($response['headers']['content-type'])) return false;
137
138        $oembed = array();
139
140        switch($type){
141            case 'xml':
142                if(!$xml = simplexml_load_string($response['body'])) return $this->error("Unable to parse XML: {$response['body']}");
143
144                foreach($xml as $element){
145                    $oembed[$element->getName()] = (string) $element;
146                }
147
148                break;
149            case 'json':
150                $oembed = json_decode($response['body']);
151
152                break;
153            default:
154                return $this->error("Internal error occured. Found type: {$type}");
155        }
156
157        //if($oembed['version'] != '1.0') return $this->error("Unsupported OEmbed version: {$oembed['version']}");
158        return $oembed;
159    }
160
161    /*
162     * Parse a content-type string from an HTTP header
163     *
164     * $header: The content-type string
165     *
166     * returns: false on error or 'json' for JSON content or 'xml' for XML content
167     */
168    function parseContentType($header){
169        if(!preg_match('/^\s*(?<type>[^;\s]+)(.*)?/', $header, $matches)){
170            return $this->error("Invalid Content-Type header: {$header}");
171        }
172
173        switch($matches['type']){
174            case 'text/xml':
175                return 'xml';
176            case 'application/json':
177                return 'json';
178            // non-spec content-types, only supported for compatibility
179            case 'application/xml':
180                return 'xml';
181            case 'text/json':
182                return 'json';
183            case 'text/plain':
184                return 'json';
185            default:
186                return $this->error("Unsupported Content-Type: {$matches['type']}");
187        }
188    }
189
190    /***************************************************************************
191     * RESOLVE FUNCTIONS
192     *     Given parsed tag data get OEmbed data
193     **************************************************************************/
194
195    /*
196     * Given parsed tag information, return OEmbed data
197     *
198     * $tag: Parsed tag information, as from parseTag()
199     *
200     * returns: false on error or array of OEmbed data
201     *     oembed: array of OEmbed data as returned from provider
202     *     query_url: URL used to get the OEmbed data
203     *     target_url: URL to which the OEmbed data refers
204     */
205    function resolve($tag){
206
207        // try to resolve using cache
208        if($data = $this->resolveCache($tag)) return $data;
209
210        // try to resolve directly
211        if(array_key_exists('direct', $tag['params']['plugin'])){
212            if($this->getConf('enable_direct_link')){
213                return $this->resolveDirect($tag);
214            }
215        }
216
217        if($this->getConf('resolution_priority') == 'link discovery'){
218            // try link discovery
219            if($this->getConf('enable_link_discovery')){
220                if($data = $this->resolveDiscovery($tag)) return $data;
221            }
222
223            // try local provider list
224            if($this->getConf('enable_provider_list')){
225                if($data = $this->resolveProviderList($tag)) return $data;
226            }
227        }
228        else if($this->getConf('resolution_priority') == 'provider list'){
229            // try local provider list
230            if($this->getConf('enable_provider_list')){
231                if($data = $this->resolveProviderList($tag)) return $data;
232            }
233
234            // try link discovery
235            if($this->getConf('enable_link_discovery')){
236                if($data = $this->resolveDiscovery($tag)) return $data;
237            }
238        }
239        return $this->error("All resolution methods failed");
240    }
241
242    /*
243     * Analogous to resolve(), using the cache for resolution
244     */
245    function resolveCache($tag){
246        return false;
247    }
248
249    /*
250     * Analogous to resolve(), using a directly entered API endpoint for
251     * resolution
252     */
253    function resolveDirect($tag){
254        $query_url = $this->buildURL($tag['url'], $tag['params']['provider']);
255        if(!$response = $this->fetch($query_url)) return false;
256        if(!$oembed = $this->parseResponse($response)) return false;
257
258        return array('oembed'     => $oembed,
259                     'query_url'  => $query_url,
260                     'target_url' => $tag['params']['provider']['url']);
261    }
262
263    /*
264     * Analogous to resolve(), using link discovery for resolution
265     */
266    function resolveDiscovery($tag){
267
268        if(!$response = $this->fetch($tag['url'])) return false;
269        if(!$link_url = $this->getOEmbedLink($response['body'])) return false;
270
271        $query_url = $this->buildURL($link_url, $tag['params']['provider']);
272
273        if(!$response = $this->fetch($query_url)) return false;
274        if(!$oembed = $this->parseResponse($response)) return false;
275
276
277
278        return array('oembed'     => $oembed,
279                     'query_url'  => $query_url,
280                     'target_url' => $tag['url']);
281    }
282
283    /*
284     * Analogous to resolve(), using the local provider list for resolution
285     */
286    function resolveProviderList($tag){
287        if(!$api = $this->getProviderAPI($tag['url'])) return false;
288
289        $api = str_replace("{format}", $this->getConf('format_preference'), $api);
290        $params = array_merge($tag['params']['provider'], array('url' => $tag['url']));
291        $query_url = $this->buildURL($api, $params);
292
293        if(!$response = $this->fetch($query_url)) return false;
294        if(!$oembed = $this->parseResponse($response)) return false;
295
296        return array('oembed'     => $oembed,
297                     'query_url'  => $query_url,
298                     'target_url' => $tag['url']);
299    }
300
301    /***************************************************************************
302     * RENDER FUNCTIONS
303     *     Convert OEmbed data to a presentable form
304     **************************************************************************/
305
306    /*
307     * Given OEmbed data as returned by resolve(), produces a valid XHTML
308     * representation
309     *
310     * $data: OEmbed data as returned by resolve()
311     *
312     * returns: XHTML representation of OEmbed data
313     */
314    function renderXHTML($data){
315        $content = '';
316
317        if(!$data['oembed_data']){
318            $content .= "OEmbed Error";
319            $content .= "<ul>";
320            foreach($data['errors'] as $error){
321                $content .= "<li>".$error."</li>";
322            }
323            $content .= "</ul>";
324
325            return $content;
326        }
327
328        $oembed = $this->sanitizeOEmbed($data['oembed_data']['oembed']);
329
330        if(array_key_exists('thumbnail', $data['tag']['params']['plugin'])){
331            if($oembed['thumbnail_url']){
332                $img = '<img src="'.$oembed['thumbnail_url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" height="'.$oembed['thumbnail_height'].'px" width="'.$oembed['thumbnail_width'].'px"/>';
333                $content = '<a href="'.$data['oembed_data']['target_url'].'">'.$img.'</a>';
334            }
335            else{
336                $content = $this->renderXHTMLLink($data);
337            }
338        }
339        else{
340            switch($oembed['type']){
341                case 'photo':
342                    if($this->getConf('fullwidth_images')){
343                        $content = '<img src="'.$oembed['url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" width=100% />';
344                    } else {
345                        $content = '<img src="'.$oembed['url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" height="'.$oembed['height'].'px" width="'.$oembed['width'].'px"/>';
346                    }
347                    break;
348                case 'video':
349                    $content = $oembed['html'];
350                    break;
351                case 'link':
352                    $content = $this->renderXHTMLLink($data);
353                    break;
354                case 'rich':
355                    $content = $oembed['html'];
356                    break;
357                default:
358                    $content = "OEmbed Error <ul><li>Unsupported media type: {$oembed['type']}</li></ul>";
359            }
360        }
361
362        return $content;
363    }
364
365    /*
366     * Given OEmbed data as returned by resolve(), produces a valid XHTML
367     * representation as a simple link
368     *
369     * $data: OEmbed data as returned by resolve()
370     *
371     * returns: XHTML representation of OEmbed data as a simple link
372     */
373    function renderXHTMLLink($data){
374        $text .= ($data['oembed_data']['oembed']['provider_name'] != null) ? $data['oembed_data']['oembed']['provider_name'].': ' : '';
375        $text .= $data['oembed_data']['oembed']['title'];
376        $text .= ($data['oembed_data']['oembed']['author_name'] != null) ? ' &ndash; '.$data['oembed_data']['oembed']['author_name'] : '';
377        return '<a class="urlextern" href="'.$data['oembed_data']['target_url'].'">'.$text.'</a>';
378    }
379
380    /***************************************************************************
381     * UTILITY FUNCTIONS
382     *     Provides shared functionality
383     **************************************************************************/
384
385    /*
386     * Stores a message in the errors array and returns false
387     *
388     * $msg: message to store
389     *
390     * returns: false
391     */
392    function error($msg){
393        array_push($this->errors, $msg);
394        return false;
395    }
396
397    /*
398     * Performs an HTTP GET request on the given URL
399     *
400     * $url: URL to perform the request on
401     *
402     * returns: false on error or array representing the HTTP response
403     *     status: numerical HTTP status code
404     *     headers: array of HTTP headers
405     *         name => value
406     *     body: HTTP response body
407     */
408    function fetch($url){
409        $client = new DokuHTTPClient();
410        if(!$client->sendRequest($url)){
411            return $this->error("Error sending request to provider: {$url}");
412        }
413
414        return array('status'  => $client->status,
415                     'headers' => $client->resp_headers,
416                     'body'    => $client->resp_body);
417    }
418
419    /*
420     * Given a base URL, create a new URL using the given parameters. Query
421     * values are URL encoded.
422     *
423     * $base: base URL, any existing parameter values should be URL encoded.
424     * $params: array of parameters to add to URL
425     *     name => value
426     *
427     * returns: the new URL
428     */
429    function buildURL($base, $params){
430        $url = $base;
431
432        $first = strpos($base,"?") === false ? true : false;
433        foreach($params as $name => $value){
434            if($first){ $url .= "?"; $first = false; }
435            else { $url .= "&"; }
436
437            $url .= $name."=".rawurlencode($value);
438        }
439
440        return $url;
441    }
442
443    /*
444     * Given raw HTML, tries to extract oembed discovery link
445     *
446     * Based on code by Keith Devens:
447     * http://keithdevens.com/weblog/archive/2002/Jun/03/RSSAuto-DiscoveryPHP
448     *
449     * Parameters:
450     *   $html: raw HTML
451     *
452     * Returns: false on error or no link present or an OEmbed discovery link
453     */
454    function getOEmbedLink($html){
455        $ret_link = false;
456
457        if(!$html) return false;
458
459        // search through the HTML, save all <link> tags
460        // and store each link's attributes in an associative array
461        preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $matches);
462        $links = $matches[1];
463        $final_links = array();
464        $link_count = count($links);
465        for($n=0; $n<$link_count; $n++){
466            $attributes = preg_split('/\s+/s', $links[$n]);
467            foreach($attributes as $attribute){
468                $att = preg_split('/\s*=\s*/s', $attribute, 2);
469                if(isset($att[1])){
470                    $att[1] = preg_replace('/([\'"]?)(.*)\1/', '$2', $att[1]);
471                    $final_link[strtolower($att[0])] = $att[1];
472                }
473            }
474            $final_links[$n] = $final_link;
475        }
476
477        // now figure out which one points to the OEmbed data
478        for($n=0; $n<$link_count; $n++){
479            if(strtolower($final_links[$n]['rel']) == 'alternate'){
480                if(strtolower($final_links[$n]['type']) == 'application/json+oembed'){
481                    if($this->getConf('format_preference') == 'json'){
482                        return $final_links[$n]['href'];
483                    }
484                    else{
485                        $ret_link = $final_links[$n]['href'];
486                    }
487                }
488
489                // application/xml+oembed only exists for compatability not in spec
490                if(strtolower($final_links[$n]['type']) == 'text/xml+oembed' or
491                   strtolower($final_links[$n]['type']) == 'application/xml+oembed'){
492                    if($this->getConf('format_preference') == 'xml'){
493                        return $final_links[$n]['href'];
494                    }
495                    else{
496                        $ret_link = $final_links[$n]['href'];
497                    }
498                }
499            }
500        }
501
502        return $ret_link;
503    }
504
505    /*
506     * Given a URL, finds a OEmbed provider API endpoint which can be used with
507     * it from the local provider list.
508     *
509     * $url: URL to search a provider for
510     *
511     * Returns: false on error or no provider find or the API endpoint of an
512     *          appropriate provider
513     */
514    function getProviderAPI($url){
515        $providers_path = OEMBED_BASE.'providers.xml';
516        if(!$providers = simplexml_load_file($providers_path)) return false;
517
518        foreach($providers->provider as $provider){
519            foreach($provider->scheme as $scheme){
520                $regex = "@^".str_replace("@","\@",$scheme)."$@i";
521                if(preg_match($regex, trim($url))){
522                    $attrs = $provider->attributes();
523                    if(($api = $attrs['api']) != null){
524                        return $api;
525                    }
526                }
527            }
528        }
529
530        return false;
531    }
532
533    /*
534     * Runs htmlspecialchars() on values in OEmbed data EXCEPT for html values
535     *
536     * $oembed: array of OEmbed data from parseResponse()
537     *
538     * Returns: identical array to $oembed in which all values except for html
539     *          are run through htmlspecialchars()
540     */
541    function sanitizeOEmbed($oembed){
542        $retarray = array();
543
544        foreach($oembed as $key => $value){
545            if($key == 'html'){
546                $retarray[$key] = $value;
547            }
548            else{
549                $retarray[$key] = htmlspecialchars($value);
550            }
551        }
552
553        return $retarray;
554    }
555
556    /***************************************************************************
557     * DEBUG FUNCTIONS
     *     For testing and development, not regularly used
559     **************************************************************************/
560
561    function _log($msg){
562        $fh = fopen(OEMBED_BASE."oembed.log",'a');
563        $curtime = date('Y-m-d H:i:s');
564        fwrite($fh, "[{$curtime}] {$msg}\n");
565        fclose($fh);
566    }
567
568    function _logParsedTag($parsed_tag){
569        $this->_log("Parsed Tag");
570        $this->_log("    URL: {$parsed_tag['url']}");
571        $this->_log("    Provider Params:");
572        foreach($parsed_tag['params']['provider'] as $key => $value){
573            $this->_log("        {$key} => {$value}");
574        }
575        $this->_log("    Plugin Params:");
576        foreach($parsed_tag['params']['plugin'] as $key => $value){
577            $this->_log("        {$key} => {$value}");
578        }
579    }
580
581    function _logOEmbedData($oembed){
582        $this->_log("OEmbed Data:");
583        $this->_log("    target_url: {$oembed['target_url']}");
584        $this->_log("    query_url: {$oembed['query_url']}");
585        $this->_log("    Response:");
586        foreach($oembed['oembed'] as $name => $value){
587            $this->_log("        {$name}: {$value}");
588        }
589    }
590
591    function _logErrors($errors){
592        $this->_log("Errors:");
593        foreach($errors as $error){
594            $this->_log("    {$error}");
595        }
596    }
597}
598
599