1<?php
2
3use dokuwiki\Extension\SyntaxPlugin;
4use dokuwiki\HTTP\DokuHTTPClient;
5
6/**
7 * DokuWiki Plugin oembed (Syntax Component)
8 *
9 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
10 * @author nik gaffney <nik@fo.am>
11 *
12 *  Version history
13 *   - 2008-07-31 - release v0.6 by Dwayne Bent <dbb.pub0@liqd.org>
14 *   - 2019-09-01 - resuscitation & realignment with "Greebo"
 *   - 2025-05-25 - rewrite for "Kaos" & "Librarian" compatibility
16 *
17 */
18
19// path on server filesystem
20
// Fall back to paths derived from this file's location when the constants
// have not been defined yet.
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
// Guarded like the two constants above, so an already existing definition
// does not trigger a "constant already defined" warning.
if(!defined('OEMBED_BASE')) define('OEMBED_BASE',DOKU_PLUGIN.'oembed/');
24
25class syntax_plugin_oembed extends SyntaxPlugin
26{
    // Messages collected by error(); handle() hands them to the renderer
    var $errors       = array();
    // NOTE(review): presumably the OEmbed protocol version this plugin targets
    // (compare the disabled version check in parseResponse()) - confirm
    var $version      = '1.0';
    // Matches one complete {{>url params}} tag; the named group "url" is always
    // captured, "params" only when something follows the URL
    var $regex_master = '/^{{>\s*(?<url>.+?)(?:\s+(?<params>.+?))??\s*}}$/';
30
    /**
     * Syntax mode type reported for this plugin.
     *
     * @inheritDoc
     * @return string always 'substition'
     */
    public function getType()
    {
        // NOTE(review): 'substition' looks like a typo but is presumably the
        // exact identifier the DokuWiki parser expects - confirm before changing
        return 'substition';
    }
36
    /**
     * Paragraph type reported for this plugin.
     *
     * @inheritDoc
     * @return string always 'block'
     */
    public function getPType()
    {
        // NOTE(review): presumably makes the parser treat the embed as a block
        // level element rather than inline content - confirm
        return 'block';
    }
42
    /**
     * Sort value reported for this plugin's syntax mode.
     *
     * @inheritDoc
     * @return int always 285
     */
    public function getSort()
    {
        // NOTE(review): presumably chosen relative to the sort values of other
        // syntax modes that also start with "{{" - confirm
        return 285;
    }
48
49    /** @inheritDoc */
50    public function connectTo($mode)
51    {
52        $this->Lexer->addSpecialPattern('{{>.+?}}', $mode, 'plugin_oembed');
53    }
54
55    //    /** @inheritDoc */
56    //    public function postConnect()
57    //    {
58    //        $this->Lexer->addExitPattern('</FIXME>', 'plugin_oembed');
59    //    }
60
61    /** @inheritDoc */
62    public function handle($match, $state, $pos, Doku_Handler $handler)
63    {
64        if($state == DOKU_LEXER_SPECIAL){
65            if($parsed_tag = $this->parseTag($match)){
66                $oembed_data = $this->resolve($parsed_tag);
67                return array('oembed_data' => $oembed_data,
68                             'tag'         => $parsed_tag,
69                             'errors'      => $this->errors);
70            }
71        }
72        return false;
73    }
74
75    /** @inheritDoc */
76    public function render($mode, Doku_Renderer $renderer, $data)
77    {
78        if ($mode !== 'xhtml') {
79            return false;
80        }
81        $renderer->doc .= $this->renderXHTML($data);
82    }
83
84    /***************************************************************************
85     * PARSE FUNCTIONS
86     *     Convert input strings to a usable form
87     **************************************************************************/
88
89    /*
90     * Parse the entire matched string
91     *
92     * $tag: The entire matched string
93     *
94     * returns:
95     *     false on error otherwise
96     *     array of parsed data:
97     *         url: the target url
98     *         params: array of parsed parameters, see parseParams()
99     */
100    function parseTag($tag){
101        if(preg_match($this->regex_master, $tag, $matches)){
102            return array('url'    => $matches['url'],
103                         'params' => $this->parseParams($matches['params']));
104        }
105
106        return false;
107    }
108
109    /*
110     * Parse the tag parameters
111     *
112     * $params: whitespace delimited list of parameters (no trailing or leading
113     *          whitespace)
114     *
115     * returns:
116     *     array of parsed parameters:
117     *         provider: array of provider parameters:
118     *             name => value
119     *         plugin: array of plugin parameters
120     *             name => value
121     */
122    function parseParams($params){
123        $parsed_params = array('provider' => array(), 'plugin' => array());
124
125        if($params != null){
126            foreach(preg_split('/\s+/', $params) as $param){
127                if(preg_match('/^(?<type>!|\?)(?<name>\S+?)(?:=(?<value>\S+?))?$/', $param, $matches)){
128                    if($matches['type'] == '?'){
129                        $parsed_params['provider'][$matches['name']] = $matches['value'];
130                    }
131                    else if($matches['type'] == '!'){
132                        $parsed_params['plugin'][$matches['name']] = $matches['value'];
133                    }
134                }
135            }
136        }
137
138        return $parsed_params;
139    }
140
141    /*
142     * Parse an HTTP response containing OEmbed data
143     *
144     * $response: array of HTTP response data
145     *     status: numerical HTTP status code
146     *     headers: array of HTTP headers
147     *         name => value
148     *     body: body of the response
149     *
150     * returns: false on error or array of parsed oembed data:
151     *         name => value
152     */
153    function parseResponse($response){
154        if($response['status'] != 200) return $this->error("Provider returned HTTP Status {$response['status']} for {$tag['url']}");
155        if(!$type = $this->parseContentType($response['headers']['content-type'])) return false;
156
157        $oembed = array();
158
159        switch($type){
160        case 'xml':
161            if(!$xml = simplexml_load_string($response['body'])) return $this->error("Unable to parse XML: {$response['body']}");
162
163            foreach($xml as $element){
164                $oembed[$element->getName()] = (string) $element;
165            }
166
167            break;
168        case 'json':
169            $oembed = json_decode($response['body']);
170
171            break;
172        default:
173            return $this->error("Internal error occured. Found type: {$type}");
174        }
175
176        //if($oembed['version'] != '1.0') return $this->error("Unsupported OEmbed version: {$oembed['version']}");
177        return $oembed;
178    }
179
180    /*
181     * Parse a content-type string from an HTTP header
182     *
183     * $header: The content-type string
184     *
185     * returns: false on error or 'json' for JSON content or 'xml' for XML content
186     */
187    function parseContentType($header){
188        if(!preg_match('/^\s*(?<type>[^;\s]+)(.*)?/', $header, $matches)){
189            return $this->error("Invalid Content-Type header: {$header}");
190        }
191
192        switch($matches['type']){
193        case 'text/xml':
194            return 'xml';
195        case 'application/json':
196            return 'json';
197            // non-spec content-types, only supported for compatibility
198        case 'application/xml':
199            return 'xml';
200        case 'text/json':
201            return 'json';
202        case 'text/plain':
203            return 'json';
204        default:
205            return $this->error("Unsupported Content-Type: {$matches['type']}");
206        }
207    }
208
209    /*
210     * Given parsed tag information, return OEmbed data
211     *
212     * $tag: Parsed tag information, as from parseTag()
213     *
214     * returns: false on error or array of OEmbed data
215     *     oembed: array of OEmbed data as returned from provider
216     *     query_url: URL used to get the OEmbed data
217     *     target_url: URL to which the OEmbed data refers
218     */
219    function resolve($tag){
220
221        // try to resolve using cache
222        if($data = $this->resolveCache($tag)) return $data;
223
224        // try to resolve directly
225        if(array_key_exists('direct', $tag['params']['plugin'])){
226            if($this->getConf('enable_direct_link')){
227                return $this->resolveDirect($tag);
228            }
229        }
230
231        if($this->getConf('resolution_priority') == 'link discovery'){
232            // try link discovery
233            if($this->getConf('enable_link_discovery')){
234                if($data = $this->resolveDiscovery($tag)) return $data;
235            }
236            // try local provider list
237            if($this->getConf('enable_provider_list')){
238                if($data = $this->resolveProviderList($tag)) return $data;
239            }
240        }
241        else if($this->getConf('resolution_priority') == 'provider list'){
242            // try local provider list
243            if($this->getConf('enable_provider_list')){
244                if($data = $this->resolveProviderList($tag)) return $data;
245            }
246            // try link discovery
247            if($this->getConf('enable_link_discovery')){
248                if($data = $this->resolveDiscovery($tag)) return $data;
249            }
250        }
251        return $this->error("All resolution methods failed");
252    }
253
    /*
     * Analogous to resolve(), using the cache for resolution
     *
     * Placeholder: no cache lookup is implemented here, so every call reports
     * a miss and resolve() carries on with its other methods.
     *
     * $tag: Parsed tag information, as from parseTag() (unused)
     *
     * returns: always false
     */
    function resolveCache($tag){
        return false;
    }
260
261    /*
262     * Analogous to resolve(), using a directly entered API endpoint for
263     * resolution
264     */
265    function resolveDirect($tag){
266        $query_url = $this->buildURL($tag['url'], $tag['params']['provider']);
267        if(!$response = $this->fetch($query_url)) return false;
268        if(!$oembed = $this->parseResponse($response)) return false;
269
270        return array('oembed'     => $oembed,
271                     'query_url'  => $query_url,
272                     'target_url' => $tag['params']['provider']['url']);
273    }
274
275    /*
276     * Analogous to resolve(), using link discovery for resolution
277     */
278    function resolveDiscovery($tag){
279        if(!$response = $this->fetch($tag['url'])) return false;
280        if(!$link_url = $this->getOEmbedLink($response['body'])) return false;
281
282        $query_url = $this->buildURL($link_url, $tag['params']['provider']);
283
284        if(!$response = $this->fetch($query_url)) return false;
285        if(!$oembed = $this->parseResponse($response)) return false;
286
287        return array('oembed'     => $oembed,
288                     'query_url'  => $query_url,
289                     'target_url' => $tag['url']);
290    }
291
292    /*
293     * Analogous to resolve(), using the local provider list for resolution
294     */
295    function resolveProviderList($tag){
296        if(!$api = $this->getProviderAPI($tag['url'])) return false;
297
298        $api = str_replace("{format}", $this->getConf('format_preference'), $api);
299        $params = array_merge($tag['params']['provider'], array('url' => $tag['url']));
300        $query_url = $this->buildURL($api, $params);
301
302        if(!$response = $this->fetch($query_url)) return false;
303        if(!$oembed = $this->parseResponse($response)) return false;
304
305        return array('oembed'     => $oembed,
306                     'query_url'  => $query_url,
307                     'target_url' => $tag['url']);
308    }
309
310    /*
311     * Given OEmbed data as returned by resolve(), produces a valid XHTML
312     * representation
313     *
314     * $data: OEmbed data as returned by resolve()
315     *
316     * returns: XHTML representation of OEmbed data
317     */
318
319        function renderXHTML($data){
320        $content = '';
321
322        if(!$data['oembed_data']){
323            $content .= "OEmbed Error";
324            $content .= "<ul>";
325            foreach($data['errors'] as $error){
326                $content .= "<li>".$error."</li>";
327            }
328            $content .= "</ul>";
329
330            return $content;
331        }
332
333        $oembed = $this->sanitizeOEmbed($data['oembed_data']['oembed']);
334
335        if(array_key_exists('thumbnail', $data['tag']['params']['plugin'])){
336            if($oembed['thumbnail_url']){
337                $img = '<img src="'.$oembed['thumbnail_url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" height="'.$oembed['thumbnail_height'].'px" width="'.$oembed['thumbnail_width'].'px"/>';
338                $content = '<a href="'.$data['oembed_data']['target_url'].'">'.$img.'</a>';
339            }
340            else{
341                $content = $this->renderXHTMLLink($data);
342            }
343        }
344        else{
345            switch($oembed['type']){
346                case 'photo':
347                    if($this->getConf('fullwidth_images')){
348                        $content = '<img src="'.$oembed['url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" width=100% />';
349                    } else {
350                        $content = '<img src="'.$oembed['url'].'" alt="'.$oembed['title'].'" title="'.$oembed['title'].'" height="'.$oembed['height'].'px" width="'.$oembed['width'].'px"/>';
351                    }
352                    break;
353                case 'video':
354                    $content = $oembed['html'];
355                    break;
356                case 'link':
357                    $content = $this->renderXHTMLLink($data);
358                    break;
359                case 'rich':
360                    $content = $oembed['html'];
361                    break;
362                default:
363                    $content = "OEmbed Error <ul><li>Unsupported media type: {$oembed['type']}</li></ul>";
364            }
365        }
366
367        return $content;
368    }
369
370    /*
371     * Given OEmbed data as returned by resolve(), produces a valid XHTML
372     * representation as a simple link
373     *
374     * $data: OEmbed data as returned by resolve()
375     *
376     * returns: XHTML representation of OEmbed data as a simple link
377     */
378    function renderXHTMLLink($data){
379        $text .= ($data['oembed_data']['oembed']['provider_name'] != null) ? $data['oembed_data']['oembed']['provider_name'].': ' : '';
380        $text .= $data['oembed_data']['oembed']['title'];
381        $text .= ($data['oembed_data']['oembed']['author_name'] != null) ? ' &ndash; '.$data['oembed_data']['oembed']['author_name'] : '';
382        return '<a class="urlextern" href="'.$data['oembed_data']['target_url'].'">'.$text.'</a>';
383    }
384
385    /***************************************************************************
386     * UTILITY FUNCTIONS
387     *     Provides shared functionality
388     **************************************************************************/
389
390    /*
391     * Stores a message in the errors array and returns false
392     *
393     * $msg: message to store
394     *
395     * returns: false
396     */
397    function error($msg){
398        array_push($this->errors, $msg);
399        return false;
400    }
401
402    /*
403     * Performs an HTTP GET request on the given URL
404     *
405     * $url: URL to perform the request on
406     *
407     * returns: false on error or array representing the HTTP response
408     *     status: numerical HTTP status code
409     *     headers: array of HTTP headers
410     *         name => value
411     *     body: HTTP response body
412     */
413    function fetch($url){
414        $client = new DokuHTTPClient();
415        if(!$client->sendRequest($url)){
416            return $this->error("Error sending request to provider: {$url}");
417        }
418
419        return array('status'  => $client->status,
420                     'headers' => $client->resp_headers,
421                     'body'    => $client->resp_body);
422    }
423
424    /*
425     * Given a base URL, create a new URL using the given parameters. Query
426     * values are URL encoded.
427     *
428     * $base: base URL, any existing parameter values should be URL encoded.
429     * $params: array of parameters to add to URL
430     *     name => value
431     *
432     * returns: the new URL
433     */
434    function buildURL($base, $params){
435        $url = $base;
436
437        $first = strpos($base,"?") === false ? true : false;
438        foreach($params as $name => $value){
439            if($first){ $url .= "?"; $first = false; }
440            else { $url .= "&"; }
441
442            $url .= $name."=".rawurlencode($value);
443        }
444
445        return $url;
446    }
447
448    /*
449     * Given raw HTML, tries to extract oembed discovery link
450     *
451     * Based on code by Keith Devens:
452     * http://keithdevens.com/weblog/archive/2002/Jun/03/RSSAuto-DiscoveryPHP
453     *
454     * Parameters:
455     *   $html: raw HTML
456     *
457     * Returns: false on error or no link present or an OEmbed discovery link
458     */
459    function getOEmbedLink($html){
460        $ret_link = false;
461
462        if(!$html) return false;
463
464        // search through the HTML, save all <link> tags
465        // and store each link's attributes in an associative array
466        preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html, $matches);
467        $links = $matches[1];
468        $final_links = array();
469        $link_count = count($links);
470        for($n=0; $n<$link_count; $n++){
471            $attributes = preg_split('/\s+/s', $links[$n]);
472            foreach($attributes as $attribute){
473                $att = preg_split('/\s*=\s*/s', $attribute, 2);
474                if(isset($att[1])){
475                    $att[1] = preg_replace('/([\'"]?)(.*)\1/', '$2', $att[1]);
476                    $final_link[strtolower($att[0])] = $att[1];
477                }
478            }
479            $final_links[$n] = $final_link;
480        }
481
482        // now figure out which one points to the OEmbed data
483        for($n=0; $n<$link_count; $n++){
484            if(strtolower($final_links[$n]['rel']) == 'alternate'){
485                if(strtolower($final_links[$n]['type']) == 'application/json+oembed'){
486                    if($this->getConf('format_preference') == 'json'){
487                        return $final_links[$n]['href'];
488                    }
489                    else{
490                        $ret_link = $final_links[$n]['href'];
491                    }
492                }
493
494                // application/xml+oembed only exists for compatability not in spec
495                if(strtolower($final_links[$n]['type']) == 'text/xml+oembed' or
496                   strtolower($final_links[$n]['type']) == 'application/xml+oembed'){
497                    if($this->getConf('format_preference') == 'xml'){
498                        return $final_links[$n]['href'];
499                    }
500                    else{
501                        $ret_link = $final_links[$n]['href'];
502                    }
503                }
504            }
505        }
506
507        return $ret_link;
508    }
509
510    /*
511     * Given a URL, finds a OEmbed provider API endpoint which can be used with
512     * it from the local provider list.
513     *
514     * $url: URL to search a provider for
515     *
516     * Returns: false on error or no provider find or the API endpoint of an
517     *          appropriate provider
518     */
519    function getProviderAPI($url){
520        $providers_path = OEMBED_BASE.'providers.xml';
521        if(!$providers = simplexml_load_file($providers_path)) return false;
522
523        foreach($providers->provider as $provider){
524            foreach($provider->scheme as $scheme){
525                $regex = "@^".str_replace("@","\@",$scheme)."$@i";
526                if(preg_match($regex, trim($url))){
527                    $attrs = $provider->attributes();
528                    if(($api = $attrs['api']) != null){
529                        return $api;
530                    }
531                }
532            }
533        }
534
535        return false;
536    }
537
538    /*
539     * Runs htmlspecialchars() on values in OEmbed data EXCEPT for html values
540     *
541     * $oembed: array of OEmbed data from parseResponse()
542     *
543     * Returns: identical array to $oembed in which all values except for html
544     *          are run through htmlspecialchars()
545     */
546    function sanitizeOEmbed($oembed){
547        $retarray = array();
548
549        foreach($oembed as $key => $value){
550            if($key == 'html'){
551                $retarray[$key] = $value;
552            }
553            else{
554                $retarray[$key] = htmlspecialchars($value);
555            }
556        }
557
558        return $retarray;
559    }
560
561    /***************************************************************************
562     * DEBUG FUNCTIONS
     *     For testing and development, not regularly used
564     **************************************************************************/
565
566    function _log($msg){
567        $fh = fopen(OEMBED_BASE."oembed.log",'a');
568        $curtime = date('Y-m-d H:i:s');
569        fwrite($fh, "[{$curtime}] {$msg}\n");
570        fclose($fh);
571    }
572
573    function _logParsedTag($parsed_tag){
574        $this->_log("Parsed Tag");
575        $this->_log("    URL: {$parsed_tag['url']}");
576        $this->_log("    Provider Params:");
577        foreach($parsed_tag['params']['provider'] as $key => $value){
578            $this->_log("        {$key} => {$value}");
579        }
580        $this->_log("    Plugin Params:");
581        foreach($parsed_tag['params']['plugin'] as $key => $value){
582            $this->_log("        {$key} => {$value}");
583        }
584    }
585
586    function _logOEmbedData($oembed){
587        $this->_log("OEmbed Data:");
588        $this->_log("    target_url: {$oembed['target_url']}");
589        $this->_log("    query_url: {$oembed['query_url']}");
590        $this->_log("    Response:");
591        foreach($oembed['oembed'] as $name => $value){
592            $this->_log("        {$name}: {$value}");
593        }
594    }
595
596    function _logErrors($errors){
597        $this->_log("Errors:");
598        foreach($errors as $error){
599            $this->_log("    {$error}");
600        }
601    }
602}
603