1<?php
2/**
3 * DokuWiki Plugin strata (Helper Component)
4 *
5 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6 * @author  Brend Wanders <b.wanders@utwente.nl>
7 */
8
9if (!defined('DOKU_INC')) die('meh.');
10
11/**
12 * Helper to construct and handle syntax fragments.
13 */
class helper_plugin_strata_syntax_RegexHelper {
    /**
     * Regular expression fragment table. This is used for interpolation of
     * syntax patterns, and should be without captures. Do not assume any
     * specific delimiter.
     */
    public $regexFragments = array(
        'variable'  => '(?:\?[^\s:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'predicate' => '(?:[^:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'reflit'    => '(?:\[\[[^]]*\]\])',
        'type'      => '(?:\[\s*[a-z0-9]+\s*(?:::[^\]]*)?\])',
        'aggregate' => '(?:@\s*[a-z0-9]+(?:\([^\)]*\))?)',
        'operator'  => '(?:!=|>=|<=|>|<|=|!~>|!~|!\^~|!\$~|\^~|\$~|~>|~)',
        'any'       => '(?:.+?)'
    );

    /**
     * Patterns used to extract information from captured fragments. These patterns
     * are used with '/' as delimiter, and should contain at least one capture group.
     *
     * Each entry is array(pattern, list of field names); the n-th name labels
     * the n-th capture group of the pattern.
     */
    public $regexCaptures = array(
        'variable'  => array('\?(.*)', array('name')),
        'aggregate' => array('@\s*([a-z0-9]+)(?:\(([^\)]*)\))?', array('aggregate','hint')),
        'type'      => array('\[\s*([a-z0-9]+)\s*(?:::([^\]]*))?\]', array('type', 'hint')),
        'reflit'    => array('\[\[(.*)\]\]',array('reference'))
    );

    /**
     * Grabs the syntax fragment.
     *
     * @param string $name key into the fragment table
     * @return string|null the fragment, or null (after raising an E_USER_NOTICE) for an unknown name
     */
    function __get($name) {
        if(array_key_exists($name, $this->regexFragments)) {
            return $this->regexFragments[$name];
        }

        // report the location of the offending property access, not this method
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Undefined syntax fragment '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
        return null;
    }

    /**
     * Extracts information from a fragment, based on the type.
     *
     * The method name selects the capture pattern; the first argument is the
     * text to take apart.
     *
     * @param string $name key into the capture table
     * @param array $arguments call arguments; only the first one is used
     * @return helper_plugin_strata_syntax_RegexHelperCapture|null the named captures,
     *         or null if the text does not match (or the capture name is unknown)
     */
    function __call($name, $arguments) {
        if(!array_key_exists($name, $this->regexCaptures)) {
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
            trigger_error("Undefined syntax capture '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
            return null;
        }

        // Callers pass null for optional fragments that were not present. None of
        // the capture patterns match the empty string, so '' yields the same
        // "no match" result without handing null to preg_match (deprecated in PHP 8.1).
        $subject = $arguments[0] ?? '';

        list($pattern, $names) = $this->regexCaptures[$name];
        if(preg_match("/^{$pattern}$/", $subject, $match) !== 1) {
            return null;
        }

        // drop the full match; only the groups are of interest
        array_shift($match);

        // trailing optional groups that did not participate are missing from
        // $match, so only pair up as many names as there are values
        $shortest = min(count($names), count($match));
        return new helper_plugin_strata_syntax_RegexHelperCapture(array_combine(array_slice($names,0,$shortest), array_slice($match, 0, $shortest)));
    }
}
73
74/**
75 * A single capture. Used as a return value for the RegexHelper's
76 * capture methods.
77 */
class helper_plugin_strata_syntax_RegexHelperCapture implements ArrayAccess {
    /**
     * The captured values, keyed by field name and kept in capture order.
     * Declared explicitly so no dynamic property is created in the constructor.
     */
    public $values;

    /**
     * @param array $values the captured values, keyed by field name
     */
    function __construct($values) {
        $this->values = $values;
    }

    /**
     * Reads a captured field by name.
     *
     * @param string $name the field name
     * @return mixed the captured value, or null if there is no such field
     */
    function __get($name) {
        if(array_key_exists($name, $this->values)) {
            return $this->values[$name];
        }

        return null;
    }

    /**
     * Translates a positional offset into the field name at that position.
     *
     * @param mixed $offset the offset to translate
     * @return int|string|null the key at that position, or null if the offset
     *         is not a number within 0..count-1
     */
    private function keyAt($offset) {
        if(!is_numeric($offset) || $offset < 0 || $offset >= count($this->values)) {
            return null;
        }

        $keys = array_keys($this->values);
        return $keys[intval($offset)];
    }

    /**
     * An offset is valid iff it is an existing field name, or a correct
     * numeric index (with 0 being the first field and count-1 the last).
     */
    #[\ReturnTypeWillChange]
    function offsetExists($offset) {
        // the numeric test must reject non-numeric strings, otherwise an unknown
        // name could compare as "in range" and be reported as existing
        return isset($this->values[$offset]) || $this->keyAt($offset) !== null;
    }

    /**
     * Returns the value for a field name or a positional index, or null if
     * the offset is unknown.
     */
    #[\ReturnTypeWillChange]
    function offsetGet($offset) {
        // field names take precedence
        if(isset($this->values[$offset])) {
            return $this->values[$offset];
        }

        // otherwise try to interpret the offset as a position
        $key = $this->keyAt($offset);
        return $key === null ? null : $this->values[$key];
    }

    /**
     * Captures are read-only: raises an E_USER_NOTICE and changes nothing.
     */
    #[\ReturnTypeWillChange]
    function offsetSet($offset, $value) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }

    /**
     * Captures are read-only: raises an E_USER_NOTICE and changes nothing.
     */
    #[\ReturnTypeWillChange]
    function offsetUnset($offset) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }
}
127
128/**
129 * Helper plugin for common syntax parsing.
130 */
131class helper_plugin_strata_syntax extends DokuWiki_Plugin {
132    public static $patterns;
133
134    /**
135     * Static initializer called directly after class declaration.
136     *
137     * This static method exists because we want to keep the static $patterns
138     * and its initialization close together.
139     */
140    static function initialize() {
141        self::$patterns = new helper_plugin_strata_syntax_RegexHelper();
142    }
143
144    /**
145     * Constructor.
146     */
147    function __construct() {
148        $this->util =& plugin_load('helper', 'strata_util');
149        $this->error = '';
150        $this->regions = array();
151    }
152
153    /**
154     * Returns an object describing the pattern fragments.
155     */
156    function getPatterns() {
157        return self::$patterns;
158    }
159
160    /**
161     * Determines whether a line can be ignored.
162     */
163    function ignorableLine($line) {
164        $line = utf8_trim($line);
165        return $line == '' || utf8_substr($line,0,2) == '--';
166    }
167
168    /**
169     * Updates the given typemap with new information.
170     *
171     * @param typemap array a typemap
172     * @param var string the name of the variable
173     * @param type string the type of the variable
174     * @param hint string the type hint of the variable
175     */
176    function updateTypemap(&$typemap, $var, $type, $hint=null) {
177        if(empty($typemap[$var]) && $type) {
178            $typemap[$var] = array('type'=>$type,'hint'=>$hint);
179            return true;
180        }
181
182        return false;
183    }
184
185    /**
186     * Constructs a literal with the given text.
187     */
188    function literal($val) {
189        return array('type'=>'literal', 'text'=>$val);
190    }
191
192    /**
193     * Constructs a variable with the given name.
194     */
195    function variable($var) {
196        if($var[0] == '?') $var = substr($var,1);
197        return array('type'=>'variable', 'text'=>$var);
198    }
199
200    function _fail($message, $regions=array()) {
201        msg($message,-1);
202
203        if($this->isGroup($regions) || $this->isText($regions)) {
204            $regions = array($regions);
205        }
206
207        $lines = array();
208        foreach($regions as $r) $lines[] = array('start'=>$r['start'], 'end'=>$r['end']);
209        throw new strata_exception($message, $lines);
210    }
211
212    /**
     * Constructs a query from the given tree.
214     *
215     * @param root array the tree to transform
216     * @param typemap array the type information collected so far
217     * @param projection array the variables to project
218     * @return a query structure
219     */
220    function constructQuery(&$root, &$typemap, $projection) {
221        $p = $this->getPatterns();
222
223        $result = array(
224            'type'=>'select',
225            'group'=>array(),
226            'projection'=>$projection,
227            'ordering'=>array(),
228            'grouping'=>false,
229            'considering'=>array()
230        );
231
232        // extract sort groups
233        $ordering = $this->extractGroups($root, 'sort');
234
235        // extract grouping groups
236        $grouping = $this->extractGroups($root, 'group');
237
238        // extract additional projection groups
239        $considering = $this->extractGroups($root, 'consider');
240
241        // transform actual group
242        $where = $this->extractGroups($root, 'where');
243        $tree = null;
244        if(count($where)==0) {
245            $tree =& $root;
246        } elseif(count($where)==1) {
247            $tree =& $where[0];
248            if(count($root['cs'])) {
249                $this->_fail($this->getLang('error_query_outofwhere'), $root['cs']);
250            }
251        } else {
252            $this->_fail($this->getLang('error_query_singlewhere'), $where);
253        }
254
255        list($group, $scope) = $this->transformGroup($tree, $typemap);
256        $result['group'] = $group;
257        if(!$group) return false;
258
259        // handle sort groups
260        if(count($ordering)) {
261            if(count($ordering) > 1) {
262                $this->_fail($this->getLang('error_query_multisort'), $ordering);
263            }
264
265            // handle each line in the group
266            foreach($ordering[0]['cs'] as $line) {
267                if($this->isGroup($line)) {
268                    $this->_fail($this->getLang('error_query_sortblock'), $line);
269                }
270
271                if(preg_match("/^({$p->variable})\s*(?:\((asc|desc)(?:ending)?\))?$/S",utf8_trim($line['text']),$match)) {
272                    $var = $p->variable($match[1]);
273                    if(!in_array($var->name, $scope)) {
274                        $this->_fail(sprintf($this->getLang('error_query_sortvar'),utf8_tohtml(hsc($var->name))), $line);
275                    }
276
277                    $result['ordering'][] = array('variable'=>$var->name, 'direction'=>($match[2]?:'asc'));
278                } else {
279                    $this->_fail(sprintf($this->getLang('error_query_sortline'), utf8_tohtml(hsc($line['text']))), $line);
280                }
281            }
282        }
283
284        //handle grouping
285        if(count($grouping)) {
286            if(count($grouping) > 1) {
287                $this->_fail($this->getLang('error_query_multigrouping'), $grouping);
288            }
289
290            // we have a group, so we want grouping
291            $result['grouping'] = array();
292
293            foreach($grouping[0]['cs'] as $line) {
294                if($this->isGroup($line)) {
295                    $this->_fail($this->getLang('error_query_groupblock'), $line);
296                }
297
298                if(preg_match("/({$p->variable})$/",utf8_trim($line['text']),$match)) {
299                    $var = $p->variable($match[1]);
300                    if(!in_array($var->name, $scope)) {
301                        $this->_fail(sprintf($this->getLang('error_query_groupvar'),utf8_tohtml(hsc($var->name))), $line);
302                    }
303
304                    $result['grouping'][] = $var->name;
305                } else {
306                    $this->_fail(sprintf($this->getLang('error_query_groupline'), utf8_tohtml(hsc($line['text']))), $line);
307                }
308            }
309        }
310
311        //handle considering
312        if(count($considering)) {
313            if(count($considering) > 1) {
314                $this->_fail($this->getLang('error_query_multiconsidering'), $considering);
315            }
316
317            foreach($considering[0]['cs'] as $line) {
318                if($this->isGroup($line)) {
319                    $this->_fail($this->getLang('error_query_considerblock'), $line);
320                }
321
322                if(preg_match("/^({$p->variable})$/",utf8_trim($line['text']),$match)) {
323                    $var = $p->variable($match[1]);
324                    if(!in_array($var->name, $scope)) {
325                        $this->_fail(sprintf($this->getLang('error_query_considervar'),utf8_tohtml(hsc($var->name))), $line);
326                    }
327
328                    $result['considering'][] = $var->name;
329                } else {
330                    $this->_fail(sprintf($this->getLang('error_query_considerline'), utf8_tohtml(hsc($line['text']))), $line);
331                }
332            }
333        }
334
335        foreach($projection as $var) {
336            if(!in_array($var, $scope)) {
337                $this->_fail(sprintf($this->getLang('error_query_selectvar'), utf8_tohtml(hsc($var))));
338            }
339        }
340
341        // return final query structure
342        return array($result, $scope);
343    }
344
345    /**
346     * Transforms a full query group.
347     *
348     * @param root array the tree to transform
349     * @param typemap array the type information
350     * @return the transformed group and a list of in-scope variables
351     */
352    function transformGroup(&$root, &$typemap) {
353        // extract patterns and split them in triples and filters
354        $patterns = $this->extractText($root);
355
356        // extract union groups
357        $unions = $this->extractGroups($root, 'union');
358
359        // extract minus groups
360        $minuses = $this->extractGroups($root,'minus');
361
362        // extract optional groups
363        $optionals = $this->extractGroups($root,'optional');
364
365        // check for leftovers
366        if(count($root['cs'])) {
367            $this->_fail(sprintf($this->getLang('error_query_group'),( isset($root['cs'][0]['tag']) ? sprintf($this->getLang('named_group'), utf8_tohtml(hsc($root['cs'][0]['tag']))) : $this->getLang('unnamed_group'))), $root['cs']);
368        }
369
370        // split patterns into triples and filters
371        list($patterns, $filters, $scope) = $this->transformPatterns($patterns, $typemap);
372
373        // convert each union into a pattern
374        foreach($unions as $union) {
375            list($u, $s) = $this->transformUnion($union, $typemap);
376            $scope = array_merge($scope, $s);
377            $patterns[] = $u;
378        }
379
380        if(count($patterns) == 0) {
381            $this->_fail(sprintf($this->getLang('error_query_grouppattern')), $root);
382        }
383
384        // chain all patterns with ANDs
385        $result = array_shift($patterns);
386        foreach($patterns as $pattern) {
387            $result = array(
388                'type'=>'and',
389                'lhs'=>$result,
390                'rhs'=>$pattern
391            );
392        }
393
394        // apply all optionals
395        if(count($optionals)) {
396            foreach($optionals as $optional) {
397                // convert eacfh optional
398                list($optional, $s) = $this->transformGroup($optional, $typemap);
399                $scope = array_merge($scope, $s);
400                $result = array(
401                    'type'=>'optional',
402                    'lhs'=>$result,
403                    'rhs'=>$optional
404                );
405            }
406        }
407
408
409        // add all filters; these are a bit weird, as only a single FILTER is really supported
410        // (we have defined multiple filters as being a conjunction)
411        if(count($filters)) {
412            foreach($filters as $f) {
413                $line = $f['_line'];
414                unset($f['_line']);
415                if($f['lhs']['type'] == 'variable' && !in_array($f['lhs']['text'], $scope)) {
416                    $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['lhs']['text']))), $line);
417                }
418                if($f['rhs']['type'] == 'variable' && !in_array($f['rhs']['text'], $scope)) {
419                    $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['rhs']['text']))), $line);
420                }
421            }
422
423            $result = array(
424                'type'=>'filter',
425                'lhs'=>$result,
426                'rhs'=>$filters
427            );
428        }
429
430        // apply all minuses
431        if(count($minuses)) {
432            foreach($minuses as $minus) {
433                // convert each minus, and discard their scope
434                list($minus, $s) = $this->transformGroup($minus, $typemap);
435                $result = array(
436                    'type'=>'minus',
437                    'lhs'=>$result,
438                    'rhs'=>$minus
439                );
440            }
441        }
442
443        return array($result, $scope);
444    }
445
446    /**
447     * Transforms a union group with multiple subgroups
448     *
449     * @param root array the union group to transform
450     * @param typemap array the type information
451     * @return the transformed group and a list of in-scope variables
452     */
453    function transformUnion(&$root, &$typemap) {
454        // fetch all child patterns
455        $subs = $this->extractGroups($root,null);
456
457        // do sanity checks
458        if(count($root['cs'])) {
459            $this->_fail($this->getLang('error_query_unionblocks'), $root['cs']);
460        }
461
462        if(count($subs) < 2) {
463            $this->_fail($this->getLang('error_query_unionreq'), $root);
464        }
465
466        // transform the first group
467        list($result,$scope) = $this->transformGroup(array_shift($subs), $typemap);
468
469        // transform each subsequent group
470        foreach($subs as $sub) {
471            list($rhs, $s) = $this->transformGroup($sub, $typemap);
472            $scope = array_merge($scope, $s);
473            $result = array(
474                'type'=>'union',
475                'lhs'=>$result,
476                'rhs'=>$rhs
477            );
478        }
479
480        return array($result, $scope);
481    }
482
483    /**
484     * Transforms a list of patterns into a list of triples and a
485     * list of filters.
486     *
487     * @param lines array a list of lines to transform
488     * @param typemap array the type information
489     * @return a list of triples, a list of filters and a list of in-scope variables
490     */
491    function transformPatterns(&$lines, &$typemap) {
492        // we need this to resolve things
493        global $ID;
494
495        // we need patterns
496        $p = $this->getPatterns();
497
498        // result holders
499        $scope = array();
500        $triples = array();
501        $filters = array();
502
503        foreach($lines as $lineNode) {
504            $line = trim($lineNode['text']);
505
506            // [grammar] TRIPLEPATTERN := (VARIABLE|REFLIT) ' ' (VARIABLE|PREDICATE) TYPE? : ANY
507            if(preg_match("/^({$p->variable}|{$p->reflit})\s+({$p->variable}|{$p->predicate})\s*({$p->type})?\s*:\s*({$p->any})$/S",$line,$match)) {
508                list(, $subject, $predicate, $type, $object) = $match;
509
510                $subject = utf8_trim($subject);
511                if($subject[0] == '?') {
512                    $subject = $this->variable($subject);
513                    $scope[] = $subject['text'];
514                    $this->updateTypemap($typemap, $subject['text'], 'ref');
515                } else {
516                    global $ID;
517                    $subject = $p->reflit($subject)->reference;
518                    $subject = $this->util->loadType('ref')->normalize($subject,null);
519                    $subject = $this->literal($subject);
520                }
521
522                $predicate = utf8_trim($predicate);
523                if($predicate[0] == '?') {
524                    $predicate = $this->variable($predicate);
525                    $scope[] = $predicate['text'];
526                    $this->updateTypemap($typemap, $predicate['text'], 'text');
527                } else {
528                    $predicate = $this->literal($this->util->normalizePredicate($predicate));
529                }
530
531                $object = utf8_trim($object);
532                if($object[0] == '?') {
533                    // match a proper type variable
534                    if(preg_match("/^({$p->variable})\s*({$p->type})?$/",$object,$captures)!=1) {
535                        $this->_fail($this->getLang('error_pattern_garbage'),$lineNode);
536                    }
537                    $var=$captures[1]??null;
538                    $vtype=$captures[2]??null;
539
540                    // create the object node
541                    $object = $this->variable($var);
542                    $scope[] = $object['text'];
543
544                    // try direct type first, implied type second
545                    $vtype = $p->type($vtype);
546                    $type = $p->type($type);
547                    if (isset ($vtype))
548                    {
549                      $this->updateTypemap($typemap, $object['text'], $vtype->type, $vtype->hint);
550                    }
551                    else if (isset($type))
552                    {
553                      $this->updateTypemap($typemap, $object['text'], $type->type, $type->hint);
554                    }
555                } else {
556                    // check for empty string token
557                    if($object == '[[]]') {
558                        $object='';
559                    }
560                    if(!$type) {
561                        list($type, $hint) = $this->util->getDefaultType();
562                    } else {
563                        $type = $p->type($type);
564                        $hint = $type->hint;
565                        $type = $type->type;
566                    }
567                    $type = $this->util->loadType($type);
568                    $object = $this->literal($type->normalize($object,$hint));
569                }
570
571                $triples[] = array('type'=>'triple','subject'=>$subject, 'predicate'=>$predicate, 'object'=>$object);
572
573            // [grammar] FILTER := VARIABLE TYPE? OPERATOR VARIABLE TYPE?
574            } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
575                list(,$lhs, $ltype, $operator, $rhs, $rtype) = $match;
576
577                $lhs = $this->variable($lhs);
578                $rhs = $this->variable($rhs);
579
580                if($operator == '~>' || $operator == '!~>') $operator = str_replace('~>','^~',$operator);
581
582                // do type information propagation
583                $rtype = $p->type($rtype);
584                $ltype = $p->type($ltype);
585
586                if($ltype) {
587                    // left has a defined type, so update the map
588                    $this->updateTypemap($typemap, $lhs['text'], $ltype->type, $ltype->hint);
589
590                    // and propagate to right if possible
591                    if(!$rtype) {
592                        $this->updateTypemap($typemap, $rhs['text'], $ltype->type, $lhint->hint);
593                    }
594                }
595                if($rtype) {
596                    // right has a defined type, so update the map
597                    $this->updateTypemap($typemap, $rhs['text'], $rtype->type, $rtype->hint);
598
599                    // and propagate to left if possible
600                    if(!$ltype) {
601                        $this->updateTypemap($typemap, $lhs['text'], $rtype->type, $rtype->hint);
602                    }
603                }
604
605                $filters[] = array('type'=>'filter', 'lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
606
607            // [grammar] FILTER := VARIABLE TYPE? OPERATOR ANY
608            } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->any})$/S",$line, $match)) {
609
610                // filter pattern
611                list(, $lhs,$ltype,$operator,$rhs) = $match;
612
613                $lhs = $this->variable($lhs);
614
615                // update typemap if a type was defined
616                list($type,$hint) = $p->type($ltype);
617                if($type) {
618                    $this->updateTypemap($typemap, $lhs['text'],$type,$hint);
619                } else {
620                    // use the already declared type if no type was defined
621                    if(!empty($typemap[$lhs['text']])) {
622                        extract($typemap[$lhs['text']]);
623                    } else {
624                        list($type, $hint) = $this->util->getDefaultType();
625                    }
626                }
627
628                // check for empty string token
629                if($rhs == '[[]]') {
630                    $rhs = '';
631                }
632
633                // special case: the right hand side of the 'in' operator always normalizes with the 'text' type
634                if($operator == '~>' || $operator == '!~>') {
635                    $operator = str_replace('~>','^~', $operator);
636                    $type = 'text';
637                    unset($hint);
638                }
639
640                // normalize
641                $type = $this->util->loadType($type);
642                $rhs = $this->literal($type->normalize($rhs,$hint));
643
644                $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
645
646            // [grammar] FILTER := ANY OPERATOR VARIABLE TYPE?
647            } elseif(preg_match("/^({$p->any})\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
648                list(, $lhs,$operator,$rhs,$rtype) = $match;
649
650                $rhs = $this->variable($rhs);
651
652                // update typemap if a type was defined
653                list($type, $hint) = $p->type($rtype);
654                if($type) {
655                    $this->updateTypemap($typemap, $rhs['text'],$type,$hint);
656                } else {
657                    // use the already declared type if no type was defined
658                    if(!empty($typemap[$rhs['text']])) {
659                        extract($typemap[$rhs['text']]);
660                    } else {
661                        list($type, $hint) = $this->util->getDefaultType();
662                    }
663                }
664
665                // check for empty string token
666                if($lhs == '[[]]') {
667                    $lhs = '';
668                }
669
670                // special case: the left hand side of the 'in' operator always normalizes with the 'page' type
671                if($operator == '~>' || $operator == '!~>') {
672                    $operator = str_replace('~>','^~', $operator);
673                    $type = 'page';
674                    unset($hint);
675                }
676
677                // normalize
678                $type = $this->util->loadType($type);
679                $lhs = $this->literal($type->normalize($lhs,$hint));
680
681                $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
682            } else {
683                // unknown lines are fail
684                $this->_fail(sprintf($this->getLang('error_query_pattern'),utf8_tohtml(hsc($line))), $lineNode);
685            }
686        }
687
688        return array($triples, $filters, $scope);
689    }
690
691    function getFields(&$tree, &$typemap) {
692        $fields = array();
693
694        // extract the projection information in 'long syntax' if available
695        $fieldsGroups = $this->extractGroups($tree, 'fields');
696
697        // parse 'long syntax' if we don't have projection information yet
698        if(count($fieldsGroups)) {
699            if(count($fieldsGroups) > 1) {
700                $this->_fail($this->getLang('error_query_fieldsgroups'), $fieldsGroups);
701            }
702
703            $fieldsLines = $this->extractText($fieldsGroups[0]);
704            if(count($fieldsGroups[0]['cs'])) {
705                $this->_fail(sprintf($this->getLang('error_query_fieldsblock'),( isset($fieldsGroups[0]['cs'][0]['tag']) ? sprintf($this->getLang('named_group'),hsc($fieldsGroups[0]['cs'][0]['tag'])) : $this->getLang('unnamed_group'))), $fieldsGroups[0]['cs']);
706            }
707            $fields = $this->parseFieldsLong($fieldsLines, $typemap);
708            if(!$fields) return array();
709        }
710
711        return $fields;
712    }
713
714    /**
715     * Parses a projection group in 'long syntax'.
716     */
717    function parseFieldsLong($lines, &$typemap) {
718        $p = $this->getPatterns();
719        $result = array();
720
721        foreach($lines as $lineNode) {
722            $line = trim($lineNode['text']);
723            // FIELDLONG := VARIABLE AGGREGATE? TYPE? (':' ANY)?
724            if(preg_match("/^({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?(?:\s*(:)\s*({$p->any})?\s*)?$/S",$line, $match)) {
725                list(, $var, $vaggregate, $vtype, $nocaphint, $caption) = $match;
726                $variable = $p->variable($var)->name;
727                if(!$nocaphint || (!$nocaphint && !$caption)) $caption = ucfirst($variable);
728
729                list($type,$hint) = $p->type($vtype);
730                list($agg,$agghint) = $p->aggregate($vaggregate);
731
732                $this->updateTypemap($typemap, $variable, $type, $hint);
733                $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
734            } else {
735                $this->_fail(sprintf($this->getLang('error_query_fieldsline'),utf8_tohtml(hsc($line))), $lineNode);
736            }
737        }
738
739        return $result;
740    }
741
742    /**
743     * Parses a projection group in 'short syntax'.
744     */
745    function parseFieldsShort($line, &$typemap) {
746        $p = $this->getPatterns();
747        $result = array();
748
749        // FIELDSHORT := VARIABLE AGGREGATE? TYPE? CAPTION?
750        if(preg_match_all("/\s*({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?\s*(?:(\")([^\"]*)\")?/",$line,$match, PREG_SET_ORDER)) {
751            foreach($match as $m) {
752                $var=$m[1]??null;
753                $vaggregate=$m[2]??null;
754                $vtype=$m[3]??null;
755                $caption_indicator=$m[4]??null;
756                $caption=$m[5]??null;
757
758                $variable = $p->variable($var)->name;
759                list($type, $hint) = $p->type($vtype);
760                list($agg, $agghint) = $p->aggregate($vaggregate);
761                if(!$caption_indicator) $caption = ucfirst($variable);
762                $this->updateTypemap($typemap, $variable, $type, $hint);
763                $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
764            }
765        }
766
767        return $result;
768    }
769
770    /**
     * Returns the regex pattern used by the 'short syntax' for projection. This method can
772     * be used to get a dokuwiki-lexer-safe regex to embed into your own syntax pattern.
773     *
774     * @param captions boolean Whether the pattern should include caption matching (defaults to true)
775     */
776    function fieldsShortPattern($captions = true) {
777        $p = $this->getPatterns();
778        return "(?:\s*{$p->variable}\s*{$p->aggregate}?\s*{$p->type}?".($captions?'\s*(?:"[^"]*")?':'').")";
779    }
780
781    /**
782     * Constructs a tagged tree from the given list of lines.
783     *
784     * @return a tagged tree
785     */
786    function constructTree($lines, $what) {
787        $root = array(
788            'tag'=>'',
789            'cs'=>array(),
790            'start'=>1,
791            'end'=>1
792        );
793
794        $stack = array();
795        $stack[] =& $root;
796        $top = count($stack)-1;
797        $lineCount = 0;
798
799        foreach($lines as $line) {
800            $lineCount++;
801            if($this->ignorableLine($line)) continue;
802
803            if(preg_match('/^([^\{]*) *{$/',utf8_trim($line),$match)) {
804                list(, $tag) = $match;
805                $tag = utf8_trim($tag);
806
807                $stack[$top]['cs'][] = array(
808                    'tag'=>$tag?:null,
809                    'cs'=>array(),
810                    'start'=>$lineCount,
811                    'end'=>0
812                );
813                $stack[] =& $stack[$top]['cs'][count($stack[$top]['cs'])-1];
814                $top = count($stack)-1;
815
816            } elseif(preg_match('/^}$/',utf8_trim($line))) {
817                $stack[$top]['end'] = $lineCount;
818                array_pop($stack);
819                $top = count($stack)-1;
820
821            } else {
822                $stack[$top]['cs'][] = array(
823                    'text'=>$line,
824                    'start'=>$lineCount,
825                    'end'=>$lineCount
826                );
827            }
828        }
829
830        if(count($stack) != 1 || $stack[0] != $root) {
831            msg(sprintf($this->getLang('error_syntax_braces'),$what),-1);
832        }
833
834        $root['end'] = $lineCount;
835
836        return $root;
837    }
838
839    /**
840     * Renders a debug display of the syntax.
841     *
842     * @param lines array the lines that form the syntax
     * @param regions array the regions to highlight (each with 'start' and 'end' line numbers)
844     * @return a string with markup
845     */
846    function debugTree($lines, $regions) {
847        $result = '';
848        $lineCount = 0;
849        $count = 0;
850
851        foreach($lines as $line) {
852            $lineCount++;
853
854            foreach($regions as $region) {
855                if($lineCount == $region['start']) {
856                    if($count == 0) $result .= '<div class="strata-debug-highlight">';
857                    $count++;
858                }
859
860                if($lineCount == $region['end']+1) {
861                    $count--;
862
863                    if($count==0) $result .= '</div>';
864                }
865            }
866
867            if($line != '') {
868                $result .= '<div class="strata-debug-line">'.hsc($line).'</div>'."\n";
869            } else {
870                $result .= '<div class="strata-debug-line"><br/></div>'."\n";
871            }
872        }
873
874        if($count > 0) {
875            $result .= '</div>';
876        }
877
878        return '<div class="strata-debug">'.$result.'</div>';
879    }
880
881    /**
     * Extract all occurrences of tagged groups from the given tree.
883     * This method does not remove the tagged groups from subtrees of
884     * the given root.
885     *
886     * @param root array the tree to operate on
     * @param tag string the tag of the groups to extract (empty or null selects untagged groups)
888     * @return an array of groups
889     */
890    function extractGroups(&$root, $tag) {
891        $result = array();
892        $to_remove = array();
893        foreach($root['cs'] as $i=>&$tree) {
894            if(!$this->isGroup($tree)) continue;
895            if($tree['tag'] == $tag || (($tag=='' || $tag==null) && $tree['tag'] == null) ) {
896                $result[] =& $tree;
897                $to_remove[] = $i;
898            }
899        }
900        // invert order of to_remove to always remove higher indices first
901        rsort($to_remove);
902        foreach($to_remove as $i) {
903            array_splice($root['cs'],$i,1);
904        }
905        return $result;
906    }
907
908    /**
909     * Extracts all text elements from the given tree.
910     * This method does not remove the text elements from subtrees
911     * of the root.
912     *
913     * @param root array the tree to operate on
914     * @return array an array of text elements
915     */
916    function extractText(&$root) {
917        $result = array();
918        $to_remove = array();
919        foreach($root['cs'] as $i=>&$tree) {
920            if(!$this->isText($tree)) continue;
921            $result[] =& $tree;
922            $to_remove[] = $i;
923        }
924        // invert order of to_remove to always remove higher indices first
925        rsort($to_remove);
926        foreach($to_remove as $i) {
927            array_splice($root['cs'],$i,1);
928        }
929        return $result;
930    }
931
932    /**
933     * Returns whether the given node is a line.
934     */
935    function isText(&$node) {
936        return array_key_exists('text', $node);
937    }
938
939    /**
940     * Returns whether the given node is a group.
941     */
942    function isGroup(&$node) {
943        return array_key_exists('tag', $node);
944    }
945
946    /**
947     * Sets all properties given as '$properties' to the values parsed from '$trees'.
948     *
949     * The property array has as keys all possible properties, which are specified by its
950     * values. Such specification is an array that may have the following keys, with the
951     * described values:
952     * - choices: array of possible values, where the keys are the internally used values
953     *     and the values specify synonyms for the choice, of which the first listed one
954     *     is most common. For example: 'true' => array('yes', 'yeah') specifies that the
955     *     user can choose 'yes' or 'yeah' (of which 'yes' is the commonly used value) and
956     *     that the return value will contain 'true' if this choice was chosen.
957     * - pattern: regular expression that defines all possible values.
958     * - pattern_desc: description used for errors when a pattern is specified.
959     * - minOccur: positive integer specifying the minimum number of values, defaults to 1.
960     * - maxOccur: integer greater than or equal to minOccur, which specifies the maximum
961     *     number of values, defaults to minOccur.
962     * - default: the default value (which must be a value the user is allowed to set).
963     *     When default is given, this method guarantees that the property is always set,
964     *     otherwise the property may not be set since all properties are optional.
965     * Either 'choices' or 'pattern' must be set (not both), all other values are optional.
966     *
967     * An example property array is as follows:
968     * array(
969     *   'example boolean' => array(
970     *     'choices' => array('y' => array('yes', 'yeah'), 'n' => array('no', 'nay')),
971     *     'minOccur' => 1,
972     *     'maxOccur' => 3,
973     *     'default' => 'yes'
974     *   ),
975     *   'example natural number' => array(
976     *     'pattern' => '/^[0-9]+$/',
977     *     'pattern_desc' => $this->getLang('property_Z*')
978     *   )
979     * )
980     *
981     * @param $properties The properties that can be set.
982     * @param $trees The trees that contain the values for these properties.
983     * @return An array with as indices the property names and as value a list of all values given for that property.
984     */
985    function setProperties($properties, $trees) {
986        $propertyValues = array();
987        $p = $this->getPatterns();
988
989        foreach ($trees as $tree) {
990            $text = $this->extractText($tree);
991            foreach($text as $lineNode) {
992                $line = utf8_trim($lineNode['text']);
993                if (preg_match('/^('.$p->predicate.')(\*)?\s*:\s*('.$p->any.')$/', $line, $match)) {
994                    list(, $variable, $multi, $value) = $match;
995                    $this->_setPropertyValue($properties, $tree['tag'], $lineNode, $variable, !empty($multi), $value, $propertyValues);
996                } else {
997                    $this->emitError($lineNode, 'error_property_weirdgroupline', hsc($tree['tag']), hsc($line));
998                }
999            }
1000            // Warn about unknown groups
1001            foreach ($tree['cs'] as $group) {
1002                $this->emitError($group, 'error_property_unknowngroup', hsc($trees[0]['tag']), hsc($group['tag']));
1003            }
1004        }
1005
1006        // Set property defaults
1007        foreach ($properties as $name => $p) {
1008            if (!isset($propertyValues[$name]) && isset($p['default'])) {
1009                $this->_setPropertyValue($properties, 'default value', null, $name, false, $p['default'], $propertyValues);
1010            }
1011        }
1012
1013        // Show errors, if any
1014        $this->showErrors();
1015
1016        return $propertyValues;
1017    }
1018
1019    function _setPropertyValue($properties, $group, $region, $variable, $isMulti, $value, &$propertyValues) {
1020        if (!isset($properties[$variable])) {
1021            // Unknown property: show error
1022            $property_title_values = $this->getLang('property_title_values');
1023            $propertyList = implode(', ', array_map(function ($n, $p) use ($property_title_values) {
1024                $values = implode(', ', array_map(function ($c) {
1025                    return $c[0];
1026                }, $p['choices']));
1027                $title = sprintf($property_title_values, $values);
1028                return '\'<code title="' . hsc($title) . '">' . hsc($n) . '</code>\'';
1029            }, array_keys($properties), $properties));
1030            $this->emitError($region, 'error_property_unknownproperty', hsc($group), hsc($variable), $propertyList);
1031        } else if (isset($propertyValues[$variable])) {
1032            // Property is specified more than once: show error
1033            $this->emitError($region, 'error_property_multi', hsc($group), hsc($variable));
1034        } else {
1035            $p = $properties[$variable];
1036            $minOccur = isset($p['minOccur']) ? $p['minOccur'] : 1;
1037            $maxOccur = isset($p['maxOccur']) ? $p['maxOccur'] : $minOccur;
1038
1039            if ($isMulti) {
1040                $values = array_map('utf8_trim', explode(',', $value));
1041            } else if ($minOccur == 1 || $minOccur == $maxOccur) {
1042                // Repeat the given value as often as we expect it
1043                $values = array_fill(0, $minOccur, $value);
1044            } else {
1045                // A single value was given, but multiple were expected
1046                $this->emitError($region, 'error_property_notmulti', hsc($group), hsc($variable), $minOccur);
1047                return;
1048            }
1049
1050            if (count($values) < $minOccur || count($values) > $maxOccur) {
1051                // Number of values given differs from expected number
1052                if ($minOccur == $maxOccur) {
1053                    $this->emitError($region, 'error_property_occur', hsc($group), hsc($variable), $minOccur, count($values));
1054                } else {
1055                    $this->emitError($region, 'error_property_occurrange', hsc($group), hsc($variable), $minOccur, $maxOccur, count($values));
1056                }
1057            } else if (isset($p['choices'])) { // Check whether the given property values are valid choices
1058                // Create a mapping from choice to normalized value of the choice
1059                $choices = array();
1060                $choicesInfo = array(); // For nice error messages
1061                foreach ($p['choices'] as $nc => $c) {
1062                    if (is_array($c)) {
1063                        $choices = array_merge($choices, array_fill_keys($c, $nc));
1064                        $title = sprintf($this->getLang('property_title_synonyms'), implode(', ', $c));
1065                        $choicesInfo[] = '\'<code title="' . hsc($title) . '">' . hsc($c[0]) . '</code>\'';
1066                    } else {
1067                        $choices[$c] = $c;
1068                        $choicesInfo[] = '\'<code>' . hsc($c) . '</code>\'';
1069                    }
1070                }
1071                if (!isset($choices['']) && isset($p['default'])) {
1072                    $choices[''] = $choices[$p['default']];
1073                }
1074
1075                $incorrect = array_diff($values, array_keys($choices)); // Find all values that are not a valid choice
1076                if (count($incorrect) > 0) {
1077                    unset($choices['']);
1078                    foreach (array_unique($incorrect) as $v) {
1079                        $this->emitError($region, 'error_property_invalidchoice', hsc($group), hsc($variable), hsc($v), implode(', ', $choicesInfo));
1080                    }
1081                } else {
1082                    $propertyValues[$variable] = array_map(function($v) use ($choices) { return $choices[$v]; }, $values);
1083                }
1084            } else if (isset($p['pattern'])) { // Check whether the given property values match the pattern
1085                $incorrect = array_filter($values, function($v) use ($p) { return !preg_match($p['pattern'], $v); });
1086                if (count($incorrect) > 0) {
1087                    foreach (array_unique($incorrect) as $v) {
1088                        if (isset($p['pattern_desc'])) {
1089                            $this->emitError($region, 'error_property_patterndesc', hsc($group), hsc($variable), hsc($v), $p['pattern_desc']);
1090                        } else {
1091                            $this->emitError($region, 'error_property_pattern', hsc($group), hsc($variable), hsc($v), hsc($p['pattern']));
1092                        }
1093                    }
1094                } else {
1095                    $propertyValues[$variable] = $values;
1096                }
1097            } else { // Property value has no requirements
1098                $propertyValues[$variable] = $values;
1099            }
1100        }
1101    }
1102
1103    /**
1104     * Generates a html error message, ensuring that all utf8 in arguments is escaped correctly.
1105     * The generated messages might be accumulated until showErrors is called.
1106     *
1107     * @param region The region at which the error occurs.
1108     * @param msg_id The id of the message in the language file.
1109     */
1110    function emitError($region, $msg_id) {
1111        $args = func_get_args();
1112        array_shift($args);
1113        array_shift($args);
1114        $args = array_map('strval', $args); // convert everything to strings first
1115        $args = array_map('utf8_tohtml', $args); // Escape args
1116        $msg = vsprintf($this->getLang($msg_id), $args);
1117        msg($msg, -1);
1118        $this->error .= "<br />\n" . $msg;
1119        $this->regions[] = $region;
1120    }
1121
1122    /**
1123     * Ensures that all emitted errors are shown.
1124     */
1125    function showErrors() {
1126        if (!empty($this->error)) {
1127            $error = $this->error;
1128            $regions = $this->regions;
1129            $this->error = '';
1130            $this->regions = array();
1131            throw new strata_exception($error, $regions);
1132        }
1133    }
1134}
1135
// Call the static initializer by hand, once, when this file is loaded
// (PHP doesn't offer static initializers as a language feature).
helper_plugin_strata_syntax::initialize();
1138