1 <?php
2 /**
3  * DokuWiki Plugin strata (Helper Component)
4  *
5  * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6  * @author  Brend Wanders <b.wanders@utwente.nl>
7  */
8 
9 if (!defined('DOKU_INC')) die('meh.');
10 
/**
 * Helper to construct and handle syntax fragments.
 *
 * Reading a property named after a fragment (e.g. `$helper->variable`) yields
 * the regular expression fragment of that name. Calling a method named after
 * a capture (e.g. `$helper->type('[text::hint]')`) matches the argument
 * against the capture pattern of that name and returns the named captures.
 */
class helper_plugin_strata_syntax_RegexHelper {
    /**
     * Regular expression fragment table. This is used for interpolation of
     * syntax patterns, and should be without captures. Do not assume any
     * specific delimiter.
     */
    public $regexFragments = array(
        'variable'  => '(?:\?[^\s:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'predicate' => '(?:[^:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'reflit'    => '(?:\[\[[^]]*\]\])',
        'type'      => '(?:\[\s*[a-z0-9]+\s*(?:::[^\]]*)?\])',
        'aggregate' => '(?:@\s*[a-z0-9]+(?:\([^\)]*\))?)',
        'operator'  => '(?:!=|>=|<=|>|<|=|!~>|!~|!\^~|!\$~|\^~|\$~|~>|~)',
        'any'       => '(?:.+?)'
    );

    /**
     * Patterns used to extract information from captured fragments. These patterns
     * are used with '/' as delimiter, and should contain at least one capture group.
     *
     * Every entry is array(pattern, list of field names); the n-th field name
     * labels the n-th capture group.
     */
    public $regexCaptures = array(
        'variable'  => array('\?(.*)', array('name')),
        'aggregate' => array('@\s*([a-z0-9]+)(?:\(([^\)]*)\))?', array('aggregate','hint')),
        'type'      => array('\[\s*([a-z0-9]+)\s*(?:::([^\]]*))?\]', array('type', 'hint')),
        'reflit'    => array('\[\[(.*)\]\]',array('reference'))
    );

    /**
     * Grabs the syntax fragment.
     *
     * @param string $name the name of the fragment
     * @return string|null the fragment, or null (after an E_USER_NOTICE) if
     *                     no fragment of that name exists
     */
    function __get($name) {
        if(array_key_exists($name, $this->regexFragments)) {
            return $this->regexFragments[$name];
        }

        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Undefined syntax fragment '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
        return null;
    }

    /**
     * Extracts information from a fragment, based on the type.
     *
     * @param string $name the name of the capture pattern
     * @param array $arguments the call arguments; only the first one (the
     *                         text to match) is used
     * @return helper_plugin_strata_syntax_RegexHelperCapture|null the named
     *         captures, or null if the text does not match, no text was
     *         given, or the capture name is unknown (the latter also raises
     *         an E_USER_NOTICE)
     */
    function __call($name, $arguments) {
        if(!array_key_exists($name, $this->regexCaptures)) {
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
            trigger_error("Undefined syntax capture '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
            return null;
        }

        // Optional regex groups that did not match reach us as null. Every
        // capture pattern needs at least one character, so a null (or
        // missing) subject can never match; bailing out here avoids handing
        // null to preg_match(), which is deprecated as of PHP 8.1.
        if(!isset($arguments[0])) {
            return null;
        }

        list($pattern, $names) = $this->regexCaptures[$name];
        if(preg_match("/^{$pattern}$/", $arguments[0], $match) !== 1) {
            return null;
        }

        // drop the full match, then pair the groups with their field names;
        // trailing groups that did not participate are simply left out
        array_shift($match);
        $shortest = min(count($names), count($match));
        return new helper_plugin_strata_syntax_RegexHelperCapture(array_combine(array_slice($names,0,$shortest), array_slice($match, 0, $shortest)));
    }
}
73 
/**
 * A single capture. Used as a return value for the RegexHelper's
 * capture methods.
 *
 * Fields can be read as properties (`$capture->type`), by field name
 * (`$capture['type']`) or by position (`$capture[0]`, which also makes
 * `list($type, $hint) = $capture` work). Captures are read-only.
 */
class helper_plugin_strata_syntax_RegexHelperCapture implements ArrayAccess {
    /**
     * The captured values, keyed by field name, in capture order.
     *
     * Declared explicitly because creating it dynamically in the constructor
     * is deprecated as of PHP 8.2. It stays public, as the dynamically
     * created property was.
     */
    public $values = array();

    /**
     * @param array $values the captured values, keyed by field name
     */
    function __construct($values) {
        $this->values = $values;
    }

    /**
     * Reads a field by name.
     *
     * @param string $name the field name
     * @return mixed the field value, or null if there is no such field
     */
    function __get($name) {
        return array_key_exists($name, $this->values) ? $this->values[$name] : null;
    }

    /**
     * Tells whether the offset is a numeric position inside the capture,
     * with 0 being the first field and count-1 the last.
     */
    private function isPosition($offset) {
        return is_numeric($offset) && $offset >= 0 && $offset < count($this->values);
    }

    // The #[\ReturnTypeWillChange] attributes below silence the PHP 8.1
    // tentative-return-type deprecation of ArrayAccess implementations; on
    // older PHP versions a line starting with '#' is an ordinary comment.

    /**
     * The offset is valid iff it is an existing field name or a correct
     * numeric position. Non-numeric strings are never treated as positions,
     * which keeps this in agreement with offsetGet().
     */
    #[\ReturnTypeWillChange]
    function offsetExists($offset) {
        return isset($this->values[$offset]) || $this->isPosition($offset);
    }

    /**
     * Returns the field with the given name or, failing that, the field at
     * the given numeric position. Unknown offsets yield null.
     */
    #[\ReturnTypeWillChange]
    function offsetGet($offset) {
        if(isset($this->values[$offset])) {
            return $this->values[$offset];
        }

        if($this->isPosition($offset)) {
            // translate the numeric position to its field name
            $keys = array_keys($this->values);
            return $this->values[$keys[intval($offset)]];
        }

        return null;
    }

    /**
     * Captures are read-only: raises an E_USER_NOTICE and changes nothing.
     */
    #[\ReturnTypeWillChange]
    function offsetSet($offset, $value) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }

    /**
     * Captures are read-only: raises an E_USER_NOTICE and changes nothing.
     */
    #[\ReturnTypeWillChange]
    function offsetUnset($offset) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }
}
127 
128 /**
129  * Helper plugin for common syntax parsing.
130  */
131 class helper_plugin_strata_syntax extends DokuWiki_Plugin {
132     public static $patterns;
133 
134     /**
135      * Static initializer called directly after class declaration.
136      *
137      * This static method exists because we want to keep the static $patterns
138      * and its initialization close together.
139      */
140     static function initialize() {
141         self::$patterns = new helper_plugin_strata_syntax_RegexHelper();
142     }
143 
144     /**
145      * Constructor.
146      */
147     function __construct() {
148         $this->util =& plugin_load('helper', 'strata_util');
149         $this->error = '';
150         $this->regions = array();
151     }
152 
153     /**
154      * Returns an object describing the pattern fragments.
155      */
156     function getPatterns() {
157         return self::$patterns;
158     }
159 
160     /**
161      * Determines whether a line can be ignored.
162      */
163     function ignorableLine($line) {
164         $line = utf8_trim($line);
165         return $line == '' || utf8_substr($line,0,2) == '--';
166     }
167 
168     /**
169      * Updates the given typemap with new information.
170      *
171      * @param typemap array a typemap
172      * @param var string the name of the variable
173      * @param type string the type of the variable
174      * @param hint string the type hint of the variable
175      */
176     function updateTypemap(&$typemap, $var, $type, $hint=null) {
177         if(empty($typemap[$var]) && $type) {
178             $typemap[$var] = array('type'=>$type,'hint'=>$hint);
179             return true;
180         }
181 
182         return false;
183     }
184 
185     /**
186      * Constructs a literal with the given text.
187      */
188     function literal($val) {
189         return array('type'=>'literal', 'text'=>$val);
190     }
191 
192     /**
193      * Constructs a variable with the given name.
194      */
195     function variable($var) {
196         if($var[0] == '?') $var = substr($var,1);
197         return array('type'=>'variable', 'text'=>$var);
198     }
199 
200     function _fail($message, $regions=array()) {
201         msg($message,-1);
202 
203         if($this->isGroup($regions) || $this->isText($regions)) {
204             $regions = array($regions);
205         }
206 
207         $lines = array();
208         foreach($regions as $r) $lines[] = array('start'=>$r['start'], 'end'=>$r['end']);
209         throw new strata_exception($message, $lines);
210     }
211 
212     /**
213      * Constructs a query from the give tree.
214      *
215      * @param root array the tree to transform
216      * @param typemap array the type information collected so far
217      * @param projection array the variables to project
218      * @return a query structure
219      */
220     function constructQuery(&$root, &$typemap, $projection) {
221         $p = $this->getPatterns();
222 
223         $result = array(
224             'type'=>'select',
225             'group'=>array(),
226             'projection'=>$projection,
227             'ordering'=>array(),
228             'grouping'=>false,
229             'considering'=>array()
230         );
231 
232         // extract sort groups
233         $ordering = $this->extractGroups($root, 'sort');
234 
235         // extract grouping groups
236         $grouping = $this->extractGroups($root, 'group');
237 
238         // extract additional projection groups
239         $considering = $this->extractGroups($root, 'consider');
240 
241         // transform actual group
242         $where = $this->extractGroups($root, 'where');
243         $tree = null;
244         if(count($where)==0) {
245             $tree =& $root;
246         } elseif(count($where)==1) {
247             $tree =& $where[0];
248             if(count($root['cs'])) {
249                 $this->_fail($this->getLang('error_query_outofwhere'), $root['cs']);
250             }
251         } else {
252             $this->_fail($this->getLang('error_query_singlewhere'), $where);
253         }
254 
255         list($group, $scope) = $this->transformGroup($tree, $typemap);
256         $result['group'] = $group;
257         if(!$group) return false;
258 
259         // handle sort groups
260         if(count($ordering)) {
261             if(count($ordering) > 1) {
262                 $this->_fail($this->getLang('error_query_multisort'), $ordering);
263             }
264 
265             // handle each line in the group
266             foreach($ordering[0]['cs'] as $line) {
267                 if($this->isGroup($line)) {
268                     $this->_fail($this->getLang('error_query_sortblock'), $line);
269                 }
270 
271                 if(preg_match("/^({$p->variable})\s*(?:\((asc|desc)(?:ending)?\))?$/S",utf8_trim($line['text']),$match)) {
272                     $var = $p->variable($match[1]);
273                     if(!in_array($var->name, $scope)) {
274                         $this->_fail(sprintf($this->getLang('error_query_sortvar'),utf8_tohtml(hsc($var->name))), $line);
275                     }
276 
277                     $result['ordering'][] = array('variable'=>$var->name, 'direction'=>($match[2]?:'asc'));
278                 } else {
279                     $this->_fail(sprintf($this->getLang('error_query_sortline'), utf8_tohtml(hsc($line['text']))), $line);
280                 }
281             }
282         }
283 
284         //handle grouping
285         if(count($grouping)) {
286             if(count($grouping) > 1) {
287                 $this->_fail($this->getLang('error_query_multigrouping'), $grouping);
288             }
289 
290             // we have a group, so we want grouping
291             $result['grouping'] = array();
292 
293             foreach($grouping[0]['cs'] as $line) {
294                 if($this->isGroup($line)) {
295                     $this->_fail($this->getLang('error_query_groupblock'), $line);
296                 }
297 
298                 if(preg_match("/({$p->variable})$/",utf8_trim($line['text']),$match)) {
299                     $var = $p->variable($match[1]);
300                     if(!in_array($var->name, $scope)) {
301                         $this->_fail(sprintf($this->getLang('error_query_groupvar'),utf8_tohtml(hsc($var->name))), $line);
302                     }
303 
304                     $result['grouping'][] = $var->name;
305                 } else {
306                     $this->_fail(sprintf($this->getLang('error_query_groupline'), utf8_tohtml(hsc($line['text']))), $line);
307                 }
308             }
309         }
310 
311         //handle considering
312         if(count($considering)) {
313             if(count($considering) > 1) {
314                 $this->_fail($this->getLang('error_query_multiconsidering'), $considering);
315             }
316 
317             foreach($considering[0]['cs'] as $line) {
318                 if($this->isGroup($line)) {
319                     $this->_fail($this->getLang('error_query_considerblock'), $line);
320                 }
321 
322                 if(preg_match("/^({$p->variable})$/",utf8_trim($line['text']),$match)) {
323                     $var = $p->variable($match[1]);
324                     if(!in_array($var->name, $scope)) {
325                         $this->_fail(sprintf($this->getLang('error_query_considervar'),utf8_tohtml(hsc($var->name))), $line);
326                     }
327 
328                     $result['considering'][] = $var->name;
329                 } else {
330                     $this->_fail(sprintf($this->getLang('error_query_considerline'), utf8_tohtml(hsc($line['text']))), $line);
331                 }
332             }
333         }
334 
335         foreach($projection as $var) {
336             if(!in_array($var, $scope)) {
337                 $this->_fail(sprintf($this->getLang('error_query_selectvar'), utf8_tohtml(hsc($var))));
338             }
339         }
340 
341         // return final query structure
342         return array($result, $scope);
343     }
344 
345     /**
346      * Transforms a full query group.
347      *
348      * @param root array the tree to transform
349      * @param typemap array the type information
350      * @return the transformed group and a list of in-scope variables
351      */
352     function transformGroup(&$root, &$typemap) {
353         // extract patterns and split them in triples and filters
354         $patterns = $this->extractText($root);
355 
356         // extract union groups
357         $unions = $this->extractGroups($root, 'union');
358 
359         // extract minus groups
360         $minuses = $this->extractGroups($root,'minus');
361 
362         // extract optional groups
363         $optionals = $this->extractGroups($root,'optional');
364 
365         // check for leftovers
366         if(count($root['cs'])) {
367             $this->_fail(sprintf($this->getLang('error_query_group'),( isset($root['cs'][0]['tag']) ? sprintf($this->getLang('named_group'), utf8_tohtml(hsc($root['cs'][0]['tag']))) : $this->getLang('unnamed_group'))), $root['cs']);
368         }
369 
370         // split patterns into triples and filters
371         list($patterns, $filters, $scope) = $this->transformPatterns($patterns, $typemap);
372 
373         // convert each union into a pattern
374         foreach($unions as $union) {
375             list($u, $s) = $this->transformUnion($union, $typemap);
376             $scope = array_merge($scope, $s);
377             $patterns[] = $u;
378         }
379 
380         if(count($patterns) == 0) {
381             $this->_fail(sprintf($this->getLang('error_query_grouppattern')), $root);
382         }
383 
384         // chain all patterns with ANDs
385         $result = array_shift($patterns);
386         foreach($patterns as $pattern) {
387             $result = array(
388                 'type'=>'and',
389                 'lhs'=>$result,
390                 'rhs'=>$pattern
391             );
392         }
393 
394         // apply all optionals
395         if(count($optionals)) {
396             foreach($optionals as $optional) {
397                 // convert eacfh optional
398                 list($optional, $s) = $this->transformGroup($optional, $typemap);
399                 $scope = array_merge($scope, $s);
400                 $result = array(
401                     'type'=>'optional',
402                     'lhs'=>$result,
403                     'rhs'=>$optional
404                 );
405             }
406         }
407 
408 
409         // add all filters; these are a bit weird, as only a single FILTER is really supported
410         // (we have defined multiple filters as being a conjunction)
411         if(count($filters)) {
412             foreach($filters as $f) {
413                 $line = $f['_line'];
414                 unset($f['_line']);
415                 if($f['lhs']['type'] == 'variable' && !in_array($f['lhs']['text'], $scope)) {
416                     $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['lhs']['text']))), $line);
417                 }
418                 if($f['rhs']['type'] == 'variable' && !in_array($f['rhs']['text'], $scope)) {
419                     $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['rhs']['text']))), $line);
420                 }
421             }
422 
423             $result = array(
424                 'type'=>'filter',
425                 'lhs'=>$result,
426                 'rhs'=>$filters
427             );
428         }
429 
430         // apply all minuses
431         if(count($minuses)) {
432             foreach($minuses as $minus) {
433                 // convert each minus, and discard their scope
434                 list($minus, $s) = $this->transformGroup($minus, $typemap);
435                 $result = array(
436                     'type'=>'minus',
437                     'lhs'=>$result,
438                     'rhs'=>$minus
439                 );
440             }
441         }
442 
443         return array($result, $scope);
444     }
445 
446     /**
447      * Transforms a union group with multiple subgroups
448      *
449      * @param root array the union group to transform
450      * @param typemap array the type information
451      * @return the transformed group and a list of in-scope variables
452      */
453     function transformUnion(&$root, &$typemap) {
454         // fetch all child patterns
455         $subs = $this->extractGroups($root,null);
456 
457         // do sanity checks
458         if(count($root['cs'])) {
459             $this->_fail($this->getLang('error_query_unionblocks'), $root['cs']);
460         }
461 
462         if(count($subs) < 2) {
463             $this->_fail($this->getLang('error_query_unionreq'), $root);
464         }
465 
466         // transform the first group
467         list($result,$scope) = $this->transformGroup(array_shift($subs), $typemap);
468 
469         // transform each subsequent group
470         foreach($subs as $sub) {
471             list($rhs, $s) = $this->transformGroup($sub, $typemap);
472             $scope = array_merge($scope, $s);
473             $result = array(
474                 'type'=>'union',
475                 'lhs'=>$result,
476                 'rhs'=>$rhs
477             );
478         }
479 
480         return array($result, $scope);
481     }
482 
483     /**
484      * Transforms a list of patterns into a list of triples and a
485      * list of filters.
486      *
487      * @param lines array a list of lines to transform
488      * @param typemap array the type information
489      * @return a list of triples, a list of filters and a list of in-scope variables
490      */
491     function transformPatterns(&$lines, &$typemap) {
492         // we need this to resolve things
493         global $ID;
494 
495         // we need patterns
496         $p = $this->getPatterns();
497 
498         // result holders
499         $scope = array();
500         $triples = array();
501         $filters = array();
502 
503         foreach($lines as $lineNode) {
504             $line = trim($lineNode['text']);
505 
506             // [grammar] TRIPLEPATTERN := (VARIABLE|REFLIT) ' ' (VARIABLE|PREDICATE) TYPE? : ANY
507             if(preg_match("/^({$p->variable}|{$p->reflit})\s+({$p->variable}|{$p->predicate})\s*({$p->type})?\s*:\s*({$p->any})$/S",$line,$match)) {
508                 list(, $subject, $predicate, $type, $object) = $match;
509 
510                 $subject = utf8_trim($subject);
511                 if($subject[0] == '?') {
512                     $subject = $this->variable($subject);
513                     $scope[] = $subject['text'];
514                     $this->updateTypemap($typemap, $subject['text'], 'ref');
515                 } else {
516                     global $ID;
517                     $subject = $p->reflit($subject)->reference;
518                     $subject = $this->util->loadType('ref')->normalize($subject,null);
519                     $subject = $this->literal($subject);
520                 }
521 
522                 $predicate = utf8_trim($predicate);
523                 if($predicate[0] == '?') {
524                     $predicate = $this->variable($predicate);
525                     $scope[] = $predicate['text'];
526                     $this->updateTypemap($typemap, $predicate['text'], 'text');
527                 } else {
528                     $predicate = $this->literal($this->util->normalizePredicate($predicate));
529                 }
530 
531                 $object = utf8_trim($object);
532                 if($object[0] == '?') {
533                     // match a proper type variable
534                     if(preg_match("/^({$p->variable})\s*({$p->type})?$/",$object,$captures)!=1) {
535                         $this->_fail($this->getLang('error_pattern_garbage'),$lineNode);
536                     }
537                     $var=$captures[1]??null;
538                     $vtype=$captures[2]??null;
539 
540                     // create the object node
541                     $object = $this->variable($var);
542                     $scope[] = $object['text'];
543 
544                     // try direct type first, implied type second
545                     $vtype = $p->type($vtype);
546                     $type = $p->type($type);
547                     if (isset ($type))
548                     {
549                       $this->updateTypemap($typemap, $object['text'], $vtype->type, $vtype->hint);
550                       $this->updateTypemap($typemap, $object['text'], $type->type, $type->hint);
551                     }
552                 } else {
553                     // check for empty string token
554                     if($object == '[[]]') {
555                         $object='';
556                     }
557                     if(!$type) {
558                         list($type, $hint) = $this->util->getDefaultType();
559                     } else {
560                         $type = $p->type($type);
561                         $hint = $type->hint;
562                         $type = $type->type;
563                     }
564                     $type = $this->util->loadType($type);
565                     $object = $this->literal($type->normalize($object,$hint));
566                 }
567 
568                 $triples[] = array('type'=>'triple','subject'=>$subject, 'predicate'=>$predicate, 'object'=>$object);
569 
570             // [grammar] FILTER := VARIABLE TYPE? OPERATOR VARIABLE TYPE?
571             } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
572                 list(,$lhs, $ltype, $operator, $rhs, $rtype) = $match;
573 
574                 $lhs = $this->variable($lhs);
575                 $rhs = $this->variable($rhs);
576 
577                 if($operator == '~>' || $operator == '!~>') $operator = str_replace('~>','^~',$operator);
578 
579                 // do type information propagation
580                 $rtype = $p->type($rtype);
581                 $ltype = $p->type($ltype);
582 
583                 if($ltype) {
584                     // left has a defined type, so update the map
585                     $this->updateTypemap($typemap, $lhs['text'], $ltype->type, $ltype->hint);
586 
587                     // and propagate to right if possible
588                     if(!$rtype) {
589                         $this->updateTypemap($typemap, $rhs['text'], $ltype->type, $lhint->hint);
590                     }
591                 }
592                 if($rtype) {
593                     // right has a defined type, so update the map
594                     $this->updateTypemap($typemap, $rhs['text'], $rtype->type, $rtype->hint);
595 
596                     // and propagate to left if possible
597                     if(!$ltype) {
598                         $this->updateTypemap($typemap, $lhs['text'], $rtype->type, $rtype->hint);
599                     }
600                 }
601 
602                 $filters[] = array('type'=>'filter', 'lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
603 
604             // [grammar] FILTER := VARIABLE TYPE? OPERATOR ANY
605             } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->any})$/S",$line, $match)) {
606 
607                 // filter pattern
608                 list(, $lhs,$ltype,$operator,$rhs) = $match;
609 
610                 $lhs = $this->variable($lhs);
611 
612                 // update typemap if a type was defined
613                 list($type,$hint) = $p->type($ltype);
614                 if($type) {
615                     $this->updateTypemap($typemap, $lhs['text'],$type,$hint);
616                 } else {
617                     // use the already declared type if no type was defined
618                     if(!empty($typemap[$lhs['text']])) {
619                         extract($typemap[$lhs['text']]);
620                     } else {
621                         list($type, $hint) = $this->util->getDefaultType();
622                     }
623                 }
624 
625                 // check for empty string token
626                 if($rhs == '[[]]') {
627                     $rhs = '';
628                 }
629 
630                 // special case: the right hand side of the 'in' operator always normalizes with the 'text' type
631                 if($operator == '~>' || $operator == '!~>') {
632                     $operator = str_replace('~>','^~', $operator);
633                     $type = 'text';
634                     unset($hint);
635                 }
636 
637                 // normalize
638                 $type = $this->util->loadType($type);
639                 $rhs = $this->literal($type->normalize($rhs,$hint));
640 
641                 $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
642 
643             // [grammar] FILTER := ANY OPERATOR VARIABLE TYPE?
644             } elseif(preg_match("/^({$p->any})\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
645                 list(, $lhs,$operator,$rhs,$rtype) = $match;
646 
647                 $rhs = $this->variable($rhs);
648 
649                 // update typemap if a type was defined
650                 list($type, $hint) = $p->type($rtype);
651                 if($type) {
652                     $this->updateTypemap($typemap, $rhs['text'],$type,$hint);
653                 } else {
654                     // use the already declared type if no type was defined
655                     if(!empty($typemap[$rhs['text']])) {
656                         extract($typemap[$rhs['text']]);
657                     } else {
658                         list($type, $hint) = $this->util->getDefaultType();
659                     }
660                 }
661 
662                 // check for empty string token
663                 if($lhs == '[[]]') {
664                     $lhs = '';
665                 }
666 
667                 // special case: the left hand side of the 'in' operator always normalizes with the 'page' type
668                 if($operator == '~>' || $operator == '!~>') {
669                     $operator = str_replace('~>','^~', $operator);
670                     $type = 'page';
671                     unset($hint);
672                 }
673 
674                 // normalize
675                 $type = $this->util->loadType($type);
676                 $lhs = $this->literal($type->normalize($lhs,$hint));
677 
678                 $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
679             } else {
680                 // unknown lines are fail
681                 $this->_fail(sprintf($this->getLang('error_query_pattern'),utf8_tohtml(hsc($line))), $lineNode);
682             }
683         }
684 
685         return array($triples, $filters, $scope);
686     }
687 
688     function getFields(&$tree, &$typemap) {
689         $fields = array();
690 
691         // extract the projection information in 'long syntax' if available
692         $fieldsGroups = $this->extractGroups($tree, 'fields');
693 
694         // parse 'long syntax' if we don't have projection information yet
695         if(count($fieldsGroups)) {
696             if(count($fieldsGroups) > 1) {
697                 $this->_fail($this->getLang('error_query_fieldsgroups'), $fieldsGroups);
698             }
699 
700             $fieldsLines = $this->extractText($fieldsGroups[0]);
701             if(count($fieldsGroups[0]['cs'])) {
702                 $this->_fail(sprintf($this->getLang('error_query_fieldsblock'),( isset($fieldsGroups[0]['cs'][0]['tag']) ? sprintf($this->getLang('named_group'),hsc($fieldsGroups[0]['cs'][0]['tag'])) : $this->getLang('unnamed_group'))), $fieldsGroups[0]['cs']);
703             }
704             $fields = $this->parseFieldsLong($fieldsLines, $typemap);
705             if(!$fields) return array();
706         }
707 
708         return $fields;
709     }
710 
711     /**
712      * Parses a projection group in 'long syntax'.
713      */
714     function parseFieldsLong($lines, &$typemap) {
715         $p = $this->getPatterns();
716         $result = array();
717 
718         foreach($lines as $lineNode) {
719             $line = trim($lineNode['text']);
720             // FIELDLONG := VARIABLE AGGREGATE? TYPE? (':' ANY)?
721             if(preg_match("/^({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?(?:\s*(:)\s*({$p->any})?\s*)?$/S",$line, $match)) {
722                 list(, $var, $vaggregate, $vtype, $nocaphint, $caption) = $match;
723                 $variable = $p->variable($var)->name;
724                 if(!$nocaphint || (!$nocaphint && !$caption)) $caption = ucfirst($variable);
725 
726                 list($type,$hint) = $p->type($vtype);
727                 list($agg,$agghint) = $p->aggregate($vaggregate);
728 
729                 $this->updateTypemap($typemap, $variable, $type, $hint);
730                 $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
731             } else {
732                 $this->_fail(sprintf($this->getLang('error_query_fieldsline'),utf8_tohtml(hsc($line))), $lineNode);
733             }
734         }
735 
736         return $result;
737     }
738 
739     /**
740      * Parses a projection group in 'short syntax'.
741      */
742     function parseFieldsShort($line, &$typemap) {
743         $p = $this->getPatterns();
744         $result = array();
745 
746         // FIELDSHORT := VARIABLE AGGREGATE? TYPE? CAPTION?
747         if(preg_match_all("/\s*({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?\s*(?:(\")([^\"]*)\")?/",$line,$match, PREG_SET_ORDER)) {
748             foreach($match as $m) {
749                 $var=$m[1]??null;
750                 $vaggregate=$m[2]??null;
751                 $vtype=$m[3]??null;
752                 $caption_indicator=$m[4]??null;
753                 $caption=$m[5]??null;
754 
755                 $variable = $p->variable($var)->name;
756                 list($type, $hint) = $p->type($vtype);
757                 list($agg, $agghint) = $p->aggregate($vaggregate);
758                 if(!$caption_indicator) $caption = ucfirst($variable);
759                 $this->updateTypemap($typemap, $variable, $type, $hint);
760                 $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
761             }
762         }
763 
764         return $result;
765     }
766 
767     /**
768      * Returns the regex pattern used by the 'short syntax' for projection. This methods can
769      * be used to get a dokuwiki-lexer-safe regex to embed into your own syntax pattern.
770      *
771      * @param captions boolean Whether the pattern should include caption matching (defaults to true)
772      */
773     function fieldsShortPattern($captions = true) {
774         $p = $this->getPatterns();
775         return "(?:\s*{$p->variable}\s*{$p->aggregate}?\s*{$p->type}?".($captions?'\s*(?:"[^"]*")?':'').")";
776     }
777 
778     /**
779      * Constructs a tagged tree from the given list of lines.
780      *
781      * @return a tagged tree
782      */
783     function constructTree($lines, $what) {
784         $root = array(
785             'tag'=>'',
786             'cs'=>array(),
787             'start'=>1,
788             'end'=>1
789         );
790 
791         $stack = array();
792         $stack[] =& $root;
793         $top = count($stack)-1;
794         $lineCount = 0;
795 
796         foreach($lines as $line) {
797             $lineCount++;
798             if($this->ignorableLine($line)) continue;
799 
800             if(preg_match('/^([^\{]*) *{$/',utf8_trim($line),$match)) {
801                 list(, $tag) = $match;
802                 $tag = utf8_trim($tag);
803 
804                 $stack[$top]['cs'][] = array(
805                     'tag'=>$tag?:null,
806                     'cs'=>array(),
807                     'start'=>$lineCount,
808                     'end'=>0
809                 );
810                 $stack[] =& $stack[$top]['cs'][count($stack[$top]['cs'])-1];
811                 $top = count($stack)-1;
812 
813             } elseif(preg_match('/^}$/',utf8_trim($line))) {
814                 $stack[$top]['end'] = $lineCount;
815                 array_pop($stack);
816                 $top = count($stack)-1;
817 
818             } else {
819                 $stack[$top]['cs'][] = array(
820                     'text'=>$line,
821                     'start'=>$lineCount,
822                     'end'=>$lineCount
823                 );
824             }
825         }
826 
827         if(count($stack) != 1 || $stack[0] != $root) {
828             msg(sprintf($this->getLang('error_syntax_braces'),$what),-1);
829         }
830 
831         $root['end'] = $lineCount;
832 
833         return $root;
834     }
835 
836     /**
837      * Renders a debug display of the syntax.
838      *
839      * @param lines array the lines that form the syntax
     * @param regions array the regions to highlight
841      * @return a string with markup
842      */
843     function debugTree($lines, $regions) {
844         $result = '';
845         $lineCount = 0;
846         $count = 0;
847 
848         foreach($lines as $line) {
849             $lineCount++;
850 
851             foreach($regions as $region) {
852                 if($lineCount == $region['start']) {
853                     if($count == 0) $result .= '<div class="strata-debug-highlight">';
854                     $count++;
855                 }
856 
857                 if($lineCount == $region['end']+1) {
858                     $count--;
859 
860                     if($count==0) $result .= '</div>';
861                 }
862             }
863 
864             if($line != '') {
865                 $result .= '<div class="strata-debug-line">'.hsc($line).'</div>'."\n";
866             } else {
867                 $result .= '<div class="strata-debug-line"><br/></div>'."\n";
868             }
869         }
870 
871         if($count > 0) {
872             $result .= '</div>';
873         }
874 
875         return '<div class="strata-debug">'.$result.'</div>';
876     }
877 
878     /**
     * Extracts all occurrences of tagged groups from the given tree.
880      * This method does not remove the tagged groups from subtrees of
881      * the given root.
882      *
883      * @param root array the tree to operate on
884      * @param tag string the tag to remove
885      * @return an array of groups
886      */
887     function extractGroups(&$root, $tag) {
888         $result = array();
889         $to_remove = array();
890         foreach($root['cs'] as $i=>&$tree) {
891             if(!$this->isGroup($tree)) continue;
892             if($tree['tag'] == $tag || (($tag=='' || $tag==null) && $tree['tag'] == null) ) {
893                 $result[] =& $tree;
894                 $to_remove[] = $i;
895             }
896         }
897         // invert order of to_remove to always remove higher indices first
898         rsort($to_remove);
899         foreach($to_remove as $i) {
900             array_splice($root['cs'],$i,1);
901         }
902         return $result;
903     }
904 
905     /**
906      * Extracts all text elements from the given tree.
907      * This method does not remove the text elements from subtrees
908      * of the root.
909      *
910      * @param root array the tree to operate on
911      * @return array an array of text elements
912      */
913     function extractText(&$root) {
914         $result = array();
915         $to_remove = array();
916         foreach($root['cs'] as $i=>&$tree) {
917             if(!$this->isText($tree)) continue;
918             $result[] =& $tree;
919             $to_remove[] = $i;
920         }
921         // invert order of to_remove to always remove higher indices first
922         rsort($to_remove);
923         foreach($to_remove as $i) {
924             array_splice($root['cs'],$i,1);
925         }
926         return $result;
927     }
928 
929     /**
930      * Returns whether the given node is a line.
931      */
932     function isText(&$node) {
933         return array_key_exists('text', $node);
934     }
935 
936     /**
937      * Returns whether the given node is a group.
938      */
939     function isGroup(&$node) {
940         return array_key_exists('tag', $node);
941     }
942 
943     /**
944      * Sets all properties given as '$properties' to the values parsed from '$trees'.
945      *
946      * The property array has as keys all possible properties, which are specified by its
947      * values. Such specification is an array that may have the following keys, with the
948      * described values:
949      * - choices: array of possible values, where the keys are the internally used values
950      *     and the values specify synonyms for the choice, of which the first listed one
951      *     is most common. For example: 'true' => array('yes', 'yeah') specifies that the
952      *     user can choose 'yes' or 'yeah' (of which 'yes' is the commonly used value) and
953      *     that the return value will contain 'true' if this choice was chosen.
954      * - pattern: regular expression that defines all possible values.
955      * - pattern_desc: description used for errors when a pattern is specified.
956      * - minOccur: positive integer specifying the minimum number of values, defaults to 1.
957      * - maxOccur: integer greater than or equal to minOccur, which specifies the maximum
958      *     number of values, defaults to minOccur.
959      * - default: the default value (which must be a value the user is allowed to set).
960      *     When default is given, this method guarantees that the property is always set,
961      *     otherwise the property may not be set since all properties are optional.
962      * Either 'choices' or 'pattern' must be set (not both), all other values are optional.
963      *
964      * An example property array is as follows:
965      * array(
966      *   'example boolean' => array(
967      *     'choices' => array('y' => array('yes', 'yeah'), 'n' => array('no', 'nay')),
968      *     'minOccur' => 1,
969      *     'maxOccur' => 3,
970      *     'default' => 'yes'
971      *   ),
972      *   'example natural number' => array(
973      *     'pattern' => '/^[0-9]+$/',
974      *     'pattern_desc' => $this->getLang('property_Z*')
975      *   )
976      * )
977      *
978      * @param $properties The properties that can be set.
979      * @param $trees The trees that contain the values for these properties.
980      * @return An array with as indices the property names and as value a list of all values given for that property.
981      */
982     function setProperties($properties, $trees) {
983         $propertyValues = array();
984         $p = $this->getPatterns();
985 
986         foreach ($trees as $tree) {
987             $text = $this->extractText($tree);
988             foreach($text as $lineNode) {
989                 $line = utf8_trim($lineNode['text']);
990                 if (preg_match('/^('.$p->predicate.')(\*)?\s*:\s*('.$p->any.')$/', $line, $match)) {
991                     list(, $variable, $multi, $value) = $match;
992                     $this->_setPropertyValue($properties, $tree['tag'], $lineNode, $variable, !empty($multi), $value, $propertyValues);
993                 } else {
994                     $this->emitError($lineNode, 'error_property_weirdgroupline', hsc($tree['tag']), hsc($line));
995                 }
996             }
997             // Warn about unknown groups
998             foreach ($tree['cs'] as $group) {
999                 $this->emitError($group, 'error_property_unknowngroup', hsc($trees[0]['tag']), hsc($group['tag']));
1000             }
1001         }
1002 
1003         // Set property defaults
1004         foreach ($properties as $name => $p) {
1005             if (!isset($propertyValues[$name]) && isset($p['default'])) {
1006                 $this->_setPropertyValue($properties, 'default value', null, $name, false, $p['default'], $propertyValues);
1007             }
1008         }
1009 
1010         // Show errors, if any
1011         $this->showErrors();
1012 
1013         return $propertyValues;
1014     }
1015 
1016     function _setPropertyValue($properties, $group, $region, $variable, $isMulti, $value, &$propertyValues) {
1017         if (!isset($properties[$variable])) {
1018             // Unknown property: show error
1019             $property_title_values = $this->getLang('property_title_values');
1020             $propertyList = implode(', ', array_map(function ($n, $p) use ($property_title_values) {
1021                 $values = implode(', ', array_map(function ($c) {
1022                     return $c[0];
1023                 }, $p['choices']));
1024                 $title = sprintf($property_title_values, $values);
1025                 return '\'<code title="' . hsc($title) . '">' . hsc($n) . '</code>\'';
1026             }, array_keys($properties), $properties));
1027             $this->emitError($region, 'error_property_unknownproperty', hsc($group), hsc($variable), $propertyList);
1028         } else if (isset($propertyValues[$variable])) {
1029             // Property is specified more than once: show error
1030             $this->emitError($region, 'error_property_multi', hsc($group), hsc($variable));
1031         } else {
1032             $p = $properties[$variable];
1033             $minOccur = isset($p['minOccur']) ? $p['minOccur'] : 1;
1034             $maxOccur = isset($p['maxOccur']) ? $p['maxOccur'] : $minOccur;
1035 
1036             if ($isMulti) {
1037                 $values = array_map('utf8_trim', explode(',', $value));
1038             } else if ($minOccur == 1 || $minOccur == $maxOccur) {
1039                 // Repeat the given value as often as we expect it
1040                 $values = array_fill(0, $minOccur, $value);
1041             } else {
1042                 // A single value was given, but multiple were expected
1043                 $this->emitError($region, 'error_property_notmulti', hsc($group), hsc($variable), $minOccur);
1044                 return;
1045             }
1046 
1047             if (count($values) < $minOccur || count($values) > $maxOccur) {
1048                 // Number of values given differs from expected number
1049                 if ($minOccur == $maxOccur) {
1050                     $this->emitError($region, 'error_property_occur', hsc($group), hsc($variable), $minOccur, count($values));
1051                 } else {
1052                     $this->emitError($region, 'error_property_occurrange', hsc($group), hsc($variable), $minOccur, $maxOccur, count($values));
1053                 }
1054             } else if (isset($p['choices'])) { // Check whether the given property values are valid choices
1055                 // Create a mapping from choice to normalized value of the choice
1056                 $choices = array();
1057                 $choicesInfo = array(); // For nice error messages
1058                 foreach ($p['choices'] as $nc => $c) {
1059                     if (is_array($c)) {
1060                         $choices = array_merge($choices, array_fill_keys($c, $nc));
1061                         $title = sprintf($this->getLang('property_title_synonyms'), implode(', ', $c));
1062                         $choicesInfo[] = '\'<code title="' . hsc($title) . '">' . hsc($c[0]) . '</code>\'';
1063                     } else {
1064                         $choices[$c] = $c;
1065                         $choicesInfo[] = '\'<code>' . hsc($c) . '</code>\'';
1066                     }
1067                 }
1068                 if (!isset($choices['']) && isset($p['default'])) {
1069                     $choices[''] = $choices[$p['default']];
1070                 }
1071 
1072                 $incorrect = array_diff($values, array_keys($choices)); // Find all values that are not a valid choice
1073                 if (count($incorrect) > 0) {
1074                     unset($choices['']);
1075                     foreach (array_unique($incorrect) as $v) {
1076                         $this->emitError($region, 'error_property_invalidchoice', hsc($group), hsc($variable), hsc($v), implode(', ', $choicesInfo));
1077                     }
1078                 } else {
1079                     $propertyValues[$variable] = array_map(function($v) use ($choices) { return $choices[$v]; }, $values);
1080                 }
1081             } else if (isset($p['pattern'])) { // Check whether the given property values match the pattern
1082                 $incorrect = array_filter($values, function($v) use ($p) { return !preg_match($p['pattern'], $v); });
1083                 if (count($incorrect) > 0) {
1084                     foreach (array_unique($incorrect) as $v) {
1085                         if (isset($p['pattern_desc'])) {
1086                             $this->emitError($region, 'error_property_patterndesc', hsc($group), hsc($variable), hsc($v), $p['pattern_desc']);
1087                         } else {
1088                             $this->emitError($region, 'error_property_pattern', hsc($group), hsc($variable), hsc($v), hsc($p['pattern']));
1089                         }
1090                     }
1091                 } else {
1092                     $propertyValues[$variable] = $values;
1093                 }
1094             } else { // Property value has no requirements
1095                 $propertyValues[$variable] = $values;
1096             }
1097         }
1098     }
1099 
1100     /**
1101      * Generates a html error message, ensuring that all utf8 in arguments is escaped correctly.
1102      * The generated messages might be accumulated until showErrors is called.
1103      *
1104      * @param region The region at which the error occurs.
1105      * @param msg_id The id of the message in the language file.
1106      */
1107     function emitError($region, $msg_id) {
1108         $args = func_get_args();
1109         array_shift($args);
1110         array_shift($args);
1111         $args = array_map('strval', $args); // convert everything to strings first
1112         $args = array_map('utf8_tohtml', $args); // Escape args
1113         $msg = vsprintf($this->getLang($msg_id), $args);
1114         msg($msg, -1);
1115         $this->error .= "<br />\n" . $msg;
1116         $this->regions[] = $region;
1117     }
1118 
1119     /**
1120      * Ensures that all emitted errors are shown.
1121      */
1122     function showErrors() {
1123         if (!empty($this->error)) {
1124             $error = $this->error;
1125             $regions = $this->regions;
1126             $this->error = '';
1127             $this->regions = array();
1128             throw new strata_exception($error, $regions);
1129         }
1130     }
1131 }
1132 
// Call the static initializer explicitly (PHP doesn't offer this feature itself).
helper_plugin_strata_syntax::initialize();
1135