xref: /plugin/strata/helper/syntax.php (revision 5153720fcc1dd2b6e63035d45f7c2bc32e429371)
1*5153720fSfkaag71<?php
2*5153720fSfkaag71/**
3*5153720fSfkaag71 * DokuWiki Plugin strata (Helper Component)
4*5153720fSfkaag71 *
5*5153720fSfkaag71 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6*5153720fSfkaag71 * @author  Brend Wanders <b.wanders@utwente.nl>
7*5153720fSfkaag71 */
8*5153720fSfkaag71
9*5153720fSfkaag71if (!defined('DOKU_INC')) die('meh.');
10*5153720fSfkaag71
/**
 * Helper to construct and handle syntax fragments.
 *
 * Reading a property (e.g. $helper->variable) yields the regular expression
 * fragment registered under that name. Calling a method (e.g.
 * $helper->variable('?foo')) matches the argument against the capture
 * pattern registered under that name and returns the named captures.
 */
class helper_plugin_strata_syntax_RegexHelper {
    /**
     * Regular expression fragment table. This is used for interpolation of
     * syntax patterns, and should be without captures. Do not assume any
     * specific delimiter.
     */
    public $regexFragments = array(
        'variable'  => '(?:\?[^\s:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'predicate' => '(?:[^:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'reflit'    => '(?:\[\[[^]]*\]\])',
        'type'      => '(?:\[\s*[a-z0-9]+\s*(?:::[^\]]*)?\])',
        'aggregate' => '(?:@\s*[a-z0-9]+(?:\([^\)]*\))?)',
        'operator'  => '(?:!=|>=|<=|>|<|=|!~>|!~|!\^~|!\$~|\^~|\$~|~>|~)',
        'any'       => '(?:.+?)'
    );

    /**
     * Patterns used to extract information from captured fragments. These patterns
     * are used with '/' as delimiter, and should contain at least one capture group.
     * Each entry is a pair of (pattern, list of field names for the capture groups).
     */
    public $regexCaptures = array(
        'variable'  => array('\?(.*)', array('name')),
        'aggregate' => array('@\s*([a-z0-9]+)(?:\(([^\)]*)\))?', array('aggregate','hint')),
        'type'      => array('\[\s*([a-z0-9]+)\s*(?:::([^\]]*))?\]', array('type', 'hint')),
        'reflit'    => array('\[\[(.*)\]\]',array('reference'))
    );

    /**
     * Grabs the syntax fragment.
     *
     * @param string $name key into the fragment table
     * @return string|null the fragment, or null (after raising an
     *                     E_USER_NOTICE) when no such fragment exists
     */
    function __get($name) {
        if(array_key_exists($name, $this->regexFragments)) {
            return $this->regexFragments[$name];
        }

        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Undefined syntax fragment '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
        return null;
    }

    /**
     * Extracts information from a fragment, based on the type.
     *
     * @param string $name key into the capture table
     * @param array $arguments call arguments; only the first one (the text
     *                         to match) is used
     * @return helper_plugin_strata_syntax_RegexHelperCapture|null the named
     *         captures, or null when the text does not match or the capture
     *         name is unknown (the latter also raises an E_USER_NOTICE)
     */
    function __call($name, $arguments) {
        if(!array_key_exists($name, $this->regexCaptures)) {
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
            trigger_error("Undefined syntax capture '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
            return null;
        }

        list($pattern, $names) = $this->regexCaptures[$name];

        // Callers pass on optional regex groups that may not have matched, so
        // the argument can be missing or null. Treat that as empty text (which
        // none of the capture patterns match) instead of handing null to
        // preg_match, which raises notices and is deprecated as of PHP 8.1.
        $subject = isset($arguments[0]) ? (string)$arguments[0] : '';

        if(preg_match("/^{$pattern}$/", $subject, $match) !== 1) {
            return null;
        }

        // drop the full match; preg_match omits trailing groups that did not
        // participate, so only pair up as many names as there are values
        array_shift($match);
        $shortest = min(count($names), count($match));
        return new helper_plugin_strata_syntax_RegexHelperCapture(array_combine(array_slice($names,0,$shortest), array_slice($match, 0, $shortest)));
    }
}
73*5153720fSfkaag71
/**
 * A single capture. Used as a return value for the RegexHelper's
 * capture methods.
 *
 * Fields can be read as properties ($capture->name), by field name
 * ($capture['name']) or by position ($capture[0]). Fields are read-only.
 */
class helper_plugin_strata_syntax_RegexHelperCapture implements ArrayAccess {
    /**
     * Captured values keyed by field name, in capture order.
     *
     * Declared explicitly because creating it dynamically in the constructor
     * is deprecated as of PHP 8.2. Kept public so it stays reachable exactly
     * like the former dynamic property.
     */
    public $values = array();

    /**
     * @param array $values captured values keyed by field name
     */
    function __construct($values) {
        $this->values = $values;
    }

    /**
     * Reads a field by name.
     *
     * @param string $name the field name
     * @return mixed the captured value, or null if there is no such field
     */
    function __get($name) {
        if(array_key_exists($name, $this->values)) {
            return $this->values[$name];
        }

        return null;
    }

    /**
     * Checks whether the offset is a numeric position inside the capture
     * (with 0 being the first field and count-1 the last).
     */
    private function isPosition($offset) {
        return is_numeric($offset) && $offset >= 0 && $offset < count($this->values);
    }

    /**
     * The index is valid iff it is an existing field name or a correct
     * numeric index.
     *
     * The is_numeric guard (shared with offsetGet through isPosition) keeps
     * non-numeric strings out of the range comparison; without it, loose
     * comparison reported offsets such as '1abc' as existing although
     * offsetGet could not resolve them.
     */
    #[\ReturnTypeWillChange]
    function offsetExists($offset) {
        return isset($this->values[$offset]) || $this->isPosition($offset);
    }

    /**
     * Returns the value for a field name or numeric position, or null when
     * the offset is unknown.
     */
    #[\ReturnTypeWillChange]
    function offsetGet($offset) {
        // field names take precedence
        if(isset($this->values[$offset])) {
            return $this->values[$offset];
        }

        // otherwise translate a numeric position to its field name
        if($this->isPosition($offset)) {
            $keys = array_keys($this->values);
            return $this->values[$keys[intval($offset)]];
        }

        // offset unknown
        return null;
    }

    /**
     * Writing is not supported: raises an E_USER_NOTICE and changes nothing.
     */
    #[\ReturnTypeWillChange]
    function offsetSet($offset, $value) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }

    /**
     * Unsetting is not supported: raises an E_USER_NOTICE and changes nothing.
     */
    #[\ReturnTypeWillChange]
    function offsetUnset($offset) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }
}
127*5153720fSfkaag71
128*5153720fSfkaag71/**
129*5153720fSfkaag71 * Helper plugin for common syntax parsing.
130*5153720fSfkaag71 */
131*5153720fSfkaag71class helper_plugin_strata_syntax extends DokuWiki_Plugin {
132*5153720fSfkaag71    public static $patterns;
133*5153720fSfkaag71
134*5153720fSfkaag71    /**
135*5153720fSfkaag71     * Static initializer called directly after class declaration.
136*5153720fSfkaag71     *
137*5153720fSfkaag71     * This static method exists because we want to keep the static $patterns
138*5153720fSfkaag71     * and its initialization close together.
139*5153720fSfkaag71     */
140*5153720fSfkaag71    static function initialize() {
141*5153720fSfkaag71        self::$patterns = new helper_plugin_strata_syntax_RegexHelper();
142*5153720fSfkaag71    }
143*5153720fSfkaag71
144*5153720fSfkaag71    /**
145*5153720fSfkaag71     * Constructor.
146*5153720fSfkaag71     */
147*5153720fSfkaag71    function __construct() {
148*5153720fSfkaag71        $this->util =& plugin_load('helper', 'strata_util');
149*5153720fSfkaag71        $this->error = '';
150*5153720fSfkaag71        $this->regions = array();
151*5153720fSfkaag71    }
152*5153720fSfkaag71
153*5153720fSfkaag71    /**
154*5153720fSfkaag71     * Returns an object describing the pattern fragments.
155*5153720fSfkaag71     */
156*5153720fSfkaag71    function getPatterns() {
157*5153720fSfkaag71        return self::$patterns;
158*5153720fSfkaag71    }
159*5153720fSfkaag71
160*5153720fSfkaag71    /**
161*5153720fSfkaag71     * Determines whether a line can be ignored.
162*5153720fSfkaag71     */
163*5153720fSfkaag71    function ignorableLine($line) {
164*5153720fSfkaag71        $line = utf8_trim($line);
165*5153720fSfkaag71        return $line == '' || utf8_substr($line,0,2) == '--';
166*5153720fSfkaag71    }
167*5153720fSfkaag71
168*5153720fSfkaag71    /**
169*5153720fSfkaag71     * Updates the given typemap with new information.
170*5153720fSfkaag71     *
171*5153720fSfkaag71     * @param typemap array a typemap
172*5153720fSfkaag71     * @param var string the name of the variable
173*5153720fSfkaag71     * @param type string the type of the variable
174*5153720fSfkaag71     * @param hint string the type hint of the variable
175*5153720fSfkaag71     */
176*5153720fSfkaag71    function updateTypemap(&$typemap, $var, $type, $hint=null) {
177*5153720fSfkaag71        if(empty($typemap[$var]) && $type) {
178*5153720fSfkaag71            $typemap[$var] = array('type'=>$type,'hint'=>$hint);
179*5153720fSfkaag71            return true;
180*5153720fSfkaag71        }
181*5153720fSfkaag71
182*5153720fSfkaag71        return false;
183*5153720fSfkaag71    }
184*5153720fSfkaag71
185*5153720fSfkaag71    /**
186*5153720fSfkaag71     * Constructs a literal with the given text.
187*5153720fSfkaag71     */
188*5153720fSfkaag71    function literal($val) {
189*5153720fSfkaag71        return array('type'=>'literal', 'text'=>$val);
190*5153720fSfkaag71    }
191*5153720fSfkaag71
192*5153720fSfkaag71    /**
193*5153720fSfkaag71     * Constructs a variable with the given name.
194*5153720fSfkaag71     */
195*5153720fSfkaag71    function variable($var) {
196*5153720fSfkaag71        if($var[0] == '?') $var = substr($var,1);
197*5153720fSfkaag71        return array('type'=>'variable', 'text'=>$var);
198*5153720fSfkaag71    }
199*5153720fSfkaag71
200*5153720fSfkaag71    function _fail($message, $regions=array()) {
201*5153720fSfkaag71        msg($message,-1);
202*5153720fSfkaag71
203*5153720fSfkaag71        if($this->isGroup($regions) || $this->isText($regions)) {
204*5153720fSfkaag71            $regions = array($regions);
205*5153720fSfkaag71        }
206*5153720fSfkaag71
207*5153720fSfkaag71        $lines = array();
208*5153720fSfkaag71        foreach($regions as $r) $lines[] = array('start'=>$r['start'], 'end'=>$r['end']);
209*5153720fSfkaag71        throw new strata_exception($message, $lines);
210*5153720fSfkaag71    }
211*5153720fSfkaag71
212*5153720fSfkaag71    /**
213*5153720fSfkaag71     * Constructs a query from the give tree.
214*5153720fSfkaag71     *
215*5153720fSfkaag71     * @param root array the tree to transform
216*5153720fSfkaag71     * @param typemap array the type information collected so far
217*5153720fSfkaag71     * @param projection array the variables to project
218*5153720fSfkaag71     * @return a query structure
219*5153720fSfkaag71     */
220*5153720fSfkaag71    function constructQuery(&$root, &$typemap, $projection) {
221*5153720fSfkaag71        $p = $this->getPatterns();
222*5153720fSfkaag71
223*5153720fSfkaag71        $result = array(
224*5153720fSfkaag71            'type'=>'select',
225*5153720fSfkaag71            'group'=>array(),
226*5153720fSfkaag71            'projection'=>$projection,
227*5153720fSfkaag71            'ordering'=>array(),
228*5153720fSfkaag71            'grouping'=>false,
229*5153720fSfkaag71            'considering'=>array()
230*5153720fSfkaag71        );
231*5153720fSfkaag71
232*5153720fSfkaag71        // extract sort groups
233*5153720fSfkaag71        $ordering = $this->extractGroups($root, 'sort');
234*5153720fSfkaag71
235*5153720fSfkaag71        // extract grouping groups
236*5153720fSfkaag71        $grouping = $this->extractGroups($root, 'group');
237*5153720fSfkaag71
238*5153720fSfkaag71        // extract additional projection groups
239*5153720fSfkaag71        $considering = $this->extractGroups($root, 'consider');
240*5153720fSfkaag71
241*5153720fSfkaag71        // transform actual group
242*5153720fSfkaag71        $where = $this->extractGroups($root, 'where');
243*5153720fSfkaag71        $tree = null;
244*5153720fSfkaag71        if(count($where)==0) {
245*5153720fSfkaag71            $tree =& $root;
246*5153720fSfkaag71        } elseif(count($where)==1) {
247*5153720fSfkaag71            $tree =& $where[0];
248*5153720fSfkaag71            if(count($root['cs'])) {
249*5153720fSfkaag71                $this->_fail($this->getLang('error_query_outofwhere'), $root['cs']);
250*5153720fSfkaag71            }
251*5153720fSfkaag71        } else {
252*5153720fSfkaag71            $this->_fail($this->getLang('error_query_singlewhere'), $where);
253*5153720fSfkaag71        }
254*5153720fSfkaag71
255*5153720fSfkaag71        list($group, $scope) = $this->transformGroup($tree, $typemap);
256*5153720fSfkaag71        $result['group'] = $group;
257*5153720fSfkaag71        if(!$group) return false;
258*5153720fSfkaag71
259*5153720fSfkaag71        // handle sort groups
260*5153720fSfkaag71        if(count($ordering)) {
261*5153720fSfkaag71            if(count($ordering) > 1) {
262*5153720fSfkaag71                $this->_fail($this->getLang('error_query_multisort'), $ordering);
263*5153720fSfkaag71            }
264*5153720fSfkaag71
265*5153720fSfkaag71            // handle each line in the group
266*5153720fSfkaag71            foreach($ordering[0]['cs'] as $line) {
267*5153720fSfkaag71                if($this->isGroup($line)) {
268*5153720fSfkaag71                    $this->_fail($this->getLang('error_query_sortblock'), $line);
269*5153720fSfkaag71                }
270*5153720fSfkaag71
271*5153720fSfkaag71                if(preg_match("/^({$p->variable})\s*(?:\((asc|desc)(?:ending)?\))?$/S",utf8_trim($line['text']),$match)) {
272*5153720fSfkaag71                    $var = $p->variable($match[1]);
273*5153720fSfkaag71                    if(!in_array($var->name, $scope)) {
274*5153720fSfkaag71                        $this->_fail(sprintf($this->getLang('error_query_sortvar'),utf8_tohtml(hsc($var->name))), $line);
275*5153720fSfkaag71                    }
276*5153720fSfkaag71
277*5153720fSfkaag71                    $result['ordering'][] = array('variable'=>$var->name, 'direction'=>($match[2]?:'asc'));
278*5153720fSfkaag71                } else {
279*5153720fSfkaag71                    $this->_fail(sprintf($this->getLang('error_query_sortline'), utf8_tohtml(hsc($line['text']))), $line);
280*5153720fSfkaag71                }
281*5153720fSfkaag71            }
282*5153720fSfkaag71        }
283*5153720fSfkaag71
284*5153720fSfkaag71        //handle grouping
285*5153720fSfkaag71        if(count($grouping)) {
286*5153720fSfkaag71            if(count($grouping) > 1) {
287*5153720fSfkaag71                $this->_fail($this->getLang('error_query_multigrouping'), $grouping);
288*5153720fSfkaag71            }
289*5153720fSfkaag71
290*5153720fSfkaag71            // we have a group, so we want grouping
291*5153720fSfkaag71            $result['grouping'] = array();
292*5153720fSfkaag71
293*5153720fSfkaag71            foreach($grouping[0]['cs'] as $line) {
294*5153720fSfkaag71                if($this->isGroup($line)) {
295*5153720fSfkaag71                    $this->_fail($this->getLang('error_query_groupblock'), $line);
296*5153720fSfkaag71                }
297*5153720fSfkaag71
298*5153720fSfkaag71                if(preg_match("/({$p->variable})$/",utf8_trim($line['text']),$match)) {
299*5153720fSfkaag71                    $var = $p->variable($match[1]);
300*5153720fSfkaag71                    if(!in_array($var->name, $scope)) {
301*5153720fSfkaag71                        $this->_fail(sprintf($this->getLang('error_query_groupvar'),utf8_tohtml(hsc($var->name))), $line);
302*5153720fSfkaag71                    }
303*5153720fSfkaag71
304*5153720fSfkaag71                    $result['grouping'][] = $var->name;
305*5153720fSfkaag71                } else {
306*5153720fSfkaag71                    $this->_fail(sprintf($this->getLang('error_query_groupline'), utf8_tohtml(hsc($line['text']))), $line);
307*5153720fSfkaag71                }
308*5153720fSfkaag71            }
309*5153720fSfkaag71        }
310*5153720fSfkaag71
311*5153720fSfkaag71        //handle considering
312*5153720fSfkaag71        if(count($considering)) {
313*5153720fSfkaag71            if(count($considering) > 1) {
314*5153720fSfkaag71                $this->_fail($this->getLang('error_query_multiconsidering'), $considering);
315*5153720fSfkaag71            }
316*5153720fSfkaag71
317*5153720fSfkaag71            foreach($considering[0]['cs'] as $line) {
318*5153720fSfkaag71                if($this->isGroup($line)) {
319*5153720fSfkaag71                    $this->_fail($this->getLang('error_query_considerblock'), $line);
320*5153720fSfkaag71                }
321*5153720fSfkaag71
322*5153720fSfkaag71                if(preg_match("/^({$p->variable})$/",utf8_trim($line['text']),$match)) {
323*5153720fSfkaag71                    $var = $p->variable($match[1]);
324*5153720fSfkaag71                    if(!in_array($var->name, $scope)) {
325*5153720fSfkaag71                        $this->_fail(sprintf($this->getLang('error_query_considervar'),utf8_tohtml(hsc($var->name))), $line);
326*5153720fSfkaag71                    }
327*5153720fSfkaag71
328*5153720fSfkaag71                    $result['considering'][] = $var->name;
329*5153720fSfkaag71                } else {
330*5153720fSfkaag71                    $this->_fail(sprintf($this->getLang('error_query_considerline'), utf8_tohtml(hsc($line['text']))), $line);
331*5153720fSfkaag71                }
332*5153720fSfkaag71            }
333*5153720fSfkaag71        }
334*5153720fSfkaag71
335*5153720fSfkaag71        foreach($projection as $var) {
336*5153720fSfkaag71            if(!in_array($var, $scope)) {
337*5153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_selectvar'), utf8_tohtml(hsc($var))));
338*5153720fSfkaag71            }
339*5153720fSfkaag71        }
340*5153720fSfkaag71
341*5153720fSfkaag71        // return final query structure
342*5153720fSfkaag71        return array($result, $scope);
343*5153720fSfkaag71    }
344*5153720fSfkaag71
345*5153720fSfkaag71    /**
346*5153720fSfkaag71     * Transforms a full query group.
347*5153720fSfkaag71     *
348*5153720fSfkaag71     * @param root array the tree to transform
349*5153720fSfkaag71     * @param typemap array the type information
350*5153720fSfkaag71     * @return the transformed group and a list of in-scope variables
351*5153720fSfkaag71     */
352*5153720fSfkaag71    function transformGroup(&$root, &$typemap) {
353*5153720fSfkaag71        // extract patterns and split them in triples and filters
354*5153720fSfkaag71        $patterns = $this->extractText($root);
355*5153720fSfkaag71
356*5153720fSfkaag71        // extract union groups
357*5153720fSfkaag71        $unions = $this->extractGroups($root, 'union');
358*5153720fSfkaag71
359*5153720fSfkaag71        // extract minus groups
360*5153720fSfkaag71        $minuses = $this->extractGroups($root,'minus');
361*5153720fSfkaag71
362*5153720fSfkaag71        // extract optional groups
363*5153720fSfkaag71        $optionals = $this->extractGroups($root,'optional');
364*5153720fSfkaag71
365*5153720fSfkaag71        // check for leftovers
366*5153720fSfkaag71        if(count($root['cs'])) {
367*5153720fSfkaag71            $this->_fail(sprintf($this->getLang('error_query_group'),( isset($root['cs'][0]['tag']) ? sprintf($this->getLang('named_group'), utf8_tohtml(hsc($root['cs'][0]['tag']))) : $this->getLang('unnamed_group'))), $root['cs']);
368*5153720fSfkaag71        }
369*5153720fSfkaag71
370*5153720fSfkaag71        // split patterns into triples and filters
371*5153720fSfkaag71        list($patterns, $filters, $scope) = $this->transformPatterns($patterns, $typemap);
372*5153720fSfkaag71
373*5153720fSfkaag71        // convert each union into a pattern
374*5153720fSfkaag71        foreach($unions as $union) {
375*5153720fSfkaag71            list($u, $s) = $this->transformUnion($union, $typemap);
376*5153720fSfkaag71            $scope = array_merge($scope, $s);
377*5153720fSfkaag71            $patterns[] = $u;
378*5153720fSfkaag71        }
379*5153720fSfkaag71
380*5153720fSfkaag71        if(count($patterns) == 0) {
381*5153720fSfkaag71            $this->_fail(sprintf($this->getLang('error_query_grouppattern')), $root);
382*5153720fSfkaag71        }
383*5153720fSfkaag71
384*5153720fSfkaag71        // chain all patterns with ANDs
385*5153720fSfkaag71        $result = array_shift($patterns);
386*5153720fSfkaag71        foreach($patterns as $pattern) {
387*5153720fSfkaag71            $result = array(
388*5153720fSfkaag71                'type'=>'and',
389*5153720fSfkaag71                'lhs'=>$result,
390*5153720fSfkaag71                'rhs'=>$pattern
391*5153720fSfkaag71            );
392*5153720fSfkaag71        }
393*5153720fSfkaag71
394*5153720fSfkaag71        // apply all optionals
395*5153720fSfkaag71        if(count($optionals)) {
396*5153720fSfkaag71            foreach($optionals as $optional) {
397*5153720fSfkaag71                // convert eacfh optional
398*5153720fSfkaag71                list($optional, $s) = $this->transformGroup($optional, $typemap);
399*5153720fSfkaag71                $scope = array_merge($scope, $s);
400*5153720fSfkaag71                $result = array(
401*5153720fSfkaag71                    'type'=>'optional',
402*5153720fSfkaag71                    'lhs'=>$result,
403*5153720fSfkaag71                    'rhs'=>$optional
404*5153720fSfkaag71                );
405*5153720fSfkaag71            }
406*5153720fSfkaag71        }
407*5153720fSfkaag71
408*5153720fSfkaag71
409*5153720fSfkaag71        // add all filters; these are a bit weird, as only a single FILTER is really supported
410*5153720fSfkaag71        // (we have defined multiple filters as being a conjunction)
411*5153720fSfkaag71        if(count($filters)) {
412*5153720fSfkaag71            foreach($filters as $f) {
413*5153720fSfkaag71                $line = $f['_line'];
414*5153720fSfkaag71                unset($f['_line']);
415*5153720fSfkaag71                if($f['lhs']['type'] == 'variable' && !in_array($f['lhs']['text'], $scope)) {
416*5153720fSfkaag71                    $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['lhs']['text']))), $line);
417*5153720fSfkaag71                }
418*5153720fSfkaag71                if($f['rhs']['type'] == 'variable' && !in_array($f['rhs']['text'], $scope)) {
419*5153720fSfkaag71                    $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['rhs']['text']))), $line);
420*5153720fSfkaag71                }
421*5153720fSfkaag71            }
422*5153720fSfkaag71
423*5153720fSfkaag71            $result = array(
424*5153720fSfkaag71                'type'=>'filter',
425*5153720fSfkaag71                'lhs'=>$result,
426*5153720fSfkaag71                'rhs'=>$filters
427*5153720fSfkaag71            );
428*5153720fSfkaag71        }
429*5153720fSfkaag71
430*5153720fSfkaag71        // apply all minuses
431*5153720fSfkaag71        if(count($minuses)) {
432*5153720fSfkaag71            foreach($minuses as $minus) {
433*5153720fSfkaag71                // convert each minus, and discard their scope
434*5153720fSfkaag71                list($minus, $s) = $this->transformGroup($minus, $typemap);
435*5153720fSfkaag71                $result = array(
436*5153720fSfkaag71                    'type'=>'minus',
437*5153720fSfkaag71                    'lhs'=>$result,
438*5153720fSfkaag71                    'rhs'=>$minus
439*5153720fSfkaag71                );
440*5153720fSfkaag71            }
441*5153720fSfkaag71        }
442*5153720fSfkaag71
443*5153720fSfkaag71        return array($result, $scope);
444*5153720fSfkaag71    }
445*5153720fSfkaag71
446*5153720fSfkaag71    /**
447*5153720fSfkaag71     * Transforms a union group with multiple subgroups
448*5153720fSfkaag71     *
449*5153720fSfkaag71     * @param root array the union group to transform
450*5153720fSfkaag71     * @param typemap array the type information
451*5153720fSfkaag71     * @return the transformed group and a list of in-scope variables
452*5153720fSfkaag71     */
453*5153720fSfkaag71    function transformUnion(&$root, &$typemap) {
454*5153720fSfkaag71        // fetch all child patterns
455*5153720fSfkaag71        $subs = $this->extractGroups($root,null);
456*5153720fSfkaag71
457*5153720fSfkaag71        // do sanity checks
458*5153720fSfkaag71        if(count($root['cs'])) {
459*5153720fSfkaag71            $this->_fail($this->getLang('error_query_unionblocks'), $root['cs']);
460*5153720fSfkaag71        }
461*5153720fSfkaag71
462*5153720fSfkaag71        if(count($subs) < 2) {
463*5153720fSfkaag71            $this->_fail($this->getLang('error_query_unionreq'), $root);
464*5153720fSfkaag71        }
465*5153720fSfkaag71
466*5153720fSfkaag71        // transform the first group
467*5153720fSfkaag71        list($result,$scope) = $this->transformGroup(array_shift($subs), $typemap);
468*5153720fSfkaag71
469*5153720fSfkaag71        // transform each subsequent group
470*5153720fSfkaag71        foreach($subs as $sub) {
471*5153720fSfkaag71            list($rhs, $s) = $this->transformGroup($sub, $typemap);
472*5153720fSfkaag71            $scope = array_merge($scope, $s);
473*5153720fSfkaag71            $result = array(
474*5153720fSfkaag71                'type'=>'union',
475*5153720fSfkaag71                'lhs'=>$result,
476*5153720fSfkaag71                'rhs'=>$rhs
477*5153720fSfkaag71            );
478*5153720fSfkaag71        }
479*5153720fSfkaag71
480*5153720fSfkaag71        return array($result, $scope);
481*5153720fSfkaag71    }
482*5153720fSfkaag71
483*5153720fSfkaag71    /**
484*5153720fSfkaag71     * Transforms a list of patterns into a list of triples and a
485*5153720fSfkaag71     * list of filters.
486*5153720fSfkaag71     *
487*5153720fSfkaag71     * @param lines array a list of lines to transform
488*5153720fSfkaag71     * @param typemap array the type information
489*5153720fSfkaag71     * @return a list of triples, a list of filters and a list of in-scope variables
490*5153720fSfkaag71     */
491*5153720fSfkaag71    function transformPatterns(&$lines, &$typemap) {
492*5153720fSfkaag71        // we need this to resolve things
493*5153720fSfkaag71        global $ID;
494*5153720fSfkaag71
495*5153720fSfkaag71        // we need patterns
496*5153720fSfkaag71        $p = $this->getPatterns();
497*5153720fSfkaag71
498*5153720fSfkaag71        // result holders
499*5153720fSfkaag71        $scope = array();
500*5153720fSfkaag71        $triples = array();
501*5153720fSfkaag71        $filters = array();
502*5153720fSfkaag71
503*5153720fSfkaag71        foreach($lines as $lineNode) {
504*5153720fSfkaag71            $line = trim($lineNode['text']);
505*5153720fSfkaag71
506*5153720fSfkaag71            // [grammar] TRIPLEPATTERN := (VARIABLE|REFLIT) ' ' (VARIABLE|PREDICATE) TYPE? : ANY
507*5153720fSfkaag71            if(preg_match("/^({$p->variable}|{$p->reflit})\s+({$p->variable}|{$p->predicate})\s*({$p->type})?\s*:\s*({$p->any})$/S",$line,$match)) {
508*5153720fSfkaag71                list(, $subject, $predicate, $type, $object) = $match;
509*5153720fSfkaag71
510*5153720fSfkaag71                $subject = utf8_trim($subject);
511*5153720fSfkaag71                if($subject[0] == '?') {
512*5153720fSfkaag71                    $subject = $this->variable($subject);
513*5153720fSfkaag71                    $scope[] = $subject['text'];
514*5153720fSfkaag71                    $this->updateTypemap($typemap, $subject['text'], 'ref');
515*5153720fSfkaag71                } else {
516*5153720fSfkaag71                    global $ID;
517*5153720fSfkaag71                    $subject = $p->reflit($subject)->reference;
518*5153720fSfkaag71                    $subject = $this->util->loadType('ref')->normalize($subject,null);
519*5153720fSfkaag71                    $subject = $this->literal($subject);
520*5153720fSfkaag71                }
521*5153720fSfkaag71
522*5153720fSfkaag71                $predicate = utf8_trim($predicate);
523*5153720fSfkaag71                if($predicate[0] == '?') {
524*5153720fSfkaag71                    $predicate = $this->variable($predicate);
525*5153720fSfkaag71                    $scope[] = $predicate['text'];
526*5153720fSfkaag71                    $this->updateTypemap($typemap, $predicate['text'], 'text');
527*5153720fSfkaag71                } else {
528*5153720fSfkaag71                    $predicate = $this->literal($this->util->normalizePredicate($predicate));
529*5153720fSfkaag71                }
530*5153720fSfkaag71
531*5153720fSfkaag71                $object = utf8_trim($object);
532*5153720fSfkaag71                if($object[0] == '?') {
533*5153720fSfkaag71                    // match a proper type variable
534*5153720fSfkaag71                    if(preg_match("/^({$p->variable})\s*({$p->type})?$/",$object,$captures)!=1) {
535*5153720fSfkaag71                        $this->_fail($this->getLang('error_pattern_garbage'),$lineNode);
536*5153720fSfkaag71                    }
537*5153720fSfkaag71                    list(, $var, $vtype) = $captures;
538*5153720fSfkaag71
539*5153720fSfkaag71                    // create the object node
540*5153720fSfkaag71                    $object = $this->variable($var);
541*5153720fSfkaag71                    $scope[] = $object['text'];
542*5153720fSfkaag71
543*5153720fSfkaag71                    // try direct type first, implied type second
544*5153720fSfkaag71                    $vtype = $p->type($vtype);
545*5153720fSfkaag71                    $type = $p->type($type);
546*5153720fSfkaag71                    $this->updateTypemap($typemap, $object['text'], $vtype->type, $vtype->hint);
547*5153720fSfkaag71                    $this->updateTypemap($typemap, $object['text'], $type->type, $type->hint);
548*5153720fSfkaag71                } else {
549*5153720fSfkaag71                    // check for empty string token
550*5153720fSfkaag71                    if($object == '[[]]') {
551*5153720fSfkaag71                        $object='';
552*5153720fSfkaag71                    }
553*5153720fSfkaag71                    if(!$type) {
554*5153720fSfkaag71                        list($type, $hint) = $this->util->getDefaultType();
555*5153720fSfkaag71                    } else {
556*5153720fSfkaag71                        $type = $p->type($type);
557*5153720fSfkaag71                        $hint = $type->hint;
558*5153720fSfkaag71                        $type = $type->type;
559*5153720fSfkaag71                    }
560*5153720fSfkaag71                    $type = $this->util->loadType($type);
561*5153720fSfkaag71                    $object = $this->literal($type->normalize($object,$hint));
562*5153720fSfkaag71                }
563*5153720fSfkaag71
564*5153720fSfkaag71                $triples[] = array('type'=>'triple','subject'=>$subject, 'predicate'=>$predicate, 'object'=>$object);
565*5153720fSfkaag71
566*5153720fSfkaag71            // [grammar] FILTER := VARIABLE TYPE? OPERATOR VARIABLE TYPE?
567*5153720fSfkaag71            } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
568*5153720fSfkaag71                list(,$lhs, $ltype, $operator, $rhs, $rtype) = $match;
569*5153720fSfkaag71
570*5153720fSfkaag71                $lhs = $this->variable($lhs);
571*5153720fSfkaag71                $rhs = $this->variable($rhs);
572*5153720fSfkaag71
573*5153720fSfkaag71                if($operator == '~>' || $operator == '!~>') $operator = str_replace('~>','^~',$operator);
574*5153720fSfkaag71
575*5153720fSfkaag71                // do type information propagation
576*5153720fSfkaag71                $rtype = $p->type($rtype);
577*5153720fSfkaag71                $ltype = $p->type($ltype);
578*5153720fSfkaag71
579*5153720fSfkaag71                if($ltype) {
580*5153720fSfkaag71                    // left has a defined type, so update the map
581*5153720fSfkaag71                    $this->updateTypemap($typemap, $lhs['text'], $ltype->type, $ltype->hint);
582*5153720fSfkaag71
583*5153720fSfkaag71                    // and propagate to right if possible
584*5153720fSfkaag71                    if(!$rtype) {
585*5153720fSfkaag71                        $this->updateTypemap($typemap, $rhs['text'], $ltype->type, $lhint->hint);
586*5153720fSfkaag71                    }
587*5153720fSfkaag71                }
588*5153720fSfkaag71                if($rtype) {
589*5153720fSfkaag71                    // right has a defined type, so update the map
590*5153720fSfkaag71                    $this->updateTypemap($typemap, $rhs['text'], $rtype->type, $rtype->hint);
591*5153720fSfkaag71
592*5153720fSfkaag71                    // and propagate to left if possible
593*5153720fSfkaag71                    if(!$ltype) {
594*5153720fSfkaag71                        $this->updateTypemap($typemap, $lhs['text'], $rtype->type, $rtype->hint);
595*5153720fSfkaag71                    }
596*5153720fSfkaag71                }
597*5153720fSfkaag71
598*5153720fSfkaag71                $filters[] = array('type'=>'filter', 'lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
599*5153720fSfkaag71
600*5153720fSfkaag71            // [grammar] FILTER := VARIABLE TYPE? OPERATOR ANY
601*5153720fSfkaag71            } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->any})$/S",$line, $match)) {
602*5153720fSfkaag71
603*5153720fSfkaag71                // filter pattern
604*5153720fSfkaag71                list(, $lhs,$ltype,$operator,$rhs) = $match;
605*5153720fSfkaag71
606*5153720fSfkaag71                $lhs = $this->variable($lhs);
607*5153720fSfkaag71
608*5153720fSfkaag71                // update typemap if a type was defined
609*5153720fSfkaag71                list($type,$hint) = $p->type($ltype);
610*5153720fSfkaag71                if($type) {
611*5153720fSfkaag71                    $this->updateTypemap($typemap, $lhs['text'],$type,$hint);
612*5153720fSfkaag71                } else {
613*5153720fSfkaag71                    // use the already declared type if no type was defined
614*5153720fSfkaag71                    if(!empty($typemap[$lhs['text']])) {
615*5153720fSfkaag71                        extract($typemap[$lhs['text']]);
616*5153720fSfkaag71                    } else {
617*5153720fSfkaag71                        list($type, $hint) = $this->util->getDefaultType();
618*5153720fSfkaag71                    }
619*5153720fSfkaag71                }
620*5153720fSfkaag71
621*5153720fSfkaag71                // check for empty string token
622*5153720fSfkaag71                if($rhs == '[[]]') {
623*5153720fSfkaag71                    $rhs = '';
624*5153720fSfkaag71                }
625*5153720fSfkaag71
626*5153720fSfkaag71                // special case: the right hand side of the 'in' operator always normalizes with the 'text' type
627*5153720fSfkaag71                if($operator == '~>' || $operator == '!~>') {
628*5153720fSfkaag71                    $operator = str_replace('~>','^~', $operator);
629*5153720fSfkaag71                    $type = 'text';
630*5153720fSfkaag71                    unset($hint);
631*5153720fSfkaag71                }
632*5153720fSfkaag71
633*5153720fSfkaag71                // normalize
634*5153720fSfkaag71                $type = $this->util->loadType($type);
635*5153720fSfkaag71                $rhs = $this->literal($type->normalize($rhs,$hint));
636*5153720fSfkaag71
637*5153720fSfkaag71                $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
638*5153720fSfkaag71
639*5153720fSfkaag71            // [grammar] FILTER := ANY OPERATOR VARIABLE TYPE?
640*5153720fSfkaag71            } elseif(preg_match("/^({$p->any})\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
641*5153720fSfkaag71                list(, $lhs,$operator,$rhs,$rtype) = $match;
642*5153720fSfkaag71
643*5153720fSfkaag71                $rhs = $this->variable($rhs);
644*5153720fSfkaag71
645*5153720fSfkaag71                // update typemap if a type was defined
646*5153720fSfkaag71                list($type, $hint) = $p->type($rtype);
647*5153720fSfkaag71                if($type) {
648*5153720fSfkaag71                    $this->updateTypemap($typemap, $rhs['text'],$type,$hint);
649*5153720fSfkaag71                } else {
650*5153720fSfkaag71                    // use the already declared type if no type was defined
651*5153720fSfkaag71                    if(!empty($typemap[$rhs['text']])) {
652*5153720fSfkaag71                        extract($typemap[$rhs['text']]);
653*5153720fSfkaag71                    } else {
654*5153720fSfkaag71                        list($type, $hint) = $this->util->getDefaultType();
655*5153720fSfkaag71                    }
656*5153720fSfkaag71                }
657*5153720fSfkaag71
658*5153720fSfkaag71                // check for empty string token
659*5153720fSfkaag71                if($lhs == '[[]]') {
660*5153720fSfkaag71                    $lhs = '';
661*5153720fSfkaag71                }
662*5153720fSfkaag71
663*5153720fSfkaag71                // special case: the left hand side of the 'in' operator always normalizes with the 'page' type
664*5153720fSfkaag71                if($operator == '~>' || $operator == '!~>') {
665*5153720fSfkaag71                    $operator = str_replace('~>','^~', $operator);
666*5153720fSfkaag71                    $type = 'page';
667*5153720fSfkaag71                    unset($hint);
668*5153720fSfkaag71                }
669*5153720fSfkaag71
670*5153720fSfkaag71                // normalize
671*5153720fSfkaag71                $type = $this->util->loadType($type);
672*5153720fSfkaag71                $lhs = $this->literal($type->normalize($lhs,$hint));
673*5153720fSfkaag71
674*5153720fSfkaag71                $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
675*5153720fSfkaag71            } else {
676*5153720fSfkaag71                // unknown lines are fail
677*5153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_pattern'),utf8_tohtml(hsc($line))), $lineNode);
678*5153720fSfkaag71            }
679*5153720fSfkaag71        }
680*5153720fSfkaag71
681*5153720fSfkaag71        return array($triples, $filters, $scope);
682*5153720fSfkaag71    }
683*5153720fSfkaag71
684*5153720fSfkaag71    function getFields(&$tree, &$typemap) {
685*5153720fSfkaag71        $fields = array();
686*5153720fSfkaag71
687*5153720fSfkaag71        // extract the projection information in 'long syntax' if available
688*5153720fSfkaag71        $fieldsGroups = $this->extractGroups($tree, 'fields');
689*5153720fSfkaag71
690*5153720fSfkaag71        // parse 'long syntax' if we don't have projection information yet
691*5153720fSfkaag71        if(count($fieldsGroups)) {
692*5153720fSfkaag71            if(count($fieldsGroups) > 1) {
693*5153720fSfkaag71                $this->_fail($this->getLang('error_query_fieldsgroups'), $fieldsGroups);
694*5153720fSfkaag71            }
695*5153720fSfkaag71
696*5153720fSfkaag71            $fieldsLines = $this->extractText($fieldsGroups[0]);
697*5153720fSfkaag71            if(count($fieldsGroups[0]['cs'])) {
698*5153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_fieldsblock'),( isset($fieldsGroups[0]['cs'][0]['tag']) ? sprintf($this->getLang('named_group'),hsc($fieldsGroups[0]['cs'][0]['tag'])) : $this->getLang('unnamed_group'))), $fieldsGroups[0]['cs']);
699*5153720fSfkaag71            }
700*5153720fSfkaag71            $fields = $this->parseFieldsLong($fieldsLines, $typemap);
701*5153720fSfkaag71            if(!$fields) return array();
702*5153720fSfkaag71        }
703*5153720fSfkaag71
704*5153720fSfkaag71        return $fields;
705*5153720fSfkaag71    }
706*5153720fSfkaag71
707*5153720fSfkaag71    /**
708*5153720fSfkaag71     * Parses a projection group in 'long syntax'.
709*5153720fSfkaag71     */
710*5153720fSfkaag71    function parseFieldsLong($lines, &$typemap) {
711*5153720fSfkaag71        $p = $this->getPatterns();
712*5153720fSfkaag71        $result = array();
713*5153720fSfkaag71
714*5153720fSfkaag71        foreach($lines as $lineNode) {
715*5153720fSfkaag71            $line = trim($lineNode['text']);
716*5153720fSfkaag71            // FIELDLONG := VARIABLE AGGREGATE? TYPE? (':' ANY)?
717*5153720fSfkaag71            if(preg_match("/^({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?(?:\s*(:)\s*({$p->any})?\s*)?$/S",$line, $match)) {
718*5153720fSfkaag71                list(, $var, $vaggregate, $vtype, $nocaphint, $caption) = $match;
719*5153720fSfkaag71                $variable = $p->variable($var)->name;
720*5153720fSfkaag71                if(!$nocaphint || (!$nocaphint && !$caption)) $caption = ucfirst($variable);
721*5153720fSfkaag71
722*5153720fSfkaag71                list($type,$hint) = $p->type($vtype);
723*5153720fSfkaag71                list($agg,$agghint) = $p->aggregate($vaggregate);
724*5153720fSfkaag71
725*5153720fSfkaag71                $this->updateTypemap($typemap, $variable, $type, $hint);
726*5153720fSfkaag71                $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
727*5153720fSfkaag71            } else {
728*5153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_fieldsline'),utf8_tohtml(hsc($line))), $lineNode);
729*5153720fSfkaag71            }
730*5153720fSfkaag71        }
731*5153720fSfkaag71
732*5153720fSfkaag71        return $result;
733*5153720fSfkaag71    }
734*5153720fSfkaag71
735*5153720fSfkaag71    /**
736*5153720fSfkaag71     * Parses a projection group in 'short syntax'.
737*5153720fSfkaag71     */
738*5153720fSfkaag71    function parseFieldsShort($line, &$typemap) {
739*5153720fSfkaag71        $p = $this->getPatterns();
740*5153720fSfkaag71        $result = array();
741*5153720fSfkaag71
742*5153720fSfkaag71        // FIELDSHORT := VARIABLE AGGREGATE? TYPE? CAPTION?
743*5153720fSfkaag71        if(preg_match_all("/\s*({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?\s*(?:(\")([^\"]*)\")?/",$line,$match, PREG_SET_ORDER)) {
744*5153720fSfkaag71            foreach($match as $m) {
745*5153720fSfkaag71                list(, $var, $vaggregate, $vtype, $caption_indicator, $caption) = $m;
746*5153720fSfkaag71                $variable = $p->variable($var)->name;
747*5153720fSfkaag71                list($type, $hint) = $p->type($vtype);
748*5153720fSfkaag71                list($agg, $agghint) = $p->aggregate($vaggregate);
749*5153720fSfkaag71                if(!$caption_indicator) $caption = ucfirst($variable);
750*5153720fSfkaag71                $this->updateTypemap($typemap, $variable, $type, $hint);
751*5153720fSfkaag71                $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
752*5153720fSfkaag71            }
753*5153720fSfkaag71        }
754*5153720fSfkaag71
755*5153720fSfkaag71        return $result;
756*5153720fSfkaag71    }
757*5153720fSfkaag71
758*5153720fSfkaag71    /**
759*5153720fSfkaag71     * Returns the regex pattern used by the 'short syntax' for projection. This methods can
760*5153720fSfkaag71     * be used to get a dokuwiki-lexer-safe regex to embed into your own syntax pattern.
761*5153720fSfkaag71     *
762*5153720fSfkaag71     * @param captions boolean Whether the pattern should include caption matching (defaults to true)
763*5153720fSfkaag71     */
764*5153720fSfkaag71    function fieldsShortPattern($captions = true) {
765*5153720fSfkaag71        $p = $this->getPatterns();
766*5153720fSfkaag71        return "(?:\s*{$p->variable}\s*{$p->aggregate}?\s*{$p->type}?".($captions?'\s*(?:"[^"]*")?':'').")";
767*5153720fSfkaag71    }
768*5153720fSfkaag71
769*5153720fSfkaag71    /**
770*5153720fSfkaag71     * Constructs a tagged tree from the given list of lines.
771*5153720fSfkaag71     *
772*5153720fSfkaag71     * @return a tagged tree
773*5153720fSfkaag71     */
774*5153720fSfkaag71    function constructTree($lines, $what) {
775*5153720fSfkaag71        $root = array(
776*5153720fSfkaag71            'tag'=>'',
777*5153720fSfkaag71            'cs'=>array(),
778*5153720fSfkaag71            'start'=>1,
779*5153720fSfkaag71            'end'=>1
780*5153720fSfkaag71        );
781*5153720fSfkaag71
782*5153720fSfkaag71        $stack = array();
783*5153720fSfkaag71        $stack[] =& $root;
784*5153720fSfkaag71        $top = count($stack)-1;
785*5153720fSfkaag71        $lineCount = 0;
786*5153720fSfkaag71
787*5153720fSfkaag71        foreach($lines as $line) {
788*5153720fSfkaag71            $lineCount++;
789*5153720fSfkaag71            if($this->ignorableLine($line)) continue;
790*5153720fSfkaag71
791*5153720fSfkaag71            if(preg_match('/^([^\{]*) *{$/',utf8_trim($line),$match)) {
792*5153720fSfkaag71                list(, $tag) = $match;
793*5153720fSfkaag71                $tag = utf8_trim($tag);
794*5153720fSfkaag71
795*5153720fSfkaag71                $stack[$top]['cs'][] = array(
796*5153720fSfkaag71                    'tag'=>$tag?:null,
797*5153720fSfkaag71                    'cs'=>array(),
798*5153720fSfkaag71                    'start'=>$lineCount,
799*5153720fSfkaag71                    'end'=>0
800*5153720fSfkaag71                );
801*5153720fSfkaag71                $stack[] =& $stack[$top]['cs'][count($stack[$top]['cs'])-1];
802*5153720fSfkaag71                $top = count($stack)-1;
803*5153720fSfkaag71
804*5153720fSfkaag71            } elseif(preg_match('/^}$/',utf8_trim($line))) {
805*5153720fSfkaag71                $stack[$top]['end'] = $lineCount;
806*5153720fSfkaag71                array_pop($stack);
807*5153720fSfkaag71                $top = count($stack)-1;
808*5153720fSfkaag71
809*5153720fSfkaag71            } else {
810*5153720fSfkaag71                $stack[$top]['cs'][] = array(
811*5153720fSfkaag71                    'text'=>$line,
812*5153720fSfkaag71                    'start'=>$lineCount,
813*5153720fSfkaag71                    'end'=>$lineCount
814*5153720fSfkaag71                );
815*5153720fSfkaag71            }
816*5153720fSfkaag71        }
817*5153720fSfkaag71
818*5153720fSfkaag71        if(count($stack) != 1 || $stack[0] != $root) {
819*5153720fSfkaag71            msg(sprintf($this->getLang('error_syntax_braces'),$what),-1);
820*5153720fSfkaag71        }
821*5153720fSfkaag71
822*5153720fSfkaag71        $root['end'] = $lineCount;
823*5153720fSfkaag71
824*5153720fSfkaag71        return $root;
825*5153720fSfkaag71    }
826*5153720fSfkaag71
827*5153720fSfkaag71    /**
828*5153720fSfkaag71     * Renders a debug display of the syntax.
829*5153720fSfkaag71     *
830*5153720fSfkaag71     * @param lines array the lines that form the syntax
831*5153720fSfkaag71     * @param region array the region to highlight
832*5153720fSfkaag71     * @return a string with markup
833*5153720fSfkaag71     */
834*5153720fSfkaag71    function debugTree($lines, $regions) {
835*5153720fSfkaag71        $result = '';
836*5153720fSfkaag71        $lineCount = 0;
837*5153720fSfkaag71        $count = 0;
838*5153720fSfkaag71
839*5153720fSfkaag71        foreach($lines as $line) {
840*5153720fSfkaag71            $lineCount++;
841*5153720fSfkaag71
842*5153720fSfkaag71            foreach($regions as $region) {
843*5153720fSfkaag71                if($lineCount == $region['start']) {
844*5153720fSfkaag71                    if($count == 0) $result .= '<div class="strata-debug-highlight">';
845*5153720fSfkaag71                    $count++;
846*5153720fSfkaag71                }
847*5153720fSfkaag71
848*5153720fSfkaag71                if($lineCount == $region['end']+1) {
849*5153720fSfkaag71                    $count--;
850*5153720fSfkaag71
851*5153720fSfkaag71                    if($count==0) $result .= '</div>';
852*5153720fSfkaag71                }
853*5153720fSfkaag71            }
854*5153720fSfkaag71
855*5153720fSfkaag71            if($line != '') {
856*5153720fSfkaag71                $result .= '<div class="strata-debug-line">'.hsc($line).'</div>'."\n";
857*5153720fSfkaag71            } else {
858*5153720fSfkaag71                $result .= '<div class="strata-debug-line"><br/></div>'."\n";
859*5153720fSfkaag71            }
860*5153720fSfkaag71        }
861*5153720fSfkaag71
862*5153720fSfkaag71        if($count > 0) {
863*5153720fSfkaag71            $result .= '</div>';
864*5153720fSfkaag71        }
865*5153720fSfkaag71
866*5153720fSfkaag71        return '<div class="strata-debug">'.$result.'</div>';
867*5153720fSfkaag71    }
868*5153720fSfkaag71
869*5153720fSfkaag71    /**
870*5153720fSfkaag71     * Extract all occurences of tagged groups from the given tree.
871*5153720fSfkaag71     * This method does not remove the tagged groups from subtrees of
872*5153720fSfkaag71     * the given root.
873*5153720fSfkaag71     *
874*5153720fSfkaag71     * @param root array the tree to operate on
875*5153720fSfkaag71     * @param tag string the tag to remove
876*5153720fSfkaag71     * @return an array of groups
877*5153720fSfkaag71     */
878*5153720fSfkaag71    function extractGroups(&$root, $tag) {
879*5153720fSfkaag71        $result = array();
880*5153720fSfkaag71        $to_remove = array();
881*5153720fSfkaag71        foreach($root['cs'] as $i=>&$tree) {
882*5153720fSfkaag71            if(!$this->isGroup($tree)) continue;
883*5153720fSfkaag71            if($tree['tag'] == $tag || (($tag=='' || $tag==null) && $tree['tag'] == null) ) {
884*5153720fSfkaag71                $result[] =& $tree;
885*5153720fSfkaag71                $to_remove[] = $i;
886*5153720fSfkaag71            }
887*5153720fSfkaag71        }
888*5153720fSfkaag71        // invert order of to_remove to always remove higher indices first
889*5153720fSfkaag71        rsort($to_remove);
890*5153720fSfkaag71        foreach($to_remove as $i) {
891*5153720fSfkaag71            array_splice($root['cs'],$i,1);
892*5153720fSfkaag71        }
893*5153720fSfkaag71        return $result;
894*5153720fSfkaag71    }
895*5153720fSfkaag71
896*5153720fSfkaag71    /**
897*5153720fSfkaag71     * Extracts all text elements from the given tree.
898*5153720fSfkaag71     * This method does not remove the text elements from subtrees
899*5153720fSfkaag71     * of the root.
900*5153720fSfkaag71     *
901*5153720fSfkaag71     * @param root array the tree to operate on
902*5153720fSfkaag71     * @return array an array of text elements
903*5153720fSfkaag71     */
904*5153720fSfkaag71    function extractText(&$root) {
905*5153720fSfkaag71        $result = array();
906*5153720fSfkaag71        $to_remove = array();
907*5153720fSfkaag71        foreach($root['cs'] as $i=>&$tree) {
908*5153720fSfkaag71            if(!$this->isText($tree)) continue;
909*5153720fSfkaag71            $result[] =& $tree;
910*5153720fSfkaag71            $to_remove[] = $i;
911*5153720fSfkaag71        }
912*5153720fSfkaag71        // invert order of to_remove to always remove higher indices first
913*5153720fSfkaag71        rsort($to_remove);
914*5153720fSfkaag71        foreach($to_remove as $i) {
915*5153720fSfkaag71            array_splice($root['cs'],$i,1);
916*5153720fSfkaag71        }
917*5153720fSfkaag71        return $result;
918*5153720fSfkaag71    }
919*5153720fSfkaag71
920*5153720fSfkaag71    /**
921*5153720fSfkaag71     * Returns whether the given node is a line.
922*5153720fSfkaag71     */
923*5153720fSfkaag71    function isText(&$node) {
924*5153720fSfkaag71        return array_key_exists('text', $node);
925*5153720fSfkaag71    }
926*5153720fSfkaag71
927*5153720fSfkaag71    /**
928*5153720fSfkaag71     * Returns whether the given node is a group.
929*5153720fSfkaag71     */
930*5153720fSfkaag71    function isGroup(&$node) {
931*5153720fSfkaag71        return array_key_exists('tag', $node);
932*5153720fSfkaag71    }
933*5153720fSfkaag71
934*5153720fSfkaag71    /**
935*5153720fSfkaag71     * Sets all properties given as '$properties' to the values parsed from '$trees'.
936*5153720fSfkaag71     *
937*5153720fSfkaag71     * The property array has as keys all possible properties, which are specified by its
938*5153720fSfkaag71     * values. Such specification is an array that may have the following keys, with the
939*5153720fSfkaag71     * described values:
940*5153720fSfkaag71     * - choices: array of possible values, where the keys are the internally used values
941*5153720fSfkaag71     *     and the values specify synonyms for the choice, of which the first listed one
942*5153720fSfkaag71     *     is most common. For example: 'true' => array('yes', 'yeah') specifies that the
943*5153720fSfkaag71     *     user can choose 'yes' or 'yeah' (of which 'yes' is the commonly used value) and
944*5153720fSfkaag71     *     that the return value will contain 'true' if this choice was chosen.
945*5153720fSfkaag71     * - pattern: regular expression that defines all possible values.
946*5153720fSfkaag71     * - pattern_desc: description used for errors when a pattern is specified.
947*5153720fSfkaag71     * - minOccur: positive integer specifying the minimum number of values, defaults to 1.
948*5153720fSfkaag71     * - maxOccur: integer greater than or equal to minOccur, which specifies the maximum
949*5153720fSfkaag71     *     number of values, defaults to minOccur.
950*5153720fSfkaag71     * - default: the default value (which must be a value the user is allowed to set).
951*5153720fSfkaag71     *     When default is given, this method guarantees that the property is always set,
952*5153720fSfkaag71     *     otherwise the property may not be set since all properties are optional.
953*5153720fSfkaag71     * Either 'choices' or 'pattern' must be set (not both), all other values are optional.
954*5153720fSfkaag71     *
955*5153720fSfkaag71     * An example property array is as follows:
956*5153720fSfkaag71     * array(
957*5153720fSfkaag71     *   'example boolean' => array(
958*5153720fSfkaag71     *     'choices' => array('y' => array('yes', 'yeah'), 'n' => array('no', 'nay')),
959*5153720fSfkaag71     *     'minOccur' => 1,
960*5153720fSfkaag71     *     'maxOccur' => 3,
961*5153720fSfkaag71     *     'default' => 'yes'
962*5153720fSfkaag71     *   ),
963*5153720fSfkaag71     *   'example natural number' => array(
964*5153720fSfkaag71     *     'pattern' => '/^[0-9]+$/',
965*5153720fSfkaag71     *     'pattern_desc' => $this->getLang('property_Z*')
966*5153720fSfkaag71     *   )
967*5153720fSfkaag71     * )
968*5153720fSfkaag71     *
969*5153720fSfkaag71     * @param $properties The properties that can be set.
970*5153720fSfkaag71     * @param $trees The trees that contain the values for these properties.
971*5153720fSfkaag71     * @return An array with as indices the property names and as value a list of all values given for that property.
972*5153720fSfkaag71     */
973*5153720fSfkaag71    function setProperties($properties, $trees) {
974*5153720fSfkaag71        $propertyValues = array();
975*5153720fSfkaag71        $p = $this->getPatterns();
976*5153720fSfkaag71
977*5153720fSfkaag71        foreach ($trees as $tree) {
978*5153720fSfkaag71            $text = $this->extractText($tree);
979*5153720fSfkaag71            foreach($text as $lineNode) {
980*5153720fSfkaag71                $line = utf8_trim($lineNode['text']);
981*5153720fSfkaag71                if (preg_match('/^('.$p->predicate.')(\*)?\s*:\s*('.$p->any.')$/', $line, $match)) {
982*5153720fSfkaag71                    list(, $variable, $multi, $value) = $match;
983*5153720fSfkaag71                    $this->_setPropertyValue($properties, $tree['tag'], $lineNode, $variable, !empty($multi), $value, $propertyValues);
984*5153720fSfkaag71                } else {
985*5153720fSfkaag71                    $this->emitError($lineNode, 'error_property_weirdgroupline', hsc($tree['tag']), hsc($line));
986*5153720fSfkaag71                }
987*5153720fSfkaag71            }
988*5153720fSfkaag71            // Warn about unknown groups
989*5153720fSfkaag71            foreach ($tree['cs'] as $group) {
990*5153720fSfkaag71                $this->emitError($group, 'error_property_unknowngroup', hsc($trees[0]['tag']), hsc($group['tag']));
991*5153720fSfkaag71            }
992*5153720fSfkaag71        }
993*5153720fSfkaag71
994*5153720fSfkaag71        // Set property defaults
995*5153720fSfkaag71        foreach ($properties as $name => $p) {
996*5153720fSfkaag71            if (!isset($propertyValues[$name]) && isset($p['default'])) {
997*5153720fSfkaag71                $this->_setPropertyValue($properties, 'default value', null, $name, false, $p['default'], $propertyValues);
998*5153720fSfkaag71            }
999*5153720fSfkaag71        }
1000*5153720fSfkaag71
1001*5153720fSfkaag71        // Show errors, if any
1002*5153720fSfkaag71        $this->showErrors();
1003*5153720fSfkaag71
1004*5153720fSfkaag71        return $propertyValues;
1005*5153720fSfkaag71    }
1006*5153720fSfkaag71
1007*5153720fSfkaag71    function _setPropertyValue($properties, $group, $region, $variable, $isMulti, $value, &$propertyValues) {
1008*5153720fSfkaag71        if (!isset($properties[$variable])) {
1009*5153720fSfkaag71            // Unknown property: show error
1010*5153720fSfkaag71            $property_title_values = $this->getLang('property_title_values');
1011*5153720fSfkaag71            $propertyList = implode(', ', array_map(function ($n, $p) use ($property_title_values) {
1012*5153720fSfkaag71                $values = implode(', ', array_map(function ($c) {
1013*5153720fSfkaag71                    return $c[0];
1014*5153720fSfkaag71                }, $p['choices']));
1015*5153720fSfkaag71                $title = sprintf($property_title_values, $values);
1016*5153720fSfkaag71                return '\'<code title="' . hsc($title) . '">' . hsc($n) . '</code>\'';
1017*5153720fSfkaag71            }, array_keys($properties), $properties));
1018*5153720fSfkaag71            $this->emitError($region, 'error_property_unknownproperty', hsc($group), hsc($variable), $propertyList);
1019*5153720fSfkaag71        } else if (isset($propertyValues[$variable])) {
1020*5153720fSfkaag71            // Property is specified more than once: show error
1021*5153720fSfkaag71            $this->emitError($region, 'error_property_multi', hsc($group), hsc($variable));
1022*5153720fSfkaag71        } else {
1023*5153720fSfkaag71            $p = $properties[$variable];
1024*5153720fSfkaag71            $minOccur = isset($p['minOccur']) ? $p['minOccur'] : 1;
1025*5153720fSfkaag71            $maxOccur = isset($p['maxOccur']) ? $p['maxOccur'] : $minOccur;
1026*5153720fSfkaag71
1027*5153720fSfkaag71            if ($isMulti) {
1028*5153720fSfkaag71                $values = array_map('utf8_trim', explode(',', $value));
1029*5153720fSfkaag71            } else if ($minOccur == 1 || $minOccur == $maxOccur) {
1030*5153720fSfkaag71                // Repeat the given value as often as we expect it
1031*5153720fSfkaag71                $values = array_fill(0, $minOccur, $value);
1032*5153720fSfkaag71            } else {
1033*5153720fSfkaag71                // A single value was given, but multiple were expected
1034*5153720fSfkaag71                $this->emitError($region, 'error_property_notmulti', hsc($group), hsc($variable), $minOccur);
1035*5153720fSfkaag71                return;
1036*5153720fSfkaag71            }
1037*5153720fSfkaag71
1038*5153720fSfkaag71            if (count($values) < $minOccur || count($values) > $maxOccur) {
1039*5153720fSfkaag71                // Number of values given differs from expected number
1040*5153720fSfkaag71                if ($minOccur == $maxOccur) {
1041*5153720fSfkaag71                    $this->emitError($region, 'error_property_occur', hsc($group), hsc($variable), $minOccur, count($values));
1042*5153720fSfkaag71                } else {
1043*5153720fSfkaag71                    $this->emitError($region, 'error_property_occurrange', hsc($group), hsc($variable), $minOccur, $maxOccur, count($values));
1044*5153720fSfkaag71                }
1045*5153720fSfkaag71            } else if (isset($p['choices'])) { // Check whether the given property values are valid choices
1046*5153720fSfkaag71                // Create a mapping from choice to normalized value of the choice
1047*5153720fSfkaag71                $choices = array();
1048*5153720fSfkaag71                $choicesInfo = array(); // For nice error messages
1049*5153720fSfkaag71                foreach ($p['choices'] as $nc => $c) {
1050*5153720fSfkaag71                    if (is_array($c)) {
1051*5153720fSfkaag71                        $choices = array_merge($choices, array_fill_keys($c, $nc));
1052*5153720fSfkaag71                        $title = sprintf($this->getLang('property_title_synonyms'), implode(', ', $c));
1053*5153720fSfkaag71                        $choicesInfo[] = '\'<code title="' . hsc($title) . '">' . hsc($c[0]) . '</code>\'';
1054*5153720fSfkaag71                    } else {
1055*5153720fSfkaag71                        $choices[$c] = $c;
1056*5153720fSfkaag71                        $choicesInfo[] = '\'<code>' . hsc($c) . '</code>\'';
1057*5153720fSfkaag71                    }
1058*5153720fSfkaag71                }
1059*5153720fSfkaag71                if (!isset($choices['']) && isset($p['default'])) {
1060*5153720fSfkaag71                    $choices[''] = $choices[$p['default']];
1061*5153720fSfkaag71                }
1062*5153720fSfkaag71
1063*5153720fSfkaag71                $incorrect = array_diff($values, array_keys($choices)); // Find all values that are not a valid choice
1064*5153720fSfkaag71                if (count($incorrect) > 0) {
1065*5153720fSfkaag71                    unset($choices['']);
1066*5153720fSfkaag71                    foreach (array_unique($incorrect) as $v) {
1067*5153720fSfkaag71                        $this->emitError($region, 'error_property_invalidchoice', hsc($group), hsc($variable), hsc($v), implode(', ', $choicesInfo));
1068*5153720fSfkaag71                    }
1069*5153720fSfkaag71                } else {
1070*5153720fSfkaag71                    $propertyValues[$variable] = array_map(function($v) use ($choices) { return $choices[$v]; }, $values);
1071*5153720fSfkaag71                }
1072*5153720fSfkaag71            } else if (isset($p['pattern'])) { // Check whether the given property values match the pattern
1073*5153720fSfkaag71                $incorrect = array_filter($values, function($v) use ($p) { return !preg_match($p['pattern'], $v); });
1074*5153720fSfkaag71                if (count($incorrect) > 0) {
1075*5153720fSfkaag71                    foreach (array_unique($incorrect) as $v) {
1076*5153720fSfkaag71                        if (isset($p['pattern_desc'])) {
1077*5153720fSfkaag71                            $this->emitError($region, 'error_property_patterndesc', hsc($group), hsc($variable), hsc($v), $p['pattern_desc']);
1078*5153720fSfkaag71                        } else {
1079*5153720fSfkaag71                            $this->emitError($region, 'error_property_pattern', hsc($group), hsc($variable), hsc($v), hsc($p['pattern']));
1080*5153720fSfkaag71                        }
1081*5153720fSfkaag71                    }
1082*5153720fSfkaag71                } else {
1083*5153720fSfkaag71                    $propertyValues[$variable] = $values;
1084*5153720fSfkaag71                }
1085*5153720fSfkaag71            } else { // Property value has no requirements
1086*5153720fSfkaag71                $propertyValues[$variable] = $values;
1087*5153720fSfkaag71            }
1088*5153720fSfkaag71        }
1089*5153720fSfkaag71    }
1090*5153720fSfkaag71
1091*5153720fSfkaag71    /**
1092*5153720fSfkaag71     * Generates a html error message, ensuring that all utf8 in arguments is escaped correctly.
1093*5153720fSfkaag71     * The generated messages might be accumulated until showErrors is called.
1094*5153720fSfkaag71     *
1095*5153720fSfkaag71     * @param region The region at which the error occurs.
1096*5153720fSfkaag71     * @param msg_id The id of the message in the language file.
1097*5153720fSfkaag71     */
1098*5153720fSfkaag71    function emitError($region, $msg_id) {
1099*5153720fSfkaag71        $args = func_get_args();
1100*5153720fSfkaag71        array_shift($args);
1101*5153720fSfkaag71        array_shift($args);
1102*5153720fSfkaag71        $args = array_map('strval', $args); // convert everything to strings first
1103*5153720fSfkaag71        $args = array_map('utf8_tohtml', $args); // Escape args
1104*5153720fSfkaag71        $msg = vsprintf($this->getLang($msg_id), $args);
1105*5153720fSfkaag71        msg($msg, -1);
1106*5153720fSfkaag71        $this->error .= "<br />\n" . $msg;
1107*5153720fSfkaag71        $this->regions[] = $region;
1108*5153720fSfkaag71    }
1109*5153720fSfkaag71
1110*5153720fSfkaag71    /**
1111*5153720fSfkaag71     * Ensures that all emitted errors are shown.
1112*5153720fSfkaag71     */
1113*5153720fSfkaag71    function showErrors() {
1114*5153720fSfkaag71        if (!empty($this->error)) {
1115*5153720fSfkaag71            $error = $this->error;
1116*5153720fSfkaag71            $regions = $this->regions;
1117*5153720fSfkaag71            $this->error = '';
1118*5153720fSfkaag71            $this->regions = array();
1119*5153720fSfkaag71            throw new strata_exception($error, $regions);
1120*5153720fSfkaag71        }
1121*5153720fSfkaag71    }
1122*5153720fSfkaag71}
1123*5153720fSfkaag71
1124*5153720fSfkaag71// call static initializer (PHP doesn't offer this feature)
1125*5153720fSfkaag71helper_plugin_strata_syntax::initialize();
1126