xref: /plugin/strata/helper/syntax.php (revision 0847ebd29a490ea4bc8c536bb9f6dda8b6bbaa1a)
15153720fSfkaag71<?php
25153720fSfkaag71/**
35153720fSfkaag71 * DokuWiki Plugin strata (Helper Component)
45153720fSfkaag71 *
55153720fSfkaag71 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
65153720fSfkaag71 * @author  Brend Wanders <b.wanders@utwente.nl>
75153720fSfkaag71 */
85153720fSfkaag71
95153720fSfkaag71if (!defined('DOKU_INC')) die('meh.');
105153720fSfkaag71
/**
 * Helper to construct and handle syntax fragments.
 *
 * Reading an undefined property (for example `$helper->variable`) returns the
 * regular expression fragment registered under that name. Calling an undefined
 * method (for example `$helper->type('[ref::hint]')`) matches its first
 * argument against the capture pattern of that name and returns the named
 * captures wrapped in a helper_plugin_strata_syntax_RegexHelperCapture.
 */
class helper_plugin_strata_syntax_RegexHelper {
    /**
     * Regular expression fragment table. This is used for interpolation of
     * syntax patterns, and should be without captures. Do not assume any
     * specific delimiter.
     */
    public $regexFragments = array(
        'variable'  => '(?:\?[^\s:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'predicate' => '(?:[^:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)',
        'reflit'    => '(?:\[\[[^]]*\]\])',
        'type'      => '(?:\[\s*[a-z0-9]+\s*(?:::[^\]]*)?\])',
        'aggregate' => '(?:@\s*[a-z0-9]+(?:\([^\)]*\))?)',
        'operator'  => '(?:!=|>=|<=|>|<|=|!~>|!~|!\^~|!\$~|\^~|\$~|~>|~)',
        'any'       => '(?:.+?)'
    );

    /**
     * Patterns used to extract information from captured fragments. These patterns
     * are used with '/' as delimiter, and should contain at least one capture group.
     *
     * Each entry is array(pattern, list of field names); the n-th field name
     * labels the n-th capture group of the pattern.
     */
    public $regexCaptures = array(
        'variable'  => array('\?(.*)', array('name')),
        'aggregate' => array('@\s*([a-z0-9]+)(?:\(([^\)]*)\))?', array('aggregate','hint')),
        'type'      => array('\[\s*([a-z0-9]+)\s*(?:::([^\]]*))?\]', array('type', 'hint')),
        'reflit'    => array('\[\[(.*)\]\]',array('reference'))
    );

    /**
     * Grabs the syntax fragment.
     *
     * @param string $name key into $regexFragments
     * @return string|null the fragment, or null (after raising an
     *                     E_USER_NOTICE) when no such fragment exists
     */
    function __get($name) {
        if(array_key_exists($name, $this->regexFragments)) {
            return $this->regexFragments[$name];
        }

        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Undefined syntax fragment '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
        return null;
    }

    /**
     * Extracts information from a fragment, based on the type.
     *
     * @param string $name      key into $regexCaptures
     * @param array  $arguments call arguments; only the first one (the text
     *                          to match) is used
     * @return helper_plugin_strata_syntax_RegexHelperCapture|null the named
     *         captures, or null when the text does not match, when no text
     *         was given, or (after an E_USER_NOTICE) when the capture type
     *         is unknown
     */
    function __call($name, $arguments) {
        if(!array_key_exists($name, $this->regexCaptures)) {
            $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
            trigger_error("Undefined syntax capture '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
            return null;
        }

        // A missing or null subject can never match (every capture pattern
        // needs at least one literal character). Bail out before preg_match,
        // which deprecates null subjects since PHP 8.1.
        if(!isset($arguments[0])) {
            return null;
        }

        list($pattern, $names) = $this->regexCaptures[$name];
        if(preg_match("/^{$pattern}$/", $arguments[0], $match) !== 1) {
            return null;
        }

        // drop the full match; trailing optional groups that did not take
        // part in the match are absent, so pair only what both lists have
        array_shift($match);
        $shortest = min(count($names), count($match));
        return new helper_plugin_strata_syntax_RegexHelperCapture(array_combine(array_slice($names,0,$shortest), array_slice($match, 0, $shortest)));
    }
}
735153720fSfkaag71
/**
 * A single capture. Used as a return value for the RegexHelper's
 * capture methods.
 *
 * Fields can be read as properties ($capture->name), by field name
 * ($capture['name']) or by position ($capture[0] is the first field).
 * The capture is read-only: writing or unsetting an offset only raises
 * an E_USER_NOTICE.
 */
class helper_plugin_strata_syntax_RegexHelperCapture implements ArrayAccess {
    /**
     * Captured values keyed by field name, in pattern order. Declared
     * explicitly so that the constructor does not create a dynamic property.
     */
    public $values;

    /**
     * @param array $values captured values keyed by field name
     */
    function __construct($values) {
        $this->values = $values;
    }

    /**
     * Reads a field by name.
     *
     * @param string $name the field name
     * @return mixed the field value, or null when there is no such field
     */
    function __get($name) {
        if(array_key_exists($name, $this->values)) {
            return $this->values[$name];
        }

        return null;
    }

    /**
     * Tells whether the offset is a valid zero-based position into the
     * fields (with 0 being the first field and count-1 the last).
     */
    private function isPosition($offset) {
        return is_numeric($offset) && $offset >= 0 && $offset < count($this->values);
    }

    /**
     * The offset is valid iff it is an existing field name, or a correct
     * numeric index.
     */
    #[\ReturnTypeWillChange]
    function offsetExists($offset) {
        // the numeric check matters: without it a non-numeric unknown key
        // compares as 0 on PHP 7 and would be reported as existing
        return isset($this->values[$offset]) || $this->isPosition($offset);
    }

    /**
     * Returns the field stored under the given name or numeric position,
     * or null when the offset is unknown.
     */
    #[\ReturnTypeWillChange]
    function offsetGet($offset) {
        if(isset($this->values[$offset])) {
            return $this->values[$offset];
        }

        if($this->isPosition($offset)) {
            // translate numeric offset to key
            $keys = array_keys($this->values);
            return $this->values[$keys[intval($offset)]];
        }

        // offset unknown
        return null;
    }

    /**
     * Captures are read-only; only reports the attempted write.
     */
    #[\ReturnTypeWillChange]
    function offsetSet($offset, $value) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }

    /**
     * Captures are read-only; only reports the attempted removal.
     */
    #[\ReturnTypeWillChange]
    function offsetUnset($offset) {
        $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE);
    }
}
1275153720fSfkaag71
1285153720fSfkaag71/**
1295153720fSfkaag71 * Helper plugin for common syntax parsing.
1305153720fSfkaag71 */
1315153720fSfkaag71class helper_plugin_strata_syntax extends DokuWiki_Plugin {
1325153720fSfkaag71    public static $patterns;
1335153720fSfkaag71
/**
 * Static initializer, meant to be called directly after the class
 * declaration.
 *
 * Stores a fresh RegexHelper in the static $patterns. It lives in a static
 * method so that the static $patterns and its initialization stay close
 * together.
 */
static function initialize() {
    $helper = new helper_plugin_strata_syntax_RegexHelper();
    self::$patterns = $helper;
}
1435153720fSfkaag71
/**
 * Constructor.
 *
 * Loads the 'strata_util' helper plugin and resets the error message and
 * the region list.
 */
function __construct() {
    // plain assignment: objects are handles already, and assigning a
    // function result by reference raises "Only variables should be
    // assigned by reference"
    $this->util = plugin_load('helper', 'strata_util');
    $this->error = '';
    $this->regions = array();
}
1525153720fSfkaag71
/**
 * Gives access to the shared pattern helper.
 *
 * @return helper_plugin_strata_syntax_RegexHelper|null whatever is currently
 *         stored in the static $patterns (set by initialize())
 */
function getPatterns() {
    $helper = self::$patterns;
    return $helper;
}
1595153720fSfkaag71
/**
 * Determines whether a line can be ignored.
 *
 * A line is ignorable when, after trimming, it is empty or starts
 * with '--'.
 *
 * @param string $line the line to inspect
 * @return bool true when the line carries no content
 */
function ignorableLine($line) {
    $trimmed = utf8_trim($line);

    if($trimmed == '') {
        return true;
    }

    return utf8_substr($trimmed,0,2) == '--';
}
1675153720fSfkaag71
/**
 * Updates the given typemap with new information.
 *
 * An entry is only written when the variable has no (non-empty) entry yet
 * and a truthy type is given; existing entries are never overwritten.
 *
 * @param array  $typemap a typemap (modified in place)
 * @param string $var     the name of the variable
 * @param string $type    the type of the variable
 * @param string $hint    the type hint of the variable
 * @return bool true when the typemap was changed, false otherwise
 */
function updateTypemap(&$typemap, $var, $type, $hint=null) {
    $alreadyTyped = !empty($typemap[$var]);
    if($alreadyTyped || !$type) {
        return false;
    }

    $typemap[$var] = array('type'=>$type,'hint'=>$hint);
    return true;
}
1845153720fSfkaag71
/**
 * Constructs a literal with the given text.
 *
 * @param mixed $val the text of the literal
 * @return array a node of the form array('type'=>'literal', 'text'=>$val)
 */
function literal($val) {
    $node = array();
    $node['type'] = 'literal';
    $node['text'] = $val;
    return $node;
}
1915153720fSfkaag71
/**
 * Constructs a variable with the given name.
 *
 * A single leading '?' is stripped from the name, so both '?foo' and 'foo'
 * produce a variable named 'foo'.
 *
 * @param string $var the variable name, with or without the leading '?'
 * @return array a node of the form array('type'=>'variable', 'text'=>name)
 */
function variable($var) {
    // isset() guards the offset read: an empty name has no first character
    // and would otherwise raise "Uninitialized string offset"
    if(isset($var[0]) && $var[0] == '?') {
        $var = substr($var,1);
    }

    return array('type'=>'variable', 'text'=>$var);
}
1995153720fSfkaag71
/**
 * Reports an error and aborts the current parse.
 *
 * The message is shown through msg() with level -1, after which a
 * strata_exception carrying the message and the 'start'/'end' of every
 * given region is thrown.
 *
 * @param string $message the error message
 * @param array  $regions a single node (as recognised by isGroup() or
 *                        isText()) or a list of nodes; each node must have
 *                        'start' and 'end' entries
 * @throws strata_exception always
 */
function _fail($message, $regions=array()) {
    msg($message,-1);

    // a lone node is treated as a one-element list
    $isSingleNode = $this->isGroup($regions) || $this->isText($regions);
    $nodes = $isSingleNode ? array($regions) : $regions;

    $lines = array();
    foreach($nodes as $node) {
        $lines[] = array('start'=>$node['start'], 'end'=>$node['end']);
    }

    throw new strata_exception($message, $lines);
}
2115153720fSfkaag71
/**
 * Constructs a query from the given tree.
 *
 * The 'sort', 'group', 'consider' and 'where' groups are pulled out of the
 * root through extractGroups(). The graph pattern is built from the single
 * 'where' group if there is one, and from the root itself otherwise. Every
 * variable named in a sort, group or consider line, and every projected
 * variable, must be in the scope reported by transformGroup(); any violation
 * is reported through _fail(), which throws.
 *
 * @param array $root       the tree to transform
 * @param array $typemap    the type information collected so far
 * @param array $projection the variables to project
 * @return array|false array(query structure, list of in-scope variables),
 *                     or false when transformGroup() yields an empty group
 */
function constructQuery(&$root, &$typemap, $projection) {
    $p = $this->getPatterns();

    $result = array(
        'type'=>'select',
        'group'=>array(),
        'projection'=>$projection,
        'ordering'=>array(),
        'grouping'=>false,
        'considering'=>array()
    );

    // extract sort groups
    $ordering = $this->extractGroups($root, 'sort');

    // extract grouping groups
    $grouping = $this->extractGroups($root, 'group');

    // extract additional projection groups
    $considering = $this->extractGroups($root, 'consider');

    // transform actual group
    $where = $this->extractGroups($root, 'where');
    $tree = null;
    if(count($where)==0) {
        $tree =& $root;
    } elseif(count($where)==1) {
        $tree =& $where[0];
        // with an explicit where group, nothing else may remain in the root
        if(count($root['cs'])) {
            $this->_fail($this->getLang('error_query_outofwhere'), $root['cs']);
        }
    } else {
        $this->_fail($this->getLang('error_query_singlewhere'), $where);
    }

    list($group, $scope) = $this->transformGroup($tree, $typemap);
    $result['group'] = $group;
    if(!$group) return false;

    // handle sort groups
    if(count($ordering)) {
        if(count($ordering) > 1) {
            $this->_fail($this->getLang('error_query_multisort'), $ordering);
        }

        // handle each line in the group
        foreach($ordering[0]['cs'] as $line) {
            if($this->isGroup($line)) {
                $this->_fail($this->getLang('error_query_sortblock'), $line);
            }

            if(preg_match("/^({$p->variable})\s*(?:\((asc|desc)(?:ending)?\))?$/S",utf8_trim($line['text']),$match)) {
                $var = $p->variable($match[1]);
                if(!in_array($var->name, $scope)) {
                    $this->_fail(sprintf($this->getLang('error_query_sortvar'),utf8_tohtml(hsc($var->name))), $line);
                }

                // the direction is optional; when it is left out the group
                // did not take part in the match and $match[2] does not exist
                $direction = empty($match[2]) ? 'asc' : $match[2];
                $result['ordering'][] = array('variable'=>$var->name, 'direction'=>$direction);
            } else {
                $this->_fail(sprintf($this->getLang('error_query_sortline'), utf8_tohtml(hsc($line['text']))), $line);
            }
        }
    }

    // handle grouping
    if(count($grouping)) {
        if(count($grouping) > 1) {
            $this->_fail($this->getLang('error_query_multigrouping'), $grouping);
        }

        // we have a group, so we want grouping
        $result['grouping'] = array();

        foreach($grouping[0]['cs'] as $line) {
            if($this->isGroup($line)) {
                $this->_fail($this->getLang('error_query_groupblock'), $line);
            }

            // NOTE(review): unlike the sort and consider patterns this one is
            // not anchored with '^', so text before the variable is accepted;
            // confirm whether that is intended before tightening it
            if(preg_match("/({$p->variable})$/",utf8_trim($line['text']),$match)) {
                $var = $p->variable($match[1]);
                if(!in_array($var->name, $scope)) {
                    $this->_fail(sprintf($this->getLang('error_query_groupvar'),utf8_tohtml(hsc($var->name))), $line);
                }

                $result['grouping'][] = $var->name;
            } else {
                $this->_fail(sprintf($this->getLang('error_query_groupline'), utf8_tohtml(hsc($line['text']))), $line);
            }
        }
    }

    // handle considering
    if(count($considering)) {
        if(count($considering) > 1) {
            $this->_fail($this->getLang('error_query_multiconsidering'), $considering);
        }

        foreach($considering[0]['cs'] as $line) {
            if($this->isGroup($line)) {
                $this->_fail($this->getLang('error_query_considerblock'), $line);
            }

            if(preg_match("/^({$p->variable})$/",utf8_trim($line['text']),$match)) {
                $var = $p->variable($match[1]);
                if(!in_array($var->name, $scope)) {
                    $this->_fail(sprintf($this->getLang('error_query_considervar'),utf8_tohtml(hsc($var->name))), $line);
                }

                $result['considering'][] = $var->name;
            } else {
                $this->_fail(sprintf($this->getLang('error_query_considerline'), utf8_tohtml(hsc($line['text']))), $line);
            }
        }
    }

    // every projected variable must be bound by the pattern
    foreach($projection as $var) {
        if(!in_array($var, $scope)) {
            $this->_fail(sprintf($this->getLang('error_query_selectvar'), utf8_tohtml(hsc($var))));
        }
    }

    // return final query structure
    return array($result, $scope);
}
3445153720fSfkaag71
/**
 * Transforms a full query group.
 *
 * The group's text lines become triples and filters, and its 'union',
 * 'minus' and 'optional' subgroups are transformed recursively. The pieces
 * are nested in this order: patterns and unions chained with 'and', then
 * wrapped by each 'optional', then by a single 'filter' node holding all
 * filters, then by each 'minus'. Variables bound inside a minus group do
 * not enter the returned scope.
 *
 * @param array $root    the tree to transform
 * @param array $typemap the type information
 * @return array array(transformed group, list of in-scope variables)
 */
function transformGroup(&$root, &$typemap) {
    // extract patterns and split them in triples and filters
    $patterns = $this->extractText($root);

    // extract union groups
    $unions = $this->extractGroups($root, 'union');

    // extract minus groups
    $minuses = $this->extractGroups($root,'minus');

    // extract optional groups
    $optionals = $this->extractGroups($root,'optional');

    // check for leftovers
    if(count($root['cs'])) {
        $this->_fail(
            sprintf(
                $this->getLang('error_query_group'),
                ( isset($root['cs'][0]['tag'])
                    ? sprintf($this->getLang('named_group'), utf8_tohtml(hsc($root['cs'][0]['tag'])))
                    : $this->getLang('unnamed_group') )
            ),
            $root['cs']
        );
    }

    // split patterns into triples and filters
    list($patterns, $filters, $scope) = $this->transformPatterns($patterns, $typemap);

    // convert each union into a pattern
    foreach($unions as $union) {
        list($u, $s) = $this->transformUnion($union, $typemap);
        $scope = array_merge($scope, $s);
        $patterns[] = $u;
    }

    if(count($patterns) == 0) {
        $this->_fail(sprintf($this->getLang('error_query_grouppattern')), $root);
    }

    // chain all patterns with ANDs
    $result = array_shift($patterns);
    foreach($patterns as $pattern) {
        $result = array(
            'type'=>'and',
            'lhs'=>$result,
            'rhs'=>$pattern
        );
    }

    // apply all optionals
    if(count($optionals)) {
        foreach($optionals as $optional) {
            // convert each optional
            list($optional, $s) = $this->transformGroup($optional, $typemap);
            $scope = array_merge($scope, $s);
            $result = array(
                'type'=>'optional',
                'lhs'=>$result,
                'rhs'=>$optional
            );
        }
    }

    // add all filters; these are a bit weird, as only a single FILTER is really supported
    // (we have defined multiple filters as being a conjunction)
    if(count($filters)) {
        // iterate by reference so that the bookkeeping '_line' entry is
        // really removed from the filters that end up in the result
        foreach($filters as &$f) {
            $line = $f['_line'];
            unset($f['_line']);
            if($f['lhs']['type'] == 'variable' && !in_array($f['lhs']['text'], $scope)) {
                $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['lhs']['text']))), $line);
            }
            if($f['rhs']['type'] == 'variable' && !in_array($f['rhs']['text'], $scope)) {
                $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['rhs']['text']))), $line);
            }
        }
        // break the reference left behind by the loop
        unset($f);

        $result = array(
            'type'=>'filter',
            'lhs'=>$result,
            'rhs'=>$filters
        );
    }

    // apply all minuses
    if(count($minuses)) {
        foreach($minuses as $minus) {
            // convert each minus, and discard their scope
            list($minus, $s) = $this->transformGroup($minus, $typemap);
            $result = array(
                'type'=>'minus',
                'lhs'=>$result,
                'rhs'=>$minus
            );
        }
    }

    return array($result, $scope);
}
4455153720fSfkaag71
/**
 * Transforms a union group with multiple subgroups.
 *
 * The union must consist of at least two untagged subgroups and nothing
 * else. The subgroups are transformed in order and combined into a
 * left-nested chain of 'union' nodes; the scopes of all subgroups are merged.
 *
 * @param array $root    the union group to transform
 * @param array $typemap the type information
 * @return array array(transformed group, list of in-scope variables)
 */
function transformUnion(&$root, &$typemap) {
    // fetch all child patterns
    $subs = $this->extractGroups($root,null);

    // do sanity checks
    if(count($root['cs'])) {
        $this->_fail($this->getLang('error_query_unionblocks'), $root['cs']);
    }

    if(count($subs) < 2) {
        $this->_fail($this->getLang('error_query_unionreq'), $root);
    }

    // transform the first group; transformGroup() takes its group by
    // reference, so it has to be handed a variable, not a function result
    $first = array_shift($subs);
    list($result,$scope) = $this->transformGroup($first, $typemap);

    // transform each subsequent group
    foreach($subs as $sub) {
        list($rhs, $s) = $this->transformGroup($sub, $typemap);
        $scope = array_merge($scope, $s);
        $result = array(
            'type'=>'union',
            'lhs'=>$result,
            'rhs'=>$rhs
        );
    }

    return array($result, $scope);
}
4825153720fSfkaag71
4835153720fSfkaag71    /**
4845153720fSfkaag71     * Transforms a list of patterns into a list of triples and a
4855153720fSfkaag71     * list of filters.
4865153720fSfkaag71     *
4875153720fSfkaag71     * @param lines array a list of lines to transform
4885153720fSfkaag71     * @param typemap array the type information
4895153720fSfkaag71     * @return a list of triples, a list of filters and a list of in-scope variables
4905153720fSfkaag71     */
4915153720fSfkaag71    function transformPatterns(&$lines, &$typemap) {
4925153720fSfkaag71        // we need this to resolve things
4935153720fSfkaag71        global $ID;
4945153720fSfkaag71
4955153720fSfkaag71        // we need patterns
4965153720fSfkaag71        $p = $this->getPatterns();
4975153720fSfkaag71
4985153720fSfkaag71        // result holders
4995153720fSfkaag71        $scope = array();
5005153720fSfkaag71        $triples = array();
5015153720fSfkaag71        $filters = array();
5025153720fSfkaag71
5035153720fSfkaag71        foreach($lines as $lineNode) {
5045153720fSfkaag71            $line = trim($lineNode['text']);
5055153720fSfkaag71
5065153720fSfkaag71            // [grammar] TRIPLEPATTERN := (VARIABLE|REFLIT) ' ' (VARIABLE|PREDICATE) TYPE? : ANY
5075153720fSfkaag71            if(preg_match("/^({$p->variable}|{$p->reflit})\s+({$p->variable}|{$p->predicate})\s*({$p->type})?\s*:\s*({$p->any})$/S",$line,$match)) {
5085153720fSfkaag71                list(, $subject, $predicate, $type, $object) = $match;
5095153720fSfkaag71
5105153720fSfkaag71                $subject = utf8_trim($subject);
5115153720fSfkaag71                if($subject[0] == '?') {
5125153720fSfkaag71                    $subject = $this->variable($subject);
5135153720fSfkaag71                    $scope[] = $subject['text'];
5145153720fSfkaag71                    $this->updateTypemap($typemap, $subject['text'], 'ref');
5155153720fSfkaag71                } else {
5165153720fSfkaag71                    global $ID;
5175153720fSfkaag71                    $subject = $p->reflit($subject)->reference;
5185153720fSfkaag71                    $subject = $this->util->loadType('ref')->normalize($subject,null);
5195153720fSfkaag71                    $subject = $this->literal($subject);
5205153720fSfkaag71                }
5215153720fSfkaag71
5225153720fSfkaag71                $predicate = utf8_trim($predicate);
5235153720fSfkaag71                if($predicate[0] == '?') {
5245153720fSfkaag71                    $predicate = $this->variable($predicate);
5255153720fSfkaag71                    $scope[] = $predicate['text'];
5265153720fSfkaag71                    $this->updateTypemap($typemap, $predicate['text'], 'text');
5275153720fSfkaag71                } else {
5285153720fSfkaag71                    $predicate = $this->literal($this->util->normalizePredicate($predicate));
5295153720fSfkaag71                }
5305153720fSfkaag71
5315153720fSfkaag71                $object = utf8_trim($object);
5325153720fSfkaag71                if($object[0] == '?') {
5335153720fSfkaag71                    // match a proper type variable
5345153720fSfkaag71                    if(preg_match("/^({$p->variable})\s*({$p->type})?$/",$object,$captures)!=1) {
5355153720fSfkaag71                        $this->_fail($this->getLang('error_pattern_garbage'),$lineNode);
5365153720fSfkaag71                    }
537*0847ebd2SFKaag                    $var=$captures[1]??null;
538*0847ebd2SFKaag                    $vtype=$captures[2]??null;
5395153720fSfkaag71
5405153720fSfkaag71                    // create the object node
5415153720fSfkaag71                    $object = $this->variable($var);
5425153720fSfkaag71                    $scope[] = $object['text'];
5435153720fSfkaag71
5445153720fSfkaag71                    // try direct type first, implied type second
5455153720fSfkaag71                    $vtype = $p->type($vtype);
5465153720fSfkaag71                    $type = $p->type($type);
547*0847ebd2SFKaag                    if (isset ($type))
548*0847ebd2SFKaag                    {
5495153720fSfkaag71                      $this->updateTypemap($typemap, $object['text'], $vtype->type, $vtype->hint);
5505153720fSfkaag71                      $this->updateTypemap($typemap, $object['text'], $type->type, $type->hint);
551*0847ebd2SFKaag                    }
5525153720fSfkaag71                } else {
5535153720fSfkaag71                    // check for empty string token
5545153720fSfkaag71                    if($object == '[[]]') {
5555153720fSfkaag71                        $object='';
5565153720fSfkaag71                    }
5575153720fSfkaag71                    if(!$type) {
5585153720fSfkaag71                        list($type, $hint) = $this->util->getDefaultType();
5595153720fSfkaag71                    } else {
5605153720fSfkaag71                        $type = $p->type($type);
5615153720fSfkaag71                        $hint = $type->hint;
5625153720fSfkaag71                        $type = $type->type;
5635153720fSfkaag71                    }
5645153720fSfkaag71                    $type = $this->util->loadType($type);
5655153720fSfkaag71                    $object = $this->literal($type->normalize($object,$hint));
5665153720fSfkaag71                }
5675153720fSfkaag71
5685153720fSfkaag71                $triples[] = array('type'=>'triple','subject'=>$subject, 'predicate'=>$predicate, 'object'=>$object);
5695153720fSfkaag71
5705153720fSfkaag71            // [grammar] FILTER := VARIABLE TYPE? OPERATOR VARIABLE TYPE?
5715153720fSfkaag71            } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
5725153720fSfkaag71                list(,$lhs, $ltype, $operator, $rhs, $rtype) = $match;
5735153720fSfkaag71
5745153720fSfkaag71                $lhs = $this->variable($lhs);
5755153720fSfkaag71                $rhs = $this->variable($rhs);
5765153720fSfkaag71
5775153720fSfkaag71                if($operator == '~>' || $operator == '!~>') $operator = str_replace('~>','^~',$operator);
5785153720fSfkaag71
5795153720fSfkaag71                // do type information propagation
5805153720fSfkaag71                $rtype = $p->type($rtype);
5815153720fSfkaag71                $ltype = $p->type($ltype);
5825153720fSfkaag71
5835153720fSfkaag71                if($ltype) {
5845153720fSfkaag71                    // left has a defined type, so update the map
5855153720fSfkaag71                    $this->updateTypemap($typemap, $lhs['text'], $ltype->type, $ltype->hint);
5865153720fSfkaag71
5875153720fSfkaag71                    // and propagate to right if possible
5885153720fSfkaag71                    if(!$rtype) {
5895153720fSfkaag71                        $this->updateTypemap($typemap, $rhs['text'], $ltype->type, $lhint->hint);
5905153720fSfkaag71                    }
5915153720fSfkaag71                }
5925153720fSfkaag71                if($rtype) {
5935153720fSfkaag71                    // right has a defined type, so update the map
5945153720fSfkaag71                    $this->updateTypemap($typemap, $rhs['text'], $rtype->type, $rtype->hint);
5955153720fSfkaag71
5965153720fSfkaag71                    // and propagate to left if possible
5975153720fSfkaag71                    if(!$ltype) {
5985153720fSfkaag71                        $this->updateTypemap($typemap, $lhs['text'], $rtype->type, $rtype->hint);
5995153720fSfkaag71                    }
6005153720fSfkaag71                }
6015153720fSfkaag71
6025153720fSfkaag71                $filters[] = array('type'=>'filter', 'lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
6035153720fSfkaag71
6045153720fSfkaag71            // [grammar] FILTER := VARIABLE TYPE? OPERATOR ANY
6055153720fSfkaag71            } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->any})$/S",$line, $match)) {
6065153720fSfkaag71
6075153720fSfkaag71                // filter pattern
6085153720fSfkaag71                list(, $lhs,$ltype,$operator,$rhs) = $match;
6095153720fSfkaag71
6105153720fSfkaag71                $lhs = $this->variable($lhs);
6115153720fSfkaag71
6125153720fSfkaag71                // update typemap if a type was defined
6135153720fSfkaag71                list($type,$hint) = $p->type($ltype);
6145153720fSfkaag71                if($type) {
6155153720fSfkaag71                    $this->updateTypemap($typemap, $lhs['text'],$type,$hint);
6165153720fSfkaag71                } else {
6175153720fSfkaag71                    // use the already declared type if no type was defined
6185153720fSfkaag71                    if(!empty($typemap[$lhs['text']])) {
6195153720fSfkaag71                        extract($typemap[$lhs['text']]);
6205153720fSfkaag71                    } else {
6215153720fSfkaag71                        list($type, $hint) = $this->util->getDefaultType();
6225153720fSfkaag71                    }
6235153720fSfkaag71                }
6245153720fSfkaag71
6255153720fSfkaag71                // check for empty string token
6265153720fSfkaag71                if($rhs == '[[]]') {
6275153720fSfkaag71                    $rhs = '';
6285153720fSfkaag71                }
6295153720fSfkaag71
6305153720fSfkaag71                // special case: the right hand side of the 'in' operator always normalizes with the 'text' type
6315153720fSfkaag71                if($operator == '~>' || $operator == '!~>') {
6325153720fSfkaag71                    $operator = str_replace('~>','^~', $operator);
6335153720fSfkaag71                    $type = 'text';
6345153720fSfkaag71                    unset($hint);
6355153720fSfkaag71                }
6365153720fSfkaag71
6375153720fSfkaag71                // normalize
6385153720fSfkaag71                $type = $this->util->loadType($type);
6395153720fSfkaag71                $rhs = $this->literal($type->normalize($rhs,$hint));
6405153720fSfkaag71
6415153720fSfkaag71                $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
6425153720fSfkaag71
6435153720fSfkaag71            // [grammar] FILTER := ANY OPERATOR VARIABLE TYPE?
6445153720fSfkaag71            } elseif(preg_match("/^({$p->any})\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) {
6455153720fSfkaag71                list(, $lhs,$operator,$rhs,$rtype) = $match;
6465153720fSfkaag71
6475153720fSfkaag71                $rhs = $this->variable($rhs);
6485153720fSfkaag71
6495153720fSfkaag71                // update typemap if a type was defined
6505153720fSfkaag71                list($type, $hint) = $p->type($rtype);
6515153720fSfkaag71                if($type) {
6525153720fSfkaag71                    $this->updateTypemap($typemap, $rhs['text'],$type,$hint);
6535153720fSfkaag71                } else {
6545153720fSfkaag71                    // use the already declared type if no type was defined
6555153720fSfkaag71                    if(!empty($typemap[$rhs['text']])) {
6565153720fSfkaag71                        extract($typemap[$rhs['text']]);
6575153720fSfkaag71                    } else {
6585153720fSfkaag71                        list($type, $hint) = $this->util->getDefaultType();
6595153720fSfkaag71                    }
6605153720fSfkaag71                }
6615153720fSfkaag71
6625153720fSfkaag71                // check for empty string token
6635153720fSfkaag71                if($lhs == '[[]]') {
6645153720fSfkaag71                    $lhs = '';
6655153720fSfkaag71                }
6665153720fSfkaag71
6675153720fSfkaag71                // special case: the left hand side of the 'in' operator always normalizes with the 'page' type
6685153720fSfkaag71                if($operator == '~>' || $operator == '!~>') {
6695153720fSfkaag71                    $operator = str_replace('~>','^~', $operator);
6705153720fSfkaag71                    $type = 'page';
6715153720fSfkaag71                    unset($hint);
6725153720fSfkaag71                }
6735153720fSfkaag71
6745153720fSfkaag71                // normalize
6755153720fSfkaag71                $type = $this->util->loadType($type);
6765153720fSfkaag71                $lhs = $this->literal($type->normalize($lhs,$hint));
6775153720fSfkaag71
6785153720fSfkaag71                $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode);
6795153720fSfkaag71            } else {
6805153720fSfkaag71                // unknown lines are fail
6815153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_pattern'),utf8_tohtml(hsc($line))), $lineNode);
6825153720fSfkaag71            }
6835153720fSfkaag71        }
6845153720fSfkaag71
6855153720fSfkaag71        return array($triples, $filters, $scope);
6865153720fSfkaag71    }
6875153720fSfkaag71
6885153720fSfkaag71    function getFields(&$tree, &$typemap) {
6895153720fSfkaag71        $fields = array();
6905153720fSfkaag71
6915153720fSfkaag71        // extract the projection information in 'long syntax' if available
6925153720fSfkaag71        $fieldsGroups = $this->extractGroups($tree, 'fields');
6935153720fSfkaag71
6945153720fSfkaag71        // parse 'long syntax' if we don't have projection information yet
6955153720fSfkaag71        if(count($fieldsGroups)) {
6965153720fSfkaag71            if(count($fieldsGroups) > 1) {
6975153720fSfkaag71                $this->_fail($this->getLang('error_query_fieldsgroups'), $fieldsGroups);
6985153720fSfkaag71            }
6995153720fSfkaag71
7005153720fSfkaag71            $fieldsLines = $this->extractText($fieldsGroups[0]);
7015153720fSfkaag71            if(count($fieldsGroups[0]['cs'])) {
7025153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_fieldsblock'),( isset($fieldsGroups[0]['cs'][0]['tag']) ? sprintf($this->getLang('named_group'),hsc($fieldsGroups[0]['cs'][0]['tag'])) : $this->getLang('unnamed_group'))), $fieldsGroups[0]['cs']);
7035153720fSfkaag71            }
7045153720fSfkaag71            $fields = $this->parseFieldsLong($fieldsLines, $typemap);
7055153720fSfkaag71            if(!$fields) return array();
7065153720fSfkaag71        }
7075153720fSfkaag71
7085153720fSfkaag71        return $fields;
7095153720fSfkaag71    }
7105153720fSfkaag71
7115153720fSfkaag71    /**
7125153720fSfkaag71     * Parses a projection group in 'long syntax'.
7135153720fSfkaag71     */
7145153720fSfkaag71    function parseFieldsLong($lines, &$typemap) {
7155153720fSfkaag71        $p = $this->getPatterns();
7165153720fSfkaag71        $result = array();
7175153720fSfkaag71
7185153720fSfkaag71        foreach($lines as $lineNode) {
7195153720fSfkaag71            $line = trim($lineNode['text']);
7205153720fSfkaag71            // FIELDLONG := VARIABLE AGGREGATE? TYPE? (':' ANY)?
7215153720fSfkaag71            if(preg_match("/^({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?(?:\s*(:)\s*({$p->any})?\s*)?$/S",$line, $match)) {
7225153720fSfkaag71                list(, $var, $vaggregate, $vtype, $nocaphint, $caption) = $match;
7235153720fSfkaag71                $variable = $p->variable($var)->name;
7245153720fSfkaag71                if(!$nocaphint || (!$nocaphint && !$caption)) $caption = ucfirst($variable);
7255153720fSfkaag71
7265153720fSfkaag71                list($type,$hint) = $p->type($vtype);
7275153720fSfkaag71                list($agg,$agghint) = $p->aggregate($vaggregate);
7285153720fSfkaag71
7295153720fSfkaag71                $this->updateTypemap($typemap, $variable, $type, $hint);
7305153720fSfkaag71                $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
7315153720fSfkaag71            } else {
7325153720fSfkaag71                $this->_fail(sprintf($this->getLang('error_query_fieldsline'),utf8_tohtml(hsc($line))), $lineNode);
7335153720fSfkaag71            }
7345153720fSfkaag71        }
7355153720fSfkaag71
7365153720fSfkaag71        return $result;
7375153720fSfkaag71    }
7385153720fSfkaag71
7395153720fSfkaag71    /**
7405153720fSfkaag71     * Parses a projection group in 'short syntax'.
7415153720fSfkaag71     */
7425153720fSfkaag71    function parseFieldsShort($line, &$typemap) {
7435153720fSfkaag71        $p = $this->getPatterns();
7445153720fSfkaag71        $result = array();
7455153720fSfkaag71
7465153720fSfkaag71        // FIELDSHORT := VARIABLE AGGREGATE? TYPE? CAPTION?
7475153720fSfkaag71        if(preg_match_all("/\s*({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?\s*(?:(\")([^\"]*)\")?/",$line,$match, PREG_SET_ORDER)) {
7485153720fSfkaag71            foreach($match as $m) {
749*0847ebd2SFKaag                $var=$m[1]??null;
750*0847ebd2SFKaag                $vaggregate=$m[2]??null;
751*0847ebd2SFKaag                $vtype=$m[3]??null;
752*0847ebd2SFKaag                $caption_indicator=$m[4]??null;
753*0847ebd2SFKaag                $caption=$m[5]??null;
754*0847ebd2SFKaag
7555153720fSfkaag71                $variable = $p->variable($var)->name;
7565153720fSfkaag71                list($type, $hint) = $p->type($vtype);
7575153720fSfkaag71                list($agg, $agghint) = $p->aggregate($vaggregate);
7585153720fSfkaag71                if(!$caption_indicator) $caption = ucfirst($variable);
7595153720fSfkaag71                $this->updateTypemap($typemap, $variable, $type, $hint);
7605153720fSfkaag71                $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint);
7615153720fSfkaag71            }
7625153720fSfkaag71        }
7635153720fSfkaag71
7645153720fSfkaag71        return $result;
7655153720fSfkaag71    }
7665153720fSfkaag71
7675153720fSfkaag71    /**
7685153720fSfkaag71     * Returns the regex pattern used by the 'short syntax' for projection. This methods can
7695153720fSfkaag71     * be used to get a dokuwiki-lexer-safe regex to embed into your own syntax pattern.
7705153720fSfkaag71     *
7715153720fSfkaag71     * @param captions boolean Whether the pattern should include caption matching (defaults to true)
7725153720fSfkaag71     */
7735153720fSfkaag71    function fieldsShortPattern($captions = true) {
7745153720fSfkaag71        $p = $this->getPatterns();
7755153720fSfkaag71        return "(?:\s*{$p->variable}\s*{$p->aggregate}?\s*{$p->type}?".($captions?'\s*(?:"[^"]*")?':'').")";
7765153720fSfkaag71    }
7775153720fSfkaag71
7785153720fSfkaag71    /**
7795153720fSfkaag71     * Constructs a tagged tree from the given list of lines.
7805153720fSfkaag71     *
7815153720fSfkaag71     * @return a tagged tree
7825153720fSfkaag71     */
7835153720fSfkaag71    function constructTree($lines, $what) {
7845153720fSfkaag71        $root = array(
7855153720fSfkaag71            'tag'=>'',
7865153720fSfkaag71            'cs'=>array(),
7875153720fSfkaag71            'start'=>1,
7885153720fSfkaag71            'end'=>1
7895153720fSfkaag71        );
7905153720fSfkaag71
7915153720fSfkaag71        $stack = array();
7925153720fSfkaag71        $stack[] =& $root;
7935153720fSfkaag71        $top = count($stack)-1;
7945153720fSfkaag71        $lineCount = 0;
7955153720fSfkaag71
7965153720fSfkaag71        foreach($lines as $line) {
7975153720fSfkaag71            $lineCount++;
7985153720fSfkaag71            if($this->ignorableLine($line)) continue;
7995153720fSfkaag71
8005153720fSfkaag71            if(preg_match('/^([^\{]*) *{$/',utf8_trim($line),$match)) {
8015153720fSfkaag71                list(, $tag) = $match;
8025153720fSfkaag71                $tag = utf8_trim($tag);
8035153720fSfkaag71
8045153720fSfkaag71                $stack[$top]['cs'][] = array(
8055153720fSfkaag71                    'tag'=>$tag?:null,
8065153720fSfkaag71                    'cs'=>array(),
8075153720fSfkaag71                    'start'=>$lineCount,
8085153720fSfkaag71                    'end'=>0
8095153720fSfkaag71                );
8105153720fSfkaag71                $stack[] =& $stack[$top]['cs'][count($stack[$top]['cs'])-1];
8115153720fSfkaag71                $top = count($stack)-1;
8125153720fSfkaag71
8135153720fSfkaag71            } elseif(preg_match('/^}$/',utf8_trim($line))) {
8145153720fSfkaag71                $stack[$top]['end'] = $lineCount;
8155153720fSfkaag71                array_pop($stack);
8165153720fSfkaag71                $top = count($stack)-1;
8175153720fSfkaag71
8185153720fSfkaag71            } else {
8195153720fSfkaag71                $stack[$top]['cs'][] = array(
8205153720fSfkaag71                    'text'=>$line,
8215153720fSfkaag71                    'start'=>$lineCount,
8225153720fSfkaag71                    'end'=>$lineCount
8235153720fSfkaag71                );
8245153720fSfkaag71            }
8255153720fSfkaag71        }
8265153720fSfkaag71
8275153720fSfkaag71        if(count($stack) != 1 || $stack[0] != $root) {
8285153720fSfkaag71            msg(sprintf($this->getLang('error_syntax_braces'),$what),-1);
8295153720fSfkaag71        }
8305153720fSfkaag71
8315153720fSfkaag71        $root['end'] = $lineCount;
8325153720fSfkaag71
8335153720fSfkaag71        return $root;
8345153720fSfkaag71    }
8355153720fSfkaag71
8365153720fSfkaag71    /**
8375153720fSfkaag71     * Renders a debug display of the syntax.
8385153720fSfkaag71     *
8395153720fSfkaag71     * @param lines array the lines that form the syntax
8405153720fSfkaag71     * @param region array the region to highlight
8415153720fSfkaag71     * @return a string with markup
8425153720fSfkaag71     */
8435153720fSfkaag71    function debugTree($lines, $regions) {
8445153720fSfkaag71        $result = '';
8455153720fSfkaag71        $lineCount = 0;
8465153720fSfkaag71        $count = 0;
8475153720fSfkaag71
8485153720fSfkaag71        foreach($lines as $line) {
8495153720fSfkaag71            $lineCount++;
8505153720fSfkaag71
8515153720fSfkaag71            foreach($regions as $region) {
8525153720fSfkaag71                if($lineCount == $region['start']) {
8535153720fSfkaag71                    if($count == 0) $result .= '<div class="strata-debug-highlight">';
8545153720fSfkaag71                    $count++;
8555153720fSfkaag71                }
8565153720fSfkaag71
8575153720fSfkaag71                if($lineCount == $region['end']+1) {
8585153720fSfkaag71                    $count--;
8595153720fSfkaag71
8605153720fSfkaag71                    if($count==0) $result .= '</div>';
8615153720fSfkaag71                }
8625153720fSfkaag71            }
8635153720fSfkaag71
8645153720fSfkaag71            if($line != '') {
8655153720fSfkaag71                $result .= '<div class="strata-debug-line">'.hsc($line).'</div>'."\n";
8665153720fSfkaag71            } else {
8675153720fSfkaag71                $result .= '<div class="strata-debug-line"><br/></div>'."\n";
8685153720fSfkaag71            }
8695153720fSfkaag71        }
8705153720fSfkaag71
8715153720fSfkaag71        if($count > 0) {
8725153720fSfkaag71            $result .= '</div>';
8735153720fSfkaag71        }
8745153720fSfkaag71
8755153720fSfkaag71        return '<div class="strata-debug">'.$result.'</div>';
8765153720fSfkaag71    }
8775153720fSfkaag71
8785153720fSfkaag71    /**
8795153720fSfkaag71     * Extract all occurences of tagged groups from the given tree.
8805153720fSfkaag71     * This method does not remove the tagged groups from subtrees of
8815153720fSfkaag71     * the given root.
8825153720fSfkaag71     *
8835153720fSfkaag71     * @param root array the tree to operate on
8845153720fSfkaag71     * @param tag string the tag to remove
8855153720fSfkaag71     * @return an array of groups
8865153720fSfkaag71     */
8875153720fSfkaag71    function extractGroups(&$root, $tag) {
8885153720fSfkaag71        $result = array();
8895153720fSfkaag71        $to_remove = array();
8905153720fSfkaag71        foreach($root['cs'] as $i=>&$tree) {
8915153720fSfkaag71            if(!$this->isGroup($tree)) continue;
8925153720fSfkaag71            if($tree['tag'] == $tag || (($tag=='' || $tag==null) && $tree['tag'] == null) ) {
8935153720fSfkaag71                $result[] =& $tree;
8945153720fSfkaag71                $to_remove[] = $i;
8955153720fSfkaag71            }
8965153720fSfkaag71        }
8975153720fSfkaag71        // invert order of to_remove to always remove higher indices first
8985153720fSfkaag71        rsort($to_remove);
8995153720fSfkaag71        foreach($to_remove as $i) {
9005153720fSfkaag71            array_splice($root['cs'],$i,1);
9015153720fSfkaag71        }
9025153720fSfkaag71        return $result;
9035153720fSfkaag71    }
9045153720fSfkaag71
9055153720fSfkaag71    /**
9065153720fSfkaag71     * Extracts all text elements from the given tree.
9075153720fSfkaag71     * This method does not remove the text elements from subtrees
9085153720fSfkaag71     * of the root.
9095153720fSfkaag71     *
9105153720fSfkaag71     * @param root array the tree to operate on
9115153720fSfkaag71     * @return array an array of text elements
9125153720fSfkaag71     */
9135153720fSfkaag71    function extractText(&$root) {
9145153720fSfkaag71        $result = array();
9155153720fSfkaag71        $to_remove = array();
9165153720fSfkaag71        foreach($root['cs'] as $i=>&$tree) {
9175153720fSfkaag71            if(!$this->isText($tree)) continue;
9185153720fSfkaag71            $result[] =& $tree;
9195153720fSfkaag71            $to_remove[] = $i;
9205153720fSfkaag71        }
9215153720fSfkaag71        // invert order of to_remove to always remove higher indices first
9225153720fSfkaag71        rsort($to_remove);
9235153720fSfkaag71        foreach($to_remove as $i) {
9245153720fSfkaag71            array_splice($root['cs'],$i,1);
9255153720fSfkaag71        }
9265153720fSfkaag71        return $result;
9275153720fSfkaag71    }
9285153720fSfkaag71
9295153720fSfkaag71    /**
9305153720fSfkaag71     * Returns whether the given node is a line.
9315153720fSfkaag71     */
9325153720fSfkaag71    function isText(&$node) {
9335153720fSfkaag71        return array_key_exists('text', $node);
9345153720fSfkaag71    }
9355153720fSfkaag71
9365153720fSfkaag71    /**
9375153720fSfkaag71     * Returns whether the given node is a group.
9385153720fSfkaag71     */
9395153720fSfkaag71    function isGroup(&$node) {
9405153720fSfkaag71        return array_key_exists('tag', $node);
9415153720fSfkaag71    }
9425153720fSfkaag71
9435153720fSfkaag71    /**
9445153720fSfkaag71     * Sets all properties given as '$properties' to the values parsed from '$trees'.
9455153720fSfkaag71     *
9465153720fSfkaag71     * The property array has as keys all possible properties, which are specified by its
9475153720fSfkaag71     * values. Such specification is an array that may have the following keys, with the
9485153720fSfkaag71     * described values:
9495153720fSfkaag71     * - choices: array of possible values, where the keys are the internally used values
9505153720fSfkaag71     *     and the values specify synonyms for the choice, of which the first listed one
9515153720fSfkaag71     *     is most common. For example: 'true' => array('yes', 'yeah') specifies that the
9525153720fSfkaag71     *     user can choose 'yes' or 'yeah' (of which 'yes' is the commonly used value) and
9535153720fSfkaag71     *     that the return value will contain 'true' if this choice was chosen.
9545153720fSfkaag71     * - pattern: regular expression that defines all possible values.
9555153720fSfkaag71     * - pattern_desc: description used for errors when a pattern is specified.
9565153720fSfkaag71     * - minOccur: positive integer specifying the minimum number of values, defaults to 1.
9575153720fSfkaag71     * - maxOccur: integer greater than or equal to minOccur, which specifies the maximum
9585153720fSfkaag71     *     number of values, defaults to minOccur.
9595153720fSfkaag71     * - default: the default value (which must be a value the user is allowed to set).
9605153720fSfkaag71     *     When default is given, this method guarantees that the property is always set,
9615153720fSfkaag71     *     otherwise the property may not be set since all properties are optional.
9625153720fSfkaag71     * Either 'choices' or 'pattern' must be set (not both), all other values are optional.
9635153720fSfkaag71     *
9645153720fSfkaag71     * An example property array is as follows:
9655153720fSfkaag71     * array(
9665153720fSfkaag71     *   'example boolean' => array(
9675153720fSfkaag71     *     'choices' => array('y' => array('yes', 'yeah'), 'n' => array('no', 'nay')),
9685153720fSfkaag71     *     'minOccur' => 1,
9695153720fSfkaag71     *     'maxOccur' => 3,
9705153720fSfkaag71     *     'default' => 'yes'
9715153720fSfkaag71     *   ),
9725153720fSfkaag71     *   'example natural number' => array(
9735153720fSfkaag71     *     'pattern' => '/^[0-9]+$/',
9745153720fSfkaag71     *     'pattern_desc' => $this->getLang('property_Z*')
9755153720fSfkaag71     *   )
9765153720fSfkaag71     * )
9775153720fSfkaag71     *
9785153720fSfkaag71     * @param $properties The properties that can be set.
9795153720fSfkaag71     * @param $trees The trees that contain the values for these properties.
9805153720fSfkaag71     * @return An array with as indices the property names and as value a list of all values given for that property.
9815153720fSfkaag71     */
9825153720fSfkaag71    function setProperties($properties, $trees) {
9835153720fSfkaag71        $propertyValues = array();
9845153720fSfkaag71        $p = $this->getPatterns();
9855153720fSfkaag71
9865153720fSfkaag71        foreach ($trees as $tree) {
9875153720fSfkaag71            $text = $this->extractText($tree);
9885153720fSfkaag71            foreach($text as $lineNode) {
9895153720fSfkaag71                $line = utf8_trim($lineNode['text']);
9905153720fSfkaag71                if (preg_match('/^('.$p->predicate.')(\*)?\s*:\s*('.$p->any.')$/', $line, $match)) {
9915153720fSfkaag71                    list(, $variable, $multi, $value) = $match;
9925153720fSfkaag71                    $this->_setPropertyValue($properties, $tree['tag'], $lineNode, $variable, !empty($multi), $value, $propertyValues);
9935153720fSfkaag71                } else {
9945153720fSfkaag71                    $this->emitError($lineNode, 'error_property_weirdgroupline', hsc($tree['tag']), hsc($line));
9955153720fSfkaag71                }
9965153720fSfkaag71            }
9975153720fSfkaag71            // Warn about unknown groups
9985153720fSfkaag71            foreach ($tree['cs'] as $group) {
9995153720fSfkaag71                $this->emitError($group, 'error_property_unknowngroup', hsc($trees[0]['tag']), hsc($group['tag']));
10005153720fSfkaag71            }
10015153720fSfkaag71        }
10025153720fSfkaag71
10035153720fSfkaag71        // Set property defaults
10045153720fSfkaag71        foreach ($properties as $name => $p) {
10055153720fSfkaag71            if (!isset($propertyValues[$name]) && isset($p['default'])) {
10065153720fSfkaag71                $this->_setPropertyValue($properties, 'default value', null, $name, false, $p['default'], $propertyValues);
10075153720fSfkaag71            }
10085153720fSfkaag71        }
10095153720fSfkaag71
10105153720fSfkaag71        // Show errors, if any
10115153720fSfkaag71        $this->showErrors();
10125153720fSfkaag71
10135153720fSfkaag71        return $propertyValues;
10145153720fSfkaag71    }
10155153720fSfkaag71
10165153720fSfkaag71    function _setPropertyValue($properties, $group, $region, $variable, $isMulti, $value, &$propertyValues) {
10175153720fSfkaag71        if (!isset($properties[$variable])) {
10185153720fSfkaag71            // Unknown property: show error
10195153720fSfkaag71            $property_title_values = $this->getLang('property_title_values');
10205153720fSfkaag71            $propertyList = implode(', ', array_map(function ($n, $p) use ($property_title_values) {
10215153720fSfkaag71                $values = implode(', ', array_map(function ($c) {
10225153720fSfkaag71                    return $c[0];
10235153720fSfkaag71                }, $p['choices']));
10245153720fSfkaag71                $title = sprintf($property_title_values, $values);
10255153720fSfkaag71                return '\'<code title="' . hsc($title) . '">' . hsc($n) . '</code>\'';
10265153720fSfkaag71            }, array_keys($properties), $properties));
10275153720fSfkaag71            $this->emitError($region, 'error_property_unknownproperty', hsc($group), hsc($variable), $propertyList);
10285153720fSfkaag71        } else if (isset($propertyValues[$variable])) {
10295153720fSfkaag71            // Property is specified more than once: show error
10305153720fSfkaag71            $this->emitError($region, 'error_property_multi', hsc($group), hsc($variable));
10315153720fSfkaag71        } else {
10325153720fSfkaag71            $p = $properties[$variable];
10335153720fSfkaag71            $minOccur = isset($p['minOccur']) ? $p['minOccur'] : 1;
10345153720fSfkaag71            $maxOccur = isset($p['maxOccur']) ? $p['maxOccur'] : $minOccur;
10355153720fSfkaag71
10365153720fSfkaag71            if ($isMulti) {
10375153720fSfkaag71                $values = array_map('utf8_trim', explode(',', $value));
10385153720fSfkaag71            } else if ($minOccur == 1 || $minOccur == $maxOccur) {
10395153720fSfkaag71                // Repeat the given value as often as we expect it
10405153720fSfkaag71                $values = array_fill(0, $minOccur, $value);
10415153720fSfkaag71            } else {
10425153720fSfkaag71                // A single value was given, but multiple were expected
10435153720fSfkaag71                $this->emitError($region, 'error_property_notmulti', hsc($group), hsc($variable), $minOccur);
10445153720fSfkaag71                return;
10455153720fSfkaag71            }
10465153720fSfkaag71
10475153720fSfkaag71            if (count($values) < $minOccur || count($values) > $maxOccur) {
10485153720fSfkaag71                // Number of values given differs from expected number
10495153720fSfkaag71                if ($minOccur == $maxOccur) {
10505153720fSfkaag71                    $this->emitError($region, 'error_property_occur', hsc($group), hsc($variable), $minOccur, count($values));
10515153720fSfkaag71                } else {
10525153720fSfkaag71                    $this->emitError($region, 'error_property_occurrange', hsc($group), hsc($variable), $minOccur, $maxOccur, count($values));
10535153720fSfkaag71                }
10545153720fSfkaag71            } else if (isset($p['choices'])) { // Check whether the given property values are valid choices
10555153720fSfkaag71                // Create a mapping from choice to normalized value of the choice
10565153720fSfkaag71                $choices = array();
10575153720fSfkaag71                $choicesInfo = array(); // For nice error messages
10585153720fSfkaag71                foreach ($p['choices'] as $nc => $c) {
10595153720fSfkaag71                    if (is_array($c)) {
10605153720fSfkaag71                        $choices = array_merge($choices, array_fill_keys($c, $nc));
10615153720fSfkaag71                        $title = sprintf($this->getLang('property_title_synonyms'), implode(', ', $c));
10625153720fSfkaag71                        $choicesInfo[] = '\'<code title="' . hsc($title) . '">' . hsc($c[0]) . '</code>\'';
10635153720fSfkaag71                    } else {
10645153720fSfkaag71                        $choices[$c] = $c;
10655153720fSfkaag71                        $choicesInfo[] = '\'<code>' . hsc($c) . '</code>\'';
10665153720fSfkaag71                    }
10675153720fSfkaag71                }
10685153720fSfkaag71                if (!isset($choices['']) && isset($p['default'])) {
10695153720fSfkaag71                    $choices[''] = $choices[$p['default']];
10705153720fSfkaag71                }
10715153720fSfkaag71
10725153720fSfkaag71                $incorrect = array_diff($values, array_keys($choices)); // Find all values that are not a valid choice
10735153720fSfkaag71                if (count($incorrect) > 0) {
10745153720fSfkaag71                    unset($choices['']);
10755153720fSfkaag71                    foreach (array_unique($incorrect) as $v) {
10765153720fSfkaag71                        $this->emitError($region, 'error_property_invalidchoice', hsc($group), hsc($variable), hsc($v), implode(', ', $choicesInfo));
10775153720fSfkaag71                    }
10785153720fSfkaag71                } else {
10795153720fSfkaag71                    $propertyValues[$variable] = array_map(function($v) use ($choices) { return $choices[$v]; }, $values);
10805153720fSfkaag71                }
10815153720fSfkaag71            } else if (isset($p['pattern'])) { // Check whether the given property values match the pattern
10825153720fSfkaag71                $incorrect = array_filter($values, function($v) use ($p) { return !preg_match($p['pattern'], $v); });
10835153720fSfkaag71                if (count($incorrect) > 0) {
10845153720fSfkaag71                    foreach (array_unique($incorrect) as $v) {
10855153720fSfkaag71                        if (isset($p['pattern_desc'])) {
10865153720fSfkaag71                            $this->emitError($region, 'error_property_patterndesc', hsc($group), hsc($variable), hsc($v), $p['pattern_desc']);
10875153720fSfkaag71                        } else {
10885153720fSfkaag71                            $this->emitError($region, 'error_property_pattern', hsc($group), hsc($variable), hsc($v), hsc($p['pattern']));
10895153720fSfkaag71                        }
10905153720fSfkaag71                    }
10915153720fSfkaag71                } else {
10925153720fSfkaag71                    $propertyValues[$variable] = $values;
10935153720fSfkaag71                }
10945153720fSfkaag71            } else { // Property value has no requirements
10955153720fSfkaag71                $propertyValues[$variable] = $values;
10965153720fSfkaag71            }
10975153720fSfkaag71        }
10985153720fSfkaag71    }
10995153720fSfkaag71
11005153720fSfkaag71    /**
11015153720fSfkaag71     * Generates a html error message, ensuring that all utf8 in arguments is escaped correctly.
11025153720fSfkaag71     * The generated messages might be accumulated until showErrors is called.
11035153720fSfkaag71     *
11045153720fSfkaag71     * @param region The region at which the error occurs.
11055153720fSfkaag71     * @param msg_id The id of the message in the language file.
11065153720fSfkaag71     */
11075153720fSfkaag71    function emitError($region, $msg_id) {
11085153720fSfkaag71        $args = func_get_args();
11095153720fSfkaag71        array_shift($args);
11105153720fSfkaag71        array_shift($args);
11115153720fSfkaag71        $args = array_map('strval', $args); // convert everything to strings first
11125153720fSfkaag71        $args = array_map('utf8_tohtml', $args); // Escape args
11135153720fSfkaag71        $msg = vsprintf($this->getLang($msg_id), $args);
11145153720fSfkaag71        msg($msg, -1);
11155153720fSfkaag71        $this->error .= "<br />\n" . $msg;
11165153720fSfkaag71        $this->regions[] = $region;
11175153720fSfkaag71    }
11185153720fSfkaag71
11195153720fSfkaag71    /**
11205153720fSfkaag71     * Ensures that all emitted errors are shown.
11215153720fSfkaag71     */
11225153720fSfkaag71    function showErrors() {
11235153720fSfkaag71        if (!empty($this->error)) {
11245153720fSfkaag71            $error = $this->error;
11255153720fSfkaag71            $regions = $this->regions;
11265153720fSfkaag71            $this->error = '';
11275153720fSfkaag71            $this->regions = array();
11285153720fSfkaag71            throw new strata_exception($error, $regions);
11295153720fSfkaag71        }
11305153720fSfkaag71    }
11315153720fSfkaag71}
11325153720fSfkaag71
11335153720fSfkaag71// call static initializer (PHP doesn't offer this feature)
11345153720fSfkaag71helper_plugin_strata_syntax::initialize();
1135