1<?php 2/** 3 * DokuWiki Plugin strata (Helper Component) 4 * 5 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html 6 * @author Brend Wanders <b.wanders@utwente.nl> 7 */ 8 9if (!defined('DOKU_INC')) die('meh.'); 10 11/** 12 * Helper to construct and handle syntax fragments. 13 */ 14class helper_plugin_strata_syntax_RegexHelper { 15 /** 16 * Regular expression fragment table. This is used for interpolation of 17 * syntax patterns, and should be without captures. Do not assume any 18 * specific delimiter. 19 */ 20 var $regexFragments = array( 21 'variable' => '(?:\?[^\s:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)', 22 'predicate' => '(?:[^:\(\)\[\]\{\}\<\>\|\~\!\@\#\$\%\^\&\*\?\="]+)', 23 'reflit' => '(?:\[\[[^]]*\]\])', 24 'type' => '(?:\[\s*[a-z0-9]+\s*(?:::[^\]]*)?\])', 25 'aggregate' => '(?:@\s*[a-z0-9]+(?:\([^\)]*\))?)', 26 'operator' => '(?:!=|>=|<=|>|<|=|!~>|!~|!\^~|!\$~|\^~|\$~|~>|~)', 27 'any' => '(?:.+?)' 28 ); 29 30 /** 31 * Patterns used to extract information from captured fragments. These patterns 32 * are used with '/' as delimiter, and should contain at least one capture group. 33 */ 34 var $regexCaptures = array( 35 'variable' => array('\?(.*)', array('name')), 36 'aggregate' => array('@\s*([a-z0-9]+)(?:\(([^\)]*)\))?', array('aggregate','hint')), 37 'type' => array('\[\s*([a-z0-9]+)\s*(?:::([^\]]*))?\]', array('type', 'hint')), 38 'reflit' => array('\[\[(.*)\]\]',array('reference')) 39 ); 40 41 /** 42 * Grabs the syntax fragment. 43 */ 44 function __get($name) { 45 if(array_key_exists($name, $this->regexFragments)) { 46 return $this->regexFragments[$name]; 47 } else { 48 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS); 49 trigger_error("Undefined syntax fragment '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE); 50 } 51 } 52 53 /** 54 * Extracts information from a fragment, based on the type. 55 */ 56 function __call($name, $arguments) { 57 if(array_key_exists($name, $this->regexCaptures)) { 58 list($pattern, $names) = $this->regexCaptures[$name]; 59 $result = preg_match("/^{$pattern}$/", $arguments[0], $match); 60 if($result === 1) { 61 array_shift($match); 62 $shortest = min(count($names), count($match)); 63 return new helper_plugin_strata_syntax_RegexHelperCapture(array_combine(array_slice($names,0,$shortest), array_slice($match, 0, $shortest))); 64 } else { 65 return null; 66 } 67 } else { 68 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS); 69 trigger_error("Undefined syntax capture '$name' on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE); 70 } 71 } 72} 73 74/** 75 * A single capture. Used as a return value for the RegexHelper's 76 * capture methods. 77 */ 78class helper_plugin_strata_syntax_RegexHelperCapture implements ArrayAccess { 79 function __construct($values) { 80 $this->values = $values; 81 } 82 83 function __get($name) { 84 if(array_key_exists($name, $this->values)) { 85 return $this->values[$name]; 86 } else { 87 return null; 88 } 89 } 90 91 function offsetExists($offset) { 92 // the index is valid iff: 93 // it is an existing field name 94 // it is a correct nummeric index (with 0 being the first name and count-1 the last) 95 return isset($this->values[$offset]) || ($offset >= 0 && $offset < count($this->values)); 96 } 97 98 function offsetGet($offset) { 99 // return the correct offset 100 if (isset($this->values[$offset])) { 101 return $this->values[$offset]; 102 } else { 103 // or try the numeric offsets 104 if(is_numeric($offset) && $offset >= 0 && $offset < count($this->values)) { 105 // translate numeric offset to key 106 $keys = array_keys($this->values); 107 return $this->values[$keys[intval($offset)]]; 108 } else { 109 // offset unknown, return without value 110 return; 111 } 112 } 113 } 114 115 function offsetSet($offset, $value) { 116 // noop 117 $trace = debug_backtrace(); 118 trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE); 119 } 120 121 function offsetUnset($offset) { 122 // noop 123 $trace = debug_backtrace(); 124 trigger_error("Syntax fragment fields are read-only on {$trace[0]['file']}:{$trace[0]['line']}", E_USER_NOTICE); 125 } 126} 127 128/** 129 * Helper plugin for common syntax parsing. 130 */ 131class helper_plugin_strata_syntax extends DokuWiki_Plugin { 132 public static $patterns; 133 134 /** 135 * Static initializer called directly after class declaration. 136 * 137 * This static method exists because we want to keep the static $patterns 138 * and its initialization close together. 139 */ 140 static function initialize() { 141 self::$patterns = new helper_plugin_strata_syntax_RegexHelper(); 142 } 143 144 /** 145 * Constructor. 146 */ 147 function __construct() { 148 $this->util =& plugin_load('helper', 'strata_util'); 149 $this->error = ''; 150 $this->regions = array(); 151 } 152 153 /** 154 * Returns an object describing the pattern fragments. 155 */ 156 function getPatterns() { 157 return self::$patterns; 158 } 159 160 /** 161 * Determines whether a line can be ignored. 162 */ 163 function ignorableLine($line) { 164 $line = utf8_trim($line); 165 return $line == '' || utf8_substr($line,0,2) == '--'; 166 } 167 168 /** 169 * Updates the given typemap with new information. 170 * 171 * @param typemap array a typemap 172 * @param var string the name of the variable 173 * @param type string the type of the variable 174 * @param hint string the type hint of the variable 175 */ 176 function updateTypemap(&$typemap, $var, $type, $hint=null) { 177 if(empty($typemap[$var]) && $type) { 178 $typemap[$var] = array('type'=>$type,'hint'=>$hint); 179 return true; 180 } 181 182 return false; 183 } 184 185 /** 186 * Constructs a literal with the given text. 187 */ 188 function literal($val) { 189 return array('type'=>'literal', 'text'=>$val); 190 } 191 192 /** 193 * Constructs a variable with the given name. 194 */ 195 function variable($var) { 196 if($var[0] == '?') $var = substr($var,1); 197 return array('type'=>'variable', 'text'=>$var); 198 } 199 200 function _fail($message, $regions=array()) { 201 msg($message,-1); 202 203 if($this->isGroup($regions) || $this->isText($regions)) { 204 $regions = array($regions); 205 } 206 207 $lines = array(); 208 foreach($regions as $r) $lines[] = array('start'=>$r['start'], 'end'=>$r['end']); 209 throw new strata_exception($message, $lines); 210 } 211 212 /** 213 * Constructs a query from the give tree. 214 * 215 * @param root array the tree to transform 216 * @param typemap array the type information collected so far 217 * @param projection array the variables to project 218 * @return a query structure 219 */ 220 function constructQuery(&$root, &$typemap, $projection) { 221 $p = $this->getPatterns(); 222 223 $result = array( 224 'type'=>'select', 225 'group'=>array(), 226 'projection'=>$projection, 227 'ordering'=>array(), 228 'grouping'=>false, 229 'considering'=>array() 230 ); 231 232 // extract sort groups 233 $ordering = $this->extractGroups($root, 'sort'); 234 235 // extract grouping groups 236 $grouping = $this->extractGroups($root, 'group'); 237 238 // extract additional projection groups 239 $considering = $this->extractGroups($root, 'consider'); 240 241 // transform actual group 242 $where = $this->extractGroups($root, 'where'); 243 $tree = null; 244 if(count($where)==0) { 245 $tree =& $root; 246 } elseif(count($where)==1) { 247 $tree =& $where[0]; 248 if(count($root['cs'])) { 249 $this->_fail($this->getLang('error_query_outofwhere'), $root['cs']); 250 } 251 } else { 252 $this->_fail($this->getLang('error_query_singlewhere'), $where); 253 } 254 255 list($group, $scope) = $this->transformGroup($tree, $typemap); 256 $result['group'] = $group; 257 if(!$group) return false; 258 259 // handle sort groups 260 if(count($ordering)) { 261 if(count($ordering) > 1) { 262 $this->_fail($this->getLang('error_query_multisort'), $ordering); 263 } 264 265 // handle each line in the group 266 foreach($ordering[0]['cs'] as $line) { 267 if($this->isGroup($line)) { 268 $this->_fail($this->getLang('error_query_sortblock'), $line); 269 } 270 271 if(preg_match("/^({$p->variable})\s*(?:\((asc|desc)(?:ending)?\))?$/S",utf8_trim($line['text']),$match)) { 272 $var = $p->variable($match[1]); 273 if(!in_array($var->name, $scope)) { 274 $this->_fail(sprintf($this->getLang('error_query_sortvar'),utf8_tohtml(hsc($var->name))), $line); 275 } 276 277 $result['ordering'][] = array('variable'=>$var->name, 'direction'=>($match[2]?:'asc')); 278 } else { 279 $this->_fail(sprintf($this->getLang('error_query_sortline'), utf8_tohtml(hsc($line['text']))), $line); 280 } 281 } 282 } 283 284 //handle grouping 285 if(count($grouping)) { 286 if(count($grouping) > 1) { 287 $this->_fail($this->getLang('error_query_multigrouping'), $grouping); 288 } 289 290 // we have a group, so we want grouping 291 $result['grouping'] = array(); 292 293 foreach($grouping[0]['cs'] as $line) { 294 if($this->isGroup($line)) { 295 $this->_fail($this->getLang('error_query_groupblock'), $line); 296 } 297 298 if(preg_match("/({$p->variable})$/",utf8_trim($line['text']),$match)) { 299 $var = $p->variable($match[1]); 300 if(!in_array($var->name, $scope)) { 301 $this->_fail(sprintf($this->getLang('error_query_groupvar'),utf8_tohtml(hsc($var->name))), $line); 302 } 303 304 $result['grouping'][] = $var->name; 305 } else { 306 $this->_fail(sprintf($this->getLang('error_query_groupline'), utf8_tohtml(hsc($line['text']))), $line); 307 } 308 } 309 } 310 311 //handle considering 312 if(count($considering)) { 313 if(count($considering) > 1) { 314 $this->_fail($this->getLang('error_query_multiconsidering'), $considering); 315 } 316 317 foreach($considering[0]['cs'] as $line) { 318 if($this->isGroup($line)) { 319 $this->_fail($this->getLang('error_query_considerblock'), $line); 320 } 321 322 if(preg_match("/^({$p->variable})$/",utf8_trim($line['text']),$match)) { 323 $var = $p->variable($match[1]); 324 if(!in_array($var->name, $scope)) { 325 $this->_fail(sprintf($this->getLang('error_query_considervar'),utf8_tohtml(hsc($var->name))), $line); 326 } 327 328 $result['considering'][] = $var->name; 329 } else { 330 $this->_fail(sprintf($this->getLang('error_query_considerline'), utf8_tohtml(hsc($line['text']))), $line); 331 } 332 } 333 } 334 335 foreach($projection as $var) { 336 if(!in_array($var, $scope)) { 337 $this->_fail(sprintf($this->getLang('error_query_selectvar'), utf8_tohtml(hsc($var)))); 338 } 339 } 340 341 // return final query structure 342 return array($result, $scope); 343 } 344 345 /** 346 * Transforms a full query group. 347 * 348 * @param root array the tree to transform 349 * @param typemap array the type information 350 * @return the transformed group and a list of in-scope variables 351 */ 352 function transformGroup(&$root, &$typemap) { 353 // extract patterns and split them in triples and filters 354 $patterns = $this->extractText($root); 355 356 // extract union groups 357 $unions = $this->extractGroups($root, 'union'); 358 359 // extract minus groups 360 $minuses = $this->extractGroups($root,'minus'); 361 362 // extract optional groups 363 $optionals = $this->extractGroups($root,'optional'); 364 365 // check for leftovers 366 if(count($root['cs'])) { 367 $this->_fail(sprintf($this->getLang('error_query_group'),( isset($root['cs'][0]['tag']) ? sprintf($this->getLang('named_group'), utf8_tohtml(hsc($root['cs'][0]['tag']))) : $this->getLang('unnamed_group'))), $root['cs']); 368 } 369 370 // split patterns into triples and filters 371 list($patterns, $filters, $scope) = $this->transformPatterns($patterns, $typemap); 372 373 // convert each union into a pattern 374 foreach($unions as $union) { 375 list($u, $s) = $this->transformUnion($union, $typemap); 376 $scope = array_merge($scope, $s); 377 $patterns[] = $u; 378 } 379 380 if(count($patterns) == 0) { 381 $this->_fail(sprintf($this->getLang('error_query_grouppattern')), $root); 382 } 383 384 // chain all patterns with ANDs 385 $result = array_shift($patterns); 386 foreach($patterns as $pattern) { 387 $result = array( 388 'type'=>'and', 389 'lhs'=>$result, 390 'rhs'=>$pattern 391 ); 392 } 393 394 // apply all optionals 395 if(count($optionals)) { 396 foreach($optionals as $optional) { 397 // convert eacfh optional 398 list($optional, $s) = $this->transformGroup($optional, $typemap); 399 $scope = array_merge($scope, $s); 400 $result = array( 401 'type'=>'optional', 402 'lhs'=>$result, 403 'rhs'=>$optional 404 ); 405 } 406 } 407 408 409 // add all filters; these are a bit weird, as only a single FILTER is really supported 410 // (we have defined multiple filters as being a conjunction) 411 if(count($filters)) { 412 foreach($filters as $f) { 413 $line = $f['_line']; 414 unset($f['_line']); 415 if($f['lhs']['type'] == 'variable' && !in_array($f['lhs']['text'], $scope)) { 416 $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['lhs']['text']))), $line); 417 } 418 if($f['rhs']['type'] == 'variable' && !in_array($f['rhs']['text'], $scope)) { 419 $this->_fail(sprintf($this->getLang('error_query_filterscope'),utf8_tohtml(hsc($f['rhs']['text']))), $line); 420 } 421 } 422 423 $result = array( 424 'type'=>'filter', 425 'lhs'=>$result, 426 'rhs'=>$filters 427 ); 428 } 429 430 // apply all minuses 431 if(count($minuses)) { 432 foreach($minuses as $minus) { 433 // convert each minus, and discard their scope 434 list($minus, $s) = $this->transformGroup($minus, $typemap); 435 $result = array( 436 'type'=>'minus', 437 'lhs'=>$result, 438 'rhs'=>$minus 439 ); 440 } 441 } 442 443 return array($result, $scope); 444 } 445 446 /** 447 * Transforms a union group with multiple subgroups 448 * 449 * @param root array the union group to transform 450 * @param typemap array the type information 451 * @return the transformed group and a list of in-scope variables 452 */ 453 function transformUnion(&$root, &$typemap) { 454 // fetch all child patterns 455 $subs = $this->extractGroups($root,null); 456 457 // do sanity checks 458 if(count($root['cs'])) { 459 $this->_fail($this->getLang('error_query_unionblocks'), $root['cs']); 460 } 461 462 if(count($subs) < 2) { 463 $this->_fail($this->getLang('error_query_unionreq'), $root); 464 } 465 466 // transform the first group 467 list($result,$scope) = $this->transformGroup(array_shift($subs), $typemap); 468 469 // transform each subsequent group 470 foreach($subs as $sub) { 471 list($rhs, $s) = $this->transformGroup($sub, $typemap); 472 $scope = array_merge($scope, $s); 473 $result = array( 474 'type'=>'union', 475 'lhs'=>$result, 476 'rhs'=>$rhs 477 ); 478 } 479 480 return array($result, $scope); 481 } 482 483 /** 484 * Transforms a list of patterns into a list of triples and a 485 * list of filters. 486 * 487 * @param lines array a list of lines to transform 488 * @param typemap array the type information 489 * @return a list of triples, a list of filters and a list of in-scope variables 490 */ 491 function transformPatterns(&$lines, &$typemap) { 492 // we need this to resolve things 493 global $ID; 494 495 // we need patterns 496 $p = $this->getPatterns(); 497 498 // result holders 499 $scope = array(); 500 $triples = array(); 501 $filters = array(); 502 503 foreach($lines as $lineNode) { 504 $line = trim($lineNode['text']); 505 506 // [grammar] TRIPLEPATTERN := (VARIABLE|REFLIT) ' ' (VARIABLE|PREDICATE) TYPE? : ANY 507 if(preg_match("/^({$p->variable}|{$p->reflit})\s+({$p->variable}|{$p->predicate})\s*({$p->type})?\s*:\s*({$p->any})$/S",$line,$match)) { 508 list(, $subject, $predicate, $type, $object) = $match; 509 510 $subject = utf8_trim($subject); 511 if($subject[0] == '?') { 512 $subject = $this->variable($subject); 513 $scope[] = $subject['text']; 514 $this->updateTypemap($typemap, $subject['text'], 'ref'); 515 } else { 516 global $ID; 517 $subject = $p->reflit($subject)->reference; 518 $subject = $this->util->loadType('ref')->normalize($subject,null); 519 $subject = $this->literal($subject); 520 } 521 522 $predicate = utf8_trim($predicate); 523 if($predicate[0] == '?') { 524 $predicate = $this->variable($predicate); 525 $scope[] = $predicate['text']; 526 $this->updateTypemap($typemap, $predicate['text'], 'text'); 527 } else { 528 $predicate = $this->literal($this->util->normalizePredicate($predicate)); 529 } 530 531 $object = utf8_trim($object); 532 if($object[0] == '?') { 533 // match a proper type variable 534 if(preg_match("/^({$p->variable})\s*({$p->type})?$/",$object,$captures)!=1) { 535 $this->_fail($this->getLang('error_pattern_garbage'),$lineNode); 536 } 537 $var=$captures[1]??null; 538 $vtype=$captures[2]??null; 539 540 // create the object node 541 $object = $this->variable($var); 542 $scope[] = $object['text']; 543 544 // try direct type first, implied type second 545 $vtype = $p->type($vtype); 546 $type = $p->type($type); 547 if (isset ($type)) 548 { 549 $this->updateTypemap($typemap, $object['text'], $vtype->type, $vtype->hint); 550 $this->updateTypemap($typemap, $object['text'], $type->type, $type->hint); 551 } 552 } else { 553 // check for empty string token 554 if($object == '[[]]') { 555 $object=''; 556 } 557 if(!$type) { 558 list($type, $hint) = $this->util->getDefaultType(); 559 } else { 560 $type = $p->type($type); 561 $hint = $type->hint; 562 $type = $type->type; 563 } 564 $type = $this->util->loadType($type); 565 $object = $this->literal($type->normalize($object,$hint)); 566 } 567 568 $triples[] = array('type'=>'triple','subject'=>$subject, 'predicate'=>$predicate, 'object'=>$object); 569 570 // [grammar] FILTER := VARIABLE TYPE? OPERATOR VARIABLE TYPE? 571 } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) { 572 list(,$lhs, $ltype, $operator, $rhs, $rtype) = $match; 573 574 $lhs = $this->variable($lhs); 575 $rhs = $this->variable($rhs); 576 577 if($operator == '~>' || $operator == '!~>') $operator = str_replace('~>','^~',$operator); 578 579 // do type information propagation 580 $rtype = $p->type($rtype); 581 $ltype = $p->type($ltype); 582 583 if($ltype) { 584 // left has a defined type, so update the map 585 $this->updateTypemap($typemap, $lhs['text'], $ltype->type, $ltype->hint); 586 587 // and propagate to right if possible 588 if(!$rtype) { 589 $this->updateTypemap($typemap, $rhs['text'], $ltype->type, $lhint->hint); 590 } 591 } 592 if($rtype) { 593 // right has a defined type, so update the map 594 $this->updateTypemap($typemap, $rhs['text'], $rtype->type, $rtype->hint); 595 596 // and propagate to left if possible 597 if(!$ltype) { 598 $this->updateTypemap($typemap, $lhs['text'], $rtype->type, $rtype->hint); 599 } 600 } 601 602 $filters[] = array('type'=>'filter', 'lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode); 603 604 // [grammar] FILTER := VARIABLE TYPE? OPERATOR ANY 605 } elseif(preg_match("/^({$p->variable})\s*({$p->type})?\s*({$p->operator})\s*({$p->any})$/S",$line, $match)) { 606 607 // filter pattern 608 list(, $lhs,$ltype,$operator,$rhs) = $match; 609 610 $lhs = $this->variable($lhs); 611 612 // update typemap if a type was defined 613 list($type,$hint) = $p->type($ltype); 614 if($type) { 615 $this->updateTypemap($typemap, $lhs['text'],$type,$hint); 616 } else { 617 // use the already declared type if no type was defined 618 if(!empty($typemap[$lhs['text']])) { 619 extract($typemap[$lhs['text']]); 620 } else { 621 list($type, $hint) = $this->util->getDefaultType(); 622 } 623 } 624 625 // check for empty string token 626 if($rhs == '[[]]') { 627 $rhs = ''; 628 } 629 630 // special case: the right hand side of the 'in' operator always normalizes with the 'text' type 631 if($operator == '~>' || $operator == '!~>') { 632 $operator = str_replace('~>','^~', $operator); 633 $type = 'text'; 634 unset($hint); 635 } 636 637 // normalize 638 $type = $this->util->loadType($type); 639 $rhs = $this->literal($type->normalize($rhs,$hint)); 640 641 $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode); 642 643 // [grammar] FILTER := ANY OPERATOR VARIABLE TYPE? 644 } elseif(preg_match("/^({$p->any})\s*({$p->operator})\s*({$p->variable})\s*({$p->type})?$/S",$line, $match)) { 645 list(, $lhs,$operator,$rhs,$rtype) = $match; 646 647 $rhs = $this->variable($rhs); 648 649 // update typemap if a type was defined 650 list($type, $hint) = $p->type($rtype); 651 if($type) { 652 $this->updateTypemap($typemap, $rhs['text'],$type,$hint); 653 } else { 654 // use the already declared type if no type was defined 655 if(!empty($typemap[$rhs['text']])) { 656 extract($typemap[$rhs['text']]); 657 } else { 658 list($type, $hint) = $this->util->getDefaultType(); 659 } 660 } 661 662 // check for empty string token 663 if($lhs == '[[]]') { 664 $lhs = ''; 665 } 666 667 // special case: the left hand side of the 'in' operator always normalizes with the 'page' type 668 if($operator == '~>' || $operator == '!~>') { 669 $operator = str_replace('~>','^~', $operator); 670 $type = 'page'; 671 unset($hint); 672 } 673 674 // normalize 675 $type = $this->util->loadType($type); 676 $lhs = $this->literal($type->normalize($lhs,$hint)); 677 678 $filters[] = array('type'=>'filter','lhs'=>$lhs, 'operator'=>$operator, 'rhs'=>$rhs, '_line'=>$lineNode); 679 } else { 680 // unknown lines are fail 681 $this->_fail(sprintf($this->getLang('error_query_pattern'),utf8_tohtml(hsc($line))), $lineNode); 682 } 683 } 684 685 return array($triples, $filters, $scope); 686 } 687 688 function getFields(&$tree, &$typemap) { 689 $fields = array(); 690 691 // extract the projection information in 'long syntax' if available 692 $fieldsGroups = $this->extractGroups($tree, 'fields'); 693 694 // parse 'long syntax' if we don't have projection information yet 695 if(count($fieldsGroups)) { 696 if(count($fieldsGroups) > 1) { 697 $this->_fail($this->getLang('error_query_fieldsgroups'), $fieldsGroups); 698 } 699 700 $fieldsLines = $this->extractText($fieldsGroups[0]); 701 if(count($fieldsGroups[0]['cs'])) { 702 $this->_fail(sprintf($this->getLang('error_query_fieldsblock'),( isset($fieldsGroups[0]['cs'][0]['tag']) ? sprintf($this->getLang('named_group'),hsc($fieldsGroups[0]['cs'][0]['tag'])) : $this->getLang('unnamed_group'))), $fieldsGroups[0]['cs']); 703 } 704 $fields = $this->parseFieldsLong($fieldsLines, $typemap); 705 if(!$fields) return array(); 706 } 707 708 return $fields; 709 } 710 711 /** 712 * Parses a projection group in 'long syntax'. 713 */ 714 function parseFieldsLong($lines, &$typemap) { 715 $p = $this->getPatterns(); 716 $result = array(); 717 718 foreach($lines as $lineNode) { 719 $line = trim($lineNode['text']); 720 // FIELDLONG := VARIABLE AGGREGATE? TYPE? (':' ANY)? 721 if(preg_match("/^({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?(?:\s*(:)\s*({$p->any})?\s*)?$/S",$line, $match)) { 722 list(, $var, $vaggregate, $vtype, $nocaphint, $caption) = $match; 723 $variable = $p->variable($var)->name; 724 if(!$nocaphint || (!$nocaphint && !$caption)) $caption = ucfirst($variable); 725 726 list($type,$hint) = $p->type($vtype); 727 list($agg,$agghint) = $p->aggregate($vaggregate); 728 729 $this->updateTypemap($typemap, $variable, $type, $hint); 730 $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint); 731 } else { 732 $this->_fail(sprintf($this->getLang('error_query_fieldsline'),utf8_tohtml(hsc($line))), $lineNode); 733 } 734 } 735 736 return $result; 737 } 738 739 /** 740 * Parses a projection group in 'short syntax'. 741 */ 742 function parseFieldsShort($line, &$typemap) { 743 $p = $this->getPatterns(); 744 $result = array(); 745 746 // FIELDSHORT := VARIABLE AGGREGATE? TYPE? CAPTION? 747 if(preg_match_all("/\s*({$p->variable})\s*({$p->aggregate})?\s*({$p->type})?\s*(?:(\")([^\"]*)\")?/",$line,$match, PREG_SET_ORDER)) { 748 foreach($match as $m) { 749 $var=$m[1]??null; 750 $vaggregate=$m[2]??null; 751 $vtype=$m[3]??null; 752 $caption_indicator=$m[4]??null; 753 $caption=$m[5]??null; 754 755 $variable = $p->variable($var)->name; 756 list($type, $hint) = $p->type($vtype); 757 list($agg, $agghint) = $p->aggregate($vaggregate); 758 if(!$caption_indicator) $caption = ucfirst($variable); 759 $this->updateTypemap($typemap, $variable, $type, $hint); 760 $result[] = array('variable'=>$variable,'caption'=>$caption, 'aggregate'=>$agg, 'aggregateHint'=>$agghint, 'type'=>$type, 'hint'=>$hint); 761 } 762 } 763 764 return $result; 765 } 766 767 /** 768 * Returns the regex pattern used by the 'short syntax' for projection. This methods can 769 * be used to get a dokuwiki-lexer-safe regex to embed into your own syntax pattern. 770 * 771 * @param captions boolean Whether the pattern should include caption matching (defaults to true) 772 */ 773 function fieldsShortPattern($captions = true) { 774 $p = $this->getPatterns(); 775 return "(?:\s*{$p->variable}\s*{$p->aggregate}?\s*{$p->type}?".($captions?'\s*(?:"[^"]*")?':'').")"; 776 } 777 778 /** 779 * Constructs a tagged tree from the given list of lines. 780 * 781 * @return a tagged tree 782 */ 783 function constructTree($lines, $what) { 784 $root = array( 785 'tag'=>'', 786 'cs'=>array(), 787 'start'=>1, 788 'end'=>1 789 ); 790 791 $stack = array(); 792 $stack[] =& $root; 793 $top = count($stack)-1; 794 $lineCount = 0; 795 796 foreach($lines as $line) { 797 $lineCount++; 798 if($this->ignorableLine($line)) continue; 799 800 if(preg_match('/^([^\{]*) *{$/',utf8_trim($line),$match)) { 801 list(, $tag) = $match; 802 $tag = utf8_trim($tag); 803 804 $stack[$top]['cs'][] = array( 805 'tag'=>$tag?:null, 806 'cs'=>array(), 807 'start'=>$lineCount, 808 'end'=>0 809 ); 810 $stack[] =& $stack[$top]['cs'][count($stack[$top]['cs'])-1]; 811 $top = count($stack)-1; 812 813 } elseif(preg_match('/^}$/',utf8_trim($line))) { 814 $stack[$top]['end'] = $lineCount; 815 array_pop($stack); 816 $top = count($stack)-1; 817 818 } else { 819 $stack[$top]['cs'][] = array( 820 'text'=>$line, 821 'start'=>$lineCount, 822 'end'=>$lineCount 823 ); 824 } 825 } 826 827 if(count($stack) != 1 || $stack[0] != $root) { 828 msg(sprintf($this->getLang('error_syntax_braces'),$what),-1); 829 } 830 831 $root['end'] = $lineCount; 832 833 return $root; 834 } 835 836 /** 837 * Renders a debug display of the syntax. 838 * 839 * @param lines array the lines that form the syntax 840 * @param region array the region to highlight 841 * @return a string with markup 842 */ 843 function debugTree($lines, $regions) { 844 $result = ''; 845 $lineCount = 0; 846 $count = 0; 847 848 foreach($lines as $line) { 849 $lineCount++; 850 851 foreach($regions as $region) { 852 if($lineCount == $region['start']) { 853 if($count == 0) $result .= '<div class="strata-debug-highlight">'; 854 $count++; 855 } 856 857 if($lineCount == $region['end']+1) { 858 $count--; 859 860 if($count==0) $result .= '</div>'; 861 } 862 } 863 864 if($line != '') { 865 $result .= '<div class="strata-debug-line">'.hsc($line).'</div>'."\n"; 866 } else { 867 $result .= '<div class="strata-debug-line"><br/></div>'."\n"; 868 } 869 } 870 871 if($count > 0) { 872 $result .= '</div>'; 873 } 874 875 return '<div class="strata-debug">'.$result.'</div>'; 876 } 877 878 /** 879 * Extract all occurences of tagged groups from the given tree. 880 * This method does not remove the tagged groups from subtrees of 881 * the given root. 882 * 883 * @param root array the tree to operate on 884 * @param tag string the tag to remove 885 * @return an array of groups 886 */ 887 function extractGroups(&$root, $tag) { 888 $result = array(); 889 $to_remove = array(); 890 foreach($root['cs'] as $i=>&$tree) { 891 if(!$this->isGroup($tree)) continue; 892 if($tree['tag'] == $tag || (($tag=='' || $tag==null) && $tree['tag'] == null) ) { 893 $result[] =& $tree; 894 $to_remove[] = $i; 895 } 896 } 897 // invert order of to_remove to always remove higher indices first 898 rsort($to_remove); 899 foreach($to_remove as $i) { 900 array_splice($root['cs'],$i,1); 901 } 902 return $result; 903 } 904 905 /** 906 * Extracts all text elements from the given tree. 907 * This method does not remove the text elements from subtrees 908 * of the root. 909 * 910 * @param root array the tree to operate on 911 * @return array an array of text elements 912 */ 913 function extractText(&$root) { 914 $result = array(); 915 $to_remove = array(); 916 foreach($root['cs'] as $i=>&$tree) { 917 if(!$this->isText($tree)) continue; 918 $result[] =& $tree; 919 $to_remove[] = $i; 920 } 921 // invert order of to_remove to always remove higher indices first 922 rsort($to_remove); 923 foreach($to_remove as $i) { 924 array_splice($root['cs'],$i,1); 925 } 926 return $result; 927 } 928 929 /** 930 * Returns whether the given node is a line. 931 */ 932 function isText(&$node) { 933 return array_key_exists('text', $node); 934 } 935 936 /** 937 * Returns whether the given node is a group. 938 */ 939 function isGroup(&$node) { 940 return array_key_exists('tag', $node); 941 } 942 943 /** 944 * Sets all properties given as '$properties' to the values parsed from '$trees'. 945 * 946 * The property array has as keys all possible properties, which are specified by its 947 * values. Such specification is an array that may have the following keys, with the 948 * described values: 949 * - choices: array of possible values, where the keys are the internally used values 950 * and the values specify synonyms for the choice, of which the first listed one 951 * is most common. For example: 'true' => array('yes', 'yeah') specifies that the 952 * user can choose 'yes' or 'yeah' (of which 'yes' is the commonly used value) and 953 * that the return value will contain 'true' if this choice was chosen. 954 * - pattern: regular expression that defines all possible values. 955 * - pattern_desc: description used for errors when a pattern is specified. 956 * - minOccur: positive integer specifying the minimum number of values, defaults to 1. 957 * - maxOccur: integer greater than or equal to minOccur, which specifies the maximum 958 * number of values, defaults to minOccur. 959 * - default: the default value (which must be a value the user is allowed to set). 960 * When default is given, this method guarantees that the property is always set, 961 * otherwise the property may not be set since all properties are optional. 962 * Either 'choices' or 'pattern' must be set (not both), all other values are optional. 963 * 964 * An example property array is as follows: 965 * array( 966 * 'example boolean' => array( 967 * 'choices' => array('y' => array('yes', 'yeah'), 'n' => array('no', 'nay')), 968 * 'minOccur' => 1, 969 * 'maxOccur' => 3, 970 * 'default' => 'yes' 971 * ), 972 * 'example natural number' => array( 973 * 'pattern' => '/^[0-9]+$/', 974 * 'pattern_desc' => $this->getLang('property_Z*') 975 * ) 976 * ) 977 * 978 * @param $properties The properties that can be set. 979 * @param $trees The trees that contain the values for these properties. 980 * @return An array with as indices the property names and as value a list of all values given for that property. 981 */ 982 function setProperties($properties, $trees) { 983 $propertyValues = array(); 984 $p = $this->getPatterns(); 985 986 foreach ($trees as $tree) { 987 $text = $this->extractText($tree); 988 foreach($text as $lineNode) { 989 $line = utf8_trim($lineNode['text']); 990 if (preg_match('/^('.$p->predicate.')(\*)?\s*:\s*('.$p->any.')$/', $line, $match)) { 991 list(, $variable, $multi, $value) = $match; 992 $this->_setPropertyValue($properties, $tree['tag'], $lineNode, $variable, !empty($multi), $value, $propertyValues); 993 } else { 994 $this->emitError($lineNode, 'error_property_weirdgroupline', hsc($tree['tag']), hsc($line)); 995 } 996 } 997 // Warn about unknown groups 998 foreach ($tree['cs'] as $group) { 999 $this->emitError($group, 'error_property_unknowngroup', hsc($trees[0]['tag']), hsc($group['tag'])); 1000 } 1001 } 1002 1003 // Set property defaults 1004 foreach ($properties as $name => $p) { 1005 if (!isset($propertyValues[$name]) && isset($p['default'])) { 1006 $this->_setPropertyValue($properties, 'default value', null, $name, false, $p['default'], $propertyValues); 1007 } 1008 } 1009 1010 // Show errors, if any 1011 $this->showErrors(); 1012 1013 return $propertyValues; 1014 } 1015 1016 function _setPropertyValue($properties, $group, $region, $variable, $isMulti, $value, &$propertyValues) { 1017 if (!isset($properties[$variable])) { 1018 // Unknown property: show error 1019 $property_title_values = $this->getLang('property_title_values'); 1020 $propertyList = implode(', ', array_map(function ($n, $p) use ($property_title_values) { 1021 $values = implode(', ', array_map(function ($c) { 1022 return $c[0]; 1023 }, $p['choices'])); 1024 $title = sprintf($property_title_values, $values); 1025 return '\'<code title="' . hsc($title) . '">' . hsc($n) . '</code>\''; 1026 }, array_keys($properties), $properties)); 1027 $this->emitError($region, 'error_property_unknownproperty', hsc($group), hsc($variable), $propertyList); 1028 } else if (isset($propertyValues[$variable])) { 1029 // Property is specified more than once: show error 1030 $this->emitError($region, 'error_property_multi', hsc($group), hsc($variable)); 1031 } else { 1032 $p = $properties[$variable]; 1033 $minOccur = isset($p['minOccur']) ? $p['minOccur'] : 1; 1034 $maxOccur = isset($p['maxOccur']) ? $p['maxOccur'] : $minOccur; 1035 1036 if ($isMulti) { 1037 $values = array_map('utf8_trim', explode(',', $value)); 1038 } else if ($minOccur == 1 || $minOccur == $maxOccur) { 1039 // Repeat the given value as often as we expect it 1040 $values = array_fill(0, $minOccur, $value); 1041 } else { 1042 // A single value was given, but multiple were expected 1043 $this->emitError($region, 'error_property_notmulti', hsc($group), hsc($variable), $minOccur); 1044 return; 1045 } 1046 1047 if (count($values) < $minOccur || count($values) > $maxOccur) { 1048 // Number of values given differs from expected number 1049 if ($minOccur == $maxOccur) { 1050 $this->emitError($region, 'error_property_occur', hsc($group), hsc($variable), $minOccur, count($values)); 1051 } else { 1052 $this->emitError($region, 'error_property_occurrange', hsc($group), hsc($variable), $minOccur, $maxOccur, count($values)); 1053 } 1054 } else if (isset($p['choices'])) { // Check whether the given property values are valid choices 1055 // Create a mapping from choice to normalized value of the choice 1056 $choices = array(); 1057 $choicesInfo = array(); // For nice error messages 1058 foreach ($p['choices'] as $nc => $c) { 1059 if (is_array($c)) { 1060 $choices = array_merge($choices, array_fill_keys($c, $nc)); 1061 $title = sprintf($this->getLang('property_title_synonyms'), implode(', ', $c)); 1062 $choicesInfo[] = '\'<code title="' . hsc($title) . '">' . hsc($c[0]) . '</code>\''; 1063 } else { 1064 $choices[$c] = $c; 1065 $choicesInfo[] = '\'<code>' . hsc($c) . '</code>\''; 1066 } 1067 } 1068 if (!isset($choices['']) && isset($p['default'])) { 1069 $choices[''] = $choices[$p['default']]; 1070 } 1071 1072 $incorrect = array_diff($values, array_keys($choices)); // Find all values that are not a valid choice 1073 if (count($incorrect) > 0) { 1074 unset($choices['']); 1075 foreach (array_unique($incorrect) as $v) { 1076 $this->emitError($region, 'error_property_invalidchoice', hsc($group), hsc($variable), hsc($v), implode(', ', $choicesInfo)); 1077 } 1078 } else { 1079 $propertyValues[$variable] = array_map(function($v) use ($choices) { return $choices[$v]; }, $values); 1080 } 1081 } else if (isset($p['pattern'])) { // Check whether the given property values match the pattern 1082 $incorrect = array_filter($values, function($v) use ($p) { return !preg_match($p['pattern'], $v); }); 1083 if (count($incorrect) > 0) { 1084 foreach (array_unique($incorrect) as $v) { 1085 if (isset($p['pattern_desc'])) { 1086 $this->emitError($region, 'error_property_patterndesc', hsc($group), hsc($variable), hsc($v), $p['pattern_desc']); 1087 } else { 1088 $this->emitError($region, 'error_property_pattern', hsc($group), hsc($variable), hsc($v), hsc($p['pattern'])); 1089 } 1090 } 1091 } else { 1092 $propertyValues[$variable] = $values; 1093 } 1094 } else { // Property value has no requirements 1095 $propertyValues[$variable] = $values; 1096 } 1097 } 1098 } 1099 1100 /** 1101 * Generates a html error message, ensuring that all utf8 in arguments is escaped correctly. 1102 * The generated messages might be accumulated until showErrors is called. 1103 * 1104 * @param region The region at which the error occurs. 1105 * @param msg_id The id of the message in the language file. 1106 */ 1107 function emitError($region, $msg_id) { 1108 $args = func_get_args(); 1109 array_shift($args); 1110 array_shift($args); 1111 $args = array_map('strval', $args); // convert everything to strings first 1112 $args = array_map('utf8_tohtml', $args); // Escape args 1113 $msg = vsprintf($this->getLang($msg_id), $args); 1114 msg($msg, -1); 1115 $this->error .= "<br />\n" . $msg; 1116 $this->regions[] = $region; 1117 } 1118 1119 /** 1120 * Ensures that all emitted errors are shown. 1121 */ 1122 function showErrors() { 1123 if (!empty($this->error)) { 1124 $error = $this->error; 1125 $regions = $this->regions; 1126 $this->error = ''; 1127 $this->regions = array(); 1128 throw new strata_exception($error, $regions); 1129 } 1130 } 1131} 1132 1133// call static initiliazer (PHP doesn't offer this feature) 1134helper_plugin_strata_syntax::initialize(); 1135