xref: /plugin/struct/meta/Search.php (revision ba7f5789bbbcab95d7a655f6ec50a97b731b40d5)
1<?php
2
3namespace dokuwiki\plugin\struct\meta;
4
5use dokuwiki\Debug\DebugHelper;
6use dokuwiki\Parsing\Lexer\Lexer;
7use dokuwiki\plugin\struct\types\AutoSummary;
8use dokuwiki\plugin\struct\types\DateTime;
9use dokuwiki\plugin\struct\types\Decimal;
10use dokuwiki\plugin\struct\types\Page;
11use dokuwiki\plugin\struct\types\User;
12
13class Search
14{
15    /**
16     * This separator will be used to concat multi values to flatten them in the result set
17     */
18    public const CONCAT_SEPARATOR = "\n!_-_-_-_-_!\n";
19
20    /**
21     * The list of known and allowed comparators
22     * (order matters)
23     */
24    public static $COMPARATORS = ['<=', '>=', '=*', '=', '<', '>', '!=', '!~', '~', ' IN '];
25
26    /** @var \helper_plugin_struct_db */
27    protected $dbHelper;
28
29    /** @var  \helper_plugin_sqlite */
30    protected $sqlite;
31
32    /** @var Schema[] list of schemas to query */
33    protected $schemas = [];
34
35    /** @var Column[] list of columns to select */
36    protected $columns = [];
37
38    /** @var array the sorting of the result */
39    protected $sortby = [];
40
41    /** @var array the filters */
42    protected $filter = [];
43
44    /** @var array the filters */
45    protected $dynamicFilter = [];
46
47    /** @var array list of aliases tables can be referenced by */
48    protected $aliases = [];
49
50    /** @var  int begin results from here */
51    protected $range_begin = 0;
52
53    /** @var  int end results here */
54    protected $range_end = 0;
55
56    /**
57     * @var SearchResult
58     */
59    protected $result;
60
61    /** @var bool Include latest = 1 in select query */
62    protected $selectLatest = true;
63
64    /**
65     * Search constructor.
66     */
67    public function __construct()
68    {
69        /** @var  $dbHelper */
70        $this->dbHelper = plugin_load('helper', 'struct_db');
71        $this->sqlite = $this->dbHelper->getDB();
72    }
73
74    public function getDb()
75    {
76        return $this->sqlite;
77    }
78
79    /**
80     * Add a schema to be searched
81     *
82     * Call multiple times for multiple schemas.
83     *
84     * @param string $table
85     * @param string $alias
86     */
87    public function addSchema($table, $alias = '')
88    {
89        $schema = new Schema($table);
90        if (!$schema->getId()) {
91            throw new StructException('schema missing', $table);
92        }
93
94        $this->schemas[$schema->getTable()] = $schema;
95        if ($alias) $this->aliases[$alias] = $schema->getTable();
96    }
97
98    /**
99     * Add a column to be returned by the search
100     *
101     * Call multiple times for multiple columns. Be sure the referenced tables have been
102     * added before
103     *
104     * @param string $colname may contain an alias
105     */
106    public function addColumn($colname)
107    {
108        if ($this->processWildcard($colname)) return; // wildcard?
109        if ($colname[0] == '-') { // remove column from previous wildcard lookup
110            $colname = substr($colname, 1);
111            foreach ($this->columns as $key => $col) {
112                if ($col->getLabel() === $colname) unset($this->columns[$key]);
113            }
114            return;
115        }
116
117        $col = $this->findColumn($colname);
118        if (!$col) return; //FIXME do we really want to ignore missing columns?
119        $this->columns[] = $col;
120    }
121
122    /**
123     * Add sorting options
124     *
125     * Call multiple times for multiple columns. Be sure the referenced tables have been
126     * added before
127     *
128     * @param string $colname may contain an alias
129     * @param bool $asc sort direction (ASC = true, DESC = false)
130     * @param bool $nc set true for caseinsensitivity
131     */
132    public function addSort($colname, $asc = true, $nc = true)
133    {
134        $col = $this->findColumn($colname);
135        if (!$col) return; //FIXME do we really want to ignore missing columns?
136
137        $this->sortby[$col->getFullQualifiedLabel()] = [$col, $asc, $nc];
138    }
139
140    /**
141     * Clear all sorting options
142     *
143     * @return void
144     */
145    public function clearSort()
146    {
147        $this->sortby = [];
148    }
149
150    /**
151     * Returns all set sort columns
152     *
153     * @return array
154     */
155    public function getSorts()
156    {
157        return $this->sortby;
158    }
159
160    /**
161     * Adds a filter
162     *
163     * @param string $colname may contain an alias
164     * @param string|string[] $value
165     * @param string $comp @see self::COMPARATORS
166     * @param string $op either 'OR' or 'AND'
167     */
168    public function addFilter($colname, $value, $comp, $op = 'OR')
169    {
170        $filter = $this->createFilter($colname, $value, $comp, $op);
171        if ($filter) $this->filter[] = $filter;
172    }
173
174    /**
175     * Adds a dynamic filter
176     *
177     * @param string $colname may contain an alias
178     * @param string|string[] $value
179     * @param string $comp @see self::COMPARATORS
180     * @param string $op either 'OR' or 'AND'
181     */
182    public function addDynamicFilter($colname, $value, $comp, $op = 'OR')
183    {
184        $filter = $this->createFilter($colname, $value, $comp, $op);
185        if ($filter) $this->dynamicFilter[] = $filter;
186    }
187
188    /**
189     * Create a filter definition
190     *
191     * @param string $colname may contain an alias
192     * @param string|string[] $value
193     * @param string $comp @see self::COMPARATORS
194     * @param string $op either 'OR' or 'AND'
195     * @return array|null [Column col, string|string[] value, string comp, string op]
196     */
197    protected function createFilter($colname, $value, $comp, $op = 'OR')
198    {
199        /* Convert certain filters into others
200         * this reduces the number of supported filters to implement in types */
201        if ($comp == '*~') {
202            $value = $this->filterWrapAsterisks($value);
203            $comp = '~';
204        } elseif ($comp == '<>') {
205            $comp = '!=';
206        }
207
208        if (!in_array($comp, self::$COMPARATORS))
209            throw new StructException("Bad comperator. Use " . implode(',', self::$COMPARATORS));
210        if ($op != 'OR' && $op != 'AND')
211            throw new StructException('Bad filter type . Only AND or OR allowed');
212
213        $col = $this->findColumn($colname);
214        if (!$col) return null; // ignore missing columns, filter might have been for different schema
215
216        // map filter operators to SQL syntax
217        switch ($comp) {
218            case '~':
219                $comp = 'LIKE';
220                break;
221            case '!~':
222                $comp = 'NOT LIKE';
223                break;
224            case '=*':
225                $comp = 'REGEXP';
226                break;
227        }
228
229        // we use asterisks, but SQL wants percents
230        if ($comp == 'LIKE' || $comp == 'NOT LIKE') {
231            $value = $this->filterChangeToLike($value);
232        }
233
234        if ($comp == ' IN ' && !is_array($value)) {
235            $value = $this->parseFilterValueList($value);
236            //col IN ('a', 'b', 'c') is equal to col = 'a' OR 'col = 'b' OR col = 'c'
237            $comp = '=';
238        }
239
240        // add the filter
241        return [$col, $value, $comp, $op];
242    }
243
244    /**
245     * Parse SQLite row value into array
246     *
247     * @param string $value
248     * @return string[]
249     */
250    protected function parseFilterValueList($value)
251    {
252        $Handler = new FilterValueListHandler();
253        $LexerClass = class_exists('\Doku_Lexer') ? '\Doku_Lexer' : '\dokuwiki\Parsing\Lexer\Lexer';
254        $isLegacy = $LexerClass === '\Doku_Lexer';
255        /** @var \Doku_Lexer|Lexer $Lexer */
256        $Lexer = new $LexerClass($Handler, 'base', true);
257
258
259        $Lexer->addEntryPattern('\(', 'base', 'row');
260        $Lexer->addPattern('\s*,\s*', 'row');
261        $Lexer->addExitPattern('\)', 'row');
262
263        $Lexer->addEntryPattern('"', 'row', 'double_quote_string');
264        $Lexer->addSpecialPattern('\\\\"', 'double_quote_string', 'escapeSequence');
265        $Lexer->addExitPattern('"', 'double_quote_string');
266
267        $Lexer->addEntryPattern("'", 'row', 'singleQuoteString');
268        $Lexer->addSpecialPattern("\\\\'", 'singleQuoteString', 'escapeSequence');
269        $Lexer->addExitPattern("'", 'singleQuoteString');
270
271        $Lexer->mapHandler('double_quote_string', 'singleQuoteString');
272
273        $Lexer->addSpecialPattern('[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?', 'row', 'number');
274
275        $res = $Lexer->parse($value);
276
277        $currentMode = $isLegacy ? $Lexer->_mode->getCurrent() : $Lexer->getModeStack()->getCurrent();
278        if (!$res || $currentMode != 'base') {
279            throw new StructException('invalid row value syntax');
280        }
281
282        return $Handler->getRow();
283    }
284
285    /**
286     * Wrap given value in asterisks
287     *
288     * @param string|string[] $value
289     * @return string|string[]
290     */
291    protected function filterWrapAsterisks($value)
292    {
293        $map = static fn($input) => "*$input*";
294
295        if (is_array($value)) {
296            $value = array_map($map, $value);
297        } else {
298            $value = $map($value);
299        }
300        return $value;
301    }
302
303    /**
304     * Change given string to use % instead of *
305     *
306     * @param string|string[] $value
307     * @return string|string[]
308     */
309    protected function filterChangeToLike($value)
310    {
311        $map = static fn($input) => str_replace('*', '%', $input);
312
313        if (is_array($value)) {
314            $value = array_map($map, $value);
315        } else {
316            $value = $map($value);
317        }
318        return $value;
319    }
320
321    /**
322     * Set offset for the results
323     *
324     * @param int $offset
325     */
326    public function setOffset($offset)
327    {
328        $limit = 0;
329        if ($this->range_end) {
330            // if there was a limit set previously, the range_end needs to be recalculated
331            $limit = $this->range_end - $this->range_begin;
332        }
333        $this->range_begin = $offset;
334        if ($limit) $this->setLimit($limit);
335    }
336
337    /**
338     * Get the current offset for the results
339     *
340     * @return int
341     */
342    public function getOffset()
343    {
344        return $this->range_begin;
345    }
346
347    /**
348     * Limit results to this number
349     *
350     * @param int $limit Set to 0 to disable limit again
351     */
352    public function setLimit($limit)
353    {
354        if ($limit) {
355            $this->range_end = $this->range_begin + $limit;
356        } else {
357            $this->range_end = 0;
358        }
359    }
360
361    /**
362     * Get the current limit for the results
363     *
364     * @return int
365     */
366    public function getLimit()
367    {
368        if ($this->range_end) {
369            return $this->range_end - $this->range_begin;
370        }
371        return 0;
372    }
373
374    /**
375     * Allows disabling default 'latest = 1' clause in select statement
376     *
377     * @param bool $selectLatest
378     */
379    public function setSelectLatest($selectLatest): void
380    {
381        $this->selectLatest = $selectLatest;
382    }
383
384    /**
385     * If the search result object does not exist yet,
386     * the search is run and the result object returned
387     *
388     * @return SearchResult
389     */
390    public function getResult()
391    {
392        if (is_null($this->result)) {
393            $this->result = new SearchResult();
394            $this->run();
395        }
396        return $this->result;
397    }
398
399    /**
400     * Return the number of results (regardless of limit and offset settings)
401     */
402    public function getCount()
403    {
404        return $this->getResult()->getCount();
405    }
406
407    /**
408     * Returns the PID associated with each result row
409     */
410    public function getPids()
411    {
412        return $this->getResult()->getPids();
413    }
414
415    /**
416     * Returns the rid associated with each result row
417     *
418     * @return array
419     */
420    public function getRids()
421    {
422        return $this->getResult()->getRids();
423    }
424
425    /**
426     * Returns the revisions of search results
427     *
428     * @return array
429     */
430    public function getRevs()
431    {
432        return $this->getResult()->getRevs();
433    }
434
435    /**
436     * Returns the actual result rows
437     *
438     * @return Value[][]
439     */
440    public function getRows()
441    {
442        return $this->getResult()->getRows();
443    }
444
445    /**
446     * Execute this search and return the result
447     *
448     * The result is a two dimensional array of Value()s.
449     *
450     * This will always query for the full result (not using offset and limit) and then
451     * return the wanted range, setting the count to the whole result number
452     *
453     * @deprecated Use getRows() instead
454     * @return Value[][]
455     */
456    public function execute()
457    {
458        DebugHelper::dbgDeprecatedFunction('\dokuwiki\plugin\struct\meta\Search::getRows()');
459        return $this->getRows();
460    }
461
462    /**
463     * Run the actual search and populate the result object
464     *
465     * @return void
466     */
467    protected function run()
468    {
469        [$sql, $opts] = $this->getSQL();
470
471        /** @var \PDOStatement $res */
472        $res = $this->sqlite->query($sql, $opts);
473        if ($res === false) throw new StructException("SQL execution failed for\n\n$sql");
474
475        $pageidAndRevOnly = array_reduce(
476            $this->columns,
477            static fn($pageidAndRevOnly, Column $col) => $pageidAndRevOnly && ($col->getTid() == 0),
478            true
479        );
480        while ($row = $res->fetch(\PDO::FETCH_ASSOC)) {
481            $this->result->increaseCount();
482            if ($this->result->getCount() < $this->range_begin) continue;
483            if ($this->range_end && $this->result->getCount() >= $this->range_end) continue;
484
485            $C = 0;
486            $resrow = [];
487            $isempty = true;
488            foreach ($this->columns as $col) {
489                $val = $row["C$C"];
490                if ($col->isMulti()) {
491                    $val = explode(self::CONCAT_SEPARATOR, $val);
492                }
493                $value = new Value($col, $val);
494                $isempty &= $this->isEmptyValue($value);
495                $resrow[] = $value;
496                $C++;
497            }
498
499            // skip empty rows
500            if ($isempty && !$pageidAndRevOnly) {
501                $this->result->decreaseCount();
502                continue;
503            }
504
505            $this->result->addPid($row['PID']);
506            $this->result->addRid($row['rid']);
507            $this->result->addRev($row['rev']);
508            $this->result->addRow($resrow);
509        }
510
511        $res->closeCursor();
512        $this->result->increaseCount();
513    }
514
515    /**
516     * Transform the set search parameters into a statement
517     *
518     * Calls runSQLBuilder()
519     *
520     * @return array ($sql, $opts) The SQL and parameters to execute
521     */
522    public function getSQL()
523    {
524        if (!$this->columns) throw new StructException('nocolname');
525        return $this->runSQLBuilder()->getSQL();
526    }
527
528    /**
529     * Initialize and execute the SQLBuilder
530     *
531     * Called from getSQL(). Can be overwritten to extend the query using the query builder
532     *
533     * @return SearchSQLBuilder
534     */
535    protected function runSQLBuilder()
536    {
537        $sqlBuilder = new SearchSQLBuilder();
538        $sqlBuilder->setSelectLatest($this->selectLatest);
539        $sqlBuilder->addSchemas($this->schemas);
540        $sqlBuilder->addColumns($this->columns);
541        $sqlBuilder->addFilters($this->filter);
542        $sqlBuilder->addFilters($this->dynamicFilter);
543        $sqlBuilder->addSorts($this->sortby);
544        return $sqlBuilder;
545    }
546
547    /**
548     * Returns all the columns that where added to the search
549     *
550     * @return Column[]
551     */
552    public function getColumns()
553    {
554        return $this->columns;
555    }
556
557    /**
558     * All the schemas currently added
559     *
560     * @return Schema[]
561     */
562    public function getSchemas()
563    {
564        return array_values($this->schemas);
565    }
566
567    /**
568     * Checks if the given column is a * wildcard
569     *
570     * If it's a wildcard all matching columns are added to the column list, otherwise
571     * nothing happens
572     *
573     * @param string $colname
574     * @return bool was wildcard?
575     */
576    protected function processWildcard($colname)
577    {
578        [$colname, $table] = $this->resolveColumn($colname);
579        if ($colname !== '*') return false;
580
581        // no table given? assume the first is meant
582        if ($table === null) {
583            $schema_list = array_keys($this->schemas);
584            $table = $schema_list[0];
585        }
586
587        $schema = $this->schemas[$table] ?? null;
588        if (!$schema instanceof Schema) return false;
589        $this->columns = array_merge($this->columns, $schema->getColumns(false));
590        return true;
591    }
592
593    /**
594     * Split a given column name into table and column
595     *
596     * Handles Aliases. Table might be null if none given.
597     *
598     * @param $colname
599     * @return array (colname, table)
600     */
601    protected function resolveColumn($colname)
602    {
603        if (!$this->schemas) throw new StructException('noschemas');
604
605        // resolve the alias or table name
606        [$table, $colname] = sexplode('.', $colname, 2, '');
607        if (!$colname) {
608            $colname = $table;
609            $table = null;
610        }
611        if ($table && isset($this->aliases[$table])) {
612            $table = $this->aliases[$table];
613        }
614
615        if (!$colname) throw new StructException('nocolname');
616
617        return [$colname, $table];
618    }
619
620    /**
621     * Find a column to be used in the search
622     *
623     * @param string $colname may contain an alias
624     * @return bool|Column
625     */
626    public function findColumn($colname, $strict = false)
627    {
628        if (!$this->schemas) throw new StructException('noschemas');
629        $schema_list = array_keys($this->schemas);
630
631        // add "fake" column for special col
632        if ($colname == '%pageid%') {
633            return new PageColumn(0, new Page(), $schema_list[0]);
634        }
635        if ($colname == '%title%') {
636            return new PageColumn(0, new Page(['usetitles' => true]), $schema_list[0]);
637        }
638        if ($colname == '%lastupdate%') {
639            return new RevisionColumn(0, new DateTime(), $schema_list[0]);
640        }
641        if ($colname == '%lasteditor%') {
642            return new UserColumn(0, new User(), $schema_list[0]);
643        }
644        if ($colname == '%lastsummary%') {
645            return new SummaryColumn(0, new AutoSummary(), $schema_list[0]);
646        }
647        if ($colname == '%rowid%') {
648            return new RowColumn(0, new Decimal(), $schema_list[0]);
649        }
650        if ($colname == '%published%') {
651            return new PublishedColumn(0, new Decimal(), $schema_list[0]);
652        }
653
654        [$colname, $table] = $this->resolveColumn($colname);
655
656        /*
657         * If table name is given search only that, otherwise if no strict behavior
658         * is requested by the caller, try all assigned schemas for matching the
659         * column name.
660         */
661        if ($table !== null && isset($this->schemas[$table])) {
662            $schemas = [$table => $this->schemas[$table]];
663        } elseif ($table === null || !$strict) {
664            $schemas = $this->schemas;
665        } else {
666            return false;
667        }
668
669        // find it
670        $col = false;
671        foreach ($schemas as $schema) {
672            if (empty($schema)) {
673                continue;
674            }
675            $col = $schema->findColumn($colname);
676            if ($col) break;
677        }
678
679        return $col;
680    }
681
682    /**
683     * Check if the given row is empty or references our own row
684     *
685     * @param Value $value
686     * @return bool
687     */
688    protected function isEmptyValue(Value $value)
689    {
690        if ($value->isEmpty()) return true;
691        if ($value->getColumn()->getTid() == 0) return true;
692        return false;
693    }
694}
695