1<?php
2
3namespace dokuwiki\plugin\struct\meta;
4
5use dokuwiki\Debug\DebugHelper;
6use dokuwiki\Parsing\Lexer\Lexer;
7use dokuwiki\plugin\struct\types\AutoSummary;
8use dokuwiki\plugin\struct\types\DateTime;
9use dokuwiki\plugin\struct\types\Decimal;
10use dokuwiki\plugin\struct\types\Page;
11use dokuwiki\plugin\struct\types\User;
12
13class Search
14{
15    /**
16     * This separator will be used to concat multi values to flatten them in the result set
17     */
18    public const CONCAT_SEPARATOR = "\n!_-_-_-_-_!\n";
19
20    /**
21     * The list of known and allowed comparators
22     * (order matters)
23     */
24    public static $COMPARATORS = ['<=', '>=', '=*', '=', '<', '>', '!=', '!~', '~', ' IN '];
25
26    /** @var \helper_plugin_struct_db */
27    protected $dbHelper;
28
29    /** @var  \helper_plugin_sqlite */
30    protected $sqlite;
31
32    /** @var Schema[] list of schemas to query */
33    protected $schemas = [];
34
35    /** @var Column[] list of columns to select */
36    protected $columns = [];
37
38    /** @var array the sorting of the result */
39    protected $sortby = [];
40
41    /** @var array the filters */
42    protected $filter = [];
43
44    /** @var array the filters */
45    protected $dynamicFilter = [];
46
47    /** @var array list of aliases tables can be referenced by */
48    protected $aliases = [];
49
50    /** @var  int begin results from here */
51    protected $range_begin = 0;
52
53    /** @var  int end results here */
54    protected $range_end = 0;
55
56    /**
57     * @var SearchResult
58     */
59    protected $result;
60
61    /** @var bool Include latest = 1 in select query */
62    protected $selectLatest = true;
63
64    /**
65     * Search constructor.
66     */
67    public function __construct()
68    {
69        /** @var  $dbHelper */
70        $this->dbHelper = plugin_load('helper', 'struct_db');
71        $this->sqlite = $this->dbHelper->getDB();
72    }
73
74    public function getDb()
75    {
76        return $this->sqlite;
77    }
78
79    /**
80     * Add a schema to be searched
81     *
82     * Call multiple times for multiple schemas.
83     *
84     * @param string $table
85     * @param string $alias
86     */
87    public function addSchema($table, $alias = '')
88    {
89        $schema = new Schema($table);
90        if (!$schema->getId()) {
91            throw new StructException('schema missing', $table);
92        }
93
94        $this->schemas[$schema->getTable()] = $schema;
95        if ($alias) $this->aliases[$alias] = $schema->getTable();
96    }
97
98    /**
99     * Add a column to be returned by the search
100     *
101     * Call multiple times for multiple columns. Be sure the referenced tables have been
102     * added before
103     *
104     * @param string $colname may contain an alias
105     */
106    public function addColumn($colname)
107    {
108        if ($this->processWildcard($colname)) return; // wildcard?
109        if ($colname[0] == '-') { // remove column from previous wildcard lookup
110            $colname = substr($colname, 1);
111            foreach ($this->columns as $key => $col) {
112                if ($col->getLabel() === $colname) unset($this->columns[$key]);
113            }
114            return;
115        }
116
117        $col = $this->findColumn($colname);
118        if (!$col) return; //FIXME do we really want to ignore missing columns?
119        $this->columns[] = $col;
120    }
121
122    /**
123     * Add sorting options
124     *
125     * Call multiple times for multiple columns. Be sure the referenced tables have been
126     * added before
127     *
128     * @param string $colname may contain an alias
129     * @param bool $asc sort direction (ASC = true, DESC = false)
130     * @param bool $nc set true for caseinsensitivity
131     */
132    public function addSort($colname, $asc = true, $nc = true)
133    {
134        $col = $this->findColumn($colname);
135        if (!$col) return; //FIXME do we really want to ignore missing columns?
136
137        $this->sortby[$col->getFullQualifiedLabel()] = [$col, $asc, $nc];
138    }
139
140    /**
141     * Clear all sorting options
142     *
143     * @return void
144     */
145    public function clearSort()
146    {
147        $this->sortby = [];
148    }
149
150    /**
151     * Returns all set sort columns
152     *
153     * @return array
154     */
155    public function getSorts()
156    {
157        return $this->sortby;
158    }
159
160    /**
161     * Adds a filter
162     *
163     * @param string $colname may contain an alias
164     * @param string|string[] $value
165     * @param string $comp @see self::COMPARATORS
166     * @param string $op either 'OR' or 'AND'
167     */
168    public function addFilter($colname, $value, $comp, $op = 'OR')
169    {
170        $filter = $this->createFilter($colname, $value, $comp, $op);
171        if ($filter) $this->filter[] = $filter;
172    }
173
174    /**
175     * Adds a dynamic filter
176     *
177     * @param string $colname may contain an alias
178     * @param string|string[] $value
179     * @param string $comp @see self::COMPARATORS
180     * @param string $op either 'OR' or 'AND'
181     */
182    public function addDynamicFilter($colname, $value, $comp, $op = 'OR')
183    {
184        $filter = $this->createFilter($colname, $value, $comp, $op);
185        if ($filter) $this->dynamicFilter[] = $filter;
186    }
187
188    /**
189     * Create a filter definition
190     *
191     * @param string $colname may contain an alias
192     * @param string|string[] $value
193     * @param string $comp @see self::COMPARATORS
194     * @param string $op either 'OR' or 'AND'
195     * @return array|null [Column col, string|string[] value, string comp, string op]
196     */
197    protected function createFilter($colname, $value, $comp, $op = 'OR')
198    {
199        /* Convert certain filters into others
200         * this reduces the number of supported filters to implement in types */
201        if ($comp == '*~') {
202            $value = $this->filterWrapAsterisks($value);
203            $comp = '~';
204        } elseif ($comp == '<>') {
205            $comp = '!=';
206        }
207
208        if (!in_array($comp, self::$COMPARATORS))
209            throw new StructException("Bad comperator. Use " . implode(',', self::$COMPARATORS));
210        if ($op != 'OR' && $op != 'AND')
211            throw new StructException('Bad filter type . Only AND or OR allowed');
212
213        $col = $this->findColumn($colname);
214        if (!$col) return null; // ignore missing columns, filter might have been for different schema
215
216        // map filter operators to SQL syntax
217        switch ($comp) {
218            case '~':
219                $comp = 'LIKE';
220                break;
221            case '!~':
222                $comp = 'NOT LIKE';
223                break;
224            case '=*':
225                $comp = 'REGEXP';
226                break;
227        }
228
229        // we use asterisks, but SQL wants percents
230        if ($comp == 'LIKE' || $comp == 'NOT LIKE') {
231            $value = $this->filterChangeToLike($value);
232        }
233
234        if ($comp == ' IN ' && !is_array($value)) {
235            $value = $this->parseFilterValueList($value);
236            //col IN ('a', 'b', 'c') is equal to col = 'a' OR 'col = 'b' OR col = 'c'
237            $comp = '=';
238        }
239
240        // add the filter
241        return [$col, $value, $comp, $op];
242    }
243
244    /**
245     * Parse SQLite row value into array
246     *
247     * @param string $value
248     * @return string[]
249     */
250    protected function parseFilterValueList($value)
251    {
252        $Handler = new FilterValueListHandler();
253        $LexerClass = class_exists('\Doku_Lexer') ? '\Doku_Lexer' : '\dokuwiki\Parsing\Lexer\Lexer';
254        $isLegacy = $LexerClass === '\Doku_Lexer';
255        /** @var \Doku_Lexer|Lexer $Lexer */
256        $Lexer = new $LexerClass($Handler, 'base', true);
257
258
259        $Lexer->addEntryPattern('\(', 'base', 'row');
260        $Lexer->addPattern('\s*,\s*', 'row');
261        $Lexer->addExitPattern('\)', 'row');
262
263        $Lexer->addEntryPattern('"', 'row', 'double_quote_string');
264        $Lexer->addSpecialPattern('\\\\"', 'double_quote_string', 'escapeSequence');
265        $Lexer->addExitPattern('"', 'double_quote_string');
266
267        $Lexer->addEntryPattern("'", 'row', 'singleQuoteString');
268        $Lexer->addSpecialPattern("\\\\'", 'singleQuoteString', 'escapeSequence');
269        $Lexer->addExitPattern("'", 'singleQuoteString');
270
271        $Lexer->mapHandler('double_quote_string', 'singleQuoteString');
272
273        $Lexer->addSpecialPattern('[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?', 'row', 'number');
274
275        $res = $Lexer->parse($value);
276
277        $currentMode = $isLegacy ? $Lexer->_mode->getCurrent() : $Lexer->getModeStack()->getCurrent();
278        if (!$res || $currentMode != 'base') {
279            throw new StructException('invalid row value syntax');
280        }
281
282        return $Handler->getRow();
283    }
284
285    /**
286     * Wrap given value in asterisks
287     *
288     * @param string|string[] $value
289     * @return string|string[]
290     */
291    protected function filterWrapAsterisks($value)
292    {
293        $map = static fn($input) => "*$input*";
294
295        if (is_array($value)) {
296            $value = array_map($map, $value);
297        } else {
298            $value = $map($value);
299        }
300        return $value;
301    }
302
303    /**
304     * Change given string to use % instead of *
305     *
306     * @param string|string[] $value
307     * @return string|string[]
308     */
309    protected function filterChangeToLike($value)
310    {
311        $map = static fn($input) => str_replace('*', '%', $input);
312
313        if (is_array($value)) {
314            $value = array_map($map, $value);
315        } else {
316            $value = $map($value);
317        }
318        return $value;
319    }
320
321    /**
322     * Set offset for the results
323     *
324     * @param int $offset
325     */
326    public function setOffset($offset)
327    {
328        $limit = 0;
329        if ($this->range_end) {
330            // if there was a limit set previously, the range_end needs to be recalculated
331            $limit = $this->range_end - $this->range_begin;
332        }
333        $this->range_begin = $offset;
334        if ($limit) $this->setLimit($limit);
335    }
336
337    /**
338     * Get the current offset for the results
339     *
340     * @return int
341     */
342    public function getOffset()
343    {
344        return $this->range_begin;
345    }
346
347    /**
348     * Limit results to this number
349     *
350     * @param int $limit Set to 0 to disable limit again
351     */
352    public function setLimit($limit)
353    {
354        if ($limit) {
355            $this->range_end = $this->range_begin + $limit;
356        } else {
357            $this->range_end = 0;
358        }
359    }
360
361    /**
362     * Get the current limit for the results
363     *
364     * @return int
365     */
366    public function getLimit()
367    {
368        if ($this->range_end) {
369            return $this->range_end - $this->range_begin;
370        }
371        return 0;
372    }
373
374    /**
375     * Allows disabling default 'latest = 1' clause in select statement
376     *
377     * @param bool $selectLatest
378     */
379    public function setSelectLatest($selectLatest): void
380    {
381        $this->selectLatest = $selectLatest;
382    }
383
384    /**
385     * If the search result object does not exist yet,
386     * the search is run and the result object returned
387     *
388     * @return SearchResult
389     */
390    public function getResult()
391    {
392        if (is_null($this->result)) {
393            $this->run();
394        }
395        return $this->result;
396    }
397
398    /**
399     * Return the number of results (regardless of limit and offset settings)
400     */
401    public function getCount()
402    {
403        return $this->getResult()->getCount();
404    }
405
406    /**
407     * Returns the PID associated with each result row
408     */
409    public function getPids()
410    {
411        return $this->getResult()->getPids();
412    }
413
414    /**
415     * Returns the rid associated with each result row
416     *
417     * @return array
418     */
419    public function getRids()
420    {
421        return $this->getResult()->getRids();
422    }
423
424    /**
425     * Returns the revisions of search results
426     *
427     * @return array
428     */
429    public function getRevs()
430    {
431        return $this->getResult()->getRevs();
432    }
433
434    /**
435     * Returns the actual result rows
436     *
437     * @return Value[][]
438     */
439    public function getRows()
440    {
441        return $this->getResult()->getRows();
442    }
443
444    /**
445     * Execute this search and return the result
446     *
447     * The result is a two dimensional array of Value()s.
448     *
449     * This will always query for the full result (not using offset and limit) and then
450     * return the wanted range, setting the count to the whole result number
451     *
452     * @deprecated Use getRows() instead
453     * @return Value[][]
454     */
455    public function execute()
456    {
457        DebugHelper::dbgDeprecatedFunction(\dokuwiki\plugin\struct\meta\Search::class . '::getRows()');
458        return $this->getRows();
459    }
460
461    /**
462     * Run the actual search and populate the result object
463     *
464     * @return void
465     */
466    protected function run()
467    {
468        [$sql, $opts] = $this->getSQL();
469
470        /** @var \PDOStatement $res */
471        $res = $this->sqlite->query($sql, $opts);
472        if ($res === false) throw new StructException("SQL execution failed for\n\n$sql");
473
474        $pageidAndRevOnly = array_reduce(
475            $this->columns,
476            static fn($pageidAndRevOnly, Column $col) => $pageidAndRevOnly && ($col->getTid() == 0),
477            true
478        );
479
480        $this->result = new SearchResult($res, $this->range_begin, $this->range_end, $this->columns, $pageidAndRevOnly);
481        $res->closeCursor();
482    }
483
484    /**
485     * Transform the set search parameters into a statement
486     *
487     * Calls runSQLBuilder()
488     *
489     * @return array ($sql, $opts) The SQL and parameters to execute
490     */
491    public function getSQL()
492    {
493        if (!$this->columns) throw new StructException('nocolname');
494        return $this->runSQLBuilder()->getSQL();
495    }
496
497    /**
498     * Initialize and execute the SQLBuilder
499     *
500     * Called from getSQL(). Can be overwritten to extend the query using the query builder
501     *
502     * @return SearchSQLBuilder
503     */
504    protected function runSQLBuilder()
505    {
506        $sqlBuilder = new SearchSQLBuilder();
507        $sqlBuilder->setSelectLatest($this->selectLatest);
508        $sqlBuilder->addSchemas($this->schemas);
509        $sqlBuilder->addColumns($this->columns);
510        $sqlBuilder->addFilters($this->filter);
511        $sqlBuilder->addFilters($this->dynamicFilter);
512        $sqlBuilder->addSorts($this->sortby);
513        return $sqlBuilder;
514    }
515
516    /**
517     * Returns all the columns that where added to the search
518     *
519     * @return Column[]
520     */
521    public function getColumns()
522    {
523        return $this->columns;
524    }
525
526    /**
527     * All the schemas currently added
528     *
529     * @return Schema[]
530     */
531    public function getSchemas()
532    {
533        return array_values($this->schemas);
534    }
535
536    /**
537     * Checks if the given column is a * wildcard
538     *
539     * If it's a wildcard all matching columns are added to the column list, otherwise
540     * nothing happens
541     *
542     * @param string $colname
543     * @return bool was wildcard?
544     */
545    protected function processWildcard($colname)
546    {
547        [$colname, $table] = $this->resolveColumn($colname);
548        if ($colname !== '*') return false;
549
550        // no table given? assume the first is meant
551        if ($table === null) {
552            $schema_list = array_keys($this->schemas);
553            $table = $schema_list[0];
554        }
555
556        $schema = $this->schemas[$table] ?? null;
557        if (!$schema instanceof Schema) return false;
558        $this->columns = array_merge($this->columns, $schema->getColumns(false));
559        return true;
560    }
561
562    /**
563     * Split a given column name into table and column
564     *
565     * Handles Aliases. Table might be null if none given.
566     *
567     * @param $colname
568     * @return array (colname, table)
569     */
570    protected function resolveColumn($colname)
571    {
572        if (!$this->schemas) throw new StructException('noschemas');
573
574        // resolve the alias or table name
575        [$table, $colname] = sexplode('.', $colname, 2, '');
576        if (!$colname) {
577            $colname = $table;
578            $table = null;
579        }
580        if ($table && isset($this->aliases[$table])) {
581            $table = $this->aliases[$table];
582        }
583
584        if (!$colname) throw new StructException('nocolname');
585
586        return [$colname, $table];
587    }
588
589    /**
590     * Find a column to be used in the search
591     *
592     * @param string $colname may contain an alias
593     * @return bool|Column
594     */
595    public function findColumn($colname, $strict = false)
596    {
597        if (!$this->schemas) throw new StructException('noschemas');
598        $schema_list = array_keys($this->schemas);
599
600        // add "fake" column for special col
601        if ($colname == '%pageid%') {
602            return new PageColumn(0, new Page(), $schema_list[0]);
603        }
604        if ($colname == '%title%') {
605            return new PageColumn(0, new Page(['usetitles' => true]), $schema_list[0]);
606        }
607        if ($colname == '%lastupdate%') {
608            return new RevisionColumn(0, new DateTime(), $schema_list[0]);
609        }
610        if ($colname == '%lasteditor%') {
611            return new UserColumn(0, new User(), $schema_list[0]);
612        }
613        if ($colname == '%lastsummary%') {
614            return new SummaryColumn(0, new AutoSummary(), $schema_list[0]);
615        }
616        if ($colname == '%rowid%') {
617            return new RowColumn(0, new Decimal(), $schema_list[0]);
618        }
619        if ($colname == '%published%') {
620            return new PublishedColumn(0, new Decimal(), $schema_list[0]);
621        }
622
623        [$colname, $table] = $this->resolveColumn($colname);
624
625        /*
626         * If table name is given search only that, otherwise if no strict behavior
627         * is requested by the caller, try all assigned schemas for matching the
628         * column name.
629         */
630        if ($table !== null && isset($this->schemas[$table])) {
631            $schemas = [$table => $this->schemas[$table]];
632        } elseif ($table === null || !$strict) {
633            $schemas = $this->schemas;
634        } else {
635            return false;
636        }
637
638        // find it
639        $col = false;
640        foreach ($schemas as $schema) {
641            if (empty($schema)) {
642                continue;
643            }
644            $col = $schema->findColumn($colname);
645            if ($col) break;
646        }
647
648        return $col;
649    }
650}
651