<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Exception\IndexUsageException;
use dokuwiki\Search\Query\QueryParser;
use dokuwiki\Utf8;

/**
 * Class DokuWiki Metadata Search
 *
 * Provides search operations on metadata indexes using the Collection/Index architecture.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @param string $id page id
     * @param bool $in_ns match against namespace as well?
     * @param bool $in_title search in title?
     * @param int|string|null $after only show results with mtime after this date,
     *                               accepts timestamp or strtotime arguments
     * @param int|string|null $before only show results with mtime before this date,
     *                                accepts timestamp or strtotime arguments
     *
     * @return string[]
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Adrian Lang <lang@cosmocode.de>
     *
     */
    public function pageLookup(
        string $id,
        bool $in_ns = false,
        bool $in_title = false,
        int|string|null $after = null,
        int|string|null $before = null,
    ): array {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $this->pageLookupCallBack(...));
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Reads the keys 'id',
     * 'in_ns', 'in_title', 'after' and 'before' from the event data; all but
     * 'id' are treated as optional.
     *
     * @param array $data event data
     * @return string[]
     * @throws IndexUsageException
     */
    public function pageLookupCallBack(array &$data): array
    {
        $parsedQuery = (new QueryParser())->convert($data['id']);
        $ns = $parsedQuery['ns'] ? cleanID($parsedQuery['ns'][0]) . ':' : null;
        $notns = $parsedQuery['notns'] ? cleanID($parsedQuery['notns'][0]) . ':' : null;

        // with a namespace restriction only the remaining search words form the query
        $query = ($ns || $notns) ? implode(' ', $parsedQuery['highlight']) : $data['id'];
        $cleaned = cleanID($query);

        if ($cleaned === '') return [];

        // a page is in scope when it lies inside the requested namespace (if any)
        // and outside the excluded namespace (if any)
        $inScope = static fn($page): bool =>
            (!$ns || str_starts_with($page, $ns)) && (!$notns || !str_starts_with($page, $notns));

        // find pages matching by page name
        $inNs = !empty($data['in_ns']);
        $pages = [];
        foreach ($this->getPages() as $page) {
            if (!$inScope($page)) continue;

            $match = $inNs ? $page : noNSorNS($page);
            if (str_contains($match, $cleaned)) {
                $pages[$page] = p_get_first_heading($page, METADATA_DONT_RENDER);
            }
        }

        // additionally find pages matching by title
        if (!empty($data['in_title'])) {
            // NOTE(review): stripos() does not case-fold multi-byte UTF-8 characters, so titles
            // with non-ASCII letters effectively match case-sensitively - confirm this is intended
            $titleMatches = $this->lookupKey(
                'title',
                $query,
                static fn($search, $title) => stripos($title, $search) !== false,
            );
            foreach ($titleMatches as $page) {
                if (isset($pages[$page]) || !$inScope($page)) continue;

                $pages[$page] = p_get_first_heading($page, METADATA_DONT_RENDER);
            }
        }

        $pages = static::filterPages($pages, false, $data['after'] ?? null, $data['before'] ?? null);
        uksort($pages, $this->pagesorter(...));
        return $pages;
    }

    /**
     * Return a list of all indexed pages, optionally limited to those that have a specific metadata key
     *
     * When a key is given, only pages that have any value stored for that metadata key are returned.
     * This does not filter by the metadata value itself.
     *
     * @param string|null $key metadata key name, or null for all pages
     * @return string[] list of page names
     */
    public function getPages(?string $key = null): array
    {
        return match ($key) {
            null => (new Indexer())->getAllPages(),
            'title' => (new PageTitleCollection())->getEntitiesWithData(),
            default => (new PageMetaCollection($key))->getEntitiesWithData(),
        };
    }

    /**
     * Find pages containing a metadata value
     *
     * The metadata values are compared as case-sensitive strings. Pass a
     * callback function that returns true or false to use a different
     * comparison function. The function will be called with the $value being
     * searched for as the first argument, and the word in the index as the
     * second argument. The function preg_match can be used directly if the
     * values are regexes.
     *
     * When $value is a string, the result is a flat list of matching page names
     * (an empty list when the lookup has no entry for the value).
     * When $value is an array, each value is searched independently and the result
     * is an associative array keyed by the search values, each containing a list
     * of matching page names.
     *
     * Without a callback, values support wildcard matching with * at the start
     * and/or end (e.g. '*foo', 'bar*', '*baz*').
     *
     * @param string $key name of the metadata key to look for
     * @param string|string[] $value search term or array of search terms (taken by reference, not modified)
     * @param callable|null $func comparison function: fn($searchValue, $indexWord) => bool
     * @return array flat list of page names (scalar $value) or [value => [pageName, ...]] (array $value)
     *
     * @throws IndexUsageException
     * @author Michael Hamann <michael@content-space.de>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function lookupKey(string $key, string|array &$value, ?callable $func = null): array
    {
        $isScalar = !is_array($value);
        $searchValues = $isScalar ? [$value] : $value;

        $collection = $key === 'title' ? new PageTitleCollection() : new PageMetaCollection($key);
        $result = (new CollectionSearch($collection))->lookup($searchValues, $func);

        if (!$isScalar) return $result;

        // unwrap the single result list; a missing entry means "no matches" and must not
        // surface as an undefined-key warning plus a TypeError on the array return type
        return $result[$value] ?? [];
    }

    /**
     * Returns the backlinks for a given page
     *
     * @param string $id The id for which links shall be returned
     * @param bool $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return string[] The pages that contain links to the given page
     *
     * @throws IndexUsageException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function backlinks(string $id, bool $ignore_perms = false): array
    {
        return $this->lookupRelation('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * @param string $id The media id to look for
     * @param bool $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return string[] A list of pages that use the given media file
     *
     * @throws IndexUsageException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function mediause(string $id, bool $ignore_perms = false): array
    {
        return $this->lookupRelation('relation_media', $id, $ignore_perms);
    }

    /**
     * Look up the pages stored under a relation metadata key for a target id
     *
     * Shared implementation of backlinks() and mediause(): the matches are
     * passed through filterPages() and returned sorted.
     *
     * @param string $key relation metadata key, e.g. 'relation_references'
     * @param string $id the page or media id that is being referenced
     * @param bool $ignorePerms skip visibility and ACL checks
     * @return string[] sorted list of referencing pages
     *
     * @throws IndexUsageException
     */
    protected function lookupRelation(string $key, string $id, bool $ignorePerms): array
    {
        $pages = $this->lookupKey($key, $id);
        if ($pages === []) return [];

        // filterPages() expects the page IDs as array keys
        $pages = array_keys(static::filterPages(array_flip($pages), $ignorePerms));

        Utf8\Sort::sort($pages);
        return $pages;
    }

    /**
     * Filter a list of pages by visibility, existence, permissions, and time range
     *
     * Non-integer time limits are converted with strtotime(). A limit that is
     * empty or cannot be parsed (strtotime() returns false) is not applied.
     *
     * @param array $pages pages to filter (keys are page IDs)
     * @param bool $ignorePerms skip visibility and ACL checks
     * @param int|string|null $after only keep pages modified after this date
     * @param int|string|null $before only keep pages modified before this date
     * @return array filtered pages, original keys and values preserved
     */
    public static function filterPages(array $pages, bool $ignorePerms = false, $after = null, $before = null): array
    {
        if ($after) $after = is_int($after) ? $after : strtotime($after);
        if ($before) $before = is_int($before) ? $before : strtotime($before);

        return array_filter($pages, static function ($value, $id) use ($ignorePerms, $after, $before) {
            if (!$ignorePerms && (isHiddenPage($id) || auth_quickaclcheck($id) < AUTH_READ)) {
                return false;
            }
            if (!page_exists($id, '', false)) {
                return false;
            }
            if ($after || $before) {
                // only touch the file system when a time limit is actually in effect
                $mTime = filemtime(wikiFN($id));
                if ($after && $after > $mTime) return false;
                if ($before && $before < $mTime) return false;
            }
            return true;
        }, ARRAY_FILTER_USE_BOTH);
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected function pagesorter(string $a, string $b): int
    {
        // fewer ':' separators means a shallower namespace, which sorts first
        $diff = substr_count($a, ':') - substr_count($b, ':');
        return $diff ?: Utf8\Sort::strcmp($a, $b);
    }
}