<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Exception\IndexUsageException;
use dokuwiki\Search\Query\QueryParser;
use dokuwiki\Utf8;

/**
 * Class DokuWiki Metadata Search
 *
 * Provides search operations on metadata indexes using the Collection/Index architecture.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @param string $id page id
     * @param bool $in_ns match against namespace as well?
     * @param bool $in_title search in title?
     * @param int|string|null $after only show results with mtime after this date,
     *                               accepts timestamp or strtotime arguments
     * @param int|string|null $before only show results with mtime before this date,
     *                                accepts timestamp or strtotime arguments
     *
     * @return string[] result of the event's default action, see pageLookupCallBack()
     * @throws IndexUsageException
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Adrian Lang <lang@cosmocode.de>
     */
    public function pageLookup(
        string $id,
        bool $in_ns = false,
        bool $in_title = false,
        int|string|null $after = null,
        int|string|null $before = null,
    ): array {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $this->pageLookupCallBack(...));
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * This is the default action of the SEARCH_QUERY_PAGELOOKUP event. It reads the
     * keys 'id', 'in_ns', 'in_title', 'after' and 'before' from the event data.
     *
     * @param array $data event data
     * @return string[]
     * @throws IndexUsageException
     */
    public function pageLookupCallBack(array &$data): array
    {
        $parsedQuery = (new QueryParser())->convert($data['id']);
        $ns = $parsedQuery['ns'] ? cleanID($parsedQuery['ns'][0]) . ':' : null;
        $notns = $parsedQuery['notns'] ? cleanID($parsedQuery['notns'][0]) . ':' : null;

        // when a namespace restriction is part of the query, only the remaining terms are searched for
        $query = ($ns || $notns) ? implode(' ', $parsedQuery['highlight']) : $data['id'];
        $cleaned = cleanID($query);

        if ($cleaned === '') return [];

        // namespace include/exclude filter shared by the name and the title lookup
        $inScope = static function ($page) use ($ns, $notns): bool {
            if ($ns && !str_starts_with($page, $ns)) return false;
            if ($notns && str_starts_with($page, $notns)) return false;
            return true;
        };

        // find pages matching by page name
        $pages = [];
        foreach ($this->getPages() as $page) {
            if (!$inScope($page)) continue;

            $match = $data['in_ns'] ? $page : noNSorNS($page);
            if (str_contains($match, $cleaned)) {
                $pages[$page] = p_get_first_heading($page, METADATA_DONT_RENDER);
            }
        }

        // additionally find pages matching by title
        if ($data['in_title']) {
            // case-insensitive substring match; non-ASCII search terms need the UTF-8 aware functions
            $titleCompare = static function ($search, $title) {
                if (Utf8\Clean::isASCII($search)) {
                    return stripos($title, $search) !== false;
                }
                return Utf8\PhpString::strpos(
                    Utf8\PhpString::strtolower($title),
                    Utf8\PhpString::strtolower($search)
                ) !== false;
            };

            foreach ($this->lookupKey('title', $query, $titleCompare) as $page) {
                if (!$inScope($page)) continue;

                // pages already found by name keep their entry
                if (!isset($pages[$page])) {
                    $pages[$page] = p_get_first_heading($page, METADATA_DONT_RENDER);
                }
            }
        }

        $pages = static::filterPages($pages, false, $data['after'], $data['before']);
        uksort($pages, $this->pagesorter(...));
        return $pages;
    }

    /**
     * Return a list of all indexed pages, optionally limited to those that have a specific metadata key
     *
     * When a key is given, only pages that have any value stored for that metadata key are returned.
     * This does not filter by the metadata value itself.
     *
     * @param string|null $key metadata key name, or null for all pages
     * @return string[] list of page names
     */
    public function getPages(?string $key = null): array
    {
        if ($key === null) {
            return (new Indexer())->getAllPages();
        }

        // titles are kept in their own collection
        if ($key === 'title') {
            return (new PageTitleCollection())->getEntitiesWithData();
        }

        return (new PageMetaCollection($key))->getEntitiesWithData();
    }

    /**
     * Find pages containing a metadata value
     *
     * The metadata values are compared as case-sensitive strings. Pass a
     * callback function that returns true or false to use a different
     * comparison function. The function will be called with the $value being
     * searched for as the first argument, and the word in the index as the
     * second argument. The function preg_match can be used directly if the
     * values are regexes.
     *
     * When $value is a string, the result is a flat list of matching page names.
     * When $value is an array, each value is searched independently and the result
     * is an associative array keyed by the search values, each containing a list
     * of matching page names.
     *
     * Without a callback, values support wildcard matching with * at the start
     * and/or end (e.g. '*foo', 'bar*', '*baz*').
     *
     * @param string $key name of the metadata key to look for
     * @param string|string[] $value search term or array of search terms (taken by reference, not modified here)
     * @param callable|null $func comparison function: fn($searchValue, $indexWord) => bool
     * @return array flat list of page names (scalar $value) or [value => [pageName, ...]] (array $value)
     *
     * @throws IndexUsageException
     * @author Michael Hamann <michael@content-space.de>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function lookupKey(string $key, string|array &$value, ?callable $func = null): array
    {
        $isScalar = !is_array($value);
        $valueArray = $isScalar ? [$value] : $value;

        if ($key === 'title') {
            $collection = new PageTitleCollection();
        } else {
            $collection = new PageMetaCollection($key);
        }

        $result = (new CollectionSearch($collection))->lookup($valueArray, $func);

        if (!$isScalar) {
            return $result;
        }

        // fall back to an empty list should the search not report an entry for the value,
        // instead of failing the array return type with null
        return $result[$value] ?? [];
    }

    /**
     * Returns the backlinks for a given page
     *
     * @param string $id The id for which links shall be returned
     * @param bool $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return string[] The pages that contain links to the given page
     *
     * @throws IndexUsageException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function backlinks(string $id, bool $ignore_perms = false): array
    {
        return $this->lookupRelation('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * @param string $id The media id to look for
     * @param bool $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return string[] A list of pages that use the given media file
     *
     * @throws IndexUsageException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function mediause(string $id, bool $ignore_perms = false): array
    {
        return $this->lookupRelation('relation_media', $id, $ignore_perms);
    }

    /**
     * Look up the pages listing $id under the given relation key, filtered and sorted
     *
     * Shared implementation of backlinks() and mediause().
     *
     * @param string $key metadata key to search in
     * @param string $id the value to look for
     * @param bool $ignore_perms skip visibility and ACL checks
     * @return string[] sorted list of page names
     *
     * @throws IndexUsageException
     */
    private function lookupRelation(string $key, string $id, bool $ignore_perms): array
    {
        $result = $this->lookupKey($key, $id);
        if (!count($result)) return $result;

        // filterPages() works on the array keys, so the page names are moved there and back
        $result = array_flip($result);
        $result = static::filterPages($result, $ignore_perms);
        $result = array_keys($result);

        Utf8\Sort::sort($result);
        return $result;
    }

    /**
     * Filter a list of pages by visibility, existence, permissions, and time range
     *
     * Non-integer $after/$before values are converted with strtotime(). A value that
     * is empty or cannot be parsed ends up falsy and disables that bound.
     *
     * @param array $pages pages to filter (keys are page IDs)
     * @param bool $ignorePerms skip visibility and ACL checks
     * @param int|string|null $after only keep pages modified after this date
     * @param int|string|null $before only keep pages modified before this date
     * @return array filtered pages, keys and values are preserved
     */
    public static function filterPages(array $pages, bool $ignorePerms = false, $after = null, $before = null): array
    {
        if ($after) $after = is_int($after) ? $after : strtotime($after);
        if ($before) $before = is_int($before) ? $before : strtotime($before);

        return array_filter($pages, static function ($value, $id) use ($ignorePerms, $after, $before) {
            if (!$ignorePerms) {
                if (isHiddenPage($id) || auth_quickaclcheck($id) < AUTH_READ) {
                    return false;
                }
            }
            if (!page_exists($id, '', false)) {
                return false;
            }
            // the modification time is only read when a time bound is actually active
            if ($after || $before) {
                $mTime = filemtime(wikiFN($id));
                if ($after && $after > $mTime) return false;
                if ($before && $before < $mTime) return false;
            }
            return true;
        }, ARRAY_FILTER_USE_BOTH);
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected function pagesorter(string $a, string $b): int
    {
        // the namespace depth is the number of colons in the page id
        $diff = substr_count($a, ':') - substr_count($b, ':');
        return $diff ?: Utf8\Sort::strcmp($a, $b);
    }
}