<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Query\QueryParser;
use dokuwiki\Utf8;

/**
 * Class DokuWiki Metadata Search
 *
 * Provides search operations on metadata indexes using the Collection/Index architecture.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @param string $id page id
     * @param bool $in_ns match against namespace as well?
     * @param bool $in_title search in title?
     * @param int|string|null $after only show results with mtime after this date,
     *                               accepts timestamp or strtotime arguments
     * @param int|string|null $before only show results with mtime before this date,
     *                                accepts timestamp or strtotime arguments
     *
     * @return string[] page id => title; empty when the event yields no array result
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Adrian Lang <lang@cosmocode.de>
     *
     */
    public function pageLookup(
        string $id,
        bool $in_ns = false,
        bool $in_title = false,
        int|string|null $after = null,
        int|string|null $before = null
    ): array {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];

        $result = Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $this->pageLookupCallBack(...));

        // NOTE(review): an event handler may prevent the default action without
        // providing an array result; fall back to an empty list instead of
        // violating the declared return type - confirm against Event semantics.
        return is_array($result) ? $result : [];
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Reads the keys
     * 'id', 'in_ns', 'in_title', 'after' and 'before' from the event data.
     *
     * @param array $data event data
     * @return string[]
     * @throws IndexUsageException
     */
    public function pageLookupCallBack(array &$data): array
    {
        $parsedQuery = (new QueryParser())->convert($data['id']);

        // only the first namespace restriction / exclusion of the query is honoured
        $ns = $parsedQuery['ns'] ? cleanID($parsedQuery['ns'][0]) . ':' : null;
        $notns = $parsedQuery['notns'] ? cleanID($parsedQuery['notns'][0]) . ':' : null;

        // with a namespace filter present, search for the remaining words only
        $query = ($ns || $notns) ? implode(' ', $parsedQuery['highlight']) : $data['id'];
        $cleaned = cleanID($query);

        if ($cleaned === '') return [];

        // find pages matching by page name
        $pages = [];
        foreach ($this->getPages() as $page) {
            if ($ns && !str_starts_with($page, $ns)) continue;
            if ($notns && str_starts_with($page, $notns)) continue;

            $match = $data['in_ns'] ? $page : noNSorNS($page);
            if (str_contains($match, $cleaned)) {
                $pages[$page] = p_get_first_heading($page, METADATA_DONT_RENDER);
            }
        }

        // additionally find pages matching by title
        if ($data['in_title']) {
            $search = new CollectionSearch(new PageTitleCollection());
            $search->caseInsensitive();
            // titles are matched against the uncleaned query, wildcarded on both sides
            $search->addTerm('*' . $query . '*');
            $terms = $search->execute();
            $term = reset($terms);
            if ($term) {
                foreach ($term->getEntityTokens() as $page => $titles) {
                    if ($ns && !str_starts_with($page, $ns)) continue;
                    if ($notns && str_starts_with($page, $notns)) continue;

                    // a page name match takes precedence over a title match
                    if (!isset($pages[$page])) {
                        $pages[$page] = $titles[0];
                    }
                }
            }
        }

        $pages = static::filterPages($pages, false, $data['after'], $data['before']);
        uksort($pages, $this->pagesorter(...));
        return $pages;
    }

    /**
     * Return a list of all indexed pages, optionally limited to those that have a specific metadata key
     *
     * When a key is given, only pages that have any value stored for that metadata key are returned.
     * This does not filter by the metadata value itself.
     *
     * @param string|null $key metadata key name, or null for all pages
     * @return string[] list of page names
     */
    public function getPages(?string $key = null): array
    {
        if ($key === null) {
            return (new Indexer())->getAllPages();
        }

        // titles are kept in their own collection
        if ($key === 'title') {
            return (new PageTitleCollection())->getEntitiesWithData();
        }

        return (new PageMetaCollection($key))->getEntitiesWithData();
    }

    /**
     * Find pages containing a metadata value
     *
     * Values are compared as case-sensitive strings. Wildcard matching with * at
     * the start and/or end is supported (e.g. '*foo', 'bar*', '*baz*').
     *
     * When $value is a string, the result is a flat list of matching page names.
     * When $value is an array, each value is searched independently and the result
     * is an associative array keyed by the search values, each containing a list
     * of matching page names.
     *
     * @param string $key name of the metadata key to look for
     * @param string|string[] $value search term or array of search terms; taken by reference but not modified
     * @return array flat list of page names (scalar $value) or [value => [pageName, ...]] (array $value)
     *
     * @author Michael Hamann <michael@content-space.de>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function lookupKey(string $key, string|array &$value): array
    {
        $isScalar = !is_array($value);
        $valueArray = $isScalar ? [$value] : $value;

        $collection = ($key === 'title') ? new PageTitleCollection() : new PageMetaCollection($key);

        $search = new CollectionSearch($collection);
        foreach ($valueArray as $v) {
            $search->addTerm($v);
        }
        $terms = $search->execute();

        // every requested value gets an entry, even when nothing matched
        $result = [];
        foreach ($valueArray as $v) {
            $term = $terms[$v] ?? null;
            $result[$v] = $term ? array_keys($term->getEntityFrequencies()) : [];
        }

        return $isScalar ? $result[$value] : $result;
    }

    /**
     * Returns the backlinks for a given page
     *
     * @param string $id The id for which links shall be returned
     * @param bool $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return string[] The pages that contain links to the given page
     *
     * @throws IndexUsageException
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function backlinks(string $id, bool $ignore_perms = false): array
    {
        return $this->findReferencingPages('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * @param string $id The media id to look for
     * @param bool $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return string[] A list of pages that use the given media file
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function mediause(string $id, bool $ignore_perms = false): array
    {
        return $this->findReferencingPages('relation_media', $id, $ignore_perms);
    }

    /**
     * Look up the pages whose metadata key holds the given id, filtered and sorted
     *
     * Shared implementation of backlinks() and mediause().
     *
     * @param string $key metadata key to search in
     * @param string $id page or media id to look for
     * @param bool $ignore_perms skip visibility and ACL checks
     * @return string[] sorted list of matching page ids
     */
    private function findReferencingPages(string $key, string $id, bool $ignore_perms): array
    {
        $pages = $this->lookupKey($key, $id);
        if ($pages === []) return [];

        // filterPages() expects the page ids as array keys
        $pages = array_keys(static::filterPages(array_flip($pages), $ignore_perms));

        // PHP stores integer-like string keys as ints; hand back strings as documented
        $pages = array_map(static fn ($page): string => (string) $page, $pages);

        Utf8\Sort::sort($pages);
        return $pages;
    }

    /**
     * Filter a list of pages by visibility, existence, permissions, and time range
     *
     * Non-integer time bounds are passed through strtotime(); a string that
     * cannot be parsed results in that bound being ignored. Array keys of the
     * input are preserved.
     *
     * @param array $pages pages to filter (keys are page IDs)
     * @param bool $ignorePerms skip visibility and ACL checks
     * @param int|string|null $after only keep pages modified after this date
     * @param int|string|null $before only keep pages modified before this date
     * @return array filtered pages
     */
    public static function filterPages(
        array $pages,
        bool $ignorePerms = false,
        int|string|null $after = null,
        int|string|null $before = null
    ): array {
        if ($after) $after = is_int($after) ? $after : strtotime($after);
        if ($before) $before = is_int($before) ? $before : strtotime($before);

        return array_filter($pages, static function ($value, $id) use ($ignorePerms, $after, $before) {
            if (!$ignorePerms) {
                if (isHiddenPage($id) || auth_quickaclcheck($id) < AUTH_READ) {
                    return false;
                }
            }
            if (!page_exists($id, '', false)) {
                return false;
            }
            // the modification time is only read when a time bound is active
            if ($after || $before) {
                $mTime = filemtime(wikiFN($id));
                if ($after && $after > $mTime) return false;
                if ($before && $before < $mTime) return false;
            }
            return true;
        }, ARRAY_FILTER_USE_BOTH);
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected function pagesorter(string $a, string $b): int
    {
        // fewer namespace separators means higher up in the hierarchy
        $diff = substr_count($a, ':') - substr_count($b, ':');
        return $diff ?: Utf8\Sort::strcmp($a, $b);
    }
}