<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Query\QueryParser;
use dokuwiki\Utf8\Sort;

/**
 * Class DokuWiki Metadata Search
 *
 * Provides search operations on metadata indexes using the Collection/Index architecture.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @param string $id page id
     * @param bool $in_ns match against namespace as well?
     * @param bool $in_title search in title?
     * @param int|string|null $after only show results with mtime after this date,
     *                                accepts timestamp or strtotime arguments
     * @param int|string|null $before only show results with mtime before this date,
     *                                 accepts timestamp or strtotime arguments
     *
     * @return string[] matching pages as [page id => title]
     * @author Andreas Gohr <andi@splitbrain.org>
     * @author Adrian Lang <lang@cosmocode.de>
     *
     */
    public function pageLookup(
        string $id,
        bool $in_ns = false,
        bool $in_title = false,
        int|string|null $after = null,
        int|string|null $before = null
    ): array {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before
        ];
        $data['has_titles'] = true; // for plugin backward compatibility check

        $result = Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $this->pageLookupCallBack(...));

        // NOTE(review): presumably an event handler can prevent the default action
        // without supplying a result of its own, leaving a non-array value here.
        // Returning that would violate the declared return type, so fall back to
        // an empty result list - confirm against Event::createAndTrigger().
        return is_array($result) ? $result : [];
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Expects the keys
     * 'id', 'in_ns', 'in_title', 'after' and 'before' as set up by pageLookup().
     *
     * @param array $data event data
     * @return string[]
     */
    public function pageLookupCallBack(array $data): array
    {
        $parsedQuery = (new QueryParser())->convert($data['id']);

        // only the first namespace restriction / exclusion of the query is honored
        $ns = $parsedQuery['ns'] ? cleanID($parsedQuery['ns'][0]) . ':' : null;
        $notns = $parsedQuery['notns'] ? cleanID($parsedQuery['notns'][0]) . ':' : null;

        // with a namespace restriction only the remaining search words are looked up,
        // otherwise the id is used as given
        $query = ($ns || $notns) ? implode(' ', $parsedQuery['highlight']) : $data['id'];
        $cleaned = cleanID($query);

        if ($cleaned === '') return [];

        // find pages matching by page name
        $pages = [];
        foreach ($this->getPages() as $page) {
            if ($ns && !str_starts_with($page, $ns)) continue;
            if ($notns && str_starts_with($page, $notns)) continue;

            $match = $data['in_ns'] ? $page : noNSorNS($page);
            if (str_contains($match, $cleaned)) {
                $pages[$page] = p_get_first_heading($page, METADATA_DONT_RENDER);
            }
        }

        // additionally find pages matching by title
        if ($data['in_title']) {
            $search = new CollectionSearch(new PageTitleCollection());
            $search->caseInsensitive();
            $search->addTerm('*' . $query . '*');
            $terms = $search->execute();

            // only one term was added, so only the first result entry is of interest
            $term = reset($terms);
            if ($term) {
                foreach ($term->getEntityTokens() as $page => $titles) {
                    if ($ns && !str_starts_with($page, $ns)) continue;
                    if ($notns && str_starts_with($page, $notns)) continue;

                    // pages already found by name keep the heading set above
                    if (!isset($pages[$page])) {
                        $pages[$page] = $titles[0];
                    }
                }
            }
        }

        // ACL and visibility checks are always applied for page lookups
        $pages = static::filterPages($pages, false, $data['after'], $data['before']);
        uksort($pages, $this->pagesorter(...));
        return $pages;
    }

    /**
     * Return a list of all indexed pages, optionally limited to those that have a specific metadata key
     *
     * When a key is given, only pages that have any value stored for that metadata key are returned.
     * This does not filter by the metadata value itself.
     *
     * @param string|null $key metadata key name, or null for all pages
     * @return string[] list of page names
     */
    public function getPages(?string $key = null): array
    {
        if ($key === null) {
            return (new Indexer())->getAllPages();
        }

        // titles live in their own collection
        if ($key === 'title') {
            return (new PageTitleCollection())->getEntitiesWithData();
        }

        return (new PageMetaCollection($key))->getEntitiesWithData();
    }

    /**
     * Find pages containing a metadata value
     *
     * Values are compared as case-sensitive strings. Wildcard matching with * at
     * the start and/or end is supported (e.g. '*foo', 'bar*', '*baz*').
     *
     * When $value is a string, the result is a flat list of matching page names.
     * When $value is an array, each value is searched independently and the result
     * is an associative array keyed by the search values, each containing a list
     * of matching page names.
     *
     * $value is accepted by reference but is never modified by this method.
     *
     * @param string $key name of the metadata key to look for
     * @param string|string[] $value search term or array of search terms
     * @return array flat list of page names (scalar $value) or [value => [pageName, ...]] (array $value)
     *
     * @author Michael Hamann <michael@content-space.de>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function lookupKey(string $key, string|array &$value): array
    {
        $isScalar = !is_array($value);
        $valueArray = $isScalar ? [$value] : $value;

        // titles live in their own collection
        $collection = ($key === 'title') ? new PageTitleCollection() : new PageMetaCollection($key);

        $search = new CollectionSearch($collection);
        foreach ($valueArray as $v) {
            $search->addTerm($v);
        }
        $terms = $search->execute();

        // every requested value gets an entry, even when nothing matched
        $result = [];
        foreach ($valueArray as $v) {
            $term = $terms[$v] ?? null;
            $result[$v] = $term ? array_keys($term->getEntityFrequencies()) : [];
        }

        return $isScalar ? $result[$value] : $result;
    }

    /**
     * Returns the backlinks for a given page
     *
     * @param string $id The id for which links shall be returned
     * @param bool $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return string[] The pages that contain links to the given page
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function backlinks(string $id, bool $ignore_perms = false): array
    {
        return $this->lookupRelation('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * @param string $id The media id to look for
     * @param bool $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return string[] A list of pages that use the given media file
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function mediause(string $id, bool $ignore_perms = false): array
    {
        return $this->lookupRelation('relation_media', $id, $ignore_perms);
    }

    /**
     * Look up the pages that store $id under the given relation metadata key
     *
     * Shared implementation of backlinks() and mediause(): the matches are passed
     * through filterPages() and returned as a sorted list.
     *
     * @param string $key metadata key to search ('relation_references' or 'relation_media')
     * @param string $id page or media id to look for
     * @param bool $ignore_perms skip visibility and ACL checks
     * @return string[] sorted list of matching page ids
     */
    private function lookupRelation(string $key, string $id, bool $ignore_perms): array
    {
        $result = $this->lookupKey($key, $id);
        if ($result === []) return $result;

        // filterPages() works on the array keys, so turn the list into a map and back
        $result = array_flip($result);
        $result = static::filterPages($result, $ignore_perms);
        $result = array_keys($result);

        Sort::sort($result);
        return $result;
    }

    /**
     * Filter a list of pages by visibility, existence, permissions, and time range
     *
     * Non-integer dates are converted with strtotime(). A date string that
     * strtotime() cannot parse yields false and therefore disables that bound
     * instead of raising an error. Both bounds are inclusive: a page modified
     * exactly at $after or $before is kept.
     *
     * @param array $pages pages to filter (keys are page IDs)
     * @param bool $ignorePerms skip visibility and ACL checks
     * @param int|string|null $after only keep pages modified after this date
     * @param int|string|null $before only keep pages modified before this date
     * @return array filtered pages, original keys and values are preserved
     */
    public static function filterPages(
        array $pages,
        bool $ignorePerms = false,
        int|string|null $after = null,
        int|string|null $before = null
    ): array {
        if ($after) $after = is_int($after) ? $after : strtotime($after);
        if ($before) $before = is_int($before) ? $before : strtotime($before);

        return array_filter($pages, static function ($value, $id) use ($ignorePerms, $after, $before) {
            if (!$ignorePerms) {
                if (isHiddenPage($id) || auth_quickaclcheck($id) < AUTH_READ) {
                    return false;
                }
            }
            if (!page_exists($id, '', false)) {
                return false;
            }
            // the modification time is only read when a time bound is actually set
            if ($after || $before) {
                $mTime = filemtime(wikiFN($id));
                if ($after && $after > $mTime) return false;
                if ($before && $before < $mTime) return false;
            }
            return true;
        }, ARRAY_FILTER_USE_BOTH);
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected function pagesorter(string $a, string $b): int
    {
        // the namespace depth is the number of colons in the page id
        $diff = substr_count($a, ':') - substr_count($b, ':');
        return $diff ?: Sort::strcmp($a, $b);
    }
}