<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\MetadataIndex;
use dokuwiki\Search\PageIndex;
use dokuwiki\Search\QueryParser;

/**
 * Class DokuWiki Metadata Search
 *
 * Static helpers that answer page-level queries (backlinks, media usage,
 * page name / title lookup) from the metadata and page indexes.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Metadata Search constructor. Prevents direct object creation;
     * all functionality is exposed through static methods.
     */
    protected function __construct()
    {
    }

    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index (key 'relation_references').
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     */
    public static function backlinks($id, $ignore_perms = false)
    {
        $result = MetadataIndex::getInstance()->lookupKey('relation_references', $id);
        return static::filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index
     * (key 'relation_media').
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     */
    public static function mediause($id, $ignore_perms = false)
    {
        $result = MetadataIndex::getInstance()->lookupKey('relation_media', $id);
        return static::filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Shared post-processing for index lookups returning page ids
     *
     * Drops pages that do not exist and, unless $ignore_perms is exactly
     * boolean true, pages that are hidden or not readable for the current
     * user. The remaining ids are returned sorted and re-indexed.
     *
     * @param array $result       page ids as returned by the index lookup
     * @param bool  $ignore_perms only the strict value true disables the hidden/ACL checks
     * @return array
     */
    protected static function filterAccessiblePages($result, $ignore_perms)
    {
        if (!count($result)) return $result;

        // strict comparison on purpose: legacy callers passed an integer here
        $checkPerms = ($ignore_perms !== true);

        foreach (array_keys($result) as $idx) {
            $page = $result[$idx];
            if (($checkPerms && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ))
                || !page_exists($page, '', false)
            ) {
                unset($result[$idx]);
            }
        }

        sort($result);
        return $result;
    }

    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[]
     */
    public static function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before
        ];
        $data['has_titles'] = true; // for plugin backward compatibility check
        $action = static::class.'::callback_pageLookup';
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $action);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Optional entries
     * missing from $data fall back to the defaults of pageLookup().
     *
     * @param array $data event data (keys: id, in_ns, in_title, after, before)
     * @return string[]
     */
    public static function callback_pageLookup($data)
    {
        // this callback is public, so do not rely on every optional key being present
        $data += ['in_ns' => false, 'in_title' => false, 'after' => null, 'before' => null];

        $Indexer = PageIndex::getInstance();

        // split out original parameters
        $id = $data['id'];
        $parsedQuery = QueryParser::convert($id);

        // a namespace in the query restricts the results and is removed from the search term
        $ns = null;
        if (count($parsedQuery['ns']) > 0) {
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $in_ns    = $data['in_ns'];
        $in_title = $data['in_title'];
        $cleaned  = cleanID($id);

        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            // match against the page id (with or without its namespace part)
            foreach ($Indexer->getPages() as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false && !isset($pages[$p_id])) {
                    $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                }
            }

            if ($in_title) {
                // The comparator is protected. lookupKey() is defined outside this
                // class, so hand over a closure created here; a plain
                // 'Class::method' string may not be callable from the other scope.
                $func = static function ($search, $title) {
                    return static::pageLookupTitleCompare($search, $title);
                };
                // NOTE(review): relies on the PageIndex instance exposing a
                // MetadataIndex property - confirm against PageIndex.
                foreach ($Indexer->MetadataIndex->lookupKey('title', $id, $func) as $p_id) {
                    if (!isset($pages[$p_id])) {
                        $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                    }
                }
            }
        }

        // keep only pages inside the requested namespace
        if ($ns !== null) {
            foreach (array_keys($pages) as $p_id) {
                if (strpos($p_id, $ns) !== 0) {
                    unset($pages[$p_id]);
                }
            }
        }

        // discard hidden pages
        // discard nonexistent pages
        // check ACL permissions
        foreach (array_keys($pages) as $idx) {
            if (!isVisiblePage($idx) || !page_exists($idx) || auth_quickaclcheck($idx) < AUTH_READ) {
                unset($pages[$idx]);
            }
        }

        $pages = static::filterResultsByTime($pages, $data['after'], $data['before']);

        uksort($pages, static::class.'::pagesorter');
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. This function is a wrapper around stripos with
     * adapted argument order and return value.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true when $search occurs (case-insensitively) in $title
     */
    protected static function pageLookupTitleCompare($search, $title)
    {
        return stripos($title, $search) !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected static function pagesorter($a, $b)
    {
        $ac = count(explode(':', $a));
        $bc = count(explode(':', $b));
        if ($ac < $bc) {
            return -1;
        } elseif ($ac > $bc) {
            return 1;
        }
        return strcmp($a, $b);
    }

    /**
     * Restrict results to pages modified inside a time window
     *
     * A limit that is empty, or that strtotime() cannot parse, is ignored.
     *
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestamp or strtotime arguments
     * @param int|string $before  only returns results with mtime before this date,
     *                            accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected static function filterResultsByTime(array $results, $after, $before)
    {
        if (!$after && !$before) {
            return $results;
        }

        // Only one of the limits may be set; never hand null to strtotime()
        // (deprecated since PHP 8.1), treat it as "no limit" instead.
        if (!is_int($after)) {
            $after = ($after === null) ? false : strtotime($after);
        }
        if (!is_int($before)) {
            $before = ($before === null) ? false : strtotime($before);
        }

        foreach ($results as $id => $value) {
            $mTime = filemtime(wikiFN($id));
            if ($after && $after > $mTime) {
                unset($results[$id]);
                continue;
            }
            if ($before && $before < $mTime) {
                unset($results[$id]);
            }
        }

        return $results;
    }
}