<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\MetadataIndex;
use dokuwiki\Search\QueryParser;

/**
 * Class DokuWiki Metadata Search
 *
 * Static helpers that query the metadata index for backlinks, media usage
 * and page names/titles.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Metadata Search constructor. prevent direct object creation
     */
    protected function __construct() {}

    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     */
    public static function backlinks($id, $ignore_perms = false)
    {
        return static::lookupRelation('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     */
    public static function mediause($id, $ignore_perms = false)
    {
        return static::lookupRelation('relation_media', $id, $ignore_perms);
    }

    /**
     * Shared implementation of backlinks() and mediause()
     *
     * Looks up the given metadata key in the metadata index and drops every
     * page that does not exist. Unless $ignore_perms is exactly boolean true,
     * hidden pages and pages without read permission are dropped as well.
     *
     * @param string $key          metadata index key ('relation_references' or 'relation_media')
     * @param string $id           the page or media id to look up
     * @param bool   $ignore_perms only the strict boolean true disables the permission checks
     * @return array sorted, re-indexed list of page ids
     */
    protected static function lookupRelation($key, $id, $ignore_perms)
    {
        $result = MetadataIndex::getInstance()->lookupKey($key, $id);

        if (!count($result)) return $result;

        foreach (array_keys($result) as $idx) {
            $page = $result[$idx];

            // check ACL permissions (skipped only for an explicit boolean true)
            $denied = $ignore_perms !== true
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ);

            if ($denied || !page_exists($page, '', false)) {
                unset($result[$idx]);
            }
        }

        // sort() also re-indexes the list after the removals above
        sort($result);
        return $result;
    }

    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[]
     */
    public static function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        $action = static::class . '::pageLookupCallBack';
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $action);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event.
     *
     * @param array $data event data: 'id', 'in_ns', 'in_title' and optionally 'after', 'before'
     * @return string[]
     */
    public static function pageLookupCallBack(&$data)
    {
        // split out original parameters
        $id = $data['id'];
        $in_ns = $data['in_ns'];
        $in_title = $data['in_title'];
        // the time bounds are optional, do not rely on the keys being present
        $after = isset($data['after']) ? $data['after'] : null;
        $before = isset($data['before']) ? $data['before'] : null;

        // a namespace given in the query restricts the results to that namespace
        $ns = null;
        $parsedQuery = QueryParser::convert($id);
        if (count($parsedQuery['ns']) > 0) {
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $cleaned = cleanID($id);

        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            $MetadataIndex = MetadataIndex::getInstance();

            // match against the page name, or the full id when $in_ns is set
            foreach ($MetadataIndex->getPages() as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false && !isset($pages[$p_id])) {
                    $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                }
            }

            if ($in_title) {
                // The comparator is protected, so a string callable naming it
                // cannot be invoked from inside another class. A closure keeps
                // this class' scope and is callable from anywhere.
                $titleMatcher = static function ($search, $title) {
                    return static::pageLookupTitleCompare($search, $title);
                };
                foreach ($MetadataIndex->lookupKey('title', $id, $titleMatcher) as $p_id) {
                    if (!isset($pages[$p_id])) {
                        $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                    }
                }
            }
        }

        if ($ns !== null) {
            foreach (array_keys($pages) as $p_id) {
                if (strpos($p_id, $ns) !== 0) {
                    unset($pages[$p_id]);
                }
            }
        }

        // discard hidden pages
        // discard nonexistent pages
        // check ACL permissions
        foreach (array_keys($pages) as $idx) {
            if (!isVisiblePage($idx) || !page_exists($idx) || auth_quickaclcheck($idx) < AUTH_READ) {
                unset($pages[$idx]);
            }
        }

        $pages = static::filterResultsByTime($pages, $after, $before);

        uksort($pages, static::class . '::pagesorter');
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. This function is a wrapper around stripos with
     * adapted argument order and return value.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true when $title contains $search (case-insensitive)
     */
    protected static function pageLookupTitleCompare($search, $title)
    {
        return stripos($title, $search) !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected static function pagesorter($a, $b)
    {
        // the number of colons is the namespace depth of an id
        $depthA = substr_count($a, ':');
        $depthB = substr_count($b, ':');
        if ($depthA !== $depthB) {
            return $depthA < $depthB ? -1 : 1;
        }
        return strcmp($a, $b);
    }

    /**
     * Removes results whose page file was modified outside the given time range
     *
     * A bound that is empty or cannot be turned into a timestamp is ignored.
     *
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestamp or strtotime arguments
     * @param int|string $before  only returns results with mtime before this date,
     *                            accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected static function filterResultsByTime(array $results, $after, $before)
    {
        $after = static::toTimestamp($after);
        $before = static::toTimestamp($before);

        // nothing usable to filter by, avoid touching the file system at all
        if (!$after && !$before) {
            return $results;
        }

        foreach (array_keys($results) as $id) {
            $mTime = filemtime(wikiFN($id));
            $tooOld = $after && $after > $mTime;
            $tooNew = $before && $before < $mTime;
            if ($tooOld || $tooNew) {
                unset($results[$id]);
            }
        }
        return $results;
    }

    /**
     * Converts a time bound into a unix timestamp
     *
     * Integers are returned unchanged and non-empty strings are passed to
     * strtotime(). Anything else (including null) yields false, so that
     * strtotime() is never called with a non-string argument.
     *
     * @param int|string|null $time
     * @return int|false timestamp, or false when no usable bound was given
     */
    protected static function toTimestamp($time)
    {
        if (is_int($time)) {
            return $time;
        }
        if (!is_string($time) || $time === '') {
            return false;
        }
        return strtotime($time);
    }
}