<?php
/**
 * Common DokuWiki functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

  if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
  require_once(DOKU_CONF.'dokuwiki.php');
  require_once(DOKU_INC.'inc/io.php');
  require_once(DOKU_INC.'inc/utf8.php');
  require_once(DOKU_INC.'inc/parserutils.php');

/**
 * Build a word => occurrence-count list for a single wiki page
 *
 * The raw page text is fetched with rawWiki(), passed through
 * utf8_stripspecials() (called with ' ' and '._\-:' -- presumably the
 * replacement character and additional characters to strip; see inc/utf8.php
 * to confirm), lowercased, trimmed and split on single spaces.
 *
 * Empty tokens are dropped. Non-numeric tokens shorter than three bytes are
 * dropped as well; numeric tokens are kept regardless of their length.
 *
 * based upon class.search_indexer_phpcms.php::index_entry
 *
 * @param  string $id  page id handed to rawWiki()
 * @return array       associative array mapping each kept word to the number
 *                     of times it occurs, in the order produced by sort()
 */
function idx_getPageWords($id){
    $body  = rawWiki($id);
    $body  = utf8_stripspecials($body,' ','._\-:');
    $body  = utf8_strtolower($body);
    $body  = trim($body);
    $words = explode(' ',$body);

    // sorting is kept so the keys of the result come out in sorted order
    sort($words);

    $index = array();   //resulting index

    //compact wordlist FIXME check for stopwords
    foreach($words as $word){
        // consecutive spaces in the body yield empty tokens
        if($word === '') continue;

        // checking minimum word-size (excepting numbers)
        // strlen() counts bytes, not UTF-8 characters
        #FIXME add config option for min wordsize
        if(!is_numeric($word) && strlen($word) < 3) continue;

        //FIXME add stopword check

        // Count every word under its own exact key. A loose comparison with
        // the previous word would treat numeric strings like "10" and "010"
        // as equal and increment an entry that was never created.
        if(isset($index[$word])){
            $index[$word]++;
        }else{
            $index[$word] = 1;
        }
    }

    return $index;
}



//Setup VIM: ex: et ts=4 enc=utf-8 :