<?php
/**
 * Common DokuWiki functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

// Fall back to the parent directory of this file when no install root was set.
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
// NOTE(review): DOKU_CONF is not defined in this file - it must already be
// defined by whatever includes this script; confirm against the caller.
require_once(DOKU_CONF.'dokuwiki.php');
require_once(DOKU_INC.'inc/io.php');
require_once(DOKU_INC.'inc/utf8.php');
require_once(DOKU_INC.'inc/parserutils.php');

/**
 * Build a word => occurrence count map for a single page
 *
 * The page text is fetched with rawWiki(), passed through
 * utf8_stripspecials() and utf8_strtolower(), trimmed and then split on
 * single spaces. Empty tokens are dropped, and so are non-numeric tokens
 * shorter than three bytes. Numeric tokens are kept whatever their length.
 *
 * Each token is counted by looking it up in the result array itself. This
 * keeps the count correct even when the sort does not place identical tokens
 * next to each other, and it never increments a key that does not exist yet
 * (tokens such as "1" and "01" are counted separately).
 *
 * based upon class.search_indexer_phpcms.php::index_entry
 *
 * @param  string $id  page identifier handed to rawWiki()
 * @return array       token => number of occurrences; PHP stores tokens that
 *                     are canonical decimal integers under integer keys
 */
function idx_getPageWords($id){
    $body  = rawWiki($id);
    // presumably replaces special characters by a blank - see inc/utf8.php
    $body  = utf8_stripspecials($body,' ','._\-:');
    $body  = utf8_strtolower($body);
    $body  = trim($body);
    $words = explode(' ',$body);
    // sorting is kept so that the key order of the result stays the same
    sort($words);

    $index = array(); //resulting index

    //compact wordlist FIXME check for stopwords

    foreach($words as $word){
        // consecutive blanks produce empty tokens
        if(strlen($word) == 0) continue;

        // the word was accepted before - just increase the counter
        if(isset($index[$word])){
            $index[$word]++;
            continue;
        }

        // checking minimum word-size (excepting numbers)
        // strlen() counts bytes, not UTF-8 characters
        // FIXME add config option for min wordsize
        if(!is_numeric($word) && strlen($word) < 3) continue;

        //FIXME add stopword check

        // first occurrence - add to index
        $index[$word] = 1;
    }

    return $index;
}

//Setup VIM: ex: et ts=4 enc=utf-8 :