<?php
/**
 * This file is meant to be run from the command line and indexes all pages
 *
 * @package    solr
 * @author     Gabriel Birke <birke@d-scribe.de>
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 */

// Import DokuWiki constants from environment. This for example allows multiple
// DokuWiki installations with symlinks.
$constants = array(
    'DOKU_INC', 'DOKU_PLUGIN', 'DOKU_CONF', 'DOKU_E_LEVEL',
    'DOKU_REL', 'DOKU_URL', 'DOKU_BASE', 'DOKU_LF', 'DOKU_TAB',
    'DOKU_COOKIE', 'DOKU_SCRIPT', 'DOKU_TPL', 'DOKU_TPLINC'
);
foreach ($constants as $const) {
    if (!defined($const)) {
        $env_var = getenv($const);
        // getenv() returns false for unset variables; only define what was
        // actually provided so DokuWiki can fall back to its own defaults.
        if ($env_var !== false) {
            define($const, $env_var);
        }
    }
}
// Without DOKU_INC from the environment, assume this script lives three
// directory levels below the DokuWiki root.
$ini_path = defined('DOKU_INC') ? DOKU_INC : realpath(dirname(__FILE__).'/../../../').'/';

require_once($ini_path.'inc/init.php');
require_once(DOKU_INC.'inc/common.php');
require_once(DOKU_INC.'inc/search.php');
require_once(DOKU_INC.'inc/pageutils.php');
require_once DOKU_INC.'inc/cliopts.php';
require_once(dirname(__FILE__).'/AddDocument.php');
require_once(dirname(__FILE__).'/Pageinfo.php');

// handle options
$short_opts = 'hqpd';
$long_opts  = array('help', 'quiet', 'progress', 'delete');
$OPTS = Doku_Cli_Opts::getOptions(__FILE__, $short_opts, $long_opts);
if ($OPTS->isError()) {
    fwrite(STDERR, $OPTS->getMessage() . "\n");
    _usage();
    exit(1);
}

$QUIET    = false;
$PROGRESS = false;
$DELETE   = false;
foreach ($OPTS->options as $key => $val) {
    switch ($key) {
        case 'd':
        case 'delete':
            // Only remember the request here; the index is wiped after all
            // options are parsed so that e.g. "-d -h" never deletes anything.
            $DELETE = true;
            break;
        case 'h':
        case 'help':
            _usage();
            exit;
        case 'q':
        case 'quiet':
            $QUIET = true;
            break;
        case 'p':
        case 'progress':
            $PROGRESS = true;
            break;
    }
}

$solr = plugin_load("helper", "solr");
if (!$solr) {
    // Fail with a readable message instead of a fatal error on first use.
    fwrite(STDERR, "Could not load the solr helper plugin\n");
    exit(1);
}

if ($DELETE) {
    // Remove every document from the index and commit immediately.
    $solr->solr_query('update', "stream.body=".urlencode('<delete><query>*:*</query></delete>')."&commit=true");
}

/**
 * Commit within n milliseconds
 */
define('COMMIT_WITHIN', 10000);

// Accumulator handed by reference to the search callback:
//   global_count - number of page files processed so far
//   errors       - list of array('id' => page id, 'result' => raw Solr reply)
$data = array(
    'global_count' => 0,
    'errors'       => array()
);
$opts  = array();
$start = microtime(true);
search($data, $conf['datadir'], 'search_solr_index', $opts, '');

if (!$QUIET) {
    printf("\nImported %d pages in %0.3f seconds\n", $data['global_count'], microtime(true) - $start);
    if (!empty($data['errors'])) {
        echo "\nThe following pages encountered an error while importing:\n";
        foreach ($data['errors'] as $err) {
            echo "\n{$err['id']}";
        }
    }
    echo "\n";
}

/**
 * Callback for search(): sends a single page to Solr.
 *
 * Each file is posted as its own update request so that a failure can be
 * attributed to one page and does not prevent the other pages from being
 * imported.
 *
 * @param array  $data  accumulator with the keys 'global_count' and 'errors' (modified in place)
 * @param string $base  unused here
 * @param string $file  file path, converted to a page id with pathID()
 * @param string $type  entry type; only 'f' entries are indexed
 * @param int    $lvl   unused here
 * @param array  $opts  unused here
 * @return bool always true (presumably tells search() to keep descending - confirm against inc/search.php)
 */
function search_solr_index(&$data,$base,$file,$type,$lvl,$opts) {
    global $QUIET, $PROGRESS, $solr;

    if ($type != 'f') {
        return true;
    }

    // Import each file individually to detect errors and minimize unimported docs
    $id     = pathID($file);
    $info   = new Solr_Pageinfo($id);
    $writer = new XMLWriter();
    $writer->openMemory();
    $doc = new Solr_AddDocument($writer);
    $doc->start(COMMIT_WITHIN);
    $doc->addPage($info->getFields());
    $doc->end();
    $xmldoc = $writer->outputMemory();
    $result = $solr->solr_query('update', '', 'POST', $xmldoc);

    // Check response
    if (!_solr_is_response($result)) {
        $data['errors'][] = array('id' => $id, 'result' => $result);
    }

    $data['global_count']++;
    // Show progress dots every 100 pages, but only when requested and not silenced
    if ($PROGRESS && !$QUIET && !($data['global_count'] % 100)) {
        echo ".";
    }
    return true;
}

/**
 * Tell whether a raw Solr reply is well-formed XML with a <response> root.
 *
 * Empty, non-string or unparsable replies count as failures instead of
 * causing a fatal error when the parsed document is inspected.
 *
 * @param mixed $result raw reply as returned by the solr helper
 * @return bool true if the reply parses and its root element is "response"
 */
function _solr_is_response($result) {
    if (!is_string($result) || $result === '') {
        return false;
    }
    // Collect parser errors internally so malformed replies do not spill
    // libxml warnings into the command line output.
    $previous = libxml_use_internal_errors(true);
    $xml = simplexml_load_string($result);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $xml !== false && $xml->getName() == "response";
}

/**
 * Print the command line help text.
 */
function _usage() {
    print "Usage: index_all.php <options>

    Update Solr index for all pages.

    OPTIONS
        -h, --help     show this help and exit
        -q, --quiet    don't produce any output
        -p, --progress show progress
        -d, --delete   Delete all pages from index before updating
";
}