1<?php
2/**
3 * This file is meant to be run from the command line and indexes all pages
4 *
5 * @package    solr
6 * @author     Gabriel Birke <birke@d-scribe.de>
7 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
8 */
9
// Import DokuWiki constants from the environment. This allows, for example,
// several DokuWiki installations that share this plugin through symlinks.
// A constant that is already defined is never overridden; an environment
// variable that is not set leaves the constant undefined.
$constants = array(
    'DOKU_INC', 'DOKU_PLUGIN', 'DOKU_CONF', 'DOKU_E_LEVEL',
    'DOKU_REL', 'DOKU_URL', 'DOKU_BASE', 'DOKU_LF', 'DOKU_TAB',
    'DOKU_COOKIE', 'DOKU_SCRIPT', 'DOKU_TPL', 'DOKU_TPLINC'
);
foreach ($constants as $const) {
    if (defined($const)) {
        continue;
    }
    $env_var = getenv($const);
    if ($env_var !== false) {
        define($const, $env_var);
    }
}

// Base directory used to locate inc/init.php: DOKU_INC when it is known,
// otherwise the directory three levels above this script.
if (defined('DOKU_INC')) {
    $ini_path = DOKU_INC;
} else {
    $plugin_root = dirname(__FILE__).'/../../..';
    $resolved = realpath($plugin_root);
    // realpath() returns false when the path cannot be resolved; fall back to
    // the unresolved relative path instead of silently ending up with "/".
    $ini_path = ($resolved !== false ? $resolved : $plugin_root).'/';
}
25
26require_once($ini_path.'inc/init.php');
27require_once(DOKU_INC.'inc/common.php');
28require_once(DOKU_INC.'inc/search.php');
29require_once(DOKU_INC.'inc/pageutils.php');
30require_once DOKU_INC.'inc/cliopts.php';
31require_once(dirname(__FILE__).'/AddDocument.php');
32require_once(dirname(__FILE__).'/Pageinfo.php');
33
// Parse the command line options.
$short_opts = 'hqpd';
$long_opts  = array('help', 'quiet', 'progress', 'delete');
$OPTS = Doku_Cli_Opts::getOptions(__FILE__, $short_opts, $long_opts);
if ($OPTS->isError()) {
    fwrite(STDERR, $OPTS->getMessage() . "\n");
    _usage();
    exit(1);
}

// Collect all flags first and act on them afterwards, so that asking for help
// never has a side effect (such as wiping the index) just because of the
// order in which the options were given.
$QUIET    = false;
$PROGRESS = false;
$DELETE   = false;
foreach ($OPTS->options as $key => $val) {
    switch ($key) {
        case 'h':
        case 'help':
            _usage();
            exit;
        case 'q':
        case 'quiet':
            $QUIET = true;
            break;
        case 'p':
        case 'progress':
            $PROGRESS = true;
            break;
        case 'd':
        case 'delete':
            $DELETE = true;
            break;
    }
}

$solr = plugin_load("helper", "solr");
if (!$solr) {
    fwrite(STDERR, "Could not load the solr helper plugin\n");
    exit(1);
}

if ($DELETE) {
    // Remove every document from the index before re-adding the pages.
    $solr->solr_query('update', "stream.body=".urlencode('<delete><query>*:*</query></delete>')."&commit=true");
}

/**
 * Commit within n milliseconds
 */
define('COMMIT_WITHIN', 10000);

// Shared state passed by reference to the search callback.
$data = array(
    'global_count' => 0,
    'errors' => array()
);
$opts = array();
$start = microtime(true);
search($data, $conf['datadir'], 'search_solr_index', $opts, '');

if (!$QUIET) {
    printf("\nImported %d pages in %0.3f seconds\n", $data['global_count'], microtime(true)-$start);
    if (!empty($data['errors'])) {
        echo "\nThe following pages encountered an error while importing:\n";
        foreach ($data['errors'] as $err) {
            echo "\n{$err['id']}";
        }
    }
    echo "\n";
}

/**
 * Callback for search(): sends one page to Solr.
 *
 * Each file is sent as its own update request so that a failure can be
 * attributed to a single page and does not affect the other pages.
 *
 * @param array  $data  shared state; 'global_count' is incremented for every
 *                      processed file and failed pages are appended to
 *                      'errors' as array('id' => ..., 'result' => ...)
 * @param string $base  base directory of the search (unused)
 * @param string $file  path of the current entry, converted to a page id
 * @param string $type  entry type; only 'f' entries are indexed
 * @param int    $lvl   recursion level (unused)
 * @param array  $opts  search options (unused)
 * @return bool always true
 */
function search_solr_index(&$data,$base,$file,$type,$lvl,$opts) {
    global $QUIET, $PROGRESS, $solr;

    if ($type != 'f') {
        return true;
    }

    // Build the <add> document for this single page.
    $id = pathID($file);
    $info = new Solr_Pageinfo($id);
    $writer = new XmlWriter();
    $writer->openMemory();
    $doc = new Solr_AddDocument($writer);
    $doc->start(COMMIT_WITHIN);
    $doc->addPage($info->getFields());
    $doc->end();
    $xmldoc = $writer->outputMemory();

    $result = $solr->solr_query('update', '', 'POST', $xmldoc);

    // An empty or non-XML answer must be recorded as a failed page rather
    // than abort the whole run, so parse defensively: simplexml_load_string()
    // returns false on invalid input.
    $xml = false;
    if (is_string($result) && $result !== '') {
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($result);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }
    if ($xml === false || $xml->getName() != "response") {
        $data['errors'][] = array('id' => $id, 'result' => $result);
    }

    $data['global_count']++;

    // Print a progress dot every 100 pages, but only when progress output was
    // requested and quiet mode is off.
    if ($PROGRESS && !$QUIET && $data['global_count'] % 100 == 0) {
        echo ".";
    }
    return true;
}
129
/**
 * Print the command line help text to standard output.
 *
 * Lists the options accepted by this script; it does not terminate the
 * script, callers decide whether to exit afterwards.
 */
function _usage() {
  print "Usage: index_all.php <options>

  Update Solr index for all pages.

    OPTIONS
        -h, --help     show this help and exit
        -q, --quiet    don't produce any output
        -p, --progress show progress
        -d, --delete   Delete all pages from index before updating
";
}
142
143
144