xref: /plugin/sphinxsearch-was/SphinxSearch.php (revision 135:a06e60d2438c)
1<?php
2/*
3 * To change this template, choose Tools | Templates
4 * and open the template in the editor.
5 */
6
/**
 * Thin wrapper around SphinxClient that builds extended-syntax queries over
 * the namespace/pagename/title/body fields, runs them against one index and
 * turns the matches into page result rows with highlighted excerpts.
 *
 * Relies on names defined outside this file: SphinxClient, PageMapper,
 * auth_quickaclcheck(), AUTH_READ, p_render(), p_get_instructions(),
 * p_wiki_xhtml() and getSectionByTitleLevel().
 */
class SphinxSearch
{
    /** @var SphinxClient|null client used for every query and excerpt call */
    private $_sphinx = null;
    /** @var array|false raw result of the last SphinxClient::Query() call */
    private $_result = array();
    /** @var string|null index name passed to Query() and BuildExcerpts() */
    private $_index = null;
    /** @var string query string built by the setSearch*() methods */
    private $_query = '';
    /** @var int number of mapped rows getPages() examined on its last run */
    private $_offset = 0;

    private $_snippetSize = 256;
    private $_aroundKeyword = 5;
    private $_resultsPerPage = 10;

    private $_titlePriority = 1;
    private $_bodyPriority = 1;
    private $_namespacePriority = 1;
    private $_pagenamePriority = 1;

    /**
     * Creates the Sphinx client, points it at the given server and switches
     * it to the SPH_MATCH_EXTENDED2 match mode.
     *
     * @param string $host  searchd host
     * @param int    $port  searchd port
     * @param string $index index name used for searching and excerpts
     */
    public function  __construct($host, $port, $index)
    {
        $this->_sphinx = new SphinxClient();
        $this->_sphinx->SetServer($host, $port);
        $this->_sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);

        $this->_index = $index;
    }

    /**
     * Builds a query matching the starred keywords in namespace/pagename OR
     * the plain keywords in body/title.
     *
     * @param string $keywords   raw user keywords
     * @param mixed  $categories not used by this method; kept for signature
     *                           parity with the other setSearch*() methods
     */
    public function setSearchAllQuery($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);
        $this->_query = "(@(namespace,pagename) $starKeyword) | (@(body,title) {$keywords})";
    }

    /**
     * Builds a query that requires $categories in namespace/pagename AND the
     * keywords in body/title (plain) or namespace/pagename (starred).
     *
     * A leading "-" in $categories is kept as an exclusion operator and the
     * remainder is wrapped in double quotes.
     *
     * NOTE(review): $categories is interpolated into the query without
     * escaping — confirm callers pass trusted / pre-escaped values.
     *
     * @param string $keywords   raw user keywords
     * @param string $categories category expression
     */
    public function setSearchAllQueryWithCategoryFilter($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);
        if (strpos($categories, "-") === 0) {
            $categories = '-"' . substr($categories, 1) . '"';
        }
        $this->_query = "(@(namespace,pagename) {$categories}) & ((@(body,title) {$keywords}) | (@(namespace,pagename) {$starKeyword}))";
    }

    /**
     * Builds a query restricted to the namespace/pagename fields, optionally
     * prefixed with a category expression.
     *
     * @param string $keywords   raw user keywords
     * @param string $categories category expression; ignored when empty
     */
    public function setSearchCategoryQuery($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);

        if (!empty($categories)) {
            $this->_query = "(@(namespace,pagename) $categories $starKeyword)";
        } else {
            $this->_query = "(@(namespace,pagename) $starKeyword)";
        }
    }

    /**
     * Wraps the current query so it is evaluated against the pagename field.
     */
    public function setSearchOnlyPagename()
    {
        $this->_query = "(@(pagename) {$this->_query})";
    }

    /**
     * Runs the current query against the configured index.
     *
     * Requests 100 rows more than $resultsPerPage so getPages() still has
     * candidates left after dropping rows the user may not read.
     *
     * @param int $start          offset of the first match to fetch
     * @param int $resultsPerPage number of pages wanted per result page
     * @return bool true when at least one match came back
     */
    public function search($start, $resultsPerPage = 10)
    {
        $this->_resultsPerPage = $resultsPerPage;

        $this->_sphinx->SetFieldWeights(
            array(
                'namespace' => $this->_namespacePriority,
                'pagename' => $this->_pagenamePriority,
                'title' => $this->_titlePriority,
                'body' => $this->_bodyPriority
            )
        );

        $this->_sphinx->SetLimits($start, $resultsPerPage + 100, 1000);

        $this->_result = $this->_sphinx->Query($this->_query, $this->_index);

        return !empty($this->_result['matches']);
    }

    /**
     * Converts the matches of the last search() into result rows with
     * excerpts, keeping only rows whose page passes the AUTH_READ check.
     *
     * Stops once rows for `resultsPerPage` distinct pages were collected and
     * records how many mapped rows were examined (see getOffset()). Each
     * unreadable row decrements the stored total_found counter.
     *
     * @param string $keywords keywords handed to the excerpt builder
     * @return array|false rows keyed like the PageMapper result, each with
     *                     page, bodyExcerpt, titleTextExcerpt, hid, title and
     *                     title_text; false when nothing is available
     */
    public function getPages($keywords)
    {
        if (empty($this->_result['matches'])) {
            return false;
        }

        $this->_offset = 0;
        $distinctPages = 0;
        $seenPages = array();
        $pagesIds = array();
        foreach ($this->getPagesIds() as $id => $pageData) {
            $this->_offset++;
            if (auth_quickaclcheck($pageData['page']) < AUTH_READ) {
                // decrease total found counter for the first page if the page is filtered
                $this->_result['total_found']--;
                continue;
            }
            if (!isset($seenPages[$pageData['page']])) {
                $seenPages[$pageData['page']] = 1;
                $distinctPages++;
            }
            $pagesIds[$id] = $pageData;
            if ($distinctPages == $this->_resultsPerPage) {
                break;
            }
        }
        if (empty($pagesIds)) {
            return false;
        }

        $body = array();
        $titleText = array();
        foreach ($pagesIds as $crc => $data) {
            if (empty($data['page'])) {
                continue;
            }
            if (!empty($data['hid'])) {
                // the row has a section id: render only that section
                $bodyHtml = p_render('xhtml', p_get_instructions(getSectionByTitleLevel($data['page'], $data['title'], true)), $info);
            } else {
                $bodyHtml = p_wiki_xhtml($data['page']);
            }
            // put a ";" after each list item so items stay separated once tags are stripped
            $bodyHtml = preg_replace("#[\s]+?</li>#", "</li>;", $bodyHtml);
            $bodyHtml = htmlspecialchars_decode($bodyHtml);
            $body[$crc] = strip_tags($bodyHtml);
            if (!empty($data['title_text'])) {
                $titleText[$crc] = strip_tags($data['title_text']);
            } else {
                $titleText[$crc] = $data['page'];
            }
        }

        $bodyExcerpt = $this->getExcerpt($body, $keywords);
        $titleTextExcerpt = $this->getExcerpt($titleText, $keywords);

        // Excerpts are read positionally, in the iteration order of $body.
        // isset() also covers a failed excerpt call that returned false.
        $position = 0;
        $results = array();
        foreach ($body as $crc => $notused) {
            $row = $pagesIds[$crc];
            $results[$crc] = array(
                'page' => $row['page'],
                'bodyExcerpt' => isset($bodyExcerpt[$position]) ? $bodyExcerpt[$position] : '',
                'titleTextExcerpt' => isset($titleTextExcerpt[$position]) ? $titleTextExcerpt[$position] : '',
                'hid' => isset($row['hid']) ? $row['hid'] : null,
                'title' => isset($row['title']) ? $row['title'] : null,
                'title_text' => isset($row['title_text']) ? $row['title_text'] : null
            );
            $position++;
        }
        return $results;
    }

    /**
     * Maps the match ids of the last search to page rows via PageMapper.
     *
     * @return array PageMapper::getByCrc() result; an empty array when the
     *               last search produced no matches (or never ran)
     */
    public function getPagesIds()
    {
        if (empty($this->_result['matches'])) {
            return array();
        }

        $pageMapper = new PageMapper();

        return $pageMapper->getByCrc(array_keys($this->_result['matches']));
    }

    /**
     * @return int number of mapped rows examined by the last getPages() call
     *             (0 before the first call)
     */
    public function getOffset()
    {
        return $this->_offset;
    }

    /**
     * Returns the readable pages of the last search.
     *
     * @return array map of page => hid for rows passing the AUTH_READ check
     */
    public function getPageNames()
    {
        $matchPages = array();
        foreach ($this->getPagesIds() as $page) {
            if (auth_quickaclcheck($page['page']) < AUTH_READ) {
                continue;
            }
            $matchPages[$page['page']] = $page['hid'];
        }

        return $matchPages;
    }

    /**
     * @return string last error message reported by the Sphinx client
     */
    public function getError()
    {
        return $this->_sphinx->GetLastError();
    }

    /**
     * @return int total_found of the last result, or 0 when it is empty/unset
     */
    public function getTotalFound()
    {
        return !empty($this->_result['total_found']) ? $this->_result['total_found'] : 0;
    }

    /**
     * Asks Sphinx to build excerpts for the given documents.
     *
     * @param array  $data  document texts
     * @param string $query keywords to highlight
     * @return array|false whatever SphinxClient::BuildExcerpts() returns
     */
    public function getExcerpt($data, $query)
    {
        return $this->_sphinx->BuildExcerpts(
            $data,
            $this->_index,
            $query,
            array(
                'limit' => $this->_snippetSize,
                'around' => $this->_aroundKeyword,
                'weight_order' => 1,
                'sp' => 1
            )
        );
    }

    /**
     * Wraps every space-separated word of the query in "*" wildcards.
     *
     * Existing leading/trailing stars are removed first. Empty tokens (from
     * repeated spaces or a word made only of stars) are dropped so no bare
     * "**" term ends up in the query; an empty query yields an empty string.
     *
     * @param string $query
     * @return string
     */
    public function starQuery($query)
    {
        $starred = array();
        foreach (explode(" ", $this->removeStars($query)) as $word) {
            if ($word === '') {
                continue;
            }
            $starred[] = "*" . $word . "*";
        }
        return implode(" ", $starred);
    }

    /**
     * Strips leading and trailing "*" from every space-separated word.
     *
     * @param string $query
     * @return string
     */
    public function removeStars($query)
    {
        $words = explode(" ", $query);
        foreach ($words as $id => $word) {
            $words[$id] = trim($word, "*");
        }
        return implode(" ", $words);
    }

    /**
     * @return string the query string built so far
     */
    public function getQuery()
    {
        return $this->_query;
    }

    /**
     * @param int $symbols value sent as the excerpt 'limit' option
     */
    public function setSnippetSize($symbols = 256)
    {
        $this->_snippetSize = $symbols;
    }

    /**
     * @param int $words value sent as the excerpt 'around' option
     */
    public function setArroundWordsCount($words = 5)
    {
        $this->_aroundKeyword = $words;
    }

    /**
     * @param int $priority field weight for 'title'
     */
    public function setTitlePriority($priority)
    {
        $this->_titlePriority = $priority;
    }

    /**
     * @param int $priority field weight for 'body'
     */
    public function setBodyPriority($priority)
    {
        $this->_bodyPriority = $priority;
    }

    /**
     * @param int $priority field weight for 'namespace'
     */
    public function setNamespacePriority($priority)
    {
        $this->_namespacePriority = $priority;
    }

    /**
     * @param int $priority field weight for 'pagename'
     */
    public function setPagenamePriority($priority)
    {
        $this->_pagenamePriority = $priority;
    }

    /**
     * Escapes the raw keywords with the Sphinx client, then re-enables the
     * quote and minus operators (see _enableQuotesAndDefis()).
     *
     * @param string $keywords raw user keywords
     * @return string keywords ready to be placed into a query
     */
    private function _prepareKeywords($keywords)
    {
        return $this->_enableQuotesAndDefis($this->_sphinx->EscapeString($keywords));
    }

    /**
     * Undoes part of the escaping so users can still use phrase quotes and
     * the exclusion operator.
     *
     * - When the query holds a non-zero, even number of double quotes, every
     *   \" is turned back into ".
     * - A \- that follows whitespace and precedes a word character becomes
     *   " -" again.
     *
     * @param string $query escaped query
     * @return string
     */
    private function _enableQuotesAndDefis($query)
    {
        // a leading space lets the regex below also match a "\-" at the very start
        $query = ' ' . $query;
        $quotesCount = substr_count($query, '"');
        if ($quotesCount && $quotesCount % 2 == 0) {
            $query = str_replace('\"', '"', $query);
        }
        $query = preg_replace("#\s\\\-(\w)#ui", " -$1", $query);

        // drop the helper space again
        return substr($query, 1);
    }
}
286