<?php
/*
 * xref: /plugin/sphinxsearch-was/SphinxSearch.php (revision 135:a06e60d2438c)
 *
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

/**
 * Wrapper around a SphinxClient instance that builds extended-syntax queries
 * over the "namespace", "pagename", "title" and "body" fields, runs them
 * against one index and turns the matches into ACL-filtered result rows with
 * highlighted excerpts.
 *
 * Typical call order: one of the setSearch*Query() methods, then search(),
 * then getPages() or getPageNames().
 */
class SphinxSearch
{
    /** @var SphinxClient|null Client used for queries and excerpt building. */
    private $_sphinx = null;

    /** @var array Raw result of the last search(); empty array when nothing was found or the query failed. */
    private $_result = array();

    /** @var string|null Index name passed to Query() and BuildExcerpts(). */
    private $_index = null;

    /** @var string Query string built by the setSearch*() methods. */
    private $_query = '';

    /** @var int Number of match rows consumed by the last getPages() call. */
    private $_offset = 0;

    /** @var int Value of the "limit" excerpt option. */
    private $_snippetSize = 256;

    /** @var int Value of the "around" excerpt option. */
    private $_aroundKeyword = 5;

    /** @var int Number of distinct pages getPages() collects before stopping. */
    private $_resultsPerPage = 10;

    /** @var int Field weight for "title". */
    private $_titlePriority = 1;

    /** @var int Field weight for "body". */
    private $_bodyPriority = 1;

    /** @var int Field weight for "namespace". */
    private $_namespacePriority = 1;

    /** @var int Field weight for "pagename". */
    private $_pagenamePriority = 1;

    /**
     * Creates the Sphinx client and switches it to extended (v2) match mode.
     *
     * @param string $host  searchd host
     * @param int    $port  searchd port
     * @param string $index index name used for every query and excerpt request
     */
    public function __construct($host, $port, $index)
    {
        $this->_sphinx = new SphinxClient();
        $this->_sphinx->SetServer($host, $port);
        $this->_sphinx->SetMatchMode(SPH_MATCH_EXTENDED2);

        $this->_index = $index;
    }

    /**
     * Builds a query that matches the wildcarded keywords in namespace/pagename
     * OR the plain keywords in body/title.
     *
     * @param string $keywords   raw user keywords
     * @param mixed  $categories not used by this query; kept for signature
     *                           parity with the other setSearch*() methods
     * @return void
     */
    public function setSearchAllQuery($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);

        $this->_query = "(@(namespace,pagename) $starKeyword) | (@(body,title) {$keywords})";
    }

    /**
     * Builds the same query as setSearchAllQuery() but additionally requires
     * the category expression to match in namespace/pagename.
     *
     * A category string starting with "-" is rewritten to an excluded quoted
     * phrase (-"...").
     *
     * NOTE(review): $categories is inserted into the query without escaping;
     * callers must pass a trusted / pre-validated expression.
     *
     * @param string $keywords   raw user keywords
     * @param string $categories category expression
     * @return void
     */
    public function setSearchAllQueryWithCategoryFilter($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);

        if (strpos($categories, "-") === 0) {
            $categories = '-"' . substr($categories, 1) . '"';
        }

        $this->_query = "(@(namespace,pagename) {$categories}) & ((@(body,title) {$keywords}) | (@(namespace,pagename) {$starKeyword}))";
    }

    /**
     * Builds a query restricted to the namespace/pagename fields: the optional
     * category expression followed by the wildcarded keywords.
     *
     * @param string $keywords   raw user keywords
     * @param string $categories category expression; skipped when empty
     * @return void
     */
    public function setSearchCategoryQuery($keywords, $categories)
    {
        $keywords = $this->_prepareKeywords($keywords);
        $starKeyword = $this->starQuery($keywords);

        if (!empty($categories)) {
            $this->_query = "(@(namespace,pagename) $categories $starKeyword)";
        } else {
            $this->_query = "(@(namespace,pagename) $starKeyword)";
        }
    }

    /**
     * Wraps the query built so far in a "pagename"-only field restriction.
     *
     * @return void
     */
    public function setSearchOnlyPagename()
    {
        $this->_query = "(@(pagename) {$this->_query})";
    }

    /**
     * Runs the current query against the configured index.
     *
     * The limit sent to Sphinx is $resultsPerPage + 100 (capped at 1000
     * matches) - presumably headroom for rows that getPages() later drops or
     * merges; confirm before changing.
     *
     * @param int $start          offset of the first match to fetch
     * @param int $resultsPerPage number of distinct pages wanted per page
     * @return bool true when at least one match was returned
     */
    public function search($start, $resultsPerPage = 10)
    {
        $this->_resultsPerPage = $resultsPerPage;

        $this->_sphinx->SetFieldWeights(
            array(
                'namespace' => $this->_namespacePriority,
                'pagename' => $this->_pagenamePriority,
                'title' => $this->_titlePriority,
                'body' => $this->_bodyPriority
            )
        );

        $this->_sphinx->SetLimits($start, $resultsPerPage + 100, 1000);

        $result = $this->_sphinx->Query($this->_query, $this->_index);
        // Query() yields a non-array on failure; keep $_result an array so the
        // accessors below never index into a boolean. getError() still reports
        // the failure because it asks the client directly.
        $this->_result = is_array($result) ? $result : array();

        return !empty($this->_result['matches']);
    }

    /**
     * Turns the matches of the last search() into displayable result rows.
     *
     * Rows the current user may not read are skipped (and subtracted from the
     * total-found counter). Collection stops once $resultsPerPage distinct
     * pages have been gathered; the number of rows consumed is available via
     * getOffset().
     *
     * @param string $keywords words to highlight in the excerpts
     * @return array|false rows keyed like the PageMapper result, each with
     *                     page, bodyExcerpt, titleTextExcerpt, hid, title and
     *                     title_text; false when there is nothing to show
     */
    public function getPages($keywords)
    {
        if (empty($this->_result['matches'])) {
            return false;
        }

        $pagesIds = $this->_collectReadablePages($this->getPagesIds());
        if (empty($pagesIds)) {
            return false;
        }

        $body = array();
        $titleText = array();
        foreach ($pagesIds as $crc => $data) {
            if (empty($data['page'])) {
                continue;
            }
            if (!empty($data['hid'])) {
                // Section hit: render only the section under the matched heading.
                $info = array();
                $bodyHtml = p_render('xhtml', p_get_instructions(getSectionByTitleLevel($data['page'], $data['title'], true)), $info);
            } else {
                $bodyHtml = p_wiki_xhtml($data['page']);
            }
            // Close each list item with ";" before the tags are stripped.
            $bodyHtml = preg_replace("#[\s]+?</li>#", "</li>;", $bodyHtml);
            $bodyHtml = htmlspecialchars_decode($bodyHtml);
            $body[$crc] = strip_tags($bodyHtml);
            if (!empty($data['title_text'])) {
                $titleText[$crc] = strip_tags($data['title_text']);
            } else {
                $titleText[$crc] = $data['page'];
            }
        }

        if (empty($body)) {
            // Nothing renderable: same (empty) result as before, without
            // sending empty excerpt requests to searchd.
            return array();
        }

        $bodyExcerpt = $this->getExcerpt($body, $keywords);
        $titleTextExcerpt = $this->getExcerpt($titleText, $keywords);
        // BuildExcerpts() yields a non-array on failure.
        if (!is_array($bodyExcerpt)) {
            $bodyExcerpt = array();
        }
        if (!is_array($titleTextExcerpt)) {
            $titleTextExcerpt = array();
        }

        // The excerpts are read back by position, in the same order the
        // documents were passed in, so walk $body with a counter.
        $i = 0;
        $results = array();
        foreach ($body as $crc => $notused) {
            $row = $pagesIds[$crc];
            $results[$crc] = array(
                'page' => $row['page'],
                'bodyExcerpt' => isset($bodyExcerpt[$i]) ? $bodyExcerpt[$i] : '',
                'titleTextExcerpt' => isset($titleTextExcerpt[$i]) ? $titleTextExcerpt[$i] : '',
                'hid' => isset($row['hid']) ? $row['hid'] : null,
                'title' => isset($row['title']) ? $row['title'] : null,
                'title_text' => isset($row['title_text']) ? $row['title_text'] : null
            );
            $i++;
        }
        return $results;
    }

    /**
     * Maps the match ids of the last search to page records via PageMapper.
     *
     * @return array page records; empty array when the last search had no matches
     */
    public function getPagesIds()
    {
        if (empty($this->_result['matches'])) {
            return array();
        }

        $pageMapper = new PageMapper();

        return $pageMapper->getByCrc(array_keys($this->_result['matches']));
    }

    /**
     * @return int number of match rows the last getPages() call walked through
     */
    public function getOffset()
    {
        return $this->_offset;
    }

    /**
     * Lists the readable pages among the matches of the last search.
     *
     * @return array map of page id => hid (a later row for the same page
     *               overwrites an earlier one)
     */
    public function getPageNames()
    {
        $matchPages = array();
        foreach ($this->getPagesIds() as $page) {
            if (auth_quickaclcheck($page['page']) < AUTH_READ) {
                continue;
            }
            $matchPages[$page['page']] = $page['hid'];
        }

        return $matchPages;
    }

    /**
     * @return string last error message reported by the Sphinx client
     */
    public function getError()
    {
        return $this->_sphinx->GetLastError();
    }

    /**
     * @return int total number of matches found, reduced by the rows
     *             getPages() filtered out; 0 when unknown
     */
    public function getTotalFound()
    {
        return !empty($this->_result['total_found']) ? $this->_result['total_found'] : 0;
    }

    /**
     * Asks Sphinx to build highlighted excerpts for the given documents.
     *
     * @param array  $data  document texts
     * @param string $query words to highlight
     * @return array|false whatever BuildExcerpts() returns
     */
    public function getExcerpt($data, $query)
    {
        return $this->_sphinx->BuildExcerpts(
            $data,
            $this->_index,
            $query,
            array(
                'limit' => $this->_snippetSize,
                'around' => $this->_aroundKeyword,
                'weight_order' => 1,
                'sp' => 1
            )
        );
    }

    /**
     * Wraps every space-separated word of the query in "*" wildcards.
     *
     * Existing leading/trailing stars are removed first. Empty tokens (from
     * repeated spaces or star-only words) are dropped so no bare "**" term is
     * produced.
     *
     * @param string $query
     * @return string
     */
    public function starQuery($query)
    {
        $starred = array();
        foreach (explode(" ", $this->removeStars($query)) as $word) {
            if ($word === '') {
                continue;
            }
            $starred[] = "*" . $word . "*";
        }
        return implode(" ", $starred);
    }

    /**
     * Strips leading and trailing "*" from every space-separated word.
     *
     * @param string $query
     * @return string
     */
    public function removeStars($query)
    {
        $words = explode(" ", $query);
        foreach ($words as $id => $word) {
            $words[$id] = trim($word, "*");
        }
        return implode(" ", $words);
    }

    /**
     * @return string the query string built so far
     */
    public function getQuery()
    {
        return $this->_query;
    }

    /**
     * @param int $symbols value for the "limit" excerpt option
     * @return void
     */
    public function setSnippetSize($symbols = 256)
    {
        $this->_snippetSize = $symbols;
    }

    /**
     * @param int $words value for the "around" excerpt option
     * @return void
     */
    public function setArroundWordsCount($words = 5)
    {
        $this->_aroundKeyword = $words;
    }

    /**
     * @param int $priority field weight for "title"
     * @return void
     */
    public function setTitlePriority($priority)
    {
        $this->_titlePriority = $priority;
    }

    /**
     * @param int $priority field weight for "body"
     * @return void
     */
    public function setBodyPriority($priority)
    {
        $this->_bodyPriority = $priority;
    }

    /**
     * @param int $priority field weight for "namespace"
     * @return void
     */
    public function setNamespacePriority($priority)
    {
        $this->_namespacePriority = $priority;
    }

    /**
     * @param int $priority field weight for "pagename"
     * @return void
     */
    public function setPagenamePriority($priority)
    {
        $this->_pagenamePriority = $priority;
    }

    /**
     * Escapes the raw keywords with the client's EscapeString() and then
     * re-enables the phrase quotes and "-word" operators.
     *
     * @param string $keywords raw user keywords
     * @return string
     */
    private function _prepareKeywords($keywords)
    {
        return $this->_enableQuotesAndDefis($this->_sphinx->EscapeString($keywords));
    }

    /**
     * Walks the mapped match rows, keeps the ones the current user may read
     * and stops once $_resultsPerPage distinct pages have been seen.
     *
     * Side effects: resets and advances $_offset for every row visited and
     * decrements the stored total_found for every unreadable row.
     *
     * @param array $pagesIdsAll rows as returned by getPagesIds()
     * @return array the readable rows, original keys preserved
     */
    private function _collectReadablePages($pagesIdsAll)
    {
        $this->_offset = 0;
        $distinctPages = 0;
        $seenPages = array();
        $readable = array();

        foreach ($pagesIdsAll as $id => $pageData) {
            $this->_offset++;

            if (auth_quickaclcheck($pageData['page']) < AUTH_READ) {
                // decrease total found counter if the page is filtered
                if (isset($this->_result['total_found'])) {
                    $this->_result['total_found']--;
                }
                continue;
            }

            if (!isset($seenPages[$pageData['page']])) {
                $seenPages[$pageData['page']] = 1;
                $distinctPages++;
            }
            $readable[$id] = $pageData;
            if ($distinctPages == $this->_resultsPerPage) {
                break;
            }
        }

        return $readable;
    }

    /**
     * Undoes part of the escaping so that phrase quotes and the exclusion
     * hyphen ("defis") work as operators again.
     *
     * - \" is turned back into " only when the query contains a non-zero, even
     *   number of double quotes (i.e. the quotes are balanced).
     * - a whitespace-preceded \-x is turned back into -x.
     *
     * @param string $query escaped query
     * @return string
     */
    private function _enableQuotesAndDefis($query)
    {
        // Leading space lets the "\s\-" pattern also match at the very start.
        $query = ' ' . $query;

        $quotesCount = substr_count($query, '"');
        if ($quotesCount && $quotesCount % 2 == 0) {
            $query = str_replace('\"', '"', $query);
        }

        $unescaped = preg_replace("#\s\\\-(\w)#ui", " -$1", $query);
        // preg_replace() returns null on error (e.g. invalid UTF-8 with /u);
        // keep the escaped text in that case instead of losing the query.
        if ($unescaped !== null) {
            $query = $unescaped;
        }

        // Drop the helper space again; cast because substr() may return false
        // for a one-character string on older PHP versions.
        return (string) substr($query, 1);
    }
}
286