1<?php 2 3// 4// $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $ 5// 6 7// 8// Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved. 9// 10// This program is free software; you can redistribute it and/or modify 11// it under the terms of the GNU General Public License. You should have 12// received a copy of the GPL license along with this program; if you 13// did not, you can find it at http://www.gnu.org/ 14// 15 16///////////////////////////////////////////////////////////////////////////// 17// PHP version of Sphinx searchd client (PHP API) 18///////////////////////////////////////////////////////////////////////////// 19 20/// known searchd commands 21define ( "SEARCHD_COMMAND_SEARCH", 0 ); 22define ( "SEARCHD_COMMAND_EXCERPT", 1 ); 23define ( "SEARCHD_COMMAND_UPDATE", 2 ); 24define ( "SEARCHD_COMMAND_KEYWORDS",3 ); 25define ( "SEARCHD_COMMAND_PERSIST", 4 ); 26define ( "SEARCHD_COMMAND_STATUS", 5 ); 27define ( "SEARCHD_COMMAND_QUERY", 6 ); 28 29/// current client-side command implementation versions 30define ( "VER_COMMAND_SEARCH", 0x116 ); 31define ( "VER_COMMAND_EXCERPT", 0x100 ); 32define ( "VER_COMMAND_UPDATE", 0x102 ); 33define ( "VER_COMMAND_KEYWORDS", 0x100 ); 34define ( "VER_COMMAND_STATUS", 0x100 ); 35define ( "VER_COMMAND_QUERY", 0x100 ); 36 37/// known searchd status codes 38define ( "SEARCHD_OK", 0 ); 39define ( "SEARCHD_ERROR", 1 ); 40define ( "SEARCHD_RETRY", 2 ); 41define ( "SEARCHD_WARNING", 3 ); 42 43/// known match modes 44define ( "SPH_MATCH_ALL", 0 ); 45define ( "SPH_MATCH_ANY", 1 ); 46define ( "SPH_MATCH_PHRASE", 2 ); 47define ( "SPH_MATCH_BOOLEAN", 3 ); 48define ( "SPH_MATCH_EXTENDED", 4 ); 49define ( "SPH_MATCH_FULLSCAN", 5 ); 50define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED) 51 52/// known ranking modes (ext2 only) 53define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one 54define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality) 55define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1 56define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts 57define ( "SPH_RANK_PROXIMITY", 4 ); 58define ( "SPH_RANK_MATCHANY", 5 ); 59define ( "SPH_RANK_FIELDMASK", 6 ); 60 61/// known sort modes 62define ( "SPH_SORT_RELEVANCE", 0 ); 63define ( "SPH_SORT_ATTR_DESC", 1 ); 64define ( "SPH_SORT_ATTR_ASC", 2 ); 65define ( "SPH_SORT_TIME_SEGMENTS", 3 ); 66define ( "SPH_SORT_EXTENDED", 4 ); 67define ( "SPH_SORT_EXPR", 5 ); 68 69/// known filter types 70define ( "SPH_FILTER_VALUES", 0 ); 71define ( "SPH_FILTER_RANGE", 1 ); 72define ( "SPH_FILTER_FLOATRANGE", 2 ); 73 74/// known attribute types 75define ( "SPH_ATTR_INTEGER", 1 ); 76define ( "SPH_ATTR_TIMESTAMP", 2 ); 77define ( "SPH_ATTR_ORDINAL", 3 ); 78define ( "SPH_ATTR_BOOL", 4 ); 79define ( "SPH_ATTR_FLOAT", 5 ); 80define ( "SPH_ATTR_BIGINT", 6 ); 81define ( "SPH_ATTR_MULTI", 0x40000000 ); 82 83/// known grouping functions 84define ( "SPH_GROUPBY_DAY", 0 ); 85define ( "SPH_GROUPBY_WEEK", 1 ); 86define ( "SPH_GROUPBY_MONTH", 2 ); 87define ( "SPH_GROUPBY_YEAR", 3 ); 88define ( "SPH_GROUPBY_ATTR", 4 ); 89define ( "SPH_GROUPBY_ATTRPAIR", 5 ); 90 91// important properties of PHP's integers: 92// - always signed (one bit short of PHP_INT_SIZE) 93// - conversion from string to int is saturated 94// - float is double 95// - div converts arguments to floats 96// - mod converts arguments to ints 97 98// the packing code below works as follows: 99// - when we got an int, just pack it 100// if performance is a problem, this is the branch users should aim for 101// 102// - otherwise, we got a number in string form 103// this might be due to different reasons, but we assume that this is 104// because it didn't fit into PHP int 105// 106// - factor the string into high and low ints for packing 107// - if we have bcmath, then it is used 108// - if we don't, we have to do it manually (this is the fun part) 109// 110// - x64 branch does factoring using ints 111// - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int 112// 113// unpacking routines are pretty much the same. 114// - return ints if we can 115// - otherwise format number into a string 116 117/// pack 64-bit signed 118function sphPackI64 ( $v ) 119{ 120 assert ( is_numeric($v) ); 121 122 // x64 123 if ( PHP_INT_SIZE>=8 ) 124 { 125 $v = (int)$v; 126 return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); 127 } 128 129 // x32, int 130 if ( is_int($v) ) 131 return pack ( "NN", $v < 0 ? -1 : 0, $v ); 132 133 // x32, bcmath 134 if ( function_exists("bcmul") ) 135 { 136 if ( bccomp ( $v, 0 ) == -1 ) 137 $v = bcadd ( "18446744073709551616", $v ); 138 $h = bcdiv ( $v, "4294967296", 0 ); 139 $l = bcmod ( $v, "4294967296" ); 140 return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit 141 } 142 143 // x32, no-bcmath 144 $p = max(0, strlen($v) - 13); 145 $lo = abs((float)substr($v, $p)); 146 $hi = abs((float)substr($v, 0, $p)); 147 148 $m = $lo + $hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912 149 $q = floor($m/4294967296.0); 150 $l = $m - ($q*4294967296.0); 151 $h = $hi*2328.0 + $q; // (10 ^ 13) / (1 << 32) = 2328 152 153 if ( $v<0 ) 154 { 155 if ( $l==0 ) 156 $h = 4294967296.0 - $h; 157 else 158 { 159 $h = 4294967295.0 - $h; 160 $l = 4294967296.0 - $l; 161 } 162 } 163 return pack ( "NN", $h, $l ); 164} 165 166/// pack 64-bit unsigned 167function sphPackU64 ( $v ) 168{ 169 assert ( is_numeric($v) ); 170 171 // x64 172 if ( PHP_INT_SIZE>=8 ) 173 { 174 assert ( $v>=0 ); 175 176 // x64, int 177 if ( is_int($v) ) 178 return pack ( "NN", $v>>32, $v&0xFFFFFFFF ); 179 180 // x64, bcmath 181 if ( function_exists("bcmul") ) 182 { 183 $h = bcdiv ( $v, 4294967296, 0 ); 184 $l = bcmod ( $v, 4294967296 ); 185 return pack ( "NN", $h, $l ); 186 } 187 188 // x64, no-bcmath 189 $p = max ( 0, strlen($v) - 13 ); 190 $lo = (int)substr ( $v, $p ); 191 $hi = (int)substr ( $v, 0, $p ); 192 193 $m = $lo + $hi*1316134912; 194 $l = $m % 4294967296; 195 $h = $hi*2328 + (int)($m/4294967296); 196 197 return pack ( "NN", $h, $l ); 198 } 199 200 // x32, int 201 if ( is_int($v) ) 202 return pack ( "NN", 0, $v ); 203 204 // x32, bcmath 205 if ( function_exists("bcmul") ) 206 { 207 $h = bcdiv ( $v, "4294967296", 0 ); 208 $l = bcmod ( $v, "4294967296" ); 209 return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit 210 } 211 212 // x32, no-bcmath 213 $p = max(0, strlen($v) - 13); 214 $lo = (float)substr($v, $p); 215 $hi = (float)substr($v, 0, $p); 216 217 $m = $lo + $hi*1316134912.0; 218 $q = floor($m / 4294967296.0); 219 $l = $m - ($q * 4294967296.0); 220 $h = $hi*2328.0 + $q; 221 222 return pack ( "NN", $h, $l ); 223} 224 225// unpack 64-bit unsigned 226function sphUnpackU64 ( $v ) 227{ 228 list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); 229 230 if ( PHP_INT_SIZE>=8 ) 231 { 232 if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again 233 if ( $lo<0 ) $lo += (1<<32); 234 235 // x64, int 236 if ( $hi<=2147483647 ) 237 return ($hi<<32) + $lo; 238 239 // x64, bcmath 240 if ( function_exists("bcmul") ) 241 return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); 242 243 // x64, no-bcmath 244 $C = 100000; 245 $h = ((int)($hi / $C) << 32) + (int)($lo / $C); 246 $l = (($hi % $C) << 32) + ($lo % $C); 247 if ( $l>$C ) 248 { 249 $h += (int)($l / $C); 250 $l = $l % $C; 251 } 252 253 if ( $h==0 ) 254 return $l; 255 return sprintf ( "%d%05d", $h, $l ); 256 } 257 258 // x32, int 259 if ( $hi==0 ) 260 { 261 if ( $lo>0 ) 262 return $lo; 263 return sprintf ( "%u", $lo ); 264 } 265 266 $hi = sprintf ( "%u", $hi ); 267 $lo = sprintf ( "%u", $lo ); 268 269 // x32, bcmath 270 if ( function_exists("bcmul") ) 271 return bcadd ( $lo, bcmul ( $hi, "4294967296" ) ); 272 273 // x32, no-bcmath 274 $hi = (float)$hi; 275 $lo = (float)$lo; 276 277 $q = floor($hi/10000000.0); 278 $r = $hi - $q*10000000.0; 279 $m = $lo + $r*4967296.0; 280 $mq = floor($m/10000000.0); 281 $l = $m - $mq*10000000.0; 282 $h = $q*4294967296.0 + $r*429.0 + $mq; 283 284 $h = sprintf ( "%.0f", $h ); 285 $l = sprintf ( "%07.0f", $l ); 286 if ( $h=="0" ) 287 return sprintf( "%.0f", (float)$l ); 288 return $h . $l; 289} 290 291// unpack 64-bit signed 292function sphUnpackI64 ( $v ) 293{ 294 list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) ); 295 296 // x64 297 if ( PHP_INT_SIZE>=8 ) 298 { 299 if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again 300 if ( $lo<0 ) $lo += (1<<32); 301 302 return ($hi<<32) + $lo; 303 } 304 305 // x32, int 306 if ( $hi==0 ) 307 { 308 if ( $lo>0 ) 309 return $lo; 310 return sprintf ( "%u", $lo ); 311 } 312 // x32, int 313 elseif ( $hi==-1 ) 314 { 315 if ( $lo<0 ) 316 return $lo; 317 return sprintf ( "%.0f", $lo - 4294967296.0 ); 318 } 319 320 $neg = ""; 321 $c = 0; 322 if ( $hi<0 ) 323 { 324 $hi = ~$hi; 325 $lo = ~$lo; 326 $c = 1; 327 $neg = "-"; 328 } 329 330 $hi = sprintf ( "%u", $hi ); 331 $lo = sprintf ( "%u", $lo ); 332 333 // x32, bcmath 334 if ( function_exists("bcmul") ) 335 return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $c ); 336 337 // x32, no-bcmath 338 $hi = (float)$hi; 339 $lo = (float)$lo; 340 341 $q = floor($hi/10000000.0); 342 $r = $hi - $q*10000000.0; 343 $m = $lo + $r*4967296.0; 344 $mq = floor($m/10000000.0); 345 $l = $m - $mq*10000000.0 + $c; 346 $h = $q*4294967296.0 + $r*429.0 + $mq; 347 if ( $l==10000000 ) 348 { 349 $l = 0; 350 $h += 1; 351 } 352 353 $h = sprintf ( "%.0f", $h ); 354 $l = sprintf ( "%07.0f", $l ); 355 if ( $h=="0" ) 356 return $neg . sprintf( "%.0f", (float)$l ); 357 return $neg . $h . $l; 358} 359 360 361function sphFixUint ( $value ) 362{ 363 if ( PHP_INT_SIZE>=8 ) 364 { 365 // x64 route, workaround broken unpack() in 5.2.2+ 366 if ( $value<0 ) $value += (1<<32); 367 return $value; 368 } 369 else 370 { 371 // x32 route, workaround php signed/unsigned braindamage 372 return sprintf ( "%u", $value ); 373 } 374} 375 376 377/// sphinx searchd client class 378class SphinxClient 379{ 380 var $_host; ///< searchd host (default is "localhost") 381 var $_port; ///< searchd port (default is 9312) 382 var $_offset; ///< how many records to seek from result-set start (default is 0) 383 var $_limit; ///< how many records to return from result-set starting at offset (default is 20) 384 var $_mode; ///< query matching mode (default is SPH_MATCH_ALL) 385 var $_weights; ///< per-field weights (default is 1 for all fields) 386 var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE) 387 var $_sortby; ///< attribute to sort by (defualt is "") 388 var $_min_id; ///< min ID to match (default is 0, which means no limit) 389 var $_max_id; ///< max ID to match (default is 0, which means no limit) 390 var $_filters; ///< search filters 391 var $_groupby; ///< group-by attribute name 392 var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with) 393 var $_groupsort; ///< group-by sorting clause (to sort groups in result set with) 394 var $_groupdistinct;///< group-by count-distinct attribute 395 var $_maxmatches; ///< max matches to retrieve 396 var $_cutoff; ///< cutoff to stop searching at (default is 0) 397 var $_retrycount; ///< distributed retries count 398 var $_retrydelay; ///< distributed retries delay 399 var $_anchor; ///< geographical anchor point 400 var $_indexweights; ///< per-index weights 401 var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25) 402 var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit) 403 var $_fieldweights; ///< per-field-name weights 404 var $_overrides; ///< per-query attribute values overrides 405 var $_select; ///< select-list (attributes or expressions, with optional aliases) 406 407 var $_error; ///< last error message 408 var $_warning; ///< last warning message 409 var $_connerror; ///< connection error vs remote error flag 410 411 var $_reqs; ///< requests array for multi-query 412 var $_mbenc; ///< stored mbstring encoding 413 var $_arrayresult; ///< whether $result["matches"] should be a hash or an array 414 var $_timeout; ///< connect timeout 415 416 ///////////////////////////////////////////////////////////////////////////// 417 // common stuff 418 ///////////////////////////////////////////////////////////////////////////// 419 420 /// create a new client object and fill defaults 421 function SphinxClient () 422 { 423 // per-client-object settings 424 $this->_host = "localhost"; 425 $this->_port = 9312; 426 $this->_path = false; 427 $this->_socket = false; 428 429 // per-query settings 430 $this->_offset = 0; 431 $this->_limit = 20; 432 $this->_mode = SPH_MATCH_ALL; 433 $this->_weights = array (); 434 $this->_sort = SPH_SORT_RELEVANCE; 435 $this->_sortby = ""; 436 $this->_min_id = 0; 437 $this->_max_id = 0; 438 $this->_filters = array (); 439 $this->_groupby = ""; 440 $this->_groupfunc = SPH_GROUPBY_DAY; 441 $this->_groupsort = "@group desc"; 442 $this->_groupdistinct= ""; 443 $this->_maxmatches = 1000; 444 $this->_cutoff = 0; 445 $this->_retrycount = 0; 446 $this->_retrydelay = 0; 447 $this->_anchor = array (); 448 $this->_indexweights= array (); 449 $this->_ranker = SPH_RANK_PROXIMITY_BM25; 450 $this->_maxquerytime= 0; 451 $this->_fieldweights= array(); 452 $this->_overrides = array(); 453 $this->_select = "*"; 454 455 $this->_error = ""; // per-reply fields (for single-query case) 456 $this->_warning = ""; 457 $this->_connerror = false; 458 459 $this->_reqs = array (); // requests storage (for multi-query case) 460 $this->_mbenc = ""; 461 $this->_arrayresult = false; 462 $this->_timeout = 0; 463 } 464 465 function __destruct() 466 { 467 if ( $this->_socket !== false ) 468 fclose ( $this->_socket ); 469 } 470 471 /// get last error message (string) 472 function GetLastError () 473 { 474 return $this->_error; 475 } 476 477 /// get last warning message (string) 478 function GetLastWarning () 479 { 480 return $this->_warning; 481 } 482 483 /// get last error flag (to tell network connection errors from searchd errors or broken responses) 484 function IsConnectError() 485 { 486 return $this->_connerror; 487 } 488 489 /// set searchd host name (string) and port (integer) 490 function SetServer ( $host, $port = 0 ) 491 { 492 assert ( is_string($host) ); 493 if ( $host[0] == '/') 494 { 495 $this->_path = 'unix://' . $host; 496 return; 497 } 498 if ( substr ( $host, 0, 7 )=="unix://" ) 499 { 500 $this->_path = $host; 501 return; 502 } 503 504 assert ( is_int($port) ); 505 $this->_host = $host; 506 $this->_port = $port; 507 $this->_path = ''; 508 509 } 510 511 /// set server connection timeout (0 to remove) 512 function SetConnectTimeout ( $timeout ) 513 { 514 assert ( is_numeric($timeout) ); 515 $this->_timeout = $timeout; 516 } 517 518 519 function _Send ( $handle, $data, $length ) 520 { 521 if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length ) 522 { 523 $this->_error = 'connection unexpectedly closed (timed out?)'; 524 $this->_connerror = true; 525 return false; 526 } 527 return true; 528 } 529 530 ///////////////////////////////////////////////////////////////////////////// 531 532 /// enter mbstring workaround mode 533 function _MBPush () 534 { 535 $this->_mbenc = ""; 536 if ( ini_get ( "mbstring.func_overload" ) & 2 ) 537 { 538 $this->_mbenc = mb_internal_encoding(); 539 mb_internal_encoding ( "latin1" ); 540 } 541 } 542 543 /// leave mbstring workaround mode 544 function _MBPop () 545 { 546 if ( $this->_mbenc ) 547 mb_internal_encoding ( $this->_mbenc ); 548 } 549 550 /// connect to searchd server 551 function _Connect () 552 { 553 if ( $this->_socket!==false ) 554 { 555 // we are in persistent connection mode, so we have a socket 556 // however, need to check whether it's still alive 557 if ( !@feof ( $this->_socket ) ) 558 return $this->_socket; 559 560 // force reopen 561 $this->_socket = false; 562 } 563 564 $errno = 0; 565 $errstr = ""; 566 $this->_connerror = false; 567 568 if ( $this->_path ) 569 { 570 $host = $this->_path; 571 $port = 0; 572 } 573 else 574 { 575 $host = $this->_host; 576 $port = $this->_port; 577 } 578 579 if ( $this->_timeout<=0 ) 580 $fp = @fsockopen ( $host, $port, $errno, $errstr ); 581 else 582 $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout ); 583 584 if ( !$fp ) 585 { 586 if ( $this->_path ) 587 $location = $this->_path; 588 else 589 $location = "{$this->_host}:{$this->_port}"; 590 591 $errstr = trim ( $errstr ); 592 $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)"; 593 $this->_connerror = true; 594 return false; 595 } 596 597 // send my version 598 // this is a subtle part. we must do it before (!) reading back from searchd. 599 // because otherwise under some conditions (reported on FreeBSD for instance) 600 // TCP stack could throttle write-write-read pattern because of Nagle. 601 if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) ) 602 { 603 fclose ( $fp ); 604 $this->_error = "failed to send client protocol version"; 605 return false; 606 } 607 608 // check version 609 list(,$v) = unpack ( "N*", fread ( $fp, 4 ) ); 610 $v = (int)$v; 611 if ( $v<1 ) 612 { 613 fclose ( $fp ); 614 $this->_error = "expected searchd protocol version 1+, got version '$v'"; 615 return false; 616 } 617 618 return $fp; 619 } 620 621 /// get and check response packet from searchd server 622 function _GetResponse ( $fp, $client_ver ) 623 { 624 $response = ""; 625 $len = 0; 626 627 $header = fread ( $fp, 8 ); 628 if ( strlen($header)==8 ) 629 { 630 list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) ); 631 $left = $len; 632 while ( $left>0 && !feof($fp) ) 633 { 634 $chunk = fread ( $fp, $left ); 635 if ( $chunk ) 636 { 637 $response .= $chunk; 638 $left -= strlen($chunk); 639 } 640 } 641 } 642 if ( $this->_socket === false ) 643 fclose ( $fp ); 644 645 // check response 646 $read = strlen ( $response ); 647 if ( !$response || $read!=$len ) 648 { 649 $this->_error = $len 650 ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)" 651 : "received zero-sized searchd response"; 652 return false; 653 } 654 655 // check status 656 if ( $status==SEARCHD_WARNING ) 657 { 658 list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) ); 659 $this->_warning = substr ( $response, 4, $wlen ); 660 return substr ( $response, 4+$wlen ); 661 } 662 if ( $status==SEARCHD_ERROR ) 663 { 664 $this->_error = "searchd error: " . substr ( $response, 4 ); 665 return false; 666 } 667 if ( $status==SEARCHD_RETRY ) 668 { 669 $this->_error = "temporary searchd error: " . substr ( $response, 4 ); 670 return false; 671 } 672 if ( $status!=SEARCHD_OK ) 673 { 674 $this->_error = "unknown status code '$status'"; 675 return false; 676 } 677 678 // check version 679 if ( $ver<$client_ver ) 680 { 681 $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work", 682 $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff ); 683 } 684 685 return $response; 686 } 687 688 ///////////////////////////////////////////////////////////////////////////// 689 // searching 690 ///////////////////////////////////////////////////////////////////////////// 691 692 /// set offset and count into result set, 693 /// and optionally set max-matches and cutoff limits 694 function SetLimits ( $offset, $limit, $max=0, $cutoff=0 ) 695 { 696 assert ( is_int($offset) ); 697 assert ( is_int($limit) ); 698 assert ( $offset>=0 ); 699 assert ( $limit>0 ); 700 assert ( $max>=0 ); 701 $this->_offset = $offset; 702 $this->_limit = $limit; 703 if ( $max>0 ) 704 $this->_maxmatches = $max; 705 if ( $cutoff>0 ) 706 $this->_cutoff = $cutoff; 707 } 708 709 /// set maximum query time, in milliseconds, per-index 710 /// integer, 0 means "do not limit" 711 function SetMaxQueryTime ( $max ) 712 { 713 assert ( is_int($max) ); 714 assert ( $max>=0 ); 715 $this->_maxquerytime = $max; 716 } 717 718 /// set matching mode 719 function SetMatchMode ( $mode ) 720 { 721 assert ( $mode==SPH_MATCH_ALL 722 || $mode==SPH_MATCH_ANY 723 || $mode==SPH_MATCH_PHRASE 724 || $mode==SPH_MATCH_BOOLEAN 725 || $mode==SPH_MATCH_EXTENDED 726 || $mode==SPH_MATCH_FULLSCAN 727 || $mode==SPH_MATCH_EXTENDED2 ); 728 $this->_mode = $mode; 729 } 730 731 /// set ranking mode 732 function SetRankingMode ( $ranker ) 733 { 734 assert ( $ranker==SPH_RANK_PROXIMITY_BM25 735 || $ranker==SPH_RANK_BM25 736 || $ranker==SPH_RANK_NONE 737 || $ranker==SPH_RANK_WORDCOUNT 738 || $ranker==SPH_RANK_PROXIMITY ); 739 $this->_ranker = $ranker; 740 } 741 742 /// set matches sorting mode 743 function SetSortMode ( $mode, $sortby="" ) 744 { 745 assert ( 746 $mode==SPH_SORT_RELEVANCE || 747 $mode==SPH_SORT_ATTR_DESC || 748 $mode==SPH_SORT_ATTR_ASC || 749 $mode==SPH_SORT_TIME_SEGMENTS || 750 $mode==SPH_SORT_EXTENDED || 751 $mode==SPH_SORT_EXPR ); 752 assert ( is_string($sortby) ); 753 assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 ); 754 755 $this->_sort = $mode; 756 $this->_sortby = $sortby; 757 } 758 759 /// bind per-field weights by order 760 /// DEPRECATED; use SetFieldWeights() instead 761 function SetWeights ( $weights ) 762 { 763 assert ( is_array($weights) ); 764 foreach ( $weights as $weight ) 765 assert ( is_int($weight) ); 766 767 $this->_weights = $weights; 768 } 769 770 /// bind per-field weights by name 771 function SetFieldWeights ( $weights ) 772 { 773 assert ( is_array($weights) ); 774 foreach ( $weights as $name=>$weight ) 775 { 776 assert ( is_string($name) ); 777 assert ( is_int($weight) ); 778 } 779 $this->_fieldweights = $weights; 780 } 781 782 /// bind per-index weights by name 783 function SetIndexWeights ( $weights ) 784 { 785 assert ( is_array($weights) ); 786 foreach ( $weights as $index=>$weight ) 787 { 788 assert ( is_string($index) ); 789 assert ( is_int($weight) ); 790 } 791 $this->_indexweights = $weights; 792 } 793 794 /// set IDs range to match 795 /// only match records if document ID is beetwen $min and $max (inclusive) 796 function SetIDRange ( $min, $max ) 797 { 798 assert ( is_numeric($min) ); 799 assert ( is_numeric($max) ); 800 assert ( $min<=$max ); 801 $this->_min_id = $min; 802 $this->_max_id = $max; 803 } 804 805 /// set values set filter 806 /// only match records where $attribute value is in given set 807 function SetFilter ( $attribute, $values, $exclude=false ) 808 { 809 assert ( is_string($attribute) ); 810 assert ( is_array($values) ); 811 assert ( count($values) ); 812 813 if ( is_array($values) && count($values) ) 814 { 815 foreach ( $values as $value ) 816 assert ( is_numeric($value) ); 817 818 $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values ); 819 } 820 } 821 822 /// set range filter 823 /// only match records if $attribute value is beetwen $min and $max (inclusive) 824 function SetFilterRange ( $attribute, $min, $max, $exclude=false ) 825 { 826 assert ( is_string($attribute) ); 827 assert ( is_numeric($min) ); 828 assert ( is_numeric($max) ); 829 assert ( $min<=$max ); 830 831 $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); 832 } 833 834 /// set float range filter 835 /// only match records if $attribute value is beetwen $min and $max (inclusive) 836 function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false ) 837 { 838 assert ( is_string($attribute) ); 839 assert ( is_float($min) ); 840 assert ( is_float($max) ); 841 assert ( $min<=$max ); 842 843 $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); 844 } 845 846 /// setup anchor point for geosphere distance calculations 847 /// required to use @geodist in filters and sorting 848 /// latitude and longitude must be in radians 849 function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long ) 850 { 851 assert ( is_string($attrlat) ); 852 assert ( is_string($attrlong) ); 853 assert ( is_float($lat) ); 854 assert ( is_float($long) ); 855 856 $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long ); 857 } 858 859 /// set grouping attribute and function 860 function SetGroupBy ( $attribute, $func, $groupsort="@group desc" ) 861 { 862 assert ( is_string($attribute) ); 863 assert ( is_string($groupsort) ); 864 assert ( $func==SPH_GROUPBY_DAY 865 || $func==SPH_GROUPBY_WEEK 866 || $func==SPH_GROUPBY_MONTH 867 || $func==SPH_GROUPBY_YEAR 868 || $func==SPH_GROUPBY_ATTR 869 || $func==SPH_GROUPBY_ATTRPAIR ); 870 871 $this->_groupby = $attribute; 872 $this->_groupfunc = $func; 873 $this->_groupsort = $groupsort; 874 } 875 876 /// set count-distinct attribute for group-by queries 877 function SetGroupDistinct ( $attribute ) 878 { 879 assert ( is_string($attribute) ); 880 $this->_groupdistinct = $attribute; 881 } 882 883 /// set distributed retries count and delay 884 function SetRetries ( $count, $delay=0 ) 885 { 886 assert ( is_int($count) && $count>=0 ); 887 assert ( is_int($delay) && $delay>=0 ); 888 $this->_retrycount = $count; 889 $this->_retrydelay = $delay; 890 } 891 892 /// set result set format (hash or array; hash by default) 893 /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs 894 function SetArrayResult ( $arrayresult ) 895 { 896 assert ( is_bool($arrayresult) ); 897 $this->_arrayresult = $arrayresult; 898 } 899 900 /// set attribute values override 901 /// there can be only one override per attribute 902 /// $values must be a hash that maps document IDs to attribute values 903 function SetOverride ( $attrname, $attrtype, $values ) 904 { 905 assert ( is_string ( $attrname ) ); 906 assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) ); 907 assert ( is_array ( $values ) ); 908 909 $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values ); 910 } 911 912 /// set select-list (attributes or expressions), SQL-like syntax 913 function SetSelect ( $select ) 914 { 915 assert ( is_string ( $select ) ); 916 $this->_select = $select; 917 } 918 919 ////////////////////////////////////////////////////////////////////////////// 920 921 /// clear all filters (for multi-queries) 922 function ResetFilters () 923 { 924 $this->_filters = array(); 925 $this->_anchor = array(); 926 } 927 928 /// clear groupby settings (for multi-queries) 929 function ResetGroupBy () 930 { 931 $this->_groupby = ""; 932 $this->_groupfunc = SPH_GROUPBY_DAY; 933 $this->_groupsort = "@group desc"; 934 $this->_groupdistinct= ""; 935 } 936 937 /// clear all attribute value overrides (for multi-queries) 938 function ResetOverrides () 939 { 940 $this->_overrides = array (); 941 } 942 943 ////////////////////////////////////////////////////////////////////////////// 944 945 /// connect to searchd server, run given search query through given indexes, 946 /// and return the search results 947 function Query ( $query, $index="*", $comment="" ) 948 { 949 assert ( empty($this->_reqs) ); 950 951 $this->AddQuery ( $query, $index, $comment ); 952 $results = $this->RunQueries (); 953 $this->_reqs = array (); // just in case it failed too early 954 955 if ( !is_array($results) ) 956 return false; // probably network error; error message should be already filled 957 958 $this->_error = $results[0]["error"]; 959 $this->_warning = $results[0]["warning"]; 960 if ( $results[0]["status"]==SEARCHD_ERROR ) 961 return false; 962 else 963 return $results[0]; 964 } 965 966 /// helper to pack floats in network byte order 967 function _PackFloat ( $f ) 968 { 969 $t1 = pack ( "f", $f ); // machine order 970 list(,$t2) = unpack ( "L*", $t1 ); // int in machine order 971 return pack ( "N", $t2 ); 972 } 973 974 /// add query to multi-query batch 975 /// returns index into results array from RunQueries() call 976 function AddQuery ( $query, $index="*", $comment="" ) 977 { 978 // mbstring workaround 979 $this->_MBPush (); 980 981 // build request 982 $req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits 983 $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby; 984 $req .= pack ( "N", strlen($query) ) . $query; // query itself 985 $req .= pack ( "N", count($this->_weights) ); // weights 986 foreach ( $this->_weights as $weight ) 987 $req .= pack ( "N", (int)$weight ); 988 $req .= pack ( "N", strlen($index) ) . $index; // indexes 989 $req .= pack ( "N", 1 ); // id64 range marker 990 $req .= sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id ); // id64 range 991 992 // filters 993 $req .= pack ( "N", count($this->_filters) ); 994 foreach ( $this->_filters as $filter ) 995 { 996 $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"]; 997 $req .= pack ( "N", $filter["type"] ); 998 switch ( $filter["type"] ) 999 { 1000 case SPH_FILTER_VALUES: 1001 $req .= pack ( "N", count($filter["values"]) ); 1002 foreach ( $filter["values"] as $value ) 1003 $req .= sphPackI64 ( $value ); 1004 break; 1005 1006 case SPH_FILTER_RANGE: 1007 $req .= sphPackI64 ( $filter["min"] ) . sphPackI64 ( $filter["max"] ); 1008 break; 1009 1010 case SPH_FILTER_FLOATRANGE: 1011 $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] ); 1012 break; 1013 1014 default: 1015 assert ( 0 && "internal error: unhandled filter type" ); 1016 } 1017 $req .= pack ( "N", $filter["exclude"] ); 1018 } 1019 1020 // group-by clause, max-matches count, group-sort clause, cutoff count 1021 $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby; 1022 $req .= pack ( "N", $this->_maxmatches ); 1023 $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort; 1024 $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay ); 1025 $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct; 1026 1027 // anchor point 1028 if ( empty($this->_anchor) ) 1029 { 1030 $req .= pack ( "N", 0 ); 1031 } else 1032 { 1033 $a =& $this->_anchor; 1034 $req .= pack ( "N", 1 ); 1035 $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"]; 1036 $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"]; 1037 $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] ); 1038 } 1039 1040 // per-index weights 1041 $req .= pack ( "N", count($this->_indexweights) ); 1042 foreach ( $this->_indexweights as $idx=>$weight ) 1043 $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight ); 1044 1045 // max query time 1046 $req .= pack ( "N", $this->_maxquerytime ); 1047 1048 // per-field weights 1049 $req .= pack ( "N", count($this->_fieldweights) ); 1050 foreach ( $this->_fieldweights as $field=>$weight ) 1051 $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight ); 1052 1053 // comment 1054 $req .= pack ( "N", strlen($comment) ) . $comment; 1055 1056 // attribute overrides 1057 $req .= pack ( "N", count($this->_overrides) ); 1058 foreach ( $this->_overrides as $key => $entry ) 1059 { 1060 $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"]; 1061 $req .= pack ( "NN", $entry["type"], count($entry["values"]) ); 1062 foreach ( $entry["values"] as $id=>$val ) 1063 { 1064 assert ( is_numeric($id) ); 1065 assert ( is_numeric($val) ); 1066 1067 $req .= sphPackU64 ( $id ); 1068 switch ( $entry["type"] ) 1069 { 1070 case SPH_ATTR_FLOAT: $req .= $this->_PackFloat ( $val ); break; 1071 case SPH_ATTR_BIGINT: $req .= sphPackI64 ( $val ); break; 1072 default: $req .= pack ( "N", $val ); break; 1073 } 1074 } 1075 } 1076 1077 // select-list 1078 $req .= pack ( "N", strlen($this->_select) ) . $this->_select; 1079 1080 // mbstring workaround 1081 $this->_MBPop (); 1082 1083 // store request to requests array 1084 $this->_reqs[] = $req; 1085 return count($this->_reqs)-1; 1086 } 1087 1088 /// connect to searchd, run queries batch, and return an array of result sets 1089 function RunQueries () 1090 { 1091 if ( empty($this->_reqs) ) 1092 { 1093 $this->_error = "no queries defined, issue AddQuery() first"; 1094 return false; 1095 } 1096 1097 // mbstring workaround 1098 $this->_MBPush (); 1099 1100 if (!( $fp = $this->_Connect() )) 1101 { 1102 $this->_MBPop (); 1103 return false; 1104 } 1105 1106 // send query, get response 1107 $nreqs = count($this->_reqs); 1108 $req = join ( "", $this->_reqs ); 1109 $len = 4+strlen($req); 1110 $req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header 1111 1112 if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || 1113 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ) ) 1114 { 1115 $this->_MBPop (); 1116 return false; 1117 } 1118 1119 // query sent ok; we can reset reqs now 1120 $this->_reqs = array (); 1121 1122 // parse and return response 1123 return $this->_ParseSearchResponse ( $response, $nreqs ); 1124 } 1125 1126 /// parse and return search query (or queries) response 1127 function _ParseSearchResponse ( $response, $nreqs ) 1128 { 1129 $p = 0; // current position 1130 $max = strlen($response); // max position for checks, to protect against broken responses 1131 1132 $results = array (); 1133 for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ ) 1134 { 1135 $results[] = array(); 1136 $result =& $results[$ires]; 1137 1138 $result["error"] = ""; 1139 $result["warning"] = ""; 1140 1141 // extract status 1142 list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1143 $result["status"] = $status; 1144 if ( $status!=SEARCHD_OK ) 1145 { 1146 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1147 $message = substr ( $response, $p, $len ); $p += $len; 1148 1149 if ( $status==SEARCHD_WARNING ) 1150 { 1151 $result["warning"] = $message; 1152 } else 1153 { 1154 $result["error"] = $message; 1155 continue; 1156 } 1157 } 1158 1159 // read schema 1160 $fields = array (); 1161 $attrs = array (); 1162 1163 list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1164 while ( $nfields-->0 && $p<$max ) 1165 { 1166 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1167 $fields[] = substr ( $response, $p, $len ); $p += $len; 1168 } 1169 $result["fields"] = $fields; 1170 1171 list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1172 while ( $nattrs-->0 && $p<$max ) 1173 { 1174 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1175 $attr = substr ( $response, $p, $len ); $p += $len; 1176 list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1177 $attrs[$attr] = $type; 1178 } 1179 $result["attrs"] = $attrs; 1180 1181 // read match count 1182 list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1183 list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1184 1185 // read matches 1186 $idx = -1; 1187 while ( $count-->0 && $p<$max ) 1188 { 1189 // index into result array 1190 $idx++; 1191 1192 // parse document id and weight 1193 if ( $id64 ) 1194 { 1195 $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8; 1196 list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1197 } 1198 else 1199 { 1200 list ( $doc, $weight ) = array_values ( unpack ( "N*N*", 1201 substr ( $response, $p, 8 ) ) ); 1202 $p += 8; 1203 $doc = sphFixUint($doc); 1204 } 1205 $weight = sprintf ( "%u", $weight ); 1206 1207 // create match entry 1208 if ( $this->_arrayresult ) 1209 $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight ); 1210 else 1211 $result["matches"][$doc]["weight"] = $weight; 1212 1213 // parse and create attributes 1214 $attrvals = array (); 1215 foreach ( $attrs as $attr=>$type ) 1216 { 1217 // handle 64bit ints 1218 if ( $type==SPH_ATTR_BIGINT ) 1219 { 1220 $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8; 1221 continue; 1222 } 1223 1224 // handle floats 1225 if ( $type==SPH_ATTR_FLOAT ) 1226 { 1227 list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1228 list(,$fval) = unpack ( "f*", pack ( "L", $uval ) ); 1229 $attrvals[$attr] = $fval; 1230 continue; 1231 } 1232 1233 // handle everything else as unsigned ints 1234 list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1235 if ( $type & SPH_ATTR_MULTI ) 1236 { 1237 $attrvals[$attr] = array (); 1238 $nvalues = $val; 1239 while ( $nvalues-->0 && $p<$max ) 1240 { 1241 list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1242 $attrvals[$attr][] = sphFixUint($val); 1243 } 1244 } else 1245 { 1246 $attrvals[$attr] = sphFixUint($val); 1247 } 1248 } 1249 1250 if ( $this->_arrayresult ) 1251 $result["matches"][$idx]["attrs"] = $attrvals; 1252 else 1253 $result["matches"][$doc]["attrs"] = $attrvals; 1254 } 1255 1256 list ( $total, $total_found, $msecs, $words ) = 1257 array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) ); 1258 $result["total"] = sprintf ( "%u", $total ); 1259 $result["total_found"] = sprintf ( "%u", $total_found ); 1260 $result["time"] = sprintf ( "%.3f", $msecs/1000 ); 1261 $p += 16; 1262 1263 while ( $words-->0 && $p<$max ) 1264 { 1265 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1266 $word = substr ( $response, $p, $len ); $p += $len; 1267 list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; 1268 $result["words"][$word] = array ( 1269 "docs"=>sprintf ( "%u", $docs ), 1270 "hits"=>sprintf ( "%u", $hits ) ); 1271 } 1272 } 1273 1274 $this->_MBPop (); 1275 return $results; 1276 } 1277 1278 ///////////////////////////////////////////////////////////////////////////// 1279 // excerpts generation 1280 ///////////////////////////////////////////////////////////////////////////// 1281 1282 /// connect to searchd server, and generate exceprts (snippets) 1283 /// of given documents for given query. returns false on failure, 1284 /// an array of snippets on success 1285 function BuildExcerpts ( $docs, $index, $words, $opts=array() ) 1286 { 1287 assert ( is_array($docs) ); 1288 assert ( is_string($index) ); 1289 assert ( is_string($words) ); 1290 assert ( is_array($opts) ); 1291 1292 $this->_MBPush (); 1293 1294 if (!( $fp = $this->_Connect() )) 1295 { 1296 $this->_MBPop(); 1297 return false; 1298 } 1299 1300 ///////////////// 1301 // fixup options 1302 ///////////////// 1303 1304 if ( !isset($opts["before_match"]) ) $opts["before_match"] = "<b>"; 1305 if ( !isset($opts["after_match"]) ) $opts["after_match"] = "</b>"; 1306 if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... "; 1307 if ( !isset($opts["limit"]) ) $opts["limit"] = 256; 1308 if ( !isset($opts["around"]) ) $opts["around"] = 5; 1309 if ( !isset($opts["exact_phrase"]) ) $opts["exact_phrase"] = false; 1310 if ( !isset($opts["single_passage"]) ) $opts["single_passage"] = false; 1311 if ( !isset($opts["use_boundaries"]) ) $opts["use_boundaries"] = false; 1312 if ( !isset($opts["weight_order"]) ) $opts["weight_order"] = false; 1313 1314 ///////////////// 1315 // build request 1316 ///////////////// 1317 1318 // v.1.0 req 1319 $flags = 1; // remove spaces 1320 if ( $opts["exact_phrase"] ) $flags |= 2; 1321 if ( $opts["single_passage"] ) $flags |= 4; 1322 if ( $opts["use_boundaries"] ) $flags |= 8; 1323 if ( $opts["weight_order"] ) $flags |= 16; 1324 $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags 1325 $req .= pack ( "N", strlen($index) ) . $index; // req index 1326 $req .= pack ( "N", strlen($words) ) . $words; // req words 1327 1328 // options 1329 $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"]; 1330 $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"]; 1331 $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"]; 1332 $req .= pack ( "N", (int)$opts["limit"] ); 1333 $req .= pack ( "N", (int)$opts["around"] ); 1334 1335 // documents 1336 $req .= pack ( "N", count($docs) ); 1337 foreach ( $docs as $doc ) 1338 { 1339 assert ( is_string($doc) ); 1340 $req .= pack ( "N", strlen($doc) ) . $doc; 1341 } 1342 1343 //////////////////////////// 1344 // send query, get response 1345 //////////////////////////// 1346 1347 $len = strlen($req); 1348 $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header 1349 if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || 1350 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) ) 1351 { 1352 $this->_MBPop (); 1353 return false; 1354 } 1355 1356 ////////////////// 1357 // parse response 1358 ////////////////// 1359 1360 $pos = 0; 1361 $res = array (); 1362 $rlen = strlen($response); 1363 for ( $i=0; $i<count($docs); $i++ ) 1364 { 1365 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); 1366 $pos += 4; 1367 1368 if ( $pos+$len > $rlen ) 1369 { 1370 $this->_error = "incomplete reply"; 1371 $this->_MBPop (); 1372 return false; 1373 } 1374 $res[] = $len ? substr ( $response, $pos, $len ) : ""; 1375 $pos += $len; 1376 } 1377 1378 $this->_MBPop (); 1379 return $res; 1380 } 1381 1382 1383 ///////////////////////////////////////////////////////////////////////////// 1384 // keyword generation 1385 ///////////////////////////////////////////////////////////////////////////// 1386 1387 /// connect to searchd server, and generate keyword list for a given query 1388 /// returns false on failure, 1389 /// an array of words on success 1390 function BuildKeywords ( $query, $index, $hits ) 1391 { 1392 assert ( is_string($query) ); 1393 assert ( is_string($index) ); 1394 assert ( is_bool($hits) ); 1395 1396 $this->_MBPush (); 1397 1398 if (!( $fp = $this->_Connect() )) 1399 { 1400 $this->_MBPop(); 1401 return false; 1402 } 1403 1404 ///////////////// 1405 // build request 1406 ///////////////// 1407 1408 // v.1.0 req 1409 $req = pack ( "N", strlen($query) ) . $query; // req query 1410 $req .= pack ( "N", strlen($index) ) . $index; // req index 1411 $req .= pack ( "N", (int)$hits ); 1412 1413 //////////////////////////// 1414 // send query, get response 1415 //////////////////////////// 1416 1417 $len = strlen($req); 1418 $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header 1419 if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || 1420 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) ) 1421 { 1422 $this->_MBPop (); 1423 return false; 1424 } 1425 1426 ////////////////// 1427 // parse response 1428 ////////////////// 1429 1430 $pos = 0; 1431 $res = array (); 1432 $rlen = strlen($response); 1433 list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) ); 1434 $pos += 4; 1435 for ( $i=0; $i<$nwords; $i++ ) 1436 { 1437 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; 1438 $tokenized = $len ? substr ( $response, $pos, $len ) : ""; 1439 $pos += $len; 1440 1441 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; 1442 $normalized = $len ? substr ( $response, $pos, $len ) : ""; 1443 $pos += $len; 1444 1445 $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized ); 1446 1447 if ( $hits ) 1448 { 1449 list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) ); 1450 $pos += 8; 1451 $res [$i]["docs"] = $ndocs; 1452 $res [$i]["hits"] = $nhits; 1453 } 1454 1455 if ( $pos > $rlen ) 1456 { 1457 $this->_error = "incomplete reply"; 1458 $this->_MBPop (); 1459 return false; 1460 } 1461 } 1462 1463 $this->_MBPop (); 1464 return $res; 1465 } 1466 1467 function EscapeString ( $string ) 1468 { 1469 $from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' ); 1470 $to = array ( '\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=' ); 1471 1472 return str_replace ( $from, $to, $string ); 1473 } 1474 1475 ///////////////////////////////////////////////////////////////////////////// 1476 // attribute updates 1477 ///////////////////////////////////////////////////////////////////////////// 1478 1479 /// batch update given attributes in given rows in given indexes 1480 /// returns amount of updated documents (0 or more) on success, or -1 on failure 1481 function UpdateAttributes ( $index, $attrs, $values, $mva=false ) 1482 { 1483 // verify everything 1484 assert ( is_string($index) ); 1485 assert ( is_bool($mva) ); 1486 1487 assert ( is_array($attrs) ); 1488 foreach ( $attrs as $attr ) 1489 assert ( is_string($attr) ); 1490 1491 assert ( is_array($values) ); 1492 foreach ( $values as $id=>$entry ) 1493 { 1494 assert ( is_numeric($id) ); 1495 assert ( is_array($entry) ); 1496 assert ( count($entry)==count($attrs) ); 1497 foreach ( $entry as $v ) 1498 { 1499 if ( $mva ) 1500 { 1501 assert ( is_array($v) ); 1502 foreach ( $v as $vv ) 1503 assert ( is_int($vv) ); 1504 } else 1505 assert ( is_int($v) ); 1506 } 1507 } 1508 1509 // build request 1510 $req = pack ( "N", strlen($index) ) . $index; 1511 1512 $req .= pack ( "N", count($attrs) ); 1513 foreach ( $attrs as $attr ) 1514 { 1515 $req .= pack ( "N", strlen($attr) ) . $attr; 1516 $req .= pack ( "N", $mva ? 1 : 0 ); 1517 } 1518 1519 $req .= pack ( "N", count($values) ); 1520 foreach ( $values as $id=>$entry ) 1521 { 1522 $req .= sphPackU64 ( $id ); 1523 foreach ( $entry as $v ) 1524 { 1525 $req .= pack ( "N", $mva ? count($v) : $v ); 1526 if ( $mva ) 1527 foreach ( $v as $vv ) 1528 $req .= pack ( "N", $vv ); 1529 } 1530 } 1531 1532 // connect, send query, get response 1533 if (!( $fp = $this->_Connect() )) 1534 return -1; 1535 1536 $len = strlen($req); 1537 $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header 1538 if ( !$this->_Send ( $fp, $req, $len+8 ) ) 1539 return -1; 1540 1541 if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) )) 1542 return -1; 1543 1544 // parse response 1545 list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) ); 1546 return $updated; 1547 } 1548 1549 ///////////////////////////////////////////////////////////////////////////// 1550 // persistent connections 1551 ///////////////////////////////////////////////////////////////////////////// 1552 1553 function Open() 1554 { 1555 if ( $this->_socket !== false ) 1556 { 1557 $this->_error = 'already connected'; 1558 return false; 1559 } 1560 if ( !$fp = $this->_Connect() ) 1561 return false; 1562 1563 // command, command version = 0, body length = 4, body = 1 1564 $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 ); 1565 if ( !$this->_Send ( $fp, $req, 12 ) ) 1566 return false; 1567 1568 $this->_socket = $fp; 1569 return true; 1570 } 1571 1572 function Close() 1573 { 1574 if ( $this->_socket === false ) 1575 { 1576 $this->_error = 'not connected'; 1577 return false; 1578 } 1579 1580 fclose ( $this->_socket ); 1581 $this->_socket = false; 1582 1583 return true; 1584 } 1585 1586 ////////////////////////////////////////////////////////////////////////// 1587 // status 1588 ////////////////////////////////////////////////////////////////////////// 1589 1590 function Status () 1591 { 1592 $this->_MBPush (); 1593 if (!( $fp = $this->_Connect() )) 1594 { 1595 $this->_MBPop(); 1596 return false; 1597 } 1598 1599 $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1 1600 if ( !( $this->_Send ( $fp, $req, 12 ) ) || 1601 !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) ) 1602 { 1603 $this->_MBPop (); 1604 return false; 1605 } 1606 1607 $res = substr ( $response, 4 ); // just ignore length, error handling, etc 1608 $p = 0; 1609 list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; 1610 1611 $res = array(); 1612 for ( $i=0; $i<$rows; $i++ ) 1613 for ( $j=0; $j<$cols; $j++ ) 1614 { 1615 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 1616 $res[$i][] = substr ( $response, $p, $len ); $p += $len; 1617 } 1618 1619 $this->_MBPop (); 1620 return $res; 1621 } 1622} 1623 1624// 1625// $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $ 1626// 1627