setHttpHeaders($headers); $this->setUserAgent($userAgent); }

    /**
     * Set HTTP headers.
     *
     * Replaces any previously stored headers. When $httpHeaders is not an
     * array, or is an empty array, the $_SERVER superglobal is used instead.
     *
     * @param array $httpHeaders
     */
    public function setHttpHeaders($httpHeaders = null)
    {
        // use global _SERVER if $httpHeaders aren't defined
        if (!is_array($httpHeaders) || !count($httpHeaders)) {
            $httpHeaders = $_SERVER;
        }

        // clear existing headers
        $this->httpHeaders = array();

        // Only save HTTP headers. In PHP land, that means only _SERVER vars that
        // start with HTTP_.
        foreach ($httpHeaders as $key => $value) {
            if (substr($key, 0, 5) === 'HTTP_') {
                $this->httpHeaders[$key] = $value;
            }
        }
    }

    /**
     * Return user agent headers.
     *
     * These are the keys that setUserAgent() looks up in the stored HTTP
     * headers when it is not given an explicit user agent.
     *
     * @return array
     */
    public function getUaHttpHeaders()
    {
        return self::$uaHttpHeaders;
    }

    /**
     * Set the user agent.
     *
     * A non-empty $userAgent is stored as given. Otherwise the user agent is
     * assembled from the stored HTTP headers: every non-empty header listed by
     * getUaHttpHeaders() is joined with a single space and the result is
     * trimmed. If none of those headers is present, null is stored.
     *
     * @param string $userAgent
     *
     * @return string|null The user agent that was stored.
     */
    public function setUserAgent($userAgent = null)
    {
        if (!empty($userAgent)) {
            return $this->userAgent = $userAgent;
        }

        $parts = array();

        foreach ($this->getUaHttpHeaders() as $altHeader) {
            // @todo: should use getHttpHeader(), but it would be slow.
            if (!empty($this->httpHeaders[$altHeader])) {
                $parts[] = $this->httpHeaders[$altHeader];
            }
        }

        $joined = implode(' ', $parts);

        return $this->userAgent = (!empty($joined) ? trim($joined) : null);
    }

    /**
     * Build the user agent regex.
     *
     * The crawler patterns are combined into one alternation group. The
     * returned string has no delimiters or modifiers; isCrawler() adds them.
     *
     * @return string
     */
    public function getRegex()
    {
        return '('.implode('|', self::$crawlers).')';
    }

    /**
     * Build the replacement regex.
     *
     * The ignore patterns are combined into one alternation group. Whatever
     * this expression matches is removed from the user agent by isCrawler()
     * before the crawler regex is applied.
     *
     * @return string
     */
    public function getIgnored()
    {
        return '('.implode('|', self::$ignore).')';
    }

    /**
     * Check user agent string against the regex.
     *
     * The match result of each call replaces the result of the previous one,
     * so getMatches() never reports a match left over from an earlier check.
     *
     * @param string $userAgent User agent to test; when null, the user agent
     *                          stored on this instance is tested instead.
     *
     * @return bool True when the crawler regex matches the user agent.
     */
    public function isCrawler($userAgent = null)
    {
        $agent = is_null($userAgent) ? $this->userAgent : $userAgent;

        // The stored user agent may be null (setUserAgent() stores null when it
        // finds nothing), and preg_replace() returns null on a PCRE error. Cast
        // both to string so the PCRE functions never get null as their subject.
        $agent = (string) preg_replace('/'.$this->getIgnored().'/i', '', (string) $agent);

        $matches = array();
        $result = preg_match('/'.$this->getRegex().'/i', $agent, $matches);

        // Always overwrite the stored matches: keeping them on a negative
        // result would expose the outcome of a previous call.
        $this->matches = $result ? $matches : array();

        return (bool) $result;
    }

    /**
     * Return the matches.
*
     * Only the first entry of the stored match data is returned. When no
     * match is stored (index 0 is not set), null is returned instead of
     * reading a missing array offset.
     *
     * @return string|null First entry of the match data stored by isCrawler(),
     *                     or null when there is none.
     */
    public function getMatches()
    {
        return isset($this->matches[0]) ? $this->matches[0] : null;
    }
}