url = ""; $this->set_proxy(null); }

    /**
     * Normalizes an URL before it is handed to CURL.
     *
     * If only a host name was specified, a trailing slash is appended
     * (e.g. http://www.google.com becomes http://www.google.com/).
     * Any other URL is returned unchanged.
     *
     * @param string $url URL to normalize
     * @return string normalized URL
     */
    function _fix_url($url) {
        if (preg_match('#^.*://[^/]+$#', $url)) {
            $url .= '/';
        }

        return $url;
    }

    /**
     * Returns the URL stored by the most recent get_data() call.
     *
     * @return string
     */
    function get_base_url() {
        return $this->url;
    }

    /**
     * Fetches the given URL using CURL and wraps the result in a
     * FetchedDataURL object.
     *
     * The requested URL is remembered and can be read back using
     * get_base_url(). Redirects are followed when CURL allows it; in
     * this case the headers of the final response are returned.
     *
     * @param string $url URL to be fetched
     * @return FetchedDataURL|null fetched data, or null if the CURL
     *         session could not be created or the transfer failed
     *         (the reason is written to the error log)
     */
    function get_data($url) {
        // Remember the URL exactly as requested; the "fixed" variant is
        // used for the transfer only.
        $this->url = $url;

        $curl = curl_init();
        if ($curl === FALSE) {
            error_log(sprintf('Cannot initialize CURL session for %s', $url));
            return null;
        }

        curl_setopt($curl, CURLOPT_URL, $this->_fix_url($url));
        curl_setopt($curl, CURLOPT_USERAGENT, DEFAULT_USER_AGENT);

        // Setting this option may fail (e.g. in safe_mode); it is not fatal,
        // so the warning is suppressed and a note is logged instead.
        if (!@curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1)) {
            error_log('CURLOPT_FOLLOWLOCATION will not work in safe_mode; pages with redirects may be rendered incorrectly');
        }

        // Ask CURL to return headers and body together as a string
        curl_setopt($curl, CURLOPT_HEADER, 1);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

        $proxy = $this->get_proxy();
        if (!is_null($proxy)) {
            curl_setopt($curl, CURLOPT_PROXY, $proxy);
        }

        $response = curl_exec($curl);
        if ($response === FALSE) {
            error_log(sprintf('Cannot open %s, CURL error is: %s', $url, curl_error($curl)));
            curl_close($curl);
            return null;
        }

        // Total size of ALL header blocks received during the transfer;
        // must be read before the session is closed.
        $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
        curl_close($curl);

        list($headers, $content) = $this->_split_response($response, $header_size);

        return new FetchedDataURL($content, $headers, $this->url);
    }

    /**
     * Splits a raw CURL response (headers followed by body) into a list
     * of header lines and the body.
     *
     * When redirects are followed, the response contains several header
     * blocks (one per intermediate response) before the body. Splitting
     * at the first empty line would leave the remaining header blocks
     * inside the body, so the header size reported by CURL is used
     * instead; the last header block is the one describing the body.
     *
     * If the reported header size is unusable, the response is split at
     * the first '\r\n\r\n' sequence. If there is no such sequence, the
     * whole response is treated as body with no headers.
     *
     * @param string $response raw response returned by curl_exec
     * @param mixed $header_size value of CURLINFO_HEADER_SIZE (may be
     *        FALSE if CURL could not provide it)
     * @return array two-element array: list of header lines (status
     *         line excluded) and body string
     */
    function _split_response($response, $header_size) {
        if (is_int($header_size) && $header_size >= 0 && $header_size <= strlen($response)) {
            $header_text = substr($response, 0, $header_size);
            // substr may return FALSE for an empty body on older PHP versions
            $content = (string)substr($response, $header_size);
        } elseif (preg_match('/^(.*?)\r\n\r\n(.*)$/s', $response, $matches)) {
            // Headers are separated from the body by an empty line; as the
            // body might contain this sequence too, the first group is
            // non-greedy and the 's' modifier makes '.' match line breaks.
            $header_text = $matches[1];
            $content = $matches[2];
        } else {
            return array(array(), $response);
        }

        // Keep the last header block only (the final response)
        $blocks = preg_split('/(?:\r?\n){2,}/', rtrim($header_text, "\r\n"));
        $last_block = $blocks[count($blocks) - 1];

        // The very first line contains HTTP response code
        // (e.g. HTTP/1.1 200 OK), so we may safely ignore it.
        $headers = array_slice(preg_split('/\r?\n/', $last_block), 1);

        return array($headers, $content);
    }

    /**
     * Returns the proxy set using set_proxy().
     *
     * @return mixed proxy value passed to CURLOPT_PROXY, or null if no
     *         proxy is used
     */
    function get_proxy() {
        return $this->_proxy;
    }

    /**
     * Sets the proxy to be used by subsequent get_data() calls.
     *
     * @param mixed $proxy value to be passed to CURLOPT_PROXY; null
     *        disables proxy usage
     */
    function set_proxy($proxy) {
        $this->_proxy = $proxy;
    }
}
?>