<?php

require_once(HTML2PS_DIR.'fetcher._interface.class.php');

/**
 * This class handles fetching HTTP content using the CURL extension.
 *
 * The response is requested together with its headers; the headers of the
 * final response and the body are then separated and handed to
 * FetchedDataURL.
 */
class FetcherUrlCurl extends Fetcher {
  /**
   * @var String URL being fetched (exactly as passed to get_data)
   * @access private
   */
  var $url;

  /**
   * @var String|null value passed to CURLOPT_PROXY; null means no proxy option is set
   * @access private
   */
  var $_proxy;

  /**
   * Initializes the fetcher with an empty URL and no proxy.
   *
   * Declared before the legacy class-named constructor so that it is the
   * constructor on every PHP version that recognizes __construct.
   */
  function __construct() {
    $this->url = "";
    $this->set_proxy(null);
  }

  /**
   * Legacy class-named constructor, kept for callers that invoke it
   * explicitly and for PHP versions that only know this constructor style.
   * PHP 8 no longer treats a class-named method as a constructor, hence
   * the delegation to __construct.
   */
  function FetcherUrlCurl() {
    $this->__construct();
  }

  /**
   * Adds a trailing slash to a URL consisting of scheme and host only
   * (e.g. replaces http://www.google.com with http://www.google.com/).
   *
   * URLs that already have a path, a query string or a fragment are
   * returned unchanged.
   *
   * @param String $url URL to normalize
   * @return String normalized URL
   * @access private
   */
  function _fix_url($url) {
    // The scheme part must not contain ':', '/', '?' or '#', so a "://"
    // occurring later in the URL (e.g. inside a query string) is never
    // mistaken for the scheme separator. The authority part must not
    // contain '?' or '#', so "http://host?x=1" does not get a slash
    // appended after its query string.
    if (preg_match('~^[^:/?#]+://[^/?#]+$~', $url)) {
      $url .= '/';
    }

    return $url;
  }

  /**
   * @return String URL most recently passed to get_data
   *         (empty string if nothing has been fetched yet)
   */
  function get_base_url() {
    return $this->url;
  }

  /**
   * Fetches the given URL using CURL.
   *
   * @param String $url URL to be fetched
   * @return FetchedDataURL|null object built from the response body, the
   *         header lines of the final response (status line excluded) and
   *         the requested URL; null if the CURL session could not be
   *         created or the transfer failed (the reason is written to the
   *         error log)
   */
  function get_data($url) {
    $this->url = $url;

    $curl = curl_init();
    if ($curl === FALSE) {
      error_log(sprintf('Cannot open %s, CURL session could not be initialized', $url));
      return null;
    }

    curl_setopt($curl, CURLOPT_URL, $this->_fix_url($url));
    curl_setopt($curl, CURLOPT_USERAGENT, DEFAULT_USER_AGENT);

    // Setting this option may be refused by the PHP configuration;
    // the fetch is still attempted, just without following redirects.
    if (!@curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1)) {
      error_log('CURLOPT_FOLLOWLOCATION will not work in safe_mode; pages with redirects may be rendered incorrectly');
    }

    // Return headers and body together as a string instead of printing them
    curl_setopt($curl, CURLOPT_HEADER, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $proxy = $this->get_proxy();
    if (!is_null($proxy)) {
      curl_setopt($curl, CURLOPT_PROXY, $proxy);
    }

    $response = curl_exec($curl);

    if ($response === FALSE) {
      error_log(sprintf('Cannot open %s, CURL error is: %s',
                        $url,
                        curl_error($curl)));
      curl_close($curl);
      return null;
    }

    // Must be read before the session is closed. When redirects are
    // followed the response contains one header block per hop, and this
    // value is the total size of all of them.
    $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    list($headers, $content) = $this->_split_response($response, $header_size);

    return new FetchedDataURL($content, $headers, $this->url);
  }

  /**
   * Separates a raw response (header blocks followed by the body) into
   * the header lines of the last header block and the body.
   *
   * @param String $response raw response as returned by curl_exec
   * @param Integer $header_size total size in bytes of all header blocks
   *        as reported by CURL; when it is not a usable value, the
   *        response is split at the first empty line instead
   * @return Array two elements: array of header lines (status line
   *         excluded) and the body string
   * @access private
   */
  function _split_response($response, $header_size) {
    $header_size = (int)$header_size;

    if ($header_size <= 0 || $header_size > strlen($response)) {
      // According to the HTTP standard, the header block is separated
      // from the body by an empty line ("\r\n\r\n" sequence); as the body
      // may contain this sequence too, only the first one is considered.
      $separator_pos = strpos($response, "\r\n\r\n");
      if ($separator_pos === FALSE) {
        // No header block can be identified: treat everything as body
        return array(array(), $response);
      }
      $header_size = $separator_pos + 4;
    }

    $header_text = substr($response, 0, $header_size);
    // substr may return FALSE for an empty body on older PHP versions
    $content = (string)substr($response, $header_size);

    // Header blocks of consecutive responses are separated by empty lines;
    // only the last block describes the body actually returned.
    $blocks = preg_split('/(?:\r?\n){2,}/', $header_text, -1, PREG_SPLIT_NO_EMPTY);
    if (empty($blocks)) {
      return array(array(), $content);
    }

    $lines = preg_split('/\r?\n/', $blocks[count($blocks) - 1], -1, PREG_SPLIT_NO_EMPTY);

    // The very first line contains the HTTP response code
    // (e.g. HTTP/1.1 200 OK), so it is not a header and is dropped.
    return array(array_slice($lines, 1), $content);
  }

  /**
   * @return String|null proxy used for fetching, or null if none is set
   */
  function get_proxy() {
    return $this->_proxy;
  }

  /**
   * @param String|null $proxy value to pass to CURLOPT_PROXY on subsequent
   *        fetches; null disables the proxy option
   */
  function set_proxy($proxy) {
    $this->_proxy = $proxy;
  }
}
?>