1<?php
2
3require_once(HTML2PS_DIR.'fetcher._interface.class.php');
4
/**
 * This class fetches resources (response headers and body) over HTTP
 * using the CURL extension
 */
class FetcherUrlCurl extends Fetcher {
  /**
   * @var String URL being fetched (exactly as passed to get_data)
   * @access private
   */
  var $url;

  /**
   * @var String|null value handed to CURLOPT_PROXY; null means "do not set a proxy"
   * @access private
   */
  var $_proxy;

  /**
   * Initializes the fetcher with an empty URL and no proxy.
   *
   * Declared in addition to (and before) the legacy class-named constructor
   * below, because PHP 8 no longer treats a method named after the class
   * as a constructor.
   */
  function __construct() {
    $this->url = "";
    $this->set_proxy(null);
  }

  /**
   * Legacy (PHP 4 style) constructor, kept for backward compatibility;
   * it only delegates to __construct().
   */
  function FetcherUrlCurl() {
    $this->__construct();
  }

  /**
   * Makes sure an URL consisting of scheme and host only has a path.
   *
   * If only the host name was specified, a slash is added after it
   * (e.g. http://www.google.com becomes http://www.google.com/).
   * The slash is inserted before a query string or fragment, if any
   * (http://host?q=1 becomes http://host/?q=1), so those parts are not altered.
   * URLs which already contain a path are returned unchanged.
   *
   * @param String $url URL to check
   * @return String URL with a path component
   * @access private
   */
  function _fix_url($url) {
    // Group 1: scheme and host; optional group 2: query string and/or fragment
    if (preg_match('~^([^:/?#]+://[^/?#]+)([?#].*)?$~s', $url, $parts)) {
      $tail = isset($parts[2]) ? $parts[2] : '';
      return $parts[1].'/'.$tail;
    }

    return $url;
  }

  /**
   * @return String URL passed to the most recent get_data call
   *                (empty string if nothing has been fetched yet)
   */
  function get_base_url() {
    return $this->url;
  }

  /**
   * Fetches the given URL using CURL.
   *
   * Redirects are followed when CURL allows it; the proxy set with
   * set_proxy (if any) is used.
   *
   * @param String $url URL to fetch
   * @return FetchedDataURL|null object built from the response body, the
   *         response header lines (without the status line) and the requested
   *         URL; null if the CURL session could not be created or the
   *         transfer failed (the error is written to the error log)
   */
  function get_data($url) {
    $this->url = $url;

    $curl = curl_init();
    if ($curl === FALSE) {
      error_log(sprintf('Cannot open %s, CURL session could not be initialized', $url));
      return null;
    }

    curl_setopt($curl, CURLOPT_URL, $this->_fix_url($url));
    curl_setopt($curl, CURLOPT_USERAGENT, DEFAULT_USER_AGENT);

    // Setting this option may fail (and emit a warning) in restricted
    // environments; this is not fatal, so we just log it and go on
    if (!@curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1)) {
      error_log('CURLOPT_FOLLOWLOCATION will not work in safe_mode; pages with redirects may be rendered incorrectly');
    }

    // Return both headers and page content as a string from curl_exec
    curl_setopt($curl, CURLOPT_HEADER, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $proxy = $this->get_proxy();
    if (!is_null($proxy)) {
      curl_setopt($curl, CURLOPT_PROXY, $proxy);
    }

    /**
     * Fetch headers and page content to the $response variable
     */
    $response = curl_exec($curl);

    if ($response === FALSE) {
      error_log(sprintf('Cannot open %s, CURL error is: %s',
                        $url,
                        curl_error($curl)));
      curl_close($curl);
      return null;
    }

    // Total length of all received header blocks; must be read
    // before the session is closed
    $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    list($headers, $content) = $this->_split_response($response, $header_size);

    return new FetchedDataURL($content, $headers, $this->url);
  }

  /**
   * Splits raw CURL output (headers followed by body) into header lines
   * and content.
   *
   * When redirects are followed, the output contains one header block per
   * response received, each terminated by an empty line ("\r\n\r\n").
   * Searching for the first empty line would leave the headers of the
   * subsequent responses inside the content, so the split position is taken
   * from the header size reported by CURL, and only the last header block
   * (the one describing the returned content) is used.
   *
   * @param String $response raw data returned by curl_exec
   * @param int $header_size total size of the header data, in bytes
   * @return Array two items: array of header lines and content string
   * @access private
   */
  function _split_response($response, $header_size) {
    $header_size = (int)$header_size;
    if ($header_size < 0 || $header_size > strlen($response)) {
      // Unusable header size: treat the whole response as content
      $header_size = 0;
    }

    $raw_headers = (string)substr($response, 0, $header_size);

    // Older PHP versions return FALSE from substr for an empty body
    $content = (string)substr($response, $header_size);

    $blocks = explode("\r\n\r\n", trim($raw_headers, "\r\n"));
    $last_block = $blocks[count($blocks) - 1];

    /**
     * Usually there's more than one line in a header block,
     * separated with '\r\n' sequence.
     *
     * The very first line contains HTTP response code (e.g. HTTP/1.1 200 OK),
     * so we may safely ignore it.
     */
    $headers = array_slice(explode("\r\n", $last_block), 1);

    return array($headers, $content);
  }

  /**
   * @return String|null proxy set with set_proxy; null if no proxy is used
   */
  function get_proxy() {
    return $this->_proxy;
  }

  /**
   * Sets the proxy for subsequent get_data calls.
   *
   * @param String|null $proxy value to pass as CURLOPT_PROXY;
   *                           null disables proxy usage
   */
  function set_proxy($proxy) {
    $this->_proxy = $proxy;
  }
}
105?>