<?php

use dokuwiki\HTTP\DokuHTTPClient;
use DOMWrap\Document;

/**
 * DokuWiki Plugin amazonlight (Syntax Component)
 *
 * Renders `{{amazon>[country:]ASIN [WxH]}}` as a small product box (cover image,
 * title, author, ISBN). The product data is scraped from the public product page
 * of the matching Amazon site and cached on disk.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
class syntax_plugin_amazonlight extends DokuWiki_Syntax_Plugin
{
    /** @var array what regions to use for the different countries */
    public const REGIONS = [
        'us' => 'www.amazon.com',
        'ca' => 'www.amazon.ca',
        'de' => 'www.amazon.de',
        'gb' => 'www.amazon.co.uk',
        'fr' => 'www.amazon.fr',
        'jp' => 'www.amazon.co.jp',
    ];

    /** @var DokuHTTPClient HTTP client preconfigured with browser-like request headers */
    protected DokuHTTPClient $http;

    /**
     * Set up the HTTP client with headers mimicking a desktop Chrome browser.
     */
    public function __construct()
    {
        // The value must not repeat the header name (the original sent "User-Agent: User-Agent: ...").
        $userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
            . 'Chrome/119.0.0.0 Safari/537.36';

        $http = new DokuHTTPClient();
        // NOTE(review): DokuWiki's HTTPClient appears to send its $agent property as the
        // User-Agent header, overriding the headers array - set both so either path works.
        $http->agent = $userAgent;
        $http->headers['User-Agent'] = $userAgent;
        $http->headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,'
            . 'image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7';
        $http->headers['Accept-Language'] = 'en-US,en;q=0.9';
        $http->headers['Upgrade-Insecure-Requests'] = '1';
        $http->headers['Sec-Ch-Ua'] = '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"';
        $http->headers['Sec-Ch-Ua-Mobile'] = '?0';
        $http->headers['Sec-Ch-Ua-Platform'] = '"Linux"';
        $http->headers['Sec-Fetch-Dest'] = 'document';
        $http->headers['Sec-Fetch-Mode'] = 'navigate';
        $http->headers['Sec-Fetch-Site'] = 'none';
        $http->headers['Sec-Fetch-User'] = '?1';

        $this->http = $http;
    }

    /** @inheritDoc */
    public function getType()
    {
        return 'substition';
    }

    /** @inheritDoc */
    public function getPType()
    {
        return 'block';
    }

    /** @inheritDoc */
    public function getSort()
    {
        return 160;
    }

    /**
     * Connect lookup pattern to lexer.
     *
     * @param string $mode Parser mode
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern('\{\{amazon>[\w:\\- =]+\}\}', $mode, 'plugin_amazonlight');
    }

    /**
     * Parse the matched syntax into the parameters used for rendering.
     *
     * @inheritDoc
     * @return array with the keys imgw, imgh, asin and country
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        // strip the leading "{{amazon>" (9 characters) and the trailing "}}"
        $inner = substr($match, 9, -2);
        [$ctry, $asin] = sexplode(':', $inner, 2);

        // no country given?
        if (empty($asin)) {
            $asin = $ctry;
            $ctry = 'us';
        }

        // default parameters...
        $params = [
            'imgw' => $this->getConf('imgw'),
            'imgh' => $this->getConf('imgh'),
        ];

        // ...can be overridden by an optional "<width>x<height>" after the ASIN
        [$asin, $more] = sexplode(' ', $asin, 2);
        $params['asin'] = $asin;

        // $more is null when nothing follows the ASIN; cast to avoid passing null to preg_match
        if (preg_match('/(\d+)x(\d+)/i', (string)$more, $size)) {
            $params['imgw'] = $size[1];
            $params['imgh'] = $size[2];
        }

        // correct country given? "uk" is accepted as an alias, anything unknown becomes "us"
        if ($ctry === 'uk') {
            $ctry = 'gb';
        }
        if (!isset(self::REGIONS[$ctry])) {
            $ctry = 'us';
        }
        $params['country'] = $ctry;

        return $params;
    }

    /**
     * Render the product box, or a plain interwiki link when the data is unavailable.
     *
     * @inheritDoc
     */
    public function render($mode, Doku_Renderer $renderer, $data)
    {
        if ($mode !== 'xhtml') {
            return false;
        }

        $html = $this->output($data);
        if (!$html) {
            // lookup failed: fall back to an interwiki link to the product
            if ($data['country'] === 'de') {
                $renderer->interwikilink('Amazon', 'Amazon.de', 'amazon.de', $data['asin']);
            } else {
                $renderer->interwikilink('Amazon', 'Amazon', 'amazon', $data['asin']);
            }
            return true;
        }

        $renderer->doc .= $html;

        return true;
    }

    /**
     * Build the HTML for a product box.
     *
     * A failed lookup is reported through msg() and signalled with a false return.
     *
     * @param array $param parameters as returned by handle()
     * @return string|false the HTML, or false when the product data could not be loaded
     */
    protected function output($param)
    {
        global $conf;

        try {
            $data = $this->fetchCachedData($param['asin'], $param['country']);
        } catch (Exception $e) {
            msg(hsc($e->getMessage()), -1);
            return false;
        }

        $img = ml($data['img'] ?? '', ['w' => $param['imgw'], 'h' => $param['imgh']]);

        // everything placed into an attribute is escaped
        $url = hsc($data['url']);
        $width = hsc((string)$param['imgw']);
        $height = hsc((string)$param['imgh']);
        $target = '';
        if ($conf['target']['extern']) {
            $target = ' target="' . hsc($conf['target']['extern']) . '"';
        }
        $linkOpen = '<a href="' . $url . '"' . $target . '>';

        $html = '<div class="amazon">';
        $html .= $linkOpen;
        $html .= '<img src="' . $img . '" width="' . $width . '" height="' . $height . '" alt="" />';
        $html .= '</a>';

        $html .= '<div class="amazon_title">';
        $html .= $linkOpen;
        $html .= hsc($data['title'] ?? '');
        $html .= '</a>';
        $html .= '</div>';

        $html .= '<div class="amazon_author">';
        $html .= hsc($data['author'] ?? '');
        $html .= '</div>';

        $html .= '<div class="amazon_isbn">';
        $html .= hsc($data['isbn'] ?? '');
        $html .= '</div>';

        $html .= '</div>';

        return $html;
    }

    /**
     * Forever cache the fetched data
     *
     * The cache file stores the data without the partner tag; the tag configured for
     * the country is appended to the URL on every call.
     *
     * @param string $asin
     * @param string $country
     * @return array
     * @throws Exception when the data has to be fetched and fetching fails
     */
    protected function fetchCachedData($asin, $country)
    {
        $cachefile = getCacheName($country . '-' . $asin, '.amazonlight');

        $data = null;
        if (file_exists($cachefile)) {
            $data = json_decode((string)file_get_contents($cachefile), true);
        }

        // missing, unreadable or corrupt cache: fetch again
        if (!is_array($data) || empty($data['url'])) {
            $data = $this->fetchData($asin, $country);
            io_saveFile($cachefile, json_encode($data));
        }

        $partner = $this->getConf('partner_' . $country);
        if ($partner) {
            $data['url'] .= '?tag=' . rawurlencode($partner);
        }

        return $data;
    }

    /**
     * Fetch the meta data
     *
     * Requests the product page up to three times, waiting 0, 1 and 2 seconds before
     * the respective attempt, and stops at the first usable response.
     *
     * @param string $asin
     * @param string $country
     * @return array with the keys title, author, rating, isbn, img and url
     * @throws Exception when the page cannot be fetched or no title can be found
     */
    protected function fetchData($asin, $country)
    {
        if (!isset(self::REGIONS[$country])) {
            throw new Exception('Unknown country ' . $country);
        }
        $region = self::REGIONS[$country];

        // get homepage cookies first
        $this->http->get('https://' . $region);

        $url = 'https://' . $region . '/dp/' . $asin;

        $html = '';
        $error = null;
        $maxAttempts = 3;
        for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
            sleep($attempt - 1);

            $html = $this->http->get($url);
            if (!$html) {
                $error = 'Failed to fetch data. Status ' . $this->http->status;
                continue; // try a few times
            }
            if (preg_match('/(captcha|api-services-support@amazon.com)/i', $html)) {
                $error = 'Anti-Bot mechanisms triggered, cannot fetch data';
                continue; // try a few times
            }

            // usable response: stop retrying
            $error = null;
            break;
        }
        if ($error !== null) {
            throw new Exception($error);
        }

        $doc = new Document();
        $doc->html($html);

        $result = [
            'title' => $this->extract($doc, '#productTitle'),
            'author' => $this->extract($doc, '#bylineInfo a'),
            'rating' => $this->extract($doc, '#averageCustomerReviews span.a-declarative a > span'),
            'isbn' => $this->extract($doc, '#rpi-attribute-book_details-isbn10 .rpi-attribute-value'),
            'img' => $this->extract($doc, '#imgTagWrapperId img', 'src'),
            'url' => $url,
        ];

        // fall back to the page title when the product title element is missing
        if (!$result['title']) {
            $result['title'] = $this->extract($doc, 'title');
        }
        if (!$result['title']) {
            throw new Exception('Could not find title in data');
        }

        return $result;
    }

    /**
     * Extract text or attribute from a selector
     *
     * The result is trimmed, so an element holding only whitespace counts as empty.
     *
     * @param Document $doc
     * @param string $selector
     * @param string|null $attr attribute to extract, omit for text
     * @return string empty string when nothing matches the selector
     */
    protected function extract(Document $doc, string $selector, $attr = null): string
    {
        $element = $doc->find($selector)->first();
        if ($element === null) {
            return '';
        }

        $value = $attr ? $element->attr($attr) : $element->text();

        return trim((string)$value);
    }
}