<?php

use dokuwiki\HTTP\DokuHTTPClient;
use DOMWrap\Document;

/**
 * DokuWiki Plugin amazonlight (Syntax Component)
 *
 * Renders `{{amazon>[country:]ASIN [WxH] [price|noprice]}}` as a small product
 * box whose data is scraped from the public Amazon product page.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
class syntax_plugin_amazonlight extends DokuWiki_Syntax_Plugin
{

    /** @var array what regions to use for the different countries */
    const REGIONS = [
        'us' => 'www.amazon.com',
        'ca' => 'www.amazon.ca',
        'de' => 'www.amazon.de',
        'gb' => 'www.amazon.co.uk',
        'fr' => 'www.amazon.fr',
        'jp' => 'www.amazon.co.jp',
    ];

    /** @var int how often a product page is requested before giving up */
    const MAX_ATTEMPTS = 3;

    /** @var DokuHTTPClient HTTP client preconfigured with browser-like request headers */
    protected DokuHTTPClient $http;

    /** @inheritDoc */
    public function getType()
    {
        return 'substition';
    }

    /** @inheritDoc */
    public function getPType()
    {
        return 'block';
    }

    /** @inheritDoc */
    public function getSort()
    {
        return 160;
    }

    /**
     * Connect lookup pattern to lexer.
     *
     * @param string $mode Parser mode
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern('\{\{amazon>[\w:\\- =]+\}\}', $mode, 'plugin_amazonlight');
    }

    /**
     * Set up the HTTP client with headers that mimic a desktop Chrome browser.
     */
    public function __construct()
    {
        // Only the header *value* belongs here. The previous value carried a literal
        // "User-Agent: " prefix, which would be sent as "User-Agent: User-Agent: Mozilla/...".
        $agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
            . 'Chrome/119.0.0.0 Safari/537.36';

        $http = new DokuHTTPClient();
        // NOTE(review): DokuWiki's HTTPClient is expected to build the User-Agent request
        // header from its $agent property; both are set so the value is right either way.
        $http->agent = $agent;
        $http->headers['User-Agent'] = $agent;
        $http->headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7';
        $http->headers['Accept-Language'] = 'en-US,en;q=0.9';
        $http->headers['Upgrade-Insecure-Requests'] = '1';
        $http->headers['Sec-Ch-Ua'] = '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"';
        $http->headers['Sec-Ch-Ua-Mobile'] = '?0';
        $http->headers['Sec-Ch-Ua-Platform'] = '"Linux"';
        $http->headers['Sec-Fetch-Dest'] = 'document';
        $http->headers['Sec-Fetch-Mode'] = 'navigate';
        $http->headers['Sec-Fetch-Site'] = 'none';
        $http->headers['Sec-Fetch-User'] = '?1';
        $this->http = $http;
    }

    /**
     * Parse the matched syntax into the parameter array consumed by render().
     *
     * The returned array has the keys imgw, imgh, price, asin and country.
     *
     * @inheritDoc
     */
    public function handle($match, $state, $pos, Doku_Handler $handler)
    {
        // strip the leading "{{amazon>" (9 chars) and the trailing "}}"
        $match = substr($match, 9, -2);
        [$ctry, $asin] = sexplode(':', $match, 2, '');

        // no country given?
        if (empty($asin)) {
            $asin = $ctry;
            $ctry = 'us';
        }

        // default parameters...
        $params = [
            'imgw' => $this->getConf('imgw'),
            'imgh' => $this->getConf('imgh'),
            'price' => $this->getConf('showprice'),
        ];

        // ...can be overridden.
        // Pad with '' instead of null so the preg_match() calls below always get a string.
        [$asin, $more] = sexplode(' ', $asin, 2, '');
        $params['asin'] = $asin;

        if (preg_match('/(\d+)x(\d+)/i', $more, $size)) {
            $params['imgw'] = $size[1];
            $params['imgh'] = $size[2];
        }
        if (preg_match('/noprice/i', $more)) {
            $params['price'] = false;
        } elseif (preg_match('/(show)?price/i', $more)) {
            $params['price'] = true;
        }

        // correct country given? Anything not listed in REGIONS falls back to the US store.
        if ($ctry === 'uk') {
            $ctry = 'gb';
        }
        if (!isset(self::REGIONS[$ctry])) {
            $ctry = 'us';
        }
        $params['country'] = $ctry;

        return $params;
    }

    /**
     * Render the product box, or a plain interwiki link when no data could be fetched.
     *
     * @inheritDoc
     */
    public function render($mode, Doku_Renderer $renderer, $data)
    {
        if ($mode !== 'xhtml') {
            return false;
        }

        $html = $this->output($data);
        if (!$html) {
            // fetching failed: fall back to a simple link to the product
            if ($data['country'] == 'de') {
                $renderer->interwikilink('Amazon', 'Amazon.de', 'amazon.de', $data['asin']);
            } else {
                $renderer->interwikilink('Amazon', 'Amazon', 'amazon', $data['asin']);
            }
            return true;
        }

        $renderer->doc .= $html;

        return true;
    }

    /**
     * Build the HTML of the product box.
     *
     * Fetch errors are reported through msg() and signalled by a false return value.
     *
     * @param array $param parameters as returned by handle()
     * @return string|false the HTML, false when the product data could not be fetched
     */
    protected function output($param)
    {
        global $conf;

        try {
            $data = $this->fetchData($param['asin'], $param['country']);
        } catch (Exception $e) {
            msg(hsc($e->getMessage()), -1);
            return false;
        }

        $img = ml($data['img'], ['w' => $param['imgw'], 'h' => $param['imgh']]);

        // the same opening anchor is used for the image and for the title
        $link = '<a href="' . hsc($data['url']) . '"';
        if ($conf['target']['extern']) {
            $link .= ' target="' . hsc($conf['target']['extern']) . '"';
        }
        $link .= '>';

        $html = '<div class="amazon">';
        $html .= $link;
        $html .= '<img src="' . $img . '" width="' . $param['imgw'] . '" height="' . $param['imgh'] . '" alt="" />';
        $html .= '</a>';

        $html .= '<div class="amazon_title">' . $link . hsc($data['title']) . '</a></div>';
        $html .= '<div class="amazon_author">' . hsc($data['author']) . '</div>';
        $html .= '<div class="amazon_isbn">' . hsc($data['isbn']) . '</div>';

        if ($param['price'] && $data['price']) {
            $html .= '<div class="amazon_price">' . hsc($data['price']) . '</div>';
        }
        $html .= '</div>';

        return $html;
    }

    /**
     * Fetch the meta data
     *
     * @param string $asin
     * @param string $country one of the keys of self::REGIONS
     * @return array with the keys title, author, rating, price, isbn, img and url
     * @throws Exception when the country is unknown, the page can not be fetched or has no title
     */
    protected function fetchData($asin, $country)
    {
        if (!isset(self::REGIONS[$country])) {
            throw new Exception('Unknown country ' . $country);
        }

        $partner = $this->getConf('partner_' . $country);
        if (!$partner) {
            $partner = 'none';
        }

        $url = 'https://' . self::REGIONS[$country] . '/dp/' . $asin;

        $doc = new Document();
        $doc->html($this->fetchPage($url));

        $result = [
            'title' => $this->extract($doc, '#productTitle'),
            'author' => $this->extract($doc, '#bylineInfo a'),
            'rating' => $this->extract($doc, '#averageCustomerReviews span.a-declarative a > span'),
            'price' => $this->extract($doc, '.priceToPay'),
            'isbn' => $this->extract($doc, '#rpi-attribute-book_details-isbn10 .rpi-attribute-value'),
            'img' => $this->extract($doc, '#imgTagWrapperId img', 'src'),
            'url' => $url . '?tag=' . $partner,
        ];

        // fall back to the document title when the product title element is missing
        if (!$result['title']) {
            $result['title'] = $this->extract($doc, 'title');
        }
        if (!$result['title']) {
            throw new Exception('Could not find title in data');
        }

        return $result;
    }

    /**
     * Download a product page, retrying a few times on failure
     *
     * Returns as soon as one attempt yields a usable page. Before each retry the
     * method sleeps one second longer than before the previous attempt (0s, 1s, 2s).
     *
     * @param string $url
     * @return string the page's HTML
     * @throws Exception when all attempts failed; the message describes the last failure
     */
    protected function fetchPage($url)
    {
        $error = 'Failed to fetch data.';

        for ($attempt = 0; $attempt < self::MAX_ATTEMPTS; $attempt++) {
            sleep($attempt);

            $html = $this->http->get($url);
            if (!$html) {
                $error = 'Failed to fetch data. Status ' . $this->http->status;
                continue;
            }
            if (preg_match('/(captcha|api-services-support@amazon.com)/i', $html)) {
                $error = 'Anti-Bot mechanisms triggered, cannot fetch data';
                continue;
            }

            // success: stop here instead of requesting the page again
            return $html;
        }

        throw new Exception($error);
    }

    /**
     * Extract text or attribute from a selector
     *
     * Only the first element matching the selector is used.
     *
     * @param Document $doc
     * @param string $selector
     * @param string|null $attr attribute to extract, omit for text
     * @return string empty string when nothing matches
     */
    protected function extract(Document $doc, string $selector, $attr = null): string
    {
        $element = $doc->find($selector)->first();
        if ($element === null) {
            return '';
        }

        // the casts keep the declared return type intact whatever the DOM wrapper hands back
        if ($attr) {
            return (string)$element->attr($attr);
        }
        return (string)$element->text();
    }
}