<?php

namespace dokuwiki\plugin\doi\Resolver;

use dokuwiki\HTTP\DokuHTTPClient;

/**
 * ISBN resolver scraping isbn.de
 *
 * Book metadata is read from the Open Graph <meta> tags and from the
 * person/publisher links found in the HTML of the isbn.de search result page.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /**
     * Build the isbn.de search URL for the given ID
     *
     * The same URL is used by fetchData() as the page to scrape.
     *
     * @inheritdoc
     * @param string $id the ISBN to look up
     * @return string
     */
    public function getFallbackURL($id)
    {
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /**
     * Return the data for the given ID
     *
     * Delegates to fetchCachedData(), which is not defined in this class
     * (presumably inherited and calling fetchData() on a cache miss - verify
     * against the parent class).
     *
     * @inheritdoc
     */
    public function getData($id)
    {
        return $this->fetchCachedData($id);
    }

    /**
     * Download the isbn.de page for the given ID and scrape the book data from it
     *
     * @inheritdoc
     * @param string $id the ISBN to look up
     * @return array the default result structure filled with the scraped values
     * @throws \Exception when the page could not be fetched or contains no ISBN
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) {
            throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);
        }

        // start from the default structure so that all expected keys exist
        $data = $this->defaultResult;

        // a page without the ISBN meta tag is not a book detail page
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) {
            throw new \Exception('ISBN not found at isbn.de.');
        }
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        if (empty($data['title'])) {
            // fall back to the requested ID when the page provides no title
            $data['title'] = $id;
        }

        // only the leading four digit year is kept; anything following it is optional
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        $data['image'] = $this->extract('/<meta property="og:image" content="([^"]+)"/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * HTML entities in the matched value are decoded. Quote entities are always
     * decoded, independent of the default flags of the running PHP version.
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group the capture group to return
     * @return string the decoded value, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        if (preg_match($regex, $html, $m) && isset($m[$group])) {
            return html_entity_decode($m[$group], ENT_QUOTES | ENT_HTML5);
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * HTML entities are decoded and duplicate values are removed. The result is
     * reindexed so that it is always a sequential list.
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group the capture group to return
     * @return string[] the decoded unique values, empty array when nothing matched
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (!preg_match_all($regex, $html, $m) || !isset($m[$group])) {
            return [];
        }

        $all = [];
        foreach ($m[$group] as $value) {
            $all[] = html_entity_decode($value, ENT_QUOTES | ENT_HTML5);
        }

        // array_unique() keeps the original keys, so close the gaps afterwards
        return array_values(array_unique($all));
    }
}