<?php

namespace dokuwiki\plugin\doi\Resolver;

use dokuwiki\HTTP\DokuHTTPClient;

/**
 * ISBN resolver scraping isbn.de
 *
 * Requests the isbn.de search URL for an ISBN and reads the book metadata
 * from the Open Graph meta tags and the person/publisher links of the
 * returned HTML page.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /** @inheritdoc */
    public function getFallbackURL($id)
    {
        // the ID is URL-encoded because it becomes a path segment of the search URL
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /** @inheritdoc */
    public function getData($id)
    {
        return $this->fetchCachedData($id);
    }

    /**
     * @inheritdoc
     *
     * Fetches the page behind getFallbackURL() and fills a copy of
     * $this->defaultResult with the scraped values.
     *
     * @param string $id the ISBN to look up
     * @return array the default result with the keys 'id', 'url', 'title', 'published',
     *               'authors' (list of strings) and 'publisher' filled in
     * @throws \Exception when the page cannot be fetched or contains no ISBN meta tag
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) {
            throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);
        }

        $data = $this->defaultResult;

        // a page without the ISBN meta tag is not a book page (e.g. nothing was found)
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) {
            throw new \Exception('ISBN not found at isbn.de.');
        }
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        // only the leading four digits (the year) of the release date are kept
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="((\d){4})[^"]+"/',
            $html
        );

        // every link to a person page counts as an author, the first publisher link wins
        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * HTML entities in the captured value are decoded. The decoding flags are
     * given explicitly because the defaults differ between PHP versions
     * (quotes such as &#039; are not decoded by default before PHP 8.1).
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search in
     * @param int $group the capture group to return
     * @return string the decoded value of the first match, empty string if nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        if (preg_match($regex, $html, $m)) {
            return html_entity_decode($m[$group], ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * HTML entities are decoded the same way as in extract(). Duplicates are
     * removed and the result is reindexed, so it is always a gapless list.
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search in
     * @param int $group the capture group to collect
     * @return string[] the unique decoded values in order of first appearance, empty if nothing matched
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (!preg_match_all($regex, $html, $m)) {
            return [];
        }

        $all = [];
        foreach ($m[$group] as $value) {
            $all[] = html_entity_decode($value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
        }

        // array_unique() preserves the original keys; reindex to avoid gaps in the list
        return array_values(array_unique($all));
    }
}