<?php

namespace dokuwiki\plugin\doi\Resolver;

use dokuwiki\HTTP\DokuHTTPClient;

/**
 * ISBN resolver scraping isbn.de
 *
 * Fetches the isbn.de search page for an ISBN and pulls the book's metadata
 * out of the page's Open Graph <meta> tags and its person/publisher links.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /**
     * Build the isbn.de search URL for the given ISBN.
     *
     * The same URL is used by fetchData() as the page to scrape.
     *
     * @inheritdoc
     */
    public function getFallbackURL($id)
    {
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /**
     * Return the data for the given ISBN by delegating to fetchCachedData().
     *
     * @inheritdoc
     */
    public function getData($id)
    {
        return $this->fetchCachedData($id);
    }

    /**
     * Download the isbn.de page for the given ISBN and scrape the book data from it.
     *
     * The result starts out as a copy of $this->defaultResult and has the keys
     * id, url, title, published, authors and publisher filled in.
     *
     * @inheritdoc
     * @throws \Exception when the page could not be fetched or contains no ISBN
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) {
            throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);
        }

        $data = $this->defaultResult;

        // a page without the ISBN meta tag is treated as "book not found"
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if ($data['id'] === '') {
            throw new \Exception('ISBN not found at isbn.de.');
        }
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        // fall back to the requested ID when the page carries no title
        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        if ($data['title'] === '') {
            $data['title'] = $id;
        }

        // only the leading four digit year is kept; anything after it (if present) is ignored
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * HTML entities in the matched value are decoded (including quote entities).
     *
     * @param string $regex
     * @param string $html
     * @param int $group number of the capture group to return
     * @return string the decoded value, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        // isset() guards against groups that did not take part in the match
        if (preg_match($regex, $html, $m) && isset($m[$group])) {
            // explicit flags: the default flags differ between PHP versions
            return html_entity_decode($m[$group], ENT_QUOTES | ENT_HTML5, 'UTF-8');
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * HTML entities in the matched values are decoded and duplicates are removed.
     *
     * @param string $regex
     * @param string $html
     * @param int $group number of the capture group to return
     * @return string[] list of unique decoded values, empty when nothing matched
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (!preg_match_all($regex, $html, $m) || !isset($m[$group])) {
            return [];
        }

        $all = array_map(
            static function ($value) {
                return html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
            },
            $m[$group]
        );

        // array_unique() keeps the original keys, re-index to get a proper list
        return array_values(array_unique($all));
    }
}