<?php

namespace dokuwiki\plugin\doi\Resolver;

use dokuwiki\HTTP\DokuHTTPClient;

/**
 * ISBN resolver scraping isbn.de
 *
 * Downloads the isbn.de page for an ISBN and reads the book metadata from
 * its Open Graph meta tags and from the person/publisher links.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /** @inheritdoc */
    public function getFallbackURL($id)
    {
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /** @inheritdoc */
    public function getData($id)
    {
        return $this->fetchCachedData($id);
    }

    /**
     * Fetch the page behind getFallbackURL() and scrape the book data from it
     *
     * @inheritdoc
     * @throws \Exception when the page cannot be fetched or carries no ISBN meta tag
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) {
            throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);
        }

        $data = $this->defaultResult;

        // without the ISBN meta tag the page is not a book page, nothing else is worth reading
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) {
            throw new \Exception('ISBN not found at isbn.de.');
        }
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        if (empty($data['title'])) {
            $data['title'] = $id;
        }

        // only the leading four digit year is kept; anything after it (month, day) is optional
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        $data['image'] = $this->extract('/<meta property="og:image" content="([^"]+)"/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group index of the capture group to return
     * @return string the entity decoded match, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        if (preg_match($regex, $html, $m) && isset($m[$group])) {
            return $this->decodeEntities($m[$group]);
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * Duplicates are removed and the result is reindexed, so it is always a
     * plain list starting at key 0.
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group index of the capture group to return
     * @return string[] the entity decoded matches, empty array when nothing matched
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (preg_match_all($regex, $html, $m) && isset($m[$group])) {
            $all = array_map([$this, 'decodeEntities'], $m[$group]);
            // array_unique() keeps the original keys, array_values() closes the gaps
            return array_values(array_unique($all));
        }
        return [];
    }

    /**
     * Decode HTML entities in a scraped value
     *
     * The flags are passed explicitly so that quote entities such as &#039;
     * are decoded regardless of the PHP version's default flags.
     *
     * @param string $value
     * @return string
     */
    private function decodeEntities($value)
    {
        return html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
}