<?php

namespace dokuwiki\plugin\doi\Resolver;

use dokuwiki\HTTP\DokuHTTPClient;

/**
 * ISBN resolver scraping isbn.de
 *
 * Requests the isbn.de search URL for an ISBN and reads the book metadata from
 * the Open Graph meta tags and the person/publisher links of the returned HTML.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /** @inheritdoc */
    public function getFallbackURL($id)
    {
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /** @inheritdoc */
    public function getData($id)
    {
        // fetchCachedData() is not defined in this class; presumably the parent
        // wraps fetchData() with a cache (verify against AbstractIsbnResolver)
        return $this->fetchCachedData($id);
    }

    /**
     * Fetch the isbn.de page for the given ISBN and scrape the book data from it
     *
     * @param string $id the ISBN to look up
     * @return array the default result filled with id, url, title, published, authors and publisher
     * @throws \Exception when the page could not be fetched or contains no ISBN meta tag
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);

        $data = $this->defaultResult;

        // a page without the ISBN meta tag is treated as "book not found"
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) throw new \Exception('ISBN not found at isbn.de.');
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        // only the leading four digit year is kept; anything after it (month, day) is
        // optional so that a bare year like "2020" is accepted as well
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group the capture group to return
     * @return string the entity-decoded content of the group, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        if (preg_match($regex, $html, $m)) {
            return $this->decodeEntities($m[$group]);
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group the capture group to collect
     * @return string[] the unique, entity-decoded contents of the group; empty when nothing matched
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (preg_match_all($regex, $html, $m)) {
            $all = array_map([$this, 'decodeEntities'], $m[$group]);
            // array_unique() keeps the original keys, which leaves gaps after
            // removing duplicates; reindex so callers always get a plain list
            return array_values(array_unique($all));
        }
        return [];
    }

    /**
     * Decode HTML entities in a scraped value
     *
     * The flags are passed explicitly because the default flags of
     * html_entity_decode() changed in PHP 8.1; without them quote entities
     * such as &#039; would be decoded on some PHP versions only.
     *
     * @param string $value the raw text taken from the HTML
     * @return string the decoded text
     */
    private function decodeEntities($value)
    {
        return html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
}