1<?php
2
3namespace dokuwiki\plugin\doi\Resolver;
4
5use dokuwiki\HTTP\DokuHTTPClient;
6
7/**
 * ISBN resolver scraping isbn.de
9 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /** @inheritdoc */
    public function getFallbackURL($id)
    {
        // the ID is URL-encoded so it is safe as a single path segment of the search URL
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /** @inheritdoc */
    public function getData($id)
    {
        // fetchCachedData() is not defined in this class (presumably inherited from the parent)
        return $this->fetchCachedData($id);
    }

    /**
     * Download the isbn.de search page for the given ID and scrape the book metadata from it
     *
     * The result starts out as a copy of $this->defaultResult; the keys id, url, title,
     * published, authors and publisher are then overwritten with the scraped values.
     *
     * @inheritdoc
     * @throws \Exception when the page could not be fetched or contains no ISBN meta tag
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);

        $data = $this->defaultResult;

        // a missing ISBN meta tag is treated as "book not found"
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) throw new \Exception('ISBN not found at isbn.de.');
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        // fall back to the requested ID when the page carries no title
        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        if (empty($data['title'])) $data['title'] = $id;

        // only the leading four digit year is kept; anything following it (if present) is ignored
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        // authors and publisher are taken from the link texts of /person/ and /verlag/ links
        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * HTML entities in the matched value are decoded.
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group the capture group to return
     * @return string the decoded value of the first match, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        if (preg_match($regex, $html, $m)) {
            return $this->decodeEntities($m[$group]);
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * HTML entities in the matched values are decoded and duplicates are removed.
     *
     * @param string $regex the pattern to apply
     * @param string $html the HTML to search
     * @param int $group the capture group to return
     * @return string[] sequentially indexed list of unique decoded values, empty when nothing matched
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (preg_match_all($regex, $html, $m)) {
            $all = array_map([$this, 'decodeEntities'], $m[$group]);
            // array_unique() keeps the original keys, reindex to get a gap-free list
            return array_values(array_unique($all));
        }
        return [];
    }

    /**
     * Decode HTML entities in a scraped value
     *
     * Flags and charset are passed explicitly because the defaults of html_entity_decode()
     * differ between PHP versions (single quotes were not decoded by default before PHP 8.1).
     *
     * @param string $value
     * @return string
     */
    private function decodeEntities($value)
    {
        return html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
}
85