xref: /plugin/doi/Resolver/IsbnIsbnDeResolver.php (revision 307c69801b7d8b027f4d0b7119b53e92e2931cb6)
1<?php
2
3namespace dokuwiki\plugin\doi\Resolver;
4
5use dokuwiki\HTTP\DokuHTTPClient;
6
/**
 * ISBN resolver scraping isbn.de
 *
 * Requests the isbn.de search URL for an ISBN and reads the book metadata
 * from the Open Graph meta tags and the person/publisher links found in the
 * returned HTML.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /** @inheritdoc */
    public function getFallbackURL($id)
    {
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /** @inheritdoc */
    public function getData($id)
    {
        return $this->fetchCachedData($id);
    }

    /**
     * Download the page behind the fallback URL and scrape the book data from it
     *
     * @inheritdoc
     * @throws \Exception when the page could not be fetched or carries no ISBN meta tag
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);

        $data = $this->defaultResult;

        // without the ISBN meta tag the response is not treated as a book page
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) throw new \Exception('ISBN not found at isbn.de.');
        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);

        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        // only the leading four digit year is kept; whatever follows it (if anything) is ignored
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * @param string $regex pattern to apply
     * @param string $html the HTML to search in
     * @param int $group the capture group holding the wanted value
     * @return string the entity-decoded value of the first match, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        if (preg_match($regex, $html, $m)) {
            return $this->decodeEntities($m[$group]);
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * @param string $regex pattern to apply
     * @param string $html the HTML to search in
     * @param int $group the capture group holding the wanted values
     * @return string[] the unique, entity-decoded values as a sequentially indexed list
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (preg_match_all($regex, $html, $m)) {
            $all = array_map([$this, 'decodeEntities'], $m[$group]);
            // array_unique() keeps the original keys; re-index so callers get a plain list
            return array_values(array_unique($all));
        }
        return [];
    }

    /**
     * Decode HTML entities in a scraped value
     *
     * Flags and charset are passed explicitly because the defaults of
     * html_entity_decode() differ between PHP versions (single quote
     * entities are left encoded by default before PHP 8.1).
     *
     * @param string $value
     * @return string
     */
    private function decodeEntities($value)
    {
        return html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
}
84