xref: /plugin/doi/Resolver/IsbnIsbnDeResolver.php (revision 17101be411563598a3f2f83cf104a5211ddaee03)
1307c6980SAndreas Gohr<?php
2307c6980SAndreas Gohr
3307c6980SAndreas Gohrnamespace dokuwiki\plugin\doi\Resolver;
4307c6980SAndreas Gohr
5307c6980SAndreas Gohruse dokuwiki\HTTP\DokuHTTPClient;
6307c6980SAndreas Gohr
/**
 * ISBN resolver scraping isbn.de
 *
 * Book metadata is read from the Open Graph <meta> tags and from the
 * person/publisher links found in the HTML of the isbn.de search result page.
 */
class IsbnIsbnDeResolver extends AbstractIsbnResolver
{
    /** @inheritdoc */
    public function getFallbackURL($id)
    {
        // the identifier is URL-encoded so it always forms a single path segment
        return 'https://www.isbn.de/buecher/suche/' . rawurlencode($id);
    }

    /** @inheritdoc */
    public function getData($id)
    {
        // fetchCachedData() is not defined in this class (presumably inherited
        // from the parent resolver)
        return $this->fetchCachedData($id);
    }

    /**
     * Download the isbn.de page for the given ISBN and scrape the book metadata from it
     *
     * @inheritdoc
     * @throws \Exception when the page cannot be fetched or contains no ISBN
     */
    protected function fetchData($id)
    {
        $http = new DokuHTTPClient();
        // the human readable search page is also the page that gets scraped
        $url = $this->getFallbackURL($id);

        $html = $http->get($url);
        if (!$html) throw new \Exception('Could not fetch data from isbn.de. ' . $http->error);

        $data = $this->defaultResult;

        // a page without an ISBN meta tag is treated as "book not found"
        $data['id'] = $this->extract('/<meta property="og:book:isbn" content="([^"]+)"/', $html);
        if (!$data['id']) throw new \Exception('ISBN not found at isbn.de.');

        $data['url'] = $this->extract('/<meta property="og:url" content="([^"]+)"/', $html);
        // without a canonical URL fall back to the page that was actually fetched
        if (empty($data['url'])) $data['url'] = $url;

        $data['title'] = $this->extract('/<meta property="og:title" content="([^"]+)"/', $html);
        if (empty($data['title'])) $data['title'] = $id;

        // only the four digit year is kept; whatever follows it (if anything) is ignored
        $data['published'] = $this->extract(
            '/<meta property="og:book:release_date" content="(\d{4})[^"]*"/',
            $html
        );

        $data['authors'] = $this->extractAll('/<a href="\/person\/.*?">(.+?)<\/a>/', $html);
        $data['publisher'] = $this->extract('/<a href="\/verlag\/.*?">(.+?)<\/a>/', $html);

        $data['image'] = $this->extract('/<meta property="og:image" content="([^"]+)"/', $html);

        return $data;
    }

    /**
     * Extract a value from a HTML string using a regex
     *
     * @param string $regex
     * @param string $html
     * @param int $group number of the capture group to return
     * @return string the entity-decoded match, empty string when nothing matched
     */
    protected function extract($regex, $html, $group = 1)
    {
        // trailing groups that did not take part in the match are not set
        if (preg_match($regex, $html, $m) && isset($m[$group])) {
            return $this->decodeEntities($m[$group]);
        }
        return '';
    }

    /**
     * Extract all matching values from a HTML string using a regex
     *
     * @param string $regex
     * @param string $html
     * @param int $group number of the capture group to return
     * @return string[] the unique, entity-decoded matches in order of first appearance
     */
    protected function extractAll($regex, $html, $group = 1)
    {
        if (!preg_match_all($regex, $html, $m) || !isset($m[$group])) {
            return [];
        }

        $all = [];
        foreach ($m[$group] as $match) {
            $all[] = $this->decodeEntities($match);
        }

        // array_unique() preserves keys; reindex so the result is a gap-free list
        return array_values(array_unique($all));
    }

    /**
     * Decode HTML entities the same way on every PHP version
     *
     * The flags are passed explicitly because the default only includes
     * ENT_QUOTES since PHP 8.1; older versions would leave &#039; undecoded.
     *
     * @param string $text
     * @return string
     */
    private function decodeEntities($text)
    {
        return html_entity_decode($text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
    }
}
87