xref: /dokuwiki/inc/Sitemap/Mapper.php (revision 9369b4a991666bc911474806b106d8958e79f4c1)
1<?php
2
3/**
4 * Sitemap handling functions
5 *
6 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author     Michael Hamann <michael@content-space.de>
8 */
9
10namespace dokuwiki\Sitemap;
11
12use dokuwiki\Extension\Event;
13use dokuwiki\HTTP\DokuHTTPClient;
14use dokuwiki\Logger;
15use dokuwiki\Search\Indexer;
16
/**
 * A class for building sitemaps and pinging search engines with the sitemap URL.
 *
 * All public entry points are static; the sitemap location and the
 * regeneration interval are read from the global $conf array.
 *
 * @author Michael Hamann
 */
class Mapper
{
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is written to the path returned by getFilePath(), i.e.
     * sitemap.xml (or sitemap.xml.gz when compression is enabled) inside the
     * cache directory - this file (or, if it does not exist yet, the cache
     * directory) needs to be writable!
     *
     * Nothing is generated when $conf['sitemap'] is not a number >= 1, when
     * the target is not writable, or when a non-empty sitemap exists that is
     * younger than $conf['sitemap'] days.
     *
     * Plugins can modify the items and the target path through the
     * SITEMAP_GENERATE event.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link   http://www.sitemaps.org/
     *
     * @return bool false if no sitemap was generated, otherwise the result of
     *              the SITEMAP_GENERATE event (the return value of
     *              io_saveFile() unless an event handler prevented the
     *              default action)
     */
    public static function generate()
    {
        global $conf;

        // validate the setting first, then check the range: the value is the
        // regeneration interval in days and anything below 1 disables sitemaps
        if (!is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) {
            return false;
        }

        $sitemap = self::getFilePath();

        // either the existing file or the directory it will be created in
        // has to be writable
        if (file_exists($sitemap)) {
            if (!is_writable($sitemap)) {
                return false;
            }
        } elseif (!is_writable(dirname($sitemap))) {
            return false;
        }

        // a non-empty sitemap younger than the configured number of days is
        // still fresh; errors are suppressed because the file may not exist
        if (
            @filesize($sitemap) &&
            @filemtime($sitemap) > (time() - ($conf['sitemap'] * 86400)) // 60*60*24=86400
        ) {
            Logger::debug('Sitemapper::generate(): Sitemap up to date');
            return false;
        }

        Logger::debug("Sitemapper::generate(): using $sitemap");

        $pages = (new Indexer())->getAllPages();
        Logger::debug('Sitemapper::generate(): creating sitemap using ' . count($pages) . ' pages');
        $items = [];

        // build the sitemap items
        foreach ($pages as $id) {
            //skip hidden, non existing and restricted files
            if (isHiddenPage($id)) {
                continue;
            }
            // empty user and no groups: check against the ACL as seen by an anonymous visitor
            if (auth_aclcheck($id, '', []) < AUTH_READ) {
                continue;
            }
            // anything that is not an Item instance is dropped
            $item = Item::createFromID($id);
            if ($item instanceof Item) {
                $items[] = $item;
            }
        }

        // both entries are passed by reference so event handlers can change
        // the item list and the target file
        $eventData = ['items' => &$items, 'sitemap' => &$sitemap];
        $event = new Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            //save the new sitemap
            $event->result = io_saveFile($sitemap, (new self())->getXML($items));
        }
        $event->advise_after();

        return $event->result;
    }

    /**
     * Builds the sitemap XML string from the given array of sitemap items.
     *
     * The string is assembled by concatenation, so no output buffer is left
     * open if an item's toXML() throws.
     *
     * @param $items array The Item objects that shall be included in the sitemap.
     * @return string The sitemap XML.
     *
     * @author Michael Hamann
     */
    private function getXML($items)
    {
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . NL;
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . NL;
        foreach ($items as $item) {
            /** @var Item $item */
            $xml .= $item->toXML();
        }
        $xml .= '</urlset>' . NL;

        return $xml;
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file is named sitemap.xml and lives in $conf['cachedir']; the
     * suffix .gz is appended whenever sitemapIsCompressed() reports true
     * (that is for both the 'gz' and the 'bz2' compression setting).
     *
     * @return string The path to the sitemap file.
     *
     * @author Michael Hamann
     */
    public static function getFilePath()
    {
        global $conf;

        $sitemap = $conf['cachedir'] . '/sitemap.xml';
        if (self::sitemapIsCompressed()) {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Helper function for checking if the sitemap is compressed
     *
     * @return bool true if $conf['compression'] is exactly 'bz2' or 'gz'
     */
    public static function sitemapIsCompressed()
    {
        global $conf;

        return in_array($conf['compression'], ['bz2', 'gz'], true);
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Failed requests are only written to the debug log; they do not affect
     * the return value.
     *
     * @author Michael Hamann
     *
     * @return bool always true
     */
    public static function pingSearchEngines()
    {
        //ping search engines...
        $http = new DokuHTTPClient();
        $http->timeout = 8;

        // absolute URL of the do=sitemap action, encoded for use as a query parameter
        $encoded_sitemap_url = urlencode(wl('', ['do' => 'sitemap'], true, '&'));

        // NOTE(review): Google announced the retirement of its sitemap ping
        // endpoint in 2023 - confirm whether the 'google' entry is still useful.
        $ping_urls = [
            'google'    => 'https://www.google.com/ping?sitemap=' . $encoded_sitemap_url,
            'yandex'    => 'https://webmaster.yandex.com/ping?sitemap=' . $encoded_sitemap_url
        ];

        $data = [
            'ping_urls' => $ping_urls,
            'encoded_sitemap_url' => $encoded_sitemap_url
        ];
        $event = new Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // iterate the event data rather than $ping_urls so that the list
            // the event was created with is the one being pinged
            foreach ($data['ping_urls'] as $name => $url) {
                Logger::debug("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                if ($http->error) {
                    Logger::debug("Sitemapper:pingSearchengines(): $http->error", $resp);
                }
            }
        }
        $event->advise_after();

        return true;
    }
}
177