xref: /dokuwiki/inc/Sitemap/Mapper.php (revision bff2c9d24314e25b31ceb53d51de76d678a0a4dc)
1<?php
2/**
3 * Sitemap handling functions
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Michael Hamann <michael@content-space.de>
7 */
8
9namespace dokuwiki\Sitemap;
10
11use dokuwiki\Extension\Event;
12use dokuwiki\HTTP\DokuHTTPClient;
13use dokuwiki\Logger;
14
/**
 * Builds the XML sitemap of the wiki and pings search engines with the sitemap URL.
 *
 * The class keeps no state of its own; every method reads the global
 * configuration array $conf.
 *
 * @author Michael Hamann
 */
class Mapper {
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is written to the cache directory as sitemap.xml, or as
     * sitemap.xml.gz when compression is enabled - that file (or, while it does
     * not exist yet, its directory) needs to be writable!
     *
     * Nothing is generated when
     *  - the 'sitemap' option is unset, not numeric or below 1,
     *  - the target is not writable, or
     *  - a non-empty sitemap younger than 'sitemap' days already exists.
     *
     * The item list and the target path are handed to the SITEMAP_GENERATE
     * event by reference; the file is only written when the event's default
     * action is not prevented.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link   http://www.sitemaps.org/
     *
     * @return bool the result of the SITEMAP_GENERATE event as a boolean,
     *              false if generation was skipped
     */
    public static function generate() {
        global $conf;

        // empty() also covers an unset option. is_numeric() is checked before
        // the comparison so a non-numeric value is never compared to a number.
        if (empty($conf['sitemap']) || !is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) {
            return false;
        }

        $sitemap = self::getFilePath();

        // an existing file must be writable, otherwise its directory must be
        if (file_exists($sitemap)) {
            if (!is_writable($sitemap)) {
                return false;
            }
        } elseif (!is_writable(dirname($sitemap))) {
            return false;
        }

        // the option is given in days: 60*60*24=86400 seconds per day
        $maxAge = $conf['sitemap'] * 86400;
        // @ is kept on purpose: the file may not exist (yet) at this point
        if (@filesize($sitemap) && @filemtime($sitemap) > (time() - $maxAge)) {
            Logger::debug('Sitemapper::generate(): Sitemap up to date');
            return false;
        }

        Logger::debug("Sitemapper::generate(): using $sitemap");

        $pages = idx_get_indexer()->getPages();
        Logger::debug('Sitemapper::generate(): creating sitemap using ' . count($pages) . ' pages');

        // build the sitemap items
        $items = [];
        foreach ($pages as $id) {
            // skip hidden pages
            if (isHiddenPage($id)) {
                continue;
            }
            // skip pages that are not readable without a user and without groups
            if (auth_aclcheck($id, '', []) < AUTH_READ) {
                continue;
            }
            // createFromID() does not always yield an Item (presumably not for
            // non-existing pages - confirm in Item); anything else is dropped
            $item = Item::createFromID($id);
            if ($item instanceof Item) {
                $items[] = $item;
            }
        }

        // both entries are references so that event handlers can change them
        $eventData = ['items' => &$items, 'sitemap' => &$sitemap];
        $event = new Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            // save the new sitemap
            $event->result = io_saveFile($sitemap, (new self())->getXML($items));
        }
        $event->advise_after();

        // the result may never have been set when the default action was
        // prevented; always honour the documented bool return type
        return (bool) $event->result;
    }

    /**
     * Builds the sitemap XML string from the given array of sitemap items.
     *
     * The string is concatenated directly instead of being captured through
     * output buffering, so that no buffer is left open if an item throws.
     *
     * @param $items array The sitemap items (Item) that shall be included in the sitemap.
     * @return string The sitemap XML.
     *
     * @author Michael Hamann
     */
    private function getXML($items) {
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . NL;
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . NL;
        foreach ($items as $item) {
            /** @var Item $item */
            $xml .= $item->toXML();
        }
        $xml .= '</urlset>' . NL;

        return $xml;
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file lives in the configured cache directory and is called
     * sitemap.xml, with .gz appended when the sitemap is compressed.
     *
     * @return string The path to the sitemap file.
     *
     * @author Michael Hamann
     */
    public static function getFilePath() {
        global $conf;

        $sitemap = $conf['cachedir'] . '/sitemap.xml';
        if (self::sitemapIsCompressed()) {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Helper function for checking if the sitemap is compressed
     *
     * This is the case when the 'compression' option is either 'bz2' or 'gz'.
     * Note that getFilePath() appends .gz in both cases.
     *
     * @return bool If the sitemap file is compressed
     */
    public static function sitemapIsCompressed() {
        global $conf;

        return in_array($conf['compression'], ['bz2', 'gz'], true);
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Each URL is requested with a timeout of 8 seconds. A failed request is
     * only written to the debug log and does not stop the remaining pings.
     *
     * @author Michael Hamann
     *
     * @return bool always true
     */
    public static function pingSearchEngines() {
        //ping search engines...
        $http = new DokuHTTPClient();
        $http->timeout = 8;

        // absolute URL of the sitemap action, encoded for use as a query parameter
        $encoded_sitemap_url = urlencode(wl('', ['do' => 'sitemap'], true, '&'));
        $ping_urls = [
            'google'    => 'https://www.google.com/ping?sitemap=' . $encoded_sitemap_url,
            'yandex'    => 'https://webmaster.yandex.com/ping?sitemap=' . $encoded_sitemap_url
        ];

        $data = [
            'ping_urls' => $ping_urls,
            'encoded_sitemap_url' => $encoded_sitemap_url
        ];
        $event = new Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // the list is read back from the event data rather than from
            // $ping_urls - presumably so that changes made by handlers are used
            foreach ($data['ping_urls'] as $name => $url) {
                Logger::debug("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                if ($http->error) {
                    Logger::debug("Sitemapper:pingSearchengines(): $http->error", $resp);
                }
            }
        }
        $event->advise_after();

        return true;
    }
}
167
168