xref: /dokuwiki/inc/Sitemap/Mapper.php (revision 8788dbbd585b42284320d64cc932f3c875eab6b2)
1432adb37SAndreas Gohr<?php
2d4f83172SAndreas Gohr
3432adb37SAndreas Gohr/**
4432adb37SAndreas Gohr * Sitemap handling functions
5432adb37SAndreas Gohr *
6432adb37SAndreas Gohr * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
7432adb37SAndreas Gohr * @author     Michael Hamann <michael@content-space.de>
8432adb37SAndreas Gohr */
9432adb37SAndreas Gohr
10432adb37SAndreas Gohrnamespace dokuwiki\Sitemap;
11432adb37SAndreas Gohr
12be5c1ea2SSatoshi Saharause dokuwiki\Extension\Event;
135a8d6e48SMichael Großeuse dokuwiki\HTTP\DokuHTTPClient;
1431667ec6SAndreas Gohruse dokuwiki\Logger;
154027a91aSSatoshi Saharause dokuwiki\Search\Indexer;
16198564abSMichael Große
17432adb37SAndreas Gohr/**
18432adb37SAndreas Gohr * A class for building sitemaps and pinging search engines with the sitemap URL.
19432adb37SAndreas Gohr *
20432adb37SAndreas Gohr * @author Michael Hamann
21432adb37SAndreas Gohr */
class Mapper
{
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is written to the path returned by getFilePath(), i.e.
     * sitemap.xml inside the cache directory, with ".gz" appended when
     * compression is configured. That file (or, if it does not exist yet,
     * its directory) needs to be writable!
     *
     * Nothing is generated when the sitemap feature is disabled, when the
     * target is not writable, or when the existing non-empty file is younger
     * than $conf['sitemap'] days.
     *
     * Plugins can alter the items or the target path, or replace the default
     * saving altogether, through the SITEMAP_GENERATE event.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link   http://www.sitemaps.org/
     *
     * @return bool false if nothing was generated, otherwise the (boolean) event result
     */
    public static function generate()
    {
        global $conf;

        // check the type first, then the value: the setting is the maximum age in days
        if (!is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) {
            return false;
        }

        $sitemap = self::getFilePath();

        // an existing file must be writable itself, otherwise its directory must be
        if (file_exists($sitemap)) {
            if (!is_writable($sitemap)) {
                return false;
            }
        } elseif (!is_writable(dirname($sitemap))) {
            return false;
        }

        // a non-empty file that is still fresh enough is kept as is;
        // the @ silences the warnings for a not yet existing file
        if (
            @filesize($sitemap) &&
            @filemtime($sitemap) > (time() - ($conf['sitemap'] * 86400)) // 60*60*24=86400
        ) {
            Logger::debug('Sitemapper::generate(): Sitemap up to date');
            return false;
        }

        Logger::debug("Sitemapper::generate(): using $sitemap");

        $pages = (new Indexer())->getAllPages();
        Logger::debug('Sitemapper::generate(): creating sitemap using ' . count($pages) . ' pages');
        $items = [];

        // build the sitemap items
        foreach ($pages as $id) {
            // skip hidden pages
            if (isHiddenPage($id)) {
                continue;
            }
            // skip pages that are not readable with an empty user name and no groups
            if (auth_aclcheck($id, '', []) < AUTH_READ) {
                continue;
            }
            // skip anything createFromID() could not turn into an Item
            $item = Item::createFromID($id);
            if ($item instanceof Item) {
                $items[] = $item;
            }
        }

        // both values are handed over by reference so that event handlers can modify them
        $eventData = ['items' => &$items, 'sitemap' => &$sitemap];
        $event = new Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            //save the new sitemap
            $event->result = io_saveFile($sitemap, (new self())->getXML($items));
        }
        $event->advise_after();

        // the result stays unset when the default action did not run and no
        // handler set it; always hand back the documented boolean
        return (bool) $event->result;
    }

    /**
     * Builds the sitemap XML string from the given array of SitemapItems.
     *
     * @param $items array The SitemapItems that shall be included in the sitemap.
     * @return string The sitemap XML.
     *
     * @author Michael Hamann
     */
    private function getXML($items)
    {
        ob_start();
        try {
            echo '<?xml version="1.0" encoding="UTF-8"?>' . NL;
            echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . NL;
            foreach ($items as $item) {
                /** @var Item $item */
                echo $item->toXML();
            }
            echo '</urlset>' . NL;

            // the return value is evaluated before the finally block discards the buffer
            return ob_get_contents();
        } finally {
            // always close the buffer opened above, even when an item throws,
            // so that a failure here cannot swallow later output
            ob_end_clean();
        }
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file is named sitemap.xml and lives in the configured cache
     * directory; ".gz" is appended when sitemapIsCompressed() is true.
     *
     * @return string The path to the sitemap file.
     *
     * @author Michael Hamann
     */
    public static function getFilePath()
    {
        global $conf;

        $sitemap = $conf['cachedir'] . '/sitemap.xml';
        if (self::sitemapIsCompressed()) {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Helper function for checking if the sitemap is compressed
     *
     * This is the case when the compression setting is either 'bz2' or 'gz'.
     *
     * @return bool If the sitemap file is compressed
     */
    public static function sitemapIsCompressed()
    {
        global $conf;
        return $conf['compression'] === 'bz2' || $conf['compression'] === 'gz';
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Failed requests are only logged, they do not influence the return value.
     *
     * NOTE(review): Google announced the retirement of its sitemap ping
     * endpoint in 2023 - confirm whether the 'google' entry is still useful.
     *
     * @author Michael Hamann
     *
     * @return bool always true
     */
    public static function pingSearchEngines()
    {
        //ping search engines...
        $http = new DokuHTTPClient();
        $http->timeout = 8;

        // the sitemap URL is passed as a query parameter and thus needs to be encoded
        $encoded_sitemap_url = urlencode(wl('', ['do' => 'sitemap'], true, '&'));
        $ping_urls = [
            'google'    => 'https://www.google.com/ping?sitemap=' . $encoded_sitemap_url,
            'yandex'    => 'https://webmaster.yandex.com/ping?sitemap=' . $encoded_sitemap_url
        ];

        $data = [
            'ping_urls' => $ping_urls,
            'encoded_sitemap_url' => $encoded_sitemap_url
        ];
        $event = new Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // iterate the event data, not the local list, to use what is in $data after the BEFORE handlers ran
            foreach ($data['ping_urls'] as $name => $url) {
                Logger::debug("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                if ($http->error) {
                    Logger::debug("Sitemapper:pingSearchengines(): $http->error", $resp);
                }
            }
        }
        $event->advise_after();

        return true;
    }
}
177