<?php
/**
 * Sitemap handling functions
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Michael Hamann <michael@content-space.de>
 */

namespace dokuwiki\Sitemap;

use dokuwiki\Extension\Event;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\Search\Indexer;

/**
 * A class for building sitemaps and pinging search engines with the sitemap URL.
 *
 * @author Michael Hamann
 */
class Mapper {
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is written to the path returned by getFilePath(), i.e.
     * sitemap.xml (or sitemap.xml.gz when compression is enabled) inside the
     * cache directory - this file needs to be writable!
     *
     * Nothing is generated when the sitemap feature is disabled
     * ($conf['sitemap'] is not a number >= 1), when the target cannot be
     * written, or when a non-empty sitemap younger than $conf['sitemap'] days
     * already exists.
     *
     * Handlers of the SITEMAP_GENERATE event receive the item list and the
     * target path by reference and may prevent the default save action.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link http://www.sitemaps.org/
     *
     * @return bool false if no sitemap was generated for one of the reasons
     *              above, otherwise the result of the SITEMAP_GENERATE event
     *              (the return value of io_saveFile() unless a handler
     *              prevented the default action)
     */
    public static function generate()
    {
        global $conf;

        // validate first, then compare: the feature is off unless a number >= 1 is configured
        if (!is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) return false;

        $sitemap = self::getFilePath();

        if (file_exists($sitemap)) {
            if (!is_writable($sitemap)) return false;

            // a non-empty sitemap younger than the configured number of days is kept as is
            $oldestAllowed = time() - ($conf['sitemap'] * 86400); // 60*60*24=86400
            if (filesize($sitemap) && filemtime($sitemap) > $oldestAllowed) {
                dbglog('Sitemapper::generate(): Sitemap up to date');
                return false;
            }
        } elseif (!is_writable(dirname($sitemap))) {
            // the file does not exist yet, so its directory must be writable
            return false;
        }

        dbglog("Sitemapper::generate(): using $sitemap");

        $pages = Indexer::getInstance()->getPages();
        dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');

        // build the sitemap items
        $items = array();
        foreach ($pages as $id) {
            // skip hidden and restricted pages, and pages for which no item can be created
            if (isHiddenPage($id)) continue;
            if (auth_aclcheck($id, '', array()) < AUTH_READ) continue;

            $item = Item::createFromID($id);
            if ($item !== null) {
                $items[] = $item;
            }
        }

        // both entries are references so event handlers can alter the items and the target path
        $eventData = array('items' => &$items, 'sitemap' => &$sitemap);
        $event = new Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            //save the new sitemap
            $event->result = io_saveFile($sitemap, self::getXML($items));
        }
        $event->advise_after();

        return $event->result;
    }

    /**
     * Builds the sitemap XML string from the given array of SitemapItems.
     *
     * The string is assembled directly instead of going through an output
     * buffer, so no buffer can be left open when an item fails to render.
     *
     * @param $items array The SitemapItems that shall be included in the sitemap.
     * @return string The sitemap XML.
     *
     * @author Michael Hamann
     */
    private static function getXML($items)
    {
        $xml  = '<?xml version="1.0" encoding="UTF-8"?>'.NL;
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
        foreach ($items as $item) {
            /** @var Item $item */
            $xml .= $item->toXML();
        }
        $xml .= '</urlset>'.NL;

        return $xml;
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file lives in $conf['cachedir'] and is named sitemap.xml; the
     * suffix .gz is appended whenever sitemapIsCompressed() reports true
     * (this includes the 'bz2' compression setting).
     *
     * @return string The path to the sitemap file.
     *
     * @author Michael Hamann
     */
    public static function getFilePath()
    {
        global $conf;

        $sitemap = $conf['cachedir'].'/sitemap.xml';
        if (self::sitemapIsCompressed()) {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Helper function for checking if the sitemap is compressed
     *
     * @return bool true when $conf['compression'] is exactly 'bz2' or 'gz'
     */
    public static function sitemapIsCompressed()
    {
        global $conf;

        return $conf['compression'] === 'bz2' || $conf['compression'] === 'gz';
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Each URL is requested with a timeout of 8 seconds; HTTP errors and the
     * tag-stripped responses are only written to the debug log.
     *
     * @author Michael Hamann
     *
     * @return bool always true
     */
    public static function pingSearchEngines()
    {
        //ping search engines...
        $http = new DokuHTTPClient();
        $http->timeout = 8;

        $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&'));
        $ping_urls = array(
            'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url,
            'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
            'yandex'    => 'http://blogs.yandex.ru/pings/?status=success&url='.$encoded_sitemap_url
        );

        $data = array(
            'ping_urls' => $ping_urls,
            'encoded_sitemap_url' => $encoded_sitemap_url
        );
        $event = new Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // iterate over the event data rather than $ping_urls so the list
            // used is the one passed through the event
            foreach ($data['ping_urls'] as $name => $url) {
                dbglog("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                if ($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error");
                dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/', ' ', strip_tags($resp)));
            }
        }
        $event->advise_after();

        return true;
    }
}