<?php
/**
 * Sitemap handling functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Michael Hamann <michael@content-space.de>
 */

namespace dokuwiki\Sitemap;

/**
 * A class for building sitemaps and pinging search engines with the sitemap URL.
 *
 * @author Michael Hamann
 */
class Mapper {
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is placed in the cache directory named sitemap.xml (with a
     * trailing .gz when compression is enabled) - This file needs to be
     * writable!
     *
     * Nothing is generated (and false is returned) when
     *  - $conf['sitemap'] is not a number >= 1,
     *  - the target file, or its directory if the file does not exist yet,
     *    is not writable,
     *  - the existing file is non-empty and younger than $conf['sitemap'] days.
     *
     * The SITEMAP_GENERATE event receives the item list and the target path
     * by reference before the file is written.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link   http://www.sitemaps.org/
     *
     * @return bool true only if the event produced a truthy result (by
     *              default the return value of io_saveFile())
     */
    public static function generate(){
        global $conf;
        if($conf['sitemap'] < 1 || !is_numeric($conf['sitemap'])) return false;

        $sitemap = self::getFilePath();

        if(file_exists($sitemap)){
            if(!is_writable($sitemap)) return false;
        }else{
            if(!is_writable(dirname($sitemap))) return false;
        }

        // errors are suppressed because the file may legitimately not exist yet
        if(@filesize($sitemap) &&
            @filemtime($sitemap) > (time()-($conf['sitemap']*86400))){ // 60*60*24=86400
            dbglog('Sitemapper::generate(): Sitemap up to date');
            return false;
        }

        dbglog("Sitemapper::generate(): using $sitemap");

        $pages = idx_get_indexer()->getPages();
        dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
        $items = array();

        // build the sitemap items
        foreach($pages as $id){
            //skip hidden, non existing and restricted files
            if(isHiddenPage($id)) continue;
            if(auth_aclcheck($id,'',array()) < AUTH_READ) continue;
            $item = Item::createFromID($id);
            if ($item !== null) {
                $items[] = $item;
            }
        }

        // both entries are references so event handlers can alter them
        $eventData = array('items' => &$items, 'sitemap' => &$sitemap);
        $event = new \Doku_Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            //save the new sitemap
            $event->result = io_saveFile($sitemap, self::getXML($items));
        }
        $event->advise_after();

        // the result is only assigned above when the default action ran;
        // coerce it so the documented bool contract always holds
        return (bool) $event->result;
    }

    /**
     * Builds the sitemap XML string from the given array of SitemapItems.
     *
     * The string is assembled directly instead of going through an output
     * buffer, so no buffer is left open should an item's toXML() throw.
     * This relies on toXML() returning its markup, which is what the
     * previous echo-based implementation printed.
     *
     * @param $items array The SitemapItems that shall be included in the sitemap.
     * @return string The sitemap XML.
     *
     * @author Michael Hamann
     */
    private static function getXML($items) {
        $xml  = '<?xml version="1.0" encoding="UTF-8"?>'.NL;
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
        foreach ($items as $item) {
            /** @var Item $item */
            $xml .= $item->toXML();
        }
        $xml .= '</urlset>'.NL;
        return $xml;
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file lives in $conf['cachedir'] and is called sitemap.xml; the
     * suffix .gz is appended whenever sitemapIsCompressed() reports true.
     *
     * @return string The path to the sitemap file.
     *
     * @author Michael Hamann
     */
    public static function getFilePath() {
        global $conf;

        $sitemap = $conf['cachedir'].'/sitemap.xml';
        if (self::sitemapIsCompressed()) {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Helper function for checking if the sitemap is compressed
     *
     * True when $conf['compression'] is exactly the string 'bz2' or 'gz'.
     *
     * @return bool If the sitemap file is compressed
     */
    public static function sitemapIsCompressed() {
        global $conf;
        return $conf['compression'] === 'bz2' || $conf['compression'] === 'gz';
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Every request uses a timeout of 8 seconds. HTTP errors are only
     * written to the debug log; they do not influence the return value.
     *
     * @author Michael Hamann
     *
     * @return bool always true
     */
    public static function pingSearchEngines() {
        //ping search engines...
        $http = new \DokuHTTPClient();
        $http->timeout = 8;

        $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&'));
        $ping_urls = array(
            'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url,
            'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
            'yandex'    => 'http://blogs.yandex.ru/pings/?status=success&url='.$encoded_sitemap_url
        );

        $data = array(
            'ping_urls'           => $ping_urls,
            'encoded_sitemap_url' => $encoded_sitemap_url
        );
        $event = new \Doku_Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // iterate over $data, not $ping_urls, so the list handed to the
            // event is the one that actually gets pinged
            foreach ($data['ping_urls'] as $name => $url) {
                dbglog("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                if($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error");
                dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
            }
        }
        $event->advise_after();

        return true;
    }
}