<?php

use ComboStrap\Image;
use ComboStrap\LogUtility;
use ComboStrap\MetadataUtility;
use ComboStrap\PluginUtility;
use ComboStrap\Page;
use ComboStrap\Site;
use ComboStrap\StringUtility;

if (!defined('DOKU_INC')) die();

require_once(__DIR__ . '/../class/Site.php');

/**
 * Action plugin that adds a schema.org JSON-LD script to the HTML head
 * of the requested page, based on the page type.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{

    /** Canonical name passed to LogUtility when reporting a problem */
    const CANONICAL = "google";
    /** Name of the page metadata that holds the extra, user-defined ld-json properties */
    const JSON_LD_PROPERTY = "json-ld";
    /** Lowercase schema.org type names accepted as page type in addition to the Page constants */
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /** Keys of the generated ld-json document */
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Registers the handler that runs before the meta headers are output.
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Builds the ld-json document of the current page and appends it
     * as an "application/ld+json" script to the event data.
     *
     * Nothing is added when there is no current page id, when the page
     * does not exist on the file system, when the page is a bar or when
     * the page has no type.
     *
     * @param $event - the event, its data["script"] array receives the script entry
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = new Page($ID);
        if (!$page->existInFs()) {
            return;
        }
        /**
         * No metadata for bars
         */
        if ($page->isBar()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        // The type is matched case-insensitively, the lowercase value is computed only once
        $lowerType = strtolower($type);
        switch ($lowerType) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteSchema($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationSchema();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleSchema($page, $lowerType);
                break;

            default:
                // May be added manually by the user itself
                // The type is passed as written by the user (not lowercased)
                $ldJson = array(
                    '@context' => 'http://schema.org',
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * https://developers.google.com/search/docs/data-types/speakable
         *
         * A speakable specification without any xpath points to nothing,
         * it is therefore only added when at least one xpath is known
         */
        $speakableXpath = $this->buildSpeakableXpaths($page);
        if (!empty($speakableXpath)) {
            $ldJson[self::SPEAKABLE] = array(
                "@type" => "SpeakableSpecification",
                "xpath" => $speakableXpath
            );
        }

        /**
         * Do we have extra ld-json properties
         * They overwrite the generated properties that have the same key
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_array($extraLdJson)) {
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                // array_merge fails on a non-array argument, the generated ld-json is kept as it is
                LogUtility::msg("The extra json-ld metadata of the page ($ID) is not an array and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }

        /**
         * Publish
         */
        if (empty($ldJson)) {
            return;
        }
        $jsonString = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($jsonString === false) {
            // Never publish `false` as script content
            LogUtility::msg("The google ld-json of the page ($ID) could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $jsonString,
        );
    }

    /**
     * Builds the WebSite document. The search action is added only for the home page.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page - the current page
     * @return array the ld-json document
     */
    private function buildWebSiteSchema(Page $page)
    {
        $ldJson = array(
            '@context' => 'http://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Builds the Organization document (Organization + Logo).
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array the ld-json document
     */
    private function buildOrganizationSchema()
    {
        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getUrl()
        );
        // Same guard as for the website image and the publisher logo: no empty logo property
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $ldJson["logo"] = $logoUrlAsPng;
        }

        return $ldJson;
    }

    /**
     * Builds the Article, NewsArticle or BlogPosting document
     * with its publisher and its images.
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * Date should be https://en.wikipedia.org/wiki/ISO_8601
     *
     * @param Page $page - the current page
     * @param string $lowerType - the page type in lowercase
     * @return array the ld-json document
     */
    private function buildArticleSchema(Page $page, $lowerType)
    {
        $schemaType = "Article";
        switch ($lowerType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
        }

        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty(),
            // 'c' is the ISO 8601 format of the date function
            self::DATE_PUBLISHED_KEY => date('c', $page->getPublishedElseCreationTimeStamp()),
            self::DATE_MODIFIED_KEY => date('c', $page->getModifiedTimestamp()),
        );

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        $schemaImages = $this->buildImageSchemas($page);
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }

        return $ldJson;
    }

    /**
     * Builds one ImageObject for each existing image of the page image set.
     * An image that does not exist is reported with an error message and skipped.
     *
     * Image (at least 696 pixels wide)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG.
     *
     * @param Page $page - the current page
     * @return array the list of ImageObject, empty if none
     */
    private function buildImageSchemas(Page $page)
    {
        $schemaImages = array();
        foreach ($page->getImageSet() as $imageId) {
            $image = new Image($imageId);
            if (!$image->exists()) {
                LogUtility::msg("The image ($imageId) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }
            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getUrl()
            );
            // The dimensions are added only when the image is analyzable and they are known
            if ($image->isAnalyzable()) {
                $width = $image->getWidth();
                if (!empty($width)) {
                    $imageObjectSchema["width"] = $width;
                }
                $height = $image->getHeight();
                if (!empty($height)) {
                    $imageObjectSchema["height"] = $height;
                }
            }
            $schemaImages[] = $imageObjectSchema;
        }

        return $schemaImages;
    }

    /**
     * Returns the xpath expressions of the speakable parts of the page:
     * the title and the description, each only when it is not empty.
     *
     * @param Page $page - the current page
     * @return array the list of xpath expressions, empty if none
     */
    private function buildSpeakableXpaths(Page $page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }

        return $speakableXpath;
    }

}