<?php

use ComboStrap\Image;
use ComboStrap\Iso8601Date;
use ComboStrap\LogUtility;
use ComboStrap\Page;
use ComboStrap\RasterImageLink;
use ComboStrap\Site;

if (!defined('DOKU_INC')) die();

require_once(__DIR__ . '/../ComboStrap/Site.php');

/**
 * Adds a schema.org JSON-LD script (structured data) to the HTML head of a page.
 *
 * The plugin hooks into TPL_METAHEADER_OUTPUT and, depending on the page type,
 * publishes a WebSite, Organization, Article/NewsArticle/BlogPosting, Event
 * or user-defined schema.org object.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag a page manually (it shows well what kind of information is needed)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{

    const CANONICAL = "google";
    const JSON_LD_META_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Add the local images of the page to the ld-json under the `image` key.
     *
     * Only images with a mime listed in the supported formats are considered.
     * An image with a supported mime that does not exist is reported
     * through LogUtility and skipped.
     * The `image` key is left untouched when no image qualifies.
     *
     * @param array $ldJson the ld-json array, modified in place
     * @param Page $page the page that provides the local image set
     */
    private static function addImage(array &$ldJson, $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            "image/bmp",
            "image/gif",
            "image/jpeg",
            "image/png",
            "image/webp",
            "image/svg+xml",
        ];

        $schemaImages = array();
        foreach ($page->getLocalImageSet() as $image) {

            if (!in_array($image->getMime(), $supportedMime)) {
                continue;
            }

            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }

            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            );
            // The dimensions are read once and only published when known
            $width = $image->getIntrinsicWidth();
            if (!empty($width)) {
                $imageObjectSchema["width"] = $width;
            }
            $height = $image->getIntrinsicHeight();
            if (!empty($height)) {
                $imageObjectSchema["height"] = $height;
            }
            $schemaImages[] = $imageObjectSchema;
        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * Build the WebSite object.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page the requested page (the search action is added only on the home page)
     * @return array the ld-json array
     */
    private static function buildWebSiteLdJson($page)
    {
        $ldJson = array(
            '@context' => 'http://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Build the Organization object (organization + logo).
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array the ld-json array
     */
    private static function buildOrganizationLdJson()
    {
        return array(
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getUrl(),
            "logo" => Site::getLogoUrlAsPng()
        );
    }

    /**
     * Build the Article, NewsArticle or BlogPosting object.
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * Image (at least 696 pixels wide)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG.
     *
     * Date should be https://en.wikipedia.org/wiki/ISO_8601
     *
     * @param Page $page the requested page
     * @param string $lowerType the page type in lowercase, used to choose the schema type
     * @return array the ld-json array
     */
    private static function buildArticleLdJson($page, $lowerType)
    {
        $schemaType = "Article";
        switch ($lowerType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
        }

        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty(),
            self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(Iso8601Date::getFormat())
        );

        /**
         * Modified Time
         */
        $modifiedTime = $page->getModifiedTime();
        if ($modifiedTime != null) {
            $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
        }

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        self::addImage($ldJson, $page);

        return $ldJson;
    }

    /**
     * Build the Event object.
     *
     * https://developers.google.com/search/docs/advanced/structured-data/event
     *
     * The name, description, start date and end date are mandatory:
     * when one of them is missing, an error is logged and null is returned.
     *
     * @param Page $page the requested page
     * @return array|null the ld-json array or null when a mandatory metadata is missing
     */
    private static function buildEventLdJson($page)
    {
        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => "Event");

        $eventName = $page->getPageName();
        if (blank($eventName)) {
            LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["name"] = $eventName;

        $eventDescription = $page->getDescription();
        if (blank($eventDescription)) {
            LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["description"] = $eventDescription;

        $startDate = $page->getStartDateAsString();
        if ($startDate === null) {
            LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["startDate"] = $startDate;

        $endDate = $page->getEndDateAsString();
        if ($endDate === null) {
            LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["endDate"] = $endDate;

        self::addImage($ldJson, $page);

        return $ldJson;
    }

    /**
     * Collect the xpath expressions of the speakable parts of the page.
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param Page $page the requested page
     * @return array a list of xpath expressions, empty when the page has no title and no description
     */
    private static function buildSpeakableXpath($page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        return $speakableXpath;
    }

    /**
     * Register the handler that adds the ld-json script to the meta headers.
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Build the ld-json of the requested page and add it as a
     * `application/ld+json` script to the event data.
     *
     * Nothing is published when:
     *   * the global $ID is empty,
     *   * the page does not exist or is a slot,
     *   * the page has no type,
     *   * a mandatory event metadata is missing,
     *   * the ld-json cannot be encoded.
     *
     * @param $event the TPL_METAHEADER_OUTPUT event; its data["script"] list receives the script
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = Page::createPageFromId($ID);
        if (!$page->exists()) {
            return;
        }

        /**
         * No metadata for bars
         */
        if ($page->isSlot()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        $lowerType = strtolower($type);
        switch ($lowerType) {
            case Page::WEBSITE_TYPE:
                $ldJson = self::buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = self::buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = self::buildArticleLdJson($page, $lowerType);
                break;

            case Page::EVENT_TYPE:
                $ldJson = self::buildEventLdJson($page);
                if ($ldJson === null) {
                    // A mandatory metadata is missing, the error was already logged
                    return;
                }
                break;

            default:
                // May be added manually by the user itself
                $ldJson = array(
                    '@context' => 'http://schema.org',
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * Speakable: published only when there is at least one xpath,
         * a specification without any selector designates nothing
         */
        $speakableXpath = self::buildSpeakableXpath($page);
        if (!empty($speakableXpath)) {
            $ldJson[self::SPEAKABLE] = array(
                "@type" => "SpeakableSpecification",
                "xpath" => $speakableXpath
            );
        }

        /**
         * Do we have extra ld-json properties
         * They are merged only if they are an array, because array_merge
         * fails on any other type
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_META_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_array($extraLdJson)) {
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                LogUtility::msg("The (" . self::JSON_LD_META_PROPERTY . ") metadata is not an array and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }

        /**
         * Publish
         * json_encode returns false on failure, nothing is published in this case
         */
        $jsonEncode = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($jsonEncode === false) {
            LogUtility::msg("The google ld-json could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $jsonEncode,
        );
    }


}