<?php

use ComboStrap\LogUtility;
use ComboStrap\Page;
use ComboStrap\RasterImageLink;
use ComboStrap\Site;

if (!defined('DOKU_INC')) die();

require_once(__DIR__ . '/../class/Site.php');

/**
 * Adds a schema.org JSON-LD script to the HTML head of the current page.
 *
 * The structured data is derived from the page type (website, organization,
 * article/news/blog or a user supplied type) and can be extended by the user
 * through the `json-ld` page metadata.
 *
 * To test locally, use ngrok:
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{

    const CANONICAL = "google";
    const JSON_LD_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Registers the handler that runs before the meta headers are output.
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Builds the JSON-LD for the current page and appends it to the
     * `script` entries of the event data.
     *
     * Nothing is added when there is no current page id, when the page does
     * not exist, when it is a slot or when it has no type.
     *
     * @param $event - the event; its `data["script"]` array receives the JSON-LD entry
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = Page::createPageFromId($ID);
        if (!$page->exists()) {
            return;
        }

        /**
         * No metadata for bars
         */
        if ($page->isSlot()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleLdJson($page, $lowerCaseType);
                break;

            default:
                // May be added manually by the user itself
                $ldJson = array(
                    '@context' => 'http://schema.org',
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * https://developers.google.com/search/docs/data-types/speakable
         *
         * Only emitted when there is at least one xpath: a specification
         * without any selector points to nothing.
         */
        $speakableXpath = $this->getSpeakableXpaths($page);
        if (!empty($speakableXpath)) {
            $ldJson[self::SPEAKABLE] = array(
                "@type" => "SpeakableSpecification",
                "xpath" => $speakableXpath
            );
        }

        /**
         * Do we have extra ld-json properties
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_array($extraLdJson)) {
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                // array_merge would fail on a non-array value
                LogUtility::msg("The extra json-ld metadata of the page ($ID) is not an array and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }

        /**
         * Publish
         */
        if (empty($ldJson)) {
            return;
        }
        $jsonEncode = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($jsonEncode === false) {
            // Do not publish `false` as script content
            LogUtility::msg("The google ld-json of the page ($ID) could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $jsonEncode,
        );
    }

    /**
     * Builds the `WebSite` node.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page - the current page; the search action is added only on the home page
     * @return array the JSON-LD as an associative array
     */
    private function buildWebSiteLdJson($page)
    {
        $ldJson = array(
            '@context' => 'http://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Builds the `Organization` node (organization + logo).
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array the JSON-LD as an associative array
     */
    private function buildOrganizationLdJson()
    {
        return array(
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getUrl(),
            "logo" => Site::getLogoUrlAsPng()
        );
    }

    /**
     * Builds the `Article`, `NewsArticle` or `BlogPosting` node.
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * @param Page $page - the current page
     * @param string $lowerCaseType - the page type in lowercase
     * @return array the JSON-LD as an associative array
     */
    private function buildArticleLdJson($page, $lowerCaseType)
    {
        switch ($lowerCaseType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
            default:
                $schemaType = "Article";
                break;
        }

        // Date should be https://en.wikipedia.org/wiki/ISO_8601
        // NOTE(review): the PHP manual states that DATE_ISO8601 is not strictly
        // ISO-8601 compatible and recommends DATE_ATOM. It is kept here so that
        // the emitted value does not change - confirm before switching.
        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty(),
            self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(DATE_ISO8601)
        );

        /**
         * Modified Time
         */
        $modifiedTime = $page->getModifiedTime();
        if ($modifiedTime != null) {
            $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(DATE_ISO8601);
        }

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        $schemaImages = $this->buildImageObjects($page);
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }

        return $ldJson;
    }

    /**
     * Builds the `ImageObject` nodes for the local images of the page.
     *
     * Image must belong to the page
     * https://developers.google.com/search/docs/guides/sd-policies#images
     *
     * Image (at least 696 pixels wide)
     *
     * Image may have IPTC metadata: not yet implemented
     * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
     *
     * Image must have the supported format
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG
     *
     * Images with an unsupported mime are skipped silently; images with a
     * supported mime that do not exist are logged and skipped.
     *
     * @param Page $page - the current page
     * @return array a list of `ImageObject` associative arrays (may be empty)
     */
    private function buildImageObjects($page)
    {
        $supportedMime = [
            "image/bmp",
            "image/gif",
            "image/jpeg",
            "image/png",
            "image/webp",
            "image/svg+xml",
        ];

        $schemaImages = array();
        foreach ($page->getLocalImageSet() as $image) {

            if (!in_array($image->getMime(), $supportedMime, true)) {
                continue;
            }

            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }

            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            );
            // Dimensions are only read for raster images that can be analyzed
            if ($image instanceof RasterImageLink && $image->isAnalyzable()) {
                $width = $image->getMediaWidth();
                if (!empty($width)) {
                    $imageObjectSchema["width"] = $width;
                }
                $height = $image->getMediaHeight();
                if (!empty($height)) {
                    $imageObjectSchema["height"] = $height;
                }
            }
            $schemaImages[] = $imageObjectSchema;
        }

        return $schemaImages;
    }

    /**
     * Returns the xpath expressions of the speakable parts of the page.
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param Page $page - the current page
     * @return array a list of xpath strings (empty when the page has neither title nor description)
     */
    private function getSpeakableXpaths($page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        return $speakableXpath;
    }

}