<?php

use ComboStrap\LogUtility;
use ComboStrap\Page;
use ComboStrap\RasterImageLink;
use ComboStrap\Site;

if (!defined('DOKU_INC')) die();

require_once(__DIR__ . '/../class/Site.php');

/**
 * Publishes a schema.org JSON-LD description of the current page in the HTML head.
 *
 * The structured data is built from the page type (website, organization,
 * article/news/blog or any user supplied type) and appended as an
 * `application/ld+json` script to the `TPL_METAHEADER_OUTPUT` event data.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{

    const CANONICAL = "google";
    const JSON_LD_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Registers the JSON-LD generation on the meta header output event.
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Builds the JSON-LD of the requested page and appends it to the
     * `script` entries of the event data.
     *
     * Nothing is published when there is no page id, when the page does not
     * exist, when the page is a bar or when the page has no type.
     *
     * @param $event - the TPL_METAHEADER_OUTPUT event (its data is modified)
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = new Page($ID);
        if (!$page->existInFs()) {
            return;
        }

        /**
         * No metadata for bars
         */
        if ($page->isBar()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        // Lowercased once: it drives the dispatch below and the article sub-type
        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleLdJson($page, $lowerCaseType);
                break;

            default:
                // May be added manually by the user itself
                $ldJson = array(
                    '@context' => 'http://schema.org',
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * https://developers.google.com/search/docs/data-types/speakable
         *
         * A specification without any xpath is meaningless,
         * it's then not added at all
         */
        $speakableXpath = $this->buildSpeakableXpath($page);
        if (!empty($speakableXpath)) {
            $ldJson[self::SPEAKABLE] = array(
                "@type" => "SpeakableSpecification",
                "xpath" => $speakableXpath
            );
        }

        /**
         * Do we have extra ld-json properties
         * (they take precedence over the generated ones)
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_array($extraLdJson)) {
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                // array_merge fails on a non-array value, the generated data is kept as is
                LogUtility::msg("The extra ld-json metadata of the page ($ID) is not an array and was not merged", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }

        /**
         * Publish
         */
        if (empty($ldJson)) {
            return;
        }
        // Slashes are kept escaped (default) so that a value can never close the script tag
        $jsonString = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($jsonString === false) {
            LogUtility::msg("The google ld-json of the page ($ID) could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $jsonString,
        );
    }

    /**
     * Builds the WebSite structured data.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page - the current page (the search action is only added on the home page)
     * @return array the ld-json properties
     */
    private function buildWebSiteLdJson(Page $page)
    {
        $ldJson = array(
            '@context' => 'http://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Builds the Organization (with logo) structured data.
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array the ld-json properties
     */
    private function buildOrganizationLdJson()
    {
        return array(
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getUrl(),
            "logo" => Site::getLogoUrlAsPng()
        );
    }

    /**
     * Builds the Article / NewsArticle / BlogPosting structured data
     * with its publisher and its images.
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * @param Page $page - the current page
     * @param string $lowerCaseType - the page type in lowercase
     * @return array the ld-json properties
     */
    private function buildArticleLdJson(Page $page, $lowerCaseType)
    {
        switch ($lowerCaseType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
            default:
                $schemaType = "Article";
                break;
        }

        // Date should be https://en.wikipedia.org/wiki/ISO_8601
        // NOTE(review): date() does not reject a null timestamp (it silently yields
        // another date) - confirm that both timestamp getters always return a value
        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty(),
            self::DATE_PUBLISHED_KEY => date('c', $page->getPublishedElseCreationTimeStamp()),
            self::DATE_MODIFIED_KEY => date('c', $page->getModifiedTimestamp()),
        );

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        $schemaImages = $this->buildImageObjects($page);
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }

        return $ldJson;
    }

    /**
     * Builds the ImageObject list of the images of the page.
     *
     * Image must belong to the page
     * https://developers.google.com/search/docs/guides/sd-policies#images
     *
     * Image may have IPTC metadata: not yet implemented
     * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
     *
     * Image must have the supported format (at least 696 pixels wide is advised)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG
     *
     * @param Page $page - the current page
     * @return array a list of ImageObject properties (empty if there is no usable image)
     */
    private function buildImageObjects(Page $page)
    {
        $supportedMime = array(
            "image/bmp",
            "image/gif",
            "image/jpeg",
            "image/png",
            "image/webp",
            "image/svg+xml",
        );

        $schemaImages = array();
        foreach ($page->getImageSet() as $image) {

            // Images in an unsupported format are silently skipped
            if (!in_array($image->getMime(), $supportedMime, true)) {
                continue;
            }
            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }

            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            );
            // The dimensions are only added for a raster image that can be analyzed
            if ($image instanceof RasterImageLink && $image->isAnalyzable()) {
                $mediaWidth = $image->getMediaWidth();
                if (!empty($mediaWidth)) {
                    $imageObjectSchema["width"] = $mediaWidth;
                }
                $mediaHeight = $image->getMediaHeight();
                if (!empty($mediaHeight)) {
                    $imageObjectSchema["height"] = $mediaHeight;
                }
            }
            $schemaImages[] = $imageObjectSchema;
        }

        return $schemaImages;
    }

    /**
     * Lists the xpath of the head elements that are speakable.
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param Page $page - the current page
     * @return array a list of xpath expression (empty if the page has no title and no description)
     */
    private function buildSpeakableXpath(Page $page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        return $speakableXpath;
    }

}