<?php


namespace ComboStrap;


use action_plugin_combo_metagoogle;

/**
 * The json-ld metadata of a resource: the stored json-ld value
 * merged with default schema.org values derived from the page type.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag page manually (you see well what kind of information they need)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class LdJson extends MetadataJson
{

    public const PROPERTY_NAME = "json-ld";

    public const SPEAKABLE = "speakable";
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    /**
     * Create a json-ld metadata that has the given page as resource.
     *
     * @param Page $page the page set as resource
     * @return LdJson
     */
    public static function createForPage(Page $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Add the images of the page to the `image` property of the json-ld array.
     *
     * Only the images with a supported mime that exist are added.
     * The `image` property is left untouched when no image qualifies.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param Page $page the page that owns the images
     */
    public static function addImage(array &$ldJson, Page $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesOrDefaultForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = [];
        foreach ($imagesSet as $image) {

            $mime = $image->getPath()->getMime()->toString();
            if (!in_array($mime, $supportedMime)) {
                // Unsupported format: silently skipped
                continue;
            }
            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                continue;
            }

            $imageObjectSchema = [
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            ];
            // The dimensions are optional: they are added only when they are known
            $width = $image->getIntrinsicWidth();
            if (!empty($width)) {
                $imageObjectSchema["width"] = $width;
            }
            $height = $image->getIntrinsicHeight();
            if (!empty($height)) {
                $imageObjectSchema["height"] = $height;
            }
            $schemaImages[] = $imageObjectSchema;

        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of the metadata property
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type (persistent metadata)
     */
    public function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_METADATA;
    }

    /**
     * @return string the canonical, shared with the google meta action plugin
     */
    public function getCanonical(): string
    {
        return action_plugin_combo_metagoogle::CANONICAL;
    }


    /**
     * @return string the description of this metadata
     */
    public function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the label of this metadata
     */
    public function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the tab where this metadata is located
     */
    public function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    /**
     * @return bool always true
     */
    public function getMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld derived from the resource, without any stored value.
     *
     * @return string|null the pretty json string or null when no default could be built
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Build the metadata from the store value.
     *
     * When there is no value and the resource is an organization page,
     * the value is read from the deprecated organization property,
     * merged with the default values.
     *
     * @param mixed $value the value read from the store
     * @return Metadata this object
     */
    public function buildFromStoreValue($value): Metadata
    {

        if ($value === null) {
            $resourceCombo = $this->getResource();
            // Deprecated, old organization syntax
            if (
                $resourceCombo instanceof Page
                && $resourceCombo->getTypeOrDefault() === PageType::ORGANIZATION_TYPE
            ) {
                $store = $this->getReadStore();
                $metadata = $store->getFromPersistentName(self::OLD_ORGANIZATION_PROPERTY);
                if ($metadata !== null) {
                    $organization = [
                        "organization" => $metadata
                    ];
                    $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
                    $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
                }
            }
        }
        parent::buildFromStoreValue($value);
        return $this;

    }

    /**
     * The ldJson value merged with the default values.
     *
     * The stored value is returned as is when it is not a valid json.
     * Null is returned when the merge yields nothing
     * (for instance, an event page without its mandatory metadata).
     *
     * @return false|string|null
     */
    public function getLdJsonMergedWithDefault()
    {

        $value = $this->getValue();
        $actualValueAsArray = null;
        if ($value !== null) {
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCombo $e) {
                LogUtility::msg("The string value is not a valid Json. Value: $value");
                return $value;
            }
        }
        $mergedValue = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValue === null) {
            // Same guard as in getDefaultValue: the merge returns null
            // when no json-ld could be built, there is then nothing to serialize
            return null;
        }
        return Json::createFromArray($mergedValue)->toPrettyJsonString();
    }


    /**
     * Build the default json-ld from the page type and merge the actual value into it.
     *
     * @param array|null $actualValue the actual json-ld as array (the stored one), if any
     * @return array|null the merged json-ld, the actual value when the resource is not a page,
     * or null when a mandatory metadata of an event page is missing
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof Page)) {
            return $actualValue;
        }

        $type = $page->getTypeOrDefault();
        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case PageType::WEBSITE_TYPE:

                /**
                 * https://schema.org/WebSite
                 * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
                 */
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => 'WebSite',
                    'url' => Site::getBaseUrl(),
                    'name' => Site::getTitle()
                ];

                // The search action is declared only on the root home page
                if ($page->isRootHomePage()) {

                    $ldJson['potentialAction'] = [
                        '@type' => 'SearchAction',
                        'target' => Site::getBaseUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                        'query-input' => 'required name=search_term_string',
                    ];
                }

                $tag = Site::getTag();
                if (!empty($tag)) {
                    $ldJson['description'] = $tag;
                }
                $siteImageUrl = Site::getLogoUrlAsPng();
                if (!empty($siteImageUrl)) {
                    $ldJson['image'] = $siteImageUrl;
                }

                break;

            case PageType::ORGANIZATION_TYPE:

                /**
                 * Organization + Logo
                 * https://developers.google.com/search/docs/data-types/logo
                 */
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => "Organization",
                    "url" => Site::getBaseUrl(),
                    "logo" => Site::getLogoUrlAsPng()
                ];

                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:

                // Mapping of the page type to the schema.org type
                switch ($lowerCaseType) {
                    case PageType::NEWS_TYPE:
                    case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "NewsArticle";
                        break;
                    case PageType::BLOG_TYPE:
                    case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "BlogPosting";
                        break;
                    case PageType::HOME_TYPE:
                    case PageType::WEB_PAGE_TYPE:
                        // https://schema.org/WebPage
                        $schemaType = "WebPage";
                        break;
                    case PageType::ARTICLE_TYPE:
                    default:
                        $schemaType = "Article";
                        break;

                }
                // https://developers.google.com/search/docs/data-types/article
                // https://schema.org/Article

                // Image (at least 696 pixels wide)
                // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
                // BMP, GIF, JPEG, PNG, WebP, and SVG.

                // Date should be https://en.wikipedia.org/wiki/ISO_8601

                // NOTE(review): assumes that getPublishedElseCreationTime never returns null - to confirm
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => $schemaType,
                    'url' => $page->getAbsoluteCanonicalUrl(),
                    "headline" => $page->getTitleOrDefault(),
                    self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(Iso8601Date::getFormat())
                ];

                /**
                 * Modified Time
                 */
                $modifiedTime = $page->getModifiedTimeOrDefault();
                if ($modifiedTime != null) {
                    $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
                }

                /**
                 * Publisher info
                 */
                $publisher = [
                    "@type" => "Organization",
                    "name" => Site::getTitle()
                ];
                $logoUrlAsPng = Site::getLogoUrlAsPng();
                if (!empty($logoUrlAsPng)) {
                    $publisher["logo"] = [
                        "@type" => "ImageObject",
                        "url" => $logoUrlAsPng
                    ];
                }
                $ldJson["publisher"] = $publisher;

                self::addImage($ldJson, $page);
                break;

            case PageType::EVENT_TYPE:
                // https://developers.google.com/search/docs/advanced/structured-data/event
                // The name, the description, the start date and the end date are mandatory:
                // an error is logged and null is returned as soon as one of them is missing
                // NOTE(review): self::CANONICAL is not declared in this class, presumably inherited - to confirm
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => "Event"
                ];
                $eventName = $page->getName();
                if (blank($eventName)) {
                    LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }
                $ldJson["name"] = $eventName;

                $eventDescription = $page->getDescription();
                if (blank($eventDescription)) {
                    LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }
                $ldJson["description"] = $eventDescription;

                $startDate = $page->getStartDateAsString();
                if ($startDate === null) {
                    LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }
                $ldJson["startDate"] = $startDate;

                $endDate = $page->getEndDateAsString();
                if ($endDate === null) {
                    LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }
                $ldJson["endDate"] = $endDate;


                self::addImage($ldJson, $page);
                break;


            default:

                // May be added manually by the user itself
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()
                ];
                break;
        }


        /**
         * https://developers.google.com/search/docs/data-types/speakable
         */
        $speakableXpath = [];
        if (!empty($page->getTitleOrDefault())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        $ldJson[self::SPEAKABLE] = [
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        ];

        /**
         * merge with the extra
         * (for an identical string key, the actual value overwrites the default one)
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }


}