<?php


namespace ComboStrap;


use action_plugin_combo_metagoogle;
use ComboStrap\Meta\Api\Metadata;
use ComboStrap\Meta\Api\MetadataJson;
use ComboStrap\Meta\Store\MetadataDokuWikiStore;

/**
 * The json-ld metadata of a page.
 *
 * The stored value (a json string) is merged with a default json-ld document
 * that is generated from the page type (website, organization, article, event, ...).
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag page manually (you see well what kind of information they need)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class LdJson extends MetadataJson
{

    public const PROPERTY_NAME = "json-ld";

    public const SPEAKABLE = "speakable";
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    public const CANONICAL = action_plugin_combo_metagoogle::CANONICAL;

    /**
     * Create the json-ld metadata bound to the given page.
     *
     * @param MarkupPath $page the page used as resource
     * @return LdJson
     */
    public static function createForPage(MarkupPath $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Add the page images as an `image` property (a list of `ImageObject`).
     *
     * The `image` key is only set when at least one image qualifies:
     * it must resolve to a wiki path, have a supported mime and exist.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param MarkupPath $page the page that owns the images
     */
    public static function addImage(array &$ldJson, MarkupPath $page): void
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = [];
        foreach ($imagesSet as $pageImage) {

            try {
                $pageImagePath = $pageImage->getSourcePath()->toWikiPath();
            } catch (ExceptionCast $e) {
                LogUtility::internalError("The page image should come from a wiki path", self::CANONICAL, $e);
                continue;
            }
            try {
                $mime = $pageImagePath->getMime()->toString();
            } catch (ExceptionNotFound $e) {
                // should not happen
                LogUtility::internalError("The page image mime could not be determined. Error:" . $e->getMessage(), self::CANONICAL, $e);
                $mime = "unknown";
            }
            if (!in_array($mime, $supportedMime, true)) {
                // unsupported format: silently skipped
                continue;
            }
            if (!FileSystems::exists($pageImagePath)) {
                LogUtility::msg("The image ($pageImagePath) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }
            try {
                $fetcherPageImage = IFetcherLocalImage::createImageFetchFromPath($pageImagePath);
            } catch (ExceptionBadArgument|ExceptionBadSyntax|ExceptionNotExists $e) {
                LogUtility::error("The image ($pageImagePath) could not be added as page image. Error: {$e->getMessage()}", self::CANONICAL);
                continue;
            }
            $imageObjectSchema = [
                "@type" => "ImageObject",
                "url" => $fetcherPageImage->getFetchUrl()->toAbsoluteUrlString()
            ];
            // The dimensions are optional: they are only added when known
            $intrinsicWidth = $fetcherPageImage->getIntrinsicWidth();
            if (!empty($intrinsicWidth)) {
                $imageObjectSchema["width"] = $intrinsicWidth;
            }
            $intrinsicHeight = $fetcherPageImage->getIntrinsicHeight();
            if (!empty($intrinsicHeight)) {
                $imageObjectSchema["height"] = $intrinsicHeight;
            }
            $schemaImages[] = $imageObjectSchema;
        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of the metadata ({@link LdJson::PROPERTY_NAME})
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type ({@link MetadataDokuWikiStore::PERSISTENT_DOKUWIKI_KEY})
     */
    public static function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_DOKUWIKI_KEY;
    }

    /**
     * @return string the canonical used in the log messages ({@link LdJson::CANONICAL})
     */
    public static function getCanonical(): string
    {
        return self::CANONICAL;
    }


    /**
     * @return string the description of this metadata
     */
    public static function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the label of this metadata
     */
    public static function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the form tab of this metadata ({@link MetaManagerForm::TAB_TYPE_VALUE})
     */
    public static function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    /**
     * @return bool always true
     */
    public static function isMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld generated from the page type, as a pretty json string.
     *
     * @return string|null null when no default json-ld could be built
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Set the value from the store.
     *
     * When there is no value and the resource is a page, the deprecated
     * {@link LdJson::OLD_ORGANIZATION_PROPERTY} metadata is read from the read store
     * and, if present, converted to a json-ld value.
     *
     * @param mixed $value the value read from the store
     * @return Metadata this object
     */
    public function setFromStoreValueWithoutException($value): Metadata
    {

        if ($value === null && $this->getResource() instanceof MarkupPath) {
            /**
             * Deprecated, old organization syntax
             * We could add this predicate
             *
             * but we don't want to lose any data
             * (ie if the page was set to no be an organization table,
             * the frontmatter would not take it)
             */
            $store = $this->getReadStore();
            $metadata = $store->getFromName(self::OLD_ORGANIZATION_PROPERTY);
            if ($metadata !== null) {
                $organization = [
                    "organization" => $metadata
                ];
                /**
                 * The merge returns null when the default json-ld could not be built
                 * (for instance an event page without its mandatory metadata).
                 * In this case, only the old organization data is kept
                 * in order to not lose it.
                 */
                $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization) ?? $organization;
                $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
            }
        }
        parent::setFromStoreValueWithoutException($value);
        return $this;


    }

    /**
     * The ldJson value merged with the default json-ld of the page type.
     *
     * The raw value is returned as is when it is not a valid json.
     * Null is returned when the default json-ld could not be built.
     *
     * @return false|string|null
     */
    public function getLdJsonMergedWithDefault()
    {

        try {
            $value = $this->getValue();
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCompile $e) {
                LogUtility::error("The string value is not a valid Json. Value: $value", self::CANONICAL);
                return $value;
            }
        } catch (ExceptionNotFound $e) {
            $actualValueAsArray = [];
        }
        $mergedValue = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValue === null) {
            // No default could be built (the reason has already been logged)
            return null;
        }
        return Json::createFromArray($mergedValue)->toPrettyJsonString();
    }


    /**
     * Build the default json-ld of the page (based on its type) and merge
     * the actual value on top of it (the actual value wins on key conflict).
     *
     * @param array|null $actualValue the json-ld set by the user
     * @return array|null the actual value if the resource is not a page,
     * null if a mandatory metadata of an event page is missing
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof MarkupPath)) {
            return $actualValue;
        }

        $type = $this->resolveLdJsonPageType($page);
        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case PageType::WEBSITE_TYPE:
                $ldJson = self::buildWebSiteLdJson($page);
                break;

            case PageType::ORGANIZATION_TYPE:
                $ldJson = self::buildOrganizationLdJson();
                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:
                $ldJson = self::buildArticleLdJson($page, $lowerCaseType);
                break;

            case PageType::EVENT_TYPE:
                $ldJson = self::buildEventLdJson($page);
                if ($ldJson === null) {
                    // a mandatory metadata is missing, the error has been logged
                    return null;
                }
                break;

            default:

                // May be added manually by the user itself
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()->toString()
                ];
                break;
        }

        $ldJson[self::SPEAKABLE] = self::buildSpeakableSpecification($page);

        /**
         * merge with the extra
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }

    /**
     * Determine the page type used to select the json-ld template.
     *
     * @param MarkupPath $page
     * @return mixed the type value or default of the dokuwiki store, replaced by
     * the value of the read store when this store is not a dokuwiki store and has a value
     */
    private function resolveLdJsonPageType(MarkupPath $page)
    {
        $readStore = $this->getReadStore();
        $type = PageType::createForPage($page)
            ->setReadStore(MetadataDokuWikiStore::class)
            ->getValueOrDefault();
        if (!($readStore instanceof MetadataDokuWikiStore)) {
            /**
             * Edge case we set the readstore because in a frontmatter,
             * the type may have been set
             */
            try {
                $type = PageType::createForPage($page)
                    ->setReadStore($readStore)
                    ->getValue();
            } catch (ExceptionNotFound $e) {
                // ok
            }
        }
        return $type;
    }

    /**
     * Build the json-ld of a website page.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param MarkupPath $page
     * @return array
     */
    private static function buildWebSiteLdJson(MarkupPath $page): array
    {
        $ldJson = [
            '@context' => 'https://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getBaseUrl(),
            'name' => Site::getTitle()
        ];

        // The search action is only advertised on the root home page
        if ($page->isRootHomePage()) {

            $ldJson['potentialAction'] = [
                '@type' => 'SearchAction',
                'target' => Site::getBaseUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            ];
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }
        return $ldJson;
    }

    /**
     * Build the json-ld of an organization page.
     *
     * Organization + Logo
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array
     */
    private static function buildOrganizationLdJson(): array
    {
        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getBaseUrl()
        ];
        // Same guard as for the website image and the publisher logo:
        // no logo property is emitted when there is no logo
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $ldJson["logo"] = $logoUrlAsPng;
        }
        return $ldJson;
    }

    /**
     * Build the json-ld of an article-like page
     * (article, news, blog, home and web page).
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * Image (at least 696 pixels wide)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG.
     *
     * Date should be https://en.wikipedia.org/wiki/ISO_8601
     *
     * @param MarkupPath $page
     * @param string $lowerCaseType the page type in lowercase
     * @return array
     */
    private static function buildArticleLdJson(MarkupPath $page, string $lowerCaseType): array
    {
        switch ($lowerCaseType) {
            case PageType::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case PageType::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:
                // https://schema.org/WebPage
                $schemaType = "WebPage";
                break;
            case PageType::ARTICLE_TYPE:
            default:
                $schemaType = "Article";
                break;
        }

        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getAbsoluteCanonicalUrl()->toString(),
            "headline" => $page->getTitleOrDefault(),
        ];

        /**
         * Published Time
         */
        try {
            $ldJson[self::DATE_PUBLISHED_KEY] = $page
                ->getPublishedElseCreationTime()
                ->format(Iso8601Date::getFormat());
        } catch (ExceptionNotFound $e) {
            // Internal error, the page should exist
            LogUtility::error("Internal Error: We were unable to define the publication date for the page ($page). Error: {$e->getMessage()}", self::CANONICAL);
        }

        /**
         * Modified Time
         */
        try {
            $modifiedTime = $page->getModifiedTimeOrDefault();
            $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
        } catch (ExceptionNotFound $e) {
            // Internal error, the page should exist
            LogUtility::error("Internal Error: We were unable to define the modification date for the page ($page)", self::CANONICAL);
        }

        /**
         * Publisher info
         */
        $publisher = [
            "@type" => "Organization",
            "name" => Site::getName()
        ];
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = [
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            ];
        }
        $ldJson["publisher"] = $publisher;

        self::addImage($ldJson, $page);
        return $ldJson;
    }

    /**
     * Build the json-ld of an event page.
     *
     * https://developers.google.com/search/docs/advanced/structured-data/event
     *
     * @param MarkupPath $page
     * @return array|null null when a mandatory metadata
     * (name, description, start date, end date) is missing; an error is logged
     */
    private static function buildEventLdJson(MarkupPath $page): ?array
    {
        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => "Event"
        ];
        try {
            $ldJson["name"] = $page->getName();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }

        try {
            $ldJson["description"] = $page->getDescription();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }

        try {
            $startDate = $page->getStartDate();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["startDate"] = $startDate->format(Iso8601Date::getFormat());

        try {
            $endDate = $page->getEndDate();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["endDate"] = $endDate->format(Iso8601Date::getFormat());

        self::addImage($ldJson, $page);
        return $ldJson;
    }

    /**
     * Build the speakable specification: the title and,
     * when the page has a description value, the description meta.
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param MarkupPath $page
     * @return array
     */
    private static function buildSpeakableSpecification(MarkupPath $page): array
    {
        $speakableXpath = ["/html/head/title"];
        try {
            PageDescription::createForPage($page)
                ->getValue();
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        } catch (ExceptionNotFound $e) {
            // ok, no description
        }
        return [
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        ];
    }


    /**
     * @return bool always true
     */
    public static function isOnForm(): bool
    {
        return true;
    }


}