<?php


namespace ComboStrap;


use action_plugin_combo_metagoogle;
use ComboStrap\Meta\Api\Metadata;
use ComboStrap\Meta\Api\MetadataJson;
use ComboStrap\Meta\Store\MetadataDokuWikiStore;
use ComboStrap\Web\Url;
use ComboStrap\Web\UrlEndpoint;

/**
 * The json-ld metadata of a page.
 *
 * A default schema.org document is derived from the page type
 * (website, organization, article-like, event or a free type)
 * and the value stored by the user is merged on top of it.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag page manually (you see well what kind of information they need)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class LdJson extends MetadataJson
{

    public const PROPERTY_NAME = "json-ld";

    public const SPEAKABLE = "speakable";
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    public const CANONICAL = action_plugin_combo_metagoogle::CANONICAL;

    /**
     * Factory: create the json-ld metadata bound to the given page.
     *
     * @param MarkupPath $page the page used as resource
     * @return LdJson
     */
    public static function createForPage(MarkupPath $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Add the page images as an `image` list of `ImageObject` to the json-ld array.
     *
     * Images that are not wiki paths, that have an unsupported mime,
     * that do not exist or that cannot be fetched are skipped (and logged).
     * The `image` key is only set when at least one image was kept.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param MarkupPath $page the page that owns the images
     */
    public static function addImage(array &$ldJson, MarkupPath $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = array();
        foreach ($imagesSet as $pageImage) {

            try {
                $pageImagePath = $pageImage->getSourcePath()->toWikiPath();
            } catch (ExceptionCast $e) {
                LogUtility::internalError("The page image should come from a wiki path", self::CANONICAL, $e);
                continue;
            }
            try {
                $mime = $pageImagePath->getMime()->toString();
            } catch (ExceptionNotFound $e) {
                // should not happen
                LogUtility::internalError("The page image mime could not be determined. Error:" . $e->getMessage(), self::CANONICAL, $e);
                // "unknown" is not in the supported list, the image is therefore skipped below
                $mime = "unknown";
            }
            if (in_array($mime, $supportedMime)) {
                if (FileSystems::exists($pageImagePath)) {
                    try {
                        $fetcherPageImage = IFetcherLocalImage::createImageFetchFromPath($pageImagePath);
                    } catch (ExceptionBadArgument|ExceptionBadSyntax|ExceptionNotExists $e) {
                        LogUtility::error("The image ($pageImagePath) could not be added as page image. Error: {$e->getMessage()}");
                        continue;
                    }
                    $imageObjectSchema = array(
                        "@type" => "ImageObject",
                        "url" => $fetcherPageImage->getFetchUrl()->toAbsoluteUrlString()
                    );
                    // The dimensions are optional: they are only added when known
                    if (!empty($fetcherPageImage->getIntrinsicWidth())) {
                        $imageObjectSchema["width"] = $fetcherPageImage->getIntrinsicWidth();
                    }
                    if (!empty($fetcherPageImage->getIntrinsicHeight())) {
                        $imageObjectSchema["height"] = $fetcherPageImage->getIntrinsicHeight();
                    }
                    $schemaImages[] = $imageObjectSchema;
                } else {
                    LogUtility::msg("The image ($pageImagePath) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                }
            }
        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of the metadata ({@see LdJson::PROPERTY_NAME})
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type (the persistent DokuWiki key)
     */
    public static function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_DOKUWIKI_KEY;
    }

    /**
     * @return string the canonical name, shared with the metagoogle action plugin
     */
    public static function getCanonical(): string
    {
        return action_plugin_combo_metagoogle::CANONICAL;
    }


    /**
     * @return string a human-readable description of the metadata
     */
    public static function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the label of the metadata
     */
    public static function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the form tab of the metadata
     */
    public static function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    /**
     * @return bool always true
     */
    public static function isMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld derived from the page, as a pretty json string.
     *
     * @return string|null null when no default could be built
     * (for instance an event page without its mandatory metadata)
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        /**
         * Return
         */
        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Set the value from the store.
     *
     * When the store has no value and the resource is a page, the deprecated
     * {@see LdJson::OLD_ORGANIZATION_PROPERTY} metadata is read and, if present,
     * wrapped in an `organization` property and merged with the default value.
     *
     * @param mixed $value the store value (may be null)
     * @return Metadata this object
     */
    public function setFromStoreValueWithoutException($value): Metadata
    {

        if ($value === null) {
            $resourceCombo = $this->getResource();
            if (($resourceCombo instanceof MarkupPath)) {
                /**
                 * Deprecated, old organization syntax
                 *
                 * We could restrict this to pages of the organization type
                 * but we don't want to lose any data
                 * (ie if the page was not set to be an organization,
                 * the frontmatter would not take it)
                 */
                $store = $this->getReadStore();
                $metadata = $store->getFromName(self::OLD_ORGANIZATION_PROPERTY);
                if ($metadata !== null) {
                    $organization = array(
                        "organization" => $metadata
                    );
                    $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
                    // The merge returns null when the default could not be built
                    // (event page without its mandatory metadata): in that case
                    // the value stays null instead of building a Json from null
                    if ($ldJsonOrganization !== null) {
                        $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
                    }
                }
            }
        }
        parent::setFromStoreValueWithoutException($value);
        return $this;


    }

    /**
     * The ldJson value merged with the default value.
     *
     * The stored value is returned unchanged when it is not a valid json.
     * Null is returned when the default could not be built
     * (for instance an event page without its mandatory metadata).
     *
     * @return false|string|null
     */
    public function getLdJsonMergedWithDefault()
    {

        try {
            $value = $this->getValue();
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCompile $e) {
                LogUtility::error("The string value is not a valid Json. Value: $value", self::CANONICAL);
                return $value;
            }
        } catch (ExceptionNotFound $e) {
            // No stored value, only the default is used
            $actualValueAsArray = [];
        }
        $mergedValueAsArray = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValueAsArray === null) {
            // Same guard as in getDefaultValue: no Json can be built from null
            return null;
        }
        return Json::createFromArray($mergedValueAsArray)->toPrettyJsonString();
    }


    /**
     * Build the default json-ld array for the page type and merge the actual value on top of it.
     *
     * The keys of the actual value take precedence over the default ones (array_merge).
     *
     * @param array|null $actualValue the value set by the user, if any
     * @return array|null the actual value untouched when the resource is not a page,
     * null when an event page misses a mandatory metadata (name, description, start or end date),
     * the merged array otherwise
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof MarkupPath)) {
            return $actualValue;
        }

        $readStore = $this->getReadStore();
        $type = PageType::createForPage($page)
            ->setReadStore(MetadataDokuWikiStore::class)
            ->getValueOrDefault();
        if (!($readStore instanceof MetadataDokuWikiStore)) {
            /**
             * Edge case we set the readstore because in a frontmatter,
             * the type may have been set
             */
            try {
                $type = PageType::createForPage($page)
                    ->setReadStore($readStore)
                    ->getValue();
            } catch (ExceptionNotFound $e) {
                // ok
            }
        }
        switch (strtolower($type)) {
            case PageType::WEBSITE_TYPE:

                /**
                 * https://schema.org/WebSite
                 * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
                 */
                $ldJson = array(
                    '@context' => 'https://schema.org',
                    '@type' => 'WebSite',
                    'url' => Site::getBaseUrl(),
                    'name' => Site::getTitle()
                );

                // The search action is only advertised on the root home page
                if ($page->isRootHomePage()) {

                    $target = UrlEndpoint::createDokuUrl()
                            ->addQueryParameter("do", ExecutionContext::SEARCH_ACTION)
                            ->toAbsoluteUrl()
                            ->toHtmlString()
                        . Url::AMPERSAND_URL_ENCODED_FOR_HTML . 'id={search_term_string}';
                    $ldJson['potentialAction'] = array(
                        '@type' => 'SearchAction',
                        'target' => $target,
                        'query-input' => 'required name=search_term_string',
                    );
                }

                $tag = Site::getTag();
                if (!empty($tag)) {
                    $ldJson['description'] = $tag;
                }
                $siteImageUrl = Site::getLogoUrlAsPng();
                if (!empty($siteImageUrl)) {
                    $ldJson['image'] = $siteImageUrl;
                }

                break;

            case PageType::ORGANIZATION_TYPE:

                /**
                 * Organization + Logo
                 * https://developers.google.com/search/docs/data-types/logo
                 */
                $ldJson = array(
                    "@context" => "https://schema.org",
                    "@type" => "Organization",
                    "url" => Site::getBaseUrl(),
                    "logo" => Site::getLogoUrlAsPng()
                );

                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:

                // Map the page type to the schema.org type
                switch (strtolower($type)) {
                    case PageType::NEWS_TYPE:
                    case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "NewsArticle";
                        break;
                    case PageType::BLOG_TYPE:
                    case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "BlogPosting";
                        break;
                    case PageType::HOME_TYPE:
                    case PageType::WEB_PAGE_TYPE:
                        // https://schema.org/WebPage
                        $schemaType = "WebPage";
                        break;
                    case PageType::ARTICLE_TYPE:
                    default:
                        $schemaType = "Article";
                        break;

                }
                // https://developers.google.com/search/docs/data-types/article
                // https://schema.org/Article

                // Image (at least 696 pixels wide)
                // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
                // BMP, GIF, JPEG, PNG, WebP, and SVG.

                // Date should be https://en.wikipedia.org/wiki/ISO_8601


                $ldJson = array(
                    "@context" => "https://schema.org",
                    "@type" => $schemaType,
                    'url' => $page->getAbsoluteCanonicalUrl()->toString(),
                    "headline" => $page->getTitleOrDefault(),

                );

                try {
                    $ldJson[self::DATE_PUBLISHED_KEY] = $page
                        ->getPublishedElseCreationTime()
                        ->format(Iso8601Date::getFormat());
                } catch (ExceptionNotFound $e) {
                    // Internal error, the page should exist
                    LogUtility::error("Internal Error: We were unable to define the publication date for the page ($page). Error: {$e->getMessage()}", self::CANONICAL);
                }

                /**
                 * Modified Time
                 */
                try {
                    $modifiedTime = $page->getModifiedTimeOrDefault();
                    $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
                } catch (ExceptionNotFound $e) {
                    // Internal error, the page should exist
                    LogUtility::error("Internal Error: We were unable to define the modification date for the page ($page)", self::CANONICAL);
                }

                /**
                 * Publisher info
                 */
                $publisher = array(
                    "@type" => "Organization",
                    "name" => Site::getName()
                );
                $logoUrlAsPng = Site::getLogoUrlAsPng();
                if (!empty($logoUrlAsPng)) {
                    $publisher["logo"] = array(
                        "@type" => "ImageObject",
                        "url" => $logoUrlAsPng
                    );
                }
                $ldJson["publisher"] = $publisher;

                self::addImage($ldJson, $page);
                break;

            case PageType::EVENT_TYPE:
                // https://developers.google.com/search/docs/advanced/structured-data/event
                // Name, description, start and end date are mandatory:
                // null is returned as soon as one of them is missing
                $ldJson = array(
                    "@context" => "https://schema.org",
                    "@type" => "Event");
                try {
                    $eventName = $page->getName();
                    $ldJson["name"] = $eventName;
                } catch (ExceptionNotFound $e) {
                    LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }

                try {
                    $eventDescription = $page->getDescription();
                } catch (ExceptionNotFound $e) {
                    LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }

                $ldJson["description"] = $eventDescription;
                try {
                    $startDate = $page->getStartDate();
                } catch (ExceptionNotFound $e) {
                    LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }
                $ldJson["startDate"] = $startDate->format(Iso8601Date::getFormat());

                try {
                    $endDate = $page->getEndDate();
                } catch (ExceptionNotFound $e) {
                    LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                    return null;
                }
                $ldJson["endDate"] = $endDate->format(Iso8601Date::getFormat());


                self::addImage($ldJson, $page);
                break;


            default:

                // May be added manually by the user itself
                $ldJson = array(
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()->toString()
                );
                break;
        }


        /**
         * https://developers.google.com/search/docs/data-types/speakable
         */
        $speakableXpath = array();
        $speakableXpath[] = "/html/head/title";
        try {
            // Called only to know if a description was set (throws otherwise)
            PageDescription::createForPage($page)
                ->getValue();
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        } catch (ExceptionNotFound $e) {
            // ok, no description
        }
        $ldJson[self::SPEAKABLE] = array(
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        );

        /**
         * merge with the extra
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }


    /**
     * @return bool always true
     */
    public static function isOnForm(): bool
    {
        return true;
    }


}