<?php


namespace ComboStrap;


use action_plugin_combo_metagoogle;


/**
 * The json-ld (structured data) metadata of a resource.
 *
 * The stored value is a json string. For a page, it is merged with a default
 * json-ld derived from the page type (website, organization, article, event, ...).
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag a page manually (it shows well what kind of information is needed)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class LdJson extends MetadataJson
{

    public const PROPERTY_NAME = "json-ld";

    public const SPEAKABLE = "speakable";
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    /**
     * Create the json-ld metadata bound to the given page.
     *
     * @param Page $page the page used as resource
     * @return LdJson
     */
    public static function createForPage(Page $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Add the page images as `ImageObject` entries under the `image` key.
     *
     * The key is only set when at least one image qualifies. An image whose
     * mime is supported but that does not exist is logged and skipped.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param Page $page the page that provides the images
     */
    public static function addImage(array &$ldJson, Page $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesOrDefaultForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = [];
        foreach ($imagesSet as $image) {

            $mime = $image->getPath()->getMime()->toString();
            if (!in_array($mime, $supportedMime, true)) {
                // Unsupported format: silently skipped
                continue;
            }
            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                continue;
            }

            $imageObjectSchema = [
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            ];
            // Dimensions are optional: only added when known
            $width = $image->getIntrinsicWidth();
            if (!empty($width)) {
                $imageObjectSchema["width"] = $width;
            }
            $height = $image->getIntrinsicHeight();
            if (!empty($height)) {
                $imageObjectSchema["height"] = $height;
            }
            $schemaImages[] = $imageObjectSchema;
        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of this metadata (`json-ld`)
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type (persistent metadata)
     */
    public function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_METADATA;
    }

    /**
     * @return string the canonical used for this metadata (also used in the log messages of this class)
     */
    public function getCanonical(): string
    {
        return action_plugin_combo_metagoogle::CANONICAL;
    }


    /**
     * @return string a human readable description of this metadata
     */
    public function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the label of this metadata
     */
    public function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the form tab that holds this metadata
     */
    public function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    /**
     * @return bool always true
     */
    public function getMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld derived from the resource, as a pretty json string.
     *
     * @return string|null null when no default json-ld could be built
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Build this metadata from the value read in the store.
     *
     * When no value is stored and the resource is a page of the organization type,
     * the deprecated `organization` persistent metadata is read instead,
     * merged with the default json-ld and used as value.
     *
     * @param mixed $value the value from the store (null when absent)
     * @return Metadata this object
     */
    public function buildFromStoreValue($value): Metadata
    {

        if ($value === null) {
            $resourceCombo = $this->getResource();
            // Deprecated, old organization syntax
            if (
                $resourceCombo instanceof Page
                && $resourceCombo->getTypeOrDefault() === PageType::ORGANIZATION_TYPE
            ) {
                $store = $this->getReadStore();
                $metadata = $store->getFromPersistentName(self::OLD_ORGANIZATION_PROPERTY);
                if ($metadata !== null) {
                    $organization = [
                        "organization" => $metadata
                    ];
                    $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
                    $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
                }
            }
        }
        parent::buildFromStoreValue($value);
        return $this;

    }

    /**
     * The ldJson value merged with the default json-ld of the resource.
     *
     * If the stored value is not a valid json, the error is logged and
     * the stored value is returned as is.
     *
     * @return false|string|null null when there is nothing to output
     */
    public function getLdJsonMergedWithDefault()
    {

        $value = $this->getValue();
        $actualValueAsArray = null;
        if ($value !== null) {
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCombo $e) {
                LogUtility::msg("The string value is not a valid Json. Value: $value");
                return $value;
            }
        }
        $mergedValue = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValue === null) {
            // The merge yields null when the resource is not a page and has no value,
            // or when an event page misses a mandatory metadata (already logged).
            // Same guard as in getDefaultValue.
            return null;
        }
        return Json::createFromArray($mergedValue)->toPrettyJsonString();
    }


    /**
     * Build the default json-ld of the resource and merge the given value on top of it.
     *
     * The keys of the given value take precedence over the default ones (array_merge).
     *
     * @param array|null $actualValue the json-ld defined by the user, if any
     * @return array|null the given value unchanged when the resource is not a page,
     *                    null when an event page misses a mandatory metadata,
     *                    otherwise the merged json-ld
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof Page)) {
            return $actualValue;
        }

        $type = $page->getTypeOrDefault();
        $lowerType = strtolower($type);
        switch ($lowerType) {
            case PageType::WEBSITE_TYPE:

                /**
                 * https://schema.org/WebSite
                 * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
                 */
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => 'WebSite',
                    'url' => Site::getBaseUrl(),
                    'name' => Site::getTitle()
                ];

                if ($page->isRootHomePage()) {

                    $ldJson['potentialAction'] = [
                        '@type' => 'SearchAction',
                        'target' => Site::getBaseUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                        'query-input' => 'required name=search_term_string',
                    ];
                }

                $tag = Site::getTag();
                if (!empty($tag)) {
                    $ldJson['description'] = $tag;
                }
                $siteImageUrl = Site::getLogoUrlAsPng();
                if (!empty($siteImageUrl)) {
                    $ldJson['image'] = $siteImageUrl;
                }

                break;

            case PageType::ORGANIZATION_TYPE:

                /**
                 * Organization + Logo
                 * https://developers.google.com/search/docs/data-types/logo
                 */
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => "Organization",
                    "url" => Site::getBaseUrl(),
                    "logo" => Site::getLogoUrlAsPng()
                ];

                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:

                // Map the page type to the schema.org type
                switch ($lowerType) {
                    case PageType::NEWS_TYPE:
                    case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "NewsArticle";
                        break;
                    case PageType::BLOG_TYPE:
                    case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "BlogPosting";
                        break;
                    case PageType::HOME_TYPE:
                    case PageType::WEB_PAGE_TYPE:
                        // https://schema.org/WebPage
                        $schemaType = "WebPage";
                        break;
                    case PageType::ARTICLE_TYPE:
                    default:
                        $schemaType = "Article";
                        break;

                }
                // https://developers.google.com/search/docs/data-types/article
                // https://schema.org/Article

                // Image (at least 696 pixels wide)
                // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
                // BMP, GIF, JPEG, PNG, WebP, and SVG.

                // Date should be https://en.wikipedia.org/wiki/ISO_8601

                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => $schemaType,
                    'url' => $page->getAbsoluteCanonicalUrl(),
                    "headline" => $page->getTitleOrDefault(),
                    self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(Iso8601Date::getFormat())
                ];

                /**
                 * Modified Time
                 */
                $modifiedTime = $page->getModifiedTimeOrDefault();
                if ($modifiedTime != null) {
                    $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
                }

                /**
                 * Publisher info
                 */
                $publisher = [
                    "@type" => "Organization",
                    "name" => Site::getTitle()
                ];
                $logoUrlAsPng = Site::getLogoUrlAsPng();
                if (!empty($logoUrlAsPng)) {
                    $publisher["logo"] = [
                        "@type" => "ImageObject",
                        "url" => $logoUrlAsPng
                    ];
                }
                $ldJson["publisher"] = $publisher;

                self::addImage($ldJson, $page);
                break;

            case PageType::EVENT_TYPE:
                // https://developers.google.com/search/docs/advanced/structured-data/event
                // name, description, start date and end date are mandatory:
                // when one is missing, an error is logged and no json-ld is returned
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => "Event"
                ];
                $eventName = $page->getName();
                if (blank($eventName)) {
                    LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                    return null;
                }
                $ldJson["name"] = $eventName;

                $eventDescription = $page->getDescription();
                if (blank($eventDescription)) {
                    LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                    return null;
                }
                $ldJson["description"] = $eventDescription;

                $startDate = $page->getStartDateAsString();
                if ($startDate === null) {
                    LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                    return null;
                }
                $ldJson["startDate"] = $startDate;

                $endDate = $page->getEndDateAsString();
                if ($endDate === null) {
                    LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                    return null;
                }
                $ldJson["endDate"] = $endDate;

                self::addImage($ldJson, $page);
                break;


            default:

                // May be added manually by the user itself
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()
                ];
                break;
        }


        /**
         * https://developers.google.com/search/docs/data-types/speakable
         */
        $speakableXpath = [];
        if (!empty($page->getTitleOrDefault())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the written description, otherwise this is not speakable
             * (you can have links and other strangeness)
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        $ldJson[self::SPEAKABLE] = [
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        ];

        /**
         * Merge with the extra: the keys of the actual value win
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }

}