xref: /template/strap/ComboStrap/LdJson.php (revision c3437056399326d621a01da73b649707fbb0ae69)
1*c3437056SNickeau<?php
2*c3437056SNickeau
3*c3437056SNickeau
4*c3437056SNickeaunamespace ComboStrap;
5*c3437056SNickeau
6*c3437056SNickeau
7*c3437056SNickeauuse action_plugin_combo_metagoogle;
8*c3437056SNickeau
/**
 * The json-ld (https://json-ld.org/) metadata of a resource.
 *
 * The stored value is a json string. When the resource is a {@link Page},
 * this value is merged over a default schema.org document derived from
 * the page type (website, organization, article, event, ...).
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag a page manually (it shows well what kind of information is needed)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class LdJson extends MetadataJson
{

    public const PROPERTY_NAME = "json-ld";

    public const SPEAKABLE = "speakable";
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    /**
     * Factory: create the metadata object bound to the given page.
     *
     * @param Page $page the page that owns the json-ld
     * @return LdJson
     */
    public static function createForPage(Page $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Add the `image` property (a list of `ImageObject`) to the given json-ld array.
     *
     * Only the images with a supported mime that exist are added.
     * The `image` key is not written when no image qualifies.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param Page $page the page that provides the images
     */
    public static function addImage(array &$ldJson, Page $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesOrDefaultForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = [];
        foreach ($imagesSet as $image) {

            $mime = $image->getPath()->getMime()->toString();
            if (!in_array($mime, $supportedMime, true)) {
                // unsupported format: skipped silently
                continue;
            }
            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                continue;
            }

            $imageObjectSchema = [
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            ];
            // the dimensions are optional: written only when known
            $width = $image->getIntrinsicWidth();
            if (!empty($width)) {
                $imageObjectSchema["width"] = $width;
            }
            $height = $image->getIntrinsicHeight();
            if (!empty($height)) {
                $imageObjectSchema["height"] = $height;
            }
            $schemaImages[] = $imageObjectSchema;
        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of the metadata property
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type (persistent metadata)
     */
    public function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_METADATA;
    }

    /**
     * @return string the canonical, also used in the log messages of this class
     */
    public function getCanonical(): string
    {
        return action_plugin_combo_metagoogle::CANONICAL;
    }


    /**
     * @return string a description of the metadata
     */
    public function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the label of the metadata
     */
    public function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the tab of the metadata manager form where this metadata is located
     */
    public function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    /**
     * @return bool always true
     */
    public function getMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld, built from the page type without any stored value.
     *
     * @return string|null the pretty json string or null if no default could be built
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Build the metadata from the value found in the store.
     *
     * When there is no value and the page is an organization page, the value
     * of the deprecated {@link LdJson::OLD_ORGANIZATION_PROPERTY} property,
     * if any, is merged with the default and used as value.
     *
     * @param mixed $value the value read from the store
     * @return Metadata this object
     */
    public function buildFromStoreValue($value): Metadata
    {

        if ($value === null) {
            $resourceCombo = $this->getResource();
            // Deprecated, old organization syntax
            if (
                ($resourceCombo instanceof Page)
                && $resourceCombo->getTypeOrDefault() === PageType::ORGANIZATION_TYPE
            ) {
                $store = $this->getReadStore();
                $metadata = $store->getFromPersistentName(self::OLD_ORGANIZATION_PROPERTY);
                if ($metadata !== null) {
                    $organization = [
                        "organization" => $metadata
                    ];
                    $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
                    // defensive: the merge function is allowed to return null
                    if ($ldJsonOrganization !== null) {
                        $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
                    }
                }
            }
        }
        parent::buildFromStoreValue($value);
        return $this;

    }

    /**
     * The ldJson value: the actual value merged over the default one.
     *
     * The actual value is returned unchanged when:
     *   * it is not a valid json (a message is logged),
     *   * or no default json-ld could be built.
     *
     * @return false|string|null
     */
    public function getLdJsonMergedWithDefault()
    {

        $value = $this->getValue();
        $actualValueAsArray = null;
        if ($value !== null) {
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCombo $e) {
                LogUtility::msg("The string value is not a valid Json. Value: $value");
                return $value;
            }
        }
        $mergedValue = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValue === null) {
            // No default (for instance, an event page without its mandatory metadata):
            // the actual value is kept instead of passing null to the json builder
            return $value;
        }
        return Json::createFromArray($mergedValue)->toPrettyJsonString();
    }


    /**
     * Build the default json-ld of the page from its type and merge
     * the actual value over it (the actual top-level keys win).
     *
     * @param array|null $actualValue the actual json-ld as array
     * @return array|null the merged json-ld;
     *                    the actual value if the resource is not a page;
     *                    null if a mandatory metadata of an event page is missing
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof Page)) {
            return $actualValue;
        }

        $type = $page->getTypeOrDefault();
        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case PageType::WEBSITE_TYPE:
                $ldJson = $this->createWebSiteLdJson($page);
                break;

            case PageType::ORGANIZATION_TYPE:
                $ldJson = $this->createOrganizationLdJson();
                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:
                $ldJson = $this->createArticleLdJson($page, $lowerCaseType);
                break;

            case PageType::EVENT_TYPE:
                $ldJson = $this->createEventLdJson($page);
                if ($ldJson === null) {
                    // a mandatory metadata is missing (already logged)
                    return null;
                }
                break;

            default:

                // May be added manually by the user itself
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()
                ];
                break;
        }


        /**
         * https://developers.google.com/search/docs/data-types/speakable
         */
        $speakableXpath = [];
        if (!empty($page->getTitleOrDefault())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        $ldJson[self::SPEAKABLE] = [
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        ];

        /**
         * merge with the extra
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }

    /**
     * The default json-ld of a website page.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page
     * @return array
     */
    private function createWebSiteLdJson(Page $page): array
    {
        $baseUrl = Site::getBaseUrl();
        $ldJson = [
            '@context' => 'https://schema.org',
            '@type' => 'WebSite',
            'url' => $baseUrl,
            'name' => Site::getTitle()
        ];

        if ($page->isRootHomePage()) {
            // The json-ld is not HTML: the query separator is a raw `&`
            // (an `&amp;` entity would be part of the url template as is)
            $ldJson['potentialAction'] = [
                '@type' => 'SearchAction',
                'target' => $baseUrl . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            ];
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }
        return $ldJson;
    }

    /**
     * The default json-ld of an organization page (Organization + Logo).
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array
     */
    private function createOrganizationLdJson(): array
    {
        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getBaseUrl()
        ];
        // same guard as for the publisher logo: no logo property without a logo url
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $ldJson["logo"] = $logoUrlAsPng;
        }
        return $ldJson;
    }

    /**
     * Map a lowercase page type to the schema.org article type.
     *
     * @param string $lowerCaseType the page type in lowercase
     * @return string `NewsArticle`, `BlogPosting`, `WebPage` or `Article` (the default)
     */
    private function toArticleSchemaType(string $lowerCaseType): string
    {
        switch ($lowerCaseType) {
            case PageType::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                return "NewsArticle";
            case PageType::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                return "BlogPosting";
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:
                // https://schema.org/WebPage
                return "WebPage";
            case PageType::ARTICLE_TYPE:
            default:
                return "Article";
        }
    }

    /**
     * The default json-ld of an article-like page (article, news, blog, home, web page).
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * Image (at least 696 pixels wide)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG.
     *
     * Date should be https://en.wikipedia.org/wiki/ISO_8601
     *
     * @param Page $page
     * @param string $lowerCaseType the page type in lowercase
     * @return array
     */
    private function createArticleLdJson(Page $page, string $lowerCaseType): array
    {
        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => $this->toArticleSchemaType($lowerCaseType),
            'url' => $page->getAbsoluteCanonicalUrl(),
            "headline" => $page->getTitleOrDefault(),
            self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(Iso8601Date::getFormat())
        ];

        /**
         * Modified Time
         */
        $modifiedTime = $page->getModifiedTimeOrDefault();
        if ($modifiedTime !== null) {
            $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
        }

        /**
         * Publisher info
         */
        $publisher = [
            "@type" => "Organization",
            "name" => Site::getTitle()
        ];
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = [
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            ];
        }
        $ldJson["publisher"] = $publisher;

        self::addImage($ldJson, $page);
        return $ldJson;
    }

    /**
     * The default json-ld of an event page.
     *
     * The name, the description, the start date and the end date are mandatory:
     * an error is logged and null is returned at the first one that is missing.
     *
     * https://developers.google.com/search/docs/advanced/structured-data/event
     *
     * @param Page $page
     * @return array|null null if a mandatory metadata is missing
     */
    private function createEventLdJson(Page $page): ?array
    {
        $canonical = $this->getCanonical();
        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => "Event"
        ];

        $eventName = $page->getName();
        if (blank($eventName)) {
            LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $canonical);
            return null;
        }
        $ldJson["name"] = $eventName;

        $eventDescription = $page->getDescription();
        if (blank($eventDescription)) {
            LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $canonical);
            return null;
        }
        $ldJson["description"] = $eventDescription;

        $startDate = $page->getStartDateAsString();
        if ($startDate === null) {
            LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $canonical);
            return null;
        }
        $ldJson["startDate"] = $startDate;

        $endDate = $page->getEndDateAsString();
        if ($endDate === null) {
            LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $canonical);
            return null;
        }
        $ldJson["endDate"] = $endDate;

        self::addImage($ldJson, $page);
        return $ldJson;
    }

}
414