xref: /plugin/combo/ComboStrap/LdJson.php (revision 04fd306c7c155fa133ebb3669986875d65988276)
1<?php
2
3
4namespace ComboStrap;
5
6
7use action_plugin_combo_metagoogle;
8use ComboStrap\Meta\Api\Metadata;
9use ComboStrap\Meta\Api\MetadataJson;
10use ComboStrap\Meta\Store\MetadataDokuWikiStore;
11
/**
 * The json-ld metadata of a page.
 *
 * The stored value (if any) is merged with a default schema.org document
 * that is derived from the page type (website, organization, article, event, ...)
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
 * to tag a page manually (it shows well what kind of information they need)
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class LdJson extends MetadataJson
{

    public const PROPERTY_NAME = "json-ld";

    public const SPEAKABLE = "speakable";
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    public const CANONICAL = action_plugin_combo_metagoogle::CANONICAL;

    /**
     * Factory: creates the json-ld metadata bound to the given page
     *
     * @param MarkupPath $page the page used as resource
     * @return LdJson
     */
    public static function createForPage(MarkupPath $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Adds the page images as schema.org `ImageObject` under the `image` key.
     *
     * The `image` key is only set when at least one image was retained.
     * An image is skipped when it does not come from a wiki path, when its mime
     * is not supported, when it does not exist or when no fetcher could be created.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param MarkupPath $page the page that owns the images
     */
    public static function addImage(array &$ldJson, MarkupPath $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = [];
        foreach ($imagesSet as $pageImage) {

            try {
                $pageImagePath = $pageImage->getSourcePath()->toWikiPath();
            } catch (ExceptionCast $e) {
                LogUtility::internalError("The page image should come from a wiki path", self::CANONICAL, $e);
                continue;
            }

            try {
                $mime = $pageImagePath->getMime()->toString();
            } catch (ExceptionNotFound $e) {
                // should not happen
                LogUtility::internalError("The page image mime could not be determined. Error:" . $e->getMessage(), self::CANONICAL, $e);
                // "unknown" is not in the supported list: the image is skipped below
                $mime = "unknown";
            }
            if (!in_array($mime, $supportedMime, true)) {
                continue;
            }

            if (!FileSystems::exists($pageImagePath)) {
                LogUtility::msg("The image ($pageImagePath) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }

            try {
                $fetcherPageImage = IFetcherLocalImage::createImageFetchFromPath($pageImagePath);
            } catch (ExceptionBadArgument|ExceptionBadSyntax|ExceptionNotExists $e) {
                LogUtility::error("The image ($pageImagePath) could not be added as page image. Error: {$e->getMessage()}", self::CANONICAL);
                continue;
            }

            $imageObjectSchema = [
                "@type" => "ImageObject",
                "url" => $fetcherPageImage->getFetchUrl()->toAbsoluteUrlString()
            ];
            // The dimensions are optional: they are only added when known
            $intrinsicWidth = $fetcherPageImage->getIntrinsicWidth();
            if (!empty($intrinsicWidth)) {
                $imageObjectSchema["width"] = $intrinsicWidth;
            }
            $intrinsicHeight = $fetcherPageImage->getIntrinsicHeight();
            if (!empty($intrinsicHeight)) {
                $imageObjectSchema["height"] = $intrinsicHeight;
            }
            $schemaImages[] = $imageObjectSchema;

        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of the metadata
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type of the metadata
     */
    public static function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_DOKUWIKI_KEY;
    }

    /**
     * @return string the canonical (the same as the google meta action plugin)
     */
    public static function getCanonical(): string
    {
        return self::CANONICAL;
    }


    /**
     * @return string the description of the metadata
     */
    public static function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the label of the metadata
     */
    public static function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the tab of the metadata manager form where this metadata is located
     */
    public static function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    public static function isMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld derived from the page type.
     *
     * @return string|null the default json-ld as pretty json string or null if no default could be built
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Sets the value from the store.
     *
     * When there is no value and the resource is a page, the deprecated
     * {@link LdJson::OLD_ORGANIZATION_PROPERTY organization} metadata is read
     * and, if found, merged with the default value.
     *
     * @param mixed $value the value read from the store (may be null)
     * @return Metadata this object
     */
    public function setFromStoreValueWithoutException($value): Metadata
    {

        if ($value === null && $this->getResource() instanceof MarkupPath) {
            /**
             * Deprecated, old organization syntax
             * We could add a predicate on the page type
             * but we don't want to lose any data
             * (ie if the page was set to not be an organization,
             * the frontmatter would not take it)
             */
            $metadata = $this->getReadStore()->getFromName(self::OLD_ORGANIZATION_PROPERTY);
            if ($metadata !== null) {
                $organization = [
                    "organization" => $metadata
                ];
                $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
                // null when the default could not be built (mandatory metadata missing)
                if ($ldJsonOrganization !== null) {
                    $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
                }
            }
        }
        parent::setFromStoreValueWithoutException($value);
        return $this;

    }

    /**
     * The ldJson value merged with the default value
     *
     * The raw value is returned as is when it's not a valid json.
     * Null is returned when the default could not be built
     * (for instance, a mandatory metadata is missing on an event page).
     *
     * @return false|string|null
     */
    public function getLdJsonMergedWithDefault()
    {

        try {
            $value = $this->getValue();
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCompile $e) {
                LogUtility::error("The string value is not a valid Json. Value: $value", self::CANONICAL);
                return $value;
            }
        } catch (ExceptionNotFound $e) {
            // no stored value, only the default is used
            $actualValueAsArray = [];
        }
        $mergedValueAsArray = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValueAsArray === null) {
            return null;
        }
        return Json::createFromArray($mergedValueAsArray)->toPrettyJsonString();
    }


    /**
     * Builds the default json-ld from the page type and merges the actual value into it
     * (the actual value has precedence over the default).
     *
     * @param array|null $actualValue the actual json-ld value as array
     * @return array|null the merged array, the actual value as is if the resource is not a page,
     * or null if a mandatory metadata is missing (event page)
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof MarkupPath)) {
            return $actualValue;
        }

        $readStore = $this->getReadStore();
        $type = PageType::createForPage($page)
            ->setReadStore(MetadataDokuWikiStore::class)
            ->getValueOrDefault();
        if (!($readStore instanceof MetadataDokuWikiStore)) {
            /**
             * Edge case we set the readstore because in a frontmatter,
             * the type may have been set
             */
            try {
                $type = PageType::createForPage($page)
                    ->setReadStore($readStore)
                    ->getValue();
            } catch (ExceptionNotFound $e) {
                // ok, the type of the dokuwiki store is kept
            }
        }

        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case PageType::WEBSITE_TYPE:
                $ldJson = self::createWebSiteLdJson($page);
                break;

            case PageType::ORGANIZATION_TYPE:
                $ldJson = self::createOrganizationLdJson();
                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:
                $ldJson = self::createArticleLdJson($page, $lowerCaseType);
                break;

            case PageType::EVENT_TYPE:
                $ldJson = self::createEventLdJson($page);
                if ($ldJson === null) {
                    // a mandatory metadata is missing, the error was already reported
                    return null;
                }
                break;

            default:
                // May be added manually by the user itself
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()->toString()
                ];
                break;
        }

        $ldJson[self::SPEAKABLE] = self::createSpeakableSpecification($page);

        /**
         * merge with the extra
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }

    /**
     * Builds the default `WebSite` document
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     */
    private static function createWebSiteLdJson(MarkupPath $page): array
    {
        $ldJson = [
            '@context' => 'https://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getBaseUrl(),
            'name' => Site::getTitle()
        ];

        if ($page->isRootHomePage()) {
            $ldJson['potentialAction'] = [
                '@type' => 'SearchAction',
                // Json-ld is Json, not HTML: the ampersand must not be encoded as an HTML entity,
                // otherwise the entity would be part of the URL template
                'target' => Site::getBaseUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            ];
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Builds the default `Organization` document (Organization + Logo)
     *
     * https://developers.google.com/search/docs/data-types/logo
     */
    private static function createOrganizationLdJson(): array
    {
        return [
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getBaseUrl(),
            "logo" => Site::getLogoUrlAsPng()
        ];
    }

    /**
     * Builds the default document of the article family
     * (`Article`, `NewsArticle`, `BlogPosting`, `WebPage`)
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * Image (at least 696 pixels wide)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG.
     *
     * Date should be https://en.wikipedia.org/wiki/ISO_8601
     *
     * @param MarkupPath $page the page
     * @param string $lowerCaseType the page type in lowercase
     */
    private static function createArticleLdJson(MarkupPath $page, string $lowerCaseType): array
    {
        switch ($lowerCaseType) {
            case PageType::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case PageType::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:
                // https://schema.org/WebPage
                $schemaType = "WebPage";
                break;
            case PageType::ARTICLE_TYPE:
            default:
                $schemaType = "Article";
                break;
        }

        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getAbsoluteCanonicalUrl()->toString(),
            "headline" => $page->getTitleOrDefault(),
        ];

        /**
         * Published Time
         */
        try {
            $ldJson[self::DATE_PUBLISHED_KEY] = $page
                ->getPublishedElseCreationTime()
                ->format(Iso8601Date::getFormat());
        } catch (ExceptionNotFound $e) {
            // Internal error, the page should exist
            LogUtility::error("Internal Error: We were unable to define the publication date for the page ($page). Error: {$e->getMessage()}", self::CANONICAL);
        }

        /**
         * Modified Time
         */
        try {
            $ldJson[self::DATE_MODIFIED_KEY] = $page
                ->getModifiedTimeOrDefault()
                ->format(Iso8601Date::getFormat());
        } catch (ExceptionNotFound $e) {
            // Internal error, the page should exist
            LogUtility::error("Internal Error: We were unable to define the modification date for the page ($page). Error: {$e->getMessage()}", self::CANONICAL);
        }

        /**
         * Publisher info
         */
        $publisher = [
            "@type" => "Organization",
            "name" => Site::getName()
        ];
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = [
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            ];
        }
        $ldJson["publisher"] = $publisher;

        self::addImage($ldJson, $page);

        return $ldJson;
    }

    /**
     * Builds the default `Event` document
     *
     * https://developers.google.com/search/docs/advanced/structured-data/event
     *
     * @param MarkupPath $page the event page
     * @return array|null null if a mandatory metadata (name, description, start date, end date)
     * is missing, an error message is then reported
     */
    private static function createEventLdJson(MarkupPath $page): ?array
    {
        $ldJson = [
            "@context" => "https://schema.org",
            "@type" => "Event"
        ];

        try {
            $ldJson["name"] = $page->getName();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }

        try {
            $ldJson["description"] = $page->getDescription();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }

        try {
            $startDate = $page->getStartDate();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["startDate"] = $startDate->format(Iso8601Date::getFormat());

        try {
            $endDate = $page->getEndDate();
        } catch (ExceptionNotFound $e) {
            LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return null;
        }
        $ldJson["endDate"] = $endDate->format(Iso8601Date::getFormat());

        self::addImage($ldJson, $page);

        return $ldJson;
    }

    /**
     * Builds the `SpeakableSpecification`: the title and,
     * if a description was set on the page, the meta description
     *
     * https://developers.google.com/search/docs/data-types/speakable
     */
    private static function createSpeakableSpecification(MarkupPath $page): array
    {
        $speakableXpath = ["/html/head/title"];
        try {
            PageDescription::createForPage($page)
                ->getValue();
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        } catch (ExceptionNotFound $e) {
            // ok, no description
        }
        return [
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        ];
    }


    public static function isOnForm(): bool
    {
        return true;
    }


}
482