1<?php
2
3
4namespace ComboStrap;
5
6
7use action_plugin_combo_metagoogle;
8use ComboStrap\Meta\Api\Metadata;
9use ComboStrap\Meta\Api\MetadataJson;
10use ComboStrap\Meta\Store\MetadataDokuWikiStore;
11use ComboStrap\Web\Url;
12use ComboStrap\Web\UrlEndpoint;
13
14/**
15 *
16 *
17 * To test locally use ngrok
18 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
19 *
20 * Tool:
21 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
22 * to tag page manually (you see well what kind of information they need)
23 *
24 * Ref:
25 * https://developers.google.com/search/docs/guides/intro-structured-data
26 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
27 * https://json-ld.org/
28 * https://schema.org/docs/documents.html
29 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
30 */
31class LdJson extends MetadataJson
32{
33
34    public const PROPERTY_NAME = "json-ld";
35
36    public const SPEAKABLE = "speakable";
37    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
38    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
39    /**
40     * @deprecated
41     * This attribute was used to hold json-ld organization
42     * data
43     */
44    public const OLD_ORGANIZATION_PROPERTY = "organization";
45    public const DATE_PUBLISHED_KEY = "datePublished";
46    public const DATE_MODIFIED_KEY = "dateModified";
47
48    public const CANONICAL = action_plugin_combo_metagoogle::CANONICAL;
49
50    public static function createForPage(MarkupPath $page): LdJson
51    {
52        return (new LdJson())
53            ->setResource($page);
54    }
55
56    /**
57     * @param array $ldJson
58     * @param MarkupPath $page
59     */
60    public static function addImage(array &$ldJson, MarkupPath $page)
61    {
62        /**
63         * Image must belong to the page
64         * https://developers.google.com/search/docs/guides/sd-policies#images
65         *
66         * Image may have IPTC metadata: not yet implemented
67         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
68         *
69         * Image must have the supported format
70         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
71         * BMP, GIF, JPEG, PNG, WebP, and SVG
72         */
73        $supportedMime = [
74            Mime::BMP,
75            Mime::GIF,
76            Mime::JPEG,
77            Mime::PNG,
78            Mime::WEBP,
79            Mime::SVG,
80        ];
81        $imagesSet = $page->getImagesForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
82        $schemaImages = array();
83        foreach ($imagesSet as $pageImage) {
84
85            try {
86                $pageImagePath = $pageImage->getSourcePath()->toWikiPath();
87            } catch (ExceptionCast $e) {
88                LogUtility::internalError("The page image should come from a wiki path", self::CANONICAL, $e);
89                continue;
90            }
91            try {
92                $mime = $pageImagePath->getMime()->toString();
93            } catch (ExceptionNotFound $e) {
94                // should not happen
95                LogUtility::internalError("The page image mime could not be determined. Error:" . $e->getMessage(), self::CANONICAL, $e);
96                $mime = "unknown";
97            }
98            if (in_array($mime, $supportedMime)) {
99                if (FileSystems::exists($pageImagePath)) {
100                    try {
101                        $fetcherPageImage = IFetcherLocalImage::createImageFetchFromPath($pageImagePath);
102                    } catch (ExceptionBadArgument|ExceptionBadSyntax|ExceptionNotExists $e) {
103                        LogUtility::error("The image ($pageImagePath) could not be added as page image. Error: {$e->getMessage()}");
104                        continue;
105                    }
106                    $imageObjectSchema = array(
107                        "@type" => "ImageObject",
108                        "url" => $fetcherPageImage->getFetchUrl()->toAbsoluteUrlString()
109                    );
110                    if (!empty($fetcherPageImage->getIntrinsicWidth())) {
111                        $imageObjectSchema["width"] = $fetcherPageImage->getIntrinsicWidth();
112                    }
113                    if (!empty($fetcherPageImage->getIntrinsicHeight())) {
114                        $imageObjectSchema["height"] = $fetcherPageImage->getIntrinsicHeight();
115                    }
116                    $schemaImages[] = $imageObjectSchema;
117                } else {
118                    LogUtility::msg("The image ($pageImagePath) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
119                }
120            }
121        }
122
123        if (!empty($schemaImages)) {
124            $ldJson["image"] = $schemaImages;
125        }
126    }
127
128    public static function getName(): string
129    {
130        return self::PROPERTY_NAME;
131    }
132
133    static public function getPersistenceType(): string
134    {
135        return MetadataDokuWikiStore::PERSISTENT_DOKUWIKI_KEY;
136    }
137
138    static public function getCanonical(): string
139    {
140        return action_plugin_combo_metagoogle::CANONICAL;
141    }
142
143
144    static public function getDescription(): string
145    {
146        return "Advanced Page metadata definition with the json-ld format";
147    }
148
149    static public function getLabel(): string
150    {
151        return "Json-ld";
152    }
153
154    static public function getTab(): string
155    {
156        return MetaManagerForm::TAB_TYPE_VALUE;
157    }
158
159
160    static public function isMutable(): bool
161    {
162        return true;
163    }
164
165    public function getDefaultValue(): ?string
166    {
167
168        $ldJson = $this->mergeWithDefaultValueAndGet();
169        if ($ldJson === null) {
170            return null;
171        }
172
173        /**
174         * Return
175         */
176        return Json::createFromArray($ldJson)->toPrettyJsonString();
177
178    }
179
180    public function setFromStoreValueWithoutException($value): Metadata
181    {
182
183        if ($value === null) {
184            $resourceCombo = $this->getResource();
185            if (($resourceCombo instanceof MarkupPath)) {
186                /**
187                 * Deprecated, old organization syntax
188                 * We could add this predicate
189                 *
190                 * but we don't want to lose any data
191                 * (ie if the page was set to no be an organization table,
192                 * the frontmatter would not take it)
193                 */
194                $store = $this->getReadStore();
195                $metadata = $store->getFromName(self::OLD_ORGANIZATION_PROPERTY);
196                if ($metadata !== null) {
197                    $organization = array(
198                        "organization" => $metadata
199                    );
200                    $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
201                    $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
202                }
203            }
204        }
205        parent::setFromStoreValueWithoutException($value);
206        return $this;
207
208
209    }
210
211    /**
212     * The ldJson value
213     * @return false|string|null
214     */
215    public function getLdJsonMergedWithDefault()
216    {
217
218        try {
219            $value = $this->getValue();
220            try {
221                $actualValueAsArray = Json::createFromString($value)->toArray();
222            } catch (ExceptionCompile $e) {
223                LogUtility::error("The string value is not a valid Json. Value: $value", self::CANONICAL);
224                return $value;
225            }
226        } catch (ExceptionNotFound $e) {
227            $actualValueAsArray = [];
228        }
229        $actualValueAsArray = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
230        return Json::createFromArray($actualValueAsArray)->toPrettyJsonString();
231    }
232
233
234    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
235    {
236        $page = $this->getResource();
237        if (!($page instanceof MarkupPath)) {
238            return $actualValue;
239        }
240
241        $readStore = $this->getReadStore();
242        $type = PageType::createForPage($page)
243            ->setReadStore(MetadataDokuWikiStore::class)
244            ->getValueOrDefault();
245        if (!($readStore instanceof MetadataDokuWikiStore)) {
246            /**
247             * Edge case we set the readstore because in a frontmatter,
248             * the type may have been set
249             */
250            try {
251                $type = PageType::createForPage($page)
252                    ->setReadStore($readStore)
253                    ->getValue();
254            } catch (ExceptionNotFound $e) {
255                // ok
256            }
257        }
258        switch (strtolower($type)) {
259            case PageType::WEBSITE_TYPE:
260
261                /**
262                 * https://schema.org/WebSite
263                 * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
264                 */
265                $ldJson = array(
266                    '@context' => 'https://schema.org',
267                    '@type' => 'WebSite',
268                    'url' => Site::getBaseUrl(),
269                    'name' => Site::getTitle()
270                );
271
272                if ($page->isRootHomePage()) {
273
274                    $target = UrlEndpoint::createDokuUrl()
275                            ->addQueryParameter("do", ExecutionContext::SEARCH_ACTION)
276                            ->toAbsoluteUrl()
277                            ->toHtmlString()
278                        . Url::AMPERSAND_URL_ENCODED_FOR_HTML . 'id={search_term_string}';
279                    $ldJson['potentialAction'] = array(
280                        '@type' => 'SearchAction',
281                        'target' => $target,
282                        'query-input' => 'required name=search_term_string',
283                    );
284                }
285
286                $tag = Site::getTag();
287                if (!empty($tag)) {
288                    $ldJson['description'] = $tag;
289                }
290                $siteImageUrl = Site::getLogoUrlAsPng();
291                if (!empty($siteImageUrl)) {
292                    $ldJson['image'] = $siteImageUrl;
293                }
294
295                break;
296
297            case PageType::ORGANIZATION_TYPE:
298
299                /**
300                 * Organization + Logo
301                 * https://developers.google.com/search/docs/data-types/logo
302                 */
303                $ldJson = array(
304                    "@context" => "https://schema.org",
305                    "@type" => "Organization",
306                    "url" => Site::getBaseUrl(),
307                    "logo" => Site::getLogoUrlAsPng()
308                );
309
310                break;
311
312            case PageType::ARTICLE_TYPE:
313            case PageType::NEWS_TYPE:
314            case PageType::BLOG_TYPE:
315            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
316            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
317            case PageType::HOME_TYPE:
318            case PageType::WEB_PAGE_TYPE:
319
320                switch (strtolower($type)) {
321                    case PageType::NEWS_TYPE:
322                    case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
323                        $schemaType = "NewsArticle";
324                        break;
325                    case PageType::BLOG_TYPE:
326                    case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
327                        $schemaType = "BlogPosting";
328                        break;
329                    case PageType::HOME_TYPE:
330                    case PageType::WEB_PAGE_TYPE:
331                        // https://schema.org/WebPage
332                        $schemaType = "WebPage";
333                        break;
334                    case PageType::ARTICLE_TYPE:
335                    default:
336                        $schemaType = "Article";
337                        break;
338
339                }
340                // https://developers.google.com/search/docs/data-types/article
341                // https://schema.org/Article
342
343                // Image (at least 696 pixels wide)
344                // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
345                // BMP, GIF, JPEG, PNG, WebP, and SVG.
346
347                // Date should be https://en.wikipedia.org/wiki/ISO_8601
348
349
350                $ldJson = array(
351                    "@context" => "https://schema.org",
352                    "@type" => $schemaType,
353                    'url' => $page->getAbsoluteCanonicalUrl()->toString(),
354                    "headline" => $page->getTitleOrDefault(),
355
356                );
357
358                try {
359                    $ldJson[self::DATE_PUBLISHED_KEY] = $page
360                        ->getPublishedElseCreationTime()
361                        ->format(Iso8601Date::getFormat());
362                } catch (ExceptionNotFound $e) {
363                    // Internal error, the page should exist
364                    LogUtility::error("Internal Error: We were unable to define the publication date for the page ($page). Error: {$e->getMessage()}", self::CANONICAL);
365                }
366
367                /**
368                 * Modified Time
369                 */
370                try {
371                    $modifiedTime = $page->getModifiedTimeOrDefault();
372                    $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
373                } catch (ExceptionNotFound $e) {
374                    // Internal error, the page should exist
375                    LogUtility::error("Internal Error: We were unable to define the modification date for the page ($page)", self::CANONICAL);
376                }
377
378                /**
379                 * Publisher info
380                 */
381                $publisher = array(
382                    "@type" => "Organization",
383                    "name" => Site::getName()
384                );
385                $logoUrlAsPng = Site::getLogoUrlAsPng();
386                if (!empty($logoUrlAsPng)) {
387                    $publisher["logo"] = array(
388                        "@type" => "ImageObject",
389                        "url" => $logoUrlAsPng
390                    );
391                }
392                $ldJson["publisher"] = $publisher;
393
394                self::addImage($ldJson, $page);
395                break;
396
397            case PageType::EVENT_TYPE:
398                // https://developers.google.com/search/docs/advanced/structured-data/event
399                $ldJson = array(
400                    "@context" => "https://schema.org",
401                    "@type" => "Event");
402                try {
403                    $eventName = $page->getName();
404                    $ldJson["name"] = $eventName;
405                } catch (ExceptionNotFound $e) {
406                    LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
407                    return null;
408                }
409
410                try {
411                    $eventDescription = $page->getDescription();
412                } catch (ExceptionNotFound $e) {
413                    LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
414                    return null;
415                }
416
417                $ldJson["description"] = $eventDescription;
418                try {
419                    $startDate = $page->getStartDate();
420                } catch (ExceptionNotFound $e) {
421                    LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
422                    return null;
423                }
424                $ldJson["startDate"] = $startDate->format(Iso8601Date::getFormat());
425
426                try {
427                    $endDate = $page->getEndDate();
428                } catch (ExceptionNotFound $e) {
429                    LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
430                    return null;
431                }
432                $ldJson["endDate"] = $endDate->format(Iso8601Date::getFormat());
433
434
435                self::addImage($ldJson, $page);
436                break;
437
438
439            default:
440
441                // May be added manually by the user itself
442                $ldJson = array(
443                    '@context' => 'https://schema.org',
444                    '@type' => $type,
445                    'url' => $page->getAbsoluteCanonicalUrl()->toString()
446                );
447                break;
448        }
449
450
451        /**
452         * https://developers.google.com/search/docs/data-types/speakable
453         */
454        $speakableXpath = array();
455        $speakableXpath[] = "/html/head/title";
456        try {
457            PageDescription::createForPage($page)
458                ->getValue();
459            /**
460             * Only the description written otherwise this is not speakable
461             * you can have link and other strangeness
462             */
463            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
464        } catch (ExceptionNotFound $e) {
465            // ok, no description
466        }
467        $ldJson[self::SPEAKABLE] = array(
468            "@type" => "SpeakableSpecification",
469            "xpath" => $speakableXpath
470        );
471
472        /**
473         * merge with the extra
474         */
475        if ($actualValue !== null) {
476            return array_merge($ldJson, $actualValue);
477        }
478        return $ldJson;
479    }
480
481
482    static public function isOnForm(): bool
483    {
484        return true;
485    }
486
487
488}
489