1<?php
2
3
4namespace ComboStrap;
5
6
7use action_plugin_combo_metagoogle;
8
9/**
10 *
11 *
12 * To test locally use ngrok
13 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
14 *
15 * Tool:
 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter,
 * used to tag a page manually (it shows clearly what kind of information Google needs)
18 *
19 * Ref:
20 * https://developers.google.com/search/docs/guides/intro-structured-data
21 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
22 * https://json-ld.org/
23 * https://schema.org/docs/documents.html
24 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
25 */
class LdJson extends MetadataJson
{

    /**
     * The name under which this metadata is known (returned by {@link LdJson::getName()})
     */
    public const PROPERTY_NAME = "json-ld";

    /**
     * The json-ld key that holds the speakable specification
     */
    public const SPEAKABLE = "speakable";
    /**
     * Lowercase schema.org type names accepted as page type
     * (they map respectively to the `NewsArticle` and `BlogPosting` schema types)
     */
    public const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    public const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /**
     * @deprecated
     * This attribute was used to hold json-ld organization
     * data
     */
    public const OLD_ORGANIZATION_PROPERTY = "organization";
    public const DATE_PUBLISHED_KEY = "datePublished";
    public const DATE_MODIFIED_KEY = "dateModified";

    /**
     * Create a json-ld metadata bound to the given page
     *
     * @param Page $page the page resource
     * @return LdJson
     */
    public static function createForPage(Page $page): LdJson
    {
        return (new LdJson())
            ->setResource($page);
    }

    /**
     * Add the page images as an `image` property (a list of `ImageObject`)
     * to the given json-ld array.
     *
     * Only the images with a mime supported by Google are considered.
     * An image with a supported mime that does not exist is reported as an error
     * and skipped. The `image` key is not set when no image qualifies.
     *
     * @param array $ldJson the json-ld array, modified in place
     * @param Page $page the page from which the images are taken
     */
    public static function addImage(array &$ldJson, Page $page)
    {
        /**
         * Image must belong to the page
         * https://developers.google.com/search/docs/guides/sd-policies#images
         *
         * Image may have IPTC metadata: not yet implemented
         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
         *
         * Image must have the supported format
         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
         * BMP, GIF, JPEG, PNG, WebP, and SVG
         */
        $supportedMime = [
            Mime::BMP,
            Mime::GIF,
            Mime::JPEG,
            Mime::PNG,
            Mime::WEBP,
            Mime::SVG,
        ];
        $imagesSet = $page->getImagesOrDefaultForTheFollowingUsages([PageImageUsage::ALL, PageImageUsage::SOCIAL, PageImageUsage::GOOGLE]);
        $schemaImages = [];
        foreach ($imagesSet as $image) {

            $mime = $image->getPath()->getMime()->toString();
            if (!in_array($mime, $supportedMime, true)) {
                // Unsupported format: silently ignored
                continue;
            }
            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, action_plugin_combo_metagoogle::CANONICAL);
                continue;
            }

            $imageObjectSchema = [
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            ];
            // The dimensions are optional, they are added only when known
            $width = $image->getIntrinsicWidth();
            if (!empty($width)) {
                $imageObjectSchema["width"] = $width;
            }
            $height = $image->getIntrinsicHeight();
            if (!empty($height)) {
                $imageObjectSchema["height"] = $height;
            }
            $schemaImages[] = $imageObjectSchema;
        }

        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
    }

    /**
     * @return string the name of this metadata ({@link LdJson::PROPERTY_NAME})
     */
    public static function getName(): string
    {
        return self::PROPERTY_NAME;
    }

    /**
     * @return string the persistence type: this metadata is stored as persistent metadata
     */
    public function getPersistenceType(): string
    {
        return MetadataDokuWikiStore::PERSISTENT_METADATA;
    }

    /**
     * @return string the canonical (the one of the google metadata action plugin)
     */
    public function getCanonical(): string
    {
        return action_plugin_combo_metagoogle::CANONICAL;
    }


    /**
     * @return string a human description of this metadata
     */
    public function getDescription(): string
    {
        return "Advanced Page metadata definition with the json-ld format";
    }

    /**
     * @return string the human label of this metadata
     */
    public function getLabel(): string
    {
        return "Json-ld";
    }

    /**
     * @return string the form tab where this metadata is shown
     */
    public function getTab(): string
    {
        return MetaManagerForm::TAB_TYPE_VALUE;
    }


    /**
     * @return bool always true
     */
    public function getMutable(): bool
    {
        return true;
    }

    /**
     * The default json-ld derived from the page (type, title, dates, images, ...)
     *
     * @return string|null the default as a pretty json string or null if no default could be built
     */
    public function getDefaultValue(): ?string
    {

        $ldJson = $this->mergeWithDefaultValueAndGet();
        if ($ldJson === null) {
            return null;
        }

        return Json::createFromArray($ldJson)->toPrettyJsonString();

    }

    /**
     * Build this metadata from the value found in the store.
     *
     * When there is no value and the resource is an organization page,
     * the value of the deprecated {@link LdJson::OLD_ORGANIZATION_PROPERTY}
     * metadata is read from the store and, if present, merged with the default.
     *
     * @param mixed $value the stored value or null
     * @return Metadata this object
     */
    public function buildFromStoreValue($value): Metadata
    {

        if ($value === null) {
            $resourceCombo = $this->getResource();
            // Deprecated, old organization syntax
            if (
                $resourceCombo instanceof Page
                && $resourceCombo->getTypeOrDefault() === PageType::ORGANIZATION_TYPE
            ) {
                $store = $this->getReadStore();
                $metadata = $store->getFromPersistentName(self::OLD_ORGANIZATION_PROPERTY);
                if ($metadata !== null) {
                    $organization = [
                        "organization" => $metadata
                    ];
                    $ldJsonOrganization = $this->mergeWithDefaultValueAndGet($organization);
                    $value = Json::createFromArray($ldJsonOrganization)->toPrettyJsonString();
                }
            }
        }
        parent::buildFromStoreValue($value);
        return $this;

    }

    /**
     * The ldJson value: the actual value merged on top of the default one.
     *
     * If the actual value is not a valid json, an error is logged
     * and the actual value is returned untouched.
     * If no json-ld could be built (see {@link LdJson::mergeWithDefaultValueAndGet()}),
     * null is returned.
     *
     * @return false|string|null
     */
    public function getLdJsonMergedWithDefault()
    {

        $value = $this->getValue();
        $actualValueAsArray = null;
        if ($value !== null) {
            try {
                $actualValueAsArray = Json::createFromString($value)->toArray();
            } catch (ExceptionCombo $e) {
                LogUtility::msg("The string value is not a valid Json. Value: $value");
                return $value;
            }
        }
        $mergedValueAsArray = $this->mergeWithDefaultValueAndGet($actualValueAsArray);
        if ($mergedValueAsArray === null) {
            // The merge returns null when mandatory data are missing (event page)
            // or when there is neither a page nor an actual value: nothing to serialize
            return null;
        }
        return Json::createFromArray($mergedValueAsArray)->toPrettyJsonString();
    }


    /**
     * Build the default json-ld for the page (based on its type)
     * and merge the actual value on top of it (the actual value wins on key conflict).
     *
     * @param array|null $actualValue the json-ld properties set by the user or null
     * @return array|null the merged json-ld array;
     *    null if a mandatory metadata of an event page is missing (an error is logged);
     *    the actual value untouched if the resource is not a page
     */
    private function mergeWithDefaultValueAndGet($actualValue = null): ?array
    {
        $page = $this->getResource();
        if (!($page instanceof Page)) {
            return $actualValue;
        }

        $type = $page->getTypeOrDefault();
        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case PageType::WEBSITE_TYPE:

                /**
                 * https://schema.org/WebSite
                 * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
                 */
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => 'WebSite',
                    'url' => Site::getBaseUrl(),
                    'name' => Site::getTitle()
                ];

                if ($page->isRootHomePage()) {

                    // The target is a raw URL template inside a json document:
                    // the query separator must be a plain `&` and not the HTML entity `&amp;`
                    // (entities are not decoded in a json-ld script and would end up in the URL)
                    $ldJson['potentialAction'] = [
                        '@type' => 'SearchAction',
                        'target' => Site::getBaseUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                        'query-input' => 'required name=search_term_string',
                    ];
                }

                $tag = Site::getTag();
                if (!empty($tag)) {
                    $ldJson['description'] = $tag;
                }
                $siteImageUrl = Site::getLogoUrlAsPng();
                if (!empty($siteImageUrl)) {
                    $ldJson['image'] = $siteImageUrl;
                }

                break;

            case PageType::ORGANIZATION_TYPE:

                /**
                 * Organization + Logo
                 * https://developers.google.com/search/docs/data-types/logo
                 */
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => "Organization",
                    "url" => Site::getBaseUrl(),
                    "logo" => Site::getLogoUrlAsPng()
                ];

                break;

            case PageType::ARTICLE_TYPE:
            case PageType::NEWS_TYPE:
            case PageType::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
            case PageType::HOME_TYPE:
            case PageType::WEB_PAGE_TYPE:

                // Mapping of the page type to the schema.org type
                switch ($lowerCaseType) {
                    case PageType::NEWS_TYPE:
                    case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "NewsArticle";
                        break;
                    case PageType::BLOG_TYPE:
                    case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                        $schemaType = "BlogPosting";
                        break;
                    case PageType::HOME_TYPE:
                    case PageType::WEB_PAGE_TYPE:
                        // https://schema.org/WebPage
                        $schemaType = "WebPage";
                        break;
                    case PageType::ARTICLE_TYPE:
                    default:
                        $schemaType = "Article";
                        break;

                }
                // https://developers.google.com/search/docs/data-types/article
                // https://schema.org/Article

                // Image (at least 696 pixels wide)
                // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
                // BMP, GIF, JPEG, PNG, WebP, and SVG.

                // Date should be https://en.wikipedia.org/wiki/ISO_8601


                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => $schemaType,
                    'url' => $page->getAbsoluteCanonicalUrl(),
                    "headline" => $page->getTitleOrDefault(),
                    self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(Iso8601Date::getFormat())
                ];

                /**
                 * Modified Time
                 */
                $modifiedTime = $page->getModifiedTimeOrDefault();
                if ($modifiedTime !== null) {
                    $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
                }

                /**
                 * Publisher info
                 */
                $publisher = [
                    "@type" => "Organization",
                    "name" => Site::getTitle()
                ];
                $logoUrlAsPng = Site::getLogoUrlAsPng();
                if (!empty($logoUrlAsPng)) {
                    $publisher["logo"] = [
                        "@type" => "ImageObject",
                        "url" => $logoUrlAsPng
                    ];
                }
                $ldJson["publisher"] = $publisher;

                self::addImage($ldJson, $page);
                break;

            case PageType::EVENT_TYPE:
                // https://developers.google.com/search/docs/advanced/structured-data/event
                // Name, description, start and end date are mandatory:
                // if one of them is missing, no json-ld is generated at all
                $ldJson = [
                    "@context" => "https://schema.org",
                    "@type" => "Event"
                ];
                $eventName = $page->getName();
                if (blank($eventName)) {
                    LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $this->getCanonical());
                    return null;
                }
                $ldJson["name"] = $eventName;

                $eventDescription = $page->getDescription();
                if (blank($eventDescription)) {
                    LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $this->getCanonical());
                    return null;
                }
                $ldJson["description"] = $eventDescription;

                $startDate = $page->getStartDateAsString();
                if ($startDate === null) {
                    LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $this->getCanonical());
                    return null;
                }
                $ldJson["startDate"] = $startDate;

                $endDate = $page->getEndDateAsString();
                if ($endDate === null) {
                    LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, $this->getCanonical());
                    return null;
                }
                $ldJson["endDate"] = $endDate;


                self::addImage($ldJson, $page);
                break;


            default:

                // May be added manually by the user itself
                $ldJson = [
                    '@context' => 'https://schema.org',
                    '@type' => $type,
                    'url' => $page->getAbsoluteCanonicalUrl()
                ];
                break;
        }


        /**
         * https://developers.google.com/search/docs/data-types/speakable
         */
        $speakableXpath = [];
        if (!empty($page->getTitleOrDefault())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        $ldJson[self::SPEAKABLE] = [
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        ];

        /**
         * merge with the extra
         * (on key conflict, the actual value overwrites the default)
         */
        if ($actualValue !== null) {
            return array_merge($ldJson, $actualValue);
        }
        return $ldJson;
    }



}
414