xref: /plugin/combo/action/metagoogle.php (revision 1fa8c418ed5809db58049141be41b7738471dd32)
1<?php
2
3use ComboStrap\Image;
4use ComboStrap\Iso8601Date;
5use ComboStrap\LogUtility;
6use ComboStrap\Page;
7use ComboStrap\RasterImageLink;
8use ComboStrap\Site;
9
10if (!defined('DOKU_INC')) die();
11
12require_once(__DIR__ . '/../ComboStrap/Site.php');
13
14/**
 * Adds the schema.org structured data (ld-json) of the requested page
 * to the script meta headers
 *
 * To test locally, use ngrok
18 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
19 *
20 * Tool:
21 * https://support.google.com/webmasters/answer/2774099# - Data Highlighter
22 * to tag page manually (you see well what kind of information they need)
23 *
24 * Ref:
25 * https://developers.google.com/search/docs/guides/intro-structured-data
26 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
27 * https://json-ld.org/
28 * https://schema.org/docs/documents.html
29 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
30 */
31class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
32{
33
34
    /** Canonical name passed to LogUtility::msg with the messages of this class */
    const CANONICAL = "google";
    /** Name of the page metadata that holds the extra ld-json properties merged into the output */
    const JSON_LD_META_PROPERTY = "json-ld";
    /** Lowercase schema.org type name accepted as page type and published as NewsArticle */
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    /** Lowercase schema.org type name accepted as page type and published as BlogPosting */
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    /** ld-json property name of the publication date */
    const DATE_PUBLISHED_KEY = "datePublished";
    /** ld-json property name of the modification date */
    const DATE_MODIFIED_KEY = "dateModified";
    /** ld-json property name of the speakable specification */
    const SPEAKABLE = "speakable";
    /** ld-json property name of the publisher */
    const PUBLISHER = "publisher";
43
44    function __construct()
45    {
46        // enable direct access to language strings
47        // ie $this->lang
48        $this->setupLocale();
49    }
50
51    private static function addImage(array &$ldJson, $page)
52    {
53        /**
54         * Image must belong to the page
55         * https://developers.google.com/search/docs/guides/sd-policies#images
56         *
57         * Image may have IPTC metadata: not yet implemented
58         * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
59         *
60         * Image must have the supported format
61         * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
62         * BMP, GIF, JPEG, PNG, WebP, and SVG
63         */
64        $supportedMime = [
65            "image/bmp",
66            "image/gif",
67            "image/jpeg",
68            "image/png",
69            "image/webp",
70            "image/svg+xml",
71        ];
72        $imagesSet = $page->getLocalImageSet();
73        $schemaImages = array();
74        foreach ($imagesSet as $image) {
75
76            $mime = $image->getMime();
77            if (in_array($mime, $supportedMime)) {
78                if ($image->exists()) {
79                    $imageObjectSchema = array(
80                        "@type" => "ImageObject",
81                        "url" => $image->getAbsoluteUrl()
82                    );
83                    if (!empty($image->getIntrinsicWidth())) {
84                        $imageObjectSchema["width"] = $image->getIntrinsicWidth();
85                    }
86                    if (!empty($image->getIntrinsicHeight())) {
87                        $imageObjectSchema["height"] = $image->getIntrinsicHeight();
88                    }
89                    $schemaImages[] = $imageObjectSchema;
90                } else {
91                    LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
92                }
93            }
94        }
95
96        if (!empty($schemaImages)) {
97            $ldJson["image"] = $schemaImages;
98        }
99    }
100
101    public function register(Doku_Event_Handler $controller)
102    {
103        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
104    }
105
106    /**
107     *
108     * @param $event
109     */
110    function metaGoogleProcessing($event)
111    {
112
113
114        global $ID;
115        if (empty($ID)) {
116            // $ID is null
117            // case on "/lib/exe/mediamanager.php"
118            return;
119        }
120        $page = Page::createPageFromId($ID);
121        if (!$page->exists()) {
122            return;
123        }
124
125        /**
126         * No metadata for bars
127         */
128        if ($page->isSlot()) {
129            return;
130        }
131
132        $type = $page->getType();
133        if (empty($type)) {
134            return;
135        }
136        switch (strtolower($type)) {
137            case Page::WEBSITE_TYPE:
138
139                /**
140                 * https://schema.org/WebSite
141                 * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
142                 */
143
144                $ldJson = array(
145                    '@context' => 'http://schema.org',
146                    '@type' => 'WebSite',
147                    'url' => Site::getUrl(),
148                    'name' => Site::getTitle()
149                );
150
151                if ($page->isHomePage()) {
152
153                    $ldJson['potentialAction'] = array(
154                        '@type' => 'SearchAction',
155                        'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&amp;id={search_term_string}',
156                        'query-input' => 'required name=search_term_string',
157                    );
158                }
159
160                $tag = Site::getTag();
161                if (!empty($tag)) {
162                    $ldJson['description'] = $tag;
163                }
164                $siteImageUrl = Site::getLogoUrlAsPng();
165                if (!empty($siteImageUrl)) {
166                    $ldJson['image'] = $siteImageUrl;
167                }
168
169                break;
170
171            case Page::ORGANIZATION_TYPE:
172
173                /**
174                 * Organization + Logo
175                 * https://developers.google.com/search/docs/data-types/logo
176                 */
177                $ldJson = array(
178                    "@context" => "https://schema.org",
179                    "@type" => "Organization",
180                    "url" => Site::getUrl(),
181                    "logo" => Site::getLogoUrlAsPng()
182                );
183
184                break;
185
186            case Page::ARTICLE_TYPE:
187            case Page::NEWS_TYPE:
188            case Page::BLOG_TYPE:
189            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
190            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
191
192                $schemaType = "Article";
193                switch (strtolower($type)) {
194                    case Page::NEWS_TYPE:
195                    case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
196                        $schemaType = "NewsArticle";
197                        break;
198                    case Page::BLOG_TYPE:
199                    case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
200                        $schemaType = "BlogPosting";
201                        break;
202                }
203                // https://developers.google.com/search/docs/data-types/article
204                // https://schema.org/Article
205
206                // Image (at least 696 pixels wide)
207                // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
208                // BMP, GIF, JPEG, PNG, WebP, and SVG.
209
210                // Date should be https://en.wikipedia.org/wiki/ISO_8601
211
212
213                $ldJson = array(
214                    "@context" => "https://schema.org",
215                    "@type" => $schemaType,
216                    'url' => $page->getCanonicalUrlOrDefault(),
217                    "headline" => $page->getTitleNotEmpty(),
218                    self::DATE_PUBLISHED_KEY => $page->getPublishedElseCreationTime()->format(Iso8601Date::getFormat())
219                );
220
221                /**
222                 * Modified Time
223                 */
224                $modifiedTime = $page->getModifiedTime();
225                if ($modifiedTime != null) {
226                    $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(Iso8601Date::getFormat());
227                };
228
229                /**
230                 * Publisher info
231                 */
232                $publisher = array(
233                    "@type" => "Organization",
234                    "name" => Site::getTitle()
235                );
236                $logoUrlAsPng = Site::getLogoUrlAsPng();
237                if (!empty($logoUrlAsPng)) {
238                    $publisher["logo"] = array(
239                        "@type" => "ImageObject",
240                        "url" => $logoUrlAsPng
241                    );
242                }
243                $ldJson["publisher"] = $publisher;
244
245                self::addImage($ldJson, $page);
246                break;
247
248            case PAGE::EVENT_TYPE:
249                // https://developers.google.com/search/docs/advanced/structured-data/event
250                $ldJson = array(
251                    "@context" => "https://schema.org",
252                    "@type" => "Event");
253                $eventName = $page->getPageName();
254                if (!blank($eventName)) {
255                    $ldJson["name"] = $eventName;
256                } else {
257                    LogUtility::msg("The name metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
258                    return;
259                }
260                $eventDescription = $page->getDescription();
261                if (blank($eventDescription)) {
262                    LogUtility::msg("The description metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
263                    return;
264                }
265                $ldJson["description"] = $eventDescription;
266                $startDate = $page->getStartDateAsString();
267                if($startDate===null){
268                    LogUtility::msg("The date_start metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
269                    return;
270                }
271                $ldJson["startDate"] = $page->getStartDateAsString();
272
273                $endDate = $page->getEndDateAsString();
274                if($endDate===null){
275                    LogUtility::msg("The date_end metadata is mandatory for a event page", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
276                    return;
277                }
278                $ldJson["endDate"] = $page->getEndDateAsString();
279
280
281                self::addImage($ldJson, $page);
282                break;
283
284            default:
285
286                // May be added manually by the user itself
287                $ldJson = array(
288                    '@context' => 'http://schema.org',
289                    '@type' => $type,
290                    'url' => $page->getCanonicalUrlOrDefault()
291                );
292                break;
293        }
294
295
296        /**
297         * https://developers.google.com/search/docs/data-types/speakable
298         */
299        $speakableXpath = array();
300        if (!empty($page->getTitle())) {
301            $speakableXpath[] = "/html/head/title";
302        }
303        if (!empty($page->getDescription())) {
304            /**
305             * Only the description written otherwise this is not speakable
306             * you can have link and other strangeness
307             */
308            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
309        }
310        $ldJson[self::SPEAKABLE] = array(
311            "@type" => "SpeakableSpecification",
312            "xpath" => $speakableXpath
313        );
314
315        /**
316         * Do we have extra ld-json properties
317         */
318        $extraLdJson = $page->getMetadata(self::JSON_LD_META_PROPERTY);
319        if (!empty($extraLdJson)) {
320            $ldJson = array_merge($ldJson, $extraLdJson);
321        }
322
323
324        /**
325         * Publish
326         */
327        if (!empty($ldJson)) {
328            $jsonEncode = json_encode($ldJson, JSON_PRETTY_PRINT);
329            $event->data["script"][] = array(
330                "type" => "application/ld+json",
331                "_data" => $jsonEncode,
332            );
333        }
334    }
335
336
337}
338