xref: /plugin/combo/action/metagoogle.php (revision 37748cd8654635afbeca80942126742f0f4cc346)
1<?php
2
3use ComboStrap\LogUtility;
4use ComboStrap\Page;
5use ComboStrap\RasterImageLink;
6use ComboStrap\Site;
7
8if (!defined('DOKU_INC')) die();
9
10require_once(__DIR__ . '/../ComboStrap/Site.php');
11
12/**
13 *
14 *
15 * To test locally use ngrok
16 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
17 *
18 *
19 * Ref:
20 * https://developers.google.com/search/docs/guides/intro-structured-data
21 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
22 * https://json-ld.org/
23 * https://schema.org/docs/documents.html
24 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
25 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{


    const CANONICAL = "google";
    const JSON_LD_META_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Registers the json-ld handler on the `TPL_METAHEADER_OUTPUT` event (BEFORE phase).
     *
     * @param Doku_Event_Handler $controller the event controller on which the hook is registered
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Builds the schema.org json-ld description of the current page and appends it
     * as an `application/ld+json` script entry to `$event->data["script"]`.
     *
     * Nothing is added when the global `$ID` is empty, when the page does not exist,
     * when the page is a slot, or when the page has no type.
     *
     * @param $event - the event; its `data["script"]` array receives the new script entry
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = Page::createPageFromId($ID);
        if (!$page->exists()) {
            return;
        }

        /**
         * No metadata for bars
         */
        if ($page->isSlot()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        // Lowercased once: it drives both the dispatch below and the article sub-type
        $lowerType = strtolower($type);
        switch ($lowerType) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleLdJson($page, $lowerType);
                break;

            default:
                // May be added manually by the user itself
                // (the type is published as written, not lowercased)
                $ldJson = array(
                    '@context' => 'http://schema.org',
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * https://developers.google.com/search/docs/data-types/speakable
         *
         * Only published when there is at least one xpath:
         * a speakable specification without any selector describes nothing
         */
        $speakableXpath = $this->buildSpeakableXpaths($page);
        if (!empty($speakableXpath)) {
            $ldJson[self::SPEAKABLE] = array(
                "@type" => "SpeakableSpecification",
                "xpath" => $speakableXpath
            );
        }

        /**
         * Do we have extra ld-json properties
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_META_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_string($extraLdJson)) {
                // The property may have been stored as a json string
                $extraLdJson = json_decode($extraLdJson, true);
            }
            if (is_array($extraLdJson)) {
                // The extra properties win over the generated ones
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                // array_merge on a non-array would fail (TypeError on PHP 8)
                LogUtility::msg("The extra json-ld metadata of the page ($ID) is not a json object and was ignored", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }


        /**
         * Publish
         */
        if (!empty($ldJson)) {
            $jsonEncode = json_encode($ldJson, JSON_PRETTY_PRINT);
            if ($jsonEncode === false) {
                // Never publish `false` as script content
                LogUtility::msg("The json-ld of the page ($ID) could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                return;
            }
            $event->data["script"][] = array(
                "type" => "application/ld+json",
                "_data" => $jsonEncode,
            );
        }
    }

    /**
     * Builds the `WebSite` node.
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page the current page; the search action is only added on the home page
     * @return array the json-ld data
     */
    private function buildWebSiteLdJson($page)
    {
        $ldJson = array(
            '@context' => 'http://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            // The target is a URL template inside a json document, not HTML:
            // the separator must be a plain `&`, an html entity would never be decoded
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Builds the `Organization` node (Organization + Logo).
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array the json-ld data
     */
    private function buildOrganizationLdJson()
    {
        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getUrl()
        );

        // Same guard as for the website image and the publisher logo:
        // no `logo` property at all rather than an empty one
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $ldJson["logo"] = $logoUrlAsPng;
        }

        return $ldJson;
    }

    /**
     * Builds the `Article`, `NewsArticle` or `BlogPosting` node.
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * @param Page $page the current page
     * @param string $lowerType the lowercased page type, it selects the schema.org type
     * @return array the json-ld data
     */
    private function buildArticleLdJson($page, $lowerType)
    {
        switch ($lowerType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
            default:
                $schemaType = "Article";
                break;
        }

        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty()
        );

        /**
         * Published Time
         * Date should be https://en.wikipedia.org/wiki/ISO_8601
         *
         * Guarded like the modified time below.
         * NOTE(review): the return type of getPublishedElseCreationTime is not visible here,
         * the guard assumes it may return null when no date is known - confirm
         */
        $publishedTime = $page->getPublishedElseCreationTime();
        if (!empty($publishedTime)) {
            $ldJson[self::DATE_PUBLISHED_KEY] = $publishedTime->format(DATE_ISO8601);
        }

        /**
         * Modified Time
         */
        $modifiedTime = $page->getModifiedTime();
        if (!empty($modifiedTime)) {
            $ldJson[self::DATE_MODIFIED_KEY] = $modifiedTime->format(DATE_ISO8601);
        }

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        /**
         * Images
         */
        $schemaImages = $this->buildImageObjects($page);
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }

        return $ldJson;
    }

    /**
     * Builds the list of `ImageObject` nodes from the local image set of the page.
     *
     * Image must belong to the page
     * https://developers.google.com/search/docs/guides/sd-policies#images
     *
     * Image may have IPTC metadata: not yet implemented
     * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
     *
     * Image must have the supported format (at least 696 pixels wide is advised)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG
     *
     * @param Page $page the current page
     * @return array a list of `ImageObject` arrays, empty when no image qualifies
     */
    private function buildImageObjects($page)
    {
        $supportedMime = [
            "image/bmp",
            "image/gif",
            "image/jpeg",
            "image/png",
            "image/webp",
            "image/svg+xml",
        ];

        $schemaImages = array();
        foreach ($page->getLocalImageSet() as $image) {

            // Unsupported formats are skipped silently
            if (!in_array($image->getMime(), $supportedMime, true)) {
                continue;
            }

            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }

            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            );

            // Dimensions are only read from raster images that can be analyzed
            if ($image instanceof RasterImageLink && $image->isAnalyzable()) {
                $width = $image->getMediaWidth();
                if (!empty($width)) {
                    $imageObjectSchema["width"] = $width;
                }
                $height = $image->getMediaHeight();
                if (!empty($height)) {
                    $imageObjectSchema["height"] = $height;
                }
            }

            $schemaImages[] = $imageObjectSchema;
        }

        return $schemaImages;
    }

    /**
     * Collects the xpath selectors of the speakable parts of the page.
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param Page $page the current page
     * @return array a list of xpath expressions, empty when the page has neither a title nor a description
     */
    private function buildSpeakableXpaths($page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        return $speakableXpath;
    }


}
296