xref: /plugin/combo/action/metagoogle.php (revision 21913ab3235d516e2fa19c7e3929b555b3a2bda1)
1<?php
2
3use ComboStrap\LogUtility;
4use ComboStrap\Page;
5use ComboStrap\RasterImageLink;
6use ComboStrap\Site;
7
8if (!defined('DOKU_INC')) die();
9
10require_once(__DIR__ . '/../class/Site.php');
11
/**
 * Adds structured data (JSON-LD using the schema.org vocabulary) for Google
 * to the HTML head of the rendered page.
 *
 * The plugin listens to `TPL_METAHEADER_OUTPUT` and appends one
 * `application/ld+json` script entry whose content depends on the page type.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{


    const CANONICAL = "google";
    const JSON_LD_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Registers the handler that runs before the meta headers are output.
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Builds the JSON-LD document of the current page and appends it to the
     * `script` entries of the event data.
     *
     * Nothing is added when there is no current page id, when the page does
     * not exist on the file system, when the page is a bar, when the page has
     * no type, or when the document cannot be encoded as JSON.
     *
     * @param $event - the `TPL_METAHEADER_OUTPUT` event; `$event->data["script"]` receives the new entry
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = new Page($ID);
        if (!$page->existInFs()) {
            return;
        }

        /**
         * No metadata for bars
         */
        if ($page->isBar()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        switch (strtolower($type)) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleLdJson($page, $type);
                break;

            default:
                // May be added manually by the user itself
                $ldJson = array(
                    '@context' => 'http://schema.org',
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * https://developers.google.com/search/docs/data-types/speakable
         */
        $speakable = $this->buildSpeakable($page);
        if ($speakable !== null) {
            $ldJson[self::SPEAKABLE] = $speakable;
        }

        /**
         * Do we have extra ld-json properties
         */
        $ldJson = $this->mergeExtraLdJson($ldJson, $page->getMetadata(self::JSON_LD_PROPERTY));

        /**
         * Publish
         */
        if (empty($ldJson)) {
            return;
        }
        // The default flags keep "/" escaped as "\/", which also prevents a
        // value containing "</script>" from closing the script element early.
        $json = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($json === false) {
            LogUtility::msg("The google ld-json could not be encoded as JSON (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $json,
        );
    }

    /**
     * Builds the `WebSite` document (with a search action on the home page).
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page
     * @return array
     */
    private function buildWebSiteLdJson(Page $page)
    {
        $ldJson = array(
            '@context' => 'http://schema.org',
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                // Plain "&": the content of a script element is not HTML-entity
                // decoded, "&amp;" would end up verbatim in the URL template
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Builds the `Organization` document (site url + logo).
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array
     */
    private function buildOrganizationLdJson()
    {
        return array(
            "@context" => "https://schema.org",
            "@type" => "Organization",
            "url" => Site::getUrl(),
            "logo" => Site::getLogoUrlAsPng()
        );
    }

    /**
     * Builds the `Article`, `NewsArticle` or `BlogPosting` document with its
     * publisher and images.
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * @param Page $page
     * @param $type - the page type as returned by the page (any letter case)
     * @return array
     */
    private function buildArticleLdJson(Page $page, $type)
    {
        $schemaType = "Article";
        switch (strtolower($type)) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
        }

        // Date should be https://en.wikipedia.org/wiki/ISO_8601 ('c' format)
        // NOTE(review): assumes both timestamp getters return a unix timestamp - confirm
        $ldJson = array(
            "@context" => "https://schema.org",
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty(),
            self::DATE_PUBLISHED_KEY => date('c', $page->getPublishedElseCreationTimeStamp()),
            self::DATE_MODIFIED_KEY => date('c', $page->getModifiedTimestamp()),
        );

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        $schemaImages = $this->buildImageObjects($page);
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }

        return $ldJson;
    }

    /**
     * Builds the list of `ImageObject` for the images of the page.
     *
     * Image must belong to the page
     * https://developers.google.com/search/docs/guides/sd-policies#images
     *
     * Image may have IPTC metadata: not yet implemented
     * https://developers.google.com/search/docs/advanced/appearance/image-rights-metadata
     *
     * Image must have the supported format (at least 696 pixels wide is advised)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG
     *
     * @param Page $page
     * @return array - a list of `ImageObject` arrays, empty when no image qualifies
     */
    private function buildImageObjects(Page $page)
    {
        $supportedMime = [
            "image/bmp",
            "image/gif",
            "image/jpeg",
            "image/png",
            "image/webp",
            "image/svg+xml",
        ];

        $schemaImages = array();
        foreach ($page->getImageSet() as $image) {

            // Images with an unsupported format are silently skipped
            if (!in_array($image->getMime(), $supportedMime, true)) {
                continue;
            }
            if (!$image->exists()) {
                LogUtility::msg("The image ($image) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }

            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getAbsoluteUrl()
            );
            // Dimensions are only added for raster images that can be analyzed
            if ($image instanceof RasterImageLink && $image->isAnalyzable()) {
                $width = $image->getMediaWidth();
                if (!empty($width)) {
                    $imageObjectSchema["width"] = $width;
                }
                $height = $image->getMediaHeight();
                if (!empty($height)) {
                    $imageObjectSchema["height"] = $height;
                }
            }
            $schemaImages[] = $imageObjectSchema;
        }

        return $schemaImages;
    }

    /**
     * Builds the `SpeakableSpecification` pointing to the title and
     * description of the page.
     *
     * @param Page $page
     * @return array|null - null when the page has neither a title nor a description
     */
    private function buildSpeakable(Page $page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        if (empty($speakableXpath)) {
            // A specification without any xpath selects nothing
            return null;
        }

        return array(
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        );
    }

    /**
     * Merges the extra json-ld properties set on the page into the document.
     * On a key conflict, the extra property wins.
     *
     * @param array $ldJson - the generated document
     * @param mixed $extraLdJson - the value of the `json-ld` page metadata
     * @return array - the merged document, or `$ldJson` untouched when the extra value is empty or unusable
     */
    private function mergeExtraLdJson(array $ldJson, $extraLdJson)
    {
        if (empty($extraLdJson)) {
            return $ldJson;
        }
        // NOTE(review): the stored shape of the metadata is not visible here;
        // a string is tentatively treated as a JSON document - confirm
        if (is_string($extraLdJson)) {
            $extraLdJson = json_decode($extraLdJson, true);
        }
        if (!is_array($extraLdJson)) {
            LogUtility::msg("The extra json-ld metadata is not a list of properties and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return $ldJson;
        }

        return array_merge($ldJson, $extraLdJson);
    }


}
287