xref: /plugin/combo/action/metagoogle.php (revision 5f891b7e09648e05e78f5882f3fdde1e9df9b0f1)
1<?php
2
3use ComboStrap\Image;
4use ComboStrap\LogUtility;
5use ComboStrap\MetadataUtility;
6use ComboStrap\PluginUtility;
7use ComboStrap\Page;
8use ComboStrap\Site;
9use ComboStrap\StringUtility;
10
11if (!defined('DOKU_INC')) die();
12
13require_once(__DIR__ . '/../class/Site.php');
14
/**
 * Action plugin that adds a schema.org structured data document (JSON-LD)
 * to the HTML head of the requested page.
 *
 * The document is built from the page type (website, organization,
 * article/news/blog or any other user defined type), completed with a
 * speakable specification and with the optional extra properties stored
 * in the page metadata under the key {@link action_plugin_combo_metagoogle::JSON_LD_PROPERTY}.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{


    const CANONICAL = "google";
    const JSON_LD_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";
    /**
     * The value of the `@context` property, shared by every generated document
     */
    const SCHEMA_ORG_CONTEXT = "https://schema.org";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Hook the json-ld generation just before the output of the meta headers
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Build the json-ld document of the requested page and append it
     * as a `application/ld+json` script entry to the event data.
     *
     * Nothing is added when there is no requested page id, when the page
     * is a bar, when the page has no type or when the document
     * cannot be encoded as json.
     *
     * @param $event - the TPL_METAHEADER_OUTPUT event, its data receives the script entry
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = new Page($ID);

        /**
         * No metadata for bars
         */
        if ($page->isBar()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }

        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleLdJson($page, $lowerCaseType);
                break;

            default:
                // May be added manually by the user itself
                // The type is passed through with its original case
                $ldJson = array(
                    '@context' => self::SCHEMA_ORG_CONTEXT,
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * Speakable: added only when there is something to speak,
         * a specification without any xpath is meaningless
         */
        $speakable = $this->buildSpeakable($page);
        if (!empty($speakable)) {
            $ldJson[self::SPEAKABLE] = $speakable;
        }

        /**
         * Do we have extra ld-json properties
         * They overwrite the generated properties with the same name
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_array($extraLdJson)) {
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                // array_merge fails on a non-array value, the generated document is kept as is
                LogUtility::msg("The extra json-ld metadata of the page ($ID) is not an array and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }

        /**
         * Publish
         *
         * The slashes are deliberately left escaped (default of json_encode),
         * no `</script>` sequence can then appear in the output
         */
        $jsonString = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($jsonString === false) {
            // json_encode returns false on error (for instance on malformed UTF-8)
            LogUtility::msg("The google ld-json of the page ($ID) could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $jsonString,
        );
    }

    /**
     * Build the WebSite document
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page - the requested page
     * @return array - the json-ld properties
     */
    private function buildWebSiteLdJson(Page $page)
    {
        $ldJson = array(
            '@context' => self::SCHEMA_ORG_CONTEXT,
            '@type' => 'WebSite',
            'url' => Site::getUrl(),
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            // The url is a json value inside a script element where HTML entities
            // are not decoded: the separator must be a plain `&` and not `&amp;`
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => Site::getUrl() . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }

        return $ldJson;
    }

    /**
     * Build the Organization document (Organization + Logo)
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array - the json-ld properties
     */
    private function buildOrganizationLdJson()
    {
        return array(
            "@context" => self::SCHEMA_ORG_CONTEXT,
            "@type" => "Organization",
            "url" => Site::getUrl(),
            "logo" => Site::getLogoUrlAsPng()
        );
    }

    /**
     * Build the Article, NewsArticle or BlogPosting document
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * @param Page $page - the requested page
     * @param string $lowerCaseType - the page type in lowercase
     * @return array - the json-ld properties
     */
    private function buildArticleLdJson(Page $page, $lowerCaseType)
    {
        $schemaType = "Article";
        switch ($lowerCaseType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
        }

        // Date should be https://en.wikipedia.org/wiki/ISO_8601
        // (the format `c` of the date function)
        $ldJson = array(
            "@context" => self::SCHEMA_ORG_CONTEXT,
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty(),
            self::DATE_PUBLISHED_KEY => date('c', $page->getPublishedElseCreationTimeStamp()),
            self::DATE_MODIFIED_KEY => date('c', $page->getModifiedTimestamp()),
        );

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        // Image (at least 696 pixels wide)
        // https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
        // BMP, GIF, JPEG, PNG, WebP, and SVG.
        $schemaImages = array();
        foreach ($page->getImageSet() as $imageId) {
            $image = new Image($imageId);
            if (!$image->exists()) {
                LogUtility::msg("The image ($imageId) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }
            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getUrl()
            );
            if ($image->isAnalyzable()) {
                // The dimensions are added only when they are known
                $width = $image->getWidth();
                if (!empty($width)) {
                    $imageObjectSchema["width"] = $width;
                }
                $height = $image->getHeight();
                if (!empty($height)) {
                    $imageObjectSchema["height"] = $height;
                }
            }
            $schemaImages[] = $imageObjectSchema;
        }
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }

        return $ldJson;
    }

    /**
     * Build the speakable specification of the page
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param Page $page - the requested page
     * @return array - the specification or an empty array when the page has no title and no description
     */
    private function buildSpeakable(Page $page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        if (empty($speakableXpath)) {
            return array();
        }
        return array(
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        );
    }


}
263