xref: /plugin/combo/action/metagoogle.php (revision dd39a644a4a7ec3e65135baaf48acd5f7f628510)
1<?php
2
3use ComboStrap\Image;
4use ComboStrap\LogUtility;
5use ComboStrap\MetadataUtility;
6use ComboStrap\PluginUtility;
7use ComboStrap\Page;
8use ComboStrap\Site;
9use ComboStrap\StringUtility;
10
11if (!defined('DOKU_INC')) die();
12
13require_once(__DIR__ . '/../class/Site.php');
14
/**
 * Action plugin that publishes structured data (JSON-LD, schema.org vocabulary)
 * in the HTML head of the current page.
 *
 * The JSON-LD document is derived from the page type (website, organization,
 * article / news / blog, or any type set by the user), completed with a
 * speakable specification and with the extra properties stored in the
 * `json-ld` metadata of the page.
 *
 * To test locally use ngrok
 * https://developers.google.com/search/docs/guides/debug#testing-firewalled-pages
 *
 * Ref:
 * https://developers.google.com/search/docs/guides/intro-structured-data
 * https://github.com/giterlizzi/dokuwiki-plugin-semantic/blob/master/helper.php
 * https://json-ld.org/
 * https://schema.org/docs/documents.html
 * https://search.google.com/structured-data/testing-tool/u/0/#url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FPacu_jawi
 */
class action_plugin_combo_metagoogle extends DokuWiki_Action_Plugin
{

    const CANONICAL = "google";
    const JSON_LD_PROPERTY = "json-ld";
    const NEWSARTICLE_SCHEMA_ORG_LOWERCASE = "newsarticle";
    const BLOGPOSTING_SCHEMA_ORG_LOWERCASE = "blogposting";
    const DATE_PUBLISHED_KEY = "datePublished";
    const DATE_MODIFIED_KEY = "dateModified";
    const SPEAKABLE = "speakable";
    const PUBLISHER = "publisher";

    /**
     * The JSON-LD context shared by every generated document
     */
    const SCHEMA_ORG_CONTEXT = "https://schema.org";

    public function __construct()
    {
        // enable direct access to language strings
        // ie $this->lang
        $this->setupLocale();
    }

    /**
     * Hook the JSON-LD generation on the output of the meta headers
     *
     * @param Doku_Event_Handler $controller
     */
    public function register(Doku_Event_Handler $controller)
    {
        $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'metaGoogleProcessing', array());
    }

    /**
     * Build the JSON-LD document of the current page and append it
     * as an `application/ld+json` script to the data of the event.
     *
     * Nothing is added when there is no current page id, when the page
     * does not exist, when it is a bar or when it has no type.
     *
     * @param $event - the event whose `data["script"]` array receives the script entry
     */
    public function metaGoogleProcessing($event)
    {

        global $ID;
        if (empty($ID)) {
            // $ID is null
            // case on "/lib/exe/mediamanager.php"
            return;
        }
        $page = new Page($ID);
        if (!$page->existInFs()) {
            return;
        }
        /**
         * No metadata for bars
         */
        if ($page->isBar()) {
            return;
        }

        $type = $page->getType();
        if (empty($type)) {
            return;
        }
        $lowerCaseType = strtolower($type);
        switch ($lowerCaseType) {
            case Page::WEBSITE_TYPE:
                $ldJson = $this->buildWebSiteLdJson($page);
                break;

            case Page::ORGANIZATION_TYPE:
                $ldJson = $this->buildOrganizationLdJson();
                break;

            case Page::ARTICLE_TYPE:
            case Page::NEWS_TYPE:
            case Page::BLOG_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $ldJson = $this->buildArticleLdJson($page, $lowerCaseType);
                break;

            default:
                // May be added manually by the user itself
                // (the type is published as written by the user)
                $ldJson = array(
                    '@context' => self::SCHEMA_ORG_CONTEXT,
                    '@type' => $type,
                    'url' => $page->getCanonicalUrlOrDefault()
                );
                break;
        }

        /**
         * Speakable, only when there is something to speak:
         * a specification with an empty xpath list is meaningless
         *
         * NOTE(review): schema.org lists `speakable` on Article and WebPage,
         * confirm that it is wanted for the WebSite and Organization types
         */
        $speakable = $this->buildSpeakable($page);
        if (!empty($speakable)) {
            $ldJson[self::SPEAKABLE] = $speakable;
        }

        /**
         * Do we have extra ld-json properties
         * They are merged last and overwrite the generated properties
         */
        $extraLdJson = $page->getMetadata(self::JSON_LD_PROPERTY);
        if (!empty($extraLdJson)) {
            if (is_array($extraLdJson)) {
                $ldJson = array_merge($ldJson, $extraLdJson);
            } else {
                // array_merge fails on a scalar, the generated properties are kept
                LogUtility::msg("The extra json-ld metadata of the page ($ID) is not an array and was ignored", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            }
        }

        /**
         * Publish
         * The slashes are left escaped by json_encode on purpose:
         * a `</script>` in a value can then not close the script element
         */
        $jsonEncoded = json_encode($ldJson, JSON_PRETTY_PRINT);
        if ($jsonEncoded === false) {
            LogUtility::msg("The json-ld of the page ($ID) could not be encoded (" . json_last_error_msg() . ")", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
            return;
        }
        $event->data["script"][] = array(
            "type" => "application/ld+json",
            "_data" => $jsonEncoded,
        );
    }

    /**
     * Build the WebSite document
     *
     * https://schema.org/WebSite
     * https://developers.google.com/search/docs/data-types/sitelinks-searchbox
     *
     * @param Page $page - the current page
     * @return array the json-ld properties
     */
    private function buildWebSiteLdJson($page)
    {
        $siteUrl = Site::getUrl();
        $ldJson = array(
            '@context' => self::SCHEMA_ORG_CONTEXT,
            '@type' => 'WebSite',
            'url' => $siteUrl,
            'name' => Site::getTitle()
        );

        if ($page->isHomePage()) {
            // The value is published in a script element where HTML entities
            // are not decoded: the query separator must be a raw `&`
            $ldJson['potentialAction'] = array(
                '@type' => 'SearchAction',
                'target' => $siteUrl . DOKU_SCRIPT . '?do=search&id={search_term_string}',
                'query-input' => 'required name=search_term_string',
            );
        }

        $tag = Site::getTag();
        if (!empty($tag)) {
            $ldJson['description'] = $tag;
        }
        $siteImageUrl = Site::getLogoUrlAsPng();
        if (!empty($siteImageUrl)) {
            $ldJson['image'] = $siteImageUrl;
        }
        return $ldJson;
    }

    /**
     * Build the Organization document (Organization + Logo)
     *
     * https://developers.google.com/search/docs/data-types/logo
     *
     * @return array the json-ld properties
     */
    private function buildOrganizationLdJson()
    {
        return array(
            "@context" => self::SCHEMA_ORG_CONTEXT,
            "@type" => "Organization",
            "url" => Site::getUrl(),
            "logo" => Site::getLogoUrlAsPng()
        );
    }

    /**
     * Build the Article, NewsArticle or BlogPosting document
     *
     * https://developers.google.com/search/docs/data-types/article
     * https://schema.org/Article
     *
     * @param Page $page - the current page
     * @param string $lowerCaseType - the page type in lowercase
     * @return array the json-ld properties
     */
    private function buildArticleLdJson($page, $lowerCaseType)
    {
        switch ($lowerCaseType) {
            case Page::NEWS_TYPE:
            case self::NEWSARTICLE_SCHEMA_ORG_LOWERCASE:
                $schemaType = "NewsArticle";
                break;
            case Page::BLOG_TYPE:
            case self::BLOGPOSTING_SCHEMA_ORG_LOWERCASE:
                $schemaType = "BlogPosting";
                break;
            default:
                $schemaType = "Article";
                break;
        }

        $ldJson = array(
            "@context" => self::SCHEMA_ORG_CONTEXT,
            "@type" => $schemaType,
            'url' => $page->getCanonicalUrlOrDefault(),
            "headline" => $page->getTitleNotEmpty()
        );

        // Date should be https://en.wikipedia.org/wiki/ISO_8601
        // An empty timestamp is skipped: date() would otherwise publish a bogus date
        $publishedTimestamp = $page->getPublishedElseCreationTimeStamp();
        if (!empty($publishedTimestamp)) {
            $ldJson[self::DATE_PUBLISHED_KEY] = date('c', $publishedTimestamp);
        }
        $modifiedTimestamp = $page->getModifiedTimestamp();
        if (!empty($modifiedTimestamp)) {
            $ldJson[self::DATE_MODIFIED_KEY] = date('c', $modifiedTimestamp);
        }

        /**
         * Publisher info
         */
        $publisher = array(
            "@type" => "Organization",
            "name" => Site::getTitle()
        );
        $logoUrlAsPng = Site::getLogoUrlAsPng();
        if (!empty($logoUrlAsPng)) {
            $publisher["logo"] = array(
                "@type" => "ImageObject",
                "url" => $logoUrlAsPng
            );
        }
        $ldJson[self::PUBLISHER] = $publisher;

        $schemaImages = $this->buildImageObjects($page);
        if (!empty($schemaImages)) {
            $ldJson["image"] = $schemaImages;
        }
        return $ldJson;
    }

    /**
     * Build the ImageObject list of the images of a page
     *
     * Image (at least 696 pixels wide)
     * https://developers.google.com/search/docs/advanced/guidelines/google-images#supported-image-formats
     * BMP, GIF, JPEG, PNG, WebP, and SVG.
     *
     * An image that does not exist is reported and skipped.
     *
     * @param Page $page - the current page
     * @return array a list of ImageObject properties, empty if the page has no existing image
     */
    private function buildImageObjects($page)
    {
        $schemaImages = array();
        foreach ($page->getImageSet() as $imageId) {
            $image = new Image($imageId);
            if (!$image->exists()) {
                LogUtility::msg("The image ($imageId) does not exist and was not added to the google ld-json", LogUtility::LVL_MSG_ERROR, self::CANONICAL);
                continue;
            }
            $imageObjectSchema = array(
                "@type" => "ImageObject",
                "url" => $image->getUrl()
            );
            if ($image->isAnalyzable()) {
                $width = $image->getWidth();
                if (!empty($width)) {
                    $imageObjectSchema["width"] = $width;
                }
                $height = $image->getHeight();
                if (!empty($height)) {
                    $imageObjectSchema["height"] = $height;
                }
            }
            $schemaImages[] = $imageObjectSchema;
        }
        return $schemaImages;
    }

    /**
     * Build the speakable specification of a page
     *
     * https://developers.google.com/search/docs/data-types/speakable
     *
     * @param Page $page - the current page
     * @return array the SpeakableSpecification properties or an empty array if the page has no title and no description
     */
    private function buildSpeakable($page)
    {
        $speakableXpath = array();
        if (!empty($page->getTitle())) {
            $speakableXpath[] = "/html/head/title";
        }
        if (!empty($page->getDescription())) {
            /**
             * Only the description written otherwise this is not speakable
             * you can have link and other strangeness
             */
            $speakableXpath[] = "/html/head/meta[@name='description']/@content";
        }
        if (empty($speakableXpath)) {
            return array();
        }
        return array(
            "@type" => "SpeakableSpecification",
            "xpath" => $speakableXpath
        );
    }

}
265