1<?php
2/**
3 * This is the Dokuwiki export for FINDOLOGIC.
4 * If any bugs occur, please submit a new issue
5 * @see https://github.com/findologic/dokuwiki-plugin-findologic-xml-export/issues/new
6 * @author Dominik Brader <support@findologic.com>
7 */
8
9if (!defined('DOKU_INC')) {
10    define('DOKU_INC', realpath(dirname(__FILE__) . '/../../../') . '/');
11}
12
13require_once(DOKU_INC . 'inc/init.php');
14require_once(__DIR__ . '/PageGetter.php');
15require(__DIR__ . '/vendor/autoload.php');
16
17use FINDOLOGIC\Export\Exporter;
18use FINDOLOGIC\Export\Data\Ordernumber;
19use FINDOLOGIC\Export\Data\Attribute;
20use FINDOLOGIC\Export\Data\Keyword;
21
/**
 * Builds the FINDOLOGIC XML export from the metadata of DokuWiki pages.
 *
 * The list of exportable page ids is collected once in the constructor;
 * generateXMLExport() then serializes a requested window of that list.
 */
class DokuwikiXMLExport
{
    /**
     * Default value for a price. DokuWiki pages do not have a price and this is just a placeholder.
     * FINDOLOGIC requires the price attribute, so this is the reason why it is exported.
     */
    const PRICE_PLACEHOLDER = 0.0;

    /**
     * This value is needed to tell FINDOLOGIC this is a category.
     */
    const CATEGORY_KEY = 'cat';

    /**
     * Delimiter for category depth.
     */
    const CATEGORY_DELIMITER = '_';

    /**
     * In the DokuWiki, the keyword separator is a space.
     * To be able to have tags for multiple words, add an '_'
     */
    const KEYWORD_SPACE = '_';

    /**
     * DokuWiki saves keywords/tags in the subject of the page.
     * The subject is an array with all keywords/tags from the page in it.
     */
    const KEYWORD_KEY = 'subject';

    /**
     * The default usergroup is an empty string.
     */
    const DEFAULT_USERGROUP = '';

    /**
     * @var array $conf DokuWiki configuration.
     */
    protected $conf;

    /**
     * @var array $pages Ids of all exportable pages (never narrowed down by an export run).
     */
    protected $pages;

    /**
     * DokuwikiXMLExport constructor.
     *
     * @param $conf array DokuWiki configuration array.
     */
    public function __construct($conf)
    {
        $this->conf = $conf;
        $this->pages = $this->getPageIds();
    }

    /**
     * Returns all pageIds, excluding those who were set in the configuration.
     *
     * Only pages whose metadata contains a non-empty description and a non-empty
     * title are taken into account.
     *
     * @return array pageIds, re-indexed from 0.
     */
    private function getPageIds()
    {
        $indexer = new Doku_Indexer();
        $indexedPages = $indexer->getPages();

        // Keep only pages that have a description and a title set.
        // The metadata is looked up once per page instead of once per checked field.
        $exportablePages = array_filter($indexedPages, function ($page) {
            $metadata = p_get_metadata($page);
            return !empty($metadata['description']) && !empty($metadata['title']);
        });

        // The exclude option may be absent from the configuration; treat that as "exclude nothing".
        $excludeConfig = isset($this->conf['plugin']['findologicxmlexport']['excludePages'])
            ? $this->conf['plugin']['findologicxmlexport']['excludePages']
            : '';
        $excludedPages = $this->splitConfigToArray($excludeConfig);

        return array_values(array_diff($exportablePages, $excludedPages));
    }

    /**
     * Formats Config string to an array.
     *
     * Entries are separated by commas. Whitespace around entries (including at the
     * very beginning and end of the string) is removed and empty entries are dropped.
     *
     * @param string $config Excluded pages in a string.
     * @return array Returns the pages that should be excluded as array.
     */
    private function splitConfigToArray($config)
    {
        $config = trim((string)$config);
        if ($config === '') {
            return [];
        }

        return preg_split('/\s*,\s*/', $config, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Generate the entire XML Export based on the DokuWiki metadata.
     *
     * @param $start integer Determines the first item (offset) to be exported.
     * @param $submittedCount integer Determines the interval size / number of items to be exported.
     * @return string Returns the XML as string.
     */
    public function generateXMLExport($start, $submittedCount)
    {
        $exporter = Exporter::create(Exporter::TYPE_XML, $submittedCount);

        $total = count($this->pages);
        $count = min($total, $submittedCount); // The count can't be higher then the total number of pages.

        // Slice into a local variable: overwriting $this->pages would make every
        // further call on this instance work on (and count) an already reduced list.
        $pagesToExport = array_slice($this->pages, $start, $count);

        $items = [];
        foreach ($pagesToExport as $key => $page) {
            // array_slice() re-indexes from 0, so offset + key is the position in the full list.
            $item = $exporter->createItem($start + $key);
            $this->fillDataToItem($page, $item);
            $items[] = $item;
        }

        return $exporter->serializeItems($items, $start, $submittedCount, $total);
    }

    /**
     * Gets the Name of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Name/Title of the page.
     */
    private function getName($pageId)
    {
        $metadata = p_get_metadata($pageId);
        return $metadata['title'];
    }

    /**
     * Gets the Summary of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Summary (the abstract of the description metadata) of the page.
     */
    private function getSummary($pageId)
    {
        $metadata = p_get_metadata($pageId);
        return $metadata['description']['abstract'];
    }

    /**
     * Gets the Description of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the raw wiki text of the page.
     */
    private function getDescription($pageId)
    {
        return rawWiki($pageId);
    }

    /**
     * Gets the Url of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the Url of the page.
     */
    private function getUrl($pageId)
    {
        return wl($pageId, '', true);
    }

    /**
     * Gets the creation date of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return DateTime Returns the creation date of the page as DateTime object.
     */
    private function getDateAdded($pageId)
    {
        $metadata = p_get_metadata($pageId);

        // A missing creation date results in timestamp 0, without passing a
        // non-integer value to DateTime::setTimestamp().
        $created = isset($metadata['date']['created']) ? (int)$metadata['date']['created'] : 0;

        $date = new DateTime();
        $date->setTimestamp($created);
        return $date;
    }

    /**
     * Returns the id of a given page.
     * Note: This function is trivial, but is used for legibility reasons.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return string Returns the pageId.
     */
    private function getPageId($pageId)
    {
        return $pageId;
    }

    /**
     * Gets the Category Attribute of the current page.
     *
     * Formats DokuWiki IDs to categories (FINDOLOGIC scheme): underscores become
     * spaces, colons become the category delimiter and the first character of
     * every category level is capitalized. Other characters (e.g. dashes) are kept.
     *
     * Examples:
     *
     * "customer_account:synonyms" -> "customer account:synonyms" -> "customer account_synonyms" -> "Customer account_Synonyms"
     * "plugin:dokuwiki-plugin-findologic-xml-export" -> "plugin_dokuwiki-plugin-findologic-xml-export" -> "Plugin_Dokuwiki-plugin-findologic-xml-export"
     * "wiki:syntax" -> "wiki:syntax" -> "wiki_syntax" -> "Wiki_Syntax"
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return array Returns the category attribute based on the export scheme.
     */
    private function getAttributesCategory($pageId)
    {
        $attribute = str_replace(self::CATEGORY_DELIMITER, ' ', $pageId); // Replace underscores with spaces
        $attribute = str_replace(':', self::CATEGORY_DELIMITER, $attribute); // Replace colons with underscores
        $attribute = ucwords($attribute, self::CATEGORY_DELIMITER); // Capitalize each category

        return [$attribute];
    }

    /**
     * Gets the Keywords of the current page.
     *
     * @param $pageId string Id of the DokuWiki page.
     * @return array Returns all Keywords for the given page, grouped under the default usergroup.
     *               An empty array is returned if the page has no keywords.
     */
    private function getKeywords($pageId)
    {
        $metadata = p_get_metadata($pageId);

        // empty() also covers pages without any subject entry, without raising a notice.
        if (empty($metadata[self::KEYWORD_KEY])) {
            return [];
        }

        $keywords = [];
        foreach ($metadata[self::KEYWORD_KEY] as $keyword) {
            // Keywords with multiple words are separated by an underscore.
            // To export them correctly, those underscores will be replaced by spaces.
            $keywords[] = new Keyword(str_replace(self::KEYWORD_SPACE, ' ', $keyword));
        }

        return [self::DEFAULT_USERGROUP => $keywords];
    }

    /**
     * Fills the given item with all exported data of one page.
     *
     * @param $page string Id of the DokuWiki page.
     * @param $item FINDOLOGIC\Export\Data\Item Item without data.
     *
     * @return FINDOLOGIC\Export\Data\Item Item with filled data.
     */
    public function fillDataToItem($page, $item)
    {
        $item->addName($this->getName($page));

        $item->addSummary($this->getSummary($page));

        $item->addDescription($this->getDescription($page));

        $item->addPrice(self::PRICE_PLACEHOLDER);

        $item->addUrl($this->getUrl($page));

        $item->addDateAdded($this->getDateAdded($page));

        $item->addOrdernumber(new Ordernumber($this->getPageId($page)));

        $keywordsData = $this->getKeywords($page);
        $item->setAllKeywords($keywordsData);

        $attributeCategory = new Attribute(self::CATEGORY_KEY, $this->getAttributesCategory($page));
        $item->addAttribute($attributeCategory);

        return $item;
    }
}
290