<?php
/**
 * This is the Dokuwiki export for FINDOLOGIC.
 * If any bugs occur, please submit a new issue
 * @see https://github.com/findologic/dokuwiki-plugin-findologic-xml-export/issues/new
 * @author Dominik Brader <support@findologic.com>
 */

if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(dirname(__FILE__) . '/../../../') . '/');
}

require_once(DOKU_INC . 'inc/init.php');
require_once(__DIR__ . '/PageGetter.php');
require(__DIR__ . '/vendor/autoload.php');

use FINDOLOGIC\Export\Exporter;
use FINDOLOGIC\Export\Data\Ordernumber;
use FINDOLOGIC\Export\Data\Attribute;
use FINDOLOGIC\Export\Data\Keyword;

/**
 * Builds the FINDOLOGIC XML export from DokuWiki page metadata.
 *
 * The list of exportable page ids is collected once in the constructor; generateXMLExport()
 * then turns a window of that list into export items.
 */
class DokuwikiXMLExport
{
    /**
     * Default value for a price. DokuWiki pages do not have a price and this is just a placeholder.
     * FINDOLOGIC requires the price attribute, so this is the reason why it is exported.
     */
    const PRICE_PLACEHOLDER = 0.0;

    /**
     * This value is needed to tell FINDOLOGIC this is a category.
     */
    const CATEGORY_KEY = 'cat';

    /**
     * Delimiter for category depth.
     */
    const CATEGORY_DELIMITER = '_';

    /**
     * In DokuWiki, the keyword separator is a space.
     * To be able to have tags consisting of multiple words, an '_' is used in place of the space.
     */
    const KEYWORD_SPACE = '_';

    /**
     * DokuWiki saves keywords/tags in the subject of the page.
     * The subject is an array with all keywords/tags from the page in it.
     */
    const KEYWORD_KEY = 'subject';

    /**
     * The default usergroup is an empty string.
     */
    const DEFAULT_USERGROUP = '';

    /**
     * @var array $conf DokuWiki configuration.
     */
    protected $conf;

    /**
     * @var array $pages All pageIds.
     */
    protected $pages;

    /**
     * DokuwikiXMLExport constructor.
     *
     * @param array $conf DokuWiki configuration array.
     */
    public function __construct($conf)
    {
        $this->conf = $conf;
        $this->pages = $this->getPageIds();
    }

    /**
     * Returns all pageIds, excluding those that were set in the configuration.
     *
     * @return array pageIds, re-indexed from 0.
     */
    private function getPageIds()
    {
        $indexer = new Doku_Indexer();
        $pagesAndDeletedPages = $indexer->getPages();

        // Keep only pages that have both a description and a title set. The metadata is
        // fetched once per page and both fields are checked on the same result.
        $pages = array_filter($pagesAndDeletedPages, function ($page) {
            $metadata = p_get_metadata($page);

            return !empty($metadata['description']) && !empty($metadata['title']);
        });

        // A missing "excludePages" setting is treated like an empty one (nothing excluded).
        $excludeConfig = isset($this->conf['plugin']['findologicxmlexport']['excludePages'])
            ? $this->conf['plugin']['findologicxmlexport']['excludePages']
            : '';
        $excludedPages = $this->splitConfigToArray($excludeConfig);

        // array_filter() and array_diff() preserve keys, so re-index to get a plain list.
        return array_values(array_diff($pages, $excludedPages));
    }

    /**
     * Formats a comma separated config string to an array.
     *
     * Whitespace around the separators and around the whole string is ignored and empty
     * entries are dropped, so " a , b ," yields ['a', 'b'] and an empty string yields [].
     *
     * @param string $config Excluded pages in a string.
     * @return array Returns the pages that should be excluded as array.
     */
    private function splitConfigToArray($config)
    {
        return preg_split('/\s*,\s*/', trim((string)$config), -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Generate the entire XML Export based on the DokuWiki metadata.
     *
     * @param integer $start Determines the first item (offset) to be exported.
     * @param integer $submittedCount Determines the interval size / number of items to be exported.
     * @return string Returns the XML as string.
     */
    public function generateXMLExport($start, $submittedCount)
    {
        $exporter = Exporter::create(Exporter::TYPE_XML, $submittedCount);

        $total = count($this->pages);
        $count = min($total, $submittedCount); // The count can't be higher than the total number of pages.

        // Slice into a local variable so that $this->pages keeps the full list. Overwriting the
        // property would make a second call report a wrong total and export the wrong window.
        $pagesToExport = array_slice($this->pages, $start, $count);

        $items = [];
        // array_slice() re-indexes integer keys from 0, so $start + $key is the item's
        // position within the full page list.
        foreach ($pagesToExport as $key => $page) {
            $item = $exporter->createItem($start + $key);
            $this->fillDataToItem($page, $item);
            $items[] = $item;
        }

        return $exporter->serializeItems($items, $start, $submittedCount, $total);
    }

    /**
     * Gets the Name of the current page.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return string Returns the Name/Title of the page.
     */
    private function getName($pageId)
    {
        $metadata = p_get_metadata($pageId);

        return $metadata['title'];
    }

    /**
     * Gets the Summary of the current page.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return string Returns the Summary (the abstract of the description metadata) of the page.
     */
    private function getSummary($pageId)
    {
        $metadata = p_get_metadata($pageId);

        return $metadata['description']['abstract'];
    }

    /**
     * Gets the Description of the current page.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return string Returns the Description of the page, as returned by rawWiki().
     */
    private function getDescription($pageId)
    {
        return rawWiki($pageId);
    }

    /**
     * Gets the Url of the current page.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return string Returns the Url of the page.
     */
    private function getUrl($pageId)
    {
        return wl($pageId, '', true);
    }

    /**
     * Gets the DateTime of the current page.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return DateTime Returns the creation date of the page, taken from its metadata.
     */
    private function getDateAdded($pageId)
    {
        $metadata = p_get_metadata($pageId);

        $date = new DateTime();
        $date->setTimestamp($metadata['date']['created']);

        return $date;
    }

    /**
     * Returns the id of a given page.
     * Note: This function is trivial, but is used for legibility reasons.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return string Returns the pageId.
     */
    private function getPageId($pageId)
    {
        return $pageId;
    }

    /**
     * Gets the Category Attribute of the current page.
     *
     * Formats DokuWiki IDs to categories (FINDOLOGIC scheme): underscores become spaces,
     * colons become the category delimiter and the first character of each category is
     * upper-cased.
     *
     * Examples:
     *
     * "customer_account:synonyms" -> "customer account:synonyms" -> "customer account_synonyms"
     *     -> "Customer account_Synonyms"
     * "plugin:dokuwiki-plugin-findologic-xml-export" -> "plugin_dokuwiki-plugin-findologic-xml-export"
     *     -> "Plugin_Dokuwiki-plugin-findologic-xml-export"
     * "wiki:syntax" -> "wiki_syntax" -> "Wiki_Syntax"
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return array Returns the category attribute based on the export scheme.
     */
    private function getAttributesCategory($pageId)
    {
        $attribute = str_replace(self::CATEGORY_DELIMITER, ' ', $pageId); // Replace underscores with spaces
        $attribute = str_replace(':', self::CATEGORY_DELIMITER, $attribute); // Replace colons with underscores
        $attribute = ucwords($attribute, self::CATEGORY_DELIMITER); // Capitalize each category

        return [$attribute];
    }

    /**
     * Gets the Keywords of the current page.
     *
     * @param string $pageId Id of the DokuWiki page.
     * @return array Returns all Keywords for the given page, keyed by usergroup.
     *               An empty array is returned when the page has no keywords.
     */
    private function getKeywords($pageId)
    {
        $metadata = p_get_metadata($pageId);

        // empty() also covers pages whose metadata has no subject entry at all.
        if (empty($metadata[self::KEYWORD_KEY])) {
            return [];
        }

        $keywords = [];
        foreach ($metadata[self::KEYWORD_KEY] as $keyword) {
            // Keywords with multiple words are separated by an underscore.
            // To export them correctly, those underscores will be replaced by spaces.
            $keywords[] = new Keyword(str_replace(self::KEYWORD_SPACE, ' ', $keyword));
        }

        return [self::DEFAULT_USERGROUP => $keywords];
    }

    /**
     * Fills an export item with the data of the given page.
     *
     * @param string $page Id of the DokuWiki page.
     * @param FINDOLOGIC\Export\Data\Item $item Item without data.
     *
     * @return FINDOLOGIC\Export\Data\Item Item with filled data.
     */
    public function fillDataToItem($page, $item)
    {
        $item->addName($this->getName($page));

        $item->addSummary($this->getSummary($page));

        $item->addDescription($this->getDescription($page));

        $item->addPrice(self::PRICE_PLACEHOLDER);

        $item->addUrl($this->getUrl($page));

        $item->addDateAdded($this->getDateAdded($page));

        $item->addOrdernumber(new Ordernumber($this->getPageId($page)));

        $keywordsData = $this->getKeywords($page);
        $item->setAllKeywords($keywordsData);

        $attributeCategory = new Attribute(self::CATEGORY_KEY, $this->getAttributesCategory($page));
        $item->addAttribute($attributeCategory);

        return $item;
    }
}