xref: /plugin/skillforge/helper.php (revision 12fa7ce06de4829ce464095e917e18197b9d4bba)
1<?php
2if (!defined('DOKU_INC')) die();
3require_once __DIR__ . '/classes/StoredZipWriter.php';
4require_once __DIR__ . '/classes/DokuMarkdownConverter.php';
5
6class helper_plugin_skillforge extends DokuWiki_Plugin {
7    public function exportPage($id, $options = array()) {
8        $id = trim($this->normalizeId($id), ':');
9        if ($id === '' || strpos($id, ':') === false) {
10            throw new Exception('Skill pages must be inside a namespace to be exported.');
11        }
12
13        $parts = explode(':', $id);
14        $sourcePage = array_pop($parts);
15        $namespace = implode(':', $parts);
16        if ($namespace === '') throw new Exception('Namespace is required.');
17
18        return $this->exportNamespace($namespace, $sourcePage . '.txt', $options);
19    }
20
21    public function exportNamespace($namespace, $sourcePage, $options = array()) {
22        global $conf;
23        $namespace = trim((string)$namespace, ':');
24        if ($namespace === '') throw new Exception('Namespace is required.');
25        $sourcePage = $sourcePage ?: $this->getConf('default_skill_source');
26        $recursive = isset($options['recursive']) ? (bool)$options['recursive'] : (bool)$this->getConf('recursive');
27        $includeMedia = isset($options['include_media']) ? (bool)$options['include_media'] : (bool)$this->getConf('include_media');
28
29        $sourceId = $this->resolveSourceId($namespace, $sourcePage);
30        $pages = $this->collectPages($namespace, $recursive);
31
32        // Be forgiving: the source page may exist even if the scanner missed it
33        // because of DokuWiki storage differences, cleanID rules or installations
34        // with custom datadir/path handling.
35        $sourceFile = $this->pageFile($sourceId);
36        if (is_readable($sourceFile)) {
37            $pages[$sourceId] = $sourceFile;
38            ksort($pages);
39        }
40
41        if (!$pages) throw new Exception('No pages found in namespace: ' . hsc($namespace));
42        if (!isset($pages[$sourceId])) {
43            throw new Exception('Skill source page not found: ' . hsc($sourceId) . ' (' . hsc($sourceFile) . ')');
44        }
45
46        $converter = new SkillForge_DokuMarkdownConverter();
47        $baseFolder = $this->safeName($namespace) . '-skill';
48        $zip = new SkillForge_StoredZipWriter();
49        $manifest = array(
50            'name' => $this->safeName($namespace),
51            'namespace' => $namespace,
52            'entry' => $this->getConf('output_skill_filename') ?: 'SKILL.md',
53            'generated_at' => date('c'),
54            'source' => 'dokuwiki',
55            'generator' => 'SkillForge',
56            'files' => array()
57        );
58
59        foreach ($pages as $id => $file) {
60            $raw = io_readFile($file, false);
61            $md = $converter->convert($raw);
62            $metadata = $converter->extractMetadata($raw);
63            $outName = ($id === $sourceId) ? ($this->getConf('output_skill_filename') ?: 'SKILL.md') : $this->idToMarkdownFilename($namespace, $id);
64            if ($id === $sourceId) {
65                $md = $this->buildSkillMarkdown($namespace, $metadata, $md, $pages, $sourceId);
66            } else {
67                $md = $this->ensureFrontmatter($id, $namespace, $metadata, $md);
68            }
69            $zip->addFileFromString($baseFolder . '/' . $outName, $md);
70            $manifest['files'][] = $outName;
71        }
72
73        if ($this->getConf('generate_index')) {
74            $zip->addFileFromString($baseFolder . '/index.md', $this->buildIndex($namespace, $manifest['files']));
75            $manifest['files'][] = 'index.md';
76        }
77
78        $zip->addFileFromString($baseFolder . '/skill.json', json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE));
79
80        if ($includeMedia) {
81            foreach ($this->collectMedia($namespace, $recursive) as $mediaId => $mediaFile) {
82                $zip->addFile($baseFolder . '/media/' . basename($mediaFile), $mediaFile);
83            }
84        }
85
86        $tmpDir = rtrim($conf['tmpdir'], '/\\') . '/skillforge';
87        if (!is_dir($tmpDir)) io_mkdir_p($tmpDir);
88        $zipName = $this->makeZipName($namespace);
89        $target = $tmpDir . '/' . $zipName;
90        if (!$zip->save($target)) throw new Exception('Could not write ZIP file to tmp directory.');
91        return array('file' => $target, 'name' => $zipName, 'count' => count($pages));
92    }
93
94
95    public function listNamespaces() {
96        global $conf;
97        $namespaces = array();
98        $base = isset($conf['datadir']) ? rtrim($conf['datadir'], '/\\') : '';
99        if ($base === '' || !is_dir($base)) return array();
100
101        $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($base, FilesystemIterator::SKIP_DOTS));
102        foreach ($iterator as $file) {
103            if (!$file->isFile() || strtolower($file->getExtension()) !== 'txt') continue;
104            $id = $this->fileToPageId($file->getPathname());
105            if ($id === '' || strpos($id, ':') === false) continue;
106            $parts = explode(':', $id);
107            array_pop($parts); // remove page name
108            $current = array();
109            foreach ($parts as $part) {
110                if ($part === '') continue;
111                $current[] = $part;
112                $namespaces[implode(':', $current)] = true;
113            }
114        }
115        $out = array_keys($namespaces);
116        natcasesort($out);
117        return array_values($out);
118    }
119
120    public function sendDownload($name) {
121        global $conf;
122        $name = basename((string)$name);
123        if (!preg_match('/\.zip$/i', $name)) throw new Exception('Invalid download filename.');
124        $file = rtrim($conf['tmpdir'], '/\\') . '/skillforge/' . $name;
125        if (!is_readable($file)) throw new Exception('Export file not found: ' . $name);
126
127        // Avoid corrupt ZIP downloads if something has already started output.
128        while (ob_get_level() > 0) {
129            @ob_end_clean();
130        }
131
132        header('Content-Description: File Transfer');
133        header('Content-Type: application/zip');
134        header('Content-Disposition: attachment; filename="' . $name . '"');
135        header('Content-Transfer-Encoding: binary');
136        header('Content-Length: ' . filesize($file));
137        header('Cache-Control: private, no-cache, no-store, must-revalidate');
138        header('Pragma: public');
139        header('Expires: 0');
140        readfile($file);
141        exit;
142    }
143
144    private function collectPages($namespace, $recursive) {
145        global $conf;
146        $pages = array();
147        $namespace = trim($this->normalizeId($namespace), ':');
148        $root = rtrim($conf['datadir'], '/\\') . '/' . str_replace(':', '/', $namespace);
149
150        // Preferred path: scan the namespace directory directly.
151        if (is_dir($root)) {
152            $iterator = $recursive
153                ? new RecursiveIteratorIterator(new RecursiveDirectoryIterator($root, FilesystemIterator::SKIP_DOTS))
154                : new IteratorIterator(new DirectoryIterator($root));
155            foreach ($iterator as $file) {
156                if (!$file->isFile() || strtolower($file->getExtension()) !== 'txt') continue;
157                $path = $file->getPathname();
158                $id = $this->fileToPageId($path);
159                if ($id !== '' && ($id === $namespace || strpos($id, $namespace . ':') === 0)) {
160                    $pages[$id] = $path;
161                }
162            }
163        }
164
165        // Fallback path: scan all pages and filter by namespace. This helps on
166        // installations where datadir or storage behavior differs from defaults.
167        if (!$pages && is_dir($conf['datadir'])) {
168            $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($conf['datadir'], FilesystemIterator::SKIP_DOTS));
169            foreach ($iterator as $file) {
170                if (!$file->isFile() || strtolower($file->getExtension()) !== 'txt') continue;
171                $path = $file->getPathname();
172                $id = $this->fileToPageId($path);
173                if ($id === '' || $id === $namespace) continue;
174                if ($recursive) {
175                    if (strpos($id, $namespace . ':') === 0) $pages[$id] = $path;
176                } else {
177                    $tail = substr($id, strlen($namespace . ':'));
178                    if (strpos($id, $namespace . ':') === 0 && strpos($tail, ':') === false) $pages[$id] = $path;
179                }
180            }
181        }
182
183        ksort($pages);
184        return $pages;
185    }
186
187    private function collectMedia($namespace, $recursive) {
188        global $conf;
189        $root = rtrim($conf['mediadir'], '/\\') . '/' . str_replace(':', '/', $namespace);
190        $media = array();
191        if (!is_dir($root)) return $media;
192        $iterator = $recursive ? new RecursiveIteratorIterator(new RecursiveDirectoryIterator($root, FilesystemIterator::SKIP_DOTS)) : new IteratorIterator(new DirectoryIterator($root));
193        foreach ($iterator as $file) {
194            if (!$file->isFile()) continue;
195            $path = $file->getPathname();
196            $rel = substr($path, strlen(rtrim($conf['mediadir'], '/\\')) + 1);
197            $media[str_replace('/', ':', $rel)] = $path;
198        }
199        return $media;
200    }
201
202    private function resolveSourceId($namespace, $sourcePage) {
203        $namespace = trim($this->normalizeId($namespace), ':');
204        $sourcePage = preg_replace('/\.txt$/i', '', trim((string)$sourcePage));
205        $sourcePage = trim(str_replace('\\', '/', $sourcePage));
206        $sourcePage = trim(str_replace('/', ':', $sourcePage), ':');
207        $sourcePage = $this->normalizeId($sourcePage);
208        if ($sourcePage === '') $sourcePage = 'start';
209
210        // Full DokuWiki ID given, e.g. :skilltest:start or skilltest:start.
211        if (strpos($sourcePage, ':') !== false) return trim($sourcePage, ':');
212
213        // Relative page given, e.g. start or start.txt.
214        return trim($namespace . ':' . $sourcePage, ':');
215    }
216
217    private function normalizeId($id) {
218        $id = trim(str_replace('\\', '/', (string)$id));
219        $id = trim(str_replace('/', ':', $id), ':');
220        if (function_exists('cleanID')) return cleanID($id);
221        return strtolower($id);
222    }
223
224    private function pageFile($id) {
225        global $conf;
226        $id = trim($this->normalizeId($id), ':');
227        if (function_exists('wikiFN')) return wikiFN($id);
228        return rtrim($conf['datadir'], '/\\') . '/' . str_replace(':', '/', $id) . '.txt';
229    }
230
231    private function fileToPageId($path) {
232        global $conf;
233        $base = rtrim(realpath($conf['datadir']) ?: $conf['datadir'], '/\\');
234        $real = realpath($path) ?: $path;
235        $rel = substr($real, strlen($base) + 1);
236        $rel = str_replace('\\', '/', $rel);
237        if (substr($rel, -4) !== '.txt') return '';
238        return trim(str_replace('/', ':', substr($rel, 0, -4)), ':');
239    }
240
241    private function idToMarkdownFilename($namespace, $id) {
242        $rel = preg_replace('/^' . preg_quote($namespace, '/') . ':?/', '', $id);
243        $rel = trim(str_replace(':', '/', $rel), '/');
244        if ($rel === '') $rel = 'page';
245        return $rel . '.md';
246    }
247
248    private function buildSkillMarkdown($namespace, $metadata, $body, $pages, $sourceId) {
249        if ($metadata === '') {
250            $metadata = "name: " . $this->safeName($namespace) . "\ndescription: Exported DokuWiki namespace as an AI skill.\nversion: 0.1.0\nsource: dokuwiki\nnamespace: " . $namespace;
251        }
252        $links = "\n\n## Knowledge files\n\n";
253        foreach ($pages as $id => $file) {
254            if ($id === $sourceId) continue;
255            $links .= '- [' . $this->titleFromId($id) . '](' . $this->idToMarkdownFilename($namespace, $id) . ")\n";
256        }
257        return "---\n" . trim($metadata) . "\n---\n\n" . trim($body) . $links . "\n";
258    }
259
260    private function ensureFrontmatter($id, $namespace, $metadata, $body) {
261        if ($metadata === '') {
262            $metadata = "title: " . $this->titleFromId($id) . "\ntype: page\nsource: dokuwiki\ndokuwiki_id: " . $id . "\nnamespace: " . $namespace;
263        }
264        return "---\n" . trim($metadata) . "\n---\n\n" . trim($body) . "\n";
265    }
266
267    private function buildIndex($namespace, $files) {
268        $out = "---\ntitle: Skill Index\ntype: index\nsource: dokuwiki\nnamespace: " . $namespace . "\ngenerator: SkillForge\n---\n\n# Skill Index\n\nThis package was generated from the DokuWiki namespace `" . $namespace . "`.\n\n## Files\n\n";
269        foreach ($files as $file) {
270            if ($file === 'index.md') continue;
271            $out .= '- [' . $file . '](' . $file . ")\n";
272        }
273        return $out;
274    }
275
276    private function makeZipName($namespace) {
277        $pattern = $this->getConf('zip_filename_pattern') ?: '{namespace}-skill-{date}.zip';
278        $name = str_replace(array('{namespace}', '{date}'), array($this->safeName($namespace), date('Y-m-d')), $pattern);
279        $name = preg_replace('/[^A-Za-z0-9._-]+/', '-', $name);
280        if (!preg_match('/\.zip$/i', $name)) $name .= '.zip';
281        return $name;
282    }
283
284    private function safeName($value) {
285        $value = strtolower(str_replace(':', '-', $value));
286        $value = preg_replace('/[^a-z0-9._-]+/', '-', $value);
287        return trim($value, '-') ?: 'skillforge';
288    }
289
290    private function titleFromId($id) {
291        $base = basename(str_replace(':', '/', $id));
292        return ucwords(str_replace(array('_', '-'), ' ', $base));
293    }
294}
295