xref: /dokuwiki/inc/Search/Collection/PageMetaCollection.php (revision 83b3acccb42578eaa33f84e6b13612436320090b)
1<?php
2
3namespace dokuwiki\Search\Collection;
4
5use dokuwiki\Search\Index\AbstractIndex;
6use dokuwiki\Utf8;
7
8/**
9 * Collection for arbitrary page metadata
10 *
11 * A lookup collection where each token appears at most once per page.
12 * Initialized with a subject string (e.g. 'relation_references', 'relation_media')
13 * to derive index file names dynamically.
14 *
15 * Replaces the separate ReferencesCollection and MediaCollection classes and
16 * handles arbitrary plugin metadata keys.
17 *
18 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
19 * @author Andreas Gohr <andi@splitbrain.org>
20 */
class PageMetaCollection extends LookupCollection
{
    /**
     * Set up the collection for one metadata subject
     *
     * The subject is normalized with cleanName() and used as the common
     * prefix of the three index names handed to the parent constructor
     * ("<clean>_w", "<clean>_i" and "<clean>_p").
     *
     * @param string $subject metadata key, e.g. 'relation_references' or 'relation_media'
     * @param AbstractIndex|null $pageIndex index object to pass on to the parent;
     *                                      when null the name 'page' is passed instead
     */
    public function __construct(string $subject, ?AbstractIndex $pageIndex = null)
    {
        $clean = self::cleanName($subject);
        parent::__construct(
            $pageIndex ?? 'page',
            $clean . '_w',
            $clean . '_i',
            $clean . '_p'
        );
    }

    /**
     * Clean a name for use as a file name
     *
     * Romanizes non-latin characters, collapses every run of separator
     * characters (space, dot, slash, backslash, colon, hyphen) into a single
     * underscore, strips anything that is still not a letter, number, or
     * underscore, and finally lower-cases the result.
     *
     * @param string $name
     * @return string
     */
    public static function cleanName(string $name): string
    {
        $name = Utf8\Clean::romanize(trim($name));
        // Four backslashes are required here: the single-quoted PHP literal
        // halves them to "\\", which PCRE reads as one literal backslash.
        // With only two, the pattern contained "\:" (an escaped colon) and
        // backslashes were dropped by the next step instead of becoming "_".
        $name = preg_replace('#[ ./\\\\:-]+#', '_', $name);
        $name = preg_replace('/[^A-Za-z0-9_]/', '', $name);
        return strtolower($name);
    }
}
52