xref: /dokuwiki/inc/Search/Collection/PageMetaCollection.php (revision 06053dca2fac9a1da4eb1accf8c2488942da5d2a)
1<?php
2
3namespace dokuwiki\Search\Collection;
4
5use dokuwiki\Utf8\Clean;
6use dokuwiki\Search\Index\AbstractIndex;
7use dokuwiki\Utf8;
8
/**
 * Collection for arbitrary page metadata
 *
 * A lookup collection where each token appears at most once per page.
 * Initialized with a subject string (e.g. 'relation_references', 'relation_media')
 * to derive index file names dynamically.
 *
 * Replaces the separate ReferencesCollection and MediaCollection classes and
 * handles arbitrary plugin metadata keys.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class PageMetaCollection extends LookupCollection
{
    /**
     * Set up the collection for a single metadata subject
     *
     * The subject is normalized with cleanName() and the result is used as
     * the common prefix of the three index names handed to the parent
     * constructor (suffixes '_w', '_i' and '_p').
     *
     * @param string $subject The metadata key, e.g. 'relation_references'
     * @param AbstractIndex|null $pageIndex Page index to use; when omitted
     *                                      the name 'page' is passed on instead
     */
    public function __construct(string $subject, ?AbstractIndex $pageIndex = null)
    {
        $clean = self::cleanName($subject);
        parent::__construct(
            $pageIndex ?? 'page',
            $clean . '_w',
            $clean . '_i',
            $clean . '_p'
        );
    }

    /**
     * Clean a name for use as a file name
     *
     * Romanizes non-latin characters, collapses each run of separator
     * characters (space, dot, slash, backslash, colon, hyphen) into a single
     * underscore, then strips away anything that's not a letter, number, or
     * underscore. The result is lower-cased.
     *
     * Note that the result can be an empty string when nothing usable
     * remains of the given name.
     *
     * @param string $name
     * @return string
     */
    public static function cleanName(string $name): string
    {
        $name = Clean::romanize(trim($name));

        // Four backslashes are required here: the single-quoted PHP string
        // reduces them to two, which PCRE then reads as one literal backslash.
        // With only two, the regex received "\:" (an escaped colon) and
        // backslashes were silently dropped instead of becoming "_".
        $name = preg_replace('#[ \./\\\\:-]+#', '_', $name);

        // whatever is still not a plain ASCII word character gets removed
        $name = preg_replace('/[^A-Za-z0-9_]/', '', $name);

        return strtolower($name);
    }
}
52}
53