xref: /dokuwiki/inc/Search/Collection/PageMetaCollection.php (revision 83b3acccb42578eaa33f84e6b13612436320090b)
1*83b3acccSAndreas Gohr<?php
2*83b3acccSAndreas Gohr
3*83b3acccSAndreas Gohrnamespace dokuwiki\Search\Collection;
4*83b3acccSAndreas Gohr
5*83b3acccSAndreas Gohruse dokuwiki\Search\Index\AbstractIndex;
6*83b3acccSAndreas Gohruse dokuwiki\Utf8;
7*83b3acccSAndreas Gohr
/**
 * Collection for arbitrary page metadata
 *
 * A lookup collection where each token appears at most once per page.
 * Initialized with a subject string (e.g. 'relation_references', 'relation_media')
 * from which the index file names are derived dynamically.
 *
 * Replaces the separate ReferencesCollection and MediaCollection classes and
 * handles arbitrary plugin metadata keys.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class PageMetaCollection extends LookupCollection
{
    /**
     * Set up the collection for a metadata subject
     *
     * The subject is normalized with cleanName() and the result is used as the
     * common prefix of the three index names handed to the parent constructor
     * (suffixes '_w', '_i' and '_p').
     *
     * @param string $subject metadata key this collection indexes, e.g. 'relation_media'
     * @param AbstractIndex|null $pageIndex page index to use; when null the index name 'page' is passed on instead
     */
    public function __construct(string $subject, ?AbstractIndex $pageIndex = null)
    {
        $clean = self::cleanName($subject);
        parent::__construct(
            $pageIndex ?? 'page',
            $clean . '_w',
            $clean . '_i',
            $clean . '_p'
        );
    }

    /**
     * Clean a name for use as a file name
     *
     * Romanizes non-latin characters, collapses each run of separator
     * characters (space, dot, slash, backslash, colon, dash) into a single
     * underscore, strips away anything that's not a letter, number, or
     * underscore and finally lower-cases the result.
     *
     * Note that the result may be an empty string when the name contains no
     * usable characters at all.
     *
     * @param string $name
     * @return string
     */
    public static function cleanName(string $name): string
    {
        $name = Utf8\Clean::romanize(trim($name));
        // A literal backslash inside a single-quoted PHP string that is used as a
        // regex needs four backslashes: PHP reduces them to two, PCRE to one.
        // With only two, the pattern contained an escaped colon and backslashes
        // were dropped by the next step instead of being turned into underscores.
        $name = preg_replace('#[ \./\\\\:-]+#', '_', $name);
        $name = preg_replace('/[^A-Za-z0-9_]/', '', $name);
        return strtolower($name);
    }
}
52