xref: /dokuwiki/inc/Search/Collection/PageMetaCollection.php (revision 9369b4a991666bc911474806b106d8958e79f4c1)
183b3acccSAndreas Gohr<?php
283b3acccSAndreas Gohr
383b3acccSAndreas Gohrnamespace dokuwiki\Search\Collection;
483b3acccSAndreas Gohr
5*9369b4a9SAndreas Gohruse dokuwiki\Utf8\Clean;
683b3acccSAndreas Gohruse dokuwiki\Search\Index\AbstractIndex;
783b3acccSAndreas Gohruse dokuwiki\Utf8;
883b3acccSAndreas Gohr
/**
 * Collection for arbitrary page metadata
 *
 * A lookup collection where each token appears at most once per page.
 * Initialized with a subject string (e.g. 'relation_references', 'relation_media')
 * to derive index file names dynamically.
 *
 * Replaces the separate ReferencesCollection and MediaCollection classes and
 * handles arbitrary plugin metadata keys.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 */
class PageMetaCollection extends LookupCollection
{
    /**
     * Set up the collection for the given metadata subject
     *
     * The subject is passed through cleanName() and the result is used as the
     * common prefix of the three index names handed to the parent constructor
     * (suffixes '_w', '_i' and '_p').
     *
     * @param string $subject Metadata key, e.g. 'relation_references'
     * @param AbstractIndex|null $pageIndex Passed on as the first parent argument;
     *                                      the name 'page' is passed instead when omitted
     */
    public function __construct(string $subject, ?AbstractIndex $pageIndex = null)
    {
        $prefix = self::cleanName($subject);
        parent::__construct(
            $pageIndex ?? 'page',
            $prefix . '_w',
            $prefix . '_i',
            $prefix . '_p'
        );
    }

    /**
     * Clean a name for use as a file name
     *
     * Trims and romanizes the name, collapses every run of separator characters
     * (space, dot, slash, backslash, colon, hyphen) into a single underscore,
     * strips away anything that's not an ASCII letter, number, or underscore,
     * and finally lower-cases the result.
     *
     * Note that the result may be an empty string if the name contains no
     * usable characters.
     *
     * @param string $name
     * @return string
     */
    public static function cleanName(string $name): string
    {
        $name = Clean::romanize(trim($name));

        // A literal backslash needs four backslashes here: the single-quoted PHP
        // string halves them to two, which PCRE then reads as one escaped backslash.
        // With only two, PCRE would see "\:" (an escaped colon) and backslashes
        // would be silently dropped below instead of becoming an underscore.
        $name = preg_replace('#[ ./\\\\:-]+#', '_', $name);

        // drop everything that is still not safe for a file name
        $name = preg_replace('/[^A-Za-z0-9_]/', '', $name);

        return strtolower($name);
    }
}
53