xref: /dokuwiki/inc/Utf8/Sort.php (revision dccd6b2bba7367e4d1d2d7aa84c9f9d15584b593)
12d85e841SAndreas Gohr<?php
22d85e841SAndreas Gohr
32d85e841SAndreas Gohrnamespace dokuwiki\Utf8;
42d85e841SAndreas Gohr
5704bf768SAndreas Gohruse dokuwiki\Logger;
6704bf768SAndreas Gohr
/**
 * DokuWiki sort functions
 *
 * When the "intl" extension is available, all sorts are done using a collator.
 * Otherwise, primitive PHP functions are called.
 *
 * The collator is created using the locale given in $conf['lang'].
 * It always uses case insensitive "natural" ordering in its collation.
 * The fallback solution uses the primitive PHP functions that return almost the same results
 * when the input is text with only [A-Za-z0-9] characters.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Moisés Braga Ribeiro <moisesbr@gmail.com>
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
class Sort
{
    /** @var \Collator[] language specific collators indexed by locale, usually only one */
    protected static $collators = [];

    /** @var bool should the intl extension be used if available? For testing only */
    protected static $useIntl = true;

    /**
     * Initialization of a collator using $conf['lang'] as the locale.
     * The initialization is done only once per locale; the result is kept in self::$collators.
     * The collation takes "natural ordering" into account, that is, "page 2" is before "page 10".
     *
     * @return \Collator|null Returns a configured collator, or null if the "intl" extension is
     *                        unavailable/disabled or the collator cannot be created.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    protected static function getCollator()
    {
        // check if intl extension is available (and not switched off for testing)
        if (!self::$useIntl || !class_exists('\Collator')) {
            return null;
        }

        // the configured language is only needed once we know a collator can be used
        global $conf;
        $lc = $conf['lang'];

        // load collator if not available yet
        if (!isset(self::$collators[$lc])) {
            $collator = \Collator::create($lc);
            if ($collator === null) {
                return null; // check needed as stated in the docs
            }

            // numeric collation is what gives the "natural" ordering of digit sequences
            $collator->setAttribute(\Collator::NUMERIC_COLLATION, \Collator::ON);
            Logger::getInstance(Logger::LOG_DEBUG)->log(
                'Collator created with locale "' . $lc . '": numeric collation on, ' .
                'valid locale "' . $collator->getLocale(\Locale::VALID_LOCALE) . '", ' .
                'actual locale "' . $collator->getLocale(\Locale::ACTUAL_LOCALE) . '"',
                null,
                __FILE__,
                __LINE__
            );
            self::$collators[$lc] = $collator;
        }

        return self::$collators[$lc];
    }

    /**
     * Compare two strings with the given collator, falling back to strnatcasecmp().
     *
     * Collator::compare() returns false when the comparison fails. That value must not leak
     * to callers expecting an integer, so the fallback is also used in that case.
     *
     * @param \Collator|null $collator The collator to use, or null to use the fallback directly.
     * @param string $str1 The first string.
     * @param string $str2 The second string.
     * @return int Returns < 0 if $str1 is less than $str2; > 0 if $str1 is greater than $str2, and 0 if they are equal.
     */
    private static function compareWith($collator, $str1, $str2)
    {
        if ($collator !== null) {
            $result = $collator->compare($str1, $str2);
            if ($result !== false) {
                return $result;
            }
        }

        return strnatcasecmp($str1, $str2);
    }

    /**
     * Enable or disable the use of the "intl" extension collator.
     * This is used for testing and should not be used in normal code.
     *
     * @param bool $use
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function useIntl($use = true)
    {
        self::$useIntl = $use;
    }

    /**
     * Drop-in replacement for strcmp(), strcasecmp(), strnatcmp() and strnatcasecmp().
     * It uses a collator-based comparison, or strnatcasecmp() as a fallback
     * (also when the collator reports a failure for the given strings).
     *
     * @param string $str1 The first string.
     * @param string $str2 The second string.
     * @return int Returns < 0 if $str1 is less than $str2; > 0 if $str1 is greater than $str2, and 0 if they are equal.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function strcmp($str1, $str2)
    {
        return self::compareWith(self::getCollator(), $str1, $str2);
    }

    /**
     * Drop-in replacement for sort().
     * It uses a collator-based sort, or sort() with flags SORT_NATURAL and SORT_FLAG_CASE as a fallback.
     *
     * @param array $array The input array, sorted in place.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function sort(&$array)
    {
        $collator = self::getCollator();
        if ($collator === null) {
            return sort($array, SORT_NATURAL | SORT_FLAG_CASE);
        }

        return $collator->sort($array);
    }

    /**
     * Drop-in replacement for ksort().
     * It uses a collator-based sort, or ksort() with flags SORT_NATURAL and SORT_FLAG_CASE as a fallback.
     *
     * @param array $array The input array, sorted in place by key.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function ksort(&$array)
    {
        $collator = self::getCollator();
        if ($collator === null) {
            return ksort($array, SORT_NATURAL | SORT_FLAG_CASE);
        }

        // the collator has no key sort of its own, so its comparison is plugged into uksort();
        // keys may be integers, hence the explicit string casts
        return uksort($array, static function ($key1, $key2) use ($collator) {
            return self::compareWith($collator, (string)$key1, (string)$key2);
        });
    }

    /**
     * Drop-in replacement for asort(), natsort() and natcasesort().
     * It uses a collator-based sort, or asort() with flags SORT_NATURAL and SORT_FLAG_CASE as a fallback.
     *
     * @param array $array The input array, sorted in place with key association kept.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function asort(&$array)
    {
        $collator = self::getCollator();
        if ($collator === null) {
            return asort($array, SORT_NATURAL | SORT_FLAG_CASE);
        }

        return $collator->asort($array);
    }

    /**
     * Drop-in replacement for asort(), natsort() and natcasesort() when the parameter is an array of filenames.
     * Filenames may not be equal to page names, depending on the setting in $conf['fnencode'],
     * so the correct behavior is to sort page names and reflect this sorting in the filename array.
     *
     * Each filename is passed through utf8_decodeFN() before it is compared; the array itself
     * keeps the original (encoded) filenames and their keys.
     *
     * @param array $array The input array, sorted in place with key association kept.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function asortFN(&$array)
    {
        $collator = self::getCollator();

        return uasort($array, static function ($fn1, $fn2) use ($collator) {
            return self::compareWith($collator, utf8_decodeFN($fn1), utf8_decodeFN($fn2));
        });
    }
}
181