xref: /dokuwiki/inc/Utf8/Sort.php (revision 704bf768643b49d040d9b4f00641e69c9f24dfa3)
12d85e841SAndreas Gohr<?php
22d85e841SAndreas Gohr
32d85e841SAndreas Gohrnamespace dokuwiki\Utf8;
42d85e841SAndreas Gohr
5*704bf768SAndreas Gohruse dokuwiki\Logger;
6*704bf768SAndreas Gohr
/**
 * DokuWiki sort functions
 *
 * When the "intl" extension is available, all sorts are done using a collator.
 * Otherwise, primitive PHP functions are called.
 *
 * The collator is created using the locale given in $conf['lang'].
 * It always uses case insensitive "natural" ordering in its collation.
 * The fallback solution uses the primitive PHP functions that return almost the same results
 * when the input is text with only [A-Za-z0-9] characters.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Moisés Braga Ribeiro <moisesbr@gmail.com>
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
class Sort
{
    /** @var \Collator[] language specific collators indexed by locale, usually only one */
    protected static $collators = [];

    /** @var bool should the intl extension be used if available? For testing only */
    protected static $useIntl = true;

    /**
     * Initialization of a collator using $conf['lang'] as the locale.
     * The initialization is done only once per locale; the collator is cached afterwards.
     * The collation takes "natural ordering" into account, that is, "page 2" is before "page 10".
     *
     * @return \Collator|null Returns a configured collator, or null if the intl collator is unavailable,
     *                        disabled via useIntl(), or cannot be created.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    protected static function getCollator()
    {
        global $conf;
        $lc = $conf['lang'];

        // check if intl extension is available (and not switched off for testing)
        if (!self::$useIntl || !class_exists('\Collator')) {
            return null;
        }

        // reuse the collator that was already created for this locale
        if (isset(self::$collators[$lc])) {
            return self::$collators[$lc];
        }

        $collator = \Collator::create($lc);
        if (!isset($collator)) {
            // check needed as stated in the docs; failures are not cached, so creation is retried next time
            return null;
        }

        // numeric collation gives the "natural" ordering of digit sequences
        $collator->setAttribute(\Collator::NUMERIC_COLLATION, \Collator::ON);
        Logger::getInstance(Logger::LOG_DEBUG)->log(
            'Collator created with locale "' . $lc . '": numeric collation on, ' .
            'valid locale "' . $collator->getLocale(\Locale::VALID_LOCALE) . '", ' .
            'actual locale "' . $collator->getLocale(\Locale::ACTUAL_LOCALE) . '"',
            null, __FILE__, __LINE__
        );
        self::$collators[$lc] = $collator;

        return $collator;
    }

    /**
     * Compare two strings with the given collator, using strnatcasecmp() when there is none.
     *
     * Collator::compare() returns false instead of an integer when the comparison fails.
     * In that case the strnatcasecmp() result is used as well, so that callers and sort
     * callbacks always receive an integer.
     *
     * @param \Collator|null $collator The collator to use, or null for the plain PHP fallback.
     * @param string $str1 The first string.
     * @param string $str2 The second string.
     * @return int Returns < 0 if $str1 is less than $str2; > 0 if $str1 is greater than $str2, and 0 if they are equal.
     */
    protected static function compareStrings($collator, $str1, $str2)
    {
        if (isset($collator)) {
            $result = $collator->compare($str1, $str2);
            if ($result !== false) {
                return $result;
            }
        }

        return strnatcasecmp($str1, $str2);
    }

    /**
     * Enable or disable the use of the "intl" extension collator.
     * This is used for testing and should not be used in normal code.
     *
     * @param bool $use
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function useIntl($use = true)
    {
        self::$useIntl = $use;
    }

    /**
     * Drop-in replacement for strcmp(), strcasecmp(), strnatcmp() and strnatcasecmp().
     * It uses a collator-based comparison, or strnatcasecmp() as a fallback.
     *
     * @param string $str1 The first string.
     * @param string $str2 The second string.
     * @return int Returns < 0 if $str1 is less than $str2; > 0 if $str1 is greater than $str2, and 0 if they are equal.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function strcmp($str1, $str2)
    {
        return self::compareStrings(self::getCollator(), $str1, $str2);
    }

    /**
     * Drop-in replacement for sort().
     * It uses a collator-based sort, or sort() with flags SORT_NATURAL and SORT_FLAG_CASE as a fallback.
     *
     * @param array $array The input array, sorted in place.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function sort(&$array)
    {
        $collator = self::getCollator();
        if (isset($collator)) {
            return $collator->sort($array);
        }

        return sort($array, SORT_NATURAL | SORT_FLAG_CASE);
    }

    /**
     * Drop-in replacement for ksort().
     * It uses a collator-based sort, or ksort() with flags SORT_NATURAL and SORT_FLAG_CASE as a fallback.
     *
     * @param array $array The input array, sorted in place by key.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function ksort(&$array)
    {
        $collator = self::getCollator();
        if (!isset($collator)) {
            return ksort($array, SORT_NATURAL | SORT_FLAG_CASE);
        }

        // the collator has no key sort of its own, so its comparison drives uksort()
        return uksort($array, static function ($key1, $key2) use ($collator) {
            return self::compareStrings($collator, $key1, $key2);
        });
    }

    /**
     * Drop-in replacement for asort(), natsort() and natcasesort().
     * It uses a collator-based sort, or asort() with flags SORT_NATURAL and SORT_FLAG_CASE as a fallback.
     *
     * @param array $array The input array, sorted in place with keys preserved.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     */
    public static function asort(&$array)
    {
        $collator = self::getCollator();
        if (isset($collator)) {
            return $collator->asort($array);
        }

        return asort($array, SORT_NATURAL | SORT_FLAG_CASE);
    }

    /**
     * Drop-in replacement for asort(), natsort() and natcasesort() when the parameter is an array of filenames.
     * Filenames may not be equal to page names, depending on the setting in $conf['fnencode'],
     * so the correct behavior is to sort page names and reflect this sorting in the filename array.
     *
     * Each filename is passed through utf8_decodeFN() before it is compared.
     *
     * @param array $array The input array, sorted in place with keys preserved.
     * @return bool Returns true on success or false on failure.
     *
     * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public static function asortFN(&$array)
    {
        $collator = self::getCollator();

        return uasort($array, static function ($fn1, $fn2) use ($collator) {
            return self::compareStrings($collator, utf8_decodeFN($fn1), utf8_decodeFN($fn2));
        });
    }
}
179