xref: /plugin/combo/ComboStrap/StringUtility.php (revision 04fd306c7c155fa133ebb3669986875d65988276)
137748cd8SNickeau<?php
237748cd8SNickeau
337748cd8SNickeaunamespace ComboStrap;
437748cd8SNickeau
5*04fd306cSNickeauuse ComboStrap\Web\Url;
6*04fd306cSNickeau
/**
 * Class StringUtility
 * @package ComboStrap
 *
 * A collection of static string helpers (truncation, trimming,
 * word counting/splitting, prefix/suffix tests, string casting).
 */
class StringUtility
{

    /**
     * Characters that are treated as word separators by {@link StringUtility::getWords()}
     * (in addition to the url and windows path reserved characters)
     */
    public const SEPARATORS_CHARACTERS = [".", "(", ")", ",", "-"];


    /**
     * Generate a text with a max length of $length (in bytes)
     * and add ` ...` at the end if the text was above
     *
     * @param string $myString - the text to shorten
     * @param int $length - the maximum length in bytes
     * @return string - the original text if short enough, otherwise the shortened text followed by ` ...`
     */
    public static function truncateString($myString, $length): string
    {

        if (strlen($myString) <= $length) {
            return $myString;
        }

        $suffix = ' ...';
        /**
         * Never negative: a negative length given to substr
         * means `stop that many bytes before the end` and would
         * return a text longer than the original for small $length
         */
        $keptLength = max(0, ($length - 1) - strlen($suffix));
        if (function_exists('mb_strcut')) {
            /**
             * Cut on a character boundary
             * to not return an invalid UTF-8 sequence
             */
            $head = mb_strcut($myString, 0, $keptLength, 'UTF-8');
        } else {
            $head = substr($myString, 0, $keptLength);
        }
        return $head . $suffix;

    }

    /**
     * @param $string
     * @return string - the string without any line feed (`\n`) character
     * Used to compare string without worrying about end of line
     */
    public static function normalized($string)
    {
        return str_replace("\n", "", $string);
    }

    /**
     * @param $needle - the text searched
     * @param $haystack - the text where to search
     * @return bool - true if the needle was found in the haystack
     */
    public static function contain($needle, $haystack): bool
    {
        return strpos($haystack, $needle) !== false;
    }

    /**
     * Cast a value to a string
     *
     * @param mixed $value
     * @return string
     *   * a string is returned as-is
     *   * an array is exported with var_export
     *   * an object is cast with `__toString` if available, otherwise its class name is returned
     *   * a number is cast with strval
     *   * a boolean gives `true` or `false`
     *   * any other value is exported with var_export and a warning is logged
     */
    public static function toString($value): string
    {
        /**
         * No transformation if it's a string
         * var_export below is not idempotent
         * ie \ would become \\
         */
        if (is_string($value)) {
            return $value;
        }

        if (is_array($value)) {
            return var_export($value, true);
        }

        if (is_object($value)) {
            if (method_exists($value, "__toString")) {
                return strval($value);
            }
            return get_class($value);
        }

        if (is_numeric($value)) {
            return strval($value);
        }

        if (is_bool($value)) {
            return var_export($value, true);
        }

        $string = var_export($value, true);
        LogUtility::msg("The type of the value ($string) is unknown and could not be properly cast to string", LogUtility::LVL_MSG_WARNING);
        return $string;

    }

    /**
     * Add an EOL if not present at the end of the string
     * (an empty string gets also an EOL)
     * @param $doc - the text, modified in place
     */
    public static function addEolCharacterIfNotPresent(&$doc)
    {
        /**
         * substr and not an offset access ($doc[strlen($doc) - 1])
         * because the offset access emits a warning on an empty string
         */
        if (substr($doc, -1) !== DOKU_LF) {
            $doc .= DOKU_LF;
        }
    }

    /**
     * Delete the string from the end
     * This is used generally to delete the previous opening tag of an header or a blockquote
     *
     * Note that the document is also trimmed (start and end)
     *
     * @param $doc - the text, modified in place
     * @param $string - the text to delete at the end
     */
    public static function rtrim(&$doc, $string)
    {

        /**
         * We trim because in the process, we may get extra {@link DOKU_LF} at the end
         */
        $doc = trim($doc);
        $string = trim($string);
        $length = strlen($doc) - strlen($string);
        if (substr($doc, $length) === $string) {
            $doc = substr($doc, 0, $length);
        }

    }

    /**
     * Delete the string from the beginning
     * This is used to delete a tag for instance
     *
     * Note that the document is also trimmed (start and end)
     *
     * @param $doc - the text, modified in place
     * @param $string - the text to delete at the beginning
     */
    public static function ltrim(&$doc, $string)
    {

        $doc = trim($doc);
        $string = trim($string);
        $length = strlen($string);
        if (substr($doc, 0, $length) === $string) {
            $doc = substr($doc, $length);
        }

    }

    /**
     * The word count does not take into account
     * words with non-words characters such as < =
     * Therefore the node <node> and attribute name=value are not taken in the count
     * @param $text
     * @return int the number of words
     */
    public static function getWordCount($text): int
    {
        /**
         * Delete the frontmatter
         */
        $text = preg_replace("/^---(json)?$.*^---$/Ums", "", $text);
        /**
         * New line for node
         */
        $text = str_replace("<", "\n<", $text);
        $text = str_replace(">", ">\n", $text);
        // \s shorthand for whitespace
        // | the table and links are separated with a |
        // / to take into account expression such as and/or
        // /u for unicode support (https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php)
        $wordSeparator = '/[\s|\/]/u';
        $tokens = preg_split($wordSeparator, $text);
        if ($tokens === false) {
            // the split failed (invalid UTF-8 for instance), there is nothing to count
            return 0;
        }
        /**
         * An array callable and not the string `self::isWord`
         * because callable strings with `self` are deprecated since PHP 8.2
         */
        $words = array_filter($tokens, [self::class, 'isWord']);
        return count($words);
    }

    /**
     * Replace every run of whitespace characters (space, tabulation, end of line, ...)
     * by a single space and trim the result
     *
     * @param $expected
     * @return string
     */
    public static function normalize($expected): string
    {
        /**
         * One regular expression with a quantifier:
         * a chain of `two spaces to one space` replacements
         * leaves several spaces when the run is long
         */
        $singleSpaced = preg_replace("/\s+/", " ", $expected);
        return trim((string)$singleSpaced);

    }

    /**
     * @param $text
     * @return bool - true if the text is made only of word characters
     * (and of the extra characters allowed below)
     */
    public static function isWord($text): bool
    {
        /**
         * Not `empty` because the text `0` is a valid word
         */
        if (!is_scalar($text) || (string)$text === '') {
            return false;
        }
        /**
         * We also allow `-` minus
         *
         * And because otherwise the words are not counted:
         *   * `'` (used to highlight words)
         *   * `[]` used in links
         *   * `,` used at the end of a sentence
         */
        return preg_match("/^[\w\-'\]\[,]*$/u", (string)$text) === 1;
    }

    /**
     * @param $subject - the text to test
     * @param $pattern - a regular expression without delimiters.
     * It's enclosed between `/`, a slash in the pattern should then be escaped
     * @return bool - true if the pattern matches
     */
    public static function match($subject, $pattern): bool
    {
        return preg_match("/$pattern/", $subject) === 1;
    }

    /**
     * @param $string
     * @param $suffix
     * @return bool - true if the string ends with the suffix (an empty suffix always matches)
     */
    public static function endWiths($string, $suffix): bool
    {
        $suffixLength = strlen($suffix);
        if ($suffixLength === 0) {
            return true;
        }
        return substr($string, -$suffixLength) === (string)$suffix;
    }

    /**
     * @param $string - the text to split
     * @param string $delimiter - the separator
     * @return array - the parts without leading and trailing whitespace
     */
    public static function explodeAndTrim($string, $delimiter = ","): array
    {
        return array_map('trim', explode($delimiter, $string));
    }

    /**
     * @param $haystack
     * @param $needle
     * @return false|int - the position of the last occurrence of the needle or false if not found
     */
    public static function lastIndexOf($haystack, $needle)
    {
        /**
         * strRpos
         * and not strpos
         */
        return strrpos($haystack, $needle);
    }

    /**
     * @param $string
     * @param $prefix
     * @return bool - true if the string starts with the prefix (an empty prefix always matches)
     */
    public static function startWiths($string, $prefix): bool
    {
        /**
         * A comparison of the first bytes and not strrpos:
         * strrpos returns the position of the last occurrence
         * and is then not 0 when the prefix is also present further in the string
         */
        return strncmp($string, $prefix, strlen($prefix)) === 0;
    }

    /**
     * @param $string
     * @param array|null $separatorsCharacters - characters that will separate the words
     * (whitespace always separates words)
     * @return array the words (an empty array if there is no word)
     */
    public static function getWords($string, $separatorsCharacters = null): array
    {
        // Reserved characters to space
        if ($separatorsCharacters === null) {
            $separatorsCharacters = StringUtility::getAllSeparators();
        }
        if (!is_array($separatorsCharacters)) {
            LogUtility::msg("The separators characters are not an array, default characters used");
            $separatorsCharacters = StringUtility::getAllSeparators();
        }

        $string = str_replace($separatorsCharacters, " ", $string);

        /**
         * Split on any run of whitespace (a single tabulation or end of line included)
         * The empty parts are dropped: an empty text gives then no word
         */
        $words = preg_split('/\s+/', $string, -1, PREG_SPLIT_NO_EMPTY);
        return $words === false ? [] : $words;
    }

    /**
     * @return array - the default separators: the url reserved words,
     * the windows path reserved characters and {@link StringUtility::SEPARATORS_CHARACTERS}
     */
    private static function getAllSeparators(): array
    {
        return array_merge(
            Url::RESERVED_WORDS,
            LocalPath::RESERVED_WINDOWS_CHARACTERS,
            StringUtility::SEPARATORS_CHARACTERS
        );
    }

}
279