<?php

namespace ComboStrap;

use ComboStrap\Web\Url;

/**
 * Class StringUtility
 * @package ComboStrap
 * A class with string utility
 *
 * All length and offset computations in this class are byte based
 * (`strlen`, `substr`, ...), not character based.
 */
class StringUtility
{

    /**
     * Characters replaced by a space by {@link StringUtility::getWords()}
     * (on top of the URL and Windows path reserved characters)
     */
    public const SEPARATORS_CHARACTERS = [".", "(", ")", ",", "-"];


    /**
     * Generate a text with a max length of $length
     * and add ` ...` if above
     *
     * When the string is cut, the result is one byte shorter than $length
     * (historical behavior that is kept for backward compatibility).
     *
     * When $length is too small to hold the suffix, the string is hard cut
     * at $length bytes without any suffix. (A negative length passed to
     * `substr` would otherwise strip bytes from the end and return a string
     * longer than $length.)
     *
     * @param $myString - the string to truncate
     * @param $length - the maximum length in bytes
     * @return string
     */
    public static function truncateString($myString, $length): string
    {

        if (strlen($myString) <= $length) {
            return $myString;
        }

        $suffix = ' ...';
        $keptLength = ($length - 1) - strlen($suffix);
        if ($keptLength < 0) {
            // Not enough room for the suffix
            return substr($myString, 0, max(0, $length));
        }
        return substr($myString, 0, $keptLength) . $suffix;

    }

    /**
     * @param $string
     * @return string - the string without any line feed (`\n`)
     * Used to compare string without worrying about end of line
     */
    public static function normalized($string)
    {
        return str_replace("\n", "", $string);
    }

    /**
     * Note the order of the arguments: the needle comes first
     *
     * @param $needle - the searched string
     * @param $haystack - the string to search in
     * @return bool - true if the haystack contains the needle
     */
    public static function contain($needle, $haystack): bool
    {
        return strpos($haystack, $needle) !== false;
    }

    /**
     * Cast a value to a string
     *
     * @param $value - a string, an array, an object, a number or a boolean
     * @return string
     *   * the value itself for a string
     *   * the `var_export` representation for an array or a boolean
     *   * the `__toString` output for an object that has this method, otherwise its class name
     *   * the `strval` output for a numeric value
     *   * the `var_export` representation for any other type (a warning is then logged)
     */
    public static function toString($value)
    {
        /**
         * No transformation if it's a string
         * var_export below is not idempotent
         * ie \ would become \\
         */
        if (is_string($value)) {
            return $value;
        }

        if (is_array($value)) {
            // The export of an array always starts with `array`,
            // there is therefore no enclosing quote to delete
            return var_export($value, true);
        }

        if (is_object($value)) {
            if (method_exists($value, "__toString")) {
                return strval($value);
            }
            return get_class($value);
        }

        if (is_numeric($value)) {
            return strval($value);
        }

        if (is_bool($value)) {
            return var_export($value, true);
        }

        $string = var_export($value, true);
        LogUtility::msg("The type of the value ($string) is unknown and could not be properly cast to string", LogUtility::LVL_MSG_WARNING);
        return $string;

    }

    /**
     * Add an EOL (the DOKU_LF constant, defined outside of this file)
     * if not present at the end of the string
     *
     * An empty string is left untouched
     *
     * @param $doc - the string, modified in place
     */
    public static function addEolCharacterIfNotPresent(&$doc): void
    {
        $strlen = strlen($doc);
        if ($strlen < 1) {
            return;
        }
        if ($doc[$strlen - 1] != DOKU_LF) {
            $doc .= DOKU_LF;
        }
    }

    /**
     * Delete the string from the end
     * This is used generally to delete the previous opening tag of an header or a blockquote
     *
     * Side effect: $doc is always trimmed on both sides, even if $string was not found
     *
     * @param $doc - the string, modified in place
     * @param $string - the string to delete at the end (trimmed before comparison)
     */
    public static function rtrim(&$doc, $string): void
    {

        /**
         * We trim because in the process, we may get extra {@link DOKU_LF} at the end
         */
        $doc = trim($doc);
        $string = trim($string);
        $length = strlen($doc) - strlen($string);
        // A negative length means that the string is longer than the doc and can't be its end
        if ($length >= 0 && substr($doc, $length) === $string) {
            $doc = substr($doc, 0, $length);
        }

    }

    /**
     * Delete the string from the beginning
     * This is used to delete a tag for instance
     *
     * Side effect: $doc is always trimmed on both sides, even if $string was not found
     *
     * @param $doc - the string, modified in place
     * @param $string - the string to delete at the beginning (trimmed before comparison)
     */
    public static function ltrim(&$doc, $string): void
    {

        $doc = trim($doc);
        $string = trim($string);
        $length = strlen($string);
        if (substr($doc, 0, $length) === $string) {
            $doc = substr($doc, $length);
        }

    }

    /**
     * The word count does not take into account
     * words with non-words characters such as < =
     * Therefore the node <node> and attribute name=value are not taken in the count
     * @param $text
     * @return int the number of words
     */
    public static function getWordCount($text): int
    {
        /**
         * Delete the frontmatter
         * (preg_replace returns null on error, the text is then kept as is)
         */
        $withoutFrontmatter = preg_replace("/^---(json)?$.*^---$/Ums", "", $text);
        if ($withoutFrontmatter !== null) {
            $text = $withoutFrontmatter;
        }
        /**
         * New line for node
         */
        $text = str_replace("<", "\n<", $text);
        $text = str_replace(">", ">\n", $text);
        // \s shorthand for whitespace
        // | the table and links are separated with a |
        // / to take into account expression such as and/or
        // /u for unicode support (https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php)
        $tokens = preg_split('/[\s|\/]/u', $text);
        if ($tokens === false) {
            // The unicode mode fails on invalid UTF-8, we split then on bytes
            $tokens = preg_split('/[\s|\/]/', $text);
        }
        if ($tokens === false) {
            return 0;
        }
        $wordsWithoutEmpty = array_filter($tokens, [self::class, 'isWord']);
        return count($wordsWithoutEmpty);
    }

    /**
     * Normalize the whitespaces of a string to make comparison easier
     *
     * @param $expected
     * @return string - the string where every run of whitespace characters
     * (space, tab, end of line, ...) is replaced by one space and
     * where the leading and trailing whitespaces are deleted
     */
    public static function normalize($expected)
    {
        $expected = preg_replace('/\s+/', " ", $expected);
        return trim($expected);

    }

    /**
     * @param $text
     * @return bool - true if the text is made only of word characters
     * and of the allowed punctuation (see the comment in the body)
     */
    public static function isWord($text): bool
    {
        /**
         * `empty` is also true for the digit zero
         * that is a word as any other digit
         */
        if (empty($text) && $text !== '0' && $text !== 0) {
            return false;
        }
        /**
         * We also allow `-` minus
         *
         * And because otherwise the words are not counted:
         *   * `'` (used to highlight words)
         *   * `[]` used in links
         *   * `,` used at the end of a sentence
         */
        return preg_match("/^[\w\-'\]\[,]*$/u", $text) === 1;
    }

    /**
     * @param $subject - the string to test
     * @param $pattern - a regular expression pattern without delimiters.
     * The pattern is enclosed with the `/` delimiter, a `/` in the pattern
     * must therefore be escaped by the caller
     * @return bool - true if the subject matches the pattern
     */
    public static function match($subject, $pattern): bool
    {
        return preg_match("/$pattern/", $subject) === 1;
    }

    /**
     * @param $string
     * @param $suffix
     * @return bool - true if the string ends with the suffix
     * (always true for an empty suffix)
     */
    public static function endWiths($string, $suffix): bool
    {
        $suffixLength = strlen($suffix);
        if ($suffixLength === 0) {
            return true;
        }
        if ($suffixLength > strlen($string)) {
            return false;
        }
        return substr_compare($string, $suffix, -$suffixLength) === 0;
    }

    /**
     * @param $string - the string to split
     * @param string $delimiter - the separator
     * @return array - the parts trimmed
     */
    public static function explodeAndTrim($string, $delimiter = ","): array
    {
        return array_map('trim', explode($delimiter, $string));
    }

    /**
     * @param $haystack
     * @param $needle
     * @return false|int - the position of the last occurrence of the needle, false if not found
     */
    public static function lastIndexOf($haystack, $needle)
    {
        /**
         * strRpos
         * and not strpos
         */
        return strrpos($haystack, $needle);
    }

    /**
     * @param $string
     * @param $prefix
     * @return bool - true if the string starts with the prefix
     * (always true for an empty prefix)
     */
    public static function startWiths($string, $prefix): bool
    {
        /**
         * We compare only the first bytes.
         * A search of the last occurrence (strrpos) would fail
         * when the prefix is present more than once in the string (ie `abab` and `ab`)
         */
        return strncmp($string, $prefix, strlen($prefix)) === 0;
    }

    /**
     * @param $string
     * @param null $separatorsCharacters - characters that will separate the words
     * (by default, the characters returned by {@link StringUtility::getAllSeparators()})
     * @return array the words (an empty array if there is none)
     */
    public static function getWords($string, $separatorsCharacters = null): array
    {
        // Reserved characters to space
        if ($separatorsCharacters === null) {
            $separatorsCharacters = StringUtility::getAllSeparators();
        }
        if (!is_array($separatorsCharacters)) {
            LogUtility::msg("The separators characters are not an array, default characters used");
            $separatorsCharacters = StringUtility::getAllSeparators();
        }

        $string = str_replace($separatorsCharacters, " ", $string);

        // Any run of whitespace separates two words
        // and the empty parts (leading, trailing whitespace, empty string) are discarded
        $words = preg_split('/\s+/', $string, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            return [];
        }
        return $words;
    }

    /**
     * @return array - the default characters that separate words:
     * the URL reserved words, the Windows path reserved characters
     * and {@link StringUtility::SEPARATORS_CHARACTERS}
     */
    private static function getAllSeparators(): array
    {
        return array_merge(
            Url::RESERVED_WORDS,
            LocalPath::RESERVED_WINDOWS_CHARACTERS,
            StringUtility::SEPARATORS_CHARACTERS
        );
    }

}