xref: /plugin/davcal/vendor/sabre/vobject/lib/StringUtil.php (revision a1a3b6794e0e143a4a8b51d3185ce2d339be61ab)
1*a1a3b679SAndreas Boehler<?php
2*a1a3b679SAndreas Boehler
3*a1a3b679SAndreas Boehlernamespace Sabre\VObject;
4*a1a3b679SAndreas Boehler
5*a1a3b679SAndreas Boehler/**
6*a1a3b679SAndreas Boehler * Useful utilities for working with various strings.
7*a1a3b679SAndreas Boehler *
8*a1a3b679SAndreas Boehler * @copyright Copyright (C) 2011-2015 fruux GmbH (https://fruux.com/).
9*a1a3b679SAndreas Boehler * @author Evert Pot (http://evertpot.com/)
10*a1a3b679SAndreas Boehler * @license http://sabre.io/license/ Modified BSD License
11*a1a3b679SAndreas Boehler */
class StringUtil {

    /**
     * Returns true or false depending on if a string is valid UTF-8.
     *
     * Besides the encoding check itself, a string is also rejected when it
     * contains one of the control bytes 0x00-0x08, 0x0B, 0x0C, 0x0E or 0x0F.
     *
     * @param string $str
     * @return bool
     */
    static public function isUTF8($str) {

        // Control characters
        if (preg_match('%[\x00-\x08\x0B-\x0C\x0E\x0F]%', $str)) {
            return false;
        }

        // An empty pattern matches any subject, so with the "u" modifier
        // the only way for this to not return 1 is a subject that is not
        // valid UTF-8 (in which case preg_match returns false).
        return (bool)preg_match('%%u', $str);

    }

    /**
     * This method tries its best to convert the input string to UTF-8.
     *
     * Currently only ISO-8859-1 input and UTF-8 input is supported, but this
     * may be expanded upon if we receive other examples.
     *
     * Input that is already valid UTF-8 is not re-encoded. Any other input
     * is interpreted as ISO-8859-1; WINDOWS-1252 is not told apart from
     * ISO-8859-1. In both cases control characters (0x00-0x08, 0x0B, 0x0C,
     * 0x0E-0x1F and 0x7F) are removed from the result.
     *
     * @param string $str
     * @return string
     */
    static public function convertToUTF8($str) {

        // mb_check_encoding() gives a definite validity answer instead of a
        // best guess, and mb_convert_encoding() replaces utf8_encode(),
        // which is deprecated as of PHP 8.2.
        if (!mb_check_encoding($str, 'UTF-8') && mb_check_encoding($str, 'ISO-8859-1')) {
            $str = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        }

        // Removing any control characters. The pattern works on bytes, which
        // is safe here: none of these byte values can occur inside a
        // multi-byte UTF-8 sequence.
        return preg_replace('%(?:[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F])%', '', $str);

    }

}
65*a1a3b679SAndreas Boehler
66