xref: /dokuwiki/inc/utf8.php (revision 0da4ba1be8e299965722c2c8103df5789cd9e880)
1ed7b5f09Sandi<?php
/**
 * UTF8 helper functions
 *
 * This file now only initializes the UTF-8 capability detection and defines helper
 * functions if needed. All actual code is in the \dokuwiki\Utf8 classes
 *
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
1082257610Sandi
11*0da4ba1bSAndreas Gohruse dokuwiki\Utf8\Clean;
12*0da4ba1bSAndreas Gohruse dokuwiki\Utf8\Conversion;
13*0da4ba1bSAndreas Gohruse dokuwiki\Utf8\PhpString;
14*0da4ba1bSAndreas Gohruse dokuwiki\Utf8\Unicode;
15*0da4ba1bSAndreas Gohr
/**
 * Check for mbstring support
 *
 * UTF8_MBSTRING is left alone when it is already defined. Otherwise it is set
 * to 1 when mb_substr() exists and UTF8_NOMBSTRING has not been defined, and
 * to 0 in every other case.
 */
if (!defined('UTF8_MBSTRING')) {
    if (function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')) {
        define('UTF8_MBSTRING', 1);
    } else {
        define('UTF8_MBSTRING', 0);
    }
}

/**
 * Check if PCRE (the preg_* functions) was compiled with UTF-8 support
 *
 * Without this many of the functions below will not work, so this is a minimal requirement.
 *
 * The probe matches one non-ASCII character using the /u modifier. The @
 * operator silences any diagnostic preg_match() raises, so a failing probe
 * simply results in false.
 */
if (!defined('UTF8_PREGSUPPORT')) {
    define('UTF8_PREGSUPPORT', (bool)@preg_match('/^.$/u', 'ñ'));
}

/**
 * Check if PCRE (the preg_* functions) was compiled with Unicode Property support
 *
 * This is not required for the functions below, but might be needed in a UTF-8 aware application.
 *
 * Same probing technique as above, this time with the \pL property escape.
 */
if (!defined('UTF8_PROPERTYSUPPORT')) {
    define('UTF8_PROPERTYSUPPORT', (bool)@preg_match('/^\pL$/u', 'ñ'));
}

// When mbstring is in use, switch its internal encoding to UTF-8.
if (UTF8_MBSTRING) {
    mb_internal_encoding('UTF-8');
}
49f41bbe4cSAndreas Gohr
505e613a5cSchris
if (!function_exists('utf8_isASCII')) {
    /**
     * Procedural wrapper around Clean::isASCII()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to Clean::isASCII()
     * @return mixed whatever Clean::isASCII() returns
     * @see Clean::isASCII()
     */
    function utf8_isASCII($str)
    {
        return Clean::isASCII($str);
    }
}
5744f669e9Sandi
58f41bbe4cSAndreas Gohr
if (!function_exists('utf8_strip')) {
    /**
     * Procedural wrapper around Clean::strip()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to Clean::strip()
     * @return mixed whatever Clean::strip() returns
     * @see Clean::strip()
     */
    function utf8_strip($str)
    {
        return Clean::strip($str);
    }
}
65e1906e6eSandi
if (!function_exists('utf8_check')) {
    /**
     * Procedural wrapper around Clean::isUtf8()
     *
     * Only declared when no function of this name exists yet. Note that the
     * wrapper name (utf8_check) differs from the method it delegates to.
     *
     * @param mixed $str value handed to Clean::isUtf8()
     * @return mixed whatever Clean::isUtf8() returns
     * @see Clean::isUtf8()
     */
    function utf8_check($str)
    {
        return Clean::isUtf8($str);
    }
}
7249c713a3Sandi
if (!function_exists('utf8_basename')) {
    /**
     * Procedural wrapper around PhpString::basename()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $path first argument for PhpString::basename()
     * @param mixed $suffix second argument for PhpString::basename(); defaults to ''
     * @return mixed whatever PhpString::basename() returns
     * @see PhpString::basename()
     */
    function utf8_basename($path, $suffix = '')
    {
        return PhpString::basename($path, $suffix);
    }
}
79f393a4ebSAndreas Gohr
if (!function_exists('utf8_strlen')) {
    /**
     * Procedural wrapper around PhpString::strlen()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to PhpString::strlen()
     * @return mixed whatever PhpString::strlen() returns
     * @see PhpString::strlen()
     */
    function utf8_strlen($str)
    {
        return PhpString::strlen($str);
    }
}
862f954959Sandi
if (!function_exists('utf8_substr')) {
    /**
     * Procedural wrapper around PhpString::substr()
     *
     * Only declared when no function of this name exists yet. All arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for PhpString::substr()
     * @param mixed $offset second argument for PhpString::substr()
     * @param mixed $length third argument for PhpString::substr(); defaults to null
     * @return mixed whatever PhpString::substr() returns
     * @see PhpString::substr()
     */
    function utf8_substr($str, $offset, $length = null)
    {
        return PhpString::substr($str, $offset, $length);
    }
}
9310f09f2aSAndreas Gohr
if (!function_exists('utf8_substr_replace')) {
    /**
     * Procedural wrapper around PhpString::substr_replace()
     *
     * Only declared when no function of this name exists yet. All arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $string first argument for PhpString::substr_replace()
     * @param mixed $replacement second argument for PhpString::substr_replace()
     * @param mixed $start third argument for PhpString::substr_replace()
     * @param mixed $length fourth argument for PhpString::substr_replace(); defaults to 0
     * @return mixed whatever PhpString::substr_replace() returns
     * @see PhpString::substr_replace()
     */
    function utf8_substr_replace($string, $replacement, $start, $length = 0)
    {
        return PhpString::substr_replace($string, $replacement, $start, $length);
    }
}
100dc57ef04Sandi
if (!function_exists('utf8_ltrim')) {
    /**
     * Procedural wrapper around PhpString::ltrim()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for PhpString::ltrim()
     * @param mixed $charlist second argument for PhpString::ltrim(); defaults to ''
     * @return mixed whatever PhpString::ltrim() returns
     * @see PhpString::ltrim()
     */
    function utf8_ltrim($str, $charlist = '')
    {
        return PhpString::ltrim($str, $charlist);
    }
}
107f29317c1Sandi
if (!function_exists('utf8_rtrim')) {
    /**
     * Procedural wrapper around PhpString::rtrim()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for PhpString::rtrim()
     * @param mixed $charlist second argument for PhpString::rtrim(); defaults to ''
     * @return mixed whatever PhpString::rtrim() returns
     * @see PhpString::rtrim()
     */
    function utf8_rtrim($str, $charlist = '')
    {
        return PhpString::rtrim($str, $charlist);
    }
}
114f29317c1Sandi
if (!function_exists('utf8_trim')) {
    /**
     * Procedural wrapper around PhpString::trim()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for PhpString::trim()
     * @param mixed $charlist second argument for PhpString::trim(); defaults to ''
     * @return mixed whatever PhpString::trim() returns
     * @see PhpString::trim()
     */
    function utf8_trim($str, $charlist = '')
    {
        return PhpString::trim($str, $charlist);
    }
}
121f29317c1Sandi
if (!function_exists('utf8_strtolower')) {
    /**
     * Procedural wrapper around PhpString::strtolower()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to PhpString::strtolower()
     * @return mixed whatever PhpString::strtolower() returns
     * @see PhpString::strtolower()
     */
    function utf8_strtolower($str)
    {
        return PhpString::strtolower($str);
    }
}
12882257610Sandi
if (!function_exists('utf8_strtoupper')) {
    /**
     * Procedural wrapper around PhpString::strtoupper()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to PhpString::strtoupper()
     * @return mixed whatever PhpString::strtoupper() returns
     * @see PhpString::strtoupper()
     */
    function utf8_strtoupper($str)
    {
        return PhpString::strtoupper($str);
    }
}
13582257610Sandi
if (!function_exists('utf8_ucfirst')) {
    /**
     * Procedural wrapper around PhpString::ucfirst()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to PhpString::ucfirst()
     * @return mixed whatever PhpString::ucfirst() returns
     * @see PhpString::ucfirst()
     */
    function utf8_ucfirst($str)
    {
        return PhpString::ucfirst($str);
    }
}
14226ece5a7SAndreas Gohr
if (!function_exists('utf8_ucwords')) {
    /**
     * Procedural wrapper around PhpString::ucwords()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to PhpString::ucwords()
     * @return mixed whatever PhpString::ucwords() returns
     * @see PhpString::ucwords()
     */
    function utf8_ucwords($str)
    {
        return PhpString::ucwords($str);
    }
}
14926ece5a7SAndreas Gohr
if (!function_exists('utf8_deaccent')) {
    /**
     * Procedural wrapper around Clean::deaccent()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for Clean::deaccent()
     * @param mixed $case second argument for Clean::deaccent(); defaults to 0
     * @return mixed whatever Clean::deaccent() returns
     * @see Clean::deaccent()
     */
    function utf8_deaccent($str, $case = 0)
    {
        return Clean::deaccent($str, $case);
    }
}
15682257610Sandi
if (!function_exists('utf8_romanize')) {
    /**
     * Procedural wrapper around Clean::romanize()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to Clean::romanize()
     * @return mixed whatever Clean::romanize() returns
     * @see Clean::romanize()
     */
    function utf8_romanize($str)
    {
        return Clean::romanize($str);
    }
}
1638a831f2bSAndreas Gohr
if (!function_exists('utf8_stripspecials')) {
    /**
     * Procedural wrapper around Clean::stripspecials()
     *
     * Only declared when no function of this name exists yet. All arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for Clean::stripspecials()
     * @param mixed $repl second argument for Clean::stripspecials(); defaults to ''
     * @param mixed $additional third argument for Clean::stripspecials(); defaults to ''
     * @return mixed whatever Clean::stripspecials() returns
     * @see Clean::stripspecials()
     */
    function utf8_stripspecials($str, $repl = '', $additional = '')
    {
        return Clean::stripspecials($str, $repl, $additional);
    }
}
170099ada41Sandi
if (!function_exists('utf8_strpos')) {
    /**
     * Procedural wrapper around PhpString::strpos()
     *
     * Only declared when no function of this name exists yet. All arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $haystack first argument for PhpString::strpos()
     * @param mixed $needle second argument for PhpString::strpos()
     * @param mixed $offset third argument for PhpString::strpos(); defaults to 0
     * @return mixed whatever PhpString::strpos() returns
     * @see PhpString::strpos()
     */
    function utf8_strpos($haystack, $needle, $offset = 0)
    {
        return PhpString::strpos($haystack, $needle, $offset);
    }
}
177f29317c1Sandi
if (!function_exists('utf8_tohtml')) {
    /**
     * Procedural wrapper around Conversion::toHtml()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to Conversion::toHtml()
     * @return mixed whatever Conversion::toHtml() returns
     * @see Conversion::toHtml()
     */
    function utf8_tohtml($str)
    {
        return Conversion::toHtml($str);
    }
}
1849f9fb0e5STom N Harris
if (!function_exists('utf8_unhtml')) {
    /**
     * Procedural wrapper around Conversion::fromHtml()
     *
     * Only declared when no function of this name exists yet. Note that the
     * wrapper name (utf8_unhtml) differs from the method it delegates to.
     * The parameter name $enties is kept as-is so existing callers are not affected.
     *
     * @param mixed $str first argument for Conversion::fromHtml()
     * @param mixed $enties second argument for Conversion::fromHtml(); defaults to false
     * @return mixed whatever Conversion::fromHtml() returns
     * @see Conversion::fromHtml()
     */
    function utf8_unhtml($str, $enties = false)
    {
        return Conversion::fromHtml($str, $enties);
    }
}
191ea2eed85Sandi
if (!function_exists('utf8_to_unicode')) {
    /**
     * Procedural wrapper around Unicode::fromUtf8()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for Unicode::fromUtf8()
     * @param mixed $strict second argument for Unicode::fromUtf8(); defaults to false
     * @return mixed whatever Unicode::fromUtf8() returns
     * @see Unicode::fromUtf8()
     */
    function utf8_to_unicode($str, $strict = false)
    {
        return Unicode::fromUtf8($str, $strict);
    }
}
19882257610Sandi
if (!function_exists('unicode_to_utf8')) {
    /**
     * Procedural wrapper around Unicode::toUtf8()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $arr first argument for Unicode::toUtf8()
     * @param mixed $strict second argument for Unicode::toUtf8(); defaults to false
     * @return mixed whatever Unicode::toUtf8() returns
     * @see Unicode::toUtf8()
     */
    function unicode_to_utf8($arr, $strict = false)
    {
        return Unicode::toUtf8($arr, $strict);
    }
}
20582257610Sandi
if (!function_exists('utf8_to_utf16be')) {
    /**
     * Procedural wrapper around Conversion::toUtf16be()
     *
     * Only declared when no function of this name exists yet. Both arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for Conversion::toUtf16be()
     * @param mixed $bom second argument for Conversion::toUtf16be(); defaults to false
     * @return mixed whatever Conversion::toUtf16be() returns
     * @see Conversion::toUtf16be()
     */
    function utf8_to_utf16be($str, $bom = false)
    {
        return Conversion::toUtf16be($str, $bom);
    }
}
21215fa0b4fSAndreas Gohr
if (!function_exists('utf16be_to_utf8')) {
    /**
     * Procedural wrapper around Conversion::fromUtf16be()
     *
     * Only declared when no function of this name exists yet. The argument
     * is forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str value handed to Conversion::fromUtf16be()
     * @return mixed whatever Conversion::fromUtf16be() returns
     * @see Conversion::fromUtf16be()
     */
    function utf16be_to_utf8($str)
    {
        return Conversion::fromUtf16be($str);
    }
}
21915fa0b4fSAndreas Gohr
if (!function_exists('utf8_bad_replace')) {
    /**
     * Procedural wrapper around Clean::replaceBadBytes()
     *
     * Only declared when no function of this name exists yet. Note that the
     * wrapper name (utf8_bad_replace) differs from the method it delegates to.
     *
     * @param mixed $str first argument for Clean::replaceBadBytes()
     * @param mixed $replace second argument for Clean::replaceBadBytes(); defaults to ''
     * @return mixed whatever Clean::replaceBadBytes() returns
     * @see Clean::replaceBadBytes()
     */
    function utf8_bad_replace($str, $replace = '')
    {
        return Clean::replaceBadBytes($str, $replace);
    }
}
226ab77016bSAndreas Gohr
if (!function_exists('utf8_correctIdx')) {
    /**
     * Procedural wrapper around Clean::correctIdx()
     *
     * Only declared when no function of this name exists yet. All arguments
     * are forwarded unchanged and the method's result is returned as-is.
     *
     * @param mixed $str first argument for Clean::correctIdx()
     * @param mixed $i second argument for Clean::correctIdx()
     * @param mixed $next third argument for Clean::correctIdx(); defaults to false
     * @return mixed whatever Clean::correctIdx() returns
     * @see Clean::correctIdx()
     */
    function utf8_correctIdx($str, $i, $next = false)
    {
        return Clean::correctIdx($str, $i, $next);
    }
}
233