xref: /plugin/aichat/vendor/vanderlee/php-sentence/doc/source-class-Sentence.html (revision 8817535b0c67f8b10e9b8c05dcdf58fc17827423)
1*8817535bSAndreas Gohr<!DOCTYPE html>
2*8817535bSAndreas Gohr<html>
3*8817535bSAndreas Gohr<head>
4*8817535bSAndreas Gohr	<meta charset="utf-8">
5*8817535bSAndreas Gohr	<meta name="robots" content="noindex">
6*8817535bSAndreas Gohr
7*8817535bSAndreas Gohr	<title>File classes/Sentence.php | phpSentence</title>
8*8817535bSAndreas Gohr
9*8817535bSAndreas Gohr	<link rel="stylesheet" href="resources/style.css?e99947befd7bf673c6b43ff75e9e0f170c88a60e">
10*8817535bSAndreas Gohr
11*8817535bSAndreas Gohr</head>
12*8817535bSAndreas Gohr
13*8817535bSAndreas Gohr<body>
14*8817535bSAndreas Gohr<div id="left">
15*8817535bSAndreas Gohr	<div id="menu">
16*8817535bSAndreas Gohr		<a href="index.html" title="Overview"><span>Overview</span></a>
17*8817535bSAndreas Gohr
18*8817535bSAndreas Gohr
19*8817535bSAndreas Gohr		<div id="groups">
20*8817535bSAndreas Gohr		</div>
21*8817535bSAndreas Gohr
22*8817535bSAndreas Gohr
23*8817535bSAndreas Gohr
24*8817535bSAndreas Gohr		<div id="elements">
25*8817535bSAndreas Gohr			<h3>Classes</h3>
26*8817535bSAndreas Gohr			<ul>
27*8817535bSAndreas Gohr				<li><a href="class-Sentence.html">Sentence</a></li>
28*8817535bSAndreas Gohr				<li><a href="class-SentenceTest.html">SentenceTest</a></li>
29*8817535bSAndreas Gohr			</ul>
30*8817535bSAndreas Gohr
31*8817535bSAndreas Gohr
32*8817535bSAndreas Gohr
33*8817535bSAndreas Gohr
34*8817535bSAndreas Gohr
35*8817535bSAndreas Gohr			<h3>Functions</h3>
36*8817535bSAndreas Gohr			<ul>
37*8817535bSAndreas Gohr				<li><a href="function-Sentence_autoloader.html">Sentence_autoloader</a></li>
38*8817535bSAndreas Gohr			</ul>
39*8817535bSAndreas Gohr		</div>
40*8817535bSAndreas Gohr	</div>
41*8817535bSAndreas Gohr</div>
42*8817535bSAndreas Gohr
43*8817535bSAndreas Gohr<div id="splitter"></div>
44*8817535bSAndreas Gohr
45*8817535bSAndreas Gohr<div id="right">
46*8817535bSAndreas Gohr<div id="rightInner">
47*8817535bSAndreas Gohr	<form id="search">
48*8817535bSAndreas Gohr		<input type="hidden" name="cx" value="">
49*8817535bSAndreas Gohr		<input type="hidden" name="ie" value="UTF-8">
50*8817535bSAndreas Gohr		<input type="text" name="q" class="text" placeholder="Search">
51*8817535bSAndreas Gohr	</form>
52*8817535bSAndreas Gohr
53*8817535bSAndreas Gohr	<div id="navigation">
54*8817535bSAndreas Gohr		<ul>
55*8817535bSAndreas Gohr			<li>
56*8817535bSAndreas Gohr				<a href="index.html" title="Overview"><span>Overview</span></a>
57*8817535bSAndreas Gohr			</li>
58*8817535bSAndreas Gohr			<li>
59*8817535bSAndreas Gohr<span>Class</span>			</li>
60*8817535bSAndreas Gohr		</ul>
61*8817535bSAndreas Gohr		<ul>
62*8817535bSAndreas Gohr		</ul>
63*8817535bSAndreas Gohr		<ul>
64*8817535bSAndreas Gohr		</ul>
65*8817535bSAndreas Gohr	</div>
66*8817535bSAndreas Gohr
67*8817535bSAndreas Gohr<pre><code><span id="1" class="l"><a href="#1">  1: </a><span class="xlang">&lt;?php</span>
68*8817535bSAndreas Gohr</span><span id="2" class="l"><a href="#2">  2: </a>
69*8817535bSAndreas Gohr</span><span id="3" class="l"><a href="#3">  3: </a><span class="php-comment">/**
70*8817535bSAndreas Gohr</span></span><span id="4" class="l"><a href="#4">  4: </a><span class="php-comment"> * Segments sentences.
71*8817535bSAndreas Gohr</span></span><span id="5" class="l"><a href="#5">  5: </a><span class="php-comment"> * Clipping may not be perfect.
72*8817535bSAndreas Gohr</span></span><span id="6" class="l"><a href="#6">  6: </a><span class="php-comment"> * Sentence count should be VERY close to the truth.
73*8817535bSAndreas Gohr</span></span><span id="7" class="l"><a href="#7">  7: </a><span class="php-comment"> *
74*8817535bSAndreas Gohr</span></span><span id="8" class="l"><a href="#8">  8: </a><span class="php-comment"> * Multibyte safe (at least for UTF-8), but rules are based on Germanic
75*8817535bSAndreas Gohr</span></span><span id="9" class="l"><a href="#9">  9: </a><span class="php-comment"> * language structure (English, Dutch, German). Should work for most
76*8817535bSAndreas Gohr</span></span><span id="10" class="l"><a href="#10"> 10: </a><span class="php-comment"> * Latin-alphabet languages.
77*8817535bSAndreas Gohr</span></span><span id="11" class="l"><a href="#11"> 11: </a><span class="php-comment"> */</span>
78*8817535bSAndreas Gohr</span><span id="12" class="l"><a href="#12"> 12: </a><span class="php-keyword1">class</span> Sentence {
79*8817535bSAndreas Gohr</span><span id="13" class="l"><a href="#13"> 13: </a>    <span class="php-comment">/**
80*8817535bSAndreas Gohr</span></span><span id="14" class="l"><a href="#14"> 14: </a><span class="php-comment">     * Specify this flag with the split method to trim whitespace.
81*8817535bSAndreas Gohr</span></span><span id="15" class="l"><a href="#15"> 15: </a><span class="php-comment">     */</span>
82*8817535bSAndreas Gohr</span><span id="16" class="l"><a href="#16"> 16: </a>    <span class="php-keyword1">const</span> SPLIT_TRIM        = <span class="php-num">0x1</span>;
83*8817535bSAndreas Gohr</span><span id="17" class="l"><a href="#17"> 17: </a>
84*8817535bSAndreas Gohr</span><span id="18" class="l"><a href="#18"> 18: </a>    <span class="php-comment">/**
85*8817535bSAndreas Gohr</span></span><span id="19" class="l"><a href="#19"> 19: </a><span class="php-comment">     * List of characters used to terminate sentences.
86*8817535bSAndreas Gohr</span></span><span id="20" class="l"><a href="#20"> 20: </a><span class="php-comment">     * @var array
87*8817535bSAndreas Gohr</span></span><span id="21" class="l"><a href="#21"> 21: </a><span class="php-comment">     */</span>
88*8817535bSAndreas Gohr</span><span id="22" class="l"><a href="#22"> 22: </a>    <span class="php-keyword1">private</span> <span class="php-var">$terminals</span>      = <span class="php-keyword1">array</span>(<span class="php-quote">'.'</span>, <span class="php-quote">'!'</span>, <span class="php-quote">'?'</span>);
89*8817535bSAndreas Gohr</span><span id="23" class="l"><a href="#23"> 23: </a>
90*8817535bSAndreas Gohr</span><span id="24" class="l"><a href="#24"> 24: </a>    <span class="php-comment">/**
91*8817535bSAndreas Gohr</span></span><span id="25" class="l"><a href="#25"> 25: </a><span class="php-comment">     * List of characters used for abbreviations.
92*8817535bSAndreas Gohr</span></span><span id="26" class="l"><a href="#26"> 26: </a><span class="php-comment">     * @var array
93*8817535bSAndreas Gohr</span></span><span id="27" class="l"><a href="#27"> 27: </a><span class="php-comment">     */</span>
94*8817535bSAndreas Gohr</span><span id="28" class="l"><a href="#28"> 28: </a>    <span class="php-keyword1">private</span> <span class="php-var">$abbreviators</span>   = <span class="php-keyword1">array</span>(<span class="php-quote">'.'</span>);
95*8817535bSAndreas Gohr</span><span id="29" class="l"><a href="#29"> 29: </a>
96*8817535bSAndreas Gohr</span><span id="30" class="l"><a href="#30"> 30: </a>    <span class="php-comment">/**
97*8817535bSAndreas Gohr</span></span><span id="31" class="l"><a href="#31"> 31: </a><span class="php-comment">     * Multibyte safe version of standard trim() function.
98*8817535bSAndreas Gohr</span></span><span id="32" class="l"><a href="#32"> 32: </a><span class="php-comment">     * @param string $string
99*8817535bSAndreas Gohr</span></span><span id="33" class="l"><a href="#33"> 33: </a><span class="php-comment">     * @return string
100*8817535bSAndreas Gohr</span></span><span id="34" class="l"><a href="#34"> 34: </a><span class="php-comment">     */</span>
101*8817535bSAndreas Gohr</span><span id="35" class="l"><a href="#35"> 35: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">static</span> <span class="php-keyword1">function</span> mbTrim(<span class="php-var">$string</span>) {
102*8817535bSAndreas Gohr</span><span id="36" class="l"><a href="#36"> 36: </a>        <span class="php-keyword1">return</span> <span class="php-keyword2">mb_ereg_replace</span>(<span class="php-quote">'^\s*([\s\S]*?)\s*$'</span>, <span class="php-quote">'\1'</span>, <span class="php-var">$string</span>);
103*8817535bSAndreas Gohr</span><span id="37" class="l"><a href="#37"> 37: </a>    }
104*8817535bSAndreas Gohr</span><span id="38" class="l"><a href="#38"> 38: </a>
105*8817535bSAndreas Gohr</span><span id="39" class="l"><a href="#39"> 39: </a>    <span class="php-comment">/**
106*8817535bSAndreas Gohr</span></span><span id="40" class="l"><a href="#40"> 40: </a><span class="php-comment">     * A cross between mb_split and preg_split, adding the preg_split flags
107*8817535bSAndreas Gohr</span></span><span id="41" class="l"><a href="#41"> 41: </a><span class="php-comment">     * to mb_split.
108*8817535bSAndreas Gohr</span></span><span id="42" class="l"><a href="#42"> 42: </a><span class="php-comment">     * @param string $pattern
109*8817535bSAndreas Gohr</span></span><span id="43" class="l"><a href="#43"> 43: </a><span class="php-comment">     * @param string $string
110*8817535bSAndreas Gohr</span></span><span id="44" class="l"><a href="#44"> 44: </a><span class="php-comment">     * @param int $limit
111*8817535bSAndreas Gohr</span></span><span id="45" class="l"><a href="#45"> 45: </a><span class="php-comment">     * @param int $flags
112*8817535bSAndreas Gohr</span></span><span id="46" class="l"><a href="#46"> 46: </a><span class="php-comment">     * @return array
113*8817535bSAndreas Gohr</span></span><span id="47" class="l"><a href="#47"> 47: </a><span class="php-comment">     */</span>
114*8817535bSAndreas Gohr</span><span id="48" class="l"><a href="#48"> 48: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">static</span> <span class="php-keyword1">function</span> <span class="php-keyword2">mbSplit</span>(<span class="php-var">$pattern</span>, <span class="php-var">$string</span>, <span class="php-var">$limit</span> = -<span class="php-num">1</span>, <span class="php-var">$flags</span> = <span class="php-num">0</span>) {
115*8817535bSAndreas Gohr</span><span id="49" class="l"><a href="#49"> 49: </a>        <span class="php-var">$strlen</span> = <span class="php-keyword2">strlen</span>(<span class="php-var">$string</span>);      <span class="php-comment">// bytes!   </span>
116*8817535bSAndreas Gohr</span><span id="50" class="l"><a href="#50"> 50: </a>        <span class="php-keyword2">mb_ereg_search_init</span>(<span class="php-var">$string</span>);
117*8817535bSAndreas Gohr</span><span id="51" class="l"><a href="#51"> 51: </a>
118*8817535bSAndreas Gohr</span><span id="52" class="l"><a href="#52"> 52: </a>        <span class="php-var">$lengths</span> = <span class="php-keyword1">array</span>();
119*8817535bSAndreas Gohr</span><span id="53" class="l"><a href="#53"> 53: </a>        <span class="php-var">$position</span> = <span class="php-num">0</span>;
120*8817535bSAndreas Gohr</span><span id="54" class="l"><a href="#54"> 54: </a>        <span class="php-keyword1">while</span> ((<span class="php-var">$array</span> = <span class="php-keyword2">mb_ereg_search_pos</span>(<span class="php-var">$pattern</span>, <span class="php-quote">''</span>)) !== <span class="php-keyword1">false</span>) {
121*8817535bSAndreas Gohr</span><span id="55" class="l"><a href="#55"> 55: </a>            <span class="php-comment">// capture split</span>
122*8817535bSAndreas Gohr</span><span id="56" class="l"><a href="#56"> 56: </a>            <span class="php-var">$lengths</span>[] = <span class="php-keyword1">array</span>(<span class="php-var">$array</span>[<span class="php-num">0</span>] - <span class="php-var">$position</span>, <span class="php-keyword1">false</span>, <span class="php-keyword1">null</span>);
123*8817535bSAndreas Gohr</span><span id="57" class="l"><a href="#57"> 57: </a>
124*8817535bSAndreas Gohr</span><span id="58" class="l"><a href="#58"> 58: </a>            <span class="php-comment">// move position</span>
125*8817535bSAndreas Gohr</span><span id="59" class="l"><a href="#59"> 59: </a>            <span class="php-var">$position</span> = <span class="php-var">$array</span>[<span class="php-num">0</span>] + <span class="php-var">$array</span>[<span class="php-num">1</span>];
126*8817535bSAndreas Gohr</span><span id="60" class="l"><a href="#60"> 60: </a>
127*8817535bSAndreas Gohr</span><span id="61" class="l"><a href="#61"> 61: </a>            <span class="php-comment">// capture delimiter</span>
128*8817535bSAndreas Gohr</span><span id="62" class="l"><a href="#62"> 62: </a>            <span class="php-var">$regs</span> = <span class="php-keyword2">mb_ereg_search_getregs</span>();
129*8817535bSAndreas Gohr</span><span id="63" class="l"><a href="#63"> 63: </a>            <span class="php-var">$lengths</span>[] = <span class="php-keyword1">array</span>(<span class="php-var">$array</span>[<span class="php-num">1</span>], <span class="php-keyword1">true</span>, <span class="php-keyword1">isset</span>(<span class="php-var">$regs</span>[<span class="php-num">1</span>]) &amp;&amp; <span class="php-var">$regs</span>[<span class="php-num">1</span>]);
130*8817535bSAndreas Gohr</span><span id="64" class="l"><a href="#64"> 64: </a>
131*8817535bSAndreas Gohr</span><span id="65" class="l"><a href="#65"> 65: </a>            <span class="php-comment">// Continue on?</span>
132*8817535bSAndreas Gohr</span><span id="66" class="l"><a href="#66"> 66: </a>            <span class="php-keyword1">if</span> (<span class="php-var">$position</span> &gt;= <span class="php-var">$strlen</span>) {
133*8817535bSAndreas Gohr</span><span id="67" class="l"><a href="#67"> 67: </a>                <span class="php-keyword1">break</span>;
134*8817535bSAndreas Gohr</span><span id="68" class="l"><a href="#68"> 68: </a>            }
135*8817535bSAndreas Gohr</span><span id="69" class="l"><a href="#69"> 69: </a>        }
136*8817535bSAndreas Gohr</span><span id="70" class="l"><a href="#70"> 70: </a>
137*8817535bSAndreas Gohr</span><span id="71" class="l"><a href="#71"> 71: </a>        <span class="php-comment">// Add last bit, if not ending with split</span>
138*8817535bSAndreas Gohr</span><span id="72" class="l"><a href="#72"> 72: </a>        <span class="php-var">$lengths</span>[] = <span class="php-keyword1">array</span>(<span class="php-var">$strlen</span> - <span class="php-var">$position</span>, <span class="php-keyword1">false</span>, <span class="php-keyword1">null</span>);
139*8817535bSAndreas Gohr</span><span id="73" class="l"><a href="#73"> 73: </a>
140*8817535bSAndreas Gohr</span><span id="74" class="l"><a href="#74"> 74: </a>        <span class="php-comment">// Substrings</span>
141*8817535bSAndreas Gohr</span><span id="75" class="l"><a href="#75"> 75: </a>        <span class="php-var">$parts</span> = <span class="php-keyword1">array</span>();
142*8817535bSAndreas Gohr</span><span id="76" class="l"><a href="#76"> 76: </a>        <span class="php-var">$position</span> = <span class="php-num">0</span>;
143*8817535bSAndreas Gohr</span><span id="77" class="l"><a href="#77"> 77: </a>        <span class="php-var">$count</span> = <span class="php-num">1</span>;
144*8817535bSAndreas Gohr</span><span id="78" class="l"><a href="#78"> 78: </a>        <span class="php-keyword1">foreach</span> (<span class="php-var">$lengths</span> <span class="php-keyword1">as</span> <span class="php-var">$length</span>) {
145*8817535bSAndreas Gohr</span><span id="79" class="l"><a href="#79"> 79: </a>            <span class="php-var">$is_delimiter</span>   = <span class="php-var">$length</span>[<span class="php-num">1</span>];
146*8817535bSAndreas Gohr</span><span id="80" class="l"><a href="#80"> 80: </a>            <span class="php-var">$is_captured</span>    = <span class="php-var">$length</span>[<span class="php-num">2</span>];
147*8817535bSAndreas Gohr</span><span id="81" class="l"><a href="#81"> 81: </a>
148*8817535bSAndreas Gohr</span><span id="82" class="l"><a href="#82"> 82: </a>            <span class="php-keyword1">if</span> (<span class="php-var">$limit</span> &gt; <span class="php-num">0</span> &amp;&amp; !<span class="php-var">$is_delimiter</span> &amp;&amp; (<span class="php-var">$length</span>[<span class="php-num">0</span>] || ~<span class="php-var">$flags</span> &amp; PREG_SPLIT_NO_EMPTY) &amp;&amp; ++<span class="php-var">$count</span> &gt; <span class="php-var">$limit</span>) {
149*8817535bSAndreas Gohr</span><span id="83" class="l"><a href="#83"> 83: </a>                <span class="php-keyword1">if</span> (<span class="php-var">$length</span>[<span class="php-num">0</span>] &gt; <span class="php-num">0</span> || ~<span class="php-var">$flags</span> &amp; PREG_SPLIT_NO_EMPTY) {
150*8817535bSAndreas Gohr</span><span id="84" class="l"><a href="#84"> 84: </a>                    <span class="php-var">$parts</span>[]    = <span class="php-var">$flags</span> &amp; PREG_SPLIT_OFFSET_CAPTURE
151*8817535bSAndreas Gohr</span><span id="85" class="l"><a href="#85"> 85: </a>                                ? <span class="php-keyword1">array</span>(<span class="php-keyword2">mb_strcut</span>(<span class="php-var">$string</span>, <span class="php-var">$position</span>), <span class="php-var">$position</span>)
152*8817535bSAndreas Gohr</span><span id="86" class="l"><a href="#86"> 86: </a>                                : <span class="php-keyword2">mb_strcut</span>(<span class="php-var">$string</span>, <span class="php-var">$position</span>);
153*8817535bSAndreas Gohr</span><span id="87" class="l"><a href="#87"> 87: </a>                }
154*8817535bSAndreas Gohr</span><span id="88" class="l"><a href="#88"> 88: </a>                <span class="php-keyword1">break</span>;
155*8817535bSAndreas Gohr</span><span id="89" class="l"><a href="#89"> 89: </a>            } <span class="php-keyword1">elseif</span> ((!<span class="php-var">$is_delimiter</span> || (<span class="php-var">$flags</span> &amp; PREG_SPLIT_DELIM_CAPTURE &amp;&amp; <span class="php-var">$is_captured</span>))
156*8817535bSAndreas Gohr</span><span id="90" class="l"><a href="#90"> 90: </a>                   &amp;&amp; (<span class="php-var">$length</span>[<span class="php-num">0</span>] || ~<span class="php-var">$flags</span> &amp; PREG_SPLIT_NO_EMPTY)) {
157*8817535bSAndreas Gohr</span><span id="91" class="l"><a href="#91"> 91: </a>                <span class="php-var">$parts</span>[]    = <span class="php-var">$flags</span> &amp; PREG_SPLIT_OFFSET_CAPTURE
158*8817535bSAndreas Gohr</span><span id="92" class="l"><a href="#92"> 92: </a>                            ? <span class="php-keyword1">array</span>(<span class="php-keyword2">mb_strcut</span>(<span class="php-var">$string</span>, <span class="php-var">$position</span>, <span class="php-var">$length</span>[<span class="php-num">0</span>]), <span class="php-var">$position</span>)
159*8817535bSAndreas Gohr</span><span id="93" class="l"><a href="#93"> 93: </a>                            : <span class="php-keyword2">mb_strcut</span>(<span class="php-var">$string</span>, <span class="php-var">$position</span>, <span class="php-var">$length</span>[<span class="php-num">0</span>]);
160*8817535bSAndreas Gohr</span><span id="94" class="l"><a href="#94"> 94: </a>            }
161*8817535bSAndreas Gohr</span><span id="95" class="l"><a href="#95"> 95: </a>
162*8817535bSAndreas Gohr</span><span id="96" class="l"><a href="#96"> 96: </a>            <span class="php-var">$position</span> += <span class="php-var">$length</span>[<span class="php-num">0</span>];
163*8817535bSAndreas Gohr</span><span id="97" class="l"><a href="#97"> 97: </a>        }
164*8817535bSAndreas Gohr</span><span id="98" class="l"><a href="#98"> 98: </a>
165*8817535bSAndreas Gohr</span><span id="99" class="l"><a href="#99"> 99: </a>        <span class="php-keyword1">return</span> <span class="php-var">$parts</span>;
166*8817535bSAndreas Gohr</span><span id="100" class="l"><a href="#100">100: </a>    }
167*8817535bSAndreas Gohr</span><span id="101" class="l"><a href="#101">101: </a>
168*8817535bSAndreas Gohr</span><span id="102" class="l"><a href="#102">102: </a>    <span class="php-comment">/**
169*8817535bSAndreas Gohr</span></span><span id="103" class="l"><a href="#103">103: </a><span class="php-comment">     * Breaks a piece of text into lines by linebreak.
170*8817535bSAndreas Gohr</span></span><span id="104" class="l"><a href="#104">104: </a><span class="php-comment">     * Eats up any linebreak characters as if one.
171*8817535bSAndreas Gohr</span></span><span id="105" class="l"><a href="#105">105: </a><span class="php-comment">     *
172*8817535bSAndreas Gohr</span></span><span id="106" class="l"><a href="#106">106: </a><span class="php-comment">     * Multibyte safe
173*8817535bSAndreas Gohr</span></span><span id="107" class="l"><a href="#107">107: </a><span class="php-comment">     *
174*8817535bSAndreas Gohr</span></span><span id="108" class="l"><a href="#108">108: </a><span class="php-comment">     * @param string $text
175*8817535bSAndreas Gohr</span></span><span id="109" class="l"><a href="#109">109: </a><span class="php-comment">     * @return array
176*8817535bSAndreas Gohr</span></span><span id="110" class="l"><a href="#110">110: </a><span class="php-comment">     */</span>
177*8817535bSAndreas Gohr</span><span id="111" class="l"><a href="#111">111: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">static</span> <span class="php-keyword1">function</span> linebreakSplit(<span class="php-var">$text</span>) {
178*8817535bSAndreas Gohr</span><span id="112" class="l"><a href="#112">112: </a>        <span class="php-var">$lines</span> = <span class="php-keyword1">array</span>();
179*8817535bSAndreas Gohr</span><span id="113" class="l"><a href="#113">113: </a>        <span class="php-var">$line</span> = <span class="php-quote">''</span>;
180*8817535bSAndreas Gohr</span><span id="114" class="l"><a href="#114">114: </a>
181*8817535bSAndreas Gohr</span><span id="115" class="l"><a href="#115">115: </a>        <span class="php-keyword1">foreach</span> (self::<span class="php-keyword2">mbSplit</span>(<span class="php-quote">'([\r\n]+)'</span>, <span class="php-var">$text</span>, -<span class="php-num">1</span>, PREG_SPLIT_DELIM_CAPTURE) <span class="php-keyword1">as</span> <span class="php-var">$part</span>) {
182*8817535bSAndreas Gohr</span><span id="116" class="l"><a href="#116">116: </a>            <span class="php-var">$line</span> .= <span class="php-var">$part</span>;
183*8817535bSAndreas Gohr</span><span id="117" class="l"><a href="#117">117: </a>            <span class="php-keyword1">if</span> (self::mbTrim(<span class="php-var">$part</span>) === <span class="php-quote">''</span>) {
184*8817535bSAndreas Gohr</span><span id="118" class="l"><a href="#118">118: </a>                <span class="php-var">$lines</span>[] = <span class="php-var">$line</span>;
185*8817535bSAndreas Gohr</span><span id="119" class="l"><a href="#119">119: </a>                <span class="php-var">$line</span> = <span class="php-quote">''</span>;
186*8817535bSAndreas Gohr</span><span id="120" class="l"><a href="#120">120: </a>            }
187*8817535bSAndreas Gohr</span><span id="121" class="l"><a href="#121">121: </a>        }
188*8817535bSAndreas Gohr</span><span id="122" class="l"><a href="#122">122: </a>        <span class="php-var">$lines</span>[] = <span class="php-var">$line</span>;
189*8817535bSAndreas Gohr</span><span id="123" class="l"><a href="#123">123: </a>
190*8817535bSAndreas Gohr</span><span id="124" class="l"><a href="#124">124: </a>        <span class="php-keyword1">return</span> <span class="php-var">$lines</span>;
191*8817535bSAndreas Gohr</span><span id="125" class="l"><a href="#125">125: </a>    }
192*8817535bSAndreas Gohr</span><span id="126" class="l"><a href="#126">126: </a>
193*8817535bSAndreas Gohr</span><span id="127" class="l"><a href="#127">127: </a>    <span class="php-comment">/**
194*8817535bSAndreas Gohr</span></span><span id="128" class="l"><a href="#128">128: </a><span class="php-comment">     * Splits a single line by (consecutive sequences of)
195*8817535bSAndreas Gohr</span></span><span id="129" class="l"><a href="#129">129: </a><span class="php-comment">     * terminals, keeping terminals.
196*8817535bSAndreas Gohr</span></span><span id="130" class="l"><a href="#130">130: </a><span class="php-comment">     *
197*8817535bSAndreas Gohr</span></span><span id="131" class="l"><a href="#131">131: </a><span class="php-comment">     * Multibyte safe (at least for UTF-8)
198*8817535bSAndreas Gohr</span></span><span id="132" class="l"><a href="#132">132: </a><span class="php-comment">     *
199*8817535bSAndreas Gohr</span></span><span id="133" class="l"><a href="#133">133: </a><span class="php-comment">     * For example:
200*8817535bSAndreas Gohr</span></span><span id="134" class="l"><a href="#134">134: </a><span class="php-comment">     *  &quot;There ... is. More!&quot;
201*8817535bSAndreas Gohr</span></span><span id="135" class="l"><a href="#135">135: </a><span class="php-comment">     *      ... becomes ...
202*8817535bSAndreas Gohr</span></span><span id="136" class="l"><a href="#136">136: </a><span class="php-comment">     *  [ &quot;There &quot;, &quot;...&quot;, &quot; is&quot;, &quot;.&quot;, &quot; More&quot;, &quot;!&quot; ]
203*8817535bSAndreas Gohr</span></span><span id="137" class="l"><a href="#137">137: </a><span class="php-comment">     *
204*8817535bSAndreas Gohr</span></span><span id="138" class="l"><a href="#138">138: </a><span class="php-comment">     * @param string $line
205*8817535bSAndreas Gohr</span></span><span id="139" class="l"><a href="#139">139: </a><span class="php-comment">     * @return array
206*8817535bSAndreas Gohr</span></span><span id="140" class="l"><a href="#140">140: </a><span class="php-comment">     */</span>
207*8817535bSAndreas Gohr</span><span id="141" class="l"><a href="#141">141: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">function</span> punctuationSplit(<span class="php-var">$line</span>) {
208*8817535bSAndreas Gohr</span><span id="142" class="l"><a href="#142">142: </a>        <span class="php-var">$parts</span> = <span class="php-keyword1">array</span>();
209*8817535bSAndreas Gohr</span><span id="143" class="l"><a href="#143">143: </a>
210*8817535bSAndreas Gohr</span><span id="144" class="l"><a href="#144">144: </a>        <span class="php-var">$chars</span> = <span class="php-keyword2">preg_split</span>(<span class="php-quote">'//u'</span>, <span class="php-var">$line</span>, -<span class="php-num">1</span>, PREG_SPLIT_NO_EMPTY); <span class="php-comment">// This is UTF8 multibyte safe!</span>
211*8817535bSAndreas Gohr</span><span id="145" class="l"><a href="#145">145: </a>        <span class="php-var">$is_terminal</span> = <span class="php-keyword2">in_array</span>(<span class="php-var">$chars</span>[<span class="php-num">0</span>], <span class="php-var">$this</span>-&gt;terminals);
212*8817535bSAndreas Gohr</span><span id="146" class="l"><a href="#146">146: </a>
213*8817535bSAndreas Gohr</span><span id="147" class="l"><a href="#147">147: </a>        <span class="php-var">$part</span> = <span class="php-quote">''</span>;
214*8817535bSAndreas Gohr</span><span id="148" class="l"><a href="#148">148: </a>        <span class="php-keyword1">foreach</span> (<span class="php-var">$chars</span> <span class="php-keyword1">as</span> <span class="php-var">$index</span> =&gt; <span class="php-var">$char</span>) {
215*8817535bSAndreas Gohr</span><span id="149" class="l"><a href="#149">149: </a>            <span class="php-keyword1">if</span> (<span class="php-keyword2">in_array</span>(<span class="php-var">$char</span>, <span class="php-var">$this</span>-&gt;terminals) !== <span class="php-var">$is_terminal</span>) {
216*8817535bSAndreas Gohr</span><span id="150" class="l"><a href="#150">150: </a>                <span class="php-var">$parts</span>[] = <span class="php-var">$part</span>;
217*8817535bSAndreas Gohr</span><span id="151" class="l"><a href="#151">151: </a>                <span class="php-var">$part</span> = <span class="php-quote">''</span>;
218*8817535bSAndreas Gohr</span><span id="152" class="l"><a href="#152">152: </a>                <span class="php-var">$is_terminal</span> = !<span class="php-var">$is_terminal</span>;
219*8817535bSAndreas Gohr</span><span id="153" class="l"><a href="#153">153: </a>            }
220*8817535bSAndreas Gohr</span><span id="154" class="l"><a href="#154">154: </a>            <span class="php-var">$part</span> .= <span class="php-var">$char</span>;
221*8817535bSAndreas Gohr</span><span id="155" class="l"><a href="#155">155: </a>        }
222*8817535bSAndreas Gohr</span><span id="156" class="l"><a href="#156">156: </a>
223*8817535bSAndreas Gohr</span><span id="157" class="l"><a href="#157">157: </a>        <span class="php-keyword1">if</span> (!<span class="php-keyword1">empty</span>(<span class="php-var">$part</span>)) {
224*8817535bSAndreas Gohr</span><span id="158" class="l"><a href="#158">158: </a>            <span class="php-var">$parts</span>[] = <span class="php-var">$part</span>;
225*8817535bSAndreas Gohr</span><span id="159" class="l"><a href="#159">159: </a>        }
226*8817535bSAndreas Gohr</span><span id="160" class="l"><a href="#160">160: </a>
227*8817535bSAndreas Gohr</span><span id="161" class="l"><a href="#161">161: </a>        <span class="php-keyword1">return</span> <span class="php-var">$parts</span>;
228*8817535bSAndreas Gohr</span><span id="162" class="l"><a href="#162">162: </a>    }
229*8817535bSAndreas Gohr</span><span id="163" class="l"><a href="#163">163: </a>
230*8817535bSAndreas Gohr</span><span id="164" class="l"><a href="#164">164: </a>    <span class="php-comment">/**
231*8817535bSAndreas Gohr</span></span><span id="165" class="l"><a href="#165">165: </a><span class="php-comment">     * Appends each terminal item after its preceding
232*8817535bSAndreas Gohr</span></span><span id="166" class="l"><a href="#166">166: </a><span class="php-comment">     * non-terminals.
233*8817535bSAndreas Gohr</span></span><span id="167" class="l"><a href="#167">167: </a><span class="php-comment">     *
234*8817535bSAndreas Gohr</span></span><span id="168" class="l"><a href="#168">168: </a><span class="php-comment">     * Multibyte safe (at least for UTF-8)
235*8817535bSAndreas Gohr</span></span><span id="169" class="l"><a href="#169">169: </a><span class="php-comment">     *
236*8817535bSAndreas Gohr</span></span><span id="170" class="l"><a href="#170">170: </a><span class="php-comment">     * For example:
237*8817535bSAndreas Gohr</span></span><span id="171" class="l"><a href="#171">171: </a><span class="php-comment">     *  [ &quot;There &quot;, &quot;...&quot;, &quot; is&quot;, &quot;.&quot;, &quot; More&quot;, &quot;!&quot; ]
238*8817535bSAndreas Gohr</span></span><span id="172" class="l"><a href="#172">172: </a><span class="php-comment">     *      ... becomes ...
239*8817535bSAndreas Gohr</span></span><span id="173" class="l"><a href="#173">173: </a><span class="php-comment">     *  [ &quot;There ... is.&quot;, &quot;More!&quot; ]
240*8817535bSAndreas Gohr</span></span><span id="174" class="l"><a href="#174">174: </a><span class="php-comment">     *
241*8817535bSAndreas Gohr</span></span><span id="175" class="l"><a href="#175">175: </a><span class="php-comment">     * @param array $punctuations
242*8817535bSAndreas Gohr</span></span><span id="176" class="l"><a href="#176">176: </a><span class="php-comment">     * @return array
243*8817535bSAndreas Gohr</span></span><span id="177" class="l"><a href="#177">177: </a><span class="php-comment">     */</span>
244*8817535bSAndreas Gohr</span><span id="178" class="l"><a href="#178">178: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">function</span> punctuationMerge(<span class="php-var">$punctuations</span>) {
245*8817535bSAndreas Gohr</span><span id="179" class="l"><a href="#179">179: </a>        <span class="php-var">$definite_terminals</span> = <span class="php-keyword2">array_diff</span>(<span class="php-var">$this</span>-&gt;terminals, <span class="php-var">$this</span>-&gt;abbreviators);
246*8817535bSAndreas Gohr</span><span id="180" class="l"><a href="#180">180: </a>
247*8817535bSAndreas Gohr</span><span id="181" class="l"><a href="#181">181: </a>        <span class="php-var">$merges</span> = <span class="php-keyword1">array</span>();
248*8817535bSAndreas Gohr</span><span id="182" class="l"><a href="#182">182: </a>        <span class="php-var">$merge</span> = <span class="php-quote">''</span>;
249*8817535bSAndreas Gohr</span><span id="183" class="l"><a href="#183">183: </a>
250*8817535bSAndreas Gohr</span><span id="184" class="l"><a href="#184">184: </a>        <span class="php-keyword1">foreach</span> (<span class="php-var">$punctuations</span> <span class="php-keyword1">as</span> <span class="php-var">$punctuation</span>) {
251*8817535bSAndreas Gohr</span><span id="185" class="l"><a href="#185">185: </a>            <span class="php-keyword1">if</span> (<span class="php-var">$punctuation</span> !== <span class="php-quote">''</span>) {
252*8817535bSAndreas Gohr</span><span id="186" class="l"><a href="#186">186: </a>                <span class="php-var">$merge</span>.= <span class="php-var">$punctuation</span>;
253*8817535bSAndreas Gohr</span><span id="187" class="l"><a href="#187">187: </a>                <span class="php-keyword1">if</span> (<span class="php-keyword2">mb_strlen</span>(<span class="php-var">$punctuation</span>) === <span class="php-num">1</span> &amp;&amp; <span class="php-keyword2">in_array</span>(<span class="php-var">$punctuation</span>, <span class="php-var">$this</span>-&gt;terminals)) {
254*8817535bSAndreas Gohr</span><span id="188" class="l"><a href="#188">188: </a>                    <span class="php-var">$merges</span>[] = <span class="php-var">$merge</span>;
255*8817535bSAndreas Gohr</span><span id="189" class="l"><a href="#189">189: </a>                    <span class="php-var">$merge</span> = <span class="php-quote">''</span>;
256*8817535bSAndreas Gohr</span><span id="190" class="l"><a href="#190">190: </a>                } <span class="php-keyword1">else</span> {
257*8817535bSAndreas Gohr</span><span id="191" class="l"><a href="#191">191: </a>                    <span class="php-keyword1">foreach</span> (<span class="php-var">$definite_terminals</span> <span class="php-keyword1">as</span> <span class="php-var">$terminal</span>) {
258*8817535bSAndreas Gohr</span><span id="192" class="l"><a href="#192">192: </a>                        <span class="php-keyword1">if</span> (<span class="php-keyword2">mb_strpos</span>(<span class="php-var">$punctuation</span>, <span class="php-var">$terminal</span>) !== <span class="php-keyword1">false</span>) {
259*8817535bSAndreas Gohr</span><span id="193" class="l"><a href="#193">193: </a>                            <span class="php-var">$merges</span>[] = <span class="php-var">$merge</span>;
260*8817535bSAndreas Gohr</span><span id="194" class="l"><a href="#194">194: </a>                            <span class="php-var">$merge</span> = <span class="php-quote">''</span>;
261*8817535bSAndreas Gohr</span><span id="195" class="l"><a href="#195">195: </a>                            <span class="php-keyword1">break</span>;
262*8817535bSAndreas Gohr</span><span id="196" class="l"><a href="#196">196: </a>                        }
263*8817535bSAndreas Gohr</span><span id="197" class="l"><a href="#197">197: </a>                    }
264*8817535bSAndreas Gohr</span><span id="198" class="l"><a href="#198">198: </a>                }
265*8817535bSAndreas Gohr</span><span id="199" class="l"><a href="#199">199: </a>            }
266*8817535bSAndreas Gohr</span><span id="200" class="l"><a href="#200">200: </a>        }
267*8817535bSAndreas Gohr</span><span id="201" class="l"><a href="#201">201: </a>        <span class="php-keyword1">if</span> (!<span class="php-keyword1">empty</span>(<span class="php-var">$merge</span>)) {
268*8817535bSAndreas Gohr</span><span id="202" class="l"><a href="#202">202: </a>            <span class="php-var">$merges</span>[] = <span class="php-var">$merge</span>;
269*8817535bSAndreas Gohr</span><span id="203" class="l"><a href="#203">203: </a>        }
270*8817535bSAndreas Gohr</span><span id="204" class="l"><a href="#204">204: </a>
271*8817535bSAndreas Gohr</span><span id="205" class="l"><a href="#205">205: </a>        <span class="php-keyword1">return</span> <span class="php-var">$merges</span>;
272*8817535bSAndreas Gohr</span><span id="206" class="l"><a href="#206">206: </a>    }
273*8817535bSAndreas Gohr</span><span id="207" class="l"><a href="#207">207: </a>
274*8817535bSAndreas Gohr</span><span id="208" class="l"><a href="#208">208: </a>    <span class="php-comment">/**
275*8817535bSAndreas Gohr</span></span><span id="209" class="l"><a href="#209">209: </a><span class="php-comment">     * Merges any one-word items with their preceding items.
276*8817535bSAndreas Gohr</span></span><span id="210" class="l"><a href="#210">210: </a><span class="php-comment">     *
277*8817535bSAndreas Gohr</span></span><span id="211" class="l"><a href="#211">211: </a><span class="php-comment">     * Multibyte safe
278*8817535bSAndreas Gohr</span></span><span id="212" class="l"><a href="#212">212: </a><span class="php-comment">     *
279*8817535bSAndreas Gohr</span></span><span id="213" class="l"><a href="#213">213: </a><span class="php-comment">     * For example:
280*8817535bSAndreas Gohr</span></span><span id="214" class="l"><a href="#214">214: </a><span class="php-comment">     *  [ &quot;There ... is.&quot;, &quot;More!&quot; ]
281*8817535bSAndreas Gohr</span></span><span id="215" class="l"><a href="#215">215: </a><span class="php-comment">     *      ... becomes ...
282*8817535bSAndreas Gohr</span></span><span id="216" class="l"><a href="#216">216: </a><span class="php-comment">     *  [ &quot;There ... is. More!&quot; ]
283*8817535bSAndreas Gohr</span></span><span id="217" class="l"><a href="#217">217: </a><span class="php-comment">     *
284*8817535bSAndreas Gohr</span></span><span id="218" class="l"><a href="#218">218: </a><span class="php-comment">     * @param array $fragments
285*8817535bSAndreas Gohr</span></span><span id="219" class="l"><a href="#219">219: </a><span class="php-comment">     * @return array
286*8817535bSAndreas Gohr</span></span><span id="220" class="l"><a href="#220">220: </a><span class="php-comment">     */</span>
287*8817535bSAndreas Gohr</span><span id="221" class="l"><a href="#221">221: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">function</span> abbreviationMerge(<span class="php-var">$fragments</span>) {
288*8817535bSAndreas Gohr</span><span id="222" class="l"><a href="#222">222: </a>        <span class="php-var">$non_abbreviating_terminals</span> = <span class="php-keyword2">array_diff</span>(<span class="php-var">$this</span>-&gt;terminals, <span class="php-var">$this</span>-&gt;abbreviators);
289*8817535bSAndreas Gohr</span><span id="223" class="l"><a href="#223">223: </a>
290*8817535bSAndreas Gohr</span><span id="224" class="l"><a href="#224">224: </a>        <span class="php-var">$abbreviations</span> = <span class="php-keyword1">array</span>();
291*8817535bSAndreas Gohr</span><span id="225" class="l"><a href="#225">225: </a>
292*8817535bSAndreas Gohr</span><span id="226" class="l"><a href="#226">226: </a>        <span class="php-var">$abbreviation</span> = <span class="php-quote">''</span>;
293*8817535bSAndreas Gohr</span><span id="227" class="l"><a href="#227">227: </a>
294*8817535bSAndreas Gohr</span><span id="228" class="l"><a href="#228">228: </a>        <span class="php-var">$previous_word_count</span> = <span class="php-keyword1">null</span>;
295*8817535bSAndreas Gohr</span><span id="229" class="l"><a href="#229">229: </a>        <span class="php-var">$previous_word_ending</span> = <span class="php-keyword1">null</span>;
296*8817535bSAndreas Gohr</span><span id="230" class="l"><a href="#230">230: </a>        <span class="php-keyword1">foreach</span> (<span class="php-var">$fragments</span> <span class="php-keyword1">as</span> <span class="php-var">$fragment</span>) {
297*8817535bSAndreas Gohr</span><span id="231" class="l"><a href="#231">231: </a>            <span class="php-var">$word_count</span> = <span class="php-keyword2">count</span>(<span class="php-keyword2">mb_split</span>(<span class="php-quote">'\s+'</span>, self::mbTrim(<span class="php-var">$fragment</span>)));
298*8817535bSAndreas Gohr</span><span id="232" class="l"><a href="#232">232: </a>            <span class="php-var">$starts_with_space</span> = <span class="php-keyword2">mb_ereg_match</span>(<span class="php-quote">'^\s+'</span>, <span class="php-var">$fragment</span>);
299*8817535bSAndreas Gohr</span><span id="233" class="l"><a href="#233">233: </a>            <span class="php-var">$after_non_abbreviating_terminal</span> = <span class="php-keyword2">in_array</span>(<span class="php-var">$previous_word_ending</span>, <span class="php-var">$non_abbreviating_terminals</span>);
300*8817535bSAndreas Gohr</span><span id="234" class="l"><a href="#234">234: </a>
301*8817535bSAndreas Gohr</span><span id="235" class="l"><a href="#235">235: </a>            <span class="php-keyword1">if</span> (<span class="php-var">$after_non_abbreviating_terminal</span> || (<span class="php-var">$previous_word_count</span> !== <span class="php-keyword1">null</span> &amp;&amp; (<span class="php-var">$previous_word_count</span> !== <span class="php-num">1</span> || <span class="php-var">$word_count</span> !== <span class="php-num">1</span>) &amp;&amp; <span class="php-var">$starts_with_space</span>)) {
302*8817535bSAndreas Gohr</span><span id="236" class="l"><a href="#236">236: </a>                <span class="php-var">$abbreviations</span>[] = <span class="php-var">$abbreviation</span>;
303*8817535bSAndreas Gohr</span><span id="237" class="l"><a href="#237">237: </a>                <span class="php-var">$abbreviation</span> = <span class="php-quote">''</span>;
304*8817535bSAndreas Gohr</span><span id="238" class="l"><a href="#238">238: </a>            }
305*8817535bSAndreas Gohr</span><span id="239" class="l"><a href="#239">239: </a>
306*8817535bSAndreas Gohr</span><span id="240" class="l"><a href="#240">240: </a>            <span class="php-var">$abbreviation</span>           .= <span class="php-var">$fragment</span>;
307*8817535bSAndreas Gohr</span><span id="241" class="l"><a href="#241">241: </a>            <span class="php-var">$previous_word_count</span>    = <span class="php-var">$word_count</span>;
308*8817535bSAndreas Gohr</span><span id="242" class="l"><a href="#242">242: </a>            <span class="php-var">$previous_word_ending</span>   = <span class="php-keyword2">mb_substr</span>(<span class="php-var">$fragment</span>, -<span class="php-num">1</span>);
309*8817535bSAndreas Gohr</span><span id="243" class="l"><a href="#243">243: </a>        }
310*8817535bSAndreas Gohr</span><span id="244" class="l"><a href="#244">244: </a>        <span class="php-keyword1">if</span> (<span class="php-var">$abbreviation</span> !== <span class="php-quote">''</span>) {
311*8817535bSAndreas Gohr</span><span id="245" class="l"><a href="#245">245: </a>            <span class="php-var">$abbreviations</span>[] = <span class="php-var">$abbreviation</span>;
312*8817535bSAndreas Gohr</span><span id="246" class="l"><a href="#246">246: </a>        }
313*8817535bSAndreas Gohr</span><span id="247" class="l"><a href="#247">247: </a>
314*8817535bSAndreas Gohr</span><span id="248" class="l"><a href="#248">248: </a>        <span class="php-keyword1">return</span> <span class="php-var">$abbreviations</span>;
315*8817535bSAndreas Gohr</span><span id="249" class="l"><a href="#249">249: </a>    }
316*8817535bSAndreas Gohr</span><span id="250" class="l"><a href="#250">250: </a>
317*8817535bSAndreas Gohr</span><span id="251" class="l"><a href="#251">251: </a>    <span class="php-comment">/**
318*8817535bSAndreas Gohr</span></span><span id="252" class="l"><a href="#252">252: </a><span class="php-comment">     * Merges items into larger sentences.
319*8817535bSAndreas Gohr</span></span><span id="253" class="l"><a href="#253">253: </a><span class="php-comment">     *
320*8817535bSAndreas Gohr</span></span><span id="254" class="l"><a href="#254">254: </a><span class="php-comment">     * Multibyte safe
321*8817535bSAndreas Gohr</span></span><span id="255" class="l"><a href="#255">255: </a><span class="php-comment">     *
322*8817535bSAndreas Gohr</span></span><span id="256" class="l"><a href="#256">256: </a><span class="php-comment">     * @param array $shorts
323*8817535bSAndreas Gohr</span></span><span id="257" class="l"><a href="#257">257: </a><span class="php-comment">     * @return array
324*8817535bSAndreas Gohr</span></span><span id="258" class="l"><a href="#258">258: </a><span class="php-comment">     */</span>
325*8817535bSAndreas Gohr</span><span id="259" class="l"><a href="#259">259: </a>    <span class="php-keyword1">private</span> <span class="php-keyword1">function</span> sentenceMerge(<span class="php-var">$shorts</span>) {
326*8817535bSAndreas Gohr</span><span id="260" class="l"><a href="#260">260: </a>        <span class="php-var">$non_abbreviating_terminals</span> = <span class="php-keyword2">array_diff</span>(<span class="php-var">$this</span>-&gt;terminals, <span class="php-var">$this</span>-&gt;abbreviators);
327*8817535bSAndreas Gohr</span><span id="261" class="l"><a href="#261">261: </a>
328*8817535bSAndreas Gohr</span><span id="262" class="l"><a href="#262">262: </a>        <span class="php-var">$sentences</span> = <span class="php-keyword1">array</span>();
329*8817535bSAndreas Gohr</span><span id="263" class="l"><a href="#263">263: </a>
330*8817535bSAndreas Gohr</span><span id="264" class="l"><a href="#264">264: </a>        <span class="php-var">$sentence</span> = <span class="php-quote">''</span>;
331*8817535bSAndreas Gohr</span><span id="265" class="l"><a href="#265">265: </a>        <span class="php-var">$has_words</span> = <span class="php-keyword1">false</span>;
332*8817535bSAndreas Gohr</span><span id="266" class="l"><a href="#266">266: </a>        <span class="php-var">$previous_word_ending</span> = <span class="php-keyword1">null</span>;
333*8817535bSAndreas Gohr</span><span id="267" class="l"><a href="#267">267: </a>        <span class="php-keyword1">foreach</span> (<span class="php-var">$shorts</span> <span class="php-keyword1">as</span> <span class="php-var">$short</span>) {
334*8817535bSAndreas Gohr</span><span id="268" class="l"><a href="#268">268: </a>            <span class="php-var">$word_count</span> = <span class="php-keyword2">count</span>(<span class="php-keyword2">mb_split</span>(<span class="php-quote">'\s+'</span>, self::mbTrim(<span class="php-var">$short</span>)));
335*8817535bSAndreas Gohr</span><span id="269" class="l"><a href="#269">269: </a>            <span class="php-var">$after_non_abbreviating_terminal</span> = <span class="php-keyword2">in_array</span>(<span class="php-var">$previous_word_ending</span>, <span class="php-var">$non_abbreviating_terminals</span>);
336*8817535bSAndreas Gohr</span><span id="270" class="l"><a href="#270">270: </a>
337*8817535bSAndreas Gohr</span><span id="271" class="l"><a href="#271">271: </a>            <span class="php-keyword1">if</span> (<span class="php-var">$after_non_abbreviating_terminal</span> || (<span class="php-var">$has_words</span> &amp;&amp; <span class="php-var">$word_count</span> &gt; <span class="php-num">1</span>)) {
338*8817535bSAndreas Gohr</span><span id="272" class="l"><a href="#272">272: </a>                <span class="php-var">$sentences</span>[] = <span class="php-var">$sentence</span>;
339*8817535bSAndreas Gohr</span><span id="273" class="l"><a href="#273">273: </a>                <span class="php-var">$sentence</span> = <span class="php-quote">''</span>;
340*8817535bSAndreas Gohr</span><span id="274" class="l"><a href="#274">274: </a>                <span class="php-var">$has_words</span> = <span class="php-var">$word_count</span> &gt; <span class="php-num">1</span>;
341*8817535bSAndreas Gohr</span><span id="275" class="l"><a href="#275">275: </a>            } <span class="php-keyword1">else</span> {
342*8817535bSAndreas Gohr</span><span id="276" class="l"><a href="#276">276: </a>                <span class="php-var">$has_words</span> = <span class="php-var">$has_words</span> || <span class="php-var">$word_count</span> &gt; <span class="php-num">1</span>;
343*8817535bSAndreas Gohr</span><span id="277" class="l"><a href="#277">277: </a>            }
344*8817535bSAndreas Gohr</span><span id="278" class="l"><a href="#278">278: </a>
345*8817535bSAndreas Gohr</span><span id="279" class="l"><a href="#279">279: </a>            <span class="php-var">$sentence</span>.= <span class="php-var">$short</span>;
346*8817535bSAndreas Gohr</span><span id="280" class="l"><a href="#280">280: </a>            <span class="php-var">$previous_word_ending</span> = <span class="php-keyword2">mb_substr</span>(<span class="php-var">$short</span>, -<span class="php-num">1</span>);
347*8817535bSAndreas Gohr</span><span id="281" class="l"><a href="#281">281: </a>        }
348*8817535bSAndreas Gohr</span><span id="282" class="l"><a href="#282">282: </a>        <span class="php-keyword1">if</span> (!<span class="php-keyword1">empty</span>(<span class="php-var">$sentence</span>)) {
349*8817535bSAndreas Gohr</span><span id="283" class="l"><a href="#283">283: </a>            <span class="php-var">$sentences</span>[] = <span class="php-var">$sentence</span>;
350*8817535bSAndreas Gohr</span><span id="284" class="l"><a href="#284">284: </a>        }
351*8817535bSAndreas Gohr</span><span id="285" class="l"><a href="#285">285: </a>
352*8817535bSAndreas Gohr</span><span id="286" class="l"><a href="#286">286: </a>        <span class="php-keyword1">return</span> <span class="php-var">$sentences</span>;
353*8817535bSAndreas Gohr</span><span id="287" class="l"><a href="#287">287: </a>    }
354*8817535bSAndreas Gohr</span><span id="288" class="l"><a href="#288">288: </a>
355*8817535bSAndreas Gohr</span><span id="289" class="l"><a href="#289">289: </a>    <span class="php-comment">/**
356*8817535bSAndreas Gohr</span></span><span id="290" class="l"><a href="#290">290: </a><span class="php-comment">     * Return the sentences detected in the provided text.
357*8817535bSAndreas Gohr</span></span><span id="291" class="l"><a href="#291">291: </a><span class="php-comment">     * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
358*8817535bSAndreas Gohr</span></span><span id="292" class="l"><a href="#292">292: </a><span class="php-comment">     * @param string $text
359*8817535bSAndreas Gohr</span></span><span id="293" class="l"><a href="#293">293: </a><span class="php-comment">     * @param integer $flags
360*8817535bSAndreas Gohr</span></span><span id="294" class="l"><a href="#294">294: </a><span class="php-comment">     * @return array
361*8817535bSAndreas Gohr</span></span><span id="295" class="l"><a href="#295">295: </a><span class="php-comment">     */</span>
362*8817535bSAndreas Gohr</span><span id="296" class="l"><a href="#296">296: </a>    <span class="php-keyword1">public</span> <span class="php-keyword1">function</span> <span class="php-keyword2">split</span>(<span class="php-var">$text</span>, <span class="php-var">$flags</span> = <span class="php-num">0</span>) {
363*8817535bSAndreas Gohr</span><span id="297" class="l"><a href="#297">297: </a>        <span class="php-var">$sentences</span> = <span class="php-keyword1">array</span>();
364*8817535bSAndreas Gohr</span><span id="298" class="l"><a href="#298">298: </a>
365*8817535bSAndreas Gohr</span><span id="299" class="l"><a href="#299">299: </a>        <span class="php-comment">// Split</span>
366*8817535bSAndreas Gohr</span><span id="300" class="l"><a href="#300">300: </a>        <span class="php-keyword1">foreach</span> (self::linebreakSplit(<span class="php-var">$text</span>) <span class="php-keyword1">as</span> <span class="php-var">$line</span>) {
367*8817535bSAndreas Gohr</span><span id="301" class="l"><a href="#301">301: </a>            <span class="php-keyword1">if</span> (self::mbTrim(<span class="php-var">$line</span>) !== <span class="php-quote">''</span>) {
368*8817535bSAndreas Gohr</span><span id="302" class="l"><a href="#302">302: </a>                <span class="php-var">$punctuations</span>   = <span class="php-var">$this</span>-&gt;punctuationSplit(<span class="php-var">$line</span>);
369*8817535bSAndreas Gohr</span><span id="303" class="l"><a href="#303">303: </a>                <span class="php-var">$merges</span>         = <span class="php-var">$this</span>-&gt;punctuationMerge(<span class="php-var">$punctuations</span>);
370*8817535bSAndreas Gohr</span><span id="304" class="l"><a href="#304">304: </a>                <span class="php-var">$shorts</span>         = <span class="php-var">$this</span>-&gt;abbreviationMerge(<span class="php-var">$merges</span>);
371*8817535bSAndreas Gohr</span><span id="305" class="l"><a href="#305">305: </a>                <span class="php-var">$sentences</span>      = <span class="php-keyword2">array_merge</span>(<span class="php-var">$sentences</span>, <span class="php-var">$this</span>-&gt;sentenceMerge(<span class="php-var">$shorts</span>));
372*8817535bSAndreas Gohr</span><span id="306" class="l"><a href="#306">306: </a>            }
373*8817535bSAndreas Gohr</span><span id="307" class="l"><a href="#307">307: </a>        }
374*8817535bSAndreas Gohr</span><span id="308" class="l"><a href="#308">308: </a>
375*8817535bSAndreas Gohr</span><span id="309" class="l"><a href="#309">309: </a>        <span class="php-comment">// Post process</span>
376*8817535bSAndreas Gohr</span><span id="310" class="l"><a href="#310">310: </a>        <span class="php-keyword1">if</span> (<span class="php-var">$flags</span> &amp; self::SPLIT_TRIM) {
377*8817535bSAndreas Gohr</span><span id="311" class="l"><a href="#311">311: </a>            <span class="php-keyword1">foreach</span> (<span class="php-var">$sentences</span> <span class="php-keyword1">as</span> &amp;<span class="php-var">$sentence</span>) {
378*8817535bSAndreas Gohr</span><span id="312" class="l"><a href="#312">312: </a>                <span class="php-var">$sentence</span> = self::mbTrim(<span class="php-var">$sentence</span>);
379*8817535bSAndreas Gohr</span><span id="313" class="l"><a href="#313">313: </a>            }
380*8817535bSAndreas Gohr</span><span id="314" class="l"><a href="#314">314: </a>            <span class="php-keyword1">unset</span>(<span class="php-var">$sentence</span>);
381*8817535bSAndreas Gohr</span><span id="315" class="l"><a href="#315">315: </a>        }
382*8817535bSAndreas Gohr</span><span id="316" class="l"><a href="#316">316: </a>
383*8817535bSAndreas Gohr</span><span id="317" class="l"><a href="#317">317: </a>        <span class="php-keyword1">return</span> <span class="php-var">$sentences</span>;
384*8817535bSAndreas Gohr</span><span id="318" class="l"><a href="#318">318: </a>    }
385*8817535bSAndreas Gohr</span><span id="319" class="l"><a href="#319">319: </a>
386*8817535bSAndreas Gohr</span><span id="320" class="l"><a href="#320">320: </a>    <span class="php-comment">/**
387*8817535bSAndreas Gohr</span></span><span id="321" class="l"><a href="#321">321: </a><span class="php-comment">     * Return the number of sentences detected in the provided text.
388*8817535bSAndreas Gohr</span></span><span id="322" class="l"><a href="#322">322: </a><span class="php-comment">     * @param string $text
389*8817535bSAndreas Gohr</span></span><span id="323" class="l"><a href="#323">323: </a><span class="php-comment">     * @return integer
390*8817535bSAndreas Gohr</span></span><span id="324" class="l"><a href="#324">324: </a><span class="php-comment">     */</span>
391*8817535bSAndreas Gohr</span><span id="325" class="l"><a href="#325">325: </a>    <span class="php-keyword1">public</span> <span class="php-keyword1">function</span> <span class="php-keyword2">count</span>(<span class="php-var">$text</span>) {
392*8817535bSAndreas Gohr</span><span id="326" class="l"><a href="#326">326: </a>        <span class="php-keyword1">return</span> <span class="php-keyword2">count</span>(<span class="php-var">$this</span>-&gt;<span class="php-keyword2">split</span>(<span class="php-var">$text</span>));
393*8817535bSAndreas Gohr</span><span id="327" class="l"><a href="#327">327: </a>    }
394*8817535bSAndreas Gohr</span><span id="328" class="l"><a href="#328">328: </a>}</span></code></pre>
395*8817535bSAndreas Gohr
396*8817535bSAndreas Gohr	<div id="footer">
397*8817535bSAndreas Gohr		phpSentence API documentation generated by <a href="http://apigen.org">ApiGen</a>
398*8817535bSAndreas Gohr	</div>
399*8817535bSAndreas Gohr</div>
400*8817535bSAndreas Gohr</div>
401*8817535bSAndreas Gohr<script src="resources/combined.js?dc3592a696e654c132a2cb2ca318def0ec6c3f17"></script>
402*8817535bSAndreas Gohr<script src="elementlist.js?94082770cba9dfa8d9d0c03634ee64ddac29c138"></script>
403*8817535bSAndreas Gohr</body>
404*8817535bSAndreas Gohr</html>
405