1<?php
2
3require_once(HTML2PS_DIR.'error.php');
4
// Unicode line breaking classes. The two-letter suffixes are the class
// abbreviations from UAX #14 ("Unicode Line Breaking Algorithm"); the
// trailing comments give the UAX #14 class name for each abbreviation.
// The integer values are opaque identifiers: this file uses them as the
// keys of $GLOBALS['_g_line_break_class_table'].

// Non-tailorable Line Breaking Classes
define('UC_LINE_BREAK_BK', 1);   // Mandatory Break
define('UC_LINE_BREAK_CR', 2);   // Carriage Return
define('UC_LINE_BREAK_LF', 3);   // Line Feed
define('UC_LINE_BREAK_CM', 4);   // Combining Mark
define('UC_LINE_BREAK_NL', 5);   // Next Line
define('UC_LINE_BREAK_SG', 6);   // Surrogate
define('UC_LINE_BREAK_WJ', 7);   // Word Joiner
define('UC_LINE_BREAK_ZW', 8);   // Zero Width Space
define('UC_LINE_BREAK_GL', 9);   // Non-breaking ("Glue")
define('UC_LINE_BREAK_SP', 10);  // Space

// Break opportunities
define('UC_LINE_BREAK_B2', 11);  // Break Opportunity Before and After
define('UC_LINE_BREAK_BA', 12);  // Break After
define('UC_LINE_BREAK_BB', 13);  // Break Before
define('UC_LINE_BREAK_HY', 14);  // Hyphen
define('UC_LINE_BREAK_CB', 15);  // Contingent Break Opportunity

// Characters Prohibiting Certain Breaks
define('UC_LINE_BREAK_CL', 16);  // Close Punctuation
define('UC_LINE_BREAK_EX', 17);  // Exclamation/Interrogation
define('UC_LINE_BREAK_IN', 18);  // Inseparable
define('UC_LINE_BREAK_NS', 19);  // Nonstarter
define('UC_LINE_BREAK_OP', 20);  // Open Punctuation
define('UC_LINE_BREAK_QU', 21);  // Quotation

// Numeric Context
define('UC_LINE_BREAK_IS', 22);  // Infix Numeric Separator
define('UC_LINE_BREAK_NU', 23);  // Numeric
define('UC_LINE_BREAK_PO', 24);  // Postfix Numeric
define('UC_LINE_BREAK_PR', 25);  // Prefix Numeric
define('UC_LINE_BREAK_SY', 26);  // Symbols Allowing Break After

// Other Characters
define('UC_LINE_BREAK_AI', 27);  // Ambiguous (Alphabetic or Ideographic)
define('UC_LINE_BREAK_AL', 28);  // Alphabetic
define('UC_LINE_BREAK_H2', 29);  // Hangul LV Syllable
define('UC_LINE_BREAK_H3', 30);  // Hangul LVT Syllable
define('UC_LINE_BREAK_ID', 31);  // Ideographic
define('UC_LINE_BREAK_JL', 32);  // Hangul L Jamo
define('UC_LINE_BREAK_JV', 33);  // Hangul V Jamo
define('UC_LINE_BREAK_JT', 34);  // Hangul T Jamo
define('UC_LINE_BREAK_SA', 35);  // Complex Context Dependent (South East Asian)
define('UC_LINE_BREAK_XX', 36);  // Unknown
50
// Break modes: the values stored in the cells of
// $GLOBALS['_g_line_break_class_table'].
// NOTE(review): the descriptions below follow the pair-table actions of
// UAX #14; the outer table key appears to be the class before the break
// position and the inner key the class after it (the OP row is entirely
// prohibited) -- confirm against the code that consumes the table.
define('LB_PROHIBITED', 1);     // no break, presumably even across spaces
define('LB_INDIRECT', 2);       // presumably: break only if spaces intervene
define('LB_PROHIBITED_CM', 3);  // prohibited variant; in the visible rows it appears only in the CM column of the OP row
define('LB_INDIRECT_CM', 4);    // indirect variant; in the visible rows it appears only in the CM column
define('LB_DIRECT', 5);         // presumably: break allowed directly between the pair
define('LB_EXPLICIT', 6);       // presumably an explicit (mandatory) break; not used in the visible table rows
58
59$GLOBALS['_g_line_break_class_table'] =
60array(UC_LINE_BREAK_OP => array(UC_LINE_BREAK_OP => LB_PROHIBITED,
61                                UC_LINE_BREAK_CL => LB_PROHIBITED,
62                                UC_LINE_BREAK_QU => LB_PROHIBITED,
63                                UC_LINE_BREAK_GL => LB_PROHIBITED,
64                                UC_LINE_BREAK_NS => LB_PROHIBITED,
65                                UC_LINE_BREAK_EX => LB_PROHIBITED,
66                                UC_LINE_BREAK_SY => LB_PROHIBITED,
67                                UC_LINE_BREAK_IS => LB_PROHIBITED,
68                                UC_LINE_BREAK_PR => LB_PROHIBITED,
69                                UC_LINE_BREAK_PO => LB_PROHIBITED,
70                                UC_LINE_BREAK_NU => LB_PROHIBITED,
71                                UC_LINE_BREAK_AL => LB_PROHIBITED,
72                                UC_LINE_BREAK_ID => LB_PROHIBITED,
73                                UC_LINE_BREAK_IN => LB_PROHIBITED,
74                                UC_LINE_BREAK_HY => LB_PROHIBITED,
75                                UC_LINE_BREAK_BA => LB_PROHIBITED,
76                                UC_LINE_BREAK_BB => LB_PROHIBITED,
77                                UC_LINE_BREAK_B2 => LB_PROHIBITED,
78                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
79                                UC_LINE_BREAK_CM => LB_PROHIBITED_CM,
80                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
81                                UC_LINE_BREAK_H2 => LB_PROHIBITED,
82                                UC_LINE_BREAK_H3 => LB_PROHIBITED,
83                                UC_LINE_BREAK_JL => LB_PROHIBITED,
84                                UC_LINE_BREAK_JV => LB_PROHIBITED,
85                                UC_LINE_BREAK_JT => LB_PROHIBITED),
86      UC_LINE_BREAK_CL => array(UC_LINE_BREAK_OP => LB_DIRECT,
87                                UC_LINE_BREAK_CL => LB_PROHIBITED,
88                                UC_LINE_BREAK_QU => LB_INDIRECT,
89                                UC_LINE_BREAK_GL => LB_INDIRECT,
90                                UC_LINE_BREAK_NS => LB_PROHIBITED,
91                                UC_LINE_BREAK_EX => LB_PROHIBITED,
92                                UC_LINE_BREAK_SY => LB_PROHIBITED,
93                                UC_LINE_BREAK_IS => LB_PROHIBITED,
94                                UC_LINE_BREAK_PR => LB_INDIRECT,
95                                UC_LINE_BREAK_PO => LB_INDIRECT,
96                                UC_LINE_BREAK_NU => LB_INDIRECT,
97                                UC_LINE_BREAK_AL => LB_INDIRECT,
98                                UC_LINE_BREAK_ID => LB_DIRECT,
99                                UC_LINE_BREAK_IN => LB_DIRECT,
100                                UC_LINE_BREAK_HY => LB_INDIRECT,
101                                UC_LINE_BREAK_BA => LB_INDIRECT,
102                                UC_LINE_BREAK_BB => LB_DIRECT,
103                                UC_LINE_BREAK_B2 => LB_DIRECT,
104                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
105                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
106                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
107                                UC_LINE_BREAK_H2 => LB_DIRECT,
108                                UC_LINE_BREAK_H3 => LB_DIRECT,
109                                UC_LINE_BREAK_JL => LB_DIRECT,
110                                UC_LINE_BREAK_JV => LB_DIRECT,
111                                UC_LINE_BREAK_JT => LB_DIRECT),
112      UC_LINE_BREAK_QU => array(UC_LINE_BREAK_OP => LB_PROHIBITED,
113                                UC_LINE_BREAK_CL => LB_PROHIBITED,
114                                UC_LINE_BREAK_QU => LB_INDIRECT,
115                                UC_LINE_BREAK_GL => LB_INDIRECT,
116                                UC_LINE_BREAK_NS => LB_INDIRECT,
117                                UC_LINE_BREAK_EX => LB_PROHIBITED,
118                                UC_LINE_BREAK_SY => LB_PROHIBITED,
119                                UC_LINE_BREAK_IS => LB_PROHIBITED,
120                                UC_LINE_BREAK_PR => LB_INDIRECT,
121                                UC_LINE_BREAK_PO => LB_INDIRECT,
122                                UC_LINE_BREAK_NU => LB_INDIRECT,
123                                UC_LINE_BREAK_AL => LB_INDIRECT,
124                                UC_LINE_BREAK_ID => LB_INDIRECT,
125                                UC_LINE_BREAK_IN => LB_INDIRECT,
126                                UC_LINE_BREAK_HY => LB_INDIRECT,
127                                UC_LINE_BREAK_BA => LB_INDIRECT,
128                                UC_LINE_BREAK_BB => LB_INDIRECT,
129                                UC_LINE_BREAK_B2 => LB_INDIRECT,
130                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
131                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
132                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
133                                UC_LINE_BREAK_H2 => LB_INDIRECT,
134                                UC_LINE_BREAK_H3 => LB_INDIRECT,
135                                UC_LINE_BREAK_JL => LB_INDIRECT,
136                                UC_LINE_BREAK_JV => LB_INDIRECT,
137                                UC_LINE_BREAK_JT => LB_INDIRECT),
138      UC_LINE_BREAK_GL => array(UC_LINE_BREAK_OP => LB_INDIRECT,
139                                UC_LINE_BREAK_CL => LB_PROHIBITED,
140                                UC_LINE_BREAK_QU => LB_INDIRECT,
141                                UC_LINE_BREAK_GL => LB_INDIRECT,
142                                UC_LINE_BREAK_NS => LB_INDIRECT,
143                                UC_LINE_BREAK_EX => LB_PROHIBITED,
144                                UC_LINE_BREAK_SY => LB_PROHIBITED,
145                                UC_LINE_BREAK_IS => LB_PROHIBITED,
146                                UC_LINE_BREAK_PR => LB_INDIRECT,
147                                UC_LINE_BREAK_PO => LB_INDIRECT,
148                                UC_LINE_BREAK_NU => LB_INDIRECT,
149                                UC_LINE_BREAK_AL => LB_INDIRECT,
150                                UC_LINE_BREAK_ID => LB_INDIRECT,
151                                UC_LINE_BREAK_IN => LB_INDIRECT,
152                                UC_LINE_BREAK_HY => LB_INDIRECT,
153                                UC_LINE_BREAK_BA => LB_INDIRECT,
154                                UC_LINE_BREAK_BB => LB_INDIRECT,
155                                UC_LINE_BREAK_B2 => LB_INDIRECT,
156                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
157                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
158                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
159                                UC_LINE_BREAK_H2 => LB_INDIRECT,
160                                UC_LINE_BREAK_H3 => LB_INDIRECT,
161                                UC_LINE_BREAK_JL => LB_INDIRECT,
162                                UC_LINE_BREAK_JV => LB_INDIRECT,
163                                UC_LINE_BREAK_JT => LB_INDIRECT),
164      UC_LINE_BREAK_NS => array(UC_LINE_BREAK_OP => LB_DIRECT,
165                                UC_LINE_BREAK_CL => LB_PROHIBITED,
166                                UC_LINE_BREAK_QU => LB_INDIRECT,
167                                UC_LINE_BREAK_GL => LB_INDIRECT,
168                                UC_LINE_BREAK_NS => LB_INDIRECT,
169                                UC_LINE_BREAK_EX => LB_PROHIBITED,
170                                UC_LINE_BREAK_SY => LB_PROHIBITED,
171                                UC_LINE_BREAK_IS => LB_PROHIBITED,
172                                UC_LINE_BREAK_PR => LB_DIRECT,
173                                UC_LINE_BREAK_PO => LB_DIRECT,
174                                UC_LINE_BREAK_NU => LB_DIRECT,
175                                UC_LINE_BREAK_AL => LB_DIRECT,
176                                UC_LINE_BREAK_ID => LB_DIRECT,
177                                UC_LINE_BREAK_IN => LB_DIRECT,
178                                UC_LINE_BREAK_HY => LB_INDIRECT,
179                                UC_LINE_BREAK_BA => LB_INDIRECT,
180                                UC_LINE_BREAK_BB => LB_DIRECT,
181                                UC_LINE_BREAK_B2 => LB_DIRECT,
182                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
183                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
184                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
185                                UC_LINE_BREAK_H2 => LB_DIRECT,
186                                UC_LINE_BREAK_H3 => LB_DIRECT,
187                                UC_LINE_BREAK_JL => LB_DIRECT,
188                                UC_LINE_BREAK_JV => LB_DIRECT,
189                                UC_LINE_BREAK_JT => LB_DIRECT),
190      UC_LINE_BREAK_EX => array(UC_LINE_BREAK_OP => LB_DIRECT,
191                                UC_LINE_BREAK_CL => LB_PROHIBITED,
192                                UC_LINE_BREAK_QU => LB_INDIRECT,
193                                UC_LINE_BREAK_GL => LB_INDIRECT,
194                                UC_LINE_BREAK_NS => LB_INDIRECT,
195                                UC_LINE_BREAK_EX => LB_PROHIBITED,
196                                UC_LINE_BREAK_SY => LB_PROHIBITED,
197                                UC_LINE_BREAK_IS => LB_PROHIBITED,
198                                UC_LINE_BREAK_PR => LB_DIRECT,
199                                UC_LINE_BREAK_PO => LB_DIRECT,
200                                UC_LINE_BREAK_NU => LB_DIRECT,
201                                UC_LINE_BREAK_AL => LB_DIRECT,
202                                UC_LINE_BREAK_ID => LB_DIRECT,
203                                UC_LINE_BREAK_IN => LB_DIRECT,
204                                UC_LINE_BREAK_HY => LB_INDIRECT,
205                                UC_LINE_BREAK_BA => LB_INDIRECT,
206                                UC_LINE_BREAK_BB => LB_DIRECT,
207                                UC_LINE_BREAK_B2 => LB_DIRECT,
208                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
209                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
210                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
211                                UC_LINE_BREAK_H2 => LB_DIRECT,
212                                UC_LINE_BREAK_H3 => LB_DIRECT,
213                                UC_LINE_BREAK_JL => LB_DIRECT,
214                                UC_LINE_BREAK_JV => LB_DIRECT,
215                                UC_LINE_BREAK_JT => LB_DIRECT),
216      UC_LINE_BREAK_SY => array(UC_LINE_BREAK_OP => LB_DIRECT,
217                                UC_LINE_BREAK_CL => LB_PROHIBITED,
218                                UC_LINE_BREAK_QU => LB_INDIRECT,
219                                UC_LINE_BREAK_GL => LB_INDIRECT,
220                                UC_LINE_BREAK_NS => LB_INDIRECT,
221                                UC_LINE_BREAK_EX => LB_PROHIBITED,
222                                UC_LINE_BREAK_SY => LB_PROHIBITED,
223                                UC_LINE_BREAK_IS => LB_PROHIBITED,
224                                UC_LINE_BREAK_PR => LB_DIRECT,
225                                UC_LINE_BREAK_PO => LB_DIRECT,
226                                UC_LINE_BREAK_NU => LB_INDIRECT,
227                                UC_LINE_BREAK_AL => LB_DIRECT,
228                                UC_LINE_BREAK_ID => LB_DIRECT,
229                                UC_LINE_BREAK_IN => LB_DIRECT,
230                                UC_LINE_BREAK_HY => LB_INDIRECT,
231                                UC_LINE_BREAK_BA => LB_INDIRECT,
232                                UC_LINE_BREAK_BB => LB_DIRECT,
233                                UC_LINE_BREAK_B2 => LB_DIRECT,
234                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
235                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
236                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
237                                UC_LINE_BREAK_H2 => LB_DIRECT,
238                                UC_LINE_BREAK_H3 => LB_DIRECT,
239                                UC_LINE_BREAK_JL => LB_DIRECT,
240                                UC_LINE_BREAK_JV => LB_DIRECT,
241                                UC_LINE_BREAK_JT => LB_DIRECT),
242      UC_LINE_BREAK_IS => array(UC_LINE_BREAK_OP => LB_DIRECT,
243                                UC_LINE_BREAK_CL => LB_PROHIBITED,
244                                UC_LINE_BREAK_QU => LB_INDIRECT,
245                                UC_LINE_BREAK_GL => LB_INDIRECT,
246                                UC_LINE_BREAK_NS => LB_INDIRECT,
247                                UC_LINE_BREAK_EX => LB_PROHIBITED,
248                                UC_LINE_BREAK_SY => LB_PROHIBITED,
249                                UC_LINE_BREAK_IS => LB_PROHIBITED,
250                                UC_LINE_BREAK_PR => LB_DIRECT,
251                                UC_LINE_BREAK_PO => LB_DIRECT,
252                                UC_LINE_BREAK_NU => LB_INDIRECT,
253                                UC_LINE_BREAK_AL => LB_INDIRECT,
254                                UC_LINE_BREAK_ID => LB_DIRECT,
255                                UC_LINE_BREAK_IN => LB_DIRECT,
256                                UC_LINE_BREAK_HY => LB_INDIRECT,
257                                UC_LINE_BREAK_BA => LB_INDIRECT,
258                                UC_LINE_BREAK_BB => LB_DIRECT,
259                                UC_LINE_BREAK_B2 => LB_DIRECT,
260                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
261                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
262                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
263                                UC_LINE_BREAK_H2 => LB_DIRECT,
264                                UC_LINE_BREAK_H3 => LB_DIRECT,
265                                UC_LINE_BREAK_JL => LB_DIRECT,
266                                UC_LINE_BREAK_JV => LB_DIRECT,
267                                UC_LINE_BREAK_JT => LB_DIRECT),
268      UC_LINE_BREAK_PR => array(UC_LINE_BREAK_OP => LB_INDIRECT,
269                                UC_LINE_BREAK_CL => LB_PROHIBITED,
270                                UC_LINE_BREAK_QU => LB_INDIRECT,
271                                UC_LINE_BREAK_GL => LB_INDIRECT,
272                                UC_LINE_BREAK_NS => LB_INDIRECT,
273                                UC_LINE_BREAK_EX => LB_PROHIBITED,
274                                UC_LINE_BREAK_SY => LB_PROHIBITED,
275                                UC_LINE_BREAK_IS => LB_PROHIBITED,
276                                UC_LINE_BREAK_PR => LB_DIRECT,
277                                UC_LINE_BREAK_PO => LB_DIRECT,
278                                UC_LINE_BREAK_NU => LB_INDIRECT,
279                                UC_LINE_BREAK_AL => LB_INDIRECT,
280                                UC_LINE_BREAK_ID => LB_INDIRECT,
281                                UC_LINE_BREAK_IN => LB_DIRECT,
282                                UC_LINE_BREAK_HY => LB_INDIRECT,
283                                UC_LINE_BREAK_BA => LB_INDIRECT,
284                                UC_LINE_BREAK_BB => LB_DIRECT,
285                                UC_LINE_BREAK_B2 => LB_DIRECT,
286                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
287                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
288                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
289                                UC_LINE_BREAK_H2 => LB_INDIRECT,
290                                UC_LINE_BREAK_H3 => LB_INDIRECT,
291                                UC_LINE_BREAK_JL => LB_INDIRECT,
292                                UC_LINE_BREAK_JV => LB_INDIRECT,
293                                UC_LINE_BREAK_JT => LB_INDIRECT),
294      UC_LINE_BREAK_PO => array(UC_LINE_BREAK_OP => LB_INDIRECT,
295                                UC_LINE_BREAK_CL => LB_PROHIBITED,
296                                UC_LINE_BREAK_QU => LB_INDIRECT,
297                                UC_LINE_BREAK_GL => LB_INDIRECT,
298                                UC_LINE_BREAK_NS => LB_INDIRECT,
299                                UC_LINE_BREAK_EX => LB_PROHIBITED,
300                                UC_LINE_BREAK_SY => LB_PROHIBITED,
301                                UC_LINE_BREAK_IS => LB_PROHIBITED,
302                                UC_LINE_BREAK_PR => LB_DIRECT,
303                                UC_LINE_BREAK_PO => LB_DIRECT,
304                                UC_LINE_BREAK_NU => LB_INDIRECT,
305                                UC_LINE_BREAK_AL => LB_INDIRECT,
306                                UC_LINE_BREAK_ID => LB_DIRECT,
307                                UC_LINE_BREAK_IN => LB_DIRECT,
308                                UC_LINE_BREAK_HY => LB_INDIRECT,
309                                UC_LINE_BREAK_BA => LB_INDIRECT,
310                                UC_LINE_BREAK_BB => LB_DIRECT,
311                                UC_LINE_BREAK_B2 => LB_DIRECT,
312                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
313                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
314                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
315                                UC_LINE_BREAK_H2 => LB_DIRECT,
316                                UC_LINE_BREAK_H3 => LB_DIRECT,
317                                UC_LINE_BREAK_JL => LB_DIRECT,
318                                UC_LINE_BREAK_JV => LB_DIRECT,
319                                UC_LINE_BREAK_JT => LB_DIRECT),
320      UC_LINE_BREAK_NU => array(UC_LINE_BREAK_OP => LB_INDIRECT,
321                                UC_LINE_BREAK_CL => LB_PROHIBITED,
322                                UC_LINE_BREAK_QU => LB_INDIRECT,
323                                UC_LINE_BREAK_GL => LB_INDIRECT,
324                                UC_LINE_BREAK_NS => LB_INDIRECT,
325                                UC_LINE_BREAK_EX => LB_PROHIBITED,
326                                UC_LINE_BREAK_SY => LB_PROHIBITED,
327                                UC_LINE_BREAK_IS => LB_PROHIBITED,
328                                UC_LINE_BREAK_PR => LB_INDIRECT,
329                                UC_LINE_BREAK_PO => LB_INDIRECT,
330                                UC_LINE_BREAK_NU => LB_INDIRECT,
331                                UC_LINE_BREAK_AL => LB_INDIRECT,
332                                UC_LINE_BREAK_ID => LB_DIRECT,
333                                UC_LINE_BREAK_IN => LB_INDIRECT,
334                                UC_LINE_BREAK_HY => LB_INDIRECT,
335                                UC_LINE_BREAK_BA => LB_INDIRECT,
336                                UC_LINE_BREAK_BB => LB_DIRECT,
337                                UC_LINE_BREAK_B2 => LB_DIRECT,
338                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
339                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
340                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
341                                UC_LINE_BREAK_H2 => LB_DIRECT,
342                                UC_LINE_BREAK_H3 => LB_DIRECT,
343                                UC_LINE_BREAK_JL => LB_DIRECT,
344                                UC_LINE_BREAK_JV => LB_DIRECT,
345                                UC_LINE_BREAK_JT => LB_DIRECT),
346      UC_LINE_BREAK_AL => array(UC_LINE_BREAK_OP => LB_INDIRECT,
347                                UC_LINE_BREAK_CL => LB_PROHIBITED,
348                                UC_LINE_BREAK_QU => LB_INDIRECT,
349                                UC_LINE_BREAK_GL => LB_INDIRECT,
350                                UC_LINE_BREAK_NS => LB_INDIRECT,
351                                UC_LINE_BREAK_EX => LB_PROHIBITED,
352                                UC_LINE_BREAK_SY => LB_PROHIBITED,
353                                UC_LINE_BREAK_IS => LB_PROHIBITED,
354                                UC_LINE_BREAK_PR => LB_DIRECT,
355                                UC_LINE_BREAK_PO => LB_DIRECT,
356                                UC_LINE_BREAK_NU => LB_INDIRECT,
357                                UC_LINE_BREAK_AL => LB_INDIRECT,
358                                UC_LINE_BREAK_ID => LB_DIRECT,
359                                UC_LINE_BREAK_IN => LB_INDIRECT,
360                                UC_LINE_BREAK_HY => LB_INDIRECT,
361                                UC_LINE_BREAK_BA => LB_INDIRECT,
362                                UC_LINE_BREAK_BB => LB_DIRECT,
363                                UC_LINE_BREAK_B2 => LB_DIRECT,
364                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
365                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
366                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
367                                UC_LINE_BREAK_H2 => LB_DIRECT,
368                                UC_LINE_BREAK_H3 => LB_DIRECT,
369                                UC_LINE_BREAK_JL => LB_DIRECT,
370                                UC_LINE_BREAK_JV => LB_DIRECT,
371                                UC_LINE_BREAK_JT => LB_DIRECT),
372      UC_LINE_BREAK_ID => array(UC_LINE_BREAK_OP => LB_DIRECT,
373                                UC_LINE_BREAK_CL => LB_PROHIBITED,
374                                UC_LINE_BREAK_QU => LB_INDIRECT,
375                                UC_LINE_BREAK_GL => LB_INDIRECT,
376                                UC_LINE_BREAK_NS => LB_INDIRECT,
377                                UC_LINE_BREAK_EX => LB_PROHIBITED,
378                                UC_LINE_BREAK_SY => LB_PROHIBITED,
379                                UC_LINE_BREAK_IS => LB_PROHIBITED,
380                                UC_LINE_BREAK_PR => LB_DIRECT,
381                                UC_LINE_BREAK_PO => LB_INDIRECT,
382                                UC_LINE_BREAK_NU => LB_DIRECT,
383                                UC_LINE_BREAK_AL => LB_DIRECT,
384                                UC_LINE_BREAK_ID => LB_DIRECT,
385                                UC_LINE_BREAK_IN => LB_INDIRECT,
386                                UC_LINE_BREAK_HY => LB_INDIRECT,
387                                UC_LINE_BREAK_BA => LB_INDIRECT,
388                                UC_LINE_BREAK_BB => LB_DIRECT,
389                                UC_LINE_BREAK_B2 => LB_DIRECT,
390                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
391                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
392                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
393                                UC_LINE_BREAK_H2 => LB_DIRECT,
394                                UC_LINE_BREAK_H3 => LB_DIRECT,
395                                UC_LINE_BREAK_JL => LB_DIRECT,
396                                UC_LINE_BREAK_JV => LB_DIRECT,
397                                UC_LINE_BREAK_JT => LB_DIRECT),
398      UC_LINE_BREAK_IN => array(UC_LINE_BREAK_OP => LB_DIRECT,
399                                UC_LINE_BREAK_CL => LB_PROHIBITED,
400                                UC_LINE_BREAK_QU => LB_INDIRECT,
401                                UC_LINE_BREAK_GL => LB_INDIRECT,
402                                UC_LINE_BREAK_NS => LB_INDIRECT,
403                                UC_LINE_BREAK_EX => LB_PROHIBITED,
404                                UC_LINE_BREAK_SY => LB_PROHIBITED,
405                                UC_LINE_BREAK_IS => LB_PROHIBITED,
406                                UC_LINE_BREAK_PR => LB_DIRECT,
407                                UC_LINE_BREAK_PO => LB_DIRECT,
408                                UC_LINE_BREAK_NU => LB_DIRECT,
409                                UC_LINE_BREAK_AL => LB_DIRECT,
410                                UC_LINE_BREAK_ID => LB_DIRECT,
411                                UC_LINE_BREAK_IN => LB_INDIRECT,
412                                UC_LINE_BREAK_HY => LB_INDIRECT,
413                                UC_LINE_BREAK_BA => LB_INDIRECT,
414                                UC_LINE_BREAK_BB => LB_DIRECT,
415                                UC_LINE_BREAK_B2 => LB_DIRECT,
416                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
417                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
418                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
419                                UC_LINE_BREAK_H2 => LB_DIRECT,
420                                UC_LINE_BREAK_H3 => LB_DIRECT,
421                                UC_LINE_BREAK_JL => LB_DIRECT,
422                                UC_LINE_BREAK_JV => LB_DIRECT,
423                                UC_LINE_BREAK_JT => LB_DIRECT),
424      UC_LINE_BREAK_HY => array(UC_LINE_BREAK_OP => LB_DIRECT,
425                                UC_LINE_BREAK_CL => LB_PROHIBITED,
426                                UC_LINE_BREAK_QU => LB_INDIRECT,
427                                UC_LINE_BREAK_GL => LB_INDIRECT,
428                                UC_LINE_BREAK_NS => LB_INDIRECT,
429                                UC_LINE_BREAK_EX => LB_PROHIBITED,
430                                UC_LINE_BREAK_SY => LB_PROHIBITED,
431                                UC_LINE_BREAK_IS => LB_PROHIBITED,
432                                UC_LINE_BREAK_PR => LB_DIRECT,
433                                UC_LINE_BREAK_PO => LB_DIRECT,
434                                UC_LINE_BREAK_NU => LB_INDIRECT,
435                                UC_LINE_BREAK_AL => LB_DIRECT,
436                                UC_LINE_BREAK_ID => LB_DIRECT,
437                                UC_LINE_BREAK_IN => LB_DIRECT,
438                                UC_LINE_BREAK_HY => LB_INDIRECT,
439                                UC_LINE_BREAK_BA => LB_INDIRECT,
440                                UC_LINE_BREAK_BB => LB_DIRECT,
441                                UC_LINE_BREAK_B2 => LB_DIRECT,
442                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
443                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
444                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
445                                UC_LINE_BREAK_H2 => LB_DIRECT,
446                                UC_LINE_BREAK_H3 => LB_DIRECT,
447                                UC_LINE_BREAK_JL => LB_DIRECT,
448                                UC_LINE_BREAK_JV => LB_DIRECT,
449                                UC_LINE_BREAK_JT => LB_DIRECT),
450      UC_LINE_BREAK_BA => array(UC_LINE_BREAK_OP => LB_DIRECT,
451                                UC_LINE_BREAK_CL => LB_PROHIBITED,
452                                UC_LINE_BREAK_QU => LB_INDIRECT,
453                                UC_LINE_BREAK_GL => LB_INDIRECT,
454                                UC_LINE_BREAK_NS => LB_INDIRECT,
455                                UC_LINE_BREAK_EX => LB_PROHIBITED,
456                                UC_LINE_BREAK_SY => LB_PROHIBITED,
457                                UC_LINE_BREAK_IS => LB_PROHIBITED,
458                                UC_LINE_BREAK_PR => LB_DIRECT,
459                                UC_LINE_BREAK_PO => LB_DIRECT,
460                                UC_LINE_BREAK_NU => LB_DIRECT,
461                                UC_LINE_BREAK_AL => LB_DIRECT,
462                                UC_LINE_BREAK_ID => LB_DIRECT,
463                                UC_LINE_BREAK_IN => LB_DIRECT,
464                                UC_LINE_BREAK_HY => LB_INDIRECT,
465                                UC_LINE_BREAK_BA => LB_INDIRECT,
466                                UC_LINE_BREAK_BB => LB_DIRECT,
467                                UC_LINE_BREAK_B2 => LB_DIRECT,
468                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
469                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
470                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
471                                UC_LINE_BREAK_H2 => LB_DIRECT,
472                                UC_LINE_BREAK_H3 => LB_DIRECT,
473                                UC_LINE_BREAK_JL => LB_DIRECT,
474                                UC_LINE_BREAK_JV => LB_DIRECT,
475                                UC_LINE_BREAK_JT => LB_DIRECT),
476      UC_LINE_BREAK_BB => array(UC_LINE_BREAK_OP => LB_INDIRECT,
477                                UC_LINE_BREAK_CL => LB_PROHIBITED,
478                                UC_LINE_BREAK_QU => LB_INDIRECT,
479                                UC_LINE_BREAK_GL => LB_INDIRECT,
480                                UC_LINE_BREAK_NS => LB_INDIRECT,
481                                UC_LINE_BREAK_EX => LB_PROHIBITED,
482                                UC_LINE_BREAK_SY => LB_PROHIBITED,
483                                UC_LINE_BREAK_IS => LB_PROHIBITED,
484                                UC_LINE_BREAK_PR => LB_INDIRECT,
485                                UC_LINE_BREAK_PO => LB_INDIRECT,
486                                UC_LINE_BREAK_NU => LB_INDIRECT,
487                                UC_LINE_BREAK_AL => LB_INDIRECT,
488                                UC_LINE_BREAK_ID => LB_INDIRECT,
489                                UC_LINE_BREAK_IN => LB_INDIRECT,
490                                UC_LINE_BREAK_HY => LB_INDIRECT,
491                                UC_LINE_BREAK_BA => LB_INDIRECT,
492                                UC_LINE_BREAK_BB => LB_INDIRECT,
493                                UC_LINE_BREAK_B2 => LB_INDIRECT,
494                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
495                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
496                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
497                                UC_LINE_BREAK_H2 => LB_INDIRECT,
498                                UC_LINE_BREAK_H3 => LB_INDIRECT,
499                                UC_LINE_BREAK_JL => LB_INDIRECT,
500                                UC_LINE_BREAK_JV => LB_INDIRECT,
501                                UC_LINE_BREAK_JT => LB_INDIRECT),
502      UC_LINE_BREAK_B2 => array(UC_LINE_BREAK_OP => LB_DIRECT,
503                                UC_LINE_BREAK_CL => LB_PROHIBITED,
504                                UC_LINE_BREAK_QU => LB_INDIRECT,
505                                UC_LINE_BREAK_GL => LB_INDIRECT,
506                                UC_LINE_BREAK_NS => LB_INDIRECT,
507                                UC_LINE_BREAK_EX => LB_PROHIBITED,
508                                UC_LINE_BREAK_SY => LB_PROHIBITED,
509                                UC_LINE_BREAK_IS => LB_PROHIBITED,
510                                UC_LINE_BREAK_PR => LB_DIRECT,
511                                UC_LINE_BREAK_PO => LB_DIRECT,
512                                UC_LINE_BREAK_NU => LB_DIRECT,
513                                UC_LINE_BREAK_AL => LB_DIRECT,
514                                UC_LINE_BREAK_ID => LB_DIRECT,
515                                UC_LINE_BREAK_IN => LB_DIRECT,
516                                UC_LINE_BREAK_HY => LB_INDIRECT,
517                                UC_LINE_BREAK_BA => LB_INDIRECT,
518                                UC_LINE_BREAK_BB => LB_DIRECT,
519                                UC_LINE_BREAK_B2 => LB_PROHIBITED,
520                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
521                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
522                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
523                                UC_LINE_BREAK_H2 => LB_DIRECT,
524                                UC_LINE_BREAK_H3 => LB_DIRECT,
525                                UC_LINE_BREAK_JL => LB_DIRECT,
526                                UC_LINE_BREAK_JV => LB_DIRECT,
527                                UC_LINE_BREAK_JT => LB_DIRECT),
528      UC_LINE_BREAK_ZW => array(UC_LINE_BREAK_OP => LB_DIRECT,
529                                UC_LINE_BREAK_CL => LB_DIRECT,
530                                UC_LINE_BREAK_QU => LB_DIRECT,
531                                UC_LINE_BREAK_GL => LB_DIRECT,
532                                UC_LINE_BREAK_NS => LB_DIRECT,
533                                UC_LINE_BREAK_EX => LB_DIRECT,
534                                UC_LINE_BREAK_SY => LB_DIRECT,
535                                UC_LINE_BREAK_IS => LB_DIRECT,
536                                UC_LINE_BREAK_PR => LB_DIRECT,
537                                UC_LINE_BREAK_PO => LB_DIRECT,
538                                UC_LINE_BREAK_NU => LB_DIRECT,
539                                UC_LINE_BREAK_AL => LB_DIRECT,
540                                UC_LINE_BREAK_ID => LB_DIRECT,
541                                UC_LINE_BREAK_IN => LB_DIRECT,
542                                UC_LINE_BREAK_HY => LB_DIRECT,
543                                UC_LINE_BREAK_BA => LB_DIRECT,
544                                UC_LINE_BREAK_BB => LB_DIRECT,
545                                UC_LINE_BREAK_B2 => LB_DIRECT,
546                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
547                                UC_LINE_BREAK_CM => LB_DIRECT,
548                                UC_LINE_BREAK_WJ => LB_DIRECT,
549                                UC_LINE_BREAK_H2 => LB_DIRECT,
550                                UC_LINE_BREAK_H3 => LB_DIRECT,
551                                UC_LINE_BREAK_JL => LB_DIRECT,
552                                UC_LINE_BREAK_JV => LB_DIRECT,
553                                UC_LINE_BREAK_JT => LB_DIRECT),
554      UC_LINE_BREAK_CM => array(UC_LINE_BREAK_OP => LB_DIRECT,
555                                UC_LINE_BREAK_CL => LB_PROHIBITED,
556                                UC_LINE_BREAK_QU => LB_INDIRECT,
557                                UC_LINE_BREAK_GL => LB_INDIRECT,
558                                UC_LINE_BREAK_NS => LB_INDIRECT,
559                                UC_LINE_BREAK_EX => LB_PROHIBITED,
560                                UC_LINE_BREAK_SY => LB_PROHIBITED,
561                                UC_LINE_BREAK_IS => LB_PROHIBITED,
562                                UC_LINE_BREAK_PR => LB_DIRECT,
563                                UC_LINE_BREAK_PO => LB_DIRECT,
564                                UC_LINE_BREAK_NU => LB_INDIRECT,
565                                UC_LINE_BREAK_AL => LB_INDIRECT,
566                                UC_LINE_BREAK_ID => LB_DIRECT,
567                                UC_LINE_BREAK_IN => LB_INDIRECT,
568                                UC_LINE_BREAK_HY => LB_INDIRECT,
569                                UC_LINE_BREAK_BA => LB_INDIRECT,
570                                UC_LINE_BREAK_BB => LB_DIRECT,
571                                UC_LINE_BREAK_B2 => LB_DIRECT,
572                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
573                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
574                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
575                                UC_LINE_BREAK_H2 => LB_DIRECT,
576                                UC_LINE_BREAK_H3 => LB_DIRECT,
577                                UC_LINE_BREAK_JL => LB_DIRECT,
578                                UC_LINE_BREAK_JV => LB_DIRECT,
579                                UC_LINE_BREAK_JT => LB_DIRECT),
580      UC_LINE_BREAK_WJ => array(UC_LINE_BREAK_OP => LB_INDIRECT,
581                                UC_LINE_BREAK_CL => LB_PROHIBITED,
582                                UC_LINE_BREAK_QU => LB_INDIRECT,
583                                UC_LINE_BREAK_GL => LB_INDIRECT,
584                                UC_LINE_BREAK_NS => LB_INDIRECT,
585                                UC_LINE_BREAK_EX => LB_PROHIBITED,
586                                UC_LINE_BREAK_SY => LB_PROHIBITED,
587                                UC_LINE_BREAK_IS => LB_PROHIBITED,
588                                UC_LINE_BREAK_PR => LB_INDIRECT,
589                                UC_LINE_BREAK_PO => LB_INDIRECT,
590                                UC_LINE_BREAK_NU => LB_INDIRECT,
591                                UC_LINE_BREAK_AL => LB_INDIRECT,
592                                UC_LINE_BREAK_ID => LB_INDIRECT,
593                                UC_LINE_BREAK_IN => LB_INDIRECT,
594                                UC_LINE_BREAK_HY => LB_INDIRECT,
595                                UC_LINE_BREAK_BA => LB_INDIRECT,
596                                UC_LINE_BREAK_BB => LB_INDIRECT,
597                                UC_LINE_BREAK_B2 => LB_INDIRECT,
598                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
599                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
600                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
601                                UC_LINE_BREAK_H2 => LB_INDIRECT,
602                                UC_LINE_BREAK_H3 => LB_INDIRECT,
603                                UC_LINE_BREAK_JL => LB_INDIRECT,
604                                UC_LINE_BREAK_JV => LB_INDIRECT,
605                                UC_LINE_BREAK_JT => LB_INDIRECT),
606      UC_LINE_BREAK_H2 => array(UC_LINE_BREAK_OP => LB_DIRECT,
607                                UC_LINE_BREAK_CL => LB_PROHIBITED,
608                                UC_LINE_BREAK_QU => LB_INDIRECT,
609                                UC_LINE_BREAK_GL => LB_INDIRECT,
610                                UC_LINE_BREAK_NS => LB_INDIRECT,
611                                UC_LINE_BREAK_EX => LB_PROHIBITED,
612                                UC_LINE_BREAK_SY => LB_PROHIBITED,
613                                UC_LINE_BREAK_IS => LB_PROHIBITED,
614                                UC_LINE_BREAK_PR => LB_DIRECT,
615                                UC_LINE_BREAK_PO => LB_INDIRECT,
616                                UC_LINE_BREAK_NU => LB_DIRECT,
617                                UC_LINE_BREAK_AL => LB_DIRECT,
618                                UC_LINE_BREAK_ID => LB_DIRECT,
619                                UC_LINE_BREAK_IN => LB_INDIRECT,
620                                UC_LINE_BREAK_HY => LB_INDIRECT,
621                                UC_LINE_BREAK_BA => LB_INDIRECT,
622                                UC_LINE_BREAK_BB => LB_DIRECT,
623                                UC_LINE_BREAK_B2 => LB_DIRECT,
624                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
625                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
626                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
627                                UC_LINE_BREAK_H2 => LB_DIRECT,
628                                UC_LINE_BREAK_H3 => LB_DIRECT,
629                                UC_LINE_BREAK_JL => LB_DIRECT,
630                                UC_LINE_BREAK_JV => LB_INDIRECT,
631                                UC_LINE_BREAK_JT => LB_INDIRECT),
632      UC_LINE_BREAK_H3 => array(UC_LINE_BREAK_OP => LB_DIRECT,
633                                UC_LINE_BREAK_CL => LB_PROHIBITED,
634                                UC_LINE_BREAK_QU => LB_INDIRECT,
635                                UC_LINE_BREAK_GL => LB_INDIRECT,
636                                UC_LINE_BREAK_NS => LB_INDIRECT,
637                                UC_LINE_BREAK_EX => LB_PROHIBITED,
638                                UC_LINE_BREAK_SY => LB_PROHIBITED,
639                                UC_LINE_BREAK_IS => LB_PROHIBITED,
640                                UC_LINE_BREAK_PR => LB_DIRECT,
641                                UC_LINE_BREAK_PO => LB_INDIRECT,
642                                UC_LINE_BREAK_NU => LB_DIRECT,
643                                UC_LINE_BREAK_AL => LB_DIRECT,
644                                UC_LINE_BREAK_ID => LB_DIRECT,
645                                UC_LINE_BREAK_IN => LB_INDIRECT,
646                                UC_LINE_BREAK_HY => LB_INDIRECT,
647                                UC_LINE_BREAK_BA => LB_INDIRECT,
648                                UC_LINE_BREAK_BB => LB_DIRECT,
649                                UC_LINE_BREAK_B2 => LB_DIRECT,
650                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
651                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
652                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
653                                UC_LINE_BREAK_H2 => LB_DIRECT,
654                                UC_LINE_BREAK_H3 => LB_DIRECT,
655                                UC_LINE_BREAK_JL => LB_DIRECT,
656                                UC_LINE_BREAK_JV => LB_DIRECT,
657                                UC_LINE_BREAK_JT => LB_INDIRECT),
658      UC_LINE_BREAK_JL => array(UC_LINE_BREAK_OP => LB_DIRECT,
659                                UC_LINE_BREAK_CL => LB_PROHIBITED,
660                                UC_LINE_BREAK_QU => LB_INDIRECT,
661                                UC_LINE_BREAK_GL => LB_INDIRECT,
662                                UC_LINE_BREAK_NS => LB_INDIRECT,
663                                UC_LINE_BREAK_EX => LB_PROHIBITED,
664                                UC_LINE_BREAK_SY => LB_PROHIBITED,
665                                UC_LINE_BREAK_IS => LB_PROHIBITED,
666                                UC_LINE_BREAK_PR => LB_DIRECT,
667                                UC_LINE_BREAK_PO => LB_INDIRECT,
668                                UC_LINE_BREAK_NU => LB_DIRECT,
669                                UC_LINE_BREAK_AL => LB_DIRECT,
670                                UC_LINE_BREAK_ID => LB_DIRECT,
671                                UC_LINE_BREAK_IN => LB_INDIRECT,
672                                UC_LINE_BREAK_HY => LB_INDIRECT,
673                                UC_LINE_BREAK_BA => LB_INDIRECT,
674                                UC_LINE_BREAK_BB => LB_DIRECT,
675                                UC_LINE_BREAK_B2 => LB_DIRECT,
676                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
677                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
678                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
679                                UC_LINE_BREAK_H2 => LB_INDIRECT,
680                                UC_LINE_BREAK_H3 => LB_INDIRECT,
681                                UC_LINE_BREAK_JL => LB_INDIRECT,
682                                UC_LINE_BREAK_JV => LB_INDIRECT,
683                                UC_LINE_BREAK_JT => LB_DIRECT),
684      UC_LINE_BREAK_JV => array(UC_LINE_BREAK_OP => LB_DIRECT,
685                                UC_LINE_BREAK_CL => LB_PROHIBITED,
686                                UC_LINE_BREAK_QU => LB_INDIRECT,
687                                UC_LINE_BREAK_GL => LB_INDIRECT,
688                                UC_LINE_BREAK_NS => LB_INDIRECT,
689                                UC_LINE_BREAK_EX => LB_PROHIBITED,
690                                UC_LINE_BREAK_SY => LB_PROHIBITED,
691                                UC_LINE_BREAK_IS => LB_PROHIBITED,
692                                UC_LINE_BREAK_PR => LB_DIRECT,
693                                UC_LINE_BREAK_PO => LB_INDIRECT,
694                                UC_LINE_BREAK_NU => LB_DIRECT,
695                                UC_LINE_BREAK_AL => LB_DIRECT,
696                                UC_LINE_BREAK_ID => LB_DIRECT,
697                                UC_LINE_BREAK_IN => LB_INDIRECT,
698                                UC_LINE_BREAK_HY => LB_INDIRECT,
699                                UC_LINE_BREAK_BA => LB_INDIRECT,
700                                UC_LINE_BREAK_BB => LB_DIRECT,
701                                UC_LINE_BREAK_B2 => LB_DIRECT,
702                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
703                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
704                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
705                                UC_LINE_BREAK_H2 => LB_DIRECT,
706                                UC_LINE_BREAK_H3 => LB_DIRECT,
707                                UC_LINE_BREAK_JL => LB_DIRECT,
708                                UC_LINE_BREAK_JV => LB_INDIRECT,
709                                UC_LINE_BREAK_JT => LB_INDIRECT),
710      UC_LINE_BREAK_JT => array(UC_LINE_BREAK_OP => LB_DIRECT,
711                                UC_LINE_BREAK_CL => LB_PROHIBITED,
712                                UC_LINE_BREAK_QU => LB_INDIRECT,
713                                UC_LINE_BREAK_GL => LB_INDIRECT,
714                                UC_LINE_BREAK_NS => LB_INDIRECT,
715                                UC_LINE_BREAK_EX => LB_PROHIBITED,
716                                UC_LINE_BREAK_SY => LB_PROHIBITED,
717                                UC_LINE_BREAK_IS => LB_PROHIBITED,
718                                UC_LINE_BREAK_PR => LB_DIRECT,
719                                UC_LINE_BREAK_PO => LB_INDIRECT,
720                                UC_LINE_BREAK_NU => LB_DIRECT,
721                                UC_LINE_BREAK_AL => LB_DIRECT,
722                                UC_LINE_BREAK_ID => LB_DIRECT,
723                                UC_LINE_BREAK_IN => LB_INDIRECT,
724                                UC_LINE_BREAK_HY => LB_INDIRECT,
725                                UC_LINE_BREAK_BA => LB_INDIRECT,
726                                UC_LINE_BREAK_BB => LB_DIRECT,
727                                UC_LINE_BREAK_B2 => LB_DIRECT,
728                                UC_LINE_BREAK_ZW => LB_PROHIBITED,
729                                UC_LINE_BREAK_CM => LB_INDIRECT_CM,
730                                UC_LINE_BREAK_WJ => LB_PROHIBITED,
731                                UC_LINE_BREAK_H2 => LB_DIRECT,
732                                UC_LINE_BREAK_H3 => LB_DIRECT,
733                                UC_LINE_BREAK_JL => LB_DIRECT,
734                                UC_LINE_BREAK_JV => LB_DIRECT,
735                                UC_LINE_BREAK_JT => LB_INDIRECT));
736
/**
 * Helpers for turning raw text content into inline boxes.
 *
 * White-space handling follows CSS 2.1 16.6.1 (The 'white-space'
 * processing model). Word splitting uses the pair-table form of the
 * Unicode line breaking algorithm (UAX #14), driven by
 * $GLOBALS['_g_line_break_class_table'].
 *
 * build() only reports a missing method here, so concrete builders
 * are expected to override it.
 */
class InlineContentBuilder {
  function InlineContentBuilder() {
  }

  /**
   * Appends a BRBox, initialised from the pipeline's current CSS
   * state, to $box.
   *
   * @param object $box      box receiving the new child
   * @param object $pipeline pipeline providing get_current_css_state()
   */
  function add_line_break(&$box, &$pipeline) {
    // "=& new" is a parse error since PHP 7; a plain assignment does
    // the same job on every PHP version that still runs
    $break_box = new BRBox();
    $break_box->readCSS($pipeline->get_current_css_state());
    $box->add_child($break_box);
  }

  /**
   * Placeholder; reports through error_no_method() that the concrete
   * class did not provide build().
   */
  function build(&$box, $text, &$pipeline) {
    error_no_method('build', get_class($this));
  }

  /**
   * Splits $content at every single CR or LF character.
   *
   * @param string $content
   * @return array list of line strings (result of preg_split)
   */
  function break_into_lines($content) {
    return preg_split('/[\r\n]/u', $content);
  }

  /**
   * Splits $content into words at the break opportunities reported by
   * find_line_break().
   *
   * @param string $content UTF-8 text; it is trimmed first
   * @return array list of trimmed word strings, empty for blank input
   */
  function break_into_words($content) {
    $content = trim($content);
    if ($content == '') {
      return array();
    };

    // Extract Unicode characters from the raw content data
    $ptr = 0;
    $utf8_chars = array();
    $ucs2_chars = array();
    $byte_count = strlen($content);
    while ($ptr < $byte_count) {
      $utf8_char = ManagerEncoding::get_next_utf8_char($content, $ptr);
      $utf8_chars[] = $utf8_char;
      $ucs2_chars[] = utf8_to_code($utf8_char);
    };

    // Get unicode line breaking classes
    $classes = array_map(array($this, 'get_line_break_class'), $ucs2_chars);
    $total = count($classes);

    // find_line_break() stops right after the first mandatory break;
    // keep calling it on the remaining characters so that text
    // following such a break is not silently dropped
    $breaks = array();
    $offset = 0;
    while ($offset < $total) {
      $segment_breaks = array();
      $consumed = $this->find_line_break(array_slice($classes, $offset),
                                         $segment_breaks,
                                         $total - $offset);
      if ($consumed <= 0) {
        break;
      };

      for ($k = 0; $k < $consumed; $k++) {
        // a missing/null entry never matched any break mode before,
        // so treating it as "no break" keeps the old outcome
        $breaks[$offset + $k] = isset($segment_breaks[$k]) ? $segment_breaks[$k] : LB_PROHIBITED;
      };

      $offset += $consumed;
    };

    // Make words array
    $words = array();
    $word = '';
    for ($i = 0, $break_count = count($breaks); $i < $break_count; $i++) {
      $word .= $utf8_chars[$i];

      $break = $breaks[$i];
      if ($break == LB_INDIRECT ||
          $break == LB_INDIRECT_CM ||
          $break == LB_DIRECT ||
          $break == LB_EXPLICIT) {
        $words[] = trim($word);
        $word = '';
      };
    };

    return $words;
  }

  /**
   * Stub for complex-script (class SA) analysis: starting at $offset,
   * marks the position before each element as non-breaking and stops
   * at the first element whose class is not SA.
   *
   * @param int   $current_class class of the 'before' character (unused)
   * @param array $classes       line break classes
   * @param array $breaks        break modes, updated in place
   * @param int   $offset        index to start scanning at
   * @param int   $length        number of elements in $classes
   * @return int absolute index where scanning stopped ($length when
   *             the run reaches the end); 0 when $offset >= $length
   */
  function find_complex_break($current_class, $classes, &$breaks, $offset, $length) {
    if ($offset >= $length) {
      return 0;
    };

    for ($i = $offset; $i < $length; $i++) {
      // TODO
      $breaks[$i - 1] = LB_PROHIBITED;
      if ($classes[$i] != UC_LINE_BREAK_SA) {
        break;
      };
    };

    return $i;
  }

  /**
   * Pair-table line break search: fills $breaks[k] with the break
   * mode that applies after element k, for all elements up to and
   * including the first mandatory break (or the end of input).
   *
   * @param array $classes line break classes, indexed from 0
   * @param array $breaks  receives LB_* break modes, indexed like $classes
   * @param int   $length  number of elements of $classes to examine
   * @return int number of elements consumed (0 for empty input)
   */
  function find_line_break($classes, &$breaks, $length) {
    if (!$length) {
      return 0;
    };

    $class = $classes[0]; // class of 'before' character

    // the pair table has no SP row: treat SP at the start of input
    // as if it followed a WJ
    if ($class == UC_LINE_BREAK_SP) {
      $class = UC_LINE_BREAK_WJ;
    };

    // treat LF and NL like BK
    if ($class == UC_LINE_BREAK_LF ||
        $class == UC_LINE_BREAK_NL) {
      $class = UC_LINE_BREAK_BK;
    };

    // loop over all pairs in the string up to a hard break; a CR is
    // a hard break too, unless it is immediately followed by LF
    for ($i = 1;
         ($i < $length) &&
           ($class != UC_LINE_BREAK_BK) &&
           ($class != UC_LINE_BREAK_CR || $classes[$i] == UC_LINE_BREAK_LF);
         $i++) {
      // handle BK, NL and LF explicitly
      if ($classes[$i] == UC_LINE_BREAK_BK ||
          $classes[$i] == UC_LINE_BREAK_NL ||
          $classes[$i] == UC_LINE_BREAK_LF) {
        $breaks[$i - 1] = LB_PROHIBITED;
        $class = UC_LINE_BREAK_BK;
        continue;
      };

      // handle CR explicitly
      if ($classes[$i] == UC_LINE_BREAK_CR) {
        $breaks[$i - 1] = LB_PROHIBITED;
        $class = UC_LINE_BREAK_CR;
        continue;
      };

      // handle spaces explicitly; the 'before' class is kept
      if ($classes[$i] == UC_LINE_BREAK_SP) {
        $breaks[$i - 1] = LB_PROHIBITED;
        continue;
      };

      // handle complex scripts in a separate function
      if ($classes[$i] == UC_LINE_BREAK_SA) {
        // find_complex_break() returns an absolute index, so assign
        // it instead of adding it to the current position
        $i = $this->find_complex_break($class, $classes, $breaks, $i, $length);

        if ($i >= $length) {
          // the SA run reached the end of the input
          break;
        };

        $class = $classes[$i];
        continue;
      };

      // lookup pair table information
      $current_class = $classes[$i];

      $break = $GLOBALS['_g_line_break_class_table'][$class][$current_class];
      $breaks[$i - 1] = $break;

      if ($break == LB_INDIRECT) {
        // an indirect break is only an opportunity after a space
        if ($classes[$i - 1] == UC_LINE_BREAK_SP) {
          $breaks[$i - 1] = LB_INDIRECT;
        } else {
          $breaks[$i - 1] = LB_PROHIBITED;
        };

      // handle breaks involving a combining mark
      } elseif ($break == LB_INDIRECT_CM) {
        $breaks[$i - 1] = LB_PROHIBITED;

        if ($classes[$i - 1] == UC_LINE_BREAK_SP) {
          $breaks[$i - 1] = LB_INDIRECT_CM;
        } else {
          continue; // do not update the 'before' class
        };
      } elseif ($break == LB_PROHIBITED_CM) {
        $breaks[$i - 1] = LB_PROHIBITED_CM;

        if ($classes[$i - 1] != UC_LINE_BREAK_SP) {
          continue; // do not update the 'before' class
        };
      };

      // save class of 'before' character (unless bypassed by 'continue')
      $class = $classes[$i];
    };

    // always break after the last examined element
    $breaks[$i - 1] = LB_EXPLICIT;

    return $i;
  }

  /**
   * Currently permits every break.
   *
   * @return bool always true
   */
  function is_break_allowed($previous_class, $current_class) {
    return true;
  }

  /**
   * Returns the line break class of a code point, read from the
   * one-byte-per-code-point table file and cached per request.
   *
   * Applies rule LB1 of the Unicode algorithm: in the absence of
   * other criteria AI, SA, SG and XX are resolved to AL. A table byte
   * of 0 (gap in the data, read past the end of the table, class name
   * unknown to the generator) carries no class and is resolved the
   * same way as XX.
   *
   * @param int $ucs2_char code point, used as byte offset into the table
   * @return int one of the UC_LINE_BREAK_* codes
   */
  function get_line_break_class($ucs2_char) {
    static $class_cache = array();

    if (!isset($class_cache[$ucs2_char])) {
      $table_handle = $this->get_line_break_class_table_handle();
      fseek($table_handle, $ucs2_char /* as integer */, SEEK_SET);

      $raw = fread($table_handle, 1);
      $class = ($raw === false || $raw === '') ? 0 : ord($raw);

      // Resolve "no data", AI, SA, SG, and XX to AL
      if ($class == 0 ||
          $class == UC_LINE_BREAK_AI ||
          $class == UC_LINE_BREAK_SA ||
          $class == UC_LINE_BREAK_SG ||
          $class == UC_LINE_BREAK_XX) {
        $class = UC_LINE_BREAK_AL;
      };

      $class_cache[$ucs2_char] = $class;
    };

    return $class_cache[$ucs2_char];
  }

  /**
   * Returns the read handle of the cached class table, generating the
   * table file first when it does not exist yet. The handle is opened
   * once per request and kept under a shared lock.
   *
   * @return resource handle returned by fopen()
   */
  function get_line_break_class_table_handle() {
    static $table_handle = null;

    if (is_null($table_handle)) {
      $filename = CACHE_DIR.'unicode.lb.classes.dat';
      if (!file_exists($filename)) {
        $this->generate_line_break_class_table($filename);
      };

      $table_handle = fopen($filename, 'rb');
      flock($table_handle, LOCK_SH);
    };

    return $table_handle;
  }

  /**
   * Builds the binary class table from HTML2PS_DIR/data/LineBreak.txt:
   * the byte at offset N holds the class code of code point N, and 0
   * marks code points without data.
   *
   * @param string $output_filename file to (re)create
   */
  function generate_line_break_class_table($output_filename) {
    $class_codes = array('BK' => UC_LINE_BREAK_BK,
                         'CR' => UC_LINE_BREAK_CR,
                         'LF' => UC_LINE_BREAK_LF,
                         'CM' => UC_LINE_BREAK_CM,
                         'NL' => UC_LINE_BREAK_NL,
                         'SG' => UC_LINE_BREAK_SG,
                         'WJ' => UC_LINE_BREAK_WJ,
                         'ZW' => UC_LINE_BREAK_ZW,
                         'GL' => UC_LINE_BREAK_GL,
                         'SP' => UC_LINE_BREAK_SP,
                         'B2' => UC_LINE_BREAK_B2,
                         'BA' => UC_LINE_BREAK_BA,
                         'BB' => UC_LINE_BREAK_BB,
                         'HY' => UC_LINE_BREAK_HY,
                         'CB' => UC_LINE_BREAK_CB,
                         'CL' => UC_LINE_BREAK_CL,
                         'EX' => UC_LINE_BREAK_EX,
                         'IN' => UC_LINE_BREAK_IN,
                         'NS' => UC_LINE_BREAK_NS,
                         'OP' => UC_LINE_BREAK_OP,
                         'QU' => UC_LINE_BREAK_QU,
                         'IS' => UC_LINE_BREAK_IS,
                         'NU' => UC_LINE_BREAK_NU,
                         'PO' => UC_LINE_BREAK_PO,
                         'PR' => UC_LINE_BREAK_PR,
                         'SY' => UC_LINE_BREAK_SY,
                         'AI' => UC_LINE_BREAK_AI,
                         'AL' => UC_LINE_BREAK_AL,
                         'H2' => UC_LINE_BREAK_H2,
                         'H3' => UC_LINE_BREAK_H3,
                         'ID' => UC_LINE_BREAK_ID,
                         'JL' => UC_LINE_BREAK_JL,
                         'JV' => UC_LINE_BREAK_JV,
                         'JT' => UC_LINE_BREAK_JT,
                         'SA' => UC_LINE_BREAK_SA,
                         'XX' => UC_LINE_BREAK_XX);

    $output_handle = fopen($output_filename, 'wb');
    flock($output_handle, LOCK_EX);

    $input_handle = fopen(HTML2PS_DIR.'/data/LineBreak.txt', 'r');

    // -1 (nothing written yet) keeps offset == code point even when
    // the first record does not start at U+0000
    $last_position = -1;
    while ($line = fgets($input_handle)) {
      $line = trim($line);

      if (strlen($line) == 0 || $line[0] == '#') {
        continue;
      };

      if (preg_match('/^([0-9a-f]+);(\w\w) #/i', $line, $matches)) {
        // single code point record
        $range_start = hexdec($matches[1]);
        $range_end = $range_start;
        $class = $matches[2];
      } elseif (preg_match('/^([0-9a-f]+)\.\.([0-9a-f]+);(\w\w) #/i', $line, $matches)) {
        // code point range record
        $range_start = hexdec($matches[1]);
        $range_end = hexdec($matches[2]);
        $class = $matches[3];
      } else {
        // do not leave a half-written table behind: it would be
        // picked up as a valid cache on the next request
        fclose($input_handle);
        flock($output_handle, LOCK_UN);
        fclose($output_handle);
        unlink($output_filename);

        var_dump($line); die();
      };

      // pad code points that have no record
      if ($range_start > $last_position + 1) {
        fwrite($output_handle, str_repeat(chr(0), $range_start - $last_position - 1));
      };

      // class names unknown to this table are stored as 0 ("no data")
      $code = isset($class_codes[$class]) ? $class_codes[$class] : 0;
      fwrite($output_handle, str_repeat(chr($code), $range_end - $range_start + 1));

      $last_position = $range_end;
    };

    fclose($input_handle);

    flock($output_handle, LOCK_UN);
    fclose($output_handle);
  }

  /**
   * Replaces every run of CR, LF, tab and space characters with a
   * single space.
   *
   * @param string $content
   * @return string
   */
  function collapse_whitespace($content) {
    return preg_replace('/[\r\n\t ]+/u', ' ', $content);
  }

  /**
   * Removes linefeeds at the very start of $content, together with
   * any spaces preceding them.
   *
   * @param string $content
   * @return string
   */
  function remove_leading_linefeeds($content) {
    return preg_replace('/^ *[\r\n]+/u', '', $content);
  }

  /**
   * Removes CR/LF characters at the very end of $content.
   *
   * @param string $content
   * @return string
   */
  function remove_trailing_linefeeds($content) {
    return preg_replace('/[\r\n]+$/u', '', $content);
  }
}
1049
1050?>