1<?php
2
3namespace Mpdf;
4
/**
 * Finds soft-hyphenation break points inside single words.
 *
 * A user dictionary (explicit break points written as "hy/phen/ate") is consulted first; if it
 * yields no usable break, pattern files loaded from data/patterns/<lang>.php are used instead.
 */
class Hyphenator
{

	/**
	 * @var \Mpdf\Mpdf
	 */
	private $mpdf;

	/**
	 * Patterns of the currently loaded language, keyed by the pattern with its digits removed.
	 *
	 * @var string[]
	 */
	private $patterns;

	/**
	 * Lines read from the hyphenation dictionary file.
	 *
	 * @var string[]
	 */
	private $dictionary;

	/**
	 * Break offsets taken from the dictionary, keyed by the lower-cased entry without slashes.
	 *
	 * @var int[][]
	 */
	private $words;

	/**
	 * Language code the content of $patterns was loaded for.
	 *
	 * @var string|null
	 */
	private $loadedPatterns;

	/**
	 * @var bool
	 */
	private $dictionaryLoaded;

	public function __construct(Mpdf $mpdf)
	{
		$this->mpdf = $mpdf;

		$this->dictionaryLoaded = false;

		$this->patterns = [];
		$this->dictionary = [];
		$this->words = [];
	}

	/**
	 * Returns the right-most permitted break position inside $word.
	 *
	 * The position is a character offset into $word as passed in (leading quotes included).
	 * Dictionary breaks are accepted while they are smaller than $currptr, pattern breaks while
	 * they are not greater than $currptr.
	 *
	 * @param string $word
	 * @param int $currptr
	 *
	 * @return int Break offset, or -1 when no break point was found
	 */
	public function hyphenateWord($word, $currptr)
	{
		// Do everything inside this function in utf-8
		// Don't hyphenate web addresses (plain and TLS scheme, or a leading "www.")
		if (preg_match('/^(https?:|www\.)/', $word)) {
			return -1;
		}

		if (!$this->dictionaryLoaded) {
			$this->loadDictionary();
		}

		if (!in_array($this->mpdf->SHYlang, $this->mpdf->SHYlanguages)) {
			return -1;
		}

		// If no pattern loaded or not the best one
		if (!$this->patternsLoaded()) {
			$this->loadPatterns();
		}

		if ($this->mpdf->usingCoreFont) {
			$word = mb_convert_encoding($word, 'UTF-8', $this->mpdf->mb_enc);
		}

		$charmin = $this->mpdf->SHYcharmin;
		$startpunctuation = "\xc2\xab\xc2\xbf\xe2\x80\x98\xe2\x80\x9b\xe2\x80\x9c\xe2\x80\x9f";
		$endpunctuation = "\xe2\x80\x9e\xe2\x80\x9d\xe2\x80\x9a\xe2\x80\x99\xc2\xbb";

		// Strip opening quotes. The quantifier sits inside the group so that the whole run of
		// stripped characters is captured; its length is added back to the returned offset.
		$prepre = '';
		if (preg_match('/^(["\'' . $startpunctuation . ']+)(.{' . $charmin . ',})$/u', $word, $m)) {
			$prepre = $m[1];
			$word = $m[2];
		}

		// Strip trailing punctuation; it never takes part in hyphenation.
		if (preg_match('/^(.{' . $charmin . ',})([\'\.,;:!?"' . $endpunctuation . ']+)$/u', $word, $m)) {
			$word = $m[1];
		}

		$wordLength = mb_strlen($word, 'UTF-8');
		if ($wordLength < $charmin) {
			return -1;
		}

		$ptr = -1;
		$success = false;
		$preprelen = mb_strlen($prepre, 'UTF-8');

		// 1) Explicit break points from the user dictionary.
		$lookup = mb_strtolower($word, 'UTF-8');
		if (isset($this->words[$lookup])) {
			foreach ($this->words[$lookup] as $offset) {
				if (($offset + $preprelen) >= $currptr) {
					break;
				}

				$ptr = $offset + $preprelen;
				$success = true;
			}
		}

		if ($success) {
			return $ptr;
		}

		// 2) Pattern based break points: an odd level marks an allowed break.
		// NOTE(review): this path uses ">" where the dictionary path uses ">=" - confirm which
		// comparison against $currptr is intended.
		$levels = $this->collectPatternLevels($word);
		$lastCandidate = $wordLength - $this->mpdf->SHYrightmin;

		for ($i = $this->mpdf->SHYleftmin; $i <= $lastCandidate; $i++) {
			if (isset($levels[$i]) && $levels[$i] % 2 !== 0) {
				if (($i + $preprelen) > $currptr) {
					break;
				}
				$ptr = $i + $preprelen;
			}
		}

		return $ptr;
	}

	/**
	 * Applies every loaded pattern that occurs in "_word_" and records the pattern digits.
	 *
	 * @param string $word UTF-8 word without surrounding punctuation
	 *
	 * @return string[] Digit characters keyed by the index of the character of $word that
	 *                  follows the inter-letter position
	 */
	private function collectPatternLevels($word)
	{
		$charmin = $this->mpdf->SHYcharmin;
		$charmax = $this->mpdf->SHYcharmax;

		// Underscores mark the word boundaries so that boundary patterns can match.
		$text_word = '_' . $word . '_';
		$word_length = mb_strlen($text_word, 'UTF-8');
		$text_word = mb_strtolower($text_word, 'UTF-8');

		$digitChars = array_fill_keys(range(0, 9), true);
		$levels = [];

		for ($position = 0; $position <= ($word_length - $charmin); $position++) {
			$maxwins = min($word_length - $position, $charmax);

			for ($win = $charmin; $win <= $maxwins; $win++) {
				$key = mb_substr($text_word, $position, $win, 'UTF-8');
				if (!isset($this->patterns[$key])) {
					continue;
				}

				$pattern = $this->patterns[$key];
				$pattern_length = mb_strlen($pattern, 'UTF-8');

				// Number of digits met so far plus one; subtracting it converts an index inside
				// the pattern into an index inside $word.
				$digits = 1;

				for ($i = 0; $i < $pattern_length; $i++) {
					// Read characters, not bytes: patterns may contain multi-byte letters and
					// $i / $pattern_length are character counts.
					$char = mb_substr($pattern, $i, 1, 'UTF-8');
					if (!isset($digitChars[$char])) {
						continue;
					}

					// NOTE(review): a later match overwrites an earlier level at the same position.
					// Liang's algorithm keeps the maximum - confirm whether that is wanted here.
					$levels[$position + $i - $digits] = $char;
					$digits++;
				}
			}
		}

		return $levels;
	}

	/**
	 * Tells whether patterns are present and were loaded for the current SHYlang.
	 *
	 * @return bool
	 */
	private function patternsLoaded()
	{
		if (count($this->patterns) < 1) {
			return false;
		}

		return !($this->loadedPatterns && $this->loadedPatterns !== $this->mpdf->SHYlang);
	}

	/**
	 * Loads the space separated pattern list of the current SHYlang and indexes every pattern
	 * by its letters (digits removed).
	 */
	private function loadPatterns()
	{
		$patterns = require __DIR__ . '/../data/patterns/' . $this->mpdf->SHYlang . '.php';

		$indexed = [];
		foreach (explode(' ', $patterns) as $pattern) {
			$indexed[preg_replace('/[0-9]/', '', $pattern)] = $pattern;
		}

		$this->patterns = $indexed;
		$this->loadedPatterns = $this->mpdf->SHYlang;
	}

	/**
	 * Reads the hyphenation dictionary file and stores the break offsets of every entry that
	 * contains at least one "/".
	 *
	 * @throws \Mpdf\MpdfException When the file cannot be read and debug mode is on
	 */
	private function loadDictionary()
	{
		$file = $this->mpdf->hyphenationDictionaryFile;

		$lines = false;
		if (file_exists($file)) {
			// FILE_SKIP_EMPTY_LINES only takes effect together with FILE_IGNORE_NEW_LINES.
			$lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
		}

		if ($lines === false) {
			if ($this->mpdf->debug) {
				throw new \Mpdf\MpdfException(sprintf('Unable to open hyphenation dictionary "%s"', $file));
			}
			$lines = [];
		}

		$this->dictionary = $lines;

		foreach ($this->dictionary as $entry) {
			$entry = trim($entry);
			$length = mb_strlen($entry, 'UTF-8');
			$poss = [];
			$offset = 0;

			while ($offset < $length) {
				$slash = mb_strpos($entry, '/', $offset, 'UTF-8');
				if ($slash === false) {
					break;
				}

				// Every slash already seen shifts the following ones by one character.
				$poss[] = $slash - count($poss);
				$offset = $slash + 1;
			}

			if (count($poss)) {
				$this->words[str_replace('/', '', mb_strtolower($entry, 'UTF-8'))] = $poss;
			}
		}

		$this->dictionaryLoaded = true;
	}
}
208