<?php

namespace Mpdf;

/**
 * Finds a hyphenation point inside a single word, using first a user
 * dictionary of explicitly hyphenated words and then per-language patterns.
 */
class Hyphenator
{

    /**
     * @var \Mpdf\Mpdf
     */
    private $mpdf;

    /**
     * Loaded patterns, keyed by the pattern with its digits removed; the value
     * is the full pattern including digits.
     *
     * @var array
     */
    private $patterns;

    /**
     * Raw lines read from the hyphenation dictionary file.
     *
     * @var array
     */
    private $dictionary;

    /**
     * Dictionary words (lower-cased, slashes removed) mapped to the list of
     * character offsets at which the word may be broken.
     *
     * @var array
     */
    private $words;

    /**
     * Value of $mpdf->SHYlang at the time the patterns were loaded.
     *
     * @var string|null
     */
    private $loadedPatterns;

    /**
     * @var bool
     */
    private $dictionaryLoaded;

    public function __construct(Mpdf $mpdf)
    {
        $this->mpdf = $mpdf;

        $this->dictionaryLoaded = false;

        $this->patterns = [];
        $this->dictionary = [];
        $this->words = [];
    }

    /**
     * Returns the last permitted break offset in $word that lies before $currptr.
     *
     * The offset is counted in UTF-8 characters from the start of the word as
     * passed in (leading punctuation included). Dictionary entries take
     * precedence; patterns are consulted only when the dictionary yields nothing.
     *
     * @param string $word
     * @param int $currptr Character offset the break has to stay in front of
     *
     * @return int Break offset, or -1 when no break is available
     */
    public function hyphenateWord($word, $currptr)
    {
        // Do everything inside this function in utf-8
        // Don't hyphenate web addresses (plain and TLS scheme alike)
        if (preg_match('/^(https?:|www\.)/', $word)) {
            return -1;
        }

        if (!$this->dictionaryLoaded) {
            $this->loadDictionary();
        }

        if (!in_array($this->mpdf->SHYlang, $this->mpdf->SHYlanguages, true)) {
            return -1;
        }

        // If no pattern loaded or not the best one
        if (!$this->patternsLoaded()) {
            $this->loadPatterns();
        }

        if ($this->mpdf->usingCoreFont) {
            $word = mb_convert_encoding($word, 'UTF-8', $this->mpdf->mb_enc);
        }

        $charmin = $this->mpdf->SHYcharmin;
        $startpunctuation = "\xc2\xab\xc2\xbf\xe2\x80\x98\xe2\x80\x9b\xe2\x80\x9c\xe2\x80\x9f";
        $endpunctuation = "\xe2\x80\x9e\xe2\x80\x9d\xe2\x80\x9a\xe2\x80\x99\xc2\xbb";

        // Strip leading punctuation but remember it: returned offsets are
        // relative to the unstripped word. The quantifier sits inside the
        // group so that the whole run is captured, not just its last character.
        $prepre = '';
        if (preg_match('/^(["\'' . $startpunctuation . ']+)(.{' . $charmin . ',})$/u', $word, $m)) {
            $prepre = $m[1];
            $word = $m[2];
        }

        // Trailing punctuation is simply dropped; it cannot shift any offset.
        if (preg_match('/^(.{' . $charmin . ',})([\'\.,;:!?"' . $endpunctuation . ']+)$/u', $word, $m)) {
            $word = $m[1];
        }

        $wordLength = mb_strlen($word, 'UTF-8');
        if ($wordLength < $charmin) {
            return -1;
        }

        $preprelen = mb_strlen($prepre, 'UTF-8');
        $ptr = -1;

        // 1) User dictionary: take the last listed break that is still before $currptr.
        $key = mb_strtolower($word, 'UTF-8');
        if (isset($this->words[$key])) {
            foreach ($this->words[$key] as $break) {
                if (($break + $preprelen) >= $currptr) {
                    break;
                }
                $ptr = $break + $preprelen;
            }
            if ($ptr !== -1) {
                return $ptr;
            }
        }

        // 2) Patterns: an odd score at offset $i marks a permitted break after $i characters.
        // NOTE(review): this stage accepts an offset equal to $currptr (">"), whereas the
        // dictionary stage rejects it (">="). Preserved as found - confirm which is intended.
        $scores = $this->findPatternScores($word);
        $lastAllowed = $wordLength - $this->mpdf->SHYrightmin;

        for ($i = $this->mpdf->SHYleftmin; $i <= $lastAllowed; $i++) {
            if (isset($scores[$i]) && $scores[$i] % 2 !== 0) {
                if (($i + $preprelen) > $currptr) {
                    break;
                }
                $ptr = $i + $preprelen;
            }
        }

        return $ptr;
    }

    /**
     * Matches every loaded pattern against the word and collects the pattern
     * digits by position.
     *
     * The word is wrapped in "_" markers before matching, so index $i of the
     * result refers to the gap after the first $i characters of $word.
     *
     * @param string $word UTF-8 word without surrounding punctuation
     *
     * @return array Map of gap index to the digit (as a string) assigned to it
     */
    private function findPatternScores($word)
    {
        $text = '_' . $word . '_';
        // Length is taken before lower-casing, as the positions refer to the original word.
        $length = mb_strlen($text, 'UTF-8');
        $text = mb_strtolower($text, 'UTF-8');

        $charmin = $this->mpdf->SHYcharmin;
        $charmax = $this->mpdf->SHYcharmax;

        $digits = [
            '0' => true,
            '1' => true,
            '2' => true,
            '3' => true,
            '4' => true,
            '5' => true,
            '6' => true,
            '7' => true,
            '8' => true,
            '9' => true
        ];

        $scores = [];

        for ($position = 0; $position <= ($length - $charmin); $position++) {
            $maxwin = min($length - $position, $charmax);

            for ($win = $charmin; $win <= $maxwin; $win++) {
                $fragment = mb_substr($text, $position, $win, 'UTF-8');
                if (!isset($this->patterns[$fragment])) {
                    continue;
                }

                $pattern = $this->patterns[$fragment];
                $patternLength = mb_strlen($pattern, 'UTF-8');

                // Walk the pattern character by character (not byte by byte) so that
                // multibyte letters do not shift the position of the digits.
                $letters = 0;
                for ($i = 0; $i < $patternLength; $i++) {
                    $char = mb_substr($pattern, $i, 1, 'UTF-8');
                    if (isset($digits[$char])) {
                        // A digit belongs to the gap after the letters seen so far.
                        // NOTE(review): the most recently matched pattern overwrites any
                        // earlier digit for the same gap; Liang-style hyphenation usually
                        // keeps the maximum instead. Preserved as found.
                        $scores[$position + $letters - 1] = $char;
                    } else {
                        $letters++;
                    }
                }
            }
        }

        return $scores;
    }

    /**
     * Tells whether patterns are loaded and, if a language was recorded for
     * them, whether it is still the current $mpdf->SHYlang.
     *
     * @return bool
     */
    private function patternsLoaded()
    {
        if (count($this->patterns) < 1) {
            return false;
        }

        return !$this->loadedPatterns || $this->loadedPatterns === $this->mpdf->SHYlang;
    }

    /**
     * Loads the pattern file for $mpdf->SHYlang and indexes each pattern by
     * its digit-free form.
     *
     * The included file is expected to return one string of space-separated
     * patterns.
     */
    private function loadPatterns()
    {
        $lang = $this->mpdf->SHYlang;
        $raw = require __DIR__ . '/../data/patterns/' . $lang . '.php';

        $indexed = [];
        foreach (explode(' ', $raw) as $pattern) {
            $indexed[preg_replace('/[0-9]/', '', $pattern)] = $pattern;
        }

        $this->patterns = $indexed;
        $this->loadedPatterns = $lang;
    }

    /**
     * Reads the hyphenation dictionary file into $this->words.
     *
     * Each line holds one word with "/" at every permitted break, e.g.
     * "dic/tion/ary". Lines without a "/" are ignored. A missing or unreadable
     * file is silently tolerated unless $mpdf->debug is set.
     *
     * @throws \Mpdf\MpdfException When the file cannot be read and debug mode is on
     */
    private function loadDictionary()
    {
        $file = $this->mpdf->hyphenationDictionaryFile;

        // FILE_SKIP_EMPTY_LINES only takes effect together with FILE_IGNORE_NEW_LINES.
        $lines = file_exists($file) ? file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) : false;

        if ($lines === false) {
            if ($this->mpdf->debug) {
                throw new \Mpdf\MpdfException(sprintf('Unable to open hyphenation dictionary "%s"', $file));
            }
            $lines = [];
        }

        $this->dictionary = $lines;

        foreach ($lines as $entry) {
            $entry = trim($entry);
            $length = mb_strlen($entry, 'UTF-8');
            $breaks = [];
            $offset = 0;

            while ($offset < $length) {
                $slash = mb_strpos($entry, '/', $offset, 'UTF-8');
                if ($slash === false) {
                    break;
                }
                // Subtract the slashes already seen to get the offset in the slash-free word.
                $breaks[] = $slash - count($breaks);
                $offset = $slash + 1;
            }

            if (count($breaks)) {
                $this->words[str_replace('/', '', mb_strtolower($entry, 'UTF-8'))] = $breaks;
            }
        }

        $this->dictionaryLoaded = true;
    }
}