<?php

declare(strict_types = 1);

namespace LanguageDetection;

/**
 * Class Language
 *
 * Loads per-language n-gram lists from resource files and scores an input
 * text against each of them.
 *
 * @copyright Patrick Schur
 * @license https://opensource.org/licenses/mit-license.html MIT
 * @author Patrick Schur <patrick_schur@outlook.de>
 * @package LanguageDetection
 */
class Language extends NgramParser
{
    /**
     * Lookup tables keyed by language code. Each table maps an n-gram to its
     * position in the list returned by that language's resource file.
     *
     * @var array<string, array<string, int>>
     */
    protected $tokens = [];

    /**
     * Loads all language files
     *
     * Every file matching "<dirname>/<any>/<any>.php" is considered; a file is
     * loaded when no filter is given or when its base name (without ".php")
     * is listed in $lang. Each file is expected to return an array keyed by
     * language code. When two files define the same code, the first one
     * loaded wins (array union).
     *
     * @param array $lang List of ISO 639-1 codes, that should be used in the detection phase
     * @param string $dirname Name of the directory where the translations files are located
     *
     * @throws \InvalidArgumentException If $dirname is given but is not a readable directory
     */
    public function __construct(array $lang = [], string $dirname = '')
    {
        // Compare against '' explicitly: empty() would also treat the valid
        // directory name "0" as "not provided".
        if ($dirname === '')
        {
            $pattern = __DIR__ . '/../../resources/*/*.php';
        }
        elseif (!\is_dir($dirname) || !\is_readable($dirname))
        {
            throw new \InvalidArgumentException('Provided directory could not be found or is not readable');
        }
        else
        {
            $pattern = \rtrim($dirname, '/') . '/*/*.php';
        }

        // glob() returns false on error; treat that like "no files found"
        // instead of iterating over a boolean.
        $files = \glob($pattern);

        if ($files === false)
        {
            $files = [];
        }

        $loadAll = ($lang === []);
        $loaded = [];

        foreach ($files as $file)
        {
            // Strict comparison so that non-string entries in $lang (e.g. 0)
            // cannot accidentally match every language code.
            if ($loadAll || \in_array(\basename($file, '.php'), $lang, true))
            {
                $loaded += require $file;
            }
        }

        // Flip each n-gram list so the position of an n-gram can be looked up
        // in constant time during detection.
        foreach ($loaded as $code => $ngrams)
        {
            $this->tokens[$code] = \array_flip($ngrams);
        }
    }

    /**
     * Detects the language from a given text string
     *
     * The lower-cased text is turned into n-grams via getNgrams(). For every
     * loaded language, each sample n-gram contributes the absolute difference
     * between its position in the sample list and its position in the
     * language's list, or $this->maxNgrams when the language does not contain
     * it. The score is 1 - (sum / (maxNgrams * number of samples)), so a
     * smaller total distance yields a higher score.
     *
     * @param string $str
     * @return LanguageResult Scores keyed by language code, highest first;
     *                        empty when the text yields no n-grams
     */
    public function detect(string $str): LanguageResult
    {
        $samples = $this->getNgrams(\mb_strtolower($str));

        $result = [];

        if (\count($samples) > 0)
        {
            foreach ($this->tokens as $lang => $ranks)
            {
                $position = 0;
                $distance = 0;

                foreach ($samples as $ngram)
                {
                    // Unknown n-grams are charged the maximum penalty.
                    $distance += isset($ranks[$ngram])
                        ? \abs($position - $ranks[$ngram])
                        : $this->maxNgrams;

                    ++$position;
                }

                // $position now equals the number of samples (> 0 here).
                $result[$lang] = 1 - ($distance / ($this->maxNgrams * $position));
            }

            // Highest score first, keeping the language codes as keys.
            \arsort($result, SORT_NUMERIC);
        }

        return new LanguageResult($result);
    }
}