<?php

declare(strict_types = 1);

namespace LanguageDetection\Tokenizer;

/**
 * Class WhitespaceTokenizer
 *
 * Splits text into words on runs of non-letter characters and wraps every
 * word in underscores so that word boundaries stay visible in the tokens.
 *
 * @copyright Patrick Schur
 * @license https://opensource.org/licenses/mit-license.html MIT
 * @author Patrick Schur <patrick_schur@outlook.de>
 * @package LanguageDetection
 */
class WhitespaceTokenizer implements TokenizerInterface
{
    /**
     * Splits the given string into underscore-delimited word tokens.
     *
     * The string is split on every run of characters that are not Unicode
     * letters. A separator run is never allowed to end in an apostrophe
     * (U+0027), a backtick (U+0060) or a right single quotation mark
     * (U+2019); such a character therefore stays attached to the letters
     * around it (e.g. "don't" is kept as one word). Empty pieces are dropped.
     *
     * @param string $str Text to tokenize; expected to be valid UTF-8.
     * @return string[] Words in order of appearance, each wrapped as "_word_".
     *                  Empty when $str contains no letters or cannot be split
     *                  (for example because it is not valid UTF-8).
     */
    public function tokenize(string $str): array
    {
        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() yields false when the pattern cannot be applied, e.g.
        // for malformed UTF-8 under the /u modifier. Passing that on to
        // array_map() would raise an unrelated TypeError, so treat it as
        // "no tokens" instead.
        if (false === $words) {
            return [];
        }

        return \array_map(static function (string $word): string {
            return "_{$word}_";
        }, $words);
    }
}