1<?php
2
3declare(strict_types = 1);
4
5namespace LanguageDetection\Tokenizer;
6
7/**
8 * Class WhitespaceTokenizer
9 *
10 * @copyright Patrick Schur
11 * @license https://opensource.org/licenses/mit-license.html MIT
12 * @author Patrick Schur <patrick_schur@outlook.de>
13 * @package LanguageDetection
14 */
class WhitespaceTokenizer implements TokenizerInterface
{
    /**
     * Splits a text into words and wraps every word in underscores.
     *
     * A delimiter is any run of one or more non-letter characters (anything
     * outside the Unicode "L" category). The negative lookbehind forbids a
     * delimiter from ending in an apostrophe (U+0027), a backtick (U+0060) or
     * a right single quotation mark (U+2019), so a word such as "don't" is
     * kept as a single token instead of being cut at the apostrophe.
     *
     * @param string $str Text to tokenize; it is matched in UTF-8 mode.
     * @return array List of tokens of the form "_word_". Empty when $str
     *               contains no letters or when it cannot be split, e.g.
     *               because it is not valid UTF-8.
     */
    public function tokenize(string $str): array
    {
        $words = \preg_split('/[^\pL]+(?<![\x27\x60\x{2019}])/u', $str, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when PCRE fails, which happens under the
        // /u modifier for malformed UTF-8 subjects. Passing that false on to
        // array_map() would raise a TypeError, so report "no tokens" instead.
        if (false === $words) {
            return [];
        }

        return \array_map(
            static function (string $word): string {
                return "_{$word}_";
            },
            $words
        );
    }
}