xref: /plugin/struct/meta/CSVImporter.php (revision 47a35992e0ce1be4688eb244a608bd5d16351626)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
54e4edb41SAnna Dabrowskause dokuwiki\plugin\struct\types\Page;
64e4edb41SAnna Dabrowska
/**
 * Class CSVImporter
 *
 * Imports CSV data into a schema. The import type (page, lookup or serial)
 * decides how each row is stored, see saveLine().
 *
 * @package dokuwiki\plugin\struct\meta
 */
class CSVImporter
{

    /** @var  Schema */
    protected $schema;

    /** @var  resource */
    protected $fh;

    /** @var  \helper_plugin_sqlite */
    protected $sqlite;

    /** @var Column[] The single values to store index => col */
    protected $columns = array();

    /** @var int current line number */
    protected $line = 0;

    /** @var array list of headers */
    protected $header;

    /** @var  string[] list of validation errors, empty as long as nothing failed */
    protected $errors = array();

    /**
     * @var string data type, must be one of page, lookup, serial
     */
    protected $type;

    /**
     * CSVImporter constructor.
     *
     * Opens the CSV file, loads the schema and fetches the database helper.
     *
     * @param string $table name of the schema to import into
     * @param string $file path of the CSV file
     * @param string $type data type, one of page, lookup, serial
     * @throws StructException when the file can not be opened or the schema does not exist
     */
    public function __construct($table, $file, $type)
    {
        $this->type = $type;
        $this->openFile($file);

        $this->schema = new Schema($table);
        if (!$this->schema->getId()) throw new StructException('Schema does not exist');

        /** @var \helper_plugin_struct_db $db */
        $db = plugin_load('helper', 'struct_db');
        $this->sqlite = $db->getDB(true);
    }

    /**
     * Import the data from file.
     *
     * Reads the header line first, then imports all remaining lines.
     *
     * @throws StructException
     */
    public function import()
    {
        // Do the import
        $this->readHeaders();
        $this->importCSV();
    }

    /**
     * Open a given file path
     *
     * The main purpose of this method is to be overridden in a mock for testing
     *
     * @param string $file the file path
     *
     * @return void
     * @throws StructException when the file can not be opened
     */
    protected function openFile($file)
    {
        $this->fh = fopen($file, 'rb');
        if (!$this->fh) {
            throw new StructException('Failed to open CSV file for reading');
        }
    }

    /**
     * Get a parsed line from the opened CSV file
     *
     * The main purpose of this method is to be overridden in a mock for testing
     *
     * @return array|false|null
     */
    protected function getLine()
    {
        return fgetcsv($this->fh);
    }

    /**
     * Read the CSV headers and match it with the Schema columns
     *
     * Fills $this->columns (CSV index => Column) and $this->header. Both have to
     * describe the same fields in the same order because saveLine() combines
     * the header with the values read through the columns.
     *
     * @throws StructException when no header can be read or no column matches
     */
    protected function readHeaders()
    {
        $header = $this->getLine();
        if (!$header) throw new StructException('Failed to read CSV');
        $this->line++;

        // we might have to create a page column first
        // NOTE(review): the pid column is registered at index 0, so the CSV is
        // presumably expected to carry 'pid' as its first field - confirm
        if ($this->type !== 'lookup') {
            $pageType = new Page(null, 'pid');
            $pidCol = new Column(0, $pageType, 0, true, $this->schema->getTable());
            $this->columns[] = $pidCol;
        }

        foreach ($header as $i => $head) {
            $col = $this->schema->findColumn($head);
            // just skip the checks for 'pid' but discard other columns not present in the schema
            if (!$col) {
                if ($head !== 'pid') {
                    unset($header[$i]);
                }
                continue;
            }
            if (!$col->isEnabled()) {
                // disabled columns are not imported. Their header has to go as well,
                // otherwise header and values differ in length in saveLine()
                unset($header[$i]);
                continue;
            }
            $this->columns[$i] = $col;
        }

        if (!$this->columns) {
            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
        }

        $this->header = $header;
    }

    /**
     * Walks through the CSV and imports
     *
     * Every line read increases the line counter before it is handed to importLine()
     */
    protected function importCSV()
    {
        while (($data = $this->getLine()) !== false) {
            $this->line++;
            $this->importLine($data);
        }
    }

    /**
     * The errors that occured during validation
     *
     * @return string[] already translated error messages, empty when there were none
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Validate a single value
     *
     * This implementation accepts everything, it is a hook for subclasses.
     *
     * @param Column $col the column of that value
     * @param mixed &$rawvalue the value, will be fixed according to the type
     * @return bool true if the data validates, otherwise false
     */
    protected function validateValue(Column $col, &$rawvalue)
    {
        //by default no validation
        return true;
    }

    /**
     * Read and validate CSV parsed line
     *
     * @param array $line the parsed CSV line
     * @return array|bool the values in column order, false if a value did not validate
     * @throws StructException when the line has no field for one of the columns
     */
    protected function readLine($line)
    {
        // prepare values for single value table
        $values = array();
        foreach ($this->columns as $i => $column) {
            if (!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);

            if (!$this->validateValue($column, $line[$i])) return false;

            if ($column->isMulti()) {
                // multi values get split on comma, but JSON values contain commas too, hence preg_split
                // an empty field has no first character to look at, it takes the plain split
                if ($line[$i] !== '' && $line[$i][0] === '[') {
                    $line[$i] = preg_split('/,(?=\[)/', $line[$i]);
                } else {
                    $line[$i] = array_map('trim', explode(',', $line[$i]));
                }
            }
            // data access will handle multivalues, no need to manipulate them here
            $values[] = $line[$i];
        }

        return $values;
    }

    /**
     * Save one CSV line into database
     *
     * Page data is saved as a new revision through the struct helper, all other
     * types are saved as lookup data.
     *
     * @param string[] $values parsed line values
     */
    protected function saveLine($values)
    {
        $data = array_combine($this->header, $values);
        // pid is a non-data column and must be supplied to the AccessTable separately
        $pid = isset($data['pid']) ? $data['pid'] : '';
        unset($data['pid']);
        $table = $this->schema->getTable();
        // page data accessor requires a timestamp of a revision
        $ts = ($this->type === 'page') ? time() : 0;
        $access = AccessTable::byTableName($table, $pid, $ts);

        /** @var \helper_plugin_struct $helper */
        $helper = plugin_load('helper', 'struct');
        if ($this->type === 'page') {
            $helper->saveData($pid, [$table => $data], 'CSV data imported');
        } else {
            $helper->saveLookupData($access, $data);
        }
    }

    /**
     * Imports one line into the schema
     *
     * Lines that did not validate are not saved, the collected errors are
     * shown as error messages instead.
     *
     * @param string[] $line the parsed CSV line
     */
    protected function importLine($line)
    {
        //read values, false if invalid
        $values = $this->readLine($line);

        if ($values) {
            $this->saveLine($values);
            return;
        }

        foreach ($this->errors as $error) {
            msg($error, -1);
        }
    }
}
246