xref: /plugin/struct/meta/CSVImporter.php (revision 308cc83fd5391df29d21d2bc1306c8da49fdb335)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
54e4edb41SAnna Dabrowskause dokuwiki\plugin\struct\types\Page;
64e4edb41SAnna Dabrowska
/**
 * Class CSVImporter
 *
 * Imports CSV data into a struct schema.
 *
 * The first CSV line is treated as the header and matched against the columns of the
 * schema. Every following line is read, optionally validated and then saved through the
 * struct helper plugin.
 *
 * @package dokuwiki\plugin\struct\meta
 */
class CSVImporter
{
    /** @var Schema the schema the data is imported into */
    protected $schema;

    /** @var resource handle of the opened CSV file */
    protected $fh;

    /** @var \helper_plugin_sqlite */
    protected $sqlite;

    /** @var Column[] The single values to store index => col */
    protected $columns = [];

    /** @var int current line number */
    protected $line = 0;

    /** @var array list of headers, kept in sync with $columns (same keys, same order) */
    protected $header;

    /** @var array list of validation errors, empty as long as nothing failed */
    protected $errors = [];

    /**
     * @var string data type, must be one of page, global, serial
     */
    protected $type;

    /**
     * CSVImporter constructor.
     *
     * Opens the file and loads the schema, the actual import is started with import()
     *
     * @param string $table name of the schema to import into
     * @param string $file path of the CSV file
     * @param string $type the data type to import
     * @throws StructException when the file can not be opened or the schema does not exist
     */
    public function __construct($table, $file, $type)
    {
        $this->type = $type;
        $this->openFile($file);

        $this->schema = new Schema($table);
        if (!$this->schema->getId()) throw new StructException('Schema does not exist');

        /** @var \helper_plugin_struct_db $db */
        $db = plugin_load('helper', 'struct_db');
        $this->sqlite = $db->getDB(true);
    }

    /**
     * Import the data from file.
     *
     * @throws StructException
     */
    public function import()
    {
        // Do the import
        $this->readHeaders();
        $this->importCSV();
    }

    /**
     * Open a given file path
     *
     * The main purpose of this method is to be overridden in a mock for testing
     *
     * @param string $file the file path
     *
     * @return void
     * @throws StructException when the file can not be opened
     */
    protected function openFile($file)
    {
        $this->fh = fopen($file, 'rb');
        if (!$this->fh) {
            throw new StructException('Failed to open CSV file for reading');
        }
    }

    /**
     * Get a parsed line from the opened CSV file
     *
     * The main purpose of this method is to be overridden in a mock for testing
     *
     * @return array|false|null
     */
    protected function getLine()
    {
        return fgetcsv($this->fh);
    }

    /**
     * Read the CSV headers and match it with the Schema columns
     *
     * Header cells that can not be imported are dropped, so that $this->header and
     * $this->columns always describe exactly the same fields in the same order.
     *
     * @throws StructException when the header can not be read or nothing matches
     */
    protected function readHeaders()
    {
        $header = $this->getLine();
        if (!$header) throw new StructException('Failed to read CSV');
        $this->line++;

        // 'pid' is not a schema column, non-global data needs an artificial page column for it
        $pidCol = null;
        if ($this->type !== CSVExporter::DATATYPE_GLOBAL) {
            $pageType = new Page(null, 'pid');
            $pidCol = new Column(0, $pageType, 0, true, $this->schema->getTable());
        }

        foreach ($header as $i => $head) {
            $col = $this->schema->findColumn($head);
            if ($col) {
                if ($col->isEnabled()) {
                    $this->columns[$i] = $col;
                    continue;
                }
            } elseif ($pidCol !== null && $head === 'pid') {
                // the page column is registered at the position it really has in the CSV
                $this->columns[$i] = $pidCol;
                continue;
            }

            // unknown or disabled field: it has no column, so it must not stay in the
            // header either, otherwise header and values can not be combined later on
            unset($header[$i]);
        }

        if (!$this->columns) {
            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
        }

        $this->header = $header;
    }

    /**
     * Walks through the CSV and imports
     */
    protected function importCSV()
    {
        // getLine() may signal the end with false or null, only arrays are real lines
        while (is_array($data = $this->getLine())) {
            $this->line++;
            $this->importLine($data);
        }
    }

    /**
     * The errors that occurred during validation
     *
     * @return string[] already translated error messages
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Validate a single value
     *
     * @param Column $col the column of that value
     * @param mixed &$rawvalue the value, will be fixed according to the type
     * @return bool true if the data validates, otherwise false
     */
    protected function validateValue(Column $col, &$rawvalue)
    {
        //by default no validation
        return true;
    }

    /**
     * Read and validate CSV parsed line
     *
     * @param array $line the parsed CSV line, indexed like the header
     * @return array|bool the values of all matched columns, false if a value did not validate
     * @throws StructException when the line has no value for one of the matched columns
     */
    protected function readLine($line)
    {
        // prepare values for single value table
        $values = [];
        foreach ($this->columns as $i => $column) {
            if (!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);

            if (!$this->validateValue($column, $line[$i])) return false;

            if ($column->isMulti()) {
                // multi values get split on comma, but JSON values contain commas too, hence preg_split
                // isset() guards against empty cells, which have no first character to look at
                if (isset($line[$i][0]) && $line[$i][0] === '[') {
                    $line[$i] = preg_split('/,(?=\[)/', $line[$i]);
                } else {
                    $line[$i] = array_map('trim', explode(',', $line[$i]));
                }
            }
            // data access will handle multivalues, no need to manipulate them here
            $values[] = $line[$i];
        }

        return $values;
    }

    /**
     * Save one CSV line into database
     *
     * Page data is saved via the helper's saveData(), serial and global data via
     * saveLookupData() using the matching AccessTable.
     *
     * @param string[] $values parsed line values
     */
    protected function saveLine($values)
    {
        $data = array_combine($this->header, $values);
        // pid is a non-data column and must be supplied to the AccessTable separately
        $pid = isset($data['pid']) ? $data['pid'] : '';
        unset($data['pid']);
        $table = $this->schema->getTable();

        /** @var \helper_plugin_struct $helper */
        $helper = plugin_load('helper', 'struct');
        if ($this->type === CSVExporter::DATATYPE_PAGE) {
            $helper->saveData($pid, [$table => $data], 'CSV data imported');
            return;
        }
        if ($this->type === CSVExporter::DATATYPE_SERIAL) {
            $access = AccessTable::getSerialAccess($table, $pid);
        } else {
            $access = AccessTable::getGlobalAccess($table);
        }
        $helper->saveLookupData($access, $data);
    }

    /**
     * Imports one line into the schema
     *
     * When the line yields no values, nothing is saved and the collected
     * validation errors are shown instead.
     *
     * @param string[] $line the parsed CSV line
     */
    protected function importLine($line)
    {
        //read values, false if invalid, empty array if the same as current data
        $values = $this->readLine($line);

        if ($values) {
            $this->saveLine($values);
            return;
        }

        foreach ($this->errors as $error) {
            msg($error, -1);
        }
    }
}
248