xref: /plugin/struct/meta/CSVImporter.php (revision 7234bfb14e712ff548d9266ef32fdcc8eaf2d04e)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
54e4edb41SAnna Dabrowskause dokuwiki\plugin\struct\types\Page;
64e4edb41SAnna Dabrowska
7a0b3799eSAndreas Gohr/**
8f36cc634SAndreas Gohr * Class CSVImporter
9a0b3799eSAndreas Gohr *
10308cc83fSAndreas Gohr * Imports CSV data
11a0b3799eSAndreas Gohr *
12a0b3799eSAndreas Gohr * @package dokuwiki\plugin\struct\meta
13a0b3799eSAndreas Gohr */
class CSVImporter
{
    /** @var Schema the schema the CSV data is imported into */
    protected $schema;

    /** @var resource handle of the opened CSV file */
    protected $fh;

    /** @var \helper_plugin_sqlite */
    protected $sqlite;

    /** @var Column[] The single values to store, CSV field index => col */
    protected $columns = [];

    /** @var int current line number */
    protected $line = 0;

    /** @var string[] list of headers that are kept for the import */
    protected $header = [];

    /** @var string[] list of validation errors, empty as long as nothing failed */
    protected $errors = [];

    /**
     * @var string data type, must be one of page, global, serial
     */
    protected $type;

    /**
     * CSVImporter constructor.
     *
     * Opens the CSV file, loads the schema and fetches the database helper.
     * Nothing is imported before import() is called.
     *
     * @param string $table name of the schema to import into
     * @param string $file path of the CSV file
     * @param string $type data type, see $type property
     * @throws StructException when the file can not be opened or the schema has no ID
     */
    public function __construct($table, $file, $type)
    {
        $this->type = $type;
        $this->openFile($file);

        $this->schema = new Schema($table);
        if (!$this->schema->getId()) throw new StructException('Schema does not exist');

        /** @var \helper_plugin_struct_db $db */
        $db = plugin_load('helper', 'struct_db');
        $this->sqlite = $db->getDB(true);
    }

    /**
     * Import the data from file.
     *
     * Reads the header line first, then imports all remaining lines.
     *
     * @throws StructException
     */
    public function import()
    {
        $this->readHeaders();
        $this->importCSV();
    }

    /**
     * Open a given file path
     *
     * The main purpose of this method is to be overridden in a mock for testing
     *
     * @param string $file the file path
     *
     * @return void
     * @throws StructException when the file can not be opened for reading
     */
    protected function openFile($file)
    {
        $this->fh = fopen($file, 'rb');
        if (!$this->fh) {
            throw new StructException('Failed to open CSV file for reading');
        }
    }

    /**
     * Get a parsed line from the opened CSV file
     *
     * The main purpose of this method is to be overridden in a mock for testing
     *
     * @return array|false|null whatever fgetcsv() returns for the file handle
     */
    protected function getLine()
    {
        return fgetcsv($this->fh);
    }

    /**
     * Read the CSV headers and match it with the Schema columns
     *
     * Fills $this->columns (CSV field index => Column) and $this->header. Both must describe
     * the same fields because saveLine() combines the header names with the values read
     * through the columns.
     *
     * @throws StructException when no header can be read or no column is left to import
     */
    protected function readHeaders()
    {
        $header = $this->getLine();
        if (!$header) throw new StructException('Failed to read CSV');
        $this->line++;

        // we might have to create a page column first: every type but global gets a
        // 'pid' column, appended before any schema column is registered
        if ($this->type !== CSVExporter::DATATYPE_GLOBAL) {
            $pageType = new Page(null, 'pid');
            $this->columns[] = new Column(0, $pageType, 0, true, $this->schema->getTable());
        }

        foreach ($header as $i => $head) {
            $col = $this->schema->findColumn($head);
            if (!$col) {
                // 'pid' is no schema column but stays in the header,
                // all other unknown headers are discarded
                if ($head !== 'pid') {
                    unset($header[$i]);
                }
                continue;
            }
            if (!$col->isEnabled()) {
                // a disabled column is not imported, so its header has to be dropped as well,
                // otherwise the header would have more entries than there are values per line
                unset($header[$i]);
                continue;
            }
            $this->columns[$i] = $col;
        }

        if (!$this->columns) {
            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
        }

        $this->header = $header;
    }

    /**
     * Walks through the CSV and imports
     *
     * Stops as soon as getLine() returns false.
     */
    protected function importCSV()
    {
        while (($data = $this->getLine()) !== false) {
            $this->line++;
            $this->importLine($data);
        }
    }

    /**
     * The errors that occured during validation
     *
     * @return string[] already translated error messages, empty when there are none
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Validate a single value
     *
     * This base implementation accepts everything, it is a hook for subclasses.
     *
     * @param Column $col the column of that value
     * @param mixed &$rawvalue the value, will be fixed according to the type
     * @return bool true if the data validates, otherwise false
     */
    protected function validateValue(Column $col, &$rawvalue)
    {
        //by default no validation
        return true;
    }

    /**
     * Read and validate CSV parsed line
     *
     * @param array $line the parsed CSV line
     * @return array|bool the values in the order of $this->columns, false if a value did not validate
     * @throws StructException when the line has no field for one of the columns
     */
    protected function readLine($line)
    {
        // prepare values for single value table
        $values = [];
        foreach ($this->columns as $i => $column) {
            if (!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);

            // validation gets the field by reference and may adjust it
            if (!$this->validateValue($column, $line[$i])) return false;

            $value = $line[$i];
            if ($column->isMulti()) {
                // multi values get split on comma, but JSON values contain commas too, hence preg_split.
                // The null coalescing keeps an empty field from triggering an
                // "Uninitialized string offset" warning
                if (($value[0] ?? '') === '[') {
                    $value = preg_split('/,(?=\[)/', $value);
                } else {
                    $value = array_map('trim', explode(',', $value));
                }
            }
            // data access will handle multivalues, no need to manipulate them here
            $values[] = $value;
        }

        return $values;
    }

    /**
     * Save one CSV line into database
     *
     * @param string[] $values parsed line values
     * @throws StructException when the number of values does not match the number of headers
     */
    protected function saveLine($values)
    {
        // array_combine() needs both lists to have the same length, report a mismatch
        // as an import error instead of letting it fail
        if (count($this->header) !== count($values)) {
            throw new StructException('Column count mismatch at CSV line %d', $this->line);
        }

        $data = array_combine($this->header, $values);
        // pid is a non-data column and must be supplied to the AccessTable separately
        $pid = $data['pid'] ?? '';
        unset($data['pid']);
        $table = $this->schema->getTable();

        /** @var \helper_plugin_struct $helper */
        $helper = plugin_load('helper', 'struct');
        if ($this->type === CSVExporter::DATATYPE_PAGE) {
            $helper->saveData($pid, [$table => $data], 'CSV data imported');
            return;
        }

        // serial and global data both go through saveLookupData(), only the access object differs
        if ($this->type === CSVExporter::DATATYPE_SERIAL) {
            $access = AccessTable::getSerialAccess($table, $pid);
        } else {
            $access = AccessTable::getGlobalAccess($table);
        }
        $helper->saveLookupData($access, $data);
    }

    /**
     * Imports one line into the schema
     *
     * The line is saved when readLine() returns a non-empty list of values. Otherwise
     * all currently collected errors are shown as error messages.
     *
     * @param string[] $line the parsed CSV line
     */
    protected function importLine($line)
    {
        //read values, false if invalid, empty array if the same as current data
        $values = $this->readLine($line);

        if ($values) {
            $this->saveLine($values);
            return;
        }

        foreach ($this->errors as $error) {
            msg($error, -1);
        }
    }
}
247