xref: /plugin/struct/meta/CSVImporter.php (revision d6d97f6064c3b0f90310be8341edc9585520ee54)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
5a0b3799eSAndreas Gohr/**
6f36cc634SAndreas Gohr * Class CSVImporter
7a0b3799eSAndreas Gohr *
8f36cc634SAndreas Gohr * Imports CSV data into a lookup schema
9a0b3799eSAndreas Gohr *
10a0b3799eSAndreas Gohr * @package dokuwiki\plugin\struct\meta
11a0b3799eSAndreas Gohr */
12*d6d97f60SAnna Dabrowskaabstract class CSVImporter
13*d6d97f60SAnna Dabrowska{
14a0b3799eSAndreas Gohr
15a0b3799eSAndreas Gohr    /** @var  Schema */
16a0b3799eSAndreas Gohr    protected $schema;
17a0b3799eSAndreas Gohr
18a0b3799eSAndreas Gohr    /** @var  resource */
19a0b3799eSAndreas Gohr    protected $fh;
20a0b3799eSAndreas Gohr
21a0b3799eSAndreas Gohr    /** @var  \helper_plugin_sqlite */
22a0b3799eSAndreas Gohr    protected $sqlite;
23a0b3799eSAndreas Gohr
24a0b3799eSAndreas Gohr    /** @var Column[] The single values to store index => col */
25a0b3799eSAndreas Gohr    protected $columns = array();
26a0b3799eSAndreas Gohr
27a0b3799eSAndreas Gohr    /** @var int current line number */
28a0b3799eSAndreas Gohr    protected $line = 0;
29a0b3799eSAndreas Gohr
301fc2361fSSzymon Olewniczak    /** @var  list of headers */
311fc2361fSSzymon Olewniczak    protected $header;
321fc2361fSSzymon Olewniczak
331fc2361fSSzymon Olewniczak    /** @var  array list of validation errors */
341fc2361fSSzymon Olewniczak    protected $errors;
351fc2361fSSzymon Olewniczak
36a0b3799eSAndreas Gohr    /**
37a0b3799eSAndreas Gohr     * CSVImporter constructor.
38a0b3799eSAndreas Gohr     *
39a0b3799eSAndreas Gohr     * @throws StructException
40a0b3799eSAndreas Gohr     * @param string $table
41a0b3799eSAndreas Gohr     * @param string $file
42a0b3799eSAndreas Gohr     */
43*d6d97f60SAnna Dabrowska    public function __construct($table, $file)
44*d6d97f60SAnna Dabrowska    {
4517dbef8aSMichael Große        $this->openFile($file);
46a0b3799eSAndreas Gohr
47a0b3799eSAndreas Gohr        $this->schema = new Schema($table);
48a0b3799eSAndreas Gohr        if (!$this->schema->getId()) throw new StructException('Schema does not exist');
49a0b3799eSAndreas Gohr
50a0b3799eSAndreas Gohr        /** @var \helper_plugin_struct_db $db */
51a0b3799eSAndreas Gohr        $db = plugin_load('helper', 'struct_db');
52a0b3799eSAndreas Gohr        $this->sqlite = $db->getDB(true);
531fc2361fSSzymon Olewniczak    }
54a0b3799eSAndreas Gohr
551fc2361fSSzymon Olewniczak    /**
561fc2361fSSzymon Olewniczak     * Import the data from file.
571fc2361fSSzymon Olewniczak     *
581fc2361fSSzymon Olewniczak     * @throws StructException
591fc2361fSSzymon Olewniczak     */
60*d6d97f60SAnna Dabrowska    public function import()
61*d6d97f60SAnna Dabrowska    {
62a0b3799eSAndreas Gohr        // Do the import
63a0b3799eSAndreas Gohr        $this->readHeaders();
64a0b3799eSAndreas Gohr        $this->importCSV();
65a0b3799eSAndreas Gohr    }
66a0b3799eSAndreas Gohr
67a0b3799eSAndreas Gohr    /**
6817dbef8aSMichael Große     * Open a given file path
6917dbef8aSMichael Große     *
7017dbef8aSMichael Große     * The main purpose of this method is to be overridden in a mock for testing
7117dbef8aSMichael Große     *
7217dbef8aSMichael Große     * @param string $file the file path
7317dbef8aSMichael Große     *
7417dbef8aSMichael Große     * @return void
7517dbef8aSMichael Große     */
7617dbef8aSMichael Große    protected function openFile($file)
7717dbef8aSMichael Große    {
7817dbef8aSMichael Große        $this->fh = fopen($file, 'rb');
7917dbef8aSMichael Große        if (!$this->fh) {
8017dbef8aSMichael Große            throw new StructException('Failed to open CSV file for reading');
8117dbef8aSMichael Große        }
8217dbef8aSMichael Große    }
8317dbef8aSMichael Große
8417dbef8aSMichael Große    /**
8517dbef8aSMichael Große     * Get a parsed line from the opened CSV file
8617dbef8aSMichael Große     *
8717dbef8aSMichael Große     * The main purpose of this method is to be overridden in a mock for testing
8817dbef8aSMichael Große     *
8917dbef8aSMichael Große     * @return array|false|null
9017dbef8aSMichael Große     */
9117dbef8aSMichael Große    protected function getLine()
9217dbef8aSMichael Große    {
9317dbef8aSMichael Große        return fgetcsv($this->fh);
9417dbef8aSMichael Große    }
9517dbef8aSMichael Große
9617dbef8aSMichael Große    /**
97a0b3799eSAndreas Gohr     * Read the CSV headers and match it with the Schema columns
981fc2361fSSzymon Olewniczak     *
991fc2361fSSzymon Olewniczak     * @return array headers of file
100a0b3799eSAndreas Gohr     */
101*d6d97f60SAnna Dabrowska    protected function readHeaders()
102*d6d97f60SAnna Dabrowska    {
10317dbef8aSMichael Große        $header = $this->getLine();
104a0b3799eSAndreas Gohr        if (!$header) throw new StructException('Failed to read CSV');
105a0b3799eSAndreas Gohr        $this->line++;
106a0b3799eSAndreas Gohr
107a0b3799eSAndreas Gohr        foreach ($header as $i => $head) {
108a0b3799eSAndreas Gohr            $col = $this->schema->findColumn($head);
109a0b3799eSAndreas Gohr            if (!$col) continue;
110a0b3799eSAndreas Gohr            if (!$col->isEnabled()) continue;
111a0b3799eSAndreas Gohr            $this->columns[$i] = $col;
112a0b3799eSAndreas Gohr        }
113a0b3799eSAndreas Gohr
114a0b3799eSAndreas Gohr        if (!$this->columns) {
115a0b3799eSAndreas Gohr            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
116a0b3799eSAndreas Gohr        }
1171fc2361fSSzymon Olewniczak
1181fc2361fSSzymon Olewniczak        $this->header = $header;
119a0b3799eSAndreas Gohr    }
120a0b3799eSAndreas Gohr
121a0b3799eSAndreas Gohr    /**
122a0b3799eSAndreas Gohr     * Creates the insert string for the single value table
123a0b3799eSAndreas Gohr     *
124a0b3799eSAndreas Gohr     * @return string
125a0b3799eSAndreas Gohr     */
126*d6d97f60SAnna Dabrowska    protected function getSQLforAllValues()
127*d6d97f60SAnna Dabrowska    {
128a0b3799eSAndreas Gohr        $colnames = array();
129a0b3799eSAndreas Gohr        $placeholds = array();
130a0b3799eSAndreas Gohr        foreach ($this->columns as $i => $col) {
131a0b3799eSAndreas Gohr            $colnames[] = 'col' . $col->getColref();
132a0b3799eSAndreas Gohr            $placeholds[] = '?';
133a0b3799eSAndreas Gohr        }
134a0b3799eSAndreas Gohr        $colnames = join(', ', $colnames);
135a0b3799eSAndreas Gohr        $placeholds = join(', ', $placeholds);
136a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
137a0b3799eSAndreas Gohr
138a0b3799eSAndreas Gohr        return "INSERT INTO data_$table ($colnames) VALUES ($placeholds)";
139a0b3799eSAndreas Gohr    }
140a0b3799eSAndreas Gohr
141a0b3799eSAndreas Gohr    /**
142a0b3799eSAndreas Gohr     * Creates the insert string for the multi value table
143a0b3799eSAndreas Gohr     *
144a0b3799eSAndreas Gohr     * @return string
145a0b3799eSAndreas Gohr     */
146*d6d97f60SAnna Dabrowska    protected function getSQLforMultiValue()
147*d6d97f60SAnna Dabrowska    {
148a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
149a0b3799eSAndreas Gohr        /** @noinspection SqlResolve */
150a0b3799eSAndreas Gohr        return "INSERT INTO multi_$table (pid, colref, row, value) VALUES (?,?,?,?)";
151a0b3799eSAndreas Gohr    }
152a0b3799eSAndreas Gohr
153a0b3799eSAndreas Gohr    /**
154a0b3799eSAndreas Gohr     * Walks through the CSV and imports
155a0b3799eSAndreas Gohr     */
156*d6d97f60SAnna Dabrowska    protected function importCSV()
157*d6d97f60SAnna Dabrowska    {
158a0b3799eSAndreas Gohr
159a0b3799eSAndreas Gohr        $single = $this->getSQLforAllValues();
160a0b3799eSAndreas Gohr        $multi = $this->getSQLforMultiValue();
161a0b3799eSAndreas Gohr
16217dbef8aSMichael Große        while (($data = $this->getLine()) !== false) {
1630e489a46SSzymon Olewniczak            $this->sqlite->query('BEGIN TRANSACTION');
164a0b3799eSAndreas Gohr            $this->line++;
165a0b3799eSAndreas Gohr            $this->importLine($data, $single, $multi);
166a0b3799eSAndreas Gohr            $this->sqlite->query('COMMIT TRANSACTION');
167a0b3799eSAndreas Gohr        }
1680e489a46SSzymon Olewniczak    }
169a0b3799eSAndreas Gohr
170a0b3799eSAndreas Gohr    /**
1711fc2361fSSzymon Olewniczak     * The errors that occured during validation
172a0b3799eSAndreas Gohr     *
1731fc2361fSSzymon Olewniczak     * @return string[] already translated error messages
174a0b3799eSAndreas Gohr     */
175*d6d97f60SAnna Dabrowska    public function getErrors()
176*d6d97f60SAnna Dabrowska    {
1771fc2361fSSzymon Olewniczak        return $this->errors;
1781fc2361fSSzymon Olewniczak    }
1791fc2361fSSzymon Olewniczak
1801fc2361fSSzymon Olewniczak    /**
1811fc2361fSSzymon Olewniczak     * Validate a single value
1821fc2361fSSzymon Olewniczak     *
1831fc2361fSSzymon Olewniczak     * @param Column $col the column of that value
1841fc2361fSSzymon Olewniczak     * @param mixed &$rawvalue the value, will be fixed according to the type
1851fc2361fSSzymon Olewniczak     * @return bool true if the data validates, otherwise false
1861fc2361fSSzymon Olewniczak     */
187*d6d97f60SAnna Dabrowska    protected function validateValue(Column $col, &$rawvalue)
188*d6d97f60SAnna Dabrowska    {
1891fc2361fSSzymon Olewniczak        //by default no validation
1901fc2361fSSzymon Olewniczak        return true;
1911fc2361fSSzymon Olewniczak    }
1921fc2361fSSzymon Olewniczak
1931fc2361fSSzymon Olewniczak    /**
1941fc2361fSSzymon Olewniczak     * Read and validate CSV parsed line
1951fc2361fSSzymon Olewniczak     *
1961fc2361fSSzymon Olewniczak     * @param &$line
1971fc2361fSSzymon Olewniczak     */
198*d6d97f60SAnna Dabrowska    protected function readLine(&$line)
199*d6d97f60SAnna Dabrowska    {
200a0b3799eSAndreas Gohr        // prepare values for single value table
201a0b3799eSAndreas Gohr        $values = array();
202a0b3799eSAndreas Gohr        foreach ($this->columns as $i => $column) {
203a0b3799eSAndreas Gohr            if (!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);
204a0b3799eSAndreas Gohr
2051fc2361fSSzymon Olewniczak            if (!$this->validateValue($column, $line[$i])) return false;
2061fc2361fSSzymon Olewniczak
207a0b3799eSAndreas Gohr            if ($column->isMulti()) {
208a0b3799eSAndreas Gohr                // multi values get split on comma
209a0b3799eSAndreas Gohr                $line[$i] = array_map('trim', explode(',', $line[$i]));
210a0b3799eSAndreas Gohr                $values[] = $line[$i][0];
211a0b3799eSAndreas Gohr            } else {
212a0b3799eSAndreas Gohr                $values[] = $line[$i];
213a0b3799eSAndreas Gohr            }
214a0b3799eSAndreas Gohr        }
2151fc2361fSSzymon Olewniczak        //if no ok don't import
2161fc2361fSSzymon Olewniczak        return $values;
2171fc2361fSSzymon Olewniczak    }
218a0b3799eSAndreas Gohr
2191fc2361fSSzymon Olewniczak    /**
22069c07c1bSSzymon Olewniczak     * INSERT $values into data_* table
22169c07c1bSSzymon Olewniczak     *
2221fc2361fSSzymon Olewniczak     * @param string[] $values
2231fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2241fc2361fSSzymon Olewniczak     *
22569c07c1bSSzymon Olewniczak     * @return string last_insert_rowid()
2261fc2361fSSzymon Olewniczak     */
227*d6d97f60SAnna Dabrowska    protected function insertIntoSingle($values, $single)
228*d6d97f60SAnna Dabrowska    {
229a0b3799eSAndreas Gohr        $this->sqlite->query($single, $values);
230a0b3799eSAndreas Gohr        $res = $this->sqlite->query('SELECT last_insert_rowid()');
231a0b3799eSAndreas Gohr        $pid = $this->sqlite->res2single($res);
232a0b3799eSAndreas Gohr        $this->sqlite->res_close($res);
233a0b3799eSAndreas Gohr
2341fc2361fSSzymon Olewniczak        return $pid;
2351fc2361fSSzymon Olewniczak    }
2361fc2361fSSzymon Olewniczak
2371fc2361fSSzymon Olewniczak    /**
23869c07c1bSSzymon Olewniczak     * INSERT one row into multi_* table
23969c07c1bSSzymon Olewniczak     *
2401fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2411fc2361fSSzymon Olewniczak     * @param $pid string
2421fc2361fSSzymon Olewniczak     * @param $column string
2431fc2361fSSzymon Olewniczak     * @param $row string
2441fc2361fSSzymon Olewniczak     * @param $value string
2451fc2361fSSzymon Olewniczak     */
246*d6d97f60SAnna Dabrowska    protected function insertIntoMulti($multi, $pid, $column, $row, $value)
247*d6d97f60SAnna Dabrowska    {
2481fc2361fSSzymon Olewniczak        $this->sqlite->query($multi, array($pid, $column->getColref(), $row + 1, $value));
2491fc2361fSSzymon Olewniczak    }
2501fc2361fSSzymon Olewniczak
2511fc2361fSSzymon Olewniczak    /**
25269c07c1bSSzymon Olewniczak     * Save one CSV line into database
25369c07c1bSSzymon Olewniczak     *
2541fc2361fSSzymon Olewniczak     * @param string[] $values parsed line values
2551fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2561fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2571fc2361fSSzymon Olewniczak     */
258*d6d97f60SAnna Dabrowska    protected function saveLine($values, $line, $single, $multi)
259*d6d97f60SAnna Dabrowska    {
2601fc2361fSSzymon Olewniczak        // insert into single value table (and create pid)
2611fc2361fSSzymon Olewniczak        $pid = $this->insertIntoSingle($values, $single);
2621fc2361fSSzymon Olewniczak
263a0b3799eSAndreas Gohr        // insert all the multi values
264a0b3799eSAndreas Gohr        foreach ($this->columns as $i => $column) {
265a0b3799eSAndreas Gohr            if (!$column->isMulti()) continue;
266a0b3799eSAndreas Gohr            foreach ($line[$i] as $row => $value) {
2671fc2361fSSzymon Olewniczak                $this->insertIntoMulti($multi, $pid, $column, $row, $value);
268a0b3799eSAndreas Gohr            }
269a0b3799eSAndreas Gohr        }
270a0b3799eSAndreas Gohr    }
271a0b3799eSAndreas Gohr
2721fc2361fSSzymon Olewniczak    /**
2731fc2361fSSzymon Olewniczak     * Imports one line into the schema
2741fc2361fSSzymon Olewniczak     *
2751fc2361fSSzymon Olewniczak     * @param string[] $line the parsed CSV line
2761fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2771fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2781fc2361fSSzymon Olewniczak     */
279*d6d97f60SAnna Dabrowska    protected function importLine($line, $single, $multi)
280*d6d97f60SAnna Dabrowska    {
2811fc2361fSSzymon Olewniczak        //read values, false if no validation
2821fc2361fSSzymon Olewniczak        $values = $this->readLine($line);
2831fc2361fSSzymon Olewniczak
2841fc2361fSSzymon Olewniczak        if ($values) {
2851fc2361fSSzymon Olewniczak            $this->saveLine($values, $line, $single, $multi);
2861fc2361fSSzymon Olewniczak        } else foreach ($this->errors as $error) {
2871fc2361fSSzymon Olewniczak            msg($error, -1);
2881fc2361fSSzymon Olewniczak        }
2891fc2361fSSzymon Olewniczak    }
290a0b3799eSAndreas Gohr}
291