xref: /plugin/struct/meta/CSVImporter.php (revision 4e4edb41089a43de9c0e39821afa8a55fe8f03ee)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
5*4e4edb41SAnna Dabrowskause dokuwiki\plugin\struct\types\Page;
6*4e4edb41SAnna Dabrowska
7a0b3799eSAndreas Gohr/**
8f36cc634SAndreas Gohr * Class CSVImporter
9a0b3799eSAndreas Gohr *
10f36cc634SAndreas Gohr * Imports CSV data into a lookup schema
11a0b3799eSAndreas Gohr *
12a0b3799eSAndreas Gohr * @package dokuwiki\plugin\struct\meta
13a0b3799eSAndreas Gohr */
14*4e4edb41SAnna Dabrowskaclass CSVImporter
15d6d97f60SAnna Dabrowska{
16a0b3799eSAndreas Gohr
17a0b3799eSAndreas Gohr    /** @var  Schema */
18a0b3799eSAndreas Gohr    protected $schema;
19a0b3799eSAndreas Gohr
20a0b3799eSAndreas Gohr    /** @var  resource */
21a0b3799eSAndreas Gohr    protected $fh;
22a0b3799eSAndreas Gohr
23a0b3799eSAndreas Gohr    /** @var  \helper_plugin_sqlite */
24a0b3799eSAndreas Gohr    protected $sqlite;
25a0b3799eSAndreas Gohr
26a0b3799eSAndreas Gohr    /** @var Column[] The single values to store index => col */
27a0b3799eSAndreas Gohr    protected $columns = array();
28a0b3799eSAndreas Gohr
29a0b3799eSAndreas Gohr    /** @var int current line number */
30a0b3799eSAndreas Gohr    protected $line = 0;
31a0b3799eSAndreas Gohr
32*4e4edb41SAnna Dabrowska    /** @var array list of headers */
331fc2361fSSzymon Olewniczak    protected $header;
341fc2361fSSzymon Olewniczak
351fc2361fSSzymon Olewniczak    /** @var  array list of validation errors */
361fc2361fSSzymon Olewniczak    protected $errors;
371fc2361fSSzymon Olewniczak
38a0b3799eSAndreas Gohr    /**
39*4e4edb41SAnna Dabrowska     * @var string data type, must be one of page, lookup, serial
40*4e4edb41SAnna Dabrowska     */
41*4e4edb41SAnna Dabrowska    protected $type;
42*4e4edb41SAnna Dabrowska
43*4e4edb41SAnna Dabrowska    /**
44a0b3799eSAndreas Gohr     * CSVImporter constructor.
45a0b3799eSAndreas Gohr     *
46a0b3799eSAndreas Gohr     * @param string $table
47a0b3799eSAndreas Gohr     * @param string $file
48*4e4edb41SAnna Dabrowska     * @param string $type
49a0b3799eSAndreas Gohr     */
50*4e4edb41SAnna Dabrowska    public function __construct($table, $file, $type)
51d6d97f60SAnna Dabrowska    {
52*4e4edb41SAnna Dabrowska        $this->type = $type;
5317dbef8aSMichael Große        $this->openFile($file);
54a0b3799eSAndreas Gohr
55a0b3799eSAndreas Gohr        $this->schema = new Schema($table);
56a0b3799eSAndreas Gohr        if (!$this->schema->getId()) throw new StructException('Schema does not exist');
57a0b3799eSAndreas Gohr
58a0b3799eSAndreas Gohr        /** @var \helper_plugin_struct_db $db */
59a0b3799eSAndreas Gohr        $db = plugin_load('helper', 'struct_db');
60a0b3799eSAndreas Gohr        $this->sqlite = $db->getDB(true);
611fc2361fSSzymon Olewniczak    }
62a0b3799eSAndreas Gohr
631fc2361fSSzymon Olewniczak    /**
641fc2361fSSzymon Olewniczak     * Import the data from file.
651fc2361fSSzymon Olewniczak     *
661fc2361fSSzymon Olewniczak     * @throws StructException
671fc2361fSSzymon Olewniczak     */
68d6d97f60SAnna Dabrowska    public function import()
69d6d97f60SAnna Dabrowska    {
70a0b3799eSAndreas Gohr        // Do the import
71a0b3799eSAndreas Gohr        $this->readHeaders();
72a0b3799eSAndreas Gohr        $this->importCSV();
73a0b3799eSAndreas Gohr    }
74a0b3799eSAndreas Gohr
75a0b3799eSAndreas Gohr    /**
7617dbef8aSMichael Große     * Open a given file path
7717dbef8aSMichael Große     *
7817dbef8aSMichael Große     * The main purpose of this method is to be overridden in a mock for testing
7917dbef8aSMichael Große     *
8017dbef8aSMichael Große     * @param string $file the file path
8117dbef8aSMichael Große     *
8217dbef8aSMichael Große     * @return void
8317dbef8aSMichael Große     */
8417dbef8aSMichael Große    protected function openFile($file)
8517dbef8aSMichael Große    {
8617dbef8aSMichael Große        $this->fh = fopen($file, 'rb');
8717dbef8aSMichael Große        if (!$this->fh) {
8817dbef8aSMichael Große            throw new StructException('Failed to open CSV file for reading');
8917dbef8aSMichael Große        }
9017dbef8aSMichael Große    }
9117dbef8aSMichael Große
9217dbef8aSMichael Große    /**
9317dbef8aSMichael Große     * Get a parsed line from the opened CSV file
9417dbef8aSMichael Große     *
9517dbef8aSMichael Große     * The main purpose of this method is to be overridden in a mock for testing
9617dbef8aSMichael Große     *
9717dbef8aSMichael Große     * @return array|false|null
9817dbef8aSMichael Große     */
9917dbef8aSMichael Große    protected function getLine()
10017dbef8aSMichael Große    {
10117dbef8aSMichael Große        return fgetcsv($this->fh);
10217dbef8aSMichael Große    }
10317dbef8aSMichael Große
10417dbef8aSMichael Große    /**
105a0b3799eSAndreas Gohr     * Read the CSV headers and match it with the Schema columns
1061fc2361fSSzymon Olewniczak     *
1071fc2361fSSzymon Olewniczak     * @return array headers of file
108a0b3799eSAndreas Gohr     */
109d6d97f60SAnna Dabrowska    protected function readHeaders()
110d6d97f60SAnna Dabrowska    {
11117dbef8aSMichael Große        $header = $this->getLine();
112a0b3799eSAndreas Gohr        if (!$header) throw new StructException('Failed to read CSV');
113a0b3799eSAndreas Gohr        $this->line++;
114a0b3799eSAndreas Gohr
115*4e4edb41SAnna Dabrowska        // FIXME we might have to create a page column first
116*4e4edb41SAnna Dabrowska        if ($this->type !== 'lookup') {
117*4e4edb41SAnna Dabrowska            $pageType = new Page(null, 'pid');
118*4e4edb41SAnna Dabrowska            $pidCol = new Column(0, $pageType, 0, true, $this->schema->getTable());
119*4e4edb41SAnna Dabrowska            $this->columns[] = $pidCol;
120*4e4edb41SAnna Dabrowska        }
121*4e4edb41SAnna Dabrowska
122a0b3799eSAndreas Gohr        foreach ($header as $i => $head) {
123a0b3799eSAndreas Gohr            $col = $this->schema->findColumn($head);
124a0b3799eSAndreas Gohr            if (!$col) continue;
125a0b3799eSAndreas Gohr            if (!$col->isEnabled()) continue;
126a0b3799eSAndreas Gohr            $this->columns[$i] = $col;
127a0b3799eSAndreas Gohr        }
128a0b3799eSAndreas Gohr
129a0b3799eSAndreas Gohr        if (!$this->columns) {
130a0b3799eSAndreas Gohr            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
131a0b3799eSAndreas Gohr        }
1321fc2361fSSzymon Olewniczak
1331fc2361fSSzymon Olewniczak        $this->header = $header;
134a0b3799eSAndreas Gohr    }
135a0b3799eSAndreas Gohr
136a0b3799eSAndreas Gohr    /**
137a0b3799eSAndreas Gohr     * Creates the insert string for the single value table
138a0b3799eSAndreas Gohr     *
139a0b3799eSAndreas Gohr     * @return string
140a0b3799eSAndreas Gohr     */
141d6d97f60SAnna Dabrowska    protected function getSQLforAllValues()
142d6d97f60SAnna Dabrowska    {
143a0b3799eSAndreas Gohr        $colnames = array();
144a0b3799eSAndreas Gohr        $placeholds = array();
145a0b3799eSAndreas Gohr        foreach ($this->columns as $i => $col) {
146a0b3799eSAndreas Gohr            $colnames[] = 'col' . $col->getColref();
147a0b3799eSAndreas Gohr            $placeholds[] = '?';
148a0b3799eSAndreas Gohr        }
149a0b3799eSAndreas Gohr        $colnames = join(', ', $colnames);
150a0b3799eSAndreas Gohr        $placeholds = join(', ', $placeholds);
151a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
152a0b3799eSAndreas Gohr
153a0b3799eSAndreas Gohr        return "INSERT INTO data_$table ($colnames) VALUES ($placeholds)";
154a0b3799eSAndreas Gohr    }
155a0b3799eSAndreas Gohr
156a0b3799eSAndreas Gohr    /**
157a0b3799eSAndreas Gohr     * Creates the insert string for the multi value table
158a0b3799eSAndreas Gohr     *
159a0b3799eSAndreas Gohr     * @return string
160a0b3799eSAndreas Gohr     */
161d6d97f60SAnna Dabrowska    protected function getSQLforMultiValue()
162d6d97f60SAnna Dabrowska    {
163a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
164a0b3799eSAndreas Gohr        /** @noinspection SqlResolve */
165a0b3799eSAndreas Gohr        return "INSERT INTO multi_$table (pid, colref, row, value) VALUES (?,?,?,?)";
166a0b3799eSAndreas Gohr    }
167a0b3799eSAndreas Gohr
168a0b3799eSAndreas Gohr    /**
169a0b3799eSAndreas Gohr     * Walks through the CSV and imports
170a0b3799eSAndreas Gohr     */
171d6d97f60SAnna Dabrowska    protected function importCSV()
172d6d97f60SAnna Dabrowska    {
173a0b3799eSAndreas Gohr
174*4e4edb41SAnna Dabrowska        // FIXME those are hopefully never used and can be removed soon
175a0b3799eSAndreas Gohr        $single = $this->getSQLforAllValues();
176a0b3799eSAndreas Gohr        $multi = $this->getSQLforMultiValue();
177a0b3799eSAndreas Gohr
17817dbef8aSMichael Große        while (($data = $this->getLine()) !== false) {
179*4e4edb41SAnna Dabrowska//            $this->sqlite->query('BEGIN TRANSACTION');
180a0b3799eSAndreas Gohr            $this->line++;
181a0b3799eSAndreas Gohr            $this->importLine($data, $single, $multi);
182*4e4edb41SAnna Dabrowska//            $this->sqlite->query('COMMIT TRANSACTION');
183a0b3799eSAndreas Gohr        }
1840e489a46SSzymon Olewniczak    }
185a0b3799eSAndreas Gohr
186a0b3799eSAndreas Gohr    /**
1871fc2361fSSzymon Olewniczak     * The errors that occured during validation
188a0b3799eSAndreas Gohr     *
1891fc2361fSSzymon Olewniczak     * @return string[] already translated error messages
190a0b3799eSAndreas Gohr     */
191d6d97f60SAnna Dabrowska    public function getErrors()
192d6d97f60SAnna Dabrowska    {
1931fc2361fSSzymon Olewniczak        return $this->errors;
1941fc2361fSSzymon Olewniczak    }
1951fc2361fSSzymon Olewniczak
1961fc2361fSSzymon Olewniczak    /**
1971fc2361fSSzymon Olewniczak     * Validate a single value
1981fc2361fSSzymon Olewniczak     *
1991fc2361fSSzymon Olewniczak     * @param Column $col the column of that value
2001fc2361fSSzymon Olewniczak     * @param mixed &$rawvalue the value, will be fixed according to the type
2011fc2361fSSzymon Olewniczak     * @return bool true if the data validates, otherwise false
2021fc2361fSSzymon Olewniczak     */
203d6d97f60SAnna Dabrowska    protected function validateValue(Column $col, &$rawvalue)
204d6d97f60SAnna Dabrowska    {
2051fc2361fSSzymon Olewniczak        //by default no validation
2061fc2361fSSzymon Olewniczak        return true;
2071fc2361fSSzymon Olewniczak    }
2081fc2361fSSzymon Olewniczak
2091fc2361fSSzymon Olewniczak    /**
2101fc2361fSSzymon Olewniczak     * Read and validate CSV parsed line
2111fc2361fSSzymon Olewniczak     *
2121fc2361fSSzymon Olewniczak     * @param &$line
2131fc2361fSSzymon Olewniczak     */
214d6d97f60SAnna Dabrowska    protected function readLine(&$line)
215d6d97f60SAnna Dabrowska    {
216a0b3799eSAndreas Gohr        // prepare values for single value table
217a0b3799eSAndreas Gohr        $values = array();
218a0b3799eSAndreas Gohr        foreach ($this->columns as $i => $column) {
219a0b3799eSAndreas Gohr            if (!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);
220a0b3799eSAndreas Gohr
2211fc2361fSSzymon Olewniczak            if (!$this->validateValue($column, $line[$i])) return false;
2221fc2361fSSzymon Olewniczak
223a0b3799eSAndreas Gohr            if ($column->isMulti()) {
224*4e4edb41SAnna Dabrowska                // FIXME don't split JSON values, they contain commas! we need something more clever
225a0b3799eSAndreas Gohr                // multi values get split on comma
226a0b3799eSAndreas Gohr                $line[$i] = array_map('trim', explode(',', $line[$i]));
227a0b3799eSAndreas Gohr                $values[] = $line[$i][0];
228a0b3799eSAndreas Gohr            } else {
229a0b3799eSAndreas Gohr                $values[] = $line[$i];
230a0b3799eSAndreas Gohr            }
231a0b3799eSAndreas Gohr        }
2321fc2361fSSzymon Olewniczak        //if no ok don't import
2331fc2361fSSzymon Olewniczak        return $values;
2341fc2361fSSzymon Olewniczak    }
235a0b3799eSAndreas Gohr
2361fc2361fSSzymon Olewniczak    /**
23769c07c1bSSzymon Olewniczak     * INSERT $values into data_* table
23869c07c1bSSzymon Olewniczak     *
2391fc2361fSSzymon Olewniczak     * @param string[] $values
2401fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2411fc2361fSSzymon Olewniczak     *
24269c07c1bSSzymon Olewniczak     * @return string last_insert_rowid()
2431fc2361fSSzymon Olewniczak     */
244d6d97f60SAnna Dabrowska    protected function insertIntoSingle($values, $single)
245d6d97f60SAnna Dabrowska    {
246a0b3799eSAndreas Gohr        $this->sqlite->query($single, $values);
247a0b3799eSAndreas Gohr        $res = $this->sqlite->query('SELECT last_insert_rowid()');
248a0b3799eSAndreas Gohr        $pid = $this->sqlite->res2single($res);
249a0b3799eSAndreas Gohr        $this->sqlite->res_close($res);
250a0b3799eSAndreas Gohr
2511fc2361fSSzymon Olewniczak        return $pid;
2521fc2361fSSzymon Olewniczak    }
2531fc2361fSSzymon Olewniczak
2541fc2361fSSzymon Olewniczak    /**
25569c07c1bSSzymon Olewniczak     * INSERT one row into multi_* table
25669c07c1bSSzymon Olewniczak     *
2571fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2581fc2361fSSzymon Olewniczak     * @param $pid string
2591fc2361fSSzymon Olewniczak     * @param $column string
2601fc2361fSSzymon Olewniczak     * @param $row string
2611fc2361fSSzymon Olewniczak     * @param $value string
2621fc2361fSSzymon Olewniczak     */
263d6d97f60SAnna Dabrowska    protected function insertIntoMulti($multi, $pid, $column, $row, $value)
264d6d97f60SAnna Dabrowska    {
2651fc2361fSSzymon Olewniczak        $this->sqlite->query($multi, array($pid, $column->getColref(), $row + 1, $value));
2661fc2361fSSzymon Olewniczak    }
2671fc2361fSSzymon Olewniczak
2681fc2361fSSzymon Olewniczak    /**
26969c07c1bSSzymon Olewniczak     * Save one CSV line into database
27069c07c1bSSzymon Olewniczak     *
2711fc2361fSSzymon Olewniczak     * @param string[] $values parsed line values
2721fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2731fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2741fc2361fSSzymon Olewniczak     */
275d6d97f60SAnna Dabrowska    protected function saveLine($values, $line, $single, $multi)
276d6d97f60SAnna Dabrowska    {
2771fc2361fSSzymon Olewniczak        // insert into single value table (and create pid)
2781fc2361fSSzymon Olewniczak        $pid = $this->insertIntoSingle($values, $single);
2791fc2361fSSzymon Olewniczak
280a0b3799eSAndreas Gohr        // insert all the multi values
281a0b3799eSAndreas Gohr        foreach ($this->columns as $i => $column) {
282a0b3799eSAndreas Gohr            if (!$column->isMulti()) continue;
283a0b3799eSAndreas Gohr            foreach ($line[$i] as $row => $value) {
2841fc2361fSSzymon Olewniczak                $this->insertIntoMulti($multi, $pid, $column, $row, $value);
285a0b3799eSAndreas Gohr            }
286a0b3799eSAndreas Gohr        }
287a0b3799eSAndreas Gohr    }
288a0b3799eSAndreas Gohr
2891fc2361fSSzymon Olewniczak    /**
2901fc2361fSSzymon Olewniczak     * Imports one line into the schema
2911fc2361fSSzymon Olewniczak     *
2921fc2361fSSzymon Olewniczak     * @param string[] $line the parsed CSV line
2931fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2941fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2951fc2361fSSzymon Olewniczak     */
296d6d97f60SAnna Dabrowska    protected function importLine($line, $single, $multi)
297d6d97f60SAnna Dabrowska    {
2981fc2361fSSzymon Olewniczak        //read values, false if no validation
2991fc2361fSSzymon Olewniczak        $values = $this->readLine($line);
3001fc2361fSSzymon Olewniczak
3011fc2361fSSzymon Olewniczak        if ($values) {
302*4e4edb41SAnna Dabrowska            // FIXME trying to bypass another custom SQL query string
303*4e4edb41SAnna Dabrowska//            $this->saveLine($values, $line, $single, $multi);
304*4e4edb41SAnna Dabrowska
305*4e4edb41SAnna Dabrowska            $data = array_combine($this->header, $values);
306*4e4edb41SAnna Dabrowska            // pid is a non-data column and must be supplied to the AccessTable separately
307*4e4edb41SAnna Dabrowska            $pid = isset($data['pid']) ? $data['pid'] : '';
308*4e4edb41SAnna Dabrowska            unset($data['pid']);
309*4e4edb41SAnna Dabrowska            $table = $this->schema->getTable();
310*4e4edb41SAnna Dabrowska            $access = AccessTable::byTableName($table, $pid);
311*4e4edb41SAnna Dabrowska
312*4e4edb41SAnna Dabrowska            /** @var 'helper_plugin_struct $helper */
313*4e4edb41SAnna Dabrowska            $helper = plugin_load('helper', 'struct');
314*4e4edb41SAnna Dabrowska            $helper->saveLookupData($access, $data);
3151fc2361fSSzymon Olewniczak        } else foreach ($this->errors as $error) {
3161fc2361fSSzymon Olewniczak            msg($error, -1);
3171fc2361fSSzymon Olewniczak        }
3181fc2361fSSzymon Olewniczak    }
319a0b3799eSAndreas Gohr}
320