xref: /plugin/struct/meta/CSVImporter.php (revision 17dbef8af8e73f5c6a6bdbfcb629a48d85005bd7)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
5a0b3799eSAndreas Gohr/**
6f36cc634SAndreas Gohr * Class CSVImporter
7a0b3799eSAndreas Gohr *
8f36cc634SAndreas Gohr * Imports CSV data into a lookup schema
9a0b3799eSAndreas Gohr *
10a0b3799eSAndreas Gohr * @package dokuwiki\plugin\struct\meta
11a0b3799eSAndreas Gohr */
121fc2361fSSzymon Olewniczakabstract class CSVImporter {
13a0b3799eSAndreas Gohr
14a0b3799eSAndreas Gohr    /** @var  Schema */
15a0b3799eSAndreas Gohr    protected $schema;
16a0b3799eSAndreas Gohr
17a0b3799eSAndreas Gohr    /** @var  resource */
18a0b3799eSAndreas Gohr    protected $fh;
19a0b3799eSAndreas Gohr
20a0b3799eSAndreas Gohr    /** @var  \helper_plugin_sqlite */
21a0b3799eSAndreas Gohr    protected $sqlite;
22a0b3799eSAndreas Gohr
23a0b3799eSAndreas Gohr    /** @var Column[] The single values to store index => col */
24a0b3799eSAndreas Gohr    protected $columns = array();
25a0b3799eSAndreas Gohr
26a0b3799eSAndreas Gohr    /** @var int current line number */
27a0b3799eSAndreas Gohr    protected $line = 0;
28a0b3799eSAndreas Gohr
291fc2361fSSzymon Olewniczak    /** @var  list of headers */
301fc2361fSSzymon Olewniczak    protected $header;
311fc2361fSSzymon Olewniczak
321fc2361fSSzymon Olewniczak    /** @var  array list of validation errors */
331fc2361fSSzymon Olewniczak    protected $errors;
341fc2361fSSzymon Olewniczak
35a0b3799eSAndreas Gohr    /**
36a0b3799eSAndreas Gohr     * CSVImporter constructor.
37a0b3799eSAndreas Gohr     *
38a0b3799eSAndreas Gohr     * @throws StructException
39a0b3799eSAndreas Gohr     * @param string $table
40a0b3799eSAndreas Gohr     * @param string $file
41a0b3799eSAndreas Gohr     */
42a0b3799eSAndreas Gohr    public function __construct($table, $file) {
43*17dbef8aSMichael Große        $this->openFile($file);
44a0b3799eSAndreas Gohr
45a0b3799eSAndreas Gohr        $this->schema = new Schema($table);
46a0b3799eSAndreas Gohr        if(!$this->schema->getId()) throw new StructException('Schema does not exist');
47a0b3799eSAndreas Gohr
48a0b3799eSAndreas Gohr        /** @var \helper_plugin_struct_db $db */
49a0b3799eSAndreas Gohr        $db = plugin_load('helper', 'struct_db');
50a0b3799eSAndreas Gohr        $this->sqlite = $db->getDB(true);
511fc2361fSSzymon Olewniczak    }
52a0b3799eSAndreas Gohr
531fc2361fSSzymon Olewniczak    /**
541fc2361fSSzymon Olewniczak     * Import the data from file.
551fc2361fSSzymon Olewniczak     *
561fc2361fSSzymon Olewniczak     * @throws StructException
571fc2361fSSzymon Olewniczak     */
581fc2361fSSzymon Olewniczak    public function import() {
59a0b3799eSAndreas Gohr        // Do the import
60a0b3799eSAndreas Gohr        $this->readHeaders();
61a0b3799eSAndreas Gohr        $this->importCSV();
62a0b3799eSAndreas Gohr    }
63a0b3799eSAndreas Gohr
64a0b3799eSAndreas Gohr    /**
65*17dbef8aSMichael Große     * Open a given file path
66*17dbef8aSMichael Große     *
67*17dbef8aSMichael Große     * The main purpose of this method is to be overridden in a mock for testing
68*17dbef8aSMichael Große     *
69*17dbef8aSMichael Große     * @param string $file the file path
70*17dbef8aSMichael Große     *
71*17dbef8aSMichael Große     * @return void
72*17dbef8aSMichael Große     */
73*17dbef8aSMichael Große    protected function openFile($file)
74*17dbef8aSMichael Große    {
75*17dbef8aSMichael Große        $this->fh = fopen($file, 'rb');
76*17dbef8aSMichael Große        if(!$this->fh) {
77*17dbef8aSMichael Große            throw new StructException('Failed to open CSV file for reading');
78*17dbef8aSMichael Große        }
79*17dbef8aSMichael Große    }
80*17dbef8aSMichael Große
81*17dbef8aSMichael Große    /**
82*17dbef8aSMichael Große     * Get a parsed line from the opened CSV file
83*17dbef8aSMichael Große     *
84*17dbef8aSMichael Große     * The main purpose of this method is to be overridden in a mock for testing
85*17dbef8aSMichael Große     *
86*17dbef8aSMichael Große     * @return array|false|null
87*17dbef8aSMichael Große     */
88*17dbef8aSMichael Große    protected function getLine()
89*17dbef8aSMichael Große    {
90*17dbef8aSMichael Große        return fgetcsv($this->fh);
91*17dbef8aSMichael Große    }
92*17dbef8aSMichael Große
93*17dbef8aSMichael Große    /**
94a0b3799eSAndreas Gohr     * Read the CSV headers and match it with the Schema columns
951fc2361fSSzymon Olewniczak     *
961fc2361fSSzymon Olewniczak     * @return array headers of file
97a0b3799eSAndreas Gohr     */
98a0b3799eSAndreas Gohr    protected function readHeaders() {
99*17dbef8aSMichael Große        $header = $this->getLine();
100a0b3799eSAndreas Gohr        if(!$header) throw new StructException('Failed to read CSV');
101a0b3799eSAndreas Gohr        $this->line++;
102a0b3799eSAndreas Gohr
103a0b3799eSAndreas Gohr        foreach($header as $i => $head) {
104a0b3799eSAndreas Gohr            $col = $this->schema->findColumn($head);
105a0b3799eSAndreas Gohr            if(!$col) continue;
106a0b3799eSAndreas Gohr            if(!$col->isEnabled()) continue;
107a0b3799eSAndreas Gohr            $this->columns[$i] = $col;
108a0b3799eSAndreas Gohr        }
109a0b3799eSAndreas Gohr
110a0b3799eSAndreas Gohr        if(!$this->columns) {
111a0b3799eSAndreas Gohr            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
112a0b3799eSAndreas Gohr        }
1131fc2361fSSzymon Olewniczak
1141fc2361fSSzymon Olewniczak        $this->header = $header;
115a0b3799eSAndreas Gohr    }
116a0b3799eSAndreas Gohr
117a0b3799eSAndreas Gohr    /**
118a0b3799eSAndreas Gohr     * Creates the insert string for the single value table
119a0b3799eSAndreas Gohr     *
120a0b3799eSAndreas Gohr     * @return string
121a0b3799eSAndreas Gohr     */
122a0b3799eSAndreas Gohr    protected function getSQLforAllValues() {
123a0b3799eSAndreas Gohr        $colnames = array();
124a0b3799eSAndreas Gohr        $placeholds = array();
125a0b3799eSAndreas Gohr        foreach($this->columns as $i => $col) {
126a0b3799eSAndreas Gohr            $colnames[] = 'col' . $col->getColref();
127a0b3799eSAndreas Gohr            $placeholds[] = '?';
128a0b3799eSAndreas Gohr        }
129a0b3799eSAndreas Gohr        $colnames = join(', ', $colnames);
130a0b3799eSAndreas Gohr        $placeholds = join(', ', $placeholds);
131a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
132a0b3799eSAndreas Gohr
133a0b3799eSAndreas Gohr        return "INSERT INTO data_$table ($colnames) VALUES ($placeholds)";
134a0b3799eSAndreas Gohr    }
135a0b3799eSAndreas Gohr
136a0b3799eSAndreas Gohr    /**
137a0b3799eSAndreas Gohr     * Creates the insert string for the multi value table
138a0b3799eSAndreas Gohr     *
139a0b3799eSAndreas Gohr     * @return string
140a0b3799eSAndreas Gohr     */
141a0b3799eSAndreas Gohr    protected function getSQLforMultiValue() {
142a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
143a0b3799eSAndreas Gohr        /** @noinspection SqlResolve */
144a0b3799eSAndreas Gohr        return "INSERT INTO multi_$table (pid, colref, row, value) VALUES (?,?,?,?)";
145a0b3799eSAndreas Gohr    }
146a0b3799eSAndreas Gohr
147a0b3799eSAndreas Gohr    /**
148a0b3799eSAndreas Gohr     * Walks through the CSV and imports
149a0b3799eSAndreas Gohr     */
150a0b3799eSAndreas Gohr    protected function importCSV() {
151a0b3799eSAndreas Gohr
152a0b3799eSAndreas Gohr        $single = $this->getSQLforAllValues();
153a0b3799eSAndreas Gohr        $multi = $this->getSQLforMultiValue();
154a0b3799eSAndreas Gohr
155*17dbef8aSMichael Große        while(($data = $this->getLine()) !== false) {
1560e489a46SSzymon Olewniczak            $this->sqlite->query('BEGIN TRANSACTION');
157a0b3799eSAndreas Gohr            $this->line++;
158a0b3799eSAndreas Gohr            $this->importLine($data, $single, $multi);
159a0b3799eSAndreas Gohr            $this->sqlite->query('COMMIT TRANSACTION');
160a0b3799eSAndreas Gohr        }
1610e489a46SSzymon Olewniczak    }
162a0b3799eSAndreas Gohr
163a0b3799eSAndreas Gohr    /**
1641fc2361fSSzymon Olewniczak     * The errors that occured during validation
165a0b3799eSAndreas Gohr     *
1661fc2361fSSzymon Olewniczak     * @return string[] already translated error messages
167a0b3799eSAndreas Gohr     */
1681fc2361fSSzymon Olewniczak    public function getErrors() {
1691fc2361fSSzymon Olewniczak        return $this->errors;
1701fc2361fSSzymon Olewniczak    }
1711fc2361fSSzymon Olewniczak
1721fc2361fSSzymon Olewniczak    /**
1731fc2361fSSzymon Olewniczak     * Validate a single value
1741fc2361fSSzymon Olewniczak     *
1751fc2361fSSzymon Olewniczak     * @param Column $col the column of that value
1761fc2361fSSzymon Olewniczak     * @param mixed &$rawvalue the value, will be fixed according to the type
1771fc2361fSSzymon Olewniczak     * @return bool true if the data validates, otherwise false
1781fc2361fSSzymon Olewniczak     */
1791fc2361fSSzymon Olewniczak    protected function validateValue(Column $col, &$rawvalue) {
1801fc2361fSSzymon Olewniczak        //by default no validation
1811fc2361fSSzymon Olewniczak        return true;
1821fc2361fSSzymon Olewniczak    }
1831fc2361fSSzymon Olewniczak
1841fc2361fSSzymon Olewniczak    /**
1851fc2361fSSzymon Olewniczak     * Read and validate CSV parsed line
1861fc2361fSSzymon Olewniczak     *
1871fc2361fSSzymon Olewniczak     * @param &$line
1881fc2361fSSzymon Olewniczak     */
1891fc2361fSSzymon Olewniczak    protected function readLine(&$line) {
190a0b3799eSAndreas Gohr        // prepare values for single value table
191a0b3799eSAndreas Gohr        $values = array();
192a0b3799eSAndreas Gohr        foreach($this->columns as $i => $column) {
193a0b3799eSAndreas Gohr            if(!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);
194a0b3799eSAndreas Gohr
1951fc2361fSSzymon Olewniczak            if(!$this->validateValue($column, $line[$i])) return false;
1961fc2361fSSzymon Olewniczak
197a0b3799eSAndreas Gohr            if($column->isMulti()) {
198a0b3799eSAndreas Gohr                // multi values get split on comma
199a0b3799eSAndreas Gohr                $line[$i] = array_map('trim', explode(',', $line[$i]));
200a0b3799eSAndreas Gohr                $values[] = $line[$i][0];
201a0b3799eSAndreas Gohr            } else {
202a0b3799eSAndreas Gohr                $values[] = $line[$i];
203a0b3799eSAndreas Gohr            }
204a0b3799eSAndreas Gohr        }
2051fc2361fSSzymon Olewniczak        //if no ok don't import
2061fc2361fSSzymon Olewniczak        return $values;
2071fc2361fSSzymon Olewniczak    }
208a0b3799eSAndreas Gohr
2091fc2361fSSzymon Olewniczak    /**
21069c07c1bSSzymon Olewniczak     * INSERT $values into data_* table
21169c07c1bSSzymon Olewniczak     *
2121fc2361fSSzymon Olewniczak     * @param string[] $values
2131fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2141fc2361fSSzymon Olewniczak     *
21569c07c1bSSzymon Olewniczak     * @return string last_insert_rowid()
2161fc2361fSSzymon Olewniczak     */
2171fc2361fSSzymon Olewniczak    protected function insertIntoSingle($values, $single) {
218a0b3799eSAndreas Gohr        $this->sqlite->query($single, $values);
219a0b3799eSAndreas Gohr        $res = $this->sqlite->query('SELECT last_insert_rowid()');
220a0b3799eSAndreas Gohr        $pid = $this->sqlite->res2single($res);
221a0b3799eSAndreas Gohr        $this->sqlite->res_close($res);
222a0b3799eSAndreas Gohr
2231fc2361fSSzymon Olewniczak        return $pid;
2241fc2361fSSzymon Olewniczak    }
2251fc2361fSSzymon Olewniczak
2261fc2361fSSzymon Olewniczak    /**
22769c07c1bSSzymon Olewniczak     * INSERT one row into multi_* table
22869c07c1bSSzymon Olewniczak     *
2291fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2301fc2361fSSzymon Olewniczak     * @param $pid string
2311fc2361fSSzymon Olewniczak     * @param $column string
2321fc2361fSSzymon Olewniczak     * @param $row string
2331fc2361fSSzymon Olewniczak     * @param $value string
2341fc2361fSSzymon Olewniczak     */
2351fc2361fSSzymon Olewniczak    protected function insertIntoMulti($multi, $pid, $column, $row, $value) {
2361fc2361fSSzymon Olewniczak        $this->sqlite->query($multi, array($pid, $column->getColref(), $row + 1, $value));
2371fc2361fSSzymon Olewniczak    }
2381fc2361fSSzymon Olewniczak
2391fc2361fSSzymon Olewniczak    /**
24069c07c1bSSzymon Olewniczak     * Save one CSV line into database
24169c07c1bSSzymon Olewniczak     *
2421fc2361fSSzymon Olewniczak     * @param string[] $values parsed line values
2431fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2441fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2451fc2361fSSzymon Olewniczak     */
2461fc2361fSSzymon Olewniczak    protected function saveLine($values, $line, $single, $multi) {
2471fc2361fSSzymon Olewniczak        // insert into single value table (and create pid)
2481fc2361fSSzymon Olewniczak        $pid = $this->insertIntoSingle($values, $single);
2491fc2361fSSzymon Olewniczak
250a0b3799eSAndreas Gohr        // insert all the multi values
251a0b3799eSAndreas Gohr        foreach($this->columns as $i => $column) {
252a0b3799eSAndreas Gohr            if(!$column->isMulti()) continue;
253a0b3799eSAndreas Gohr            foreach($line[$i] as $row => $value) {
2541fc2361fSSzymon Olewniczak                $this->insertIntoMulti($multi, $pid, $column, $row, $value);
255a0b3799eSAndreas Gohr            }
256a0b3799eSAndreas Gohr        }
257a0b3799eSAndreas Gohr    }
258a0b3799eSAndreas Gohr
2591fc2361fSSzymon Olewniczak    /**
2601fc2361fSSzymon Olewniczak     * Imports one line into the schema
2611fc2361fSSzymon Olewniczak     *
2621fc2361fSSzymon Olewniczak     * @param string[] $line the parsed CSV line
2631fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
2641fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
2651fc2361fSSzymon Olewniczak     */
2661fc2361fSSzymon Olewniczak    protected function importLine($line, $single, $multi) {
2671fc2361fSSzymon Olewniczak        //read values, false if no validation
2681fc2361fSSzymon Olewniczak        $values = $this->readLine($line);
2691fc2361fSSzymon Olewniczak
2701fc2361fSSzymon Olewniczak        if($values) {
2711fc2361fSSzymon Olewniczak            $this->saveLine($values, $line, $single, $multi);
2721fc2361fSSzymon Olewniczak        } else foreach($this->errors as $error) {
2731fc2361fSSzymon Olewniczak            msg($error, -1);
2741fc2361fSSzymon Olewniczak        }
2751fc2361fSSzymon Olewniczak    }
276a0b3799eSAndreas Gohr}
277