xref: /plugin/struct/meta/CSVImporter.php (revision 1fc2361fe236472e7585162e283a99db231aaf7e)
1a0b3799eSAndreas Gohr<?php
2a0b3799eSAndreas Gohr
3a0b3799eSAndreas Gohrnamespace dokuwiki\plugin\struct\meta;
4a0b3799eSAndreas Gohr
5a0b3799eSAndreas Gohr/**
6f36cc634SAndreas Gohr * Class CSVImporter
7a0b3799eSAndreas Gohr *
8f36cc634SAndreas Gohr * Imports CSV data into a lookup schema
9a0b3799eSAndreas Gohr *
10a0b3799eSAndreas Gohr * @package dokuwiki\plugin\struct\meta
11a0b3799eSAndreas Gohr */
12*1fc2361fSSzymon Olewniczakabstract class CSVImporter {
13a0b3799eSAndreas Gohr
14a0b3799eSAndreas Gohr    /** @var  Schema */
15a0b3799eSAndreas Gohr    protected $schema;
16a0b3799eSAndreas Gohr
17a0b3799eSAndreas Gohr    /** @var  resource */
18a0b3799eSAndreas Gohr    protected $fh;
19a0b3799eSAndreas Gohr
20a0b3799eSAndreas Gohr    /** @var  \helper_plugin_sqlite */
21a0b3799eSAndreas Gohr    protected $sqlite;
22a0b3799eSAndreas Gohr
23a0b3799eSAndreas Gohr    /** @var Column[] The single values to store index => col */
24a0b3799eSAndreas Gohr    protected $columns = array();
25a0b3799eSAndreas Gohr
26a0b3799eSAndreas Gohr    /** @var int current line number */
27a0b3799eSAndreas Gohr    protected $line = 0;
28a0b3799eSAndreas Gohr
29*1fc2361fSSzymon Olewniczak    /** @var  list of headers */
30*1fc2361fSSzymon Olewniczak    protected $header;
31*1fc2361fSSzymon Olewniczak
32*1fc2361fSSzymon Olewniczak    /** @var  array list of validation errors */
33*1fc2361fSSzymon Olewniczak    protected $errors;
34*1fc2361fSSzymon Olewniczak
35a0b3799eSAndreas Gohr    /**
36a0b3799eSAndreas Gohr     * CSVImporter constructor.
37a0b3799eSAndreas Gohr     *
38a0b3799eSAndreas Gohr     * @throws StructException
39a0b3799eSAndreas Gohr     * @param string $table
40a0b3799eSAndreas Gohr     * @param string $file
41a0b3799eSAndreas Gohr     */
42a0b3799eSAndreas Gohr    public function __construct($table, $file) {
43a0b3799eSAndreas Gohr        $this->fh = fopen($file, 'r');
44a0b3799eSAndreas Gohr        if(!$this->fh) throw new StructException('Failed to open CSV file for reading');
45a0b3799eSAndreas Gohr
46a0b3799eSAndreas Gohr        $this->schema = new Schema($table);
47a0b3799eSAndreas Gohr        if(!$this->schema->getId()) throw new StructException('Schema does not exist');
48a0b3799eSAndreas Gohr
49a0b3799eSAndreas Gohr        /** @var \helper_plugin_struct_db $db */
50a0b3799eSAndreas Gohr        $db = plugin_load('helper', 'struct_db');
51a0b3799eSAndreas Gohr        $this->sqlite = $db->getDB(true);
52*1fc2361fSSzymon Olewniczak    }
53a0b3799eSAndreas Gohr
54*1fc2361fSSzymon Olewniczak    /**
55*1fc2361fSSzymon Olewniczak     * Import the data from file.
56*1fc2361fSSzymon Olewniczak     *
57*1fc2361fSSzymon Olewniczak     * @throws StructException
58*1fc2361fSSzymon Olewniczak     */
59*1fc2361fSSzymon Olewniczak    public function import() {
60a0b3799eSAndreas Gohr        // Do the import
61a0b3799eSAndreas Gohr        $this->readHeaders();
62a0b3799eSAndreas Gohr        $this->importCSV();
63a0b3799eSAndreas Gohr    }
64a0b3799eSAndreas Gohr
65a0b3799eSAndreas Gohr    /**
66a0b3799eSAndreas Gohr     * Read the CSV headers and match it with the Schema columns
67*1fc2361fSSzymon Olewniczak     *
68*1fc2361fSSzymon Olewniczak     * @return array headers of file
69a0b3799eSAndreas Gohr     */
70a0b3799eSAndreas Gohr    protected function readHeaders() {
71a0b3799eSAndreas Gohr        $header = fgetcsv($this->fh);
72a0b3799eSAndreas Gohr        if(!$header) throw new StructException('Failed to read CSV');
73a0b3799eSAndreas Gohr        $this->line++;
74a0b3799eSAndreas Gohr
75a0b3799eSAndreas Gohr        foreach($header as $i => $head) {
76a0b3799eSAndreas Gohr            $col = $this->schema->findColumn($head);
77a0b3799eSAndreas Gohr            if(!$col) continue;
78a0b3799eSAndreas Gohr            if(!$col->isEnabled()) continue;
79a0b3799eSAndreas Gohr            $this->columns[$i] = $col;
80a0b3799eSAndreas Gohr        }
81a0b3799eSAndreas Gohr
82a0b3799eSAndreas Gohr        if(!$this->columns) {
83a0b3799eSAndreas Gohr            throw new StructException('None of the CSV headers matched any of the schema\'s fields');
84a0b3799eSAndreas Gohr        }
85*1fc2361fSSzymon Olewniczak
86*1fc2361fSSzymon Olewniczak        $this->header = $header;
87a0b3799eSAndreas Gohr    }
88a0b3799eSAndreas Gohr
89a0b3799eSAndreas Gohr    /**
90a0b3799eSAndreas Gohr     * Creates the insert string for the single value table
91a0b3799eSAndreas Gohr     *
92a0b3799eSAndreas Gohr     * @return string
93a0b3799eSAndreas Gohr     */
94a0b3799eSAndreas Gohr    protected function getSQLforAllValues() {
95a0b3799eSAndreas Gohr        $colnames = array();
96a0b3799eSAndreas Gohr        $placeholds = array();
97a0b3799eSAndreas Gohr        foreach($this->columns as $i => $col) {
98a0b3799eSAndreas Gohr            $colnames[] = 'col' . $col->getColref();
99a0b3799eSAndreas Gohr            $placeholds[] = '?';
100a0b3799eSAndreas Gohr        }
101a0b3799eSAndreas Gohr        $colnames = join(', ', $colnames);
102a0b3799eSAndreas Gohr        $placeholds = join(', ', $placeholds);
103a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
104a0b3799eSAndreas Gohr
105a0b3799eSAndreas Gohr        return "INSERT INTO data_$table ($colnames) VALUES ($placeholds)";
106a0b3799eSAndreas Gohr    }
107a0b3799eSAndreas Gohr
108a0b3799eSAndreas Gohr    /**
109a0b3799eSAndreas Gohr     * Creates the insert string for the multi value table
110a0b3799eSAndreas Gohr     *
111a0b3799eSAndreas Gohr     * @return string
112a0b3799eSAndreas Gohr     */
113a0b3799eSAndreas Gohr    protected function getSQLforMultiValue() {
114a0b3799eSAndreas Gohr        $table = $this->schema->getTable();
115a0b3799eSAndreas Gohr        /** @noinspection SqlResolve */
116a0b3799eSAndreas Gohr        return "INSERT INTO multi_$table (pid, colref, row, value) VALUES (?,?,?,?)";
117a0b3799eSAndreas Gohr    }
118a0b3799eSAndreas Gohr
119a0b3799eSAndreas Gohr    /**
120a0b3799eSAndreas Gohr     * Walks through the CSV and imports
121a0b3799eSAndreas Gohr     */
122a0b3799eSAndreas Gohr    protected function importCSV() {
123a0b3799eSAndreas Gohr
124a0b3799eSAndreas Gohr        $single = $this->getSQLforAllValues();
125a0b3799eSAndreas Gohr        $multi = $this->getSQLforMultiValue();
126a0b3799eSAndreas Gohr
127a0b3799eSAndreas Gohr        $this->sqlite->query('BEGIN TRANSACTION');
128a0b3799eSAndreas Gohr        while(($data = fgetcsv($this->fh)) !== false) {
129a0b3799eSAndreas Gohr            $this->line++;
130a0b3799eSAndreas Gohr            $this->importLine($data, $single, $multi);
131a0b3799eSAndreas Gohr        }
132a0b3799eSAndreas Gohr        $this->sqlite->query('COMMIT TRANSACTION');
133a0b3799eSAndreas Gohr    }
134a0b3799eSAndreas Gohr
135a0b3799eSAndreas Gohr    /**
136*1fc2361fSSzymon Olewniczak     * The errors that occured during validation
137a0b3799eSAndreas Gohr     *
138*1fc2361fSSzymon Olewniczak     * @return string[] already translated error messages
139a0b3799eSAndreas Gohr     */
140*1fc2361fSSzymon Olewniczak    public function getErrors() {
141*1fc2361fSSzymon Olewniczak        return $this->errors;
142*1fc2361fSSzymon Olewniczak    }
143*1fc2361fSSzymon Olewniczak
144*1fc2361fSSzymon Olewniczak    /**
145*1fc2361fSSzymon Olewniczak     * Validate a single value
146*1fc2361fSSzymon Olewniczak     *
147*1fc2361fSSzymon Olewniczak     * @param Column $col the column of that value
148*1fc2361fSSzymon Olewniczak     * @param mixed &$rawvalue the value, will be fixed according to the type
149*1fc2361fSSzymon Olewniczak     * @return bool true if the data validates, otherwise false
150*1fc2361fSSzymon Olewniczak     */
151*1fc2361fSSzymon Olewniczak    protected function validateValue(Column $col, &$rawvalue) {
152*1fc2361fSSzymon Olewniczak        //by default no validation
153*1fc2361fSSzymon Olewniczak        return true;
154*1fc2361fSSzymon Olewniczak    }
155*1fc2361fSSzymon Olewniczak
156*1fc2361fSSzymon Olewniczak    /**
157*1fc2361fSSzymon Olewniczak     * Read and validate CSV parsed line
158*1fc2361fSSzymon Olewniczak     *
159*1fc2361fSSzymon Olewniczak     * @param &$line
160*1fc2361fSSzymon Olewniczak     */
161*1fc2361fSSzymon Olewniczak    protected function readLine(&$line) {
162a0b3799eSAndreas Gohr        // prepare values for single value table
163a0b3799eSAndreas Gohr        $values = array();
164a0b3799eSAndreas Gohr        foreach($this->columns as $i => $column) {
165a0b3799eSAndreas Gohr            if(!isset($line[$i])) throw new StructException('Missing field at CSV line %d', $this->line);
166a0b3799eSAndreas Gohr
167*1fc2361fSSzymon Olewniczak            if(!$this->validateValue($column, $line[$i])) return false;
168*1fc2361fSSzymon Olewniczak
169a0b3799eSAndreas Gohr            if($column->isMulti()) {
170a0b3799eSAndreas Gohr                // multi values get split on comma
171a0b3799eSAndreas Gohr                $line[$i] = array_map('trim', explode(',', $line[$i]));
172a0b3799eSAndreas Gohr                $values[] = $line[$i][0];
173a0b3799eSAndreas Gohr            } else {
174a0b3799eSAndreas Gohr                $values[] = $line[$i];
175a0b3799eSAndreas Gohr            }
176a0b3799eSAndreas Gohr        }
177*1fc2361fSSzymon Olewniczak        //if no ok don't import
178*1fc2361fSSzymon Olewniczak        return $values;
179*1fc2361fSSzymon Olewniczak    }
180a0b3799eSAndreas Gohr
181*1fc2361fSSzymon Olewniczak    /**
182*1fc2361fSSzymon Olewniczak     * @param string[] $values
183*1fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
184*1fc2361fSSzymon Olewniczak     *
185*1fc2361fSSzymon Olewniczak     * @return string pid
186*1fc2361fSSzymon Olewniczak     */
187*1fc2361fSSzymon Olewniczak    protected function insertIntoSingle($values, $single) {
188a0b3799eSAndreas Gohr        $this->sqlite->query($single, $values);
189a0b3799eSAndreas Gohr        $res = $this->sqlite->query('SELECT last_insert_rowid()');
190a0b3799eSAndreas Gohr        $pid = $this->sqlite->res2single($res);
191a0b3799eSAndreas Gohr        $this->sqlite->res_close($res);
192a0b3799eSAndreas Gohr
193*1fc2361fSSzymon Olewniczak        return $pid;
194*1fc2361fSSzymon Olewniczak    }
195*1fc2361fSSzymon Olewniczak
196*1fc2361fSSzymon Olewniczak    /**
197*1fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
198*1fc2361fSSzymon Olewniczak     * @param $pid string
199*1fc2361fSSzymon Olewniczak     * @param $column string
200*1fc2361fSSzymon Olewniczak     * @param $row string
201*1fc2361fSSzymon Olewniczak     * @param $value string
202*1fc2361fSSzymon Olewniczak     */
203*1fc2361fSSzymon Olewniczak    protected function insertIntoMulti($multi, $pid, $column, $row, $value) {
204*1fc2361fSSzymon Olewniczak        $this->sqlite->query($multi, array($pid, $column->getColref(), $row + 1, $value));
205*1fc2361fSSzymon Olewniczak    }
206*1fc2361fSSzymon Olewniczak
207*1fc2361fSSzymon Olewniczak    /**
208*1fc2361fSSzymon Olewniczak     * @param string[] $values parsed line values
209*1fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
210*1fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
211*1fc2361fSSzymon Olewniczak     */
212*1fc2361fSSzymon Olewniczak    protected function saveLine($values, $line, $single, $multi) {
213*1fc2361fSSzymon Olewniczak        // insert into single value table (and create pid)
214*1fc2361fSSzymon Olewniczak        $pid = $this->insertIntoSingle($values, $single);
215*1fc2361fSSzymon Olewniczak
216a0b3799eSAndreas Gohr        // insert all the multi values
217a0b3799eSAndreas Gohr        foreach($this->columns as $i => $column) {
218a0b3799eSAndreas Gohr            if(!$column->isMulti()) continue;
219a0b3799eSAndreas Gohr            foreach($line[$i] as $row => $value) {
220*1fc2361fSSzymon Olewniczak                $this->insertIntoMulti($multi, $pid, $column, $row, $value);
221a0b3799eSAndreas Gohr            }
222a0b3799eSAndreas Gohr        }
223a0b3799eSAndreas Gohr    }
224a0b3799eSAndreas Gohr
225*1fc2361fSSzymon Olewniczak    /**
226*1fc2361fSSzymon Olewniczak     * Imports one line into the schema
227*1fc2361fSSzymon Olewniczak     *
228*1fc2361fSSzymon Olewniczak     * @param string[] $line the parsed CSV line
229*1fc2361fSSzymon Olewniczak     * @param string $single SQL for single table
230*1fc2361fSSzymon Olewniczak     * @param string $multi SQL for multi table
231*1fc2361fSSzymon Olewniczak     */
232*1fc2361fSSzymon Olewniczak    protected function importLine($line, $single, $multi) {
233*1fc2361fSSzymon Olewniczak        //read values, false if no validation
234*1fc2361fSSzymon Olewniczak        $values = $this->readLine($line);
235*1fc2361fSSzymon Olewniczak
236*1fc2361fSSzymon Olewniczak        if($values) {
237*1fc2361fSSzymon Olewniczak            $this->saveLine($values, $line, $single, $multi);
238*1fc2361fSSzymon Olewniczak        } else foreach($this->errors as $error) {
239*1fc2361fSSzymon Olewniczak            msg($error, -1);
240*1fc2361fSSzymon Olewniczak        }
241*1fc2361fSSzymon Olewniczak    }
242a0b3799eSAndreas Gohr}
243