1<?php
2/*
3description : Dokuwiki PubMed2020 plugin
4author      : Eric Maeker
5email       : eric.maeker@gmail.com
6lastupdate  : 2020-06-05
7license     : Public-Domain
8*/
9
// Abort unless the DOKU_INC constant is defined, so that this file cannot be
// executed on its own outside of a DokuWiki bootstrap.
defined('DOKU_INC') or die();
11
/**
 * File cache for the PubMed2020 plugin.
 *
 * Stores raw Medline records, a PMID <-> DOI cross-reference file (JSON) and
 * locates locally stored PDFs, all below one media namespace.
 */
class pubmed2020_cache {
  /** @var string Lower-cased media namespace holding the cached files */
  public $namespace  ='';
  /** @var string Directory of the namespace: $conf['mediadir'].'/'.$namespace */
  public $mediaDir   ='';
  /** @var string sprintf() format of a cached file path; %s is the ID */
  public $mediaFormat='';
  /** @var string sprintf() format of a cached file media ID; %s is the ID */
  public $linkFormat ='';
  /** @var string File name prefix (already including the trailing '_' when not empty) */
  public $prefix     ='';
  /** @var string Lower-cased file extension without the leading dot */
  public $extension  ='';
  /** @var string Not referenced by any method of this class; kept for compatibility */
  public $tmpdir     ='';
  /** @var string ID of the cross-reference file PMID <-> DOI */
  public $crossRefId ='';
  /**
   * @var string Namespace of the PDFs saved by DOI. All files in this path
   *             must be named {DOI}.pdf with any '/' replaced by '_'.
   */
  public $pdfDoiNS   ='';
  /**
   * @var string Namespace of the PDFs saved by PMID. All files in this path
   *             must be named {PMID}.pdf.
   */
  public $pdfPmidNS  ='';
  /** @var string sprintf() format of the translated-abstract file path; %s is the PMID */
  public $abstractTrFormat ='';

  /**
   * Initialization: computes every path/format and makes sure the cache
   * directories exist (see checkDir()).
   *
   * @param string $_name   media namespace of the cache
   * @param string $_prefix file name prefix; '_' is appended when not empty
   * @param string $_ext    file extension of the cached records
   */
  public function __construct($_name='plugin_cache',$_prefix='noname',$_ext='nbib'){
    global $conf;
    $this->namespace = strtolower($_name);
    $this->pdfDoiNS  = strtolower($_name."/doi_pdf");
    $this->pdfPmidNS = strtolower($_name."/pmid_pdf");
    $this->extension = strtolower($_ext);
    // The prefix is lower-cased like every other path component. (The
    // lower-cased value used to be overwritten by the raw argument.)
    if (empty($_prefix)){
      $this->prefix = (string)$_prefix;
    }else{
      $this->prefix = strtolower($_prefix).'_';
    }
    $delimiter = $this->_nsDelimiter();
    $this->mediaDir    = $conf['mediadir'].'/'.$this->namespace;
    $this->mediaFormat = $this->mediaDir.'/'.$this->prefix.'%s.'.$this->extension;
    $this->linkFormat  = $this->namespace.$delimiter.$this->prefix.'%s.'.$this->extension;
    $this->abstractTrFormat = $this->mediaDir.'/'.$this->prefix.'%s_fr.txt';

    $this->crossRefId = 'cross';
    $this->checkDir();
  }

  /**
   * Namespace delimiter used when building media IDs: '/' when
   * $conf['useslash'] is set, ':' otherwise.
   */
  private function _nsDelimiter() {
    global $conf;
    return empty($conf['useslash']) ? ':' : '/';
  }

  /**
   * Return true when $string begins with $startString.
   */
  public function startsWith($string, $startString) {
    $len = strlen($startString);
    return (substr($string, 0, $len) === $startString);
  }

  /**
   * Return true when $string ends with $endString.
   */
  private function _endsWith($string, $endString) {
    $len = strlen($endString);
    if ($len === 0)
      return true;
    return (substr($string, -$len) === $endString);
  }

  /**
   * Get local pdf link if the file exists (checking PMID dir, then DOI dir).
   *
   * @param string $pmid
   * @param string $doi
   * @return string link built by ml(), or "" when no local PDF was found
   */
  public function GetLocalPdfPath($pmid, $doi) {
    $delimiter = $this->_nsDelimiter();
    $candidates = array();
    // An empty identifier cannot name a PDF, so it is not probed at all.
    if ((string)$pmid !== '')
      $candidates[] = $this->pdfPmidNS.$delimiter.$pmid.".pdf";
    if ((string)$doi !== '')
      $candidates[] = $this->pdfDoiNS.$delimiter.str_replace("/","_",$doi).".pdf";

    foreach ($candidates as $ml) {
      if (file_exists(mediaFN($ml)))
        return ml($ml,'',true,'',true);
    }
    return ""; // Not found
  }

  /**
   * Get the cache file path of an ID.
   *
   * For the "pmcid" base the file name prefix is swapped for "pmcid_". Only
   * the file name is rewritten: the directory part is left untouched even if
   * it happens to contain the prefix. With an empty prefix nothing is swapped.
   *
   * @param string $base "pmcid" or anything else (treated as PMID)
   * @param string $id
   * @return string
   */
  public function getRawContentPath($base, $id) {
    $id = strtolower($id);
    $base = strtolower($base);
    $file = sprintf($this->mediaFormat, $id);
    if ($base !== "pmcid" || (string)$this->prefix === '')
      return $file;

    $name = basename($file);
    if ($this->startsWith($name, $this->prefix)) {
      $dirPart = substr($file, 0, strlen($file) - strlen($name));
      $file = $dirPart.$base.'_'.substr($name, strlen($this->prefix));
    }
    return $file;
  }

  /**
   * Get all cached file paths of the media directory.
   *
   * Only files ending with ".{extension}" are listed. The ID is the file
   * name without that extension and without the leading prefix.
   *
   * @return array array(ID => filepath); empty when the directory is unreadable
   */
  public function getAllMediaPaths() {
    $files = array();
    $dir = $this->mediaDir;
    $dirhandle = @opendir($dir);
    if ($dirhandle === false)
      return $files;

    $suffix = '.'.$this->extension;
    // Compare against false: an entry named "0" must not end the loop.
    while (($name = readdir($dirhandle)) !== false){
      if (!$this->_endsWith($name, $suffix))
        continue;
      $id = (string)substr($name, 0, strlen($name) - strlen($suffix));
      if ((string)$this->prefix !== '' && $this->startsWith($id, $this->prefix))
        $id = (string)substr($id, strlen($this->prefix));
      if (!empty($id))
        $files[$id] = $dir.'/'.$name;
    }
    closedir($dirhandle);
    return $files;
  }

  /**
   * Rebuild the cross-reference file (ID => DOI) from every cached record.
   * Records without a DOI are left out.
   *
   * @return bool always true
   */
  public function recreateCrossRefFile(){
    $cross = array();
    foreach ($this->getAllMediaPaths() as $id => $path) {
      // The cross-reference file lives in the same directory with the same
      // extension; it is not a Medline record.
      if ((string)$id === $this->crossRefId)
        continue;
      if (!@file_exists($path))
        continue;
      $doi = $this->_catchDoiFromRawMedlineContent(io_readFile($path));
      if (!empty($doi))
        $cross[$id] = $doi;
    }
    // Save cross data
    $this->_save_array($this->crossRefId, $cross);
    return true;
  }

  /**
   * Translate DOIs to the IDs recorded in the cross-reference file.
   *
   * @param array $pdfDois list of DOIs; every DOI that could be translated is
   *                       removed from this array (passed by reference)
   * @return array|null list of matching IDs, NULL when there is no cross-reference data
   */
  public function PmidFromDoi(&$pdfDois) {
    $cross = $this->_read_array("", $this->crossRefId);
    if (empty($cross))
      return NULL;
    $pmids = array();
    $removeDoi = array();
    foreach ($pdfDois as $doi) {
      // Strict search: DOIs are strings and must not be compared numerically.
      $pmid = array_search($doi, $cross, true);
      if ($pmid !== false && $pmid !== '') {
        $pmids[] = $pmid;
        $removeDoi[] = $doi;
      }
    }
    $pdfDois = array_diff($pdfDois, $removeDoi);
    return $pmids;
  }

  /**
   * List the names (without ".pdf") of all PDF files of a media namespace.
   *
   * @param string $ns media namespace
   * @return array empty when the directory is unreadable
   */
  private function _listPdfNames($ns) {
    $names = array();
    $dirhandle = @opendir(mediaFN($ns));
    if ($dirhandle === false)
      return $names;
    while (($name = readdir($dirhandle)) !== false){
      if ($this->_endsWith($name, ".pdf"))
        $names[] = (string)substr($name, 0, strlen($name) - strlen(".pdf"));
    }
    closedir($dirhandle);
    return $names;
  }

  /**
   * Get all local PDF file PMIDs
   *
   * @return array
   */
  public function GetAllAvailableLocalPdfByPMIDs() {
    return $this->_listPdfNames($this->pdfPmidNS);
  }

  /**
   * Get all local PDF file DOIs
   *
   * Every '_' of the file name is turned back into '/'.
   * NOTE(review): a DOI that itself contains '_' cannot be restored exactly.
   *
   * @return array
   */
  public function GetAllAvailableLocalPdfByDOIs() {
    $dois = array();
    foreach ($this->_listPdfNames($this->pdfDoiNS) as $name)
      $dois[] = str_replace("_","/",$name);
    return $dois;
  }

  /**
   * Get media link of a cached record
   */
  public function GetMediaLink($id) {
    return ml(sprintf($this->linkFormat, $id),'',true,'',true);
  }

  /**
   * Get the link of the PDF stored for a DOI (existence is not checked).
   */
  public function GetDoiPdfUrl($doi) {
    $ml = $this->pdfDoiNS.$this->_nsDelimiter().str_replace("/","_",$doi).".pdf";
    return ml($ml,'',true,'',true);
  }

  /**
   * Get the link of the JPG thumbnail stored for a DOI (existence is not checked).
   */
  public function GetDoiPdfThumbnailUrl($doi){
    $ml = $this->pdfDoiNS.$this->_nsDelimiter().str_replace("/","_",$doi).'.jpg';
    return ml($ml,'',true,'',true);
  }

  /**
   * Get text from cache. If none, return false
   *
   * Reading is delegated to io_readFile(); per the original note it handles
   * .gz and .bz2 extensions (not verifiable from this file).
   */
  public function getMedlineContent($base, $id) {
    $filepath = $this->getRawContentPath($base, $id);
    if (@file_exists($filepath))
      return io_readFile($filepath);
    return false;
  }

  /**
   * Return the content of the translated abstract of the PMID, "" if none.
   *
   * NOTE(review): $lang is accepted but not used; the file name always ends
   * with "_fr.txt" (see $abstractTrFormat).
   */
  public function GetTranslatedAbstract($pmid, $lang='fr'){
    $filepath = sprintf($this->abstractTrFormat,$pmid);
    if (@file_exists($filepath))
      return io_readFile($filepath);
    return "";
  }

  /**
   * Save a raw Medline record to cache with a permission of $conf['fmode']
   * and record its ID => DOI pair in the cross-reference file.
   *
   * Writing is delegated to io_saveFile(); per the original note it handles
   * .gz and .bz2 extensions (not verifiable from this file).
   *
   * @param string $base "pmcid" or anything else (treated as PMID)
   * @param string $raw  raw Medline content
   * @return bool false when no ID could be found in $raw or the file could not be written
   */
  public function saveRawMedlineContent($base, $raw) {
    global $conf;
    $id = $this->_catchIdFromRawMedlineContent($base, $raw);
    if ($id === '')
      return false; // nothing to name the cache file after
    $doi = $this->_catchDoiFromRawMedlineContent($raw);
    $path = $this->getRawContentPath($base, $id);

    if (!io_saveFile($path,$raw))
      return false;
    @chmod($path,$conf['fmode']);

    if ($doi !== '') {
      // Read the cross-reference file from the location _save_array() writes
      // to (base ""). Reading it with $base returned nothing for "pmcid" and
      // the whole file was then replaced by a single entry.
      $crossrefs = $this->_read_array("", $this->crossRefId);
      if (!is_array($crossrefs))
        $crossrefs = array();
      $crossrefs[$id] = $doi;
      $this->_save_array($this->crossRefId, $crossrefs);
    }
    return true;
  }

  /**
   * Create the directory of a media namespace when it is missing and apply
   * $conf['dmode'] to it.
   */
  private function _ensureMediaDir($ns) {
    global $conf;
    $dummyFN = mediaFN($ns.':_dummy');
    $dir = dirname($dummyFN);
    if (!@is_dir($dir)){
      io_makeFileDir($dummyFN);
      @chmod($dir,$conf['dmode']);
    }
  }

  /**
   * Check cache directories, and append ACL entries for them to
   * acl.auth.php when @ALL has no permission on the cache namespace.
   */
  public function checkDir() {
    $this->_ensureMediaDir($this->namespace);
    $this->_ensureMediaDir($this->pdfDoiNS);
    $this->_ensureMediaDir($this->pdfPmidNS);

    if (auth_aclcheck($this->namespace.":*","","@ALL")==0){
      global $AUTH_ACL;
      $aclFile = DOKU_CONF.'acl.auth.php';
      $lines = @file($aclFile);
      $acl = ($lines === false) ? '' : join("",$lines);
      // Keep the appended entries on lines of their own.
      if ($acl !== '' && substr($acl, -1) !== "\n")
        $acl .= "\n";

      // The sub-namespaces are stored as "name/doi_pdf"; ACL entries are
      // written in the colon-separated form used for namespace IDs.
      $doiNS  = str_replace('/', ':', $this->pdfDoiNS);
      $pmidNS = str_replace('/', ':', $this->pdfPmidNS);

      $p_acl = $this->namespace.":*\t@ALL\t1\n";
      $p_acl .= $doiNS.":*\t@admin\t16\n";
      $p_acl .= $pmidNS.":*\t@admin\t16\n";
      $p_acl .= $doiNS.":*\t@ALL\t0\n";
      $p_acl .= $pmidNS.":*\t@ALL\t0\n";
      io_saveFile($aclFile, $acl.$p_acl);
      $AUTH_ACL = file($aclFile); // Reload ACL
    }
  }

  /**
   * Delete every regular file of the media directory (sub-directories are
   * left alone).
   */
  public function clearCache(){
    $handle = @opendir($this->mediaDir);
    if ($handle === false)
      return;
    // Compare against false: an entry named "0" must not end the loop.
    while (($entry = readdir($handle)) !== false){
      $path = $this->mediaDir.'/'.$entry;
      if (is_file($path))
        @unlink($path);
    }
    closedir($handle);
  }

  /**
   * Remove cache and directory
   */
  public function removeDir() {
    $this->clearCache();
    @rmdir($this->mediaDir);
  }

  /**
   * Save a key/value array as JSON in the cache file of $id (base "").
   *
   * @param string $id
   * @param array  $array
   * @return bool false when $id or $array is empty, or on encoding/writing failure
   */
  public function _save_array($id, $array) {
    if (empty($id) || empty($array))
      return false;
    global $conf;
    $json = json_encode($array);
    if ($json === false)
      return false;
    $path = $this->getRawContentPath("", $id);
    if (io_saveFile($path,$json)) {
      @chmod($path,$conf['fmode']);
      return true;
    }
    return false;
  }

  /**
   * Return true if the ID cached file exists; its modification time is
   * refreshed in that case.
   */
  public function _idExists($base, $id) {
    $path = $this->getRawContentPath($base, $id);
    if (@file_exists($path)){
      @touch($path);
      return true;
    }
    return false;
  }

  /**
   * Read a key/value array from the JSON cache file of $id.
   *
   * @return array|null NULL when the file is missing or does not hold an array
   */
  public function _read_array($base, $id) {
    if (empty($id) || !$this->_idExists($base, $id))
      return NULL;
    $path = $this->getRawContentPath($base, $id);
    $array = json_decode(io_readFile($path), true);
    return is_array($array) ? $array : NULL;
  }

  /**
   * Extract the DOI from the "AID - ... [doi]" line of a raw Medline record.
   *
   * @param string $raw
   * @return string the DOI, "" when the record has none
   */
  public function _catchDoiFromRawMedlineContent($raw) {
    $matches = array();
    if (preg_match('~AID - (.*) \[doi\]~', (string)$raw, $matches) === 1)
      return $matches[1];
    return '';
  }

  /**
   * Extract the record ID from a raw Medline record: the "PMC - PMC..." line
   * for the "pmcid" base, the "PMID- ..." line otherwise.
   *
   * @param string $base
   * @param string $raw
   * @return string the trimmed ID, "" when not found
   */
  public function _catchIdFromRawMedlineContent($base, $raw) {
    $pattern = ($base === "pmcid") ? '~PMC - PMC(.*)~' : '~PMID- (.*)~';
    $matches = array();
    if (preg_match($pattern, (string)$raw, $matches) === 1)
      return trim($matches[1]);
    return '';
  }

}
407?>