1<?php
2/*
3description : Dokuwiki PubMed2020 plugin
4author      : Eric Maeker
5email       : eric.maeker[at]gmail.com
6lastupdate  : 2020-06-05
7license     : Public-Domain
8*/
9
10if(!defined('DOKU_INC')) die();
11
/**
 * Helper that talks to the NCBI "ctxp" citation service and turns the
 * MEDLINE text it returns into an associative array usable for rendering
 * citations (short, ISO and Vancouver styles).
 */
class PubMed2020 {
  /** @var DokuHTTPClient HTTP client used for every remote request */
  public $HttpClient;

  // New PubMed interface. See https://api.ncbi.nlm.nih.gov/lit/ctxp
  public $ctxpBaseURL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/";

  // Relative URL templates, keyed by identifier kind.
  // The constructor prefixes each of them with $ctxpBaseURL.
  public $ctxpURLs = array(
        "pmid" => "pubmed/?format=medline&id=%s",
        "pmcid" => "pmc/?format=medline&id=%s",
      );

  public $pubmedURL       = 'https://pubmed.ncbi.nlm.nih.gov/%s';
  public $pmcURL          = 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC%s';
  public $pubmedSearchURL = 'https://pubmed.ncbi.nlm.nih.gov/?term=%s';

  // Set this to true to get debugging page output
  //     when retrieving and processing pubmed URL
  public $debugUsingEchoing = false;

  /**
   * Create the HTTP client and turn the relative ctxp templates into
   * absolute URLs.
   */
  public function __construct() {
    $this->HttpClient = new DokuHTTPClient();
    foreach (array("pmid", "pmcid") as $kind) {
      $this->ctxpURLs[$kind] = $this->ctxpBaseURL.$this->ctxpURLs[$kind];
    }
  }

  /**
   * Tell whether $string begins with $startString (byte-wise comparison).
   *
   * @param string $string      text to inspect
   * @param string $startString expected prefix; an empty prefix always matches
   * @return bool
   */
  public function startsWith($string, $startString) {
    return strncmp($string, $startString, strlen($startString)) === 0;
  }

  /**
   * Download the MEDLINE record of one article from the ctxp service.
   *
   * The request is attempted a second time when the first answer is shorter
   * than 10 bytes (which includes a failed request).
   *
   * @param string $base key of $ctxpURLs to use ("pmid" or "pmcid")
   * @param string $id   identifier to look up; it is URL-encoded here
   * @param string $doi  unused, kept so existing callers keep working
   * @return string|null raw MEDLINE text, or NULL when $id is empty, $base
   *                     is unknown, or both attempts returned fewer than
   *                     10 bytes
   */
  public function getDataFromCtxp($base, $id, $doi="") {
    if (empty($id) || empty($this->ctxpURLs[$base]))
      return NULL;

    $url = sprintf($this->ctxpURLs[$base], urlencode($id));
    $debug = $this->debugUsingEchoing;

    if ($debug)
      echo PHP_EOL.">> PUBMED: getting URL: ".$url.PHP_EOL;

    // The cast turns a failed request (non-string result) into an empty
    // string so the length checks below behave the same on every PHP version.
    $medlineContent = (string) $this->HttpClient->get($url);

    // Check length of the returned HTTP content, make a second try if necessary
    if (strlen($medlineContent) < 10) {
      $medlineContent = (string) $this->HttpClient->get($url);
      if ($debug)
        echo PHP_EOL.">> PUBMED: Second try: ".strlen($medlineContent)." ".$url."<BR>".PHP_EOL;
    }

    // Still nothing usable: give up
    if (strlen($medlineContent) < 10) {
      if ($debug)
        echo PHP_EOL.">> PUBMED: Error while retrieving URL: ".$url."<10".PHP_EOL;
      return NULL;
    }

    if ($debug)
      echo PHP_EOL.">> PUBMED: retrieved from the URL: ".PHP_EOL.$medlineContent.PHP_EOL;

    return $medlineContent;
  }

  /**
   * Build the URL of a PubMed search.
   *
   * $searchTerms is split on "|": the first part is the search term (it is
   * URL-encoded), every following part is appended verbatim as an extra
   * query-string option (size, format, filter, sort...).
   *
   * @param string $searchTerms "term|option=value|option=value..."
   * @return string URL of the query
   */
  public function getPubmedSearchURL($searchTerms) {
    // explode() always yields at least one element, so $options[0] exists.
    $options = explode("|", $searchTerms);
    $url = sprintf($this->pubmedSearchURL, urlencode($options[0]));
    if (count($options) > 1)
      $url .= "&".implode("&", array_slice($options, 1));
    return $url;
  }

  /**
   * Convert a raw MEDLINE record into an associative array.
   *
   * Keys that are always present in the result (non-empty input):
   *      "authors"          -> Array of authors (corporate author appended last)
   *      "authorsVancouver" -> Same list as "authors"
   *      "first_author"     -> First author, followed by an "et al" span when
   *                            several authors are listed; the translated
   *                            'no_author_listed' string when there is none
   *      "mesh"             -> Array of MeSH terms (MH)
   *      "keywords"         -> Array of keywords (OT)
   *      "iso"              -> Citation: the SO field, or "country : year."
   *                            for a book
   *      "vancouver"        -> Vancouver style citation
   * Keys present only when the matching MEDLINE field exists:
   *      "pmid", "url", "pmcid", "pmcurl", "vol", "issue", "year", "title",
   *      "pages", "abstract", "lang", "type", "translated_title", "country",
   *      "journal_iso", "journal_title", "journal_id", "doi", "pii", "so",
   *      "copyright", "corporate_author", "collection_title", "book_title"
   *
   * See https://www.nlm.nih.gov/bsd/mms/medlineelements.html for the tags.
   *
   * @param string $string       raw MEDLINE text
   * @param object $pluginObject must provide getLang() (translations) and
   *                             getConf() (plugin settings)
   * @return array array("pmid" => "0") when $string is empty
   */
  public function readMedlineContent($string, $pluginObject) {
    // No data return a minimal array
    if (empty($string))
      return array("pmid" => "0");

    // TODO: Catch book references. Eg: 28876803
    $ret = array();
    $authors = array();
    $mesh = array();
    $keywords = array();

    foreach ($this->parseMedlineFields($string) as $field) {
      $tag = $field[0];
      $value = $field[1];

      switch ($tag) {
        case "PMID":  // PMID- 15924077
          $ret["pmid"] = $value;
          $ret["url"] = sprintf($this->pubmedURL, urlencode($value));
          break;
        case "PMC":
          $ret["pmcid"] = str_replace("PMC", "", $value);
          $ret["pmcurl"] = sprintf($this->pmcURL, urlencode($ret["pmcid"]));
          break;
        case "VI": $ret["vol"] = $value; break;    // VI  - 161
        case "IP": $ret["issue"] = $value; break;  // IP  - 4
        case "DP":                                 // DP  - 2005 Apr
          $ret["year"] = substr($value, 0, 4);
          break;
        case "TI":
          // TODO: Keep case of title correctly -> How?
          $ret["title"] = $value;
          break;
        case "PG": $ret["pages"] = $value; break;
        case "AB": $ret["abstract"] = $value; break;
        case "AU":
          // NAME SN -> Name SN: only the first word is re-cased (each part of
          // a hyphenated name gets its own capital). The remaining words are
          // kept verbatim so multi-word surnames, initials and suffixes
          // ("van der Heijden M", "Jones JB Jr") are not truncated.
          $parts = explode(" ", trim($value));
          if (count($parts) >= 2) {
            $parts[0] = ucwords(strtolower($parts[0]), "-");
            $value = implode(" ", $parts);
          }
          array_push($authors, $value);
          break;
        case "LA": $ret["lang"] = $value; break;              // LA  - fre
        case "PT": $ret["type"] = $value; break;              // PT  - Journal Article
        case "TT": $ret["translated_title"] = $value; break;
        case "PL": $ret["country"] = $value; break;           // PL  - France
        case "TA": $ret["journal_iso"] = $value; break;       // TA  - Rev Neurol (Paris)
        case "JT": $ret["journal_title"] = $value; break;     // JT  - Revue neurologique
        case "JID": $ret["journal_id"] = $value; break;       // JID - 2984779R
        case "MH": array_push($mesh, $value); break;
        case "OT": array_push($keywords, $value); break;
        case "AID":
          // "> 0" also rejects a value that starts with the marker,
          // i.e. one that has no identifier in front of it.
          if (strpos($value, "[doi]") > 0)
            $ret["doi"] = str_replace(" [doi]", "", $value);
          if (strpos($value, "[pii]") > 0)
            $ret["pii"] = str_replace(" [pii]", "", $value);
          break;
        case "SO":  // SO  - Rev Neurol (Paris). 2005 Apr;161(4):419-26. doi: 10.1016/s0035-3787(05)85071-4.
          $ret["so"] = $value;
          break;
        case "CI": $ret["copyright"] = $value; break;
        case "CN": $ret["corporate_author"] = $value; break;
        case "CTI": $ret["collection_title"] = $value; break;
        case "BTI":
          $ret["book_title"] = $value;
          $ret["title"] = $value;
          break;
        // Every other tag (DCOM, LR, IS, AD, SB, EDAT, MHDA, CRDT, PHST,
        // PST...) is ignored.
      }
    }

    // Get authors
    if (!empty($ret["corporate_author"]))
      array_push($authors, $ret["corporate_author"]);
    $ret["authors"] = $authors;
    $ret["authorsVancouver"] = $authors;

    // The placeholder is only used for "first_author": the stored author
    // lists stay empty when nobody is listed.
    if (count($authors) == 0)
      array_push($authors, $pluginObject->getLang('no_author_listed'));

    // Create first author for short output
    if (count($authors) > 1) {
      $ret['first_author'] = $authors[0].' <span class="etal">et al</span>';
    } else {
      $ret['first_author'] = $authors[0];
    }

    // Create Vancouver Authors.
    // Manage limitation in number of authors (a limit below 1 means no limit)
    $limit = (int) $pluginObject->getConf('limit_authors_vancouver');
    $authorsToUse = $ret["authorsVancouver"];
    $addAndAl = false;
    if ($limit >= 1 && count($authorsToUse) > $limit) {
      $addAndAl = true;
      $authorsToUse = array_slice($authorsToUse, 0, $limit);
    }

    // no authors -> nothing to add  Eg: pmid 12142303
    $vancouver = "";
    if (count($authorsToUse) > 0) {
      $vancouver = implode(', ', $authorsToUse);
      if ($addAndAl)
        $vancouver .= " ".$pluginObject->getConf('et_al_vancouver');
      $vancouver .= ". ";
    }

    // Get Mesh terms & keywords
    $ret["mesh"] = $mesh;
    $ret["keywords"] = $keywords;

    if (!empty($ret["book_title"])) {
      // Author. <i>BookTitle</i>. country : year.
      $ret["iso"] = $this->fieldOrEmpty($ret, "country")." : ";
      $ret["iso"] .= $this->fieldOrEmpty($ret, "year").".";
      $ret["vancouver"] = $vancouver."<i>".$ret["book_title"].".</i> ".$ret["iso"];
      return $ret;
    }

    // Remove points from the journal_iso string
    // NOTE(review): the comparison is strict, so it only matches a boolean
    // true; confirm that getConf() does not return 1/"1" for this setting.
    if ($pluginObject->getConf('remove_dot_from_journal_iso') === true)
      $ret["journal_iso"] = str_replace(".", "", $this->fieldOrEmpty($ret, "journal_iso"));

    // Construct iso citation of this article
    // Use SO from the raw medline content (NULL when the record has no SO)
    $ret["iso"] = isset($ret["so"]) ? $ret["so"] : NULL;

    // Construct Vancouver citation of this article
    // See https://www.nlm.nih.gov/bsd/uniform_requirements.html
    $vancouver .= $this->fieldOrEmpty($ret, "title");
    $vancouver .= " ".$this->fieldOrEmpty($ret, "so");
    $ret["vancouver"] = $vancouver;

    return $ret;
  }

  /**
   * Split raw MEDLINE text into an ordered list of array(tag, value) pairs.
   *
   * A line starting with two spaces continues the value of the previous
   * field; it is ignored when no field has been read yet. Any other line
   * longer than 4 bytes starts a new field: the tag is taken from the first
   * 4 bytes (trimmed, digits removed) and the value from byte 6 onwards.
   * Shorter lines are skipped.
   */
  private function parseMedlineFields($content) {
    $fields = array();
    $last = -1;
    foreach (preg_split("/((\r?\n)|(\r\n?))/", $content) as $line) {
      if ($this->startsWith($line, "  ")) {
        // Append multiline value
        if ($last >= 0)
          $fields[$last][1] .= ' '.trim($line);
        continue;
      }
      if (strlen($line) > 4) {
        $tag = preg_replace('/[0-9]+/', '', trim(substr($line, 0, 4)));
        $fields[] = array($tag, trim((string) substr($line, 6)));
        $last++;
      }
    }
    return $fields;
  }

  /**
   * Return $fields[$name], or an empty string when that key is missing.
   */
  private function fieldOrEmpty($fields, $name) {
    return isset($fields[$name]) ? $fields[$name] : "";
  }

} // class PubMed2020
313
314?>