1<?php
2/*
3description : Dokuwiki PubMed2020 plugin
4author      : Eric Maeker
5email       : eric.maeker[at]gmail.com
6lastupdate  : 2021-02-09
7license     : Public-Domain
8
Data are stored in RIS format: https://en.wikipedia.org/wiki/RIS_(file_format)
10See also: https://citation.crosscite.org/docs.html
11
12convertIds -> https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
13*/
14
15if(!defined('DOKU_INC')) die();
16
17class PubMed2020 {
18  var $HttpClient;
19  // New PubMed interface. See https://api.ncbi.nlm.nih.gov/lit/ctxp
20  var $ctxpBaseURL = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/";
21  var $ctxpURLs = array(
22        "pmid" => "pubmed/?format=medline&id=%s",
23        "pmcid" => "pmc/?format=medline&id=%s",
24      );
25
26  var $pubmedURL       = 'https://pubmed.ncbi.nlm.nih.gov/%s';
27  var $pmcURL          = 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC%s';
28  var $pubmedSearchURL = 'https://pubmed.ncbi.nlm.nih.gov/?term=%s';
29  var $similarURL      = 'https://pubmed.ncbi.nlm.nih.gov/?linkname=pubmed_pubmed&from_uid=%s';
30  var $citedByURL      = 'https://pubmed.ncbi.nlm.nih.gov/?linkname=pubmed_pubmed_citedin&from_uid=%s';
31  var $convertId       = 'https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids=%s&format=json&versions=no&showaiid=no';
32  var $referencesURL   = 'https://pubmed.ncbi.nlm.nih.gov/%s/#references';
33  var $scienceDirectURL= "https://www.sciencedirect.com/search?qs=%s"; // %s = doi
34  var $scienceDirectPIIURL= "https://www.sciencedirect.com/science/article/pii/%s"; // %s = pii
35
36  // Set this to true to get debugging page output
37  //     when retrieving and processing pubmed URL
38  var $debugUsingEchoing = false;
39
40  public function __construct() {
41    $this->HttpClient   = new DokuHTTPClient();
42    $this->ctxpURLs["pmid"] = $this->ctxpBaseURL.$this->ctxpURLs["pmid"];
43    $this->ctxpURLs["pmcid"] = $this->ctxpBaseURL.$this->ctxpURLs["pmcid"];
44  } // Ok, V2020
45
46
47  function startsWith($string, $startString) {
48    $len = strlen($startString);
49    return (substr($string, 0, $len) === $startString);
50  } // ok, V2020
51
52
53
54  function convertId($id){
55    if (empty($id))
56      return NULL;
57    $url = sprintf($this->convertId, $id);
58    if ($this->debugUsingEchoing)
59      echo PHP_EOL.">> CONVERT ID: getting URL: ".$url.PHP_EOL;
60    $json = $this->HttpClient->get($url);
61    if ($this->debugUsingEchoing)
62      echo PHP_EOL.">> CONVERT ID: returned: ".$json.PHP_EOL;
63    $r = json_decode($json);
64    if ($r->records[0]->status === "error") {
65      if ($this->debugUsingEchoing)
66        echo PHP_EOL.">> CONVERT ID: ERROR: ".$r->records[0]->errmsg.PHP_EOL;
67      return NULL;
68    }
69    echo print_r($r->records[0]);
70    return $r->records[0];
71  }
72
73  function getPmidFromDoi($doi){
74    if (empty($doi))
75      return NULL;
76    $search = "\"$doi\"&sort=date&size=100&format=pubmed";
77    $url = sprintf($this->pubmedSearchURL, $search);
78    if ($this->debugUsingEchoing)
79      echo PHP_EOL.">> getPmidFromDoi: getting URL: ".$url.PHP_EOL;
80    $r = $this->HttpClient->get($url);
81    if ($this->debugUsingEchoing)
82      echo PHP_EOL.">> getPmidFromDoi: returned: ".$r.PHP_EOL;
83    // <pre class="search-results-chunk">33543243</pre>
84
85    $pattern = "/PMID- (\d+)/";
86    if (preg_match($pattern, $r, $m)){
87      if ($this->debugUsingEchoing)
88        echo PHP_EOL.">> getPmidFromDoi: OK: ".$m[1].PHP_EOL;
89      return $m[1];
90    }
91    return NULL;
92  }
93
94  /**
95   * Returns all PMIDs corresponding to the search query
96   * Do not query format, sort order or size
97   * These data are automatically added to the search query
98   * Returns the array of PMIDs
99   */
100  function getAllSearchResult($search) {
101    $url = sprintf($this->pubmedSearchURL, urlencode($search));
102    $url .= "&format=pmid&sort=date&size=200";
103
104    //<pre class="search-results-chunk">.*<\/pre>
105    $content = $this->HttpClient->get($url);
106
107    $pattern = "/<pre class=\"search-results-chunk\">((?:.*?\r?\n?)*)<\/pre>/";
108    if (preg_match($pattern, $content, $m, PREG_UNMATCHED_AS_NULL)) {
109      $pmids = explode("\n", $m[1]);
110    }
111    return $pmids;
112  }
113
114  /*
115   * Get RIS, MEDLINE and CITATION from CTXP website
116  */
117  function getDataFromCtxp($base, $id, $doi="") {
118    $url = "";
119    if (empty($id))
120      return NULL;
121    if (empty($this->ctxpURLs[$base]))
122      return NULL;
123
124    $url = sprintf($this->ctxpURLs[$base], urlencode($id));
125
126    if ($this->debugUsingEchoing)
127      echo PHP_EOL.">> PUBMED: getting URL: ".$url.PHP_EOL;
128
129    // Retrieve URL
130    $medlineContent = $this->HttpClient->get($url);
131    // Check length of the returned HTTP content, make a second try if necessary
132    if (strlen($medlineContent) < 10) {
133      $medlineContent = $this->HttpClient->get($url);
134      if ($this->debugUsingEchoing)
135        echo PHP_EOL.">> PUBMED: Second try: ".strlen($medlineContent)." ".$url."<BR>".PHP_EOL;
136    }
137
138    // Check error in the content
139    if (strlen($medlineContent) < 10) {
140      if ($this->debugUsingEchoing)
141        echo PHP_EOL.">> PUBMED: Error while retrieving URL: ".$url."<10".PHP_EOL;
142      return NULL;
143    }
144    if ($this->debugUsingEchoing)
145      echo PHP_EOL.">> PUBMED: retrieved from the URL: ".PHP_EOL.$medlineContent.PHP_EOL;
146
147    return $medlineContent;
148  } // ok, V2020
149
150  /*
151   * Create a pubmed query, return the URL of the query
152   * {{pmid>search:#title|terms|size|filter|...}}
153   * return array(title, searchUrl)
154   */
155  function getPubmedSearchURL($searchTerms) {
156    // Split using | to get URL options: size, format, filter, sort
157    $options = explode("|", $searchTerms);
158    if (count($options) < 1)
159      return "ERROR"; // TODO
160    // Find title
161    $title = "";
162    if (substr($options[0], 0, 1) === "#") {
163      $title = substr($options[0], 1);
164      array_shift($options);
165    } else {
166      $title = $searchTerms; // Title === search terms
167    }
168    $url = sprintf($this->pubmedSearchURL, urlencode($options[0]));
169    if (count($options) > 1)
170      $url .= "&".implode("&", array_slice($options, 1));
171    return array($title, $url);
172  } // ok, V2020
173
174  /**
175   * Get full abstract of the article stored in an Array where
176   * Ids:
177   *   "pmid"          -> PMID
178   *   "pmcid"         -> if available PMCID
179   *   "doi"           -> DOI references when available
180   *   "pii"           -> PII references when available
181   *   "bookaccession"
182   *
183   * Authors:
184   *   "authors"       -> Array of authors
185   *   "first_author"  -> First author + "et al." if other authors are listed
186   *   "authorsLimit3" -> Three first authors + "et al." if other authors are listed
187   *   "authorsVancouver" -> according to the configuration of the plugin
188   *   "corporate_author" -> If author is corporate
189   *                        (in this case also included in authors and first_author)
190   *   "collectif"     -> If author is a collective
191   *
192   * Titles:
193   *   "title"         -> Full title
194   *   "title_low"     -> Lowered full title
195   *   "translated_title" -> Translated title (TODO: improve this)
196   *   "translated_title_low" -> Lowered translated title (TODO: improve this)
197   *   "book_title"
198   *
199   * Journal:
200   *   "journal_iso"   -> Journal ISO Abbreviation
201   *   "journal_title" -> Journal full title
202   *   "journal_id"
203   *
204   * Other references:
205   *   "lang"          -> language of the article
206   *   "iso"
207   *   "vol"           -> Journal Volume
208   *   "issue"         -> Journal Issue
209   *   "year"          -> Journal Year of publication
210   *   "month"         -> Journal Month of publication
211   *   "pages"         -> Journal pagination
212   *   "abstract"      -> Complete abstract
213   *   "abstract_wiki" -> Wikified abstract
214   *   "abstract_html" -> HTML'd abstract
215   *   "type"          -> Type of paper
216   *   "country"
217   *   "copyright"
218   *   "collection_title"
219   *   "publisher"
220   *
   * Keywords, Mesh and Hashtags:
   *   "keywords"     -> Non-mesh keywords of the paper
   *   "mesh"         -> Mesh terms associated with the paper
   *   "hashtags"     -> Added hashtag with command 'addhash'
225   *
226   * Hard coded citations:
227   *   "iso"           -> ISO citation of the article
228   *   "npg_full"      -> Citation for: Neurologie Psychiatrie Geriatrie journal
229   *   "npg_iso"       -> Same with authors and title
230   *
231   * Links:
232   *   "url"           -> URL to PubMed site
233   *   "pmcurl"        -> if available URL to PMC site
234   *   "googletranslate_abstract"   -> Link to google translate prepopulated with abstract
235   *   "sciencedirecturl" -> Link to ScienceDirect web site (using DOI)
236   *
237   * \note $pluginObject must be accessible for translations ($this->getLang())
238   */
239  function readMedlineContent($string, $pluginObject) {
240    // No data return empty array
241    if (empty($string))
242      return array("pmid" => "0");
243    $content = $string;
244    $authors = array();
245    $authorsVancouver = array();
246    $val = "";
247    $key = "";
248    $array = array();
249    $id = 0;
250    foreach(preg_split("/((\r?\n)|(\r\n?))/", $content) as $line) {
251      //echo print_r($line).PHP_EOL;
252      if ($this->startsWith($line,"  ")) {
253        // Append multiline value
254        $array[$key] .= ' '.trim($line);
255        continue;
256      } else if (strlen($line) > 4) {
257        // Get new key
258        $key = trim(substr($line, 0, 4));
259        if ($id<9)
260          $key .= '0';
261        $key .= $id;
262        $val = trim(substr($line, 6));
263        $id++;
264        $array[$key] = $val;
265      }
266    }
267
268    // Now process datas
269    // TODO: Catch book references. Eg: 28876803
270    $ret = array();
271    $mesh = array();
272    $keywords = array();
273    foreach($array as $key => $value) {
274      $k = preg_replace('/[0-9]+/', '', $key);
275
276      switch ($k) {  // See https://www.nlm.nih.gov/bsd/mms/medlineelements.html
277//AD  - Médecin gériatre, psychogériatre, Court séjour gériatrique, Unité COVID, Centre
278//      Hospitalier de Calais, 1601 Boulevard des Justes, 62100, Calais, France      Hospitalier de Calais, 1601 Boulevard des Justes, 62100, Calais, France
279
280        case "PMID":
281          $ret["pmid"] = $value;  //PMID - 15924077
282          $ret["url"] = sprintf($this->pubmedURL, urlencode($value));
283          break;
284        case "PMC":
285          $ret["pmcid"] = str_replace("PMC", "", $value);
286          $ret["pmcurl"] = sprintf($this->pmcURL, urlencode($ret["pmcid"]));
287          break;
288        case "DCOM": break; //DCOM- 20050929
289        case "LR": break;  //LR  - 20191109
290        case "IS": break;  //IS  - 0035-3787 (Print)  //IS  - 0035-3787 (Linking)
291        case "VI": $ret["vol"] = $value; break;  //VI  - 161
292        case "IP": $ret["issue"] = $value; break; //IP  - 4
293        case "DP":
294          $ret["year"] = substr($value,0,4);
295          break; //DP  - 2005 Apr
296        case "TI":
297          // TODO: Keep case of title correctly -> How?
298          $ret["title"] = $value;
299          break; // TI title english
300        case "PG":
301          $ret["pages"] = trim($value);
302          // Error with PMID 5042912 (remove last ending '-' char)
303          $ret["pages"] = rtrim($ret["pages"], "-");
304          break;
305        case "AB":
306          $ret["abstract"] = $value;
307          $ret["abstract_wiki"] = $this->_normalizeAbstract($value);
308          $ret["abstract_html"] = $this->_normalizeAbstract($value, "html");
309          break;
310/*
311        case "AU":
312          // Keep case of names correctly
313          // NAME SN -> Name SN (first letter uppercase only)
314          $n = explode(" ", trim($value));
315          if (count($n) >= 2) {
316              // $n[0] = ucfirst(strtolower($n[0]));
317              // Correctly manages Name1-Name2
318              $n[0] = ucwords(strtolower($n[0]), "-");
319              $value = $n[0]." ".$n[1];
320          }
321          //array_push($authors, $value);
322          break;
323*/
324        case "FAU":
325          $sn = "";
326          $surname = "";
327          if (strpos($value, ',') !== false) {
328            $n = explode(",", trim($value));
329            $sn = $n[1];
330            $name = $this->_normalizeNameCase($n[0]);
331          } else {
332            $n = explode(" ", trim($value));
333            $name = $this->_normalizeNameCase($n[0]);
334            $sn = $n[1];
335          }
336          // Keep only first letter of each surname
337          foreach (explode(' ', $sn) as $w) {
338            $surname .=  mb_substr($w,0,1,'UTF-8');
339          }
340          $value = $name." ".$surname;
341          array_push($authors, $value);
342          break;
343        case "LA": $ret["lang"] = $value; break; //LA  - fre
344        case "PT": $ret["type"] = $value; break; //PT  - English Abstract  //PT  - Journal Article
345        case "TT": $ret["translated_title"] = $value; break;
346        case "PL": $ret["country"] = $value; break;  //PL  - France
347        case "TA": $ret["journal_iso"] = $value; break; // TA  - Rev Neurol (Paris)
348        case "JT": $ret["journal_title"] = $value; break; // JT  - Revue neurologique
349        case "JID": $ret["journal_id"] = $value; break; // JID - 2984779R
350//         case "SB": $ret[""] = $value; break; // SB  - IM
351        case "MH": array_push($mesh, $value); break;
352        case "OT": array_push($keywords, $value); break;
353//         case "EDAT": $ret[""] = $value; break; // SB  - IM
354//         case "MHDA": $ret[""] = $value; break; // SB  - IM
355//         case "CRDT": $ret[""] = $value; break; // SB  - IM
356//         case "PHST": $ret[""] = $value; break; // SB  - IM
357        case "AID":
358          if (strpos($value, "[doi]") > 0)
359            $ret["doi"] = str_replace(" [doi]", "", $value);
360          if (strpos($value, "[pii]") > 0)
361            $ret["pii"] = str_replace(" [pii]", "", $value);
362          if (strpos($value, "[bookaccession]") > 0)
363            $ret["bookaccession"] = str_replace(" [bookaccession]", "", $value);
364          break;
365        //case "PST": $ret[""] = $value; break; // SB  - IM
366        case "SO":
367          // Error with 5042912 (pages) => replace "-." by "."
368          $ret["so"] = str_replace("-.", ".", $value);
369          break;
370        case "CI" : $ret["copyright"] = $value; break;
371        case "CN" : $ret["corporate_author"] = $value; break;
372        case "CTI" : $ret["collection_title"] = $value; break;
373        case "BTI" :
374          $ret["book_title"] = $value;
375          if (empty($ret["title"]))
376            $ret["title"] = $value;
377          break;
378        case "PB" : // Possible publisher? count as author?
379          $ret["publisher"] = $value;
380          break;
381        case "LID": // possible page? see 32947851
382          if (strpos($value, "[doi]") > 0) {
383            $ret["doi"] = str_replace(" [doi]", "", $value);
384          } else if (strpos($value, "[pii]") > 0) {
385            $ret["pii"] = str_replace(" [pii]", "", $value);
386          } else {
387            $ret["pages"] = $value;
388          }
389          break;
390        case "HASH": $ret["hashtags"] = $value; break;
391      }  // Switch
392    } // Foreach
393
394    // Create lowered case titles
395    if (!empty($ret["translated_title"])) {
396        $ret["translated_title_low"] = ucfirst(strtolower($ret["translated_title"])); //mb_convert_case($ret["translated_title"], MB_CASE_TITLE);
397    }
398    if (!empty($ret["title"])) {
399        $ret["title_low"] = ucfirst(strtolower($ret["title"])); //mb_convert_case($ret["title"], MB_CASE_TITLE);
400    }
401
402    // Manage unavailable title with a translated title
403    if (strpos($ret["title"], "[Not Available]") !== false) {
404        $ret["title"] = $ret["translated_title"];
405    }
406
407    // Get authors
408    if ($ret["corporate_author"]) {
409      array_push($authors, $ret["corporate_author"]);
410    }
411
412    $ret["authors"] = $authors;
413    $ret["authorsVancouver"] = $authors;
414    if (count($authors) == 0) {
415        array_push($authors, $pluginObject->getLang('no_author_listed'));
416    }
417
418    //"collectif" => $collectif,
419    // Create first author for short output
420    if (count($authors) > 1) {
421      $ret['first_author'] = $authors[0].", ".$pluginObject->getConf('et_al_vancouver');
422    } else {
423      $ret['first_author'] = $authors[0];
424    }
425
426    // Create Vancouver Authors.
427    // Manage limitation in number of authors
428    $limit = $pluginObject->getConf('limit_authors_vancouver');
429    $authorsToUse = $ret["authorsVancouver"];
430    $addAndAl = false;
431    if ($limit >= 1) {
432      if (count($authorsToUse) > $limit) {
433        $addAndAl = true;
434        $authorsToUse = array_slice($authorsToUse, 0, $limit);
435      }
436    }
437
438    $vancouver = "";
439    if (count($authorsToUse) > 0) {
440      $vancouver = implode(', ',$authorsToUse);
441      if ($addAndAl)
442        $vancouver .= ", ".$pluginObject->getConf('et_al_vancouver');
443      $vancouver .= ". ";
444    }
445
446    // Create 3 authors only
447    $limit = 3;
448    $authorsToUse = $ret["authorsVancouver"];
449    $addAndAl = false;
450    if ($limit >= 1) {
451      if (count($authorsToUse) > $limit) {
452        $addAndAl = true;
453        $authorsToUse = array_slice($authorsToUse, 0, $limit);
454      }
455    }
456    if (count($authorsToUse) > 0) {
457      $authors3 = implode(', ',$authorsToUse);
458      if ($addAndAl)
459        $authors3 .= ", ".$pluginObject->getConf('et_al_vancouver');
460      $authors3 .= ". ";
461    } else {
462      // Less than three authors
463      $authors3 = implode(', ',$authorsToUse).". ";
464    }
465    $ret["authorsLimit3"] = $authors3;
466    $ret["authorsVancouver"] = $vancouver;
467
468    // no authors -> nothing to add  Eg: pmid 12142303
469
470    // Book -> See https://pubmed.ncbi.nlm.nih.gov/30475568/?format=pubmed
471
472    // Get Mesh terms & keywords
473    $ret["mesh"] = $mesh;
474    $ret["keywords"] = $keywords;
475
476    // Remove points from the journal_iso string
477    if ($pluginObject->getConf('remove_dot_from_journal_iso') === true)
478       $ret["journal_iso"] = str_replace(".", "", $ret["journal_iso"]);
479
480    // Construct iso citation of this article
481    // Use SO from the raw medline content
482    $ret["iso"] = $ret["so"];
483    $ret = $this->createNpgCitation($ret);
484    $ret = $this->createGpnvCitation($ret);
485
486
487    $ret["similarurl"] = sprintf($this->similarURL, $ret["pmid"]);
488    $ret["citedbyurl"] = sprintf($this->citedByURL, $ret["pmid"]);
489    $ret["referencesurl"] = sprintf($this->referencesURL, $ret["pmid"]);
490
491    // Construct Vancouver citation of this article
492    // See https://www.nlm.nih.gov/bsd/uniform_requirements.html
493    if ($ret["book_title"]) {
494      // Author. <i>BookTitle</i>. country:PB;year.
495      $ret["vancouver"] = $vancouver;
496      $ret["vancouver"] .= $ret["title"]." ";
497      $ret["vancouver"] .= $ret["book_title"].". ";
498      $ret["iso"] = $ret["country"]." : ";
499      $ret["iso"] .= $ret["year"].".";
500      $ret["vancouver"] .= $ret["iso"];
501      $ret["sciencedirecturl"] = sprintf($this->scienceDirectURL, $ret["doi"]);
502      return $ret;
503    }
504    $vancouver .= $ret["title"];
505    $vancouver .= " ".$ret["so"];
506    // $vancouver .= " ".$ret["journal_iso"]."";
507    // $vancouver .= " ".$pubDate;
508    // $vancouver .= ";".$ret["vol"];
509    // if (!empty($ret["issue"]))
510    //   $vancouver .= "(".$ret["issue"].")";
511    // $vancouver .= ":".$ret["pages"];
512    $ret["vancouver"] = $vancouver;
513
514    $gg  =  "https://translate.google.com/";
515    $gg .= "?sl=auto&tl=fr&text=";
516    $gg .= rawurlencode($ret["abstract"]);
517    $gg .= "&op=translate";
518    $ret["googletranslate_abstract"] = $gg;
519    //echo print_r($ret);
520    $ret["sciencedirecturl"] = sprintf($this->scienceDirectURL, $ret["doi"]);
521    return $ret;
522  } // Ok pubmed2020
523
524
525
526  /** NPG: See https://www.em-consulte.com/revue/NPG/presentation/npg */
527  function createNpgCitation($ret) {
528    // Construct NPG ISO citation of this article
529    //%npg_iso% %year% ; %vol% (%issue%) : %pages%
530    // BOOKS
531    if (!empty($ret["book_title"])) {
532      // Trivalle C. Gérontologie préventive. Éléments de prévention du vieillissement pathologique. Paris : Masson, 2002.
533      // https://pubmed.ncbi.nlm.nih.gov/30475568/?format=pubmed
534      // Authors
535      $ret["npg_full"] = $ret["authorsLimit3"];
536      // Title
537      if (!empty($ret["translated_title"])) {
538        $t = $ret["translated_title"];
539      } else if (!empty($ret["title"])) {
540        $t = $ret["title"];
541      } else if (!empty($ret["book_title"])) {
542        $t = $ret["book_title"];
543      }
544
545      // Normalize title case
546      $t = $this->_normalizeTitleCase($t);
547
548      $ret["npg_full"] .= $t.". ";
549
550      // Town
551      if (!empty($ret["country"])) {
552        $ret["npg_full"] .= $ret["country"];
553      }
554      // Editor
555      if (!empty($ret["publisher"])) {
556        $ret["npg_full"] .= " : ".$ret["publisher"];
557      }
558      // Year
559      if (!empty($ret["year"])) {
560        $ret["npg_full"] .= ", ".$ret["year"].".";
561      }
562//       if (!empty($ret["bookaccession"])) {
563//         $ret["npg_full"] .= " https://www.ncbi.nlm.nih.gov/books/".$ret["bookaccession"];
564//       }
565      return $ret;
566    }
567    // JOURNALS
568    // Journal
569    if (!empty($ret["journal_iso"])) {
570       $npg = str_replace(".", "", $ret["journal_iso"])." ";
571    }
572    // Year
573    if (!empty($ret["year"])) {
574      $npg .= $ret["year"];
575      // Vol/Issue
576      if (!empty($ret["vol"]) || !empty($ret["issue"]))
577          $npg .= " ; ";
578      // Vol
579      if (!empty($ret["vol"]))
580          $npg .= $ret["vol"];
581      // Issue
582      if (!empty($ret["issue"]))
583          $npg .= "(".$ret["issue"].")";
584      // Pages or DOI (no pages -> add DOI)
585      if (!empty($ret["pages"])) {
586          $npg .= " : ".$this->_normalizePages($ret["pages"]).".";
587      } else if (!empty($ret["doi"])) {
588        $npg .= ", doi : ".$ret["doi"];
589//       } else if (!empty($ret["bookaccession"])) {
590//         $npg .= ", https://www.ncbi.nlm.nih.gov/books/".$ret["bookaccession"];
591      }
592    } else if (!empty($ret["doi"])) {
593      $npg .= ", doi : ".$ret["doi"];
594//     } else if (!empty($ret["bookaccession"])) {
595//       $npg .= ", https://www.ncbi.nlm.nih.gov/books/".$ret["bookaccession"];
596    }
597    $npg = trim(str_replace("  ", " ", $npg));
598    $ret["npg_iso"] = $npg;
599    $ret["npg_full"] = $ret["authorsLimit3"];
600    $t = "";
601    if (!empty($ret["translated_title"])) {
602      $t = $ret["translated_title"];
603    } else if (!empty($ret["title"])) {
604      $t = $ret["title"];
605    } else if (!empty($ret["book_title"])) {
606      $t = $ret["book_title"];
607    }
608
609    // Normalize title case
610    $t = $this->_normalizeTitleCase($t);
611
612    if (substr_compare(".", $t, -strlen($t)) === 0) {
613      mb_substr($t, 0, -1);
614    }
615
616    $ret["npg_full"] .= $t." ";
617    $ret["npg_full"] .= $ret["npg_iso"];
618
619    return $ret;
620  }
621
622  /**
623   * GPNV: See https://www.jle.com/fr/revues/gpn/espace_auteur
624   * vancouver with style mention & spaces
625   */
626  function createGpnvCitation($ret) {
627    // Construct NPG ISO citation of this article
628    //%npg_iso% %year% ; %vol% (%issue%) : %pages%
629    // BOOKS
630    if (!empty($ret["book_title"])) {
631      // Trivalle C. Gérontologie préventive. Éléments de prévention du vieillissement pathologique. Paris : Masson, 2002.
632      // https://pubmed.ncbi.nlm.nih.gov/30475568/?format=pubmed
633      // Authors
634      $ret["gpnv_full_authors"] = $ret["authorsVancouver"];
635      // Title
636      if (!empty($ret["translated_title"])) {
637        $t = $ret["translated_title"];
638      } else if (!empty($ret["title"])) {
639        $t = $ret["title"];
640      } else if (!empty($ret["book_title"])) {
641        $t = $ret["book_title"];
642      }
643      $ret["gpnv_full"] .= $t.". ";
644      // Town
645      if (!empty($ret["country"])) {
646        $ret["gpnv_full"] .= $ret["country"];
647      }
648      // Editor
649      if (!empty($ret["publisher"])) {
650        $ret["gpnv_full"] .= " : ".$ret["publisher"];
651      }
652      // Year
653      if (!empty($ret["year"])) {
654        $ret["gpnv_full"] .= ", ".$ret["year"].".";
655      }
656      // TODO: this is wrong
657      $ret["gpnv_full_title"] = $ret["gpnv_full"];
658      return $ret;
659    }
660    // JOURNALS
661    // Journal
662    if (!empty($ret["journal_iso"])) {
663      $ret["gpnv_full_journal"] = str_replace(".", "", $ret["journal_iso"])." ";
664    }
665    // Year
666    if (!empty($ret["year"])) {
667      $npg .= $ret["year"];
668      // Vol
669      if (!empty($ret["vol"])) {
670          $npg .= " ; ".$ret["vol"];
671        // Issue
672        if (!empty($ret["issue"])) {
673          $npg .= " (".$ret["issue"].")";
674        }
675        // Pages
676        if (!empty($ret["pages"])) {
677          $npg .= " : ".$this->_normalizePages($ret["pages"]).".";
678        }
679      } else if (!empty($ret["doi"])) {
680        $npg .= ", doi : ".$ret["doi"];
681//       } else if (!empty($ret["bookaccession"])) {
682//         $npg .= ", https://www.ncbi.nlm.nih.gov/books/".$ret["bookaccession"];
683      }
684//     } else if (!empty($ret["doi"])) {
685//       $npg .= ", doi : ".$ret["doi"];
686//     } else if (!empty($ret["bookaccession"])) {
687//       $npg .= ", https://www.ncbi.nlm.nih.gov/books/".$ret["bookaccession"];
688    }
689    $npg = trim(str_replace("  ", " ", $npg));
690    $ret["gpnv_full_iso"] = $npg;
691    $ret["gpnv_full_authors"] = $ret["authorsVancouver"];
692    $t = "";
693    if (!empty($ret["translated_title"])) {
694      $t = $ret["translated_title"];
695    } else if (!empty($ret["title"])) {
696      $t = $ret["title"];
697    } else if (!empty($ret["book_title"])) {
698      $t = $ret["book_title"];
699    }
700    if (substr_compare(".", $t, -strlen($t)) === 0) {
701      mb_substr($t, 0, -1);
702    }
703    $ret["gpnv_full_title"] = $t." ";
704    //$ret["gpnv_full"] .= $ret["gpnv_iso"];
705    return $ret;
706  }
707
708
709  /**
710   * Lowering title (with exceptions)
711   */
712  function _normalizeTitleCase($t) {
713    // Is title is full uppercase?
714    $low_t = ucfirst(strtolower(ucwords($t)));
715    if (mb_strtoupper($t, 'utf-8') !== $t) {
716      $words = preg_split('/[\s\-\[\]\(\)\/\'\’\"\“\”\.]+/', $t);
717      foreach ($words as $word) {
718        //echo $word.PHP_EOL;
719        if (strlen($word) > 1 && ctype_upper(str_replace("-", "", $word))) {
720          //echo $word."  ".strtolower($word)."\n";
721          //$low_t = str_replace(strtolower($word), $word, $low_t);
722          $low_t = preg_replace('/([\s\-\(\[\.\/\'])'.strtolower($word).'([\s\-\)\]\.\:\?\/\'])/i', "$1$word$2", $low_t);
723        }
724      }
725    }
726
727    // Case exceptions
728    $exceptions = Array(
729      // Countries
730      "Afghanistan", "Aland Islands", "Albania", "Algeria", "American Samoa", "Andorra", "Angola", "Anguilla", "Antarctica", "Antigua", "Argentina", "Armenia", "Aruba", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Barbuda", "Belarus", "Belgium", "Belize", "Benin", "Bermuda", "Bhutan", "Bolivia", "Bosnia", "Botswana", "Bouvet Island", "Brazil", "British Indian Ocean Trty.", "Brunei Darussalam", "Bulgaria", "Burkina Faso", "Burundi", "Caicos Islands", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Cayman Islands", "Central African Republic", "Chad", "Chile", "China", "Christmas Island", "Cocos (Keeling) Islands", "Colombia", "Comoros", "Congo", "Congo, Democratic Republic of the", "Cook Islands", "Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Falkland Islands (Malvinas)", "Faroe Islands", "Fiji", "Finland", "France", "French Guiana", "French Polynesia", "French Southern Territories", "Futuna Islands", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Gibraltar", "Greece", "Greenland", "Grenada", "Guadeloupe", "Guam", "Guatemala", "Guernsey", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Heard", "Herzegovina", "Holy See", "Honduras", "Hong Kong", "Hungary", "Iceland", "India", "Indonesia", "Iran (Islamic Republic of)", "Iraq", "Ireland", "Isle of Man", "Israel", "Italy", "Jamaica", "Jan Mayen Islands", "Japan", "Jersey", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Korea", "Korea (Democratic)", "Kuwait", "Kyrgyzstan", "Lao", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libyan Arab Jamahiriya", "Liechtenstein", "Lithuania", "Luxembourg", "Macao", "Macedonia", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands", "Martinique", "Mauritania", "Mauritius", "Mayotte", "McDonald Islands", "Mexico", "Micronesia", 
"Miquelon", "Moldova", "Monaco", "Mongolia", "Montenegro", "Montserrat", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "Netherlands Antilles", "Nevis", "New Caledonia", "New Zealand", "Nicaragua", "Niger", "Nigeria", "Niue", "Norfolk Island", "Northern Mariana Islands", "Norway", "Oman", "Pakistan", "Palau", "Palestinian Territory, Occupied", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Pitcairn", "Poland", "Portugal", "Principe", "Puerto Rico", "Qatar", "Reunion", "Romania", "Russian Federation", "Rwanda", "Saint Barthelemy", "Saint Helena", "Saint Kitts", "Saint Lucia", "Saint Martin (French part)", "Saint Pierre", "Saint Vincent", "Samoa", "San Marino", "Sao Tome", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "South Georgia", "South Sandwich Islands", "Spain", "Sri Lanka", "Sudan", "Suriname", "Svalbard", "Swaziland", "Sweden", "Switzerland", "Syrian Arab Republic", "Taiwan", "Tajikistan", "Tanzania", "Thailand", "The Grenadines", "Timor-Leste", "Tobago", "Togo", "Tokelau", "Tonga", "Trinidad", "Tunisia", "Turkey", "Turkmenistan", "Turks Islands", "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay", "US Minor Outlying Islands", "Uzbekistan", "Vanuatu", "Vatican City State", "Venezuela", "Vietnam", "Virgin Islands (British)", "Virgin Islands (US)", "Wallis", "Western Sahara", "Yemen", "Zambia", "Zimbabwe",
731      // Cities
732      "Jerusalem",
733      // Continent
734      "Europe", "Africa",
735      // Associations / Societies
736      "American Geriatrics Society",
737      "American Psychiatric Association",
738      "American College of Physicians",
739      "American Academy of Family Physicians",
740      "American College of Cardiology",
741      "American Heart Association Task Force",
742      "ACC/AHA/AAPA/ABC/ACPM/AGS/APhA/ASH/ASPC/NMA/PCNA",
743      "ESC/ESH",
744      "European Society of Hypertension",
745      "European Union Geriatric Medicine Society Working Group",
746      "European Society of Anaesthesiology",
747      "American Association for Emergency Psychiatry",
748      // Diseases
749      "Parkinson",
750      "Alzheimer",
751      "Sydenham",
752      "Asperger",
753      // Others
754      "AI",
755      "Syst-Eur",
756      "UKU-SERS-Pat",
757      "Largactil",
758      "ADRs",
759      "U.S.",
760      "Hg",
761      "SARS",
762      "CoV",
763      "COVID",
764    );
765    foreach ($exceptions as $word) {
766      //echo $word.PHP_EOL;
767      $p = strtolower($word);
768      $p = str_replace("/", "\/", $p);
769      $p = '/([\s\-\(\.\/\'\`])'.$p.'([\s\-\)\.\:\?\/\'\`])/';
770      // String exists in full lowercase
771      //echo $p." ".print_r(preg_match($p, $low_t, $matches)).PHP_EOL;
772      // Find exception in full lowercase
773      if (preg_match($p, $low_t, $matches, PREG_OFFSET_CAPTURE)) {
774        // String in full lowercase
775        //echo "*** low".PHP_EOL;
776        $low_t = preg_replace($p, "$1$word$2", $low_t);
777      } else {
778        // Find exception in full lowercase but first letter
779        $p = ucfirst(strtolower($word));
780        $p = str_replace("/", "\/", $p);
781        $p = '/([\s\-\(\.\/\'\`])'.$p.'([\s\-\)\.\:\?\/\'\`])/';
782        //echo "---> ".$low_t."  //  ".$p." ".print_r(preg_match($p, $low_t, $matches)).PHP_EOL;
783        if  (preg_match($p, $low_t, $matches)) {
784          //echo "*** Ucf".PHP_EOL;
785          $low_t = preg_replace($p, "$1$word$2", $low_t);
786        } else {
787          // Find exception in full lowercase but first letter at the start of the title
788          $p = ucfirst(strtolower($word));
789          $p = str_replace("/", "\/", $p);
790          $p = '/^'.$p.'([\s\-\)\.\:\?\/\'\`])/m';
791          //echo "---> ".$low_t."  //  ".$p." ".print_r(preg_match($p, $low_t, $matches)).PHP_EOL;
792          if  (preg_match($p, $low_t, $matches)) {
793            //echo "*** Ucf".PHP_EOL;
794            $low_t = preg_replace($p, "$word$1", $low_t);
795          }
796        }
797      }
798    } // End exception checking
799
800    // Check all sentences -> Uppercase first letter of each sentence
801    if (strpos($low_t, ". ")) {
802      // Split each sentences
803      //echo "GOT A DOT".PHP_EOL;
804      $sentences = preg_split('/\. /', $low_t);
805      $low_t = "";
806      foreach ($sentences as $sentence) {
807        //echo $sentence.PHP_EOL;
808        $low_t .= rtrim(ucfirst($sentence), '.').". ";
809      }
810      $sentences = ucfirst(strtolower(ucwords($t)));
811    }
812
813    // Check all sentences -> Uppercase first letter of each sentence
814    if (strpos($low_t, "? ")) {
815      // Split each sentences
816      //echo "GOT A DOT".PHP_EOL;
817      $sentences = preg_split('/\? /', $low_t);
818      $low_t = "";
819      foreach ($sentences as $sentence) {
820        //echo $sentence.PHP_EOL;
821        $low_t .= ucfirst($sentence);
822        if (substr($sentence, -1) !== '.')
823          $low_t .= "? ";
824      }
825      $sentences = ucfirst(strtolower(ucwords($t)));
826    }
827
828    //echo $t.PHP_EOL.PHP_EOL;
829    $t = $low_t;
830    //echo $t.PHP_EOL.PHP_EOL;
831    return $t;
832  }
833  /*
834   * Normalize case of the author's name
835   */
836  function _normalizeNameCase($name) {
837    // Only change fully uppered names (take care to spaces)
838    if (ctype_upper(str_replace(" ", "", $name))) {
839       return ucwords(mb_strtolower($name), " \t\r\n\f\v-'");
840    }
841    return $name;
842  }
843
844  /*
845   * Normamize pages number
846   */
847  function _normalizePages($pages) {
848    // Test -
849    if (strpos($pages, "-") === false) {
850      return $pages;
851    }
852    // Split pages
853    $p = mb_split("-", $pages);
854    if (count($p) !== 2) {
855      return $pages;
856    }
857    // Count number of num and compare
858    if (strlen($p[0]) !== strlen($p[1])) {
859      return $pages;
860    }
861
862    // Compare num by num
863    $length = mb_strlen($p[0]);
864    for($i = 0 ; $i < $length ; $i++) {
865       if ($p[0][$i] !== $p[1][$i]) {
866         return $p[0]."-".mb_substr($p[1], $i);
867       }
868    }
869    return $pages;
870  }
871
872  /**
873   * Correct raw abstract into $format ("html" or "wiki")
874   */
875  function _normalizeAbstract($abstract, $format = "wiki"){  // Pb: 33397541
876    $chapters = Array(
877      "Aim:",
878      "Aims:",
879      "Aims and objectives:", //
880      "Authors' conclusions:",
881      "Authors conclusions:",
882      "Background \& aims:",
883      "Background and objectives:",
884      "Background:",
885      "Background\/objectives:",
886      "Background\/aims:", //
887      "Clinical rehabilitation impact:",
888      "Clinical relevance:",
889      "Clinical significance:",
890      "Clinical trial registration:",
891      "Clinical trials registration:", //
892      "Comparison:",
893      "Conclusion:",
894      "Conclusions\/implications:",
895      "Conclusions and implications:",
896      "Conclusions and relevance:",
897      "Conclusions\/relevance:",
898      "Conclusions:",
899      "Context:",
900      "Data analysis:", //
901      "Data collection and analysis:",
902      "Data extraction:",
903      "Data extraction and synthesis:", //
904      "Data sources:", //
905      "Data sources and review methods:",
906      "Data sources and study selection:", //
907      "Data synthesis:",
908      "Design, study, and participants:",
909      "Design:",
910      "Development:", //
911      "Diagnosis of interest:",
912      "Discussion:",
913      "Discussion and conclusions:", //
914      "Discussion and conclusion:", //
915      "Discussion\/Conclusion:",  //
916      "Eligibility criteria:",
917      "Experimental design:",
918      "Exposures:",
919      "Findings:",
920      "Funding:",
921      "Implications:",
922      "Implications for nursing management:",
923      "Implications for clinical management:",
924      "Importance:", //
925      "Inclusion criteria population:",
926      "Index test:",
927      "Information sources:",
928      "Interpretation:",
929      "Intervention:",
930      "Introduction:",
931      "Keywords:",
932      "Main outcome measures:",
933      "Main outcomes and measures:",
934      "Main outcomes:",
935      "Main results:",
936      "Material and methods:",
937      "Materials \& methods:",
938      "Measurements:",
939      "Mesures:",
940      "Methodological quality:",
941      "Methodology:", //
942      "Method:",
943      "Methods:",
944      "Methods and results:", //
945      "Objective:",
946      "Objectives:",
947      "Outcomes:",
948      "Participants:",
949      "Participants\/setting:",
950      "Patients and methods:",
951      "Population:",
952      "Primary and secondary outcome measures:",
953      "Purpose and objective:",
954      "Purpose:",
955      "Purpose of the study:", //
956      "Rationale:",
957      "Recent developments:", //
958      "Recommendations for screening and assessment:", //
959      "Recommendations for management:", //
960      "Reference test:",
961      "Relevance to clinical practice:", //
962      "Research question:",
963      "Results:",
964      "Result:", //
965      "Scope:", //
966      "Search methods:",
967      "Search strategy:",
968      "Selection criteria:",
969      "Setting:",
970      "Setting and subjects:", //
971      "Setting and participants:",
972      "Settings:",
973      "Significance of results:",
974      "Statistical analysis performed:",
975      "Study design and methods:",
976      "Study design:",
977      "Study selection:",
978      "Subjects:",
979      "Subjects\/methods:",
980      "Subjects \& methods:",
981      "Subjects and methods:",
982      "Summary:", //
983      "Trial registration:",
984      "Types of studies:",
985      "Where next\?:", //
986    );
987    // Prepare output tags
988    $lf = PHP_EOL.PHP_EOL;
989    $boldS = "**";
990    $boldE = "**";
991    switch ($format) {
992      case "html": case "xhtml":
993        $boldS = "<b>"; $boldE = "</b>"; $lf = "<br><br>";
994      default: break;
995    }
996    // Sort array
997    usort($chapters, function ($a, $b) {
998       return (substr_count($a, " ") < substr_count($b, " "));
999    });
1000    // Correct some typos in abstract
1001    $abstract = str_replace("ABSTRACTObjectives:", "Objectives: ", $abstract);
1002    // Replace in abstract
1003    foreach($chapters as $c) {
1004      $pattern = "/\s*".$c."\s+/i";
1005      $c = str_replace("\\", "", $c);
1006      $abstract = preg_replace($pattern, "$lf$boldS$c$boldE ", $abstract);
1007    }
1008    // Remove first $lf of abstract
1009    if (substr($abstract, 0, strlen($lf)) === $lf) {
1010      $abstract = substr($abstract, strlen($lf));
1011    }
1012//     $info = array();
1013//     $abstract = p_render('xhtml', p_get_instructions($abstract), $info);
1014//     echo '<pre>'.$abstract.'</pre>';
1015    return $abstract;
1016  }
1017
1018
1019
1020} // class PubMed2020
1021
1022?>