xref: /plugin/zwidoku/ShortDescription.php (revision f82bbc904bd835fc66a3f52ffaef251433904ec2)
1<?php
2
3/*
4 * Copyright (c) 2022 Sergei Chekanov
5 *
6 * This script is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * The GNU General Public License can be found at
12 * http://www.gnu.org/copyleft/gpl.html.
13 *
14 * This script is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 */
19
20
/**
 * Builds a one-line description of an article.
 *
 * The description is taken from an `{{abstract|...}}` or
 * `{{short description|...}}` template in the wiki markup when one with
 * enough words is present; otherwise it is the first sufficiently long
 * sentence of the plain-text rendering.
 *
 * NOTE(review): word counting relies on str_word_count(), which counts runs
 * of locale-dependent alphabetic characters. Text in non-Latin scripts may
 * never reach the word thresholds below - confirm whether such input is
 * expected.
 */
class ShortDescription
{
    /** Maximum number of qualifying plain-text lines merged before sentence splitting. */
    const MAX_LINES = 30;

    /** Minimum number of words a plain-text line needs in order to be merged. */
    const MIN_LINE_WORDS = 5;

    /** Minimum number of words a sentence needs in order to be returned. */
    const MIN_SENTENCE_WORDS = 4;

    /** Minimum number of words a template value needs in order to be accepted. */
    const MIN_TEMPLATE_WORDS = 4;

    /** Original MediaWiki record (wiki markup). */
    private $record_wiki = '';

    /** Plain-text version of the same record. */
    private $record_txt = '';

    /**
     * Constructor.
     *
     * @param  string $record_wiki  wiki markup of the article
     * @param  string $record_txt   plain text of the article
     */
    public function __construct($record_wiki, $record_txt)
    {
        // Cast so that null inputs behave like empty strings instead of
        // reaching the string functions below as null.
        $this->record_wiki = (string) $record_wiki;
        $this->record_txt = (string) $record_txt;
    }

    /**
     * Get the first sentence of a plain text that has at least
     * MIN_SENTENCE_WORDS words.
     *
     * Lines with fewer than MIN_LINE_WORDS words are dropped; at most
     * MAX_LINES of the remaining lines are joined with single spaces and
     * then split into sentences.
     *
     * @param  string $string  plain text
     * @return string first long sentence, or an empty string if none qualifies
     */
    protected function get_first_sentence($string)
    {
        $lines = preg_split("/\r\n|\n|\r/", (string) $string);
        if ($lines === false) {
            return '';
        }

        // Keep only the long lines, stopping once MAX_LINES were collected.
        $chunks = [];
        foreach ($lines as $line) {
            if (str_word_count($line, 0) < self::MIN_LINE_WORDS) {
                continue;
            }
            $chunks[] = trim($line);
            if (count($chunks) >= self::MAX_LINES) {
                break;
            }
        }
        $merged = trim(implode(' ', $chunks));

        // A sentence boundary is whitespace that follows '.', '?' or '!'
        // and precedes an ASCII letter.
        $sentences = preg_split('/(?<=[.?!])\s+(?=[a-z])/i', $merged);
        if ($sentences === false) {
            return '';
        }

        foreach ($sentences as $sentence) {
            if (str_word_count($sentence, 0) >= self::MIN_SENTENCE_WORDS) {
                return trim($sentence);
            }
        }

        return '';
    }

    /**
     * Trim surrounding whitespace and remove trailing full stops.
     *
     * @param  string $string  text to clean
     * @return string cleaned text
     */
    protected function mytrim($string)
    {
        $string = rtrim(trim((string) $string), '.');

        // Removing the dots may expose whitespace that preceded them.
        return trim($string);
    }

    /**
     * Get short description of the article.
     *
     * @return string short description (may be empty)
     */
    public function getDescription()
    {
        // Templates are tried in order: EnHub style first, then Wikipedia style.
        $patterns = [
            '/{{abstract\|(.*?)}}/i',
            '/{{short description\|(.*?)}}/i',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $this->record_wiki, $match) !== 1) {
                continue;
            }
            if (str_word_count($match[1], 0) >= self::MIN_TEMPLATE_WORDS) {
                return $this->mytrim($match[1]);
            }
        }

        // If nothing usable is found in the templates, use the plain text.
        return $this->mytrim($this->get_first_sentence($this->record_txt));
    }
} // end class
123
124
125
126// some debugging
127//$wiki="Test {{Abstract|EncycloReader is a  web application designed to search multiple online encyclopedias at once}}
128//'''EncycloReader''' is a web application designed to search multiple online encyclopedias at once and read articles in a unified representation  {{Author|S.V.Chekanov}}";
129//$txt="{shs = ss}\n || \n Test. This  is a  web application designed to search multiple online encyclopedias at once.
130//EncycloReader is a web application designed to search multiple online encyclopedias at once and read articles in a unified representation";
131//$DESC = new ShortDescription($wiki, $txt);
132//print($DESC->getDescription());
133//print_r(get_first_sentence($txt));
134
135
136