xref: /plugin/zwidoku/ShortDescription.php (revision f82bbc904bd835fc66a3f52ffaef251433904ec2)
1*f82bbc90SS.Chekanov<?php
2*f82bbc90SS.Chekanov
3*f82bbc90SS.Chekanov/*
4*f82bbc90SS.Chekanov * Copyright (c) 2022 Sergei Chekanov
5*f82bbc90SS.Chekanov *
6*f82bbc90SS.Chekanov * This script is free software; you can redistribute it and/or modify
7*f82bbc90SS.Chekanov * it under the terms of the GNU General Public License as published by
8*f82bbc90SS.Chekanov * the Free Software Foundation; either version 2 of the License, or
9*f82bbc90SS.Chekanov * (at your option) any later version.
10*f82bbc90SS.Chekanov *
11*f82bbc90SS.Chekanov * The GNU General Public License can be found at
12*f82bbc90SS.Chekanov * http://www.gnu.org/copyleft/gpl.html.
13*f82bbc90SS.Chekanov *
14*f82bbc90SS.Chekanov * This script is distributed in the hope that it will be useful,
15*f82bbc90SS.Chekanov * but WITHOUT ANY WARRANTY; without even the implied warranty of
16*f82bbc90SS.Chekanov * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17*f82bbc90SS.Chekanov * GNU General Public License for more details.
18*f82bbc90SS.Chekanov */
19*f82bbc90SS.Chekanov
20*f82bbc90SS.Chekanov
/**
 * Derives a one-sentence description of an article.
 *
 * The description is taken, in order of preference, from an
 * {{Abstract|...}} template, from a {{Short description|...}} template
 * (both looked up in the wiki markup), or from the first sufficiently long
 * sentence of the plain-text version of the article.
 *
 * Word counts rely on str_word_count(), which only recognises the
 * alphabetic characters of the current locale and does not support
 * multibyte locales; non-Latin text may therefore be under-counted.
 */
class ShortDescription
{
    /** Original MediaWiki record (wiki markup). */
    private $record_wiki = '';

    /** Plain-text version of the same record. */
    private $record_txt = '';

    /**
     * Constructor.
     *
     * @param  string $record_wiki article as wiki text
     * @param  string $record_txt  article as plain text
     */
    public function __construct($record_wiki, $record_txt)
    {
        $this->record_wiki = $record_wiki;
        $this->record_txt = $record_txt;
    }

    /**
     * Get the first long sentence of a plain text.
     *
     * Lines with fewer than 5 words (headings, table residue, ...) are
     * dropped, at most 30 of the remaining lines are joined with single
     * spaces, and the first sentence of that text holding at least 4 words
     * is returned.
     *
     * @param  string $string plain text
     * @return string first long sentence, or '' when there is none
     */
    protected function get_first_sentence($string)
    {
        // Cast so that a null record does not reach preg_split().
        $lines = preg_split("/\r\n|\n|\r/", (string) $string);
        if ($lines === false) {
            return '';
        }

        // Keep only "long" lines (5 words or more), and no more than 30 of them.
        $chunks = [];
        foreach ($lines as $line) {
            if (str_word_count($line, 0) > 4) {
                $chunks[] = trim($line);
                if (count($chunks) >= 30) {
                    break;
                }
            }
        }
        $text = implode(' ', $chunks);

        // A sentence ends with '.', '?' or '!' followed by whitespace and a letter.
        $sentences = preg_split('/(?<=[.?!])\s+(?=[a-z])/i', $text);
        if ($sentences === false) {
            return '';
        }

        // Take the first sentence with at least 4 words.
        foreach ($sentences as $sentence) {
            if (str_word_count($sentence, 0) > 3) {
                return trim($sentence);
            }
        }

        return '';
    }

    /**
     * Trim surrounding whitespace and remove trailing full stops.
     *
     * @param  string $string text to clean
     * @return string cleaned text
     */
    protected function mytrim($string)
    {
        $string = trim((string) $string);
        $string = rtrim($string, '.');

        // Removing the dots may expose whitespace that preceded them.
        return trim($string);
    }

    /**
     * Get short description of the article.
     *
     * A template value is used only when it holds at least 4 words;
     * otherwise the next source is tried.
     *
     * @return string short description without a trailing full stop
     */
    public function getDescription()
    {
        $wiki = (string) $this->record_wiki;

        // Templates in priority order: EnHub style first, then Wikipedia style.
        $patterns = [
            '/{{abstract\|(.*?)}}/i',
            '/{{short description\|(.*?)}}/i',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $wiki, $match) === 1
                && str_word_count($match[1], 0) > 3
            ) {
                return $this->mytrim($match[1]);
            }
        }

        // If nothing usable is found in templates, use the plain text.
        return $this->mytrim($this->get_first_sentence($this->record_txt));
    }
} // end class
123*f82bbc90SS.Chekanov
124*f82bbc90SS.Chekanov
125*f82bbc90SS.Chekanov
126*f82bbc90SS.Chekanov// some debugging
127*f82bbc90SS.Chekanov//$wiki="Test {{Abstract|EncycloReader is a  web application designed to search multiple online encyclopedias at once}}
128*f82bbc90SS.Chekanov//'''EncycloReader''' is a web application designed to search multiple online encyclopedias at once and read articles in a unified representation  {{Author|S.V.Chekanov}}";
129*f82bbc90SS.Chekanov//$txt="{shs = ss}\n || \n Test. This  is a  web application designed to search multiple online encyclopedias at once.
130*f82bbc90SS.Chekanov//EncycloReader is a web application designed to search multiple online encyclopedias at once and read articles in a unified representation";
131*f82bbc90SS.Chekanov//$DESC = new ShortDescription($wiki, $txt);
132*f82bbc90SS.Chekanov//print($DESC->getDescription());
133*f82bbc90SS.Chekanov//print_r(get_first_sentence($txt));
134*f82bbc90SS.Chekanov
135*f82bbc90SS.Chekanov
136