1<?php
2/**
3 * DokuWiki Spellcheck AJAX backend
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8
9/**
 * Licence info: This spellchecker is inspired by code by Garrison Locke available
 * at http://www.broken-notebook.com/spell_checker/index.php (licensed under the terms
 * of a BSD license). The code in this file was nearly completely rewritten for DokuWiki
 * and is licensed under GPL version 2 (see COPYING for details).
14 *
15 * Original Copyright notice follows:
16 *
17 * Copyright (c) 2005, Garrison Locke
18 * All rights reserved.
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 *   * Redistributions of source code must retain the above copyright notice,
24 *     this list of conditions and the following disclaimer.
25 *   * Redistributions in binary form must reproduce the above copyright notice,
26 *     this list of conditions and the following disclaimer in the documentation
27 *     and/or other materials provided with the distribution.
28 *   * Neither the name of the http://www.broken-notebook.com nor the names of its
29 *     contributors may be used to endorse or promote products derived from this
30 *     software without specific prior written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
36 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
38 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
39 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
41 * OF SUCH DAMAGE.
42 */
43
// Fix for Opera XMLHttpRequests: when $_POST arrives empty, parse the raw
// request body into it ourselves.
if(!count($_POST)){
  // $HTTP_RAW_POST_DATA is only populated by old PHP versions (it was removed
  // in PHP 7), so fall back to reading the request body from php://input.
  $rawpost = !empty($HTTP_RAW_POST_DATA) ? $HTTP_RAW_POST_DATA : file_get_contents('php://input');
  if(is_string($rawpost) && $rawpost !== ''){
    parse_str($rawpost, $_POST);
  }
}

if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../../');
require_once (DOKU_INC.'inc/init.php');
session_write_close();
require_once (DOKU_INC.'inc/utf8.php');
require_once (dirname(__FILE__).'/aspell.php');
require_once (DOKU_INC.'inc/common.php');

header('Content-Type: text/plain; charset=utf-8');

//create spell object (shared with the spell_* handlers through the global scope)
$spell = new Aspell($conf['lang'],null,'utf-8');
$spell->setMode(PSPELL_FAST);

//add personal dictionary
if(@file_exists(DOKU_INC.'conf/words.aspell')){
  $spell->personal = DOKU_INC.'conf/words.aspell';
}

//call the requested function
// The 'spell_' prefix confines the request to functions carrying that prefix.
// A missing or non-string 'call' parameter is reported like an unknown
// function instead of raising a PHP notice.
$call = (isset($_POST['call']) && is_string($_POST['call'])) ? 'spell_'.$_POST['call'] : '';
if($call !== '' && function_exists($call)){
  $call();
}else{
  print "The called function does not exist!";
}
74
/**
 * Blanks out a link while keeping the overall text length
 *
 * Callback for preg_replace_callback as used in spell_check(). The first
 * captured group (the link part in front of an optional pipe) is replaced
 * by the same number of blanks. The second captured group (the optional
 * pipe with the alternative text) is kept, so that text still gets
 * checked for spelling errors. The opening and closing brackets are
 * replaced by two blanks each.
 *
 * NOTE(review): unslash() is defined outside this file. If it can change
 * the length of its input, the padding no longer matches the original
 * link length - confirm.
 *
 * @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function spaceslink($matches){
  $target = unslash($matches[1]);
  $rest   = unslash($matches[2]);

  // two blanks for the opening brackets plus one blank per character of the target
  $blanked = str_repeat(' ', 2 + utf8_strlen($target));

  // the kept text, followed by two blanks for the closing brackets
  return $blanked.$rest.'  ';
}
98
/**
 * Spellchecker. Called by an AJAX request
 *
 * Takes the text from $_POST['data'], runs it through Aspell and prints
 * XHTML in which every misspelled word is wrapped in a suggestion link
 * (see spell_formatword()). The first char of the output represents the
 * status:
 *
 * 0 - No spelling mistakes
 * 1 - Spelling mistakes found
 * 2 - An error occurred, error message follows
 *
 * When $_POST['utf8'] is missing or empty, multibyte chars are sent as
 * HTML entities.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function spell_check() {
  global $spell;
  // a missing or non-string parameter is treated like empty text
  $string   = (isset($_POST['data']) && is_string($_POST['data'])) ? $_POST['data'] : '';
  $misspell = false;

  // for streamlined line endings
  $string = preg_replace("/(\015\012)|(\015)/","\012",$string);
  $string = htmlspecialchars($string);

  // make sure multiple spaces and leading are kept
  $string = preg_replace('/^ /m','&nbsp;',$string);
  $string = preg_replace('/  /','&nbsp; ',$string);

  // we need the text as array later
  $data = explode("\n",$string);

  // don't check links and medialinks for spelling errors
  $string = preg_replace_callback('/\{\{(.*?)(\|(.*?))?(\}\})/','spaceslink',$string);
  $string = preg_replace_callback('/\[\[(.*?)(\|(.*?))?(\]\])/','spaceslink',$string);

  // run aspell in terse sgml mode, ignore nbsp as correct word
  if(!$spell->runAspell($string,$out,$err,array('!','+html','@nbsp'))){
    print '2'; //to indicate an error
    print "An error occurred while trying to run the spellchecker:\n";
    print $err;
    return;
  }

  #use this to debug raw aspell output
  #print "1$out"; return;

  // go through the result
  // (explode instead of split: the delimiter is a literal string and
  // split() is no longer available in current PHP versions)
  $lines = explode("\n",$out);
  $rcnt  = count($lines)-1;    // aspell result count
  // original line counter: starts behind the last line of $data and is
  // decremented by every empty result line met while walking backwards
  $lcnt  = count($data)+1;

  // The results are processed from the last one to the first one, so a
  // replacement never moves text in front of a result that is still to
  // be processed.
  for($i=$rcnt; $i>=0; $i--){
    $line = trim($lines[$i]);
    // test for the empty line first - accessing $line[0] on an empty
    // string would raise a PHP notice/warning
    if(empty($line)){
      // empty line -> new source line
      $lcnt--;
      continue;
    }
    $type = $line[0];
    if($type == '@') continue; // comment
    if($type == '*') continue; // no mistake in this word
    if($type == '+') continue; // root of word was found
    if($type == '?') continue; // word was guessed

    // now get the misspelled words
    if(preg_match('/^& ([^ ]+) (\d+) (\d+): (.*)/',$line,$match)){
      // match with suggestions
      $word = $match[1];
      $off  = $match[3]-1;
      $sug  = explode(', ',$match[4]);
    }elseif(preg_match('/^# ([^ ]+) (\d+)/',$line,$match)){
      // match without suggestions
      $word = $match[1];
      $off  = $match[2]-1;
      $sug  = null;
    }else{
      // couldn't parse output
      print '2';
      print "The spellchecker output couldn't be parsed.\n";
      print "Line $i:".$line;
      return;
    }

    $misspell = true;
    //aspell < 0.60 returns singlebyte offsets
    if($spell->version >= 600){
      $len  = utf8_strlen($word);
      $data[$lcnt] = utf8_substr_replace($data[$lcnt],spell_formatword($word,$sug),$off, $len);
    }else{
      $len  = strlen($word);
      $data[$lcnt] = substr_replace($data[$lcnt],spell_formatword($word,$sug),$off, $len);
    }

  }//end of output parsing

  // the first char returns the spell info
  if($misspell){
    $string = '1'.join('<br />',$data);
  }else{
    $string = '0'.join('<br />',$data);
  }

  if(empty($_POST['utf8'])){
    // encode multibyte chars as entities for broken Konqueror
    $string = utf8_tohtml($string);
  }

  //output
  print $string;
}
205
/**
 * Formats a word with needed markup for the Suggestion Popup
 *
 * Returns a link that calls ajax_spell.suggest() with a running number
 * (counted up on every call) followed by up to seven suggestions as
 * JavaScript string arguments. The same number is used in the id of the
 * link.
 *
 * @param  string     $word         the misspelled word
 * @param  array|null $suggestions  suggested replacements, null if there are none
 * @return string     the XHTML link
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function spell_formatword($word,$suggestions=null){
  static $i = 1;

  if(is_array($suggestions)){
    //restrict to maximum of 7 elements
    $suggestions = array_slice($suggestions,0,7);

    $args = array();
    foreach($suggestions as $suggestion){
      // The suggestion ends up in a JavaScript string inside an HTML
      // attribute: escape for JavaScript first, then for HTML. Doing it
      // the other way round leaves an encoded single quote without a
      // backslash, which ends the JavaScript string once the browser has
      // decoded the attribute.
      $suggestion = addslashes($suggestion);
      $suggestion = htmlspecialchars($suggestion, ENT_QUOTES, 'UTF-8');

      if(empty($_POST['utf8'])){
        //konqueror's broken UTF-8 handling needs this
        $suggestion = utf8_tohtml($suggestion);
      }

      $args[] = $suggestion;
    }

    $sug = ",'".join("','",$args)."'"; // build javascript args
  }else{
    $sug = '';
  }

  $link = '<a href="javascript:ajax_spell.suggest('.$i.$sug.')" '.
          'class="spell_error" id="spell__error'.$i.'">'.htmlspecialchars($word).'</a>';
  $i++;
  return $link;
}
236
/**
 * Rewrite markuped XHTML back to plain Text. AJAX callback
 *
 * Takes the XHTML from $_POST['data'], turns <br> tags into newlines,
 * removes all other tags, restores spaces and quoted special chars and
 * prints the result. When $_POST['utf8'] is missing or empty, '&' is
 * protected and multibyte chars are sent as HTML entities.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function spell_resume(){
  // a missing or non-string parameter is treated like empty text
  $text = (isset($_POST['data']) && is_string($_POST['data'])) ? $_POST['data'] : '';

  //some browsers insert newlines instead of spaces
  $text = preg_replace("/(\r\n|\n|\r)/", ' ', $text);
  $text = preg_replace("=<br */?>=i", "\n", $text);

  // remove HTML tags
  $text = strip_tags($text);

  // restore spaces: the entity as well as the UTF-8 encoded non-breaking space
  $text = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);

  // restore quoted special chars
  $text = unhtmlspecialchars($text);

  // check if UTF-8 is accepted
  if(empty($_POST['utf8'])){
    // protect '&' (gets removed in JS later)
    $text = str_replace('&','&amp;',$text);
    // encode multibyte chars as entities for broken Konqueror
    $text = utf8_tohtml($text);
  }

  // output
  print $text;
}
270
/**
 * Just send data back as received for UTF-8 testing
 *
 * Prints the first three bytes of $_POST['data']. A missing or
 * non-string parameter prints nothing.
 */
function spell_utf8test(){
  $data = (isset($_POST['data']) && is_string($_POST['data'])) ? $_POST['data'] : '';

  // we need to return the raw value - only three bytes are echoed, which
  // the original author relies on as protection against XSS
  print substr($data,0,3);
}
278
/**
 * Reverse htmlspecialchars
 *
 * Decodes &lt;, &gt; and &amp; always, &quot; unless $quotstyle is
 * ENT_NOQUOTES, and &#39; / &#039; only when $quotstyle is ENT_QUOTES.
 *
 * @param  string $string     text containing quoted special chars
 * @param  int    $quotstyle  ENT_COMPAT (default), ENT_QUOTES or ENT_NOQUOTES
 * @return string the decoded text
 *
 * @author <donwilson at gmail dot com>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function unhtmlspecialchars($string, $quotstyle=ENT_COMPAT){
  $search  = array('&lt;', '&gt;');
  $replace = array('<', '>');

  if($quotstyle != ENT_NOQUOTES){
    $search[]  = '&quot;';
    $replace[] = '"'; // a plain double quote, no backslash
  }
  if($quotstyle == ENT_QUOTES){
    $search[]  = '&#39;';
    $replace[] = '\'';
    $search[]  = '&#039;';
    $replace[] = '\'';
  }

  // '&amp;' has to be decoded last: otherwise a quoted entity like
  // '&amp;lt;' would first become '&lt;' and then be decoded a second time
  $search[]  = '&amp;';
  $replace[] = '&';

  return str_replace($search, $replace, $string);
}
300
301//Setup VIM: ex: et ts=2 enc=utf-8 :
302?>
303