<?php
/**
 * DokuWiki Spellcheck AJAX backend
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * Licence info: This spellchecker is inspired by code by Garrison Locke available
 * at http://www.broken-notebook.com/spell_checker/index.php (licensed under the Terms
 * of a BSD license). The code in this file was nearly completely rewritten for DokuWiki
 * and is licensed under GPL version 2 (See COPYING for details).
 *
 * Original Copyright notice follows:
 *
 * Copyright (c) 2005, Garrison Locke
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *   * Neither the name of the http://www.broken-notebook.com nor the names of its
 *     contributors may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 */

//fix for Opera XMLHttpRequests
if(!count($_POST)){
  // $HTTP_RAW_POST_DATA no longer exists in PHP 7+, so read the raw request
  // body from the php://input stream instead
  $spell_raw_post = file_get_contents('php://input');
  if($spell_raw_post !== false && $spell_raw_post !== ''){
    parse_str($spell_raw_post, $_POST);
  }
  unset($spell_raw_post);
}

if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../../');
require_once (DOKU_INC.'inc/init.php');
session_write_close();
require_once (DOKU_INC.'inc/utf8.php');
require_once (dirname(__FILE__).'/aspell.php');
require_once (DOKU_INC.'inc/common.php');

header('Content-Type: text/plain; charset=utf-8');

//create spell object
$spell = new Aspell($conf['lang'],null,'utf-8');
$spell->setMode(PSPELL_FAST);

//add personal dictionary
if(@file_exists(DOKU_INC.'conf/words.aspell')){
  $spell->personal = DOKU_INC.'conf/words.aspell';
}

//call the requested function
// Only the AJAX callbacks defined in this file may be dispatched; internal
// helpers such as spell_formatword() must not be reachable from the request.
$spell_calls = array('check','resume','utf8test');
if(isset($_POST['call']) && is_string($_POST['call']) &&
   in_array($_POST['call'],$spell_calls,true)){
  $call = 'spell_'.$_POST['call'];
  $call();
}else{
  print "The called function does not exist!";
}

/**
 * replaces a link with blanks of same length
 *
 * The first string is the complete link and the second optional string
 * might be the alternative text (including its leading pipe) that becomes
 * part of the result string and will be checked for spelling errors again.
 * Keeping the overall length identical keeps the offsets reported by aspell
 * valid for the original text.
 *
 * callback for preg_replace_callback
 *
 * @author Matthias Grimm <matthiasgrimm@users.sourceforge.net>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  array $matches  match groups: [1] link target, [2] '|alternative text'
 * @return string          replacement of the same length as the matched link
 */
function spaceslink($matches){
  $string  = unslash($matches[1]);
  $check   = unslash($matches[2]);
  $result  = '  ';   //opening [[ (two characters wide)
  $result .= str_pad('',utf8_strlen($string),' ');
  $result .= $check;
  $result .= '  ';   //closing ]] (two characters wide)

  return $result;
}

/**
 * Spellchecker. Called by an AJAX request
 *
 * Runs the text given in $_POST['data'] through Aspell and prints XHTML with
 * markup. The first char represents the error code:
 *
 * 0 - No spelling mistakes
 * 1 - Spelling mistakes found
 * 2 - An error occurred, error message follows
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function spell_check() {
  global $spell;
  $string   = isset($_POST['data']) ? $_POST['data'] : '';
  $misspell = false;

  // for streamlined line endings
  $string = preg_replace("/(\015\012)|(\015)/","\012",$string);
  // flags are pinned so the text handed to aspell does not depend on the
  // PHP version's default (ENT_QUOTES since PHP 8.1)
  $string = htmlspecialchars($string,ENT_COMPAT,'UTF-8');

  // make sure multiple spaces and leading are kept
  $string = preg_replace('/^ /m','&nbsp;',$string);
  $string = preg_replace('/  /','&nbsp; ',$string);

  // we need the text as array later
  $data = explode("\n",$string);

  // don't check links and medialinks for spelling errors
  $string = preg_replace_callback('/\{\{(.*?)(\|(.*?))?(\}\})/','spaceslink',$string);
  $string = preg_replace_callback('/\[\[(.*?)(\|(.*?))?(\]\])/','spaceslink',$string);

  // run aspell in terse sgml mode, ignore nbsp as correct word
  $out = '';
  $err = '';
  if(!$spell->runAspell($string,$out,$err,array('!','+html','@nbsp'))){
    print '2'; //to indicate an error
    print "An error occurred while trying to run the spellchecker:\n";
    print $err;
    return;
  }

  #use this to debug raw aspell output
  #print "1$out"; return;

  // go through the result (split() is gone in PHP 7, explode() is equivalent here)
  $lines = explode("\n",$out);
  $rcnt  = count($lines)-1;    // aspell result count
  $lcnt  = count($data)+1;     // original line counter

  // walk backwards so replacements in a line never shift offsets that are
  // still to be processed
  for($i=$rcnt; $i>=0; $i--){
    $line = trim($lines[$i]);
    if($line === ''){
      // empty line -> new source line
      // (checked first: indexing an empty string raises a warning)
      $lcnt--;
      continue;
    }
    // '@' comment, '*' no mistake, '+' root of word found, '?' word was guessed
    if(strpos('@*+?',$line[0]) !== false) continue;

    // now get the misspelled words
    if(preg_match('/^& ([^ ]+) (\d+) (\d+): (.*)/',$line,$match)){
      // match with suggestions
      $word = $match[1];
      $off  = $match[3]-1;
      $sug  = explode(', ',$match[4]);
    }elseif(preg_match('/^# ([^ ]+) (\d+)/',$line,$match)){
      // match without suggestions
      $word = $match[1];
      $off  = $match[2]-1;
      $sug  = null;
    }else{
      // couldn't parse output
      print '2';
      print "The spellchecker output couldn't be parsed.\n";
      print "Line $i:".$line;
      return;
    }

    $misspell = true;
    //aspell < 0.60 returns singlebyte offsets
    if($spell->version >= 600){
      $len = utf8_strlen($word);
      $data[$lcnt] = utf8_substr_replace($data[$lcnt],spell_formatword($word,$sug),$off,$len);
    }else{
      $len = strlen($word);
      $data[$lcnt] = substr_replace($data[$lcnt],spell_formatword($word,$sug),$off,$len);
    }

  }//end of output parsing

  // the first char returns the spell info
  $string = ($misspell ? '1' : '0').join('<br />',$data);

  if(empty($_POST['utf8'])){
    // encode multibyte chars as entities for broken Konqueror
    $string = utf8_tohtml($string);
  }

  //output
  print $string;
}

/**
 * Formats a word with needed markup for the Suggestion Popup
 *
 * Each call gets its own running number which is used for the element id
 * and as first argument of the JavaScript suggest() call.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string     $word         the misspelled word
 * @param  array|null $suggestions  suggested replacements, null if there are none
 * @return string                   XHTML link wrapping the word
 */
function spell_formatword($word,$suggestions=null){
  static $i = 1;

  $sug = '';
  if(is_array($suggestions)){
    //restrict to maximum of 7 elements
    $suggestions  = array_slice($suggestions,0,7);
    $needentities = empty($_POST['utf8']);

    $args = array();
    foreach($suggestions as $suggestion){
      // escape for the JavaScript string literal first ...
      $suggestion = addslashes($suggestion);
      // ... then for the surrounding HTML attribute. Doing it the other way
      // round breaks on PHP 8.1+, where apostrophes become &#039; before
      // addslashes() gets to see them.
      $suggestion = htmlspecialchars($suggestion,ENT_QUOTES,'UTF-8');
      if($needentities){
        //konqueror's broken UTF-8 handling needs this
        $suggestion = utf8_tohtml($suggestion);
      }
      $args[] = $suggestion;
    }

    $sug = ",'".join("','",$args)."'"; // build javascript args
  }

  $link = '<a href="javascript:ajax_spell.suggest('.$i.$sug.')" '.
          'class="spell_error" id="spell__error'.$i.'">'.htmlspecialchars($word).'</a>';
  $i++;
  return $link;
}

/**
 * Rewrite markuped XHTML back to plain Text. AJAX callback
 *
 * Reads the markup from $_POST['data'] and prints the plain text.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function spell_resume(){
  $text = isset($_POST['data']) ? $_POST['data'] : '';

  //some browsers insert newlines instead of spaces
  $text = preg_replace("/(\r\n|\n|\r)/", ' ', $text);
  $text = preg_replace("=<br */?>=i", "\n", $text);

  // remove HTML tags
  $text = strip_tags($text);

  // restore spaces (entity form and raw UTF-8 non-breaking space)
  $text = str_replace('&nbsp;',' ',$text);
  $text = str_replace("\xC2\xA0",' ',$text);

  // restore quoted special chars
  $text = unhtmlspecialchars($text);

  // check if UTF-8 is accepted
  if(empty($_POST['utf8'])){
    // protect '&' (gets removed in JS later)
    $text = str_replace('&','&amp;',$text);
    // encode multibyte chars as entities for broken Konqueror
    $text = utf8_tohtml($text);
  }

  // output
  print $text;
}

/**
 * Just send data back as received for UTF-8 testing
 */
function spell_utf8test(){
  $data = isset($_POST['data']) ? $_POST['data'] : '';
  // we need to return the raw value - substr protects against XSS
  print substr($data,0,3);
}

/**
 * Reverse htmlspecialchars
 *
 * All entities are replaced in a single pass, so text produced by one
 * replacement is never decoded a second time ('&amp;lt;' becomes '&lt;',
 * not '<').
 *
 * @author <donwilson at gmail dot com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $string     text containing HTML special char entities
 * @param  int    $quotstyle  ENT_COMPAT (decode double quotes), ENT_QUOTES
 *                            (decode both quote types) or ENT_NOQUOTES
 * @return string             the decoded text
 */
function unhtmlspecialchars($string, $quotstyle=ENT_COMPAT){
  $map = array(
    '&amp;' => '&',
    '&lt;'  => '<',
    '&gt;'  => '>',
  );

  if($quotstyle != ENT_NOQUOTES){
    $map['&quot;'] = '"';
  }
  if($quotstyle == ENT_QUOTES){
    $map['&#039;'] = '\'';
    $map['&#39;']  = '\'';
  }

  return strtr($string,$map);
}

//Setup VIM: ex: et ts=2 enc=utf-8 :