<?php

namespace splitbrain\JSStrip;

/**
 * Strip comments and whitespaces from given JavaScript Code
 *
 * This is a port of Nick Galbreath's python tool jsstrip.py which is
 * released under BSD license. See link for original code.
 *
 * The compressor is a single forward pass over the input. It does not parse
 * JavaScript; it recognises comments, string literals and (heuristically)
 * regular expression literals, copies those verbatim where required and
 * removes whitespace that is adjacent to a "special" character.
 *
 * @author Nick Galbreath <nickg@modp.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 * @link http://code.google.com/p/jsstrip/
 */
class JSStrip
{

    /**
     * Tokens after which a slash is taken to start a regular expression
     * literal instead of being a division operator.
     */
    const REGEX_STARTERS = [
        '(', '=', '<', '>', '?', '[', '{', ',', ';', ':', '!', '&', '|', '+', '-', '%', '~', '^',
        'return', 'yield', 'else', 'throw', 'await'
    ];

    /** characters skipped when looking backwards for the token preceding a slash */
    const WHITESPACE_CHARS = [" ", "\t", "\n", "\r", "\0", "\x0B"];

    /**
     * items that don't need spaces next to them
     *
     * Note: `\-`, `\/` and `\?` are not PHP escape sequences, so this string
     * also contains a literal backslash character.
     */
    const CHARS = "^&|!+\-*\/%=\?:;,{}()<>% \t\n\r'\"`[]~^";

    /**
     * items which need a space if the sign before and after whitespace is equal.
     * E.g. '+ ++' may not be compressed to '+++' --> syntax error.
     */
    const OPS = "+-/";

    /** @var string the code currently being compressed (left-trimmed, with a trailing newline) */
    protected $source;

    /** @var int current read offset in $source; bound by reference to compress()'s cursor */
    protected $idx = 0;

    /** @var int line number used in the most recent error message; refreshed by fatal() */
    protected $line = 0;

    /** @var int line number (in the caller's input) on which $source starts */
    protected $firstLine = 1;

    /**
     * Compress the given code
     *
     * @param string $source The JavaScript code to compress
     * @return string
     * @throws Exception if parsing fails
     */
    public function compress($source)
    {
        $trimmed = ltrim($source); // strip all initial whitespace

        // remember how many lines the stripped prefix covered, so that error
        // messages can report line numbers of the caller's input
        $stripped = (string) substr($source, 0, strlen($source) - strlen($trimmed));
        $this->firstLine = 1 + substr_count($stripped, "\n");

        // the trailing newline guarantees that every look-ahead below has a
        // character to look at and that the last char is always "special"
        $source = $trimmed . "\n";
        $idx = 0; // char index for input string

        // track these as member variables (fatal() reads them)
        $this->source = $source;
        $this->line = $this->firstLine;
        $this->idx = &$idx;

        $slen = strlen($source); // size of input string
        $result = ''; // we store the final result here

        $beginMarker = '/* BEGIN NOCOMPRESS */';
        $endMarker = '/* END NOCOMPRESS */';

        while ($idx < $slen) {
            // copy all "boring" characters in one go. This is either a
            // reserved word (e.g. "for", "else", "if") or a
            // variable/object/method (e.g. "foo.color")
            $boring = strcspn($source, self::CHARS, $idx);
            if ($boring > 0) {
                $result .= substr($source, $idx, $boring);
                $idx += $boring;
            }

            // always valid: the appended newline is part of CHARS
            $ch = $source[$idx];

            // multiline comments (keeping IE conditionals)
            if ($ch === '/' && $source[$idx + 1] === '*' && $source[$idx + 2] !== '@') {
                $endC = strpos($source, '*/', $idx + 2);
                if ($endC === false) {
                    $this->fatal('Found invalid /*..*/ comment');
                }

                // check if this is a NOCOMPRESS comment
                if (substr($source, $idx, $endC + 2 - $idx) === $beginMarker) {
                    // take nested NOCOMPRESS comments into account: walk the
                    // BEGIN/END markers until the END matching ours is found
                    $depth = 0;
                    $nextNC = $endC;
                    do {
                        $beginNC = strpos($source, $beginMarker, $nextNC + 2);
                        $endNC = strpos($source, $endMarker, $nextNC + 2);

                        if ($endNC === false) {
                            $this->fatal('Found invalid NOCOMPRESS comment');
                        }
                        if ($beginNC !== false && $beginNC < $endNC) {
                            $depth++;
                            $nextNC = $beginNC;
                        } else {
                            $depth--;
                            $nextNC = $endNC;
                        }
                    } while ($depth >= 0);

                    // verbatim copy contents, trimming but putting it on its own line
                    $bodyStart = $idx + strlen($beginMarker);
                    $result .= "\n" . trim(substr($source, $bodyStart, $endNC - $bodyStart)) . "\n";
                    $idx = $endNC + strlen($endMarker);
                } else {
                    $idx = $endC + 2;
                }
                continue;
            }

            // singleline comments: skip up to (not including) the newline
            if ($ch === '/' && $source[$idx + 1] === '/') {
                $endC = strpos($source, "\n", $idx + 2);
                if ($endC === false) {
                    $this->fatal('Invalid comment'); // not sure this can happen
                }
                $idx = $endC;
                continue;
            }

            // tricky. might be an RE
            if ($ch === '/' && $this->startsRegex($idx)) {
                $len = $this->regexLength($idx);
                if ($len > 0) {
                    // copy the literal verbatim; flags are plain identifier
                    // chars and get copied by the next iteration
                    $result .= substr($source, $idx, $len);
                    $idx += $len;
                    continue;
                }
                // no terminating slash anywhere: this is not a regex after
                // all (e.g. "a++ / 2"), handle it like any other character
            }

            // double quote, single quote and backtick strings
            if ($ch === '"' || $ch === "'" || $ch === '`') {
                $len = $this->stringLength($idx, $ch);
                // remove multiline markers (backslash followed by newline)
                $result .= str_replace("\\\n", '', substr($source, $idx, $len));
                $idx += $len;
                continue;
            }

            // whitespaces
            if ($ch === ' ' || $ch === "\r" || $ch === "\n" || $ch === "\t") {
                $lch = (string) substr($result, -1); // last char added
                $hasNext = $idx + 1 < $slen;
                $next = $hasNext ? $source[$idx + 1] : '';

                // Only consider deleting whitespace if the signs before and after
                // are not equal and are not an operator which may not follow itself.
                $mayStrip = $hasNext && (
                    $lch === ''
                    || $next === ' '
                    || $lch !== $next
                    || strpos(self::OPS, $next) === false
                );

                if ($mayStrip) {
                    // leading spaces: the next char is special, drop this one
                    if (strpos(self::CHARS, $next) !== false) {
                        $idx++;
                        continue;
                    }
                    // trailing spaces: the last char written is special,
                    // drop this one
                    if ($lch !== '' && strpos(self::CHARS, $lch) !== false) {
                        $idx++;
                        continue;
                    }
                }

                // else after all of this convert the "whitespace" to
                // a single space. It will get appended below
                $ch = ' ';
            }

            // other chars
            $result .= $ch;
            $idx++;
        }

        return trim($result);
    }

    /**
     * Decide whether the slash at the given offset starts a regular expression
     *
     * Looks backwards over whitespace and checks whether the preceding text
     * ends in one of the REGEX_STARTERS. A slash with nothing but whitespace
     * before it (start of input) is treated as a regex start as well.
     *
     * @param int $pos offset of the slash in $this->source
     * @return bool
     */
    private function startsRegex($pos)
    {
        // rewind, skip white space - but never run off the start of the input
        $prev = $pos - 1;
        while ($prev >= 0 && in_array($this->source[$prev], self::WHITESPACE_CHARS, true)) {
            $prev--;
        }
        if ($prev < 0) {
            return true;
        }

        foreach (self::REGEX_STARTERS as $starter) {
            $len = strlen($starter);
            $start = $prev + 1 - $len;
            // a negative start would make substr() count from the end of the input
            if ($start >= 0 && substr($this->source, $start, $len) === $starter) {
                return true;
            }
        }
        return false;
    }

    /**
     * Measure the regular expression literal starting at the given offset
     *
     * Backslash escapes are skipped and a slash inside a character class
     * definition (enclosed by brackets []) does not terminate the literal.
     *
     * @param int $pos offset of the opening slash in $this->source
     * @return int length including both slashes, 0 if no terminating slash exists
     */
    private function regexLength($pos)
    {
        $slen = strlen($this->source);
        $inClass = false;
        $j = 1;

        while ($pos + $j < $slen) {
            $c = $this->source[$pos + $j];
            if ($c === '/' && !$inClass) {
                return $j + 1;
            }
            if ($c === '\\') {
                $j += 2; // skip the escaped character, whatever it is
                continue;
            }
            if ($c === '[') {
                $inClass = true;
            } elseif ($c === ']') {
                $inClass = false;
            }
            $j++;
        }

        return 0;
    }

    /**
     * Measure the string literal starting at the given offset
     *
     * Only an escaped quote and an escaped backslash are skipped as a pair,
     * every other character advances by one. An unterminated string extends
     * to the end of the input.
     *
     * @param int $pos offset of the opening quote in $this->source
     * @param string $quote the quote character that opened the string
     * @return int number of chars from the opening quote through the closing one
     */
    private function stringLength($pos, $quote)
    {
        $slen = strlen($this->source);
        $j = 1;

        while ($pos + $j < $slen && $this->source[$pos + $j] !== $quote) {
            // the look-ahead is safe: the source always ends in a newline, so
            // a backslash is never the last character
            $escaped = $this->source[$pos + $j] === '\\'
                && ($this->source[$pos + $j + 1] === $quote || $this->source[$pos + $j + 1] === '\\');
            $j += $escaped ? 2 : 1;
        }

        return $j + 1;
    }

    /**
     * Helper to throw a fatal error
     *
     * Tries to give some context to locate the error: the line number of the
     * current offset and up to 15 chars before and after it.
     *
     * The unqualified Exception resolves to splitbrain\JSStrip\Exception,
     * which is presumably declared elsewhere in this package.
     *
     * @param string $msg
     * @throws Exception
     */
    protected function fatal($msg)
    {
        // substr() takes a length, not an end offset
        $start = max(0, $this->idx - 15);
        $before = substr($this->source, $start, $this->idx - $start);
        $after = substr($this->source, $this->idx, 15);

        // count every newline up to the error position, including those
        // inside comments and strings that were skipped as a whole
        $consumed = (string) substr($this->source, 0, $this->idx);
        $this->line = $this->firstLine + substr_count($consumed, "\n");

        $msg = "$msg on line {$this->line}: '{$before}◎{$after}'";
        throw new Exception($msg);
    }
}