1 <?php
2 
3 namespace splitbrain\JSStrip;
4 
5 /**
6  * Strip comments and whitespaces from given JavaScript Code
7  *
8  * This is a port of Nick Galbreath's python tool jsstrip.py which is
9  * released under BSD license. See link for original code.
10  *
11  * @author Nick Galbreath <nickg@modp.com>
12  * @author Andreas Gohr <andi@splitbrain.org>
13  * @link   http://code.google.com/p/jsstrip/
14  */
class JSStrip
{

    /**
     * Tokens after which a '/' is taken to open a regular expression literal
     * instead of being the division operator.
     */
    const REGEX_STARTERS = [
        '(', '=', '<', '>', '?', '[', '{', ',', ';', ':', '!', '&', '|', '+', '-', '%', '~', '^',
        'return', 'yield', 'else', 'throw', 'await'
    ];

    /** characters skipped when looking backwards from a '/' for the preceding token */
    const WHITESPACE_CHARS = [" ", "\t", "\n", "\r", "\0", "\x0B"];

    /**
     * items that don't need spaces next to them
     *
     * Note: "\-", "\/" and "\?" are not escape sequences in a double quoted PHP string,
     * so the backslash character itself is a member of this set, too.
     */
    const CHARS = "^&|!+\-*\/%=\?:;,{}()<>% \t\n\r'\"`[]~^";

    /**
     * items which need a space if the sign before and after whitespace is equal.
     * E.g. '+ ++' may not be compressed to '+++' --> syntax error.
     */
    const OPS = "+-/";

    /** @var string the (left trimmed, newline terminated) code currently being compressed */
    protected $source;
    /** @var int byte offset into $source; bound by reference to the scanner position while compressing */
    protected $idx = 0;
    /** @var int line number used in error messages */
    protected $line = 0;

    /**
     * Compress the given code
     *
     * Removes comments and superfluous whitespace. String literals, template literals,
     * regular expression literals and everything between a BEGIN NOCOMPRESS and an
     * END NOCOMPRESS comment are copied verbatim. Block comments starting with an '@'
     * (IE conditionals) are not removed as comments.
     *
     * @param string $source The JavaScript code to compress
     * @return string
     * @throws Exception if parsing fails
     */
    public function compress($source)
    {
        $source = ltrim($source);     // strip all initial whitespace
        $source .= "\n";              // sentinel: makes the one/two char look-aheads below safe
        $idx = 0;                     // char index for input string

        // track these as member variables so fatal() can report the position
        $this->source = $source;
        $this->line = 1;
        $this->idx = &$idx;

        $slen = strlen($source);      // size of input string
        $result = '';                 // we store the final result here

        while ($idx < $slen) {
            // copy all "boring" characters in one go. This is either a
            // reserved word (e.g. "for", "else", "if") or a
            // variable/object/method (e.g. "foo.color").
            // The trailing "\n" is part of CHARS, so this always stops inside the string.
            $run = strcspn($source, self::CHARS, $idx);
            if ($run > 0) {
                $result .= substr($source, $idx, $run);
                $idx += $run;
            }

            $ch = $source[$idx];

            if ($ch === '/') {
                // a slash is never the last char (that is the sentinel newline)
                $next = $source[$idx + 1];

                // multiline comments (keeping IE conditionals)
                if ($next === '*' && $source[$idx + 2] !== '@') {
                    $endC = strpos($source, '*/', $idx + 2);
                    if ($endC === false) $this->fatal('Found invalid /*..*/ comment');

                    // check if this is a NOCOMPRESS comment
                    if (substr($source, $idx, $endC + 2 - $idx) === '/* BEGIN NOCOMPRESS */') {
                        // take nested NOCOMPRESS comments into account
                        $depth = 0;
                        $nextNC = $endC;
                        do {
                            $beginNC = strpos($source, '/* BEGIN NOCOMPRESS */', $nextNC + 2);
                            $endNC = strpos($source, '/* END NOCOMPRESS */', $nextNC + 2);

                            if ($endNC === false) $this->fatal('Found invalid NOCOMPRESS comment');
                            if ($beginNC !== false && $beginNC < $endNC) {
                                $depth++;
                                $nextNC = $beginNC;
                            } else {
                                $depth--;
                                $nextNC = $endNC;
                            }
                        } while ($depth >= 0);

                        // verbatim copy contents, trimming but putting it on its own line
                        // BEGIN comment = 22 chars, END comment = 20 chars
                        $result .= "\n" . trim(substr($source, $idx + 22, $endNC - ($idx + 22))) . "\n";
                        $idx = $endNC + 20;
                    } else {
                        $idx = $endC + 2;
                    }
                    continue;
                }

                // singleline comments: jump to the newline, which is then handled as whitespace
                if ($next === '/') {
                    $endC = strpos($source, "\n", $idx + 2);
                    if ($endC === false) $this->fatal('Invalid comment'); // not sure this can happen
                    $idx = $endC;
                    continue;
                }

                // tricky. might be an RE
                if ($this->isRegexPosition($source, $idx)) {
                    $endRE = $this->findRegexEnd($source, $idx, $slen);
                    if ($endRE !== false) {
                        $result .= substr($source, $idx, $endRE - $idx + 1);
                        $idx = $endRE + 1;
                        continue;
                    }
                    // no closing slash anywhere (e.g. "a++ / 2"): this is a plain operator,
                    // let it fall through to the generic handling below
                }
            }

            // double quote, single quote and backtick strings are copied verbatim
            if ($ch === '"' || $ch === "'" || $ch === '`') {
                $j = 1;
                while ($idx + $j < $slen && $source[$idx + $j] !== $ch) {
                    // step over an escaped quote or an escaped backslash as one unit
                    $escaped = $source[$idx + $j] === '\\'
                        && ($source[$idx + $j + 1] === $ch || $source[$idx + $j + 1] === '\\');
                    $j += $escaped ? 2 : 1;
                }
                // remove multiline markers (backslash directly followed by a newline)
                $result .= str_replace("\\\n", '', substr($source, $idx, $j + 1));
                $idx += $j + 1;
                continue;
            }

            // whitespaces
            if ($ch === ' ' || $ch === "\r" || $ch === "\n" || $ch === "\t") {
                $lch = substr($result, -1); // last char added ('' or false when nothing was added yet)
                if ($ch === "\n") $this->line++;

                if ($idx + 1 < $slen) {
                    $next = $source[$idx + 1];

                    // Only consider deleting whitespace if the signs before and after
                    // are not equal and are not an operator which may not follow itself.
                    $mustKeep = ($lch === $next) && (strpos(self::OPS, $next) !== false);
                    if (!$mustKeep) {
                        // leading spaces: the following char does not need a space in front of it
                        if (strpos(self::CHARS, $next) !== false) {
                            $idx++;
                            continue;
                        }
                        // trailing spaces: the last char written does not need a space after it
                        if ($lch !== '' && $lch !== false && strpos(self::CHARS, $lch) !== false) {
                            $idx++;
                            continue;
                        }
                    }
                }

                // else after all of this convert the "whitespace" to
                // a single space. It will get appended below
                $ch = ' ';
            }

            // other chars
            $result .= $ch;
            $idx++;
        }

        return trim($result);
    }

    /**
     * Decide whether the slash at the given offset opens a regular expression literal
     *
     * Looks backwards over whitespace and checks whether the text right before the
     * slash ends in one of the REGEX_STARTERS. A slash with nothing in front of it
     * (start of input) is treated as a regular expression as well.
     *
     * @param string $source the code being compressed
     * @param int $idx offset of the slash
     * @return bool
     */
    private function isRegexPosition($source, $idx)
    {
        // rewind, skip white space - but never run past the start of the string
        // (negative string offsets would silently read from the end instead)
        $last = $idx - 1;
        while ($last >= 0 && in_array($source[$last], self::WHITESPACE_CHARS, true)) {
            $last--;
        }
        if ($last < 0) return true;

        foreach (self::REGEX_STARTERS as $starter) {
            $len = strlen($starter);
            $from = $last + 1 - $len;
            if ($from >= 0 && substr($source, $from, $len) === $starter) return true;
        }
        return false;
    }

    /**
     * Find the slash terminating the regular expression literal opened at $start
     *
     * Backslash escaped characters are skipped. Inside a character class definition,
     * enclosed by brackets [], a '/' does not terminate the expression.
     *
     * @param string $source the code being compressed
     * @param int $start offset of the opening slash
     * @param int $slen length of $source
     * @return int|false offset of the closing slash, false if there is none
     */
    private function findRegexEnd($source, $start, $slen)
    {
        $inClass = false;
        for ($pos = $start + 1; $pos < $slen; $pos++) {
            $c = $source[$pos];
            if ($c === '\\') {
                $pos++; // the escaped char is skipped together with the backslash
                continue;
            }
            if ($inClass) {
                if ($c === ']') $inClass = false;
                continue;
            }
            if ($c === '/') return $pos;
            if ($c === '[') $inClass = true;
        }
        return false;
    }

    /**
     * Helper to throw a fatal error
     *
     * Tries to give some context to locate the error: up to 15 bytes before and
     * up to 15 bytes after the current position are quoted in the message.
     *
     * @param string $msg
     * @throws Exception
     */
    protected function fatal($msg)
    {
        $start = max(0, $this->idx - 15);
        $before = substr($this->source, $start, $this->idx - $start);
        $after = substr($this->source, $this->idx, 15);

        // Derive the line from the position. The counter kept by compress() only sees
        // newlines handled as whitespace and misses those inside comments or strings.
        $this->line = substr_count(substr($this->source, 0, $this->idx), "\n") + 1;

        $msg = "$msg on line {$this->line}: '{$before}{$after}'";
        throw new Exception($msg);
    }
}
255