1<?php
2/**
3 * PyCode plugin: it embeds a Python script hosted in a remote repository.
4 *
5 * method.php: it defines all the common methods
6 *      used by the PyCode plugin.
7 *
8 * @author Torpedo <dgtorpedo@gmail.com>
9 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
10 * @package method
11 */
12
13 /**
14 * This class defines all the methods used by the PyCode plugin to handle
15 * the code to embed.
16 *
17 * @package method_pycode
18 */
19class method_pycode_plugin {
20
21    /**
22     * It gets the code to embed.
23     *
24     * @param (str) $url the url to <file>
25     * @param (str) $name if specified, it can be:
26     *              name-lines|function|class
27     *              where name-lines is of the form from:to, that is:
28     *              :<#>     from the beginning to line <#>
29     *              <#>:<#>  from line <#> to line <#>
30     *              :        all
31     * @param (str) $subname if specified, it can be only:
32     *              name-class
     *              to which the function belongs
34     * @param (str) $flag flag to indicate what kind of code we want to embed:
35     *              "" = all | "c" = class | "f" = function | "l" = lines
36     * @return (arr) $code the code of <file> to embed
37     * @return (int) $sl the number of the starting line of the code
38     * @return (int) $el the number of the ending line of the code
     *              (no $pt value is returned: the result is array($code, $sl, $el))
40     */
41    public function _get_code($url, $flag = null, $name = null, $subname = null) {
42        $code_all = array();
43        $code = array();
44        $str_cls = "class ";
45        $str_def = "def ";
46        $ind_def = null;
47        $ind_cls = null;
48        $sl = null;
49        $el = null;
50
51        try {
52            // get the whole code of <file>
53            $opts = array(
54                "http" => array(
55                    "header" => "Cache-Control: no-cache, no-store, must-revalidate"
56                    )
57                );
58            $context = stream_context_create($opts);
59            $code_all = @file($url, FILE_IGNORE_NEW_LINES, $context);
60            if ($code_all == false) {
61                throw new Exception("error");
62            }
63
64            // take only the code of interest
65            if ($flag == "") {
66                $code = $code_all;
67                $sl = 1;
68                $el = count($code);
69            }
70            elseif ($flag == "l") {
71                $name = explode(":", $name);  // array {[0] => from, [1] => to}
72                if ($name[0] == "" and $name[1] == "") {
73                    $sl = 1;
74                    $el = $name[1];
75                }
76                else {
77                    $sl = $name[0];
78                    $el = $name[1];
79                }
80                if ($sl !== null and $el !== null) {
81                    $code = array_slice($code_all, $sl - 1, $el - $sl + 1);
82                }
83                else {
84                    throw new Exception("notfound-lns");
85                }
86            }
87            elseif ($flag == "f") {
88                foreach ($code_all as $num => $line) {
89                    $length = strlen($line);
90                    $str = ltrim($line);
91                    if ($subname !== null and $ind_cls === null and
92                        strpos($str, $str_cls . $subname . "(") !== 0 and
93                        strpos($str, $str_cls . $subname. ":") !== 0) {
94                        continue;  // skip this line
95                    }
96                    elseif ($subname !== null and $ind_cls === null and
97                        (strpos($str, $str_cls . $subname . "(") === 0 or
98                        strpos($str, $str_cls . $subname. ":") === 0)) {
99                        $ind_cls = $length - strlen($str);
100                    }
101                    elseif (strpos($str, $str_def . $name . "(") === 0) {
102                        $ind_def = $length - $ind_cls - strlen($str);
103                        if ($subname !== null and $ind_cls + $ind_def <= $ind_cls) {
104                            break;
105                        }
106                        else {
107                            $sl = $num + 1;
108                            array_push($code, $line);
109                        }
110                    }
111                    elseif ($ind_def !== null) {
112                        // collapse indentation outside the function till 1st
113                        // character next and test if empty or #:
114                        //
115                        //     class C(object):
116                        //         def f(x):
117                        // ^^^^^^^^^
118                        // $str = "d"
119                        $str = trim(substr($line, 0, $ind_cls + $ind_def + 1));
120                        if ((strlen($str) != 0 and $str[0] != "#")) {
121                            $el = $num;  // the previous line is the end line
122                            break;
123                        }
124                        elseif (strlen($str) == 0 and count($code_all) != $num + 1) {
125                            array_push($code, $line);
126                        }
127                        elseif (strlen($str) == 0 and count($code_all) == $num + 1) {
128                            $el = $num + 1;  // the current line is the end line
129                            array_push($code, $line);
130                            break;
131                        }
132                    }
133                }
134                if ($subname !== null and $ind_cls === null) {
135                    throw new Exception("notfound-cls");
136                }
137                elseif ($ind_def === null) {
138                    throw new Exception("notfound-def");
139                }
140            }
141            elseif ($flag == "c") {
142                foreach ($code_all as $num => $line) {
143                    $length = strlen($line);
144                    $str = ltrim($line);
145                    if (strpos($str, $str_cls . $name . "(") === 0 or
146                        strpos($str, $str_cls . $name. ":") === 0) {
147                        $sl = $num + 1;
148                        $ind_cls = $length - strlen(ltrim($line));
149                        array_push($code, $line);
150                    }
151                    elseif ($ind_cls !== null) {
152                        // collapse indentation outside the class till 1st
153                        // character next and test if empty or #:
154                        //
155                        //     class C(object):
156                        // ^^^^^
157                        // $str = "c"
158                        $str = trim(substr($line, 0, $ind_cls + 1));
159                        if ((strlen($str) != 0 and $str[0] != "#")) {
160                            $el = $num;  // the previous line is the end line
161                            break;
162                        }
163                        elseif (strlen($str) == 0 and count($code_all) != $num + 1) {
164                            array_push($code, $line);
165                        }
166                        elseif (strlen($str) == 0 and count($code_all) == $num + 1) {
167                            $el = $num + 1;  // the current line is the end line
168                            array_push($code, $line);
169                            break;
170                        }
171                    }
172                }
173                if ($ind_cls === null) {
174                    throw new Exception("notfound-cls");
175                }
176            }
177        }
178
179        catch (Exception $error) {
180            $code = $error->getMessage();  // returns the error message
181        }
182
183        return array($code, $sl, $el);
184    }
185
186    /**
187     * It gets the docstring.
188     *
189     * @param (arr) $code the code of <file> to embed
190     * @param (str) $name it can be:
191     *              name-class|function
     * @param (str) $flag it indicates if we are dealing with a function or class
193     *             and can be only:
194     *             "c" = class | "f" = function
195     * @return (arr) $brief only the descriptive part of the docstring
196     */
197    public function _get_docstr($code, $name = null, $flag = null) {
198        $brief = array();
199        $docstr = array();
200        $str_cls = "class ";
201        $str_def = "def ";
202        $triple = '"""';
203
204        if ($flag == "c") {
205            $str = $str_cls;
206        }
207        elseif ($flag == "f") {
208            $str = $str_def;
209        }
210
211        // get only the docstring
212        $i = 0;  // it becomes 1 when the pointer is inside the code of name
213        $j = 0;  // it becomes 1 when the pointer is inside multiline docstring
214        foreach ($code as $line) {
215            $trimmed = trim($line);
216            if (strpos($trimmed, $str . $name . "(") === 0 or
217                strpos($trimmed, $str . $name. ":") === 0) {
218                $i = 1;  // catch only docstring associated with name
219            }
220            elseif ($i == 1) {
221                if (strpos($trimmed, "#") === false) {
222                    $re = "/(class|def)\s+(\S+?)(?=\(|\:)(.*)(?=\:)/";
223                    if (preg_match($re, $trimmed, $matches) === 1) {
224                        $i = 0;  // from here forward starts another code
225                        break;
226                    }
227                    // check for inline docstring
228                    elseif (substr_count($trimmed, $triple) == 2 and $j == 0) {
229                        array_push($docstr, $line);
230                        break;
231                    }
232                    // check for the begenning of multiline docstring
233                    elseif (substr_count($trimmed, $triple) == 1 and $j == 0) {
234                        array_push($docstr, $line);
235                        $j = 1;
236                    }
237                    // check for a string inside a multiline docstring
238                    elseif (substr_count($trimmed, $triple) == 0 and $j == 1) {
239                        array_push($docstr, $line);
240                    }
241                    // check for the end of multilene docstring
242                    elseif (substr_count($trimmed, $triple) == 1 and $j == 1) {
243                        array_push($docstr, $line);
244                        break;
245                    }
246                }
247            }
248        }
249
250        if (empty($docstr) == false) {
251            // remove indentation around the docstring
252            // but preserve internal indentation
253            $ind = strpos($docstr[0], $triple);
254            foreach ($docstr as $key => $val) {
255                $docstr[$key] = substr_replace($val, "", 0, $ind);
256            }
257            // remove triple from first line
258            $docstr[0] = trim($docstr[0], '"');
259            if (strlen($docstr[0]) != 0) {
260                array_push($brief, $docstr[0]);
261            }
262            // strip the brief from the docstring
263            foreach ($docstr as $key => $line) {
264                if ($key == 0) {
265                    continue;
266                }
267                $length = strlen($line);
268                $indent = $length - strlen(ltrim($line));
269                $dashed = preg_match("/[-]+/", $line, $matches);
270                // reST style
271                if (substr($line, 0, 1) == ":") {
272                    array_pop($brief);  // last one doesn't belong to brief
273                    break;
274                }
275                // javadoc style
276                elseif (substr($line, 0, 1) == "@") {
277                    array_pop($brief);  // last one doesn't belong to brief
278                    break;
279                }
280                // google style
281                elseif ($indent != $length and ctype_space(substr($line, 0, $indent)) == true) {
282                    $brief = array_slice($brief, 0, -2);  // last two don't belong to brief
283                    break;
284                }
285                // numpy style
286                elseif ($dashed === 1 and strlen($matches[0]) == $length) {
287                    $brief = array_slice($brief, 0, -2);  // last two don't belong to brief
288                    break;
289                }
290                elseif ($line == $triple) {
291                    break;
292                }
293                array_push($brief, $line);
294            }
295            // in case, remove triple from last line
296            $brief[count($brief) - 1] = rtrim($brief[count($brief) - 1], '"');
297            // rewrite the array containing the docstrings
298            $docstr = $brief;
299        }
300
301        return $docstr;
302    }
303
304    /**
     * It removes the GeSHi's wrapper element because it will be replaced with
     * the DokuWiki's one.
     *
     * This function is based on p_xhtml_cached_geshi() defined in
     * inc/parserutils.php, which has the problem that it removes from the
     * code any leading or trailing blank lines and doesn't remove the space
     * character (&nbsp;) when there is an empty line.
312     *
313     * Note that before DokuWiki version "Detritus" the GeSHi library was in
314     * inc/geshi
315     *
316     * @param (str) $code the code of <file> to embed
317     * @param (str) $lang the language name used in <file>
318     * @return (str) $geshi_code the code cleaned
319     */
320    public function _get_geshi_code($code, $lang) {
321        if (file_exists(DOKU_INC . "inc/geshi.php") == true) {
322            $lib_geshi = DOKU_INC . "inc/geshi";
323        }
324        else {
325            $lib_geshi = DOKU_INC . "vendor/easybook/geshi/geshi";
326        }
327        $geshi = new GeSHi($code, $lang, $lib_geshi);
328        $geshi->set_encoding("utf-8");
329        $geshi->enable_classes();
330        $geshi->set_header_type(GESHI_HEADER_PRE);
331
332        $geshi_code = trim(preg_replace('!^<pre[^>]*>|</pre>$!','',$geshi->parse_code()),"\n\r");
333        $geshi_code = str_replace(array("&nbsp;\n", "&nbsp;"), "\n", $geshi_code);
334        return $geshi_code;
335    }
336
337    /**
338     * It gets the tree structure of a given class defined in <file>.
339     *
340     * @param (arr) $code_cls the code of a given class defined in <file>
341     *              To get the name of the class or the methods is used the
342     *              following regex:
     *              (.*)        match any character, from zero to
     *                          infinite times, as many times as possible
     *              (class|def) match the string "class" or "def"
     *              \s+         match any white space character, from one to
     *                          infinite times, as many times as possible
     *              (\S+?)      match any non-white space character, from one
     *                          to infinite times, as few times as possible
     *              (?=\(|\:)   stop previous matching until it finds ( or :
     *              (.*)        match any character, from zero to
     *                          infinite times, as many times as possible
     *              (?=\:)      stop previous matching until it finds :
354     *              The result is the following array:
355     *              array {
356     *              [0] => (str) "class|def <name-class|function>(<parameters>):"
357     *              [1] => (str) ""
358     *              [2] => (str) "class|def"
359     *              [3] => (str) <name-class|function>
360     *              [4] => (str) "(<parameters>)"
361     *              }
362     * @return (arr) $tree the name of a class and its methods, that is:
363     *              array {
364     *              [0] => array {
365     *                  [0] => (str) "class <name-class>(<parameters>)"
366     *                  [1] => (str) <name-class>
367     *                  [2] => (int) 0
368     *                  }
369     *              [i] => array {
370     *                  [0] => (str) "def <name-function>(<parameters>)"
371     *                  [1] => (str) <name-function>
372     *                  [2] => (int) 0
373     *                  }
374     *              }
375     *              where 0 means "unchanged"
376     */
377    public function _get_tree($code_cls) {
378        $tree = array();
379
380        foreach ($code_cls as $line) {
381            $re = "/(.*)(class|def)\s+(\S+?)(?=\(|\:)(.*)(?=\:)/";
382            if (preg_match($re, $line, $matches) === 1) {
383                if (strpos(trim($matches[1]), "#") === false) {
384                    array_push($tree, array(trim($matches[0],": \t\n\r"), $matches[3], 0));
385                }
386            }
387        }
388
389        return $tree;
390    }
391
392    /**
     * It checks if the code got from the local copy of <file>
     * (which might not be up-to-date) is different from the code of <file>,
395     * hosted in the repository (which is up-to-date).
396     * The code can be:
397     *       all | between two given lines | of a given function
398     *
399     * @param (arr) $code_new the code got from the <file> hosted in the repo
400     * @param (arr) $code_old the code got from the local copy of <file>
401     * @return (str) $code_dif a value which indicates if the code is
402     *              changed or not:
403     *              if $code_dif == "no-dif" then the code is not changed
404     *              if $code_dif == "dif" then the code is changed
405     */
406    public function _get_code_dif ($code_new, $code_old) {
407        $code_dif = "no-dif";
408
409        $dif = array_intersect_assoc($code_new, $code_old);
410
411        if (count($dif) < max(count($code_new), count($code_old))) {
412            $code_dif = 'dif';
413        }
414
415        return $code_dif;
416    }
417
418    /**
     * It compares the tree structure of a given class, defined in the
     * local copy of <file> (which might not be up-to-date), against the tree
     * structure of the same class defined in <file>, hosted in the
     * repository (which is up-to-date).
423     *
424     * It checks for:
425     *      class's docstring;
426     *      class's parameters changed;
427     *      methods added;
428     *      methods deleted;
429     *      methods with parameters changed;
430     *      methods with code changed.
431     *
432     * @param (str) $raw_url the url to the raw code of <file> in the repo
433     *              Bitbucket <raw-url> =
434     *              "https://bitbucket.org/<user>/<repo>/raw/<branch>/<file>"
435     *              GitHub <raw-url> =
436     *              "https://raw.githubusercontent.com/<user>/<repo>/<branch>/<file>"
437     * @param (str) $loc_url the url to the raw code of the copy of <file> in
438     *              <srv-path>/lib/plugins/pycode/tmp/<host>/<repo>/<branch>/<file>
439     * @param (arr) $tree_new the name of a class and its methods, defined in
440     *              <file>, hosted in the repository, and is like:
441     *              array {
442     *              [0] => array {
443     *                  [0] => (str) "class <name-class>(<parameters>)"
444     *                  [1] => (str) <name-class>
445     *                  [2] => (int) 0
446     *                  }
447     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
449     *                  [1] => (str) <name-function>
450     *                  [2] => (int) 0
451     *                  }
452     *              }
453     *              where 0 means "unchanged"
454     * @param (arr) $tree_old the name of a class and its methods, defined in
455     *              the local copy of <file>, and is like:
456     *              array {
457     *              [0] => array {
458     *                  [0] => (str) "class <name-class>(<parameters>)"
459     *                  [1] => (str) <name-class>
460     *                  [2] => (int) 0
461     *                  }
462     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
464     *                  [1] => (str) <name-function>
465     *                  [2] => (int) 0
466     *                  }
467     *              }
468     *              where 0 means "unchanged"
     * @return (arr) $tree_dif the name of a class and its methods and,
     *              for each of them, the differences found, that is:
471     *              array {
472     *              [0] => array {
473     *                  [0] => (str) "class <name-class>(<parameters>)"
474     *                  [1] => (str) <name-class>
475     *                  [2] => (int) <#>
476     *                  }
477     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
479     *                  [1] => (str) <name-function>
480     *                  [2] => (int) <#>
481     *                  }
482     *              }
     *              where <#> is a flag used by the print function of the
     *              tree structure of the given class and which tells "how"
     *              to render each name; it can take the following values:
486     *              <#> = 0 means "unchanged"
487     *              <#> = 1 means "changed"
488     *              <#> = 2 means "deleted"
489     *              <#> = 3 means "added"
490     */
491    public function _get_tree_dif ($raw_url, $loc_url, $tree_new, $tree_old) {
492        $name = $tree_old[0][1];  // name class
493        $tree = new ArrayObject($tree_old);
494        $tree_dif = $tree->getArrayCopy();
495
496        // prepare arrays for comparison
497        foreach ($tree_old as $key) {
498            $parms_old[] = $key[0];
499            $names_old[] = $key[1];
500        }
501        foreach ($tree_new as $key) {
502            $parms_new[] = $key[0];
503            $names_new[] = $key[1];
504        }
505
506        // check if the docstring of the class is changed
507        list($code_cls_old, $sl, $el) = $this->_get_code($loc_url, "c", $name);
508        list($code_cls_new, $sl, $el) = $this->_get_code($raw_url, "c", $name);
509        $docstr_cls_old = $this->_get_docstr($code_cls_old, $name, "c");
510        $docstr_cls_new = $this->_get_docstr($code_cls_new, $name, "c");
511        $docstr_cls_dif = $this->_get_code_dif($docstr_cls_new, $docstr_cls_old);
512        if ($docstr_cls_dif == "dif") {
513            $tree_dif[0][2] = 1;
514        }
515
516        // check for new entry methods
517        $news = array_diff($names_new, $names_old);
518
519        // check for methods that have been deleted
520        $names = array_diff($names_old, $names_new);
521        foreach ($names as $key => $val) {
522            $tree_dif[$key][2] = 2;
523        }
524
525        // rewrite old tree without methods deleted
526        foreach ($names as $key => $val) {
527            unset($tree_old[$key]);
528            unset($parms_old[$key]);
529        }
530
531        // check for methods with parameters changed
532        $parms = array_diff($parms_old, $parms_new);
533        foreach ($parms as $key => $val) {
534            $tree_dif[$key][2] = 1;
535        }
536
537        // rewrite old tree without methods whith parameters changed
538        foreach ($parms as $key => $val) {
539            unset($tree_old[$key]);
540        }
541
542        // check for methods with code changed
543        foreach ($tree_old as $key => $val) {
544            if ($val[1] != $name) {
545                $codes[$key] = $val[1];
546            }
547        }
548        foreach ($codes as $key => $val) {
549            list($code_def_old, $sl, $el) = $this->_get_code($loc_url, "f", $val, $name);
550            list($code_def_new, $sl, $el) = $this->_get_code($raw_url, "f", $val, $name);
551            $code_def_dif = $this->_get_code_dif($code_def_new, $code_def_old);
552            if ($code_def_dif == "dif") {
553                $tree_dif[$key][2] = 1;
554            }
555        }
556
557        // append new entry methods
558        foreach ($news as $key => $val) {
559            $new = array ($tree_new[$key][0], $tree_new[$key][1], 3);
560            $tree_dif[] = $new;
561        }
562
563        return $tree_dif;
564    }
565
566    /**
567     * It rewrites the <src_url> into the <raw_url>.
568     *
569     * @param (str) $src_url the url to the source code of <file> in the repo
570     *              Bitbucket <src-url> =
571     *              "https://bitbucket.org/<user>/<repo>/src/<branch>/<file>"
572     *              GitHub <src-url> =
573     *              "https://github.com/<user>/<repo>/blob/<branch>/<file>"
574     * @return (str) $raw_url the url to the raw code of <file> in the repo
575     *              Bitbucket <raw-url> =
576     *              "https://bitbucket.org/<user>/<repo>/raw/<branch>/<file>"
577     *              GitHub <raw-url> =
578     *              "https://raw.githubusercontent.com/<user>/<repo>/<branch>/<file>"
579     */
580    public function _get_raw_url($src_url) {
581        $raw_url = "";
582
583        $url = substr($src_url, 8);  // strip "https://"
584        $url = explode("/", $url);  // returns an array of strings
585
586        // depending on which kind of repo, the array returned is:
587        //
588        // array {
589        // [0] => (str) "bitbucket.org"
590        // [1] => (str) <user>
591        // [2] => (str) <repo>
592        // [3] => (str) "src"
593        // [4] => (str) <branch>
594        // [5] => (str) <file>
595        // }
596        //
597        // array {
598        // [0] => (str) "github.com"
599        // [1] => (str) <user>
600        // [2] => (str) <repo>
601        // [3] => (str) "blob"
602        // [4] => (str) <branch>
603        // [5] => (str) <file>
604        // }
605
606        if (strstr($url[0], "bitbucket") == true) {
607            $url[3] = "raw";
608            $raw_url = "https://" . implode("/", $url);
609        }
610        elseif (strstr($url[0], "github") == true) {
611            $url[0] = "raw.githubusercontent.com";
612            unset($url[3]);
613            $raw_url = "https://" . implode("/", $url);
614        }
615
616        return $raw_url;
617    }
618
619    /**
620     * From <src-url> it pulls out the names of <repo>, <branch>, <file>.
621     *
622     * @param (str) $src_url the url to the source code of <file> in the repo
623     *              Bitbucket <src-url> =
624     *              "https://bitbucket.org/<user>/<repo>/src/<branch>/<file>"
625     *              GitHub <src-url> =
626     *              "https://github.com/<user>/<repo>/blob/<branch>/<file>"
627     * @return (str) $name_host the name of the service hosting
628     * @return (str) $name_repo the name of the repository
629     * @return (str) $name_brch the name of the branch
630     * @return (str) $name_file the name of <file>
631     */
632    public function _get_names($src_url) {
633        $name_host = "";
634        $name_repo = "";
635        $name_brch = "";
636        $name_file = "";
637
638        $url = substr($src_url, 8);  // strip "https://"
639        $url = explode("/", $url);  // returns an array of strings
640
641        // depending on which kind of repo, the array returned is:
642        //
643        // array {
644        // [0] => (str) "bitbucket.org"
645        // [1] => (str) <user>
646        // [2] => (str) <repo>
647        // [3] => (str) "src"
648        // [4] => (str) <branch>
649        // [5] => (str) <file>
650        // }
651        //
652        // array {
653        // [0] => (str) "github.com"
654        // [1] => (str) <user>
655        // [2] => (str) <repo>
656        // [3] => (str) "blob"
657        // [4] => (str) <branch>
658        // [5] => (str) <file>
659        // }
660
661        $name_host = substr($url[0], 0, -4);  // strip ".org|.com"
662        $name_repo = $url[2];
663        $name_brch = $url[4];
664        $name_file = $url[5];
665
666        return array($name_host, $name_repo, $name_brch, $name_file);
667    }
668
    /**
     * It returns the path to the icon for the corresponding language.
     *
     * @param (str) $name_file the name of path/to/<file>
     * @return (str) $name_icon the name of path/to/<icon>
     */
675    public function _get_icon($name_file) {
676        $name_icon = "";
677        $ext_file = pathinfo($name_file, PATHINFO_EXTENSION);
678
679        $dir_icons = "lib/images/fileicons/";
680        $files = scandir($dir_icons);
681
682        foreach($files as $icon) {
683            $parts_icon = pathinfo($icon);
684            if($parts_icon["extension"] == "png") {
685                if($parts_icon["filename"] == $ext_file) {
686                    $name_icon = DOKU_BASE . $dir_icons . $icon;
687                    break;
688                }
689                else {
690                    $name_icon = DOKU_BASE . $dir_icons . "file.png";
691                }
692            }
693        }
694
695        return $name_icon;
696    }
697
698    /**
699     * It returns the language name associated with the extension of <file>
700     *
701     * The language names recognized are those supported by GeSHi and defined
702     * in vendor/easybook/geshi/geshi.php.
703     *
704     * @param (str) $name_file the name of path/to/<file>
705     * @return (str) $lang the language name used in <file>
706     */
707    public function _get_lang($name_file) {
708        $lang = "";
709        $ext = pathinfo($name_file, PATHINFO_EXTENSION);
710
711        $lookup = array (
712            "6502acme" => array ("a", "s", "asm", "inc"),
713            "6502tasm" => array ("a", "s", "asm", "inc"),
714            "6502kickass" => array ("a", "s", "asm", "inc"),
715            "68000devpac" => array ("a", "s", "asm", "inc"),
716            "abap" => array ("abap"),
717            "actionscript" => array ("as"),
718            "ada" => array ("a", "ada", "adb", "ads"),
719            "apache" => array ("conf"),
720            "asm" => array ("ash", "asm", "inc"),
721            "asp" => array ("asp"),
722            "bash" => array ("sh"),
723            "bf" => array ("bf"),
724            "c" => array ("c", "h"),
725            "c_mac" => array ("c", "h"),
726            "caddcl" => array (),
727            "cadlisp" => array (),
728            "cdfg" => array ("cdfg"),
729            "cobol" => array ("cbl"),
730            "cpp" => array ("cpp", "hpp", "C", "H", "CPP", "HPP"),
731            "csharp" => array ("cs"),
732            "css" => array ("css"),
733            "d" => array ("d"),
734            "delphi" => array ("dpk", "dpr", "pp", "pas"),
735            "diff" => array ("diff", "patch"),
736            "dos" => array ("bat", "cmd"),
737            "gdb" => array ("kcrash", "crash", "bt"),
738            "gettext" => array ("po", "pot"),
739            "gml" => array ("gml"),
740            "gnuplot" => array ("plt"),
741            "groovy" => array ("groovy"),
742            "haskell" => array ("hs"),
743            "html4strict" => array ("html", "htm"),
744            "ini" => array ("ini", "desktop"),
745            "java" => array ("java"),
746            "javascript" => array ("js"),
747            "klonec" => array ("kl1"),
748            "klonecpp" => array ("klx"),
749            "latex" => array ("tex"),
750            "lisp" => array ("lisp"),
751            "lua" => array ("lua"),
752            "matlab" => array ("m"),
753            "mpasm" => array (),
754            "mysql" => array ("sql"),
755            "nsis" => array (),
756            "objc" => array (),
757            "oobas" => array (),
758            "oracle8" => array (),
759            "oracle10" => array (),
760            "pascal" => array ("pas"),
761            "perl" => array ("pl", "pm"),
762            "php" => array ("php", "php5", "phtml", "phps"),
763            "povray" => array ("pov"),
764            "providex" => array ("pvc", "pvx"),
765            "prolog" => array ("pl"),
766            "python" => array ("py"),
767            "qbasic" => array ("bi"),
768            "reg" => array ("reg"),
769            "ruby" => array ("rb"),
770            "sas" => array ("sas"),
771            "scala" => array ("scala"),
772            "scheme" => array ("scm"),
773            "scilab" => array ("sci"),
774            "smalltalk" => array ("st"),
775            "smarty" => array (),
776            "tcl" => array ("tcl"),
777            "vb" => array ("bas"),
778            "vbnet" => array (),
779            "visualfoxpro" => array (),
780            "whitespace" => array ("ws"),
781            "xml" => array ("xml", "svg", "xrc"),
782            "z80" => array ("z80", "asm", "inc")
783            );
784
785        foreach ($lookup as $language => $extensions) {
786            if (in_array($ext, $extensions)) {
787                $lang = $language;
788            }
789        }
790
791        return $lang;
792    }
793
794    /**
795     * It determines the path to the local copy of <file>.
796     *
     * @param (str) $name_host the name of the hosting service
798     * @param (str) $name_repo the name of the repository
799     * @param (str) $name_brch the name of the branch
800     * @param (str) $name_file the name of <file>
801     * @return (str) $loc_url the url to the raw code of the copy of <file> in
802     *      <srv-path>/lib/plugins/pycode/tmp/<host>/<repo>/<branch>/<file>
803     */
804    public function _get_loc_url($name_host, $name_repo, $name_brch, $name_file) {
805        $dir = dirname(__FILE__) . "/tmp";
806        $var = $name_host. "/" . $name_repo . "/" . $name_brch;
807        $loc_url = $dir . "/" . $var. "/" . $name_file;
808
809        return $loc_url;
810    }
811
812    /**
813     * It gets the tree directories, starting from the given root.
814     *
     * @param (str) $dir the root from which to start scanning recursively,
     *      for example: <root>/
817     * @return (arr) $tree_dir the tree directories, that is:
818     *      array {
819     *      [0] => (str) "<root>/<dir-1>"
820     *      [1] => (str) "<root>/<dir-1>/<subdir-1>"
821     *      [2] => (str) "<root>/<dir-2>"
822     *      [3] => (str) "<root>/<dir-2>/<subdir-2>"
823     *      [i] => (str) "<root>/<dir-i>"
824     *      }
825     */
826    public function _get_tree_dir($dir) {
827        $d = new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS);
828        $i = new RecursiveIteratorIterator($d, RecursiveIteratorIterator::SELF_FIRST);
829
830        $tree_dir = array();
831
832        foreach ($i as $path => $item) {
833            if ($item->isDir()) {
834                $tree_dir[] = $path;
835            }
836        }
837
838        return $tree_dir;
839    }
840
841    /**
842     * It removes the indentation outside the code of a given function.
843     *
844     * If there is a # in the indentation it is moved inside the function.
845     *
     * @param (arr) $code the code from which to remove the indentation
     * @return (arr) $code_ind the code without the outer indentation
848     */
849    public function _remove_indent($code) {
850        $code_ind = array();
851        $ind = null;
852
853        foreach ($code as $line) {
854            $length = strlen($line);
855            if ($ind === null) {
856                $ind = $length - strlen(ltrim($line));
857            }
858            $str = trim(substr($line, 0, $ind));
859            if (strlen($str) != 0 and $str[0] == "#") {
860                $line = "#" . substr($line, $ind);
861            }
862            else {
863                $line = substr($line, $ind);
864            }
865            array_push($code_ind, $line);
866        }
867
868       return $code_ind;
869    }
870
871    /**
     * It removes multiple whitespace characters and replaces them with only one.
873     *
874     * To find all whitespace (one or more) is used the following regex:
875     *      \s+     match any white space character, between one and infinite
876     *              times, as many times as possible
     * Then every match is replaced with a single white space.
878     *
879     * @param (str) $match the text matched
     * @return (str) $rest the text with multiple whitespace removed
881     */
882    public function _remove_multi_space($match) {
883        $re = "/\s+/";
884        $subst = " ";
885        $rest = preg_replace($re, $subst, $match);
886
887        return $rest;
888    }
889
890    /**
891     * It removes empty directories, from a given tree directory.
892     *
893     * It starts to remove from the bottom of the tree.
894     *
895     * @param (arr) $tree_dir the tree directories, that is:
896     *      array {
897     *      [0] => (str) "<root>/<dir-1>"
898     *      [1] => (str) "<root>/<dir-1>/<subdir-1>"
899     *      [2] => (str) "<root>/<dir-2>"
     *      [3] => (str) "<root>/<dir-2>/<subdir-2>"
901     *      [i] => (str) "<root>/<dir-i>"
902     *      }
903     */
904    public function _remove_empty_dir($tree_dir) {
905        $tree_dir = array_reverse($tree_dir);
906
907        foreach ($tree_dir as $path) {
908            $p = array_diff(scandir($path), array("..", "."));
909            if (count($p) == 0) {
910                rmdir($path);
911            }
912        }
913    }
914
915    /**
916     * It saves data in a specified file.
917     *
     * @param (str) $filename the path to the file in which to write the data.
     * @param (mix) $data the data to write; can be either a string or an array.
920     */
921    public function _save_code($filename, $data) {
922        global $INFO;
923        $dir = dirname($filename);
924
925        if (file_exists($dir) == false) {
926            mkdir($dir, 0777, true);
927        }
928
929        // this for save code
930        if (is_array($data) == true) {
931            $data = implode(PHP_EOL, $data) . PHP_EOL;
932            file_put_contents($filename, $data);
933        }
934        // this for save a list of <file>(s) embedded
935        elseif (is_string($data) == true and $INFO["filepath"] != "") {
936            // <srv-path>/data/pages/<path>/file.txt
937            // we want only: <path>/file.txt
938            $wiki = str_replace(DOKU_INC . "data/pages/", "", $INFO["filepath"]);
939
940            // <srv-path>/lib/plugins/pycode/tmp/<host>/<repo>/<branch>/<file>
941            // we want only: <host>/<repo>/<branch>/<file>
942            $file = str_replace(DOKU_PLUGIN . "pycode/tmp/", "", $data);
943
944            if (file_exists($filename) == true) {
945                $arr = json_decode(file_get_contents($filename), true);
946            }
947            else {
948                $arr[$wiki] = array();
949            }
950
951            // for each <file> we append in which wiki page it's embedded:
952            // array {
953            // ["<wiki-pg>1"] => array {
954            //                   [0] => "<file>1",
955            //                   [1] => "<file>2"
956            //                   }
957            // }
958            if (array_search($file, $arr[$wiki]) === false) {
959                $arr[$wiki][] = $file;
960            }
961
962            // for save the array in a file, we convert it into a string:
963            // {"<wiki-pg>1":["<file>1","<file>2"]}
964            $data = json_encode($arr);
965            file_put_contents($filename, $data);
966        }
967    }
968
969    /**
970     * It checks if the <src_url> corresponds to one of the following.
971     *
972     * @param (str) $src_url the url to the source code of <file> in the repo
973     *              Bitbucket <src-url> =
974     *              "https://bitbucket.org/<user>/<repo>/src/<branch>/<file>"
975     *              GitHub <src-url> =
976     *              "https://github.com/<user>/<repo>/blob/<branch>/<file>"
977     * @return (str) $src_url if the url is wrong it returns the string "error"
978     *              otherwise the given url.
979     */
980    public function _check_src_url($src_url) {
981        try {
982            if ((strpos($src_url, "https://bitbucket.org/") === false and
983                strpos($src_url, "https://github.com/") === false)
984                or
985                (strpos($src_url, "/src/") === false and
986                 strpos($src_url, "/blob/") === false)) {
987                throw new Exception("error");
988            }
989        }
990
991        catch (Exception $error) {
992            $src_url = $error->getMessage();  // returns the error message
993        }
994
995        return $src_url;
996    }
997}
998