<?php
/**
 * PyCode plugin: it embeds a Python script hosted in a remote repository.
 *
 * method.php: it defines all the common methods
 *      used by the PyCode plugin.
 *
 * @author Torpedo <dgtorpedo@gmail.com>
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @package method
 */

 /**
 * This class defines all the methods used by the PyCode plugin to handle
 * the code to embed.
 *
 * @package method_pycode
 */
class method_pycode_plugin {

    /**
     * It gets the code to embed.
     *
     * @param (str) $url the url to <file>
     * @param (str) $flag flag to indicate what kind of code we want to embed:
     *              "" = all | "c" = class | "f" = function | "l" = lines
     * @param (str) $name if specified, it can be:
     *              name-lines|function|class
     *              where name-lines is of the form from:to, that is:
     *              :<#>     from the beginning to line <#>
     *              <#>:<#>  from line <#> to line <#>
     *              :        all
     * @param (str) $subname if specified, it can be only:
     *              name-class
     *              to which the function belongs
     * @return (arr) $code the code of <file> to embed; if something goes
     *              wrong it is instead a (str) error message, one of:
     *              "error" | "notfound-lns" | "notfound-def" | "notfound-cls"
     * @return (int) $sl the number of the starting line of the code
     * @return (int) $el the number of the ending line of the code
     */
    public function _get_code($url, $flag = null, $name = null, $subname = null) {
        // Contract (note the parameter order: $url, $flag, $name, $subname):
        //   returns array($code, $sl, $el) where $code is the array of the
        //   selected lines, or a (str) error message ("error",
        //   "notfound-lns", "notfound-def", "notfound-cls") on failure;
        //   $sl / $el are the 1-based first / last line numbers in <file>.
        $code_all = array();
        $code = array();
        $str_cls = "class ";
        $str_def = "def ";
        $ind_def = null;  // indentation of the "def" line relative to its class
        $ind_cls = null;  // indentation of the "class" line
        $sl = null;
        $el = null;

        try {
            // get the whole code of <file>
            $opts = array(
                "http" => array(
                    "header" => "Cache-Control: no-cache, no-store, must-revalidate"
                    )
                );
            $context = stream_context_create($opts);
            // "@" is deliberate: a failed read is reported through the
            // "error" message below instead of a PHP warning
            $code_all = @file($url, FILE_IGNORE_NEW_LINES, $context);
            if ($code_all === false or count($code_all) == 0) {
                // unreadable or empty <file>
                throw new Exception("error");
            }
            $total = count($code_all);

            // take only the code of interest
            if ($flag == "") {
                $code = $code_all;
                $sl = 1;
                $el = $total;
            }
            elseif ($flag == "l") {
                $range = explode(":", (string) $name);  // array {[0] => from, [1] => to}
                if (count($range) < 2) {
                    throw new Exception("notfound-lns");
                }
                $from = trim($range[0]);
                $to = trim($range[1]);
                // an omitted bound means "beginning of file" / "end of file"
                if ($from === "") {
                    $from = "1";
                }
                if ($to === "") {
                    $to = (string) $total;
                }
                if (ctype_digit($from) == false or ctype_digit($to) == false) {
                    throw new Exception("notfound-lns");
                }
                if ((int) $from < 1 or (int) $to < (int) $from) {
                    throw new Exception("notfound-lns");
                }
                $sl = (int) $from;
                $el = (int) $to;
                $code = array_slice($code_all, $sl - 1, $el - $sl + 1);
            }
            elseif ($flag == "f") {
                foreach ($code_all as $num => $line) {
                    $length = strlen($line);
                    $str = ltrim($line);

                    // when a class is requested, skip everything up to (and
                    // including) the line which declares it
                    if ($subname !== null and $ind_cls === null) {
                        if (strpos($str, $str_cls . $subname . "(") === 0 or
                            strpos($str, $str_cls . $subname . ":") === 0) {
                            $ind_cls = $length - strlen($str);
                        }
                        continue;
                    }

                    if (strpos($str, $str_def . $name . "(") === 0) {
                        $ind_def = $length - (int) $ind_cls - strlen($str);
                        if ($subname !== null and $ind_def <= 0) {
                            // this "def" is not nested in the class, that is
                            // we have left the class
                            if ($sl === null) {
                                // nothing collected: the class has no such
                                // method, let "notfound-def" be raised
                                $ind_def = null;
                            }
                            else {
                                $el = $num;  // the previous line is the end line
                            }
                            break;
                        }
                        $sl = $num + 1;
                        array_push($code, $line);
                    }
                    elseif ($ind_def !== null) {
                        // collapse indentation outside the function till 1st
                        // character next and test if empty or #:
                        //
                        //     class C(object):
                        //         def f(x):
                        // ^^^^^^^^^
                        // $str = "d"
                        $str = trim(substr($line, 0, $ind_cls + $ind_def + 1));
                        if (strlen($str) != 0 and $str[0] != "#") {
                            $el = $num;  // the previous line is the end line
                            break;
                        }
                        elseif (strlen($str) == 0) {
                            array_push($code, $line);
                            if ($total == $num + 1) {
                                $el = $num + 1;  // the current line is the end line
                                break;
                            }
                        }
                        // a comment starting at (or before) the column of
                        // "def" neither ends the function nor is collected
                    }
                }
                if ($subname !== null and $ind_cls === null) {
                    throw new Exception("notfound-cls");
                }
                elseif ($ind_def === null) {
                    throw new Exception("notfound-def");
                }
                if ($sl !== null and $el === null) {
                    // <file> ended before a terminating line was met
                    $el = $total;
                }
            }
            elseif ($flag == "c") {
                foreach ($code_all as $num => $line) {
                    $str = ltrim($line);
                    if (strpos($str, $str_cls . $name . "(") === 0 or
                        strpos($str, $str_cls . $name . ":") === 0) {
                        $sl = $num + 1;
                        $ind_cls = strlen($line) - strlen($str);
                        array_push($code, $line);
                    }
                    elseif ($ind_cls !== null) {
                        // collapse indentation outside the class till 1st
                        // character next and test if empty or #:
                        //
                        //     class C(object):
                        // ^^^^^
                        // $str = "c"
                        $str = trim(substr($line, 0, $ind_cls + 1));
                        if (strlen($str) != 0 and $str[0] != "#") {
                            $el = $num;  // the previous line is the end line
                            break;
                        }
                        elseif (strlen($str) == 0) {
                            array_push($code, $line);
                            if ($total == $num + 1) {
                                $el = $num + 1;  // the current line is the end line
                                break;
                            }
                        }
                        // a comment starting at (or before) the column of
                        // "class" neither ends the class nor is collected
                    }
                }
                if ($ind_cls === null) {
                    throw new Exception("notfound-cls");
                }
                if ($el === null) {
                    // <file> ended before a terminating line was met
                    $el = $total;
                }
            }
        }

        catch (Exception $error) {
            $code = $error->getMessage();  // returns the error message
        }

        return array($code, $sl, $el);
    }

    /**
     * It gets the docstring.
     *
     * @param (arr) $code the code of <file> to embed
     * @param (str) $name it can be:
     *              name-class|function
     * @param (str) $flag it indicates if we are dealing with a function or class
     *             and can be only:
     *             "c" = class | "f" = function
     * @return (arr) $brief only the descriptive part of the docstring
     */
    public function _get_docstr($code, $name = null, $flag = null) {
        // Contract: scans $code (array of lines) for the header
        // "class <name>" / "def <name>" and returns only the descriptive
        // part (the "brief") of the """docstring""" that follows it,
        // as an array of lines; an empty array if nothing is found.
        $brief = array();
        $docstr = array();
        $str_cls = "class ";
        $str_def = "def ";
        $triple = '"""';
        $re = "/(class|def)\s+(\S+?)(?=\(|\:)(.*)(?=\:)/";

        // nothing to scan when we are not given an array of lines
        if (is_array($code) == false) {
            return $docstr;
        }

        if ($flag == "c") {
            $str = $str_cls;
        }
        elseif ($flag == "f") {
            $str = $str_def;
        }
        else {
            $str = "";  // unknown flag: match on the bare name
        }

        // get only the docstring
        $i = 0;  // it becomes 1 when the pointer is inside the code of name
        $j = 0;  // it becomes 1 when the pointer is inside multiline docstring
        foreach ($code as $line) {
            $trimmed = trim($line);
            if (strpos($trimmed, $str . $name . "(") === 0 or
                strpos($trimmed, $str . $name . ":") === 0) {
                $i = 1;  // catch only docstring associated with name
                continue;
            }
            if ($i != 1) {
                continue;
            }
            $quotes = substr_count($trimmed, $triple);

            if ($j == 1) {
                // inside a multiline docstring every line is plain text,
                // even when it contains "#" or looks like a definition
                if ($quotes == 0) {
                    array_push($docstr, $line);
                }
                elseif ($quotes == 1) {
                    array_push($docstr, $line);  // end of multiline docstring
                    break;
                }
                continue;
            }

            // skip comments: a "#" counts as a comment only when it is not
            // preceded by the triple quotes which open a docstring
            $pos_hash = strpos($trimmed, "#");
            $pos_triple = strpos($trimmed, $triple);
            if ($pos_hash !== false and
                ($pos_triple === false or $pos_hash < $pos_triple)) {
                continue;
            }

            if ($quotes == 0) {
                if (preg_match($re, $trimmed) === 1) {
                    break;  // from here forward starts another code
                }
            }
            elseif ($quotes == 2) {
                array_push($docstr, $line);  // inline docstring
                break;
            }
            elseif ($quotes == 1) {
                array_push($docstr, $line);  // beginning of multiline docstring
                $j = 1;
            }
        }

        if (empty($docstr) == false) {
            // remove indentation around the docstring
            // but preserve internal indentation
            $ind = strpos($docstr[0], $triple);
            foreach ($docstr as $key => $val) {
                $docstr[$key] = substr_replace($val, "", 0, $ind);
            }
            // remove triple from first line
            $docstr[0] = trim($docstr[0], '"');
            if (strlen($docstr[0]) != 0) {
                array_push($brief, $docstr[0]);
            }
            // strip the brief from the docstring
            foreach ($docstr as $key => $line) {
                if ($key == 0) {
                    continue;
                }
                $length = strlen($line);
                $indent = $length - strlen(ltrim($line));
                $dashed = preg_match("/[-]+/", $line, $matches);
                // reST style
                if (substr($line, 0, 1) == ":") {
                    array_pop($brief);  // last one doesn't belong to brief
                    break;
                }
                // javadoc style
                elseif (substr($line, 0, 1) == "@") {
                    array_pop($brief);  // last one doesn't belong to brief
                    break;
                }
                // google style
                elseif ($indent != $length and ctype_space(substr($line, 0, $indent)) == true) {
                    $brief = array_slice($brief, 0, -2);  // last two don't belong to brief
                    break;
                }
                // numpy style
                elseif ($dashed === 1 and strlen($matches[0]) == $length) {
                    $brief = array_slice($brief, 0, -2);  // last two don't belong to brief
                    break;
                }
                elseif ($line == $triple) {
                    break;
                }
                array_push($brief, $line);
            }
            // in case, remove triple from last line; the brief can be empty
            // (e.g. a docstring made only of an "Args:" section)
            if (count($brief) > 0) {
                $last = count($brief) - 1;
                $brief[$last] = rtrim($brief[$last], '"');
            }
            // rewrite the array containing the docstrings
            $docstr = $brief;
        }

        return $docstr;
    }

    /**
     * It removes the GeSHi's wrapper element because it will be replaced with
     * the DokuWiki's one.
     *
     * This function is based on p_xhtml_cached_geshi() defined in
     * inc/parserutils.php, which has the problem that it removes from the
     * code any leading or trailing blank lines and doesn't remove the space
     * character (&nbsp;) when there is an empty line.
     *
     * Note that before DokuWiki version "Detritus" the GeSHi library was in
     * inc/geshi
     *
     * @param (str) $code the code of <file> to embed
     * @param (str) $lang the language name used in <file>
     * @return (str) $geshi_code the code cleaned
     */
    public function _get_geshi_code($code, $lang) {
        // pick the directory of the GeSHi language files: inc/geshi when
        // inc/geshi.php exists, the vendor directory otherwise
        $lib_geshi = file_exists(DOKU_INC . "inc/geshi.php")
            ? DOKU_INC . "inc/geshi"
            : DOKU_INC . "vendor/easybook/geshi/geshi";

        $highlighter = new GeSHi($code, $lang, $lib_geshi);
        $highlighter->set_encoding("utf-8");
        $highlighter->enable_classes();
        $highlighter->set_header_type(GESHI_HEADER_PRE);

        // drop the <pre ...> / </pre> wrapper and the surrounding newlines
        $html = $highlighter->parse_code();
        $html = preg_replace('!^<pre[^>]*>|</pre>$!', '', $html);
        $html = trim($html, "\n\r");

        // turn every &nbsp; placeholder into a plain newline
        return str_replace(array("&nbsp;\n", "&nbsp;"), "\n", $html);
    }

    /**
     * It gets the tree structure of a given class defined in <file>.
     *
     * @param (arr) $code_cls the code of a given class defined in <file>
     *              To get the name of the class or the methods is used the
     *              following regex:
     *              (.*)        match any character, from zero to
     *                          infinite times, as many times as possible
     *              (class|def) match the string "class" or "def"
     *              \s+         match any white space character, from one to
     *                          infinite times, as many times as possible
     *              (\S+?)      match any non-white space character, from one
     *                          to infinite times, as few times as possible
     *              (?=\(|\:)   stop previous matching until it finds ( or :
     *              (.*)        match any character, from zero to
     *                          infinite times, as many times as possible
     *              (?=\:)      stop previous matching until it finds :
     *              The result is the following array:
     *              array {
     *              [0] => (str) "class|def <name-class|function>(<parameters>):"
     *              [1] => (str) ""
     *              [2] => (str) "class|def"
     *              [3] => (str) <name-class|function>
     *              [4] => (str) "(<parameters>)"
     *              }
     * @return (arr) $tree the name of a class and its methods, that is:
     *              array {
     *              [0] => array {
     *                  [0] => (str) "class <name-class>(<parameters>)"
     *                  [1] => (str) <name-class>
     *                  [2] => (int) 0
     *                  }
     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
     *                  [1] => (str) <name-function>
     *                  [2] => (int) 0
     *                  }
     *              }
     *              where 0 means "unchanged"
     */
    public function _get_tree($code_cls) {
        // Contract: returns one entry array(<signature>, <name>, 0) for
        // every line of $code_cls which declares a class or a function;
        // 0 is the initial "unchanged" flag.
        $tree = array();
        $re = "/(.*)(class|def)\s+(\S+?)(?=\(|\:)(.*)(?=\:)/";

        // nothing to scan when we are not given an array of lines
        if (is_array($code_cls) == false) {
            return $tree;
        }

        foreach ($code_cls as $line) {
            if (preg_match($re, $line, $matches) !== 1) {
                continue;
            }
            // a declaration may be preceded only by indentation (or by the
            // "async" keyword); anything else - a "#", or some text of a
            // docstring like "The class method(s): ..." - is not code
            $prefix = trim($matches[1]);
            if ($prefix !== "" and $prefix !== "async") {
                continue;
            }
            array_push($tree, array(trim($matches[0], ": \t\n\r"), $matches[3], 0));
        }

        return $tree;
    }

    /**
     * It checks if the code got from the local copy of <file>
     * (which might not be up-to-date) is different from the code of <file>,
     * hosted in the repository (which is up-to-date).
     * The code can be:
     *       all | between two given lines | of a given function
     *
     * @param (arr) $code_new the code got from the <file> hosted in the repo
     * @param (arr) $code_old the code got from the local copy of <file>
     * @return (str) $code_dif a value which indicates if the code is
     *              changed or not:
     *              if $code_dif == "no-dif" then the code is not changed
     *              if $code_dif == "dif" then the code is changed
     */
    public function _get_code_dif ($code_new, $code_old) {
        // Contract: returns "no-dif" when both arrays hold the same lines at
        // the same positions, "dif" otherwise.

        // _get_code() returns an error message (str) instead of an array of
        // lines when it fails: such values cannot be compared line by line,
        // so the code is reported as changed
        if (is_array($code_new) == false or is_array($code_old) == false) {
            return "dif";
        }

        // lines which are identical and sit at the same position in both
        $same = array_intersect_assoc($code_new, $code_old);

        // any line missing from the intersection means a difference
        if (count($same) < max(count($code_new), count($code_old))) {
            return "dif";
        }

        return "no-dif";
    }

    /**
     * It compares the tree structure of a given class, defined in the
     * local copy of <file> (which might not be up-to-date), against the tree
     * structure of the same class defined in <file>, hosted in the
     * repository (which is up-to-date).
     *
     * It checks for:
     *      class's docstring;
     *      class's parameters changed;
     *      methods added;
     *      methods deleted;
     *      methods with parameters changed;
     *      methods with code changed.
     *
     * @param (str) $raw_url the url to the raw code of <file> in the repo
     *              Bitbucket <raw-url> =
     *              "https://bitbucket.org/<user>/<repo>/raw/<branch>/<file>"
     *              GitHub <raw-url> =
     *              "https://raw.githubusercontent.com/<user>/<repo>/<branch>/<file>"
     * @param (str) $loc_url the url to the raw code of the copy of <file> in
     *              <srv-path>/lib/plugins/pycode/tmp/<host>/<repo>/<branch>/<file>
     * @param (arr) $tree_new the name of a class and its methods, defined in
     *              <file>, hosted in the repository, and is like:
     *              array {
     *              [0] => array {
     *                  [0] => (str) "class <name-class>(<parameters>)"
     *                  [1] => (str) <name-class>
     *                  [2] => (int) 0
     *                  }
     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
     *                  [1] => (str) <name-function>
     *                  [2] => (int) 0
     *                  }
     *              }
     *              where 0 means "unchanged"
     * @param (arr) $tree_old the name of a class and its methods, defined in
     *              the local copy of <file>, and is like:
     *              array {
     *              [0] => array {
     *                  [0] => (str) "class <name-class>(<parameters>)"
     *                  [1] => (str) <name-class>
     *                  [2] => (int) 0
     *                  }
     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
     *                  [1] => (str) <name-function>
     *                  [2] => (int) 0
     *                  }
     *              }
     *              where 0 means "unchanged"
     * @return (arr) $tree_dif the name of a class and its methods and,
     *              for each of them, the differences are recorded, that is:
     *              array {
     *              [0] => array {
     *                  [0] => (str) "class <name-class>(<parameters>)"
     *                  [1] => (str) <name-class>
     *                  [2] => (int) <#>
     *                  }
     *              [i] => array {
     *                  [0] => (str) "def <name-function>(<parameters>)"
     *                  [1] => (str) <name-function>
     *                  [2] => (int) <#>
     *                  }
     *              }
     *              where <#> is a flag used by the print function of the
     *              tree structure of the given class and which tells "how"
     *              to render each name; it can take the following values:
     *              <#> = 0 means "unchanged"
     *              <#> = 1 means "changed"
     *              <#> = 2 means "deleted"
     *              <#> = 3 means "added"
     */
    public function _get_tree_dif ($raw_url, $loc_url, $tree_new, $tree_old) {
        // Contract: returns a copy of $tree_old where the third item of each
        // entry is set to 1 (changed) or 2 (deleted), followed by the
        // entries found only in $tree_new, flagged with 3 (added).
        $name = isset($tree_old[0][1]) ? $tree_old[0][1] : null;  // name class
        $tree_dif = $tree_old;  // arrays are copied by value
        $parms_old = array();
        $names_old = array();
        $parms_new = array();
        $names_new = array();
        $codes = array();

        // prepare arrays for comparison
        foreach ($tree_old as $key) {
            $parms_old[] = $key[0];
            $names_old[] = $key[1];
        }
        foreach ($tree_new as $key) {
            $parms_new[] = $key[0];
            $names_new[] = $key[1];
        }

        // check if the docstring of the class is changed
        if ($name !== null) {
            list($code_cls_old) = $this->_get_code($loc_url, "c", $name);
            list($code_cls_new) = $this->_get_code($raw_url, "c", $name);
            // on failure _get_code() returns an error message (str) instead
            // of the lines: treat it as "no code", hence "no docstring"
            if (is_array($code_cls_old) == false) {
                $code_cls_old = array();
            }
            if (is_array($code_cls_new) == false) {
                $code_cls_new = array();
            }
            $docstr_cls_old = $this->_get_docstr($code_cls_old, $name, "c");
            $docstr_cls_new = $this->_get_docstr($code_cls_new, $name, "c");
            $docstr_cls_dif = $this->_get_code_dif($docstr_cls_new, $docstr_cls_old);
            if ($docstr_cls_dif == "dif") {
                $tree_dif[0][2] = 1;
            }
        }

        // check for new entry methods (keys refer to $tree_new)
        $news = array_diff($names_new, $names_old);

        // check for methods that have been deleted (keys refer to $tree_old)
        $names = array_diff($names_old, $names_new);
        foreach ($names as $key => $val) {
            $tree_dif[$key][2] = 2;
            // rewrite old tree without methods deleted
            unset($tree_old[$key]);
            unset($parms_old[$key]);
        }

        // check for methods with parameters changed
        $parms = array_diff($parms_old, $parms_new);
        foreach ($parms as $key => $val) {
            $tree_dif[$key][2] = 1;
            // rewrite old tree without methods with parameters changed
            unset($tree_old[$key]);
        }

        // check for methods with code changed
        foreach ($tree_old as $key => $val) {
            if ($val[1] != $name) {
                $codes[$key] = $val[1];
            }
        }
        foreach ($codes as $key => $val) {
            list($code_def_old) = $this->_get_code($loc_url, "f", $val, $name);
            list($code_def_new) = $this->_get_code($raw_url, "f", $val, $name);
            if (is_array($code_def_old) == false or is_array($code_def_new) == false) {
                // one of the two codes could not be read (error message
                // instead of lines): flag the method as changed
                $tree_dif[$key][2] = 1;
                continue;
            }
            $code_def_dif = $this->_get_code_dif($code_def_new, $code_def_old);
            if ($code_def_dif == "dif") {
                $tree_dif[$key][2] = 1;
            }
        }

        // append new entry methods
        foreach ($news as $key => $val) {
            $tree_dif[] = array($tree_new[$key][0], $tree_new[$key][1], 3);
        }

        return $tree_dif;
    }

    /**
     * It rewrites the <src_url> into the <raw_url>.
     *
     * @param (str) $src_url the url to the source code of <file> in the repo
     *              Bitbucket <src-url> =
     *              "https://bitbucket.org/<user>/<repo>/src/<branch>/<file>"
     *              GitHub <src-url> =
     *              "https://github.com/<user>/<repo>/blob/<branch>/<file>"
     * @return (str) $raw_url the url to the raw code of <file> in the repo
     *              Bitbucket <raw-url> =
     *              "https://bitbucket.org/<user>/<repo>/raw/<branch>/<file>"
     *              GitHub <raw-url> =
     *              "https://raw.githubusercontent.com/<user>/<repo>/<branch>/<file>"
     */
    public function _get_raw_url($src_url) {
        // Contract: rewrites a Bitbucket / GitHub "source" url into the url
        // of the raw file; returns "" for any other host.
        $raw_url = "";

        // strip the scheme, whatever it is ("https://", "http://" or none),
        // instead of blindly cutting the first 8 characters
        $url = preg_replace('!^https?://!i', '', (string) $src_url);
        $url = explode("/", $url);  // returns an array of strings

        // depending on which kind of repo, the array returned is:
        //
        // array {
        // [0] => (str) "bitbucket.org" | "github.com"
        // [1] => (str) <user>
        // [2] => (str) <repo>
        // [3] => (str) "src" | "blob"
        // [4] => (str) <branch>
        // [5] => (str) <file>
        // }

        if (strpos($url[0], "bitbucket") !== false) {
            // Bitbucket: .../<repo>/src/... becomes .../<repo>/raw/...
            $url[3] = "raw";
            $raw_url = "https://" . implode("/", $url);
        }
        elseif (strpos($url[0], "github") !== false) {
            // GitHub: the host changes and the "blob" segment disappears
            $url[0] = "raw.githubusercontent.com";
            unset($url[3]);
            $raw_url = "https://" . implode("/", $url);
        }

        return $raw_url;
    }

    /**
     * From <src-url> it pulls out the names of <repo>, <branch>, <file>.
     *
     * @param (str) $src_url the url to the source code of <file> in the repo
     *              Bitbucket <src-url> =
     *              "https://bitbucket.org/<user>/<repo>/src/<branch>/<file>"
     *              GitHub <src-url> =
     *              "https://github.com/<user>/<repo>/blob/<branch>/<file>"
     * @return (str) $name_host the name of the service hosting
     * @return (str) $name_repo the name of the repository
     * @return (str) $name_brch the name of the branch
     * @return (str) $name_file the name of <file>
     */
    public function _get_names($src_url) {
        // Contract: returns array($name_host, $name_repo, $name_brch,
        // $name_file) pulled out of <src-url>; a part which is missing in
        // the url is returned as "".

        // strip the scheme, whatever it is ("https://", "http://" or none),
        // instead of blindly cutting the first 8 characters
        $url = preg_replace('!^https?://!i', '', (string) $src_url);
        $url = explode("/", $url);  // returns an array of strings

        // depending on which kind of repo, the array returned is:
        //
        // array {
        // [0] => (str) "bitbucket.org" | "github.com"
        // [1] => (str) <user>
        // [2] => (str) <repo>
        // [3] => (str) "src" | "blob"
        // [4] => (str) <branch>
        // [5] => (str) <file>
        // }

        // strip ".org|.com": the last four characters of the host are cut
        $name_host = (string) substr($url[0], 0, -4);
        $name_repo = isset($url[2]) ? $url[2] : "";
        $name_brch = isset($url[4]) ? $url[4] : "";
        // NOTE(review): only the first path segment after <branch> is taken,
        // so a file in a sub-directory yields the directory name - confirm
        // against the callers before changing it
        $name_file = isset($url[5]) ? $url[5] : "";

        return array($name_host, $name_repo, $name_brch, $name_file);
    }

    /**
     * It returns the path to the icon for the corresponding language.
     *
     * @param (str) $name_file the name of path/to/<file>
     * @return (str) $path_icon the name of path/to/<icon>
     */
    public function _get_icon($name_file) {
        // Contract: returns DOKU_BASE . "lib/images/fileicons/<ext>.png" when
        // an icon named after the extension of $name_file exists, the
        // generic "file.png" when the directory holds other png icons only,
        // and "" when the directory cannot be read or holds no png at all.
        $name_icon = "";
        $ext_file = pathinfo($name_file, PATHINFO_EXTENSION);

        $dir_icons = "lib/images/fileicons/";
        // scan through the absolute path, so that the result doesn't depend
        // on the current working directory
        $path_icons = DOKU_INC . $dir_icons;
        if (is_dir($path_icons) == false) {
            return $name_icon;
        }
        $files = scandir($path_icons);
        if ($files === false) {
            return $name_icon;
        }

        foreach ($files as $icon) {
            $parts_icon = pathinfo($icon);
            // ".", ".." and sub-directories have no "extension" item
            if (isset($parts_icon["extension"]) == false or
                $parts_icon["extension"] != "png") {
                continue;
            }
            if ($parts_icon["filename"] === $ext_file) {
                $name_icon = DOKU_BASE . $dir_icons . $icon;
                break;
            }
            // no dedicated icon met so far: fall back on the generic one
            $name_icon = DOKU_BASE . $dir_icons . "file.png";
        }

        return $name_icon;
    }

    /**
     * It returns the language name associated with the extension of <file>
     *
     * The language names recognized are those supported by GeSHi and defined
     * in vendor/easybook/geshi/geshi.php.
     *
     * @param (str) $name_file the name of path/to/<file>
     * @return (str) $lang the language name used in <file>
     */
    public function _get_lang($name_file) {
        // language name => extensions associated with it
        $lookup = array (
            "6502acme" => array ("a", "s", "asm", "inc"),
            "6502tasm" => array ("a", "s", "asm", "inc"),
            "6502kickass" => array ("a", "s", "asm", "inc"),
            "68000devpac" => array ("a", "s", "asm", "inc"),
            "abap" => array ("abap"),
            "actionscript" => array ("as"),
            "ada" => array ("a", "ada", "adb", "ads"),
            "apache" => array ("conf"),
            "asm" => array ("ash", "asm", "inc"),
            "asp" => array ("asp"),
            "bash" => array ("sh"),
            "bf" => array ("bf"),
            "c" => array ("c", "h"),
            "c_mac" => array ("c", "h"),
            "caddcl" => array (),
            "cadlisp" => array (),
            "cdfg" => array ("cdfg"),
            "cobol" => array ("cbl"),
            "cpp" => array ("cpp", "hpp", "C", "H", "CPP", "HPP"),
            "csharp" => array ("cs"),
            "css" => array ("css"),
            "d" => array ("d"),
            "delphi" => array ("dpk", "dpr", "pp", "pas"),
            "diff" => array ("diff", "patch"),
            "dos" => array ("bat", "cmd"),
            "gdb" => array ("kcrash", "crash", "bt"),
            "gettext" => array ("po", "pot"),
            "gml" => array ("gml"),
            "gnuplot" => array ("plt"),
            "groovy" => array ("groovy"),
            "haskell" => array ("hs"),
            "html4strict" => array ("html", "htm"),
            "ini" => array ("ini", "desktop"),
            "java" => array ("java"),
            "javascript" => array ("js"),
            "klonec" => array ("kl1"),
            "klonecpp" => array ("klx"),
            "latex" => array ("tex"),
            "lisp" => array ("lisp"),
            "lua" => array ("lua"),
            "matlab" => array ("m"),
            "mpasm" => array (),
            "mysql" => array ("sql"),
            "nsis" => array (),
            "objc" => array (),
            "oobas" => array (),
            "oracle8" => array (),
            "oracle10" => array (),
            "pascal" => array ("pas"),
            "perl" => array ("pl", "pm"),
            "php" => array ("php", "php5", "phtml", "phps"),
            "povray" => array ("pov"),
            "providex" => array ("pvc", "pvx"),
            "prolog" => array ("pl"),
            "python" => array ("py"),
            "qbasic" => array ("bi"),
            "reg" => array ("reg"),
            "ruby" => array ("rb"),
            "sas" => array ("sas"),
            "scala" => array ("scala"),
            "scheme" => array ("scm"),
            "scilab" => array ("sci"),
            "smalltalk" => array ("st"),
            "smarty" => array (),
            "tcl" => array ("tcl"),
            "vb" => array ("bas"),
            "vbnet" => array (),
            "visualfoxpro" => array (),
            "whitespace" => array ("ws"),
            "xml" => array ("xml", "svg", "xrc"),
            "z80" => array ("z80", "asm", "inc")
            );

        $wanted = pathinfo($name_file, PATHINFO_EXTENSION);

        // several languages share an extension and the LAST one listed in
        // the table wins; walking the table backwards lets us stop at the
        // first hit and still return that very language
        foreach (array_reverse($lookup, true) as $candidate => $known) {
            if (in_array($wanted, $known)) {
                return $candidate;
            }
        }

        // extension not associated with any language
        return "";
    }

    /**
     * It determines the path to the local copy of <file>.
     *
     * @param (str) $name_host the name of the service hosting
     * @param (str) $name_repo the name of the repository
     * @param (str) $name_brch the name of the branch
     * @param (str) $name_file the name of <file>
     * @return (str) $loc_url the url to the raw code of the copy of <file> in
     *      <srv-path>/lib/plugins/pycode/tmp/<host>/<repo>/<branch>/<file>
     */
    public function _get_loc_url($name_host, $name_repo, $name_brch, $name_file) {
        // the local copies live in the "tmp" directory which sits next to
        // this file, under <host>/<repo>/<branch>/<file>
        $segments = array(
            dirname(__FILE__) . "/tmp",
            $name_host,
            $name_repo,
            $name_brch,
            $name_file
            );

        return implode("/", $segments);
    }

    /**
     * It gets the directory tree, starting from the given root.
     *
     * @param (str) $dir the root from which to start scanning recursively,
     *      such as: <root>/
     * @return (arr) $tree_dir the tree directories, that is:
     *      array {
     *      [0] => (str) "<root>/<dir-1>"
     *      [1] => (str) "<root>/<dir-1>/<subdir-1>"
     *      [2] => (str) "<root>/<dir-2>"
     *      [3] => (str) "<root>/<dir-2>/<subdir-2>"
     *      [i] => (str) "<root>/<dir-i>"
     *      }
     */
    public function _get_tree_dir($dir) {
        // SELF_FIRST yields every directory before its own content, so a
        // parent always precedes its sub-directories in the returned list;
        // SKIP_DOTS keeps "." and ".." out of the walk
        $walker = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($dir, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::SELF_FIRST
        );

        $dirs = array();

        foreach ($walker as $pathname => $entry) {
            // plain files are visited as well, but only directories are kept
            if ($entry->isDir() == false) {
                continue;
            }
            $dirs[] = $pathname;
        }

        return $dirs;
    }

    /**
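     * It removes the leading indentation from the code of a given function;
     * the width removed is the indentation of the first line.
     *
     * If there is a # within the removed indentation, it is kept and moved to
     * the beginning of what is left of the line.
     *
     * @param (arr) $code the lines of code to de-indent
     * @return (arr) $code_ind the lines of code with the indentation removed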
     */
    public function _remove_indent($code) {
        $code_ind = array();
        $ind = null;

        foreach ($code as $line) {
            // the width of the indentation is measured once, on the first
            // line, and the same number of characters is cut from every line
            if ($ind === null) {
                $ind = strlen($line) - strlen(ltrim($line));
            }

            // on PHP < 8 substr() returns false instead of "" when the line is
            // shorter than the indentation (e.g. a blank line); the casts make
            // sure that only strings end up in the returned array
            $head = trim((string) substr($line, 0, $ind));
            $tail = (string) substr($line, $ind);

            // a comment marker sitting in the removed columns is kept, by
            // moving it in front of what is left of the line
            if ($head !== "" and $head[0] == "#") {
                $tail = "#" . $tail;
            }

            $code_ind[] = $tail;
        }

        return $code_ind;
    }

    /**
     * It removes multiple whitespace and replaces them with only one.
     *
     * To find all whitespace (one or more) the following regex is used:
     *      \s+     match any white space character, between one and infinite
     *              times, as many times as possible
     * Then all the matches are replaced with only one white space.
     *
     * @param (str) $match the text matched
     * @return (str) $rest the text with multiple whitespace removed
     */
    public function _remove_multi_space($match) {
        // every run of one or more whitespace characters (spaces, tabs,
        // newlines, ...) collapses into a single space
        return preg_replace("/\s+/", " ", $match);
    }

    /**
     * It removes empty directories, from a given tree directory.
     *
     * It starts to remove from the bottom of the tree.
     *
     * @param (arr) $tree_dir the tree directories, that is:
     *      array {
     *      [0] => (str) "<root>/<dir-1>"
     *      [1] => (str) "<root>/<dir-1>/<subdir-1>"
     *      [2] => (str) "<root>/<dir-2>"
     *      [3] => (str) "<root>/<dir-2>/<subdir-2>"
     *      [i] => (str) "<root>/<dir-i>"
     *      }
     */
    public function _remove_empty_dir($tree_dir) {
        // a parent is listed before its sub-directories, so walking the list
        // backwards empties the tree from the bottom up: a directory whose
        // only content was empty sub-directories is empty too when reached
        foreach (array_reverse($tree_dir) as $path) {
            // the path may have vanished in the meantime (or be listed twice):
            // scandir() would return false and array_diff() would fail on it
            if (is_dir($path) == false) {
                continue;
            }

            $content = scandir($path);
            if ($content === false) {
                // unreadable directory: leave it alone
                continue;
            }

            // "." and ".." are always listed, they don't count as content
            if (count(array_diff($content, array("..", "."))) == 0) {
                rmdir($path);
            }
        }
    }

    /**
     * It saves data in a specified file.
     *
     * @param (str) $filename path to the file where to write the data.
     * @param (mix) $data the data to write; can be either a string or an array.
     */
    public function _save_code($filename, $data) {
        global $INFO;
        $dir = dirname($filename);

        // create the whole path to the file, if it's missing
        if (is_dir($dir) == false) {
            mkdir($dir, 0777, true);
        }

        // this for save code: one element of the array per line
        if (is_array($data) == true) {
            file_put_contents($filename, implode(PHP_EOL, $data) . PHP_EOL);
            return;
        }

        // this for save a list of <file>(s) embedded: it needs to know which
        // wiki page is involved, otherwise there is nothing to record
        if (is_string($data) == false
            or isset($INFO["filepath"]) == false
            or $INFO["filepath"] == "") {
            return;
        }

        // <srv-path>/data/pages/<path>/file.txt
        // we want only: <path>/file.txt
        $wiki = str_replace(DOKU_INC . "data/pages/", "", $INFO["filepath"]);

        // <srv-path>/lib/plugins/pycode/tmp/<host>/<repo>/<branch>/<file>
        // we want only: <host>/<repo>/<branch>/<file>
        $file = str_replace(DOKU_PLUGIN . "pycode/tmp/", "", $data);

        // load what has been recorded so far; a missing, unreadable or
        // corrupted file is treated like an empty list
        $arr = array();
        if (file_exists($filename) == true) {
            $stored = json_decode(file_get_contents($filename), true);
            if (is_array($stored) == true) {
                $arr = $stored;
            }
        }

        // the file can exist without an entry for this wiki page (it lists
        // other pages only): the entry has to be created before looking into it
        if (isset($arr[$wiki]) == false or is_array($arr[$wiki]) == false) {
            $arr[$wiki] = array();
        }

        // for each <file> we append in which wiki page it's embedded:
        // array {
        // ["<wiki-pg>1"] => array {
        //                   [0] => "<file>1",
        //                   [1] => "<file>2"
        //                   }
        // }
        if (in_array($file, $arr[$wiki], true) == false) {
            $arr[$wiki][] = $file;
        }

        // for save the array in a file, we convert it into a string:
        // {"<wiki-pg>1":["<file>1","<file>2"]}
        file_put_contents($filename, json_encode($arr));
    }

    /**
     * It checks if the <src_url> corresponds to one of the following.
     *
     * @param (str) $src_url the url to the source code of <file> in the repo
     *              Bitbucket <src-url> =
     *              "https://bitbucket.org/<user>/<repo>/src/<branch>/<file>"
     *              GitHub <src-url> =
     *              "https://github.com/<user>/<repo>/blob/<branch>/<file>"
     * @return (str) $src_url if the url is wrong it returns the string "error"
     *              otherwise the given url.
     */
    public function _check_src_url($src_url) {
        // each supported host goes with the path marker that its own urls use
        // to introduce <branch>/<file>
        $rules = array(
            "https://bitbucket.org/" => "/src/",
            "https://github.com/" => "/blob/"
            );

        if (is_string($src_url) == true) {
            foreach ($rules as $prefix => $marker) {
                $len = strlen($prefix);

                // the url has to START with the host (finding it somewhere in
                // the middle, e.g. in a query string, is not enough) and the
                // marker has to come after it
                if (strncmp($src_url, $prefix, $len) === 0 and
                    strpos($src_url, $marker, $len) !== false) {
                    return $src_url;
                }
            }
        }

        // callers recognize a wrong url by this literal string
        return "error";
    }
}