1<?php
2
3/*
4 *  pgn4web javascript chessboard
5 *  copyright (C) 2009-2014 Paolo Casaschi
6 *  see README file and http://pgn4web.casaschi.net
7 *  for credits, license and more details
8 */
9
// Report every error level. -1 enables all levels (current and future)
// without naming individual constants such as E_STRICT, which newer PHP
// versions deprecate.
error_reporting(-1);

// Request parameters; each one has a long name and a short alias.
// targetUrl: page whose links are collected
$targetUrl = get_param("targetUrl", "tu", "");
// linkFilter: regular expression applied to the path of each link found
$linkFilterDefault = ".+.pgn$";
$linkFilter = get_param("linkFilter", "lf", $linkFilterDefault);
// frameDepth: how many levels of frames/iframes are followed
$frameDepthDefault = 0;
$frameDepth = get_param("frameDepth", "fd", $frameDepthDefault);
// viewerUrl: prefix the url-encoded link is appended to when a link is opened
$viewerUrlDefault = "viewer.php?pd=";
$viewerUrl = get_param("viewerUrl", "vu", $viewerUrlDefault);

// Boolean switches: only the literal values "true" and "t" enable them
$doubleEncodeLink = get_param("doubleEncodeLink", "del", "false");
$doubleEncodeLink = (($doubleEncodeLink == "true") || ($doubleEncodeLink == "t"));
$reverseSort = get_param("reverseSort", "rs", "false");
$reverseSort = (($reverseSort == "true") || ($reverseSort == "t"));
$headlessPage = get_param("headlessPage", "hp", "false");
$headlessPage = (($headlessPage == "true") || ($headlessPage == "t"));
$help = get_param("help", "h", "false");
$help = (($help == "true") || ($help == "t"));

// Fall back to the default when frameDepth is not a number in the 0..5 range,
// then normalize to an integer so that values such as "2.5" or "1e0" cannot
// leak a fractional or string depth into the recursion and the page output.
if ((! is_numeric($frameDepth)) || ($frameDepth < 0) || ($frameDepth > 5)) { $frameDepth = $frameDepthDefault; }
$frameDepth = (int) $frameDepth;

// Deepest frame level actually visited, updated by get_links()
$actualFrameDepth = 0;
// Matching links, filled by get_links() and rendered by print_links()
$urls = array();
get_links($targetUrl, $frameDepth);
print_links();
32
/**
 * Collects links from the page at $targetUrl into the global $urls array.
 *
 * Every anchor whose resolved URL path matches the global $linkFilter
 * (case-insensitive) is appended to $urls. While $depth is greater than zero
 * the sources of iframe/frame elements are scanned recursively with
 * $depth - 1. The global $actualFrameDepth is raised to the deepest level
 * visited ($frameDepth - $depth).
 *
 * @param string $targetUrl url of the page to scan; nothing is done when empty
 * @param int    $depth     remaining levels of frames to follow
 */
function get_links($targetUrl, $depth) {
    global $urls, $linkFilter, $frameDepth, $actualFrameDepth;

    if (! $targetUrl) { return; }

    $level = $frameDepth - $depth;
    if ($level > $actualFrameDepth) { $actualFrameDepth = $level; }

    // NOTE(review): $targetUrl originates from a request parameter and is
    // fetched as is, so any stream wrapper or local path PHP accepts is read
    // here; consider restricting it to http/https.
    $html = file_get_contents($targetUrl);

    // A failed or empty download has nothing to parse; DOMDocument::loadHTML()
    // rejects an empty source (ValueError on PHP 8), so stop here.
    if (($html === false) || ($html === '')) { return; }

    // Real-world pages are rarely valid HTML: collect libxml parse errors
    // internally instead of emitting them, then restore the previous setting.
    $dom = new DOMDocument();
    $previousErrorSetting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorSetting);
    $xpath = new DOMXPath($dom);

    // Relative links resolve against the last <base href>, if any, otherwise
    // against the page url. A <base> without href (e.g. target only) is
    // ignored and a relative href is itself resolved against the page url.
    $base = $targetUrl;
    $bases = $xpath->evaluate("/html/head//base");
    if ($bases->length > 0) {
        $baseHref = $bases->item($bases->length - 1)->getAttribute('href');
        if ($baseHref !== '') { $base = make_absolute($baseHref, $targetUrl); }
    }

    if ($depth > 0) {
        // iframes live inside the body; frame elements normally sit inside a
        // frameset that replaces the body, so they are searched in the whole
        // document (which still includes any frame found under the body).
        foreach (array("/html/body//iframe", "/html//frame") as $frameQuery) {
            foreach ($xpath->evaluate($frameQuery) as $frame) {
                $src = $frame->getAttribute('src');
                if ($src === '') { continue; }
                $url = make_absolute($src, $base);
                // Do not rescan the very page being processed
                if ($url != $targetUrl) { get_links($url, $depth - 1); }
            }
        }
    }

    foreach ($xpath->evaluate("/html/body//a") as $anchor) {
        $href = $anchor->getAttribute('href');
        // Anchors without href (named anchors) are not links
        if ($href === '') { continue; }
        $absolute = make_absolute($href, $base);
        // parse_url() yields null/false when there is no path: match against ''
        $path = (string) parse_url($absolute, PHP_URL_PATH);
        if (preg_match("@".$linkFilter."@i", $path)) {
            $urls[] = $absolute;
        }
    }
}
78
/**
 * Escapes a value for use in HTML text or inside a quoted HTML attribute.
 */
function print_links_escape_html($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Encodes a value as a double-quoted JavaScript string literal; <, >, &, '
 * and " inside the value are emitted as \uXXXX escapes.
 */
function print_links_js_literal($text) {
    $literal = json_encode((string) $text, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
    // json_encode() fails on invalid UTF-8: fall back to an empty string literal
    return ($literal === false) ? '""' : $literal;
}

/**
 * Prints the HTML page listing the collected links.
 *
 * Reads the global $urls (de-duplicated and sorted here, in reverse order when
 * $reverseSort is set) and the global request settings. Unless $headlessPage
 * is set, a header showing targetUrl, linkFilter and the frame depth is
 * printed before the list. Clicking a link opens $viewerUrl followed by the
 * url-encoded link (encoded twice when $doubleEncodeLink is set) in the
 * "pgn4web_link_viewer" window; shift-click navigates to the link itself.
 *
 * Request-supplied values and scraped urls are escaped for the context they
 * are written into (HTML text, HTML attribute, JavaScript string).
 */
function print_links() {
    global $urls, $reverseSort, $targetUrl, $linkFilter, $frameDepth, $viewerUrl, $doubleEncodeLink, $headlessPage, $help, $actualFrameDepth;

    $labelColor = "lightgray";

    $urls = array_unique($urls);
    // sort()/rsort() also reindex the array after array_unique() left gaps
    if ($reverseSort) { rsort($urls); }
    else { sort($urls); }
    $numUrls = count($urls);

    print("<!DOCTYPE HTML>" . "\n" . "<html>" . "\n" . "<head>" . "\n");

    if ($numUrls == 1) { print "<title>1 link</title>" . "\n"; }
    else { print "<title>$numUrls links</title>" . "\n"; }

    print "<link rel='icon' sizes='16x16' href='pawn.ico' />" . "\n";
    print "<style type='text/css'> body { font-family: sans-serif; padding: 1.75em; line-height: 1.5em; } a { color: black; text-decoration: none; } ol { color: $labelColor; } </style>" . "\n";
    print "<script type='text/javascript'> var viewerWin; </script>" . "\n";

    print("</head>" . "\n" . "<body>" . "\n");

    if ($help) {
        print("<pre>" . "\n");
        print("targetUrl = target url to scan for links" . "\n");
        print("linkFilter = filter for selecting links" . "\n");
        print("frameDepth = maximum recursive depth to scan frames" . "\n");
        print("viewerUrl = viewer url to open links" . "\n");
        print("doubleEncodeLink = true|false" . "\n");
        print("reverseSort = true|false" . "\n");
        print("headlessPage = true|false" . "\n");
        print("help = true" . "\n");
        print("\n");
        print("</pre>" . "\n");
    }

    if (!$headlessPage) {
        // NOTE(review): the target url is linked whatever its scheme; escaping
        // prevents markup injection but not a javascript: url being clicked.
        $safeTargetUrl = print_links_escape_html($targetUrl);
        print "<span style='color:$labelColor'>targetUrl</span> &nbsp; &nbsp; <a href='" . $safeTargetUrl . "' target='_blank'>" . $safeTargetUrl . "</a><br />" . "\n";
        print "<span style='color:$labelColor'>linkFilter</span> &nbsp; &nbsp; " . print_links_escape_html($linkFilter) . "<br />" . "\n";
        if ($frameDepth > 0) { print "frameDepth: &nbsp; &nbsp; <b>" . print_links_escape_html($frameDepth) . "</b> &nbsp; &nbsp; <span style='opacity: 0.2;'>" . print_links_escape_html($actualFrameDepth) . "</span><br />" . "\n"; }
        print("<div>&nbsp;</div>" . "\n");
    }
    if ($numUrls > 0) {
        print("<ol>" . "\n");
        foreach ($urls as $url) {
            $encodedUrl = rawurlencode($url);
            if ($doubleEncodeLink) { $encodedUrl = rawurlencode($encodedUrl); }

            // Build the handler as JavaScript first (urls as JS string
            // literals), then escape the whole script for the HTML attribute.
            $onclick = "if (event.shiftKey) { location.href = " . print_links_js_literal($url) . "; }"
                . " else { if (viewerWin && !viewerWin.closed) { viewerWin.close(); }"
                . " viewerWin = window.open(" . print_links_js_literal($viewerUrl . $encodedUrl) . ", \"pgn4web_link_viewer\");"
                . " viewerWin.focus(); } this.blur(); return false;";

            print("<li>");
            print("<a href='javascript:void(0);' onclick='" . print_links_escape_html($onclick) . "'>");
            print(print_links_escape_html($url) . "</a>" . "</li>" . "\n");
        }
        print "</ol>" . "\n";
    } else {
        print("<i>no links found</i>" . "\n");
    }

    print("</body>" . "\n" . "</html>");
}
133
/**
 * Reads a request parameter by its long name or its short alias.
 *
 * The long name takes precedence over the short one. Array-valued parameters
 * (e.g. "tu[]=x") are ignored, so callers always get either a plain request
 * value or $default.
 *
 * @param string $param      long parameter name
 * @param string $shortParam short alias of the parameter
 * @param mixed  $default    value returned when neither name is usable
 * @return mixed the request value, or $default
 */
function get_param($param, $shortParam, $default) {
  foreach (array($param, $shortParam) as $name) {
    if (isset($_REQUEST[$name]) && (! is_array($_REQUEST[$name]))) { return $_REQUEST[$name]; }
  }
  return $default;
}
139
/**
 * Resolves a possibly relative url against a base url.
 *
 * - an empty $url yields $base; an $url with a scheme is returned unchanged
 * - "?query" and "#fragment" references replace the query/fragment of $base
 * - "//host/path" references inherit the scheme of $base
 * - other references are resolved against the directory of the base path
 *   (or the root for a leading "/"), keeping the host and port of $base and
 *   collapsing "//", "/./" and "/dir/../" in the path only
 * - when $base has no scheme or host, $url is returned unchanged
 *
 * @param string $url  url to resolve
 * @param string $base absolute url the reference is relative to
 * @return string the resolved url
 */
function make_absolute($url, $base) {

    // Return base if no url
    if (! $url) { return $base; }

    // Return if already absolute URL
    if (parse_url($url, PHP_URL_SCHEME) != '') { return $url; }

    // Urls only containing query or anchor: drop the part of base they replace
    if ($url[0] == '#') { return substr($base, 0, strcspn($base, '#')) . $url; }
    if ($url[0] == '?') { return substr($base, 0, strcspn($base, '?#')) . $url; }

    // Parse base URL; without a scheme and a host there is nothing to resolve against
    $parts = parse_url($base);
    if ((! is_array($parts)) || (! isset($parts['scheme'])) || (! isset($parts['host']))) { return $url; }
    $scheme = $parts['scheme'];

    // Protocol-relative url: only the scheme comes from base
    if (substr($url, 0, 2) == '//') { return $scheme . ':' . $url; }

    // Host, plus the port when base specifies one
    $authority = $parts['host'];
    if (isset($parts['port'])) { $authority .= ':' . $parts['port']; }

    // If no path, use /
    $path = isset($parts['path']) ? $parts['path'] : '/';

    // Remove non-directory element from path
    $path = preg_replace('#/[^/]*$#', '', $path);

    // Destroy path if relative url points to root
    if ($url[0] == '/') { $path = ''; }

    // Keep query and fragment out of the path clean-up below
    $pathLength = strcspn($url, '?#');
    $relativePath = substr($url, 0, $pathLength);
    $suffix = (string) substr($url, $pathLength);

    // Dirty absolute URL
    $abs = $authority . $path . '/' . $relativePath;

    // Replace '//' or '/./' or '/foo/../' with '/' until nothing changes
    $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
    do {
        $abs = preg_replace($re, '/', $abs, -1, $n);
    } while ($n > 0);

    // Absolute URL is ready!
    return $scheme . '://' . $abs . $suffix;
}
173
174?>
175