<?php

/*
 *  pgn4web javascript chessboard
 *  copyright (C) 2009-2014 Paolo Casaschi
 *  see README file and http://pgn4web.casaschi.net
 *  for credits, license and more details
 */

// Report everything; -1 means "all errors" on every PHP version, whereas the
// E_STRICT constant is deprecated on recent releases.
error_reporting(-1);

$targetUrl = get_param("targetUrl", "tu", "");

// The dot before "pgn" is escaped so that only a real ".pgn" extension matches.
$linkFilterDefault = '.+\.pgn$';
$linkFilter = get_param("linkFilter", "lf", $linkFilterDefault);
// The filter is a request-supplied regular expression: check once that it
// compiles between the '@' delimiters used by get_links(), otherwise fall back
// to the default instead of raising a warning for every scanned link.
if (@preg_match("@" . $linkFilter . "@i", "") === false) { $linkFilter = $linkFilterDefault; }

$frameDepthDefault = 0;
$frameDepth = get_param("frameDepth", "fd", $frameDepthDefault);
if ((! is_numeric($frameDepth)) || ($frameDepth < 0) || ($frameDepth > 5)) { $frameDepth = $frameDepthDefault; }
$frameDepth = (int) $frameDepth;

$viewerUrlDefault = "viewer.php?pd=";
$viewerUrl = get_param("viewerUrl", "vu", $viewerUrlDefault);
// The viewer url ends up in a window.open() call: relative urls and http(s)
// urls are accepted, anything else (javascript:, data:, ...) is replaced.
if (! is_safe_url($viewerUrl, true)) { $viewerUrl = $viewerUrlDefault; }

$doubleEncodeLink = get_flag("doubleEncodeLink", "del");
$reverseSort = get_flag("reverseSort", "rs");
$headlessPage = get_flag("headlessPage", "hp");
$help = get_flag("help", "h");

$actualFrameDepth = 0;
$urls = array();
get_links($targetUrl, $frameDepth);
print_links();

/**
 * Fetches a page and appends every link whose path matches $linkFilter to the
 * global $urls list, recursing into frames/iframes while $depth is positive.
 *
 * Also updates the global $actualFrameDepth with the deepest frame level that
 * was actually reached.
 *
 * @param string $targetUrl page to scan; only http and https urls are fetched
 * @param int    $depth     remaining number of frame levels to descend into
 */
function get_links($targetUrl, $depth) {
  global $urls, $linkFilter, $frameDepth, $actualFrameDepth;

  // The url comes from the request (or from a scanned page): never hand local
  // paths or stream wrappers such as file:// or php:// to file_get_contents().
  // NOTE(review): http(s) requests to internal hosts are still possible.
  if ((! $targetUrl) || (! is_safe_url($targetUrl))) { return; }

  if ($frameDepth - $depth > $actualFrameDepth) { $actualFrameDepth = $frameDepth - $depth; }

  // A failed download is handled explicitly below, so the warning is muted.
  $html = @file_get_contents($targetUrl);
  if (($html === false) || ($html === "")) { return; }

  // Real world pages are rarely valid: collect parser errors internally
  // instead of emitting them, then restore the previous libxml setting.
  $dom = new DOMDocument();
  $previousSetting = libxml_use_internal_errors(true);
  $loaded = $dom->loadHTML($html);
  libxml_clear_errors();
  libxml_use_internal_errors($previousSetting);
  if (! $loaded) { return; }

  $xpath = new DOMXPath($dom);

  // The last <base> element wins; its href may itself be relative (or empty),
  // so it is resolved against the url of the page.
  $bases = $xpath->evaluate("/html/head//base");
  if ($bases->length > 0) {
    $base = make_absolute($bases->item($bases->length - 1)->getAttribute('href'), $targetUrl);
  } else {
    $base = $targetUrl;
  }

  if ($depth > 0) {
    // Frames are looked up anywhere in the document because frameset pages
    // have no <body> element.
    foreach ($xpath->evaluate("//iframe | //frame") as $frame) {
      $url = make_absolute($frame->getAttribute('src'), $base);
      if ($url != $targetUrl) { get_links($url, $depth - 1); }
    }
  }

  foreach ($xpath->evaluate("/html/body//a") as $anchor) {
    $absolute = make_absolute($anchor->getAttribute('href'), $base);
    // Links with other schemes (javascript:, data:, ...) are dropped because
    // print_links() makes the listed urls navigable.
    if (! is_safe_url($absolute)) { continue; }
    // parse_url() yields null/false when there is no path: match against "".
    $path = (string) parse_url($absolute, PHP_URL_PATH);
    if (preg_match("@" . $linkFilter . "@i", $path)) {
      array_push($urls, $absolute);
    }
  }
}

/**
 * Prints the complete html page listing the links collected in the global
 * $urls, deduplicated and sorted (descending when $reverseSort is set).
 *
 * Every request-derived or scraped value is escaped for the context it is
 * written into (html text, html attribute, javascript string).
 */
function print_links() {
  global $urls, $reverseSort, $targetUrl, $linkFilter, $frameDepth, $viewerUrl, $doubleEncodeLink, $headlessPage, $help, $actualFrameDepth;

  $labelColor = "lightgray";

  $urls = array_unique($urls);
  if ($reverseSort) { rsort($urls); }
  else { sort($urls); }
  $numUrls = count($urls);

  print("<!DOCTYPE HTML>" . "\n" . "<html>" . "\n" . "<head>" . "\n");

  if ($numUrls == 1) { print "<title>1 link</title>" . "\n"; }
  else { print "<title>$numUrls links</title>" . "\n"; }

  print "<link rel='icon' sizes='16x16' href='pawn.ico' />" . "\n";
  print "<style type='text/css'> body { font-family: sans-serif; padding: 1.75em; line-height: 1.5em; } a { color: black; text-decoration: none; } ol { color: $labelColor; } </style>" . "\n";
  print "<script type='text/javascript'> var viewerWin; </script>" . "\n";

  print("</head>" . "\n" . "<body>" . "\n");

  if ($help) {
    print("<pre>" . "\n");
    print("targetUrl = target url to scan for links" . "\n");
    print("linkFilter = filter for selecting links" . "\n");
    print("frameDepth = maximum recursive depth to scan frames" . "\n");
    print("viewerUrl = viewer url to open links" . "\n");
    print("doubleEncodeLink = true|false" . "\n");
    print("reverseSort = true|false" . "\n");
    print("headlessPage = true|false" . "\n");
    print("help = true" . "\n");
    print("\n");
    print("</pre>" . "\n");
  }

  if (! $headlessPage) {
    $shownTarget = html_escape($targetUrl);
    if (is_safe_url($targetUrl)) {
      // Only http(s) targets are rendered as a clickable link.
      $shownTarget = "<a href='" . $shownTarget . "' target='_blank'>" . $shownTarget . "</a>";
    }
    print "<span style='color:$labelColor'>targetUrl</span> " . $shownTarget . "<br />" . "\n";
    print "<span style='color:$labelColor'>linkFilter</span> " . html_escape($linkFilter) . "<br />" . "\n";
    if ($frameDepth > 0) { print "frameDepth: <b>" . html_escape($frameDepth) . "</b> <span style='opacity: 0.2;'>" . html_escape($actualFrameDepth) . "</span><br />" . "\n"; }
    print("<div> </div>" . "\n");
  }

  if ($numUrls > 0) {
    print("<ol>" . "\n");
    foreach ($urls as $url) {
      $encoded = $doubleEncodeLink ? rawurlencode(rawurlencode($url)) : rawurlencode($url);
      // Shift-click opens the link itself, a plain click opens it in the viewer.
      // Urls are embedded as JSON string literals, then the whole handler is
      // escaped for the single-quoted onclick attribute.
      $onclick = 'if (event.shiftKey) { location.href = ' . js_literal($url) . '; } else { '
        . 'if (viewerWin && !viewerWin.closed) { viewerWin.close(); } '
        . 'viewerWin = window.open(' . js_literal($viewerUrl . $encoded) . ', "pgn4web_link_viewer"); '
        . 'viewerWin.focus(); } this.blur(); return false;';
      print("<li>");
      print("<a href='javascript:void(0);' onclick='" . html_escape($onclick) . "'>");
      print(html_escape($url) . "</a>" . "</li>" . "\n");
    }
    print "</ol>" . "\n";
  } else {
    print("<i>no links found</i>" . "\n");
  }

  print("</body>" . "\n" . "</html>");
}

/**
 * Reads a request parameter by its long name, then by its short name.
 *
 * @param string $param      long parameter name
 * @param string $shortParam short alias
 * @param mixed  $default    value returned when neither name carries a string
 * @return mixed the request string, or $default
 */
function get_param($param, $shortParam, $default) {
  // Array values (e.g. "?tu[]=x") are ignored: the script only handles strings.
  if (isset($_REQUEST[$param]) && is_string($_REQUEST[$param])) { return $_REQUEST[$param]; }
  if (isset($_REQUEST[$shortParam]) && is_string($_REQUEST[$shortParam])) { return $_REQUEST[$shortParam]; }
  return $default;
}

/**
 * Reads a boolean request parameter; only "true" and "t" enable the flag.
 *
 * @param string $param      long parameter name
 * @param string $shortParam short alias
 * @return bool
 */
function get_flag($param, $shortParam) {
  $value = get_param($param, $shortParam, "false");
  return (($value === "true") || ($value === "t"));
}

/**
 * Tells whether a url is safe to fetch or to navigate to.
 *
 * @param string $url           url to check
 * @param bool   $allowRelative whether a url without any scheme is acceptable
 * @return bool true for http/https urls, $allowRelative for scheme-less urls,
 *              false for every other scheme
 */
function is_safe_url($url, $allowRelative = false) {
  // Browsers ignore leading blanks and embedded tab/newline characters when
  // they look for the scheme, so those are removed before checking.
  $compact = preg_replace('/[\x00-\x20]+/', '', (string) $url);
  if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', $compact, $match)) {
    return in_array(strtolower($match[1]), array('http', 'https'), true);
  }
  return (bool) $allowRelative;
}

/**
 * Escapes a value for html text and for quoted html attribute values.
 *
 * @param mixed $text value to escape (cast to string)
 * @return string
 */
function html_escape($text) {
  return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Encodes a value as a double-quoted javascript string literal.
 *
 * @param mixed $text value to encode (cast to string)
 * @return string the literal, or '""' when the value cannot be encoded
 */
function js_literal($text) {
  $literal = json_encode((string) $text, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
  // json_encode() fails on invalid UTF-8.
  return ($literal === false) ? '""' : $literal;
}

/**
 * Resolves a (possibly relative) url against a base url.
 *
 * @param string $url  link as found in the page
 * @param string $base absolute url the link is relative to
 * @return string the absolute url; $base when $url is empty; $url unchanged
 *                when it is already absolute or when $base cannot be parsed
 */
function make_absolute($url, $base) {

  $url = trim((string) $url);

  // Return base if no url
  if ($url === '') { return $base; }

  // Return if already absolute URL
  $urlScheme = parse_url($url, PHP_URL_SCHEME);
  if (is_string($urlScheme) && ($urlScheme !== '')) { return $url; }

  // Urls only containing an anchor replace the anchor of the base
  if ($url[0] == '#') { return substr($base, 0, strcspn($base, '#')) . $url; }

  // Urls only containing a query replace query and anchor of the base
  if ($url[0] == '?') { return substr($base, 0, strcspn($base, '?#')) . $url; }

  // Without scheme and host in the base there is nothing to resolve against
  $parts = parse_url($base);
  if (($parts === false) || (! isset($parts['scheme'])) || (! isset($parts['host']))) { return $url; }

  // Protocol-relative urls ("//host/path") only inherit the scheme
  if (substr($url, 0, 2) == '//') { return $parts['scheme'] . ':' . $url; }

  // Keep credentials and port of the base, not only its host name
  $authority = $parts['host'];
  if (isset($parts['port'])) { $authority .= ':' . $parts['port']; }
  if (isset($parts['user'])) {
    $userinfo = $parts['user'] . (isset($parts['pass']) ? ':' . $parts['pass'] : '');
    $authority = $userinfo . '@' . $authority;
  }

  // If no path, use /
  $path = isset($parts['path']) ? $parts['path'] : '/';

  // Remove non-directory element from path
  $path = preg_replace('#/[^/]*$#', '', $path);

  // Destroy path if relative url points to root
  if ($url[0] == '/') { $path = ''; }

  // Query and anchor of the link are set aside so that the cleanup below
  // cannot alter them
  $cut = strcspn($url, '?#');
  $suffix = substr($url, $cut);

  // Dirty absolute path
  $joined = $path . '/' . substr($url, 0, $cut);

  // Replace '//' or '/./' or '/foo/../' with '/', until nothing is left to replace
  $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
  do {
    $joined = preg_replace($re, '/', $joined, -1, $n);
  } while ($n > 0);

  // Absolute URL is ready!
  return $parts['scheme'] . '://' . $authority . $joined . $suffix;
}