5)) { $frameDepth = $frameDepthDefault; }
// NOTE(review): the statement closed on the line above begins before the
// visible part of this file, so its fragment is left exactly as found.

// Shared state: deepest frame level actually visited and the collected links.
$actualFrameDepth = 0;
$urls = array();

get_links($targetUrl, $frameDepth);
print_links();

/**
 * Collect links from a page and, recursively, from the frames it embeds.
 *
 * Every anchor whose URL path matches the global $linkFilter is appended to
 * the global $urls array. The deepest recursion level reached is recorded in
 * the global $actualFrameDepth.
 *
 * NOTE(review): $targetUrl and frame "src" values are fetched with
 * file_get_contents() without restricting the URL scheme, so a scanned page
 * can point a frame at a non-HTTP resource. Consider limiting this to
 * http/https if local paths are not a supported input.
 *
 * @param string $targetUrl URL of the document to scan
 * @param int    $depth     remaining number of frame levels to descend
 * @return void
 */
function get_links($targetUrl, $depth)
{
    global $urls, $linkFilter, $frameDepth, $actualFrameDepth;

    if (! $targetUrl) {
        return;
    }

    // Track how deep the recursion actually went (0 for the top-level page).
    if ($frameDepth - $depth > $actualFrameDepth) {
        $actualFrameDepth = $frameDepth - $depth;
    }

    // A failed or empty fetch leaves nothing to parse; loadHTML() rejects
    // empty input, so stop here instead of handing it an empty document.
    $html = file_get_contents($targetUrl);
    if ($html === false || $html === '') {
        return;
    }

    // Collect markup errors internally instead of silencing them with "@".
    $dom = new DOMDocument();
    $previousSetting = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    $xpath = new DOMXPath($dom);

    // The last <base> element wins. Its href may be relative or missing
    // altogether (e.g. a <base> carrying only a target), so resolve it
    // against the document URL; make_absolute() returns the document URL
    // itself for an empty href.
    $base = $targetUrl;
    $bases = $xpath->evaluate("/html/head//base");
    if ($bases->length > 0) {
        $baseHref = $bases->item($bases->length - 1)->getAttribute('href');
        $base = make_absolute($baseHref, $targetUrl);
    }

    if ($depth > 0) {
        // Frameset documents have no <body>, so <frame>/<iframe> elements are
        // looked up anywhere in the document rather than only below the body.
        foreach ($xpath->evaluate("//iframe | //frame") as $frame) {
            $src = $frame->getAttribute('src');
            if ($src === '') {
                // Nothing to fetch; resolving '' would yield the base URL.
                continue;
            }
            $url = make_absolute($src, $base);
            // Do not recurse into a frame that points back at this document.
            if ($url != $targetUrl) {
                get_links($url, $depth - 1);
            }
        }
    }

    foreach ($xpath->evaluate("/html/body//a") as $anchor) {
        // Anchors without an href (e.g. named anchors) are not links.
        if (! $anchor->hasAttribute('href')) {
            continue;
        }
        $absolute = make_absolute($anchor->getAttribute('href'), $base);
        // parse_url() yields null/false when there is no path; match against
        // an empty string in that case.
        $path = (string) parse_url($absolute, PHP_URL_PATH);
        if (preg_match("@" . $linkFilter . "@i", $path)) {
            $urls[] = $absolute;
        }
    }
}

/**
 * Print the collected links together with the optional help text and the
 * parameter summary.
 *
 * De-duplicates and sorts the global $urls array in place (descending when
 * $reverseSort is set). Request-supplied and scraped values are escaped with
 * htmlspecialchars() before printing, on the assumption that the output is an
 * HTML page - NOTE(review): confirm, the markup is not visible in this file.
 * The empty and newline-only string literals are reproduced as they appear in
 * this file. $viewerUrl and $doubleEncodeLink are declared global but not
 * referenced by the visible code.
 *
 * @return void
 */
function print_links()
{
    global $urls, $reverseSort, $targetUrl, $linkFilter, $frameDepth, $viewerUrl, $doubleEncodeLink, $headlessPage, $help, $actualFrameDepth;

    $urls = array_unique($urls);
    if ($reverseSort) {
        rsort($urls);
    } else {
        sort($urls);
    }
    $numUrls = count($urls);

    print("" . "\n" . "" . "\n" . "" . "\n");
    if ($numUrls == 1) {
        print "1 link" . "\n";
    } else {
        print "$numUrls links" . "\n";
    }
    print "" . "\n";
    print "" . "\n";
    print "" . "\n";
    print("" . "\n" . "" . "\n");

    if ($help) {
        print("
" . "\n");
        print("targetUrl = target url to scan for links" . "\n");
        print("linkFilter = filter for selecting links" . "\n");
        print("frameDepth = maximum recursive depth to scan frames" . "\n");
        print("viewerUrl = viewer url to open links" . "\n");
        print("doubleEncodeLink = true|false" . "\n");
        print("reverseSort = true|false" . "\n");
        print("headlessPage = true|false" . "\n");
        print("help = true" . "\n");
        print("\n");
        print("
" . "\n");
    }

    if (!$headlessPage) {
        print "targetUrl     " . htmlspecialchars((string) $targetUrl, ENT_QUOTES, 'UTF-8') . "
" . "\n";
        print "linkFilter     " . htmlspecialchars((string) $linkFilter, ENT_QUOTES, 'UTF-8') . "
" . "\n";
        if ($frameDepth > 0) {
            print "frameDepth:     " . htmlspecialchars((string) $frameDepth, ENT_QUOTES, 'UTF-8') . "     " . $actualFrameDepth . "
" . "\n";
        }
        print("
 
" . "\n");
    }

    if ($numUrls > 0) {
        print("
    " . "\n");
        // sort()/rsort() re-index the array, so plain iteration visits the
        // links in sorted order.
        foreach ($urls as $url) {
            print("
  1. ");
            print("");
            print(htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . "" . "
  2. " . "\n");
        }
        print "
" . "\n";
    } else {
        print("no links found" . "\n");
    }

    print("" . "\n" . "");
}

/**
 * Read a request parameter by its long name, falling back to its short name.
 *
 * @param string $param      long parameter name, checked first
 * @param string $shortParam short parameter name, checked second
 * @param mixed  $default    value returned when neither name is present
 * @return mixed the value found in $_REQUEST, or $default
 */
function get_param($param, $shortParam, $default)
{
    foreach (array($param, $shortParam) as $name) {
        if (isset($_REQUEST[$name])) {
            return $_REQUEST[$name];
        }
    }

    return $default;
}

/**
 * Resolve a possibly relative URL against a base URL.
 *
 * @param string $url  reference found in the document (may be empty)
 * @param string $base absolute URL the reference is relative to
 * @return string the absolute URL; $base when $url is empty; $url unchanged
 *                when it is already absolute or when $base has no scheme and
 *                host to resolve against
 */
function make_absolute($url, $base)
{
    // Return base if no url
    if (! $url) {
        return $base;
    }

    // Return if already absolute URL
    if (parse_url($url, PHP_URL_SCHEME) != '') {
        return $url;
    }

    // Fragment-only reference: replaces any fragment already on the base
    if ($url[0] == '#') {
        return substr($base, 0, strcspn($base, '#')) . $url;
    }

    // Query-only reference: replaces any query and fragment already on the base
    if ($url[0] == '?') {
        return substr($base, 0, strcspn($base, '?#')) . $url;
    }

    // Parse the base explicitly instead of extract()-ing it into local
    // variables, so missing components are detected rather than undefined.
    $parts = parse_url($base);
    if (! is_array($parts) || ! isset($parts['scheme']) || ! isset($parts['host'])) {
        // Nothing to resolve against; hand the reference back untouched.
        return $url;
    }
    $scheme = $parts['scheme'];

    // Protocol-relative reference ("//host/path") only inherits the scheme
    if (substr($url, 0, 2) == '//') {
        return $scheme . ':' . $url;
    }

    // Keep a non-default port from the base; user info is not carried over.
    $authority = $parts['host'];
    if (isset($parts['port'])) {
        $authority .= ':' . $parts['port'];
    }

    // If no path, use /
    $path = isset($parts['path']) ? $parts['path'] : '/';

    // Remove non-directory element from path
    $path = preg_replace('#/[^/]*$#', '', $path);

    // Destroy path if relative url points to root
    if ($url[0] == '/') {
        $path = '';
    }

    // Dirty absolute URL
    $abs = $authority . $path . '/' . $url;

    // Replace '//' or '/./' or '/foo/../' with '/' until nothing changes
    $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
    do {
        $abs = preg_replace($re, '/', $abs, -1, $n);
    } while ($n > 0);

    // Absolute URL is ready!
    return $scheme . '://' . $abs;
}
?>