#!/usr/bin/perl
# @martin eiszner mei@websec.org
################################################
$|=1;
use strict;
use Getopt::Std;
use LWP::UserAgent;
use HTTP::Request::Common;
use HTTP::Response;
use HTML::Form;
use HTML::TokeParser;
use IO::Scalar;
## main: fetch one URL and list every HTML form found in the response
## (method, action and the name/value of each input).
##
## options:
##   -u url     target (scheme optional, defaults to http://)
##   -p proxy   proxy used for both http and https requests
##   -l arg     accepted by getopts but not used anywhere in this script
##   -h         also print the response headers for every form
##   -i         additionally turn links that carry a query string into
##              synthetic forms (see create_content)
##
my %opt;
getopts("u:p:l:hi",\%opt);
my $url = $opt{'u'};
my $proxy = $opt{'p'};
my $log = $opt{'l'};
my $getti = "";
my (@fields,@hpairs);
my %HEADERS;
my ($si,$k,$v);
if (!$url)
{
    print "\nusage: $0\n\t-u [url]\n\t-p [proxy]\n\t-h (output headers)\n\t-i (include dynamic links)\n\n";
    exit 11;
}
my $user_agent = LWP::UserAgent->new( requests_redirectable => ['GET', 'HEAD'] );
$user_agent->agent("Mozilla/4.0(compatible;MSIE6.0;Windows NT 5.0)");
$user_agent->proxy("https", $proxy);
$user_agent->proxy("http", $proxy);
## prepend a scheme only when the URL does not already START with one.
## The test is anchored and case-insensitive on purpose: a target such as
## "example.com/?next=http://x" still needs the prefix, while
## "HTTP://example.com" must not get a second one.
if ($url =~ m{^https?://}i)
{
    $getti = $url;
}
else
{
    $getti = "http://".$url;
}
$getti =~ s/[\n\r]//g;
## find out everything about uri
## (these file-level variables are also read by create_content)
##
my($uri_protocol, $uri_domain, $uri_path, $uri_query, $uri_frags) =
$getti =~ m|^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
## NOTE: this pattern needs a directory part followed by a script name of
## at least two characters; for paths like "/index.php" or "/dir/" it does
## not match and both variables stay undefined.
my($uri_dir,$uri_script) = $uri_path =~ m|^(.+\/)(.[^\/]+)$|;
my $response = $user_agent->request(GET "$getti", %HEADERS);
if (!$response->is_success)
{
    print $response->code(), "-", $response->message(), "\n";
}
else
{
    my $content = $response->content();
    $content .= create_content($content) if ($opt{'i'});
    my $rheaders = $response->headers;
    ## walk thru all the forms
    ##
    my @forms = HTML::Form->parse($content, "$getti");
    foreach my $f (@forms)
    {
        my @inputs = $f->inputs;
        my $def_method = $f->method;
        my $def_action = $f->action;
        print "\n******\n$def_method $def_action\n";
        print "******\n".$rheaders->as_string."\n" if $opt{'h'};
        print "******\nINPUT:\n";
        foreach my $single_input (@inputs)
        {
            print $single_input->name.":\t".$single_input->value."\n";
        }
        print "\n";
    }
}# end request success
## end main
###########
######################################
## getl(html, tag, attribute)
##
## Tokenizes the given HTML text and collects, for every start tag named
## "tag", the value of its "attribute".  A tag that lacks the attribute
## (or whose value is false) contributes an empty string, so the result
## has exactly one entry per matching tag.
##
## returns: list of attribute values in document order
##
sub getl
{
    my ($html, $tag_name, $attr_name) = @_;

    ## the tokenizer reads from an in-memory handle wrapped around the text
    my $handle = IO::Scalar->new(\$html);
    my $parser = HTML::TokeParser->new($handle);

    my @found;
    while (my $tag = $parser->get_tag("$tag_name"))
    {
        ## $tag->[1] is the hash of attributes of the matched tag
        my $value = $tag->[1]{"$attr_name"};
        push @found, ($value ? $value : "");
    }

    $handle->close;
    return @found;
}
##################################################
##################################################
## create_content(html)
##
## Builds extra HTML for the links found in the given page: every
## frame/a/form/img link that carries a query string becomes a synthetic
## <form> whose action is the absolute target URL (without query string)
## and whose inputs are the query parameters.  Each distinct target URL
## is emitted once; when it occurs several times the last link wins.
##
## Reads the file-level variables $uri_protocol, $uri_domain, $uri_path
## and %opt.
##
## returns: the concatenated forms (sorted by target URL), or "" when the
##          -i option is not set or no link has a query string.
##
## limitation: "." and ".." path segments are passed through unchanged.
##
sub create_content
{
    my $content = shift;

    ## nothing to add unless the i-option is set
    return "" unless $opt{'i'};

    ## origin and directory of the page the links were found on;
    ## relative links are resolved against these
    my $base_origin = (defined $uri_protocol ? $uri_protocol : '')
                    . "://"
                    . (defined $uri_domain ? $uri_domain : '');
    my $base_path = defined $uri_path ? $uri_path : '';
    my ($base_dir) = $base_path =~ m{^(.*/)};
    $base_dir = '/' unless defined $base_dir;

    ## extract all the possible dynamic links
    ##
    my @all_links;
    push @all_links, getl($content, "frame", "src");
    push @all_links, getl($content, "a",     "href");
    push @all_links, getl($content, "form",  "action");
    push @all_links, getl($content, "img",   "src");

    ## put links with query-strings into our content
    ## only one time for each target url !!
    ##
    my %links_inform;
    foreach my $siu (@all_links)
    {
        next unless defined $siu && length $siu;

        ## split the link into scheme, authority, path and query
        my ($siprot, $sidom, $sipath, $siquery) =
            $siu =~ m|^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;

        ## only if there is a query_string
        next unless defined $siquery && $siquery ne '';

        ## mailto:, javascript: and friends are not fetchable targets
        next if defined $siprot && $siprot !~ /^https?$/i;

        $sipath = '' unless defined $sipath;

        my $x_action;
        if (defined $sidom && $sidom ne '')
        {
            ## fully qualified (or scheme-relative "//host/...") link
            my $scheme = defined $siprot ? $siprot : $uri_protocol;
            $scheme = '' unless defined $scheme;
            $x_action = $scheme . "://" . $sidom . $sipath;
        }
        elsif ($sipath =~ m{^/})
        {
            ## absolute path on the same host
            $x_action = $base_origin . $sipath;
        }
        elsif ($sipath eq '')
        {
            ## bare "?a=b" refers to the current document
            $x_action = $base_origin . ($base_path ne '' ? $base_path : '/');
        }
        else
        {
            ## path relative to the directory of the current document
            $x_action = $base_origin . $base_dir . $sipath;
        }

        my $x_value = '<form action="' . _html_attr_escape($x_action) . '">';
        foreach my $sin (split(/&/, $siquery))
        {
            my ($k, $v) = split(/=/, $sin, 2);
            ## "a=1&&b=2" yields an empty pair; there is nothing to name
            next unless defined $k && $k ne '';
            $v = '' unless defined $v;
            $x_value .= '<input type="text" name="' . _html_attr_escape($k)
                      . '" value="' . _html_attr_escape($v) . '">';
        }
        $x_value .= "</form>";

        ## keyed by target url so every target shows up only once
        $links_inform{$x_action} = $x_value;
    }

    ## sorted so the output does not depend on hash ordering
    return join("", map { $links_inform{$_} } sort keys %links_inform);
}

## Escapes the characters that would otherwise terminate or corrupt a
## double-quoted HTML attribute, so the text survives being re-parsed.
sub _html_attr_escape
{
    my $text = shift;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}