use Mojo::UserAgent;
use Bloom::Filter;
use Smart::Comments;
use DBI;
# Database connection settings (legacy "DBI:mysql:db:host:port" DSN form).
# NOTE(review): credentials are hard-coded; consider moving them out of the
# source before sharing this script.
my $dbname   = "bbs_url";
my $location = "localhost";
my $port     = "3306";
my $database = "DBI:mysql:$dbname:$location:$port";
my $db_user  = "root";
my $db_pass  = "toor";

# Fail immediately with the driver's message instead of crashing later on an
# undefined handle.
my $dbh = DBI->connect($database, $db_user, $db_pass)
    or die "cannot connect to " . $database . ": " . $DBI::errstr;

# Maximum link depth whose pages are still parsed for further links.
my $dept_level = 4;

# Start URL: first command-line argument, or the built-in default.
my $baseUrl = Mojo::URL->new($ARGV[0] || 'http://bbs.xxxxx.cn/');

# Site the crawl is confined to: the start URL's host without a leading
# "www.". Taking it from the parsed URL (rather than matching the URL text)
# works for https start URLs too and never includes a port number.
my $domain = lc($baseUrl->host || '');
$domain =~ s/\Awww\.//;
die "start URL '$baseUrl' must be absolute, e.g. http://host/\n"
    unless length $domain;

# Probabilistic "already seen" set for discovered URLs.
my $filter = Bloom::Filter->new(capacity => 100000, error_rate => 0.0001);
my $ua     = Mojo::UserAgent->new(max_redirects => 5);

# Name of the table that receives the discovered URLs.
my $name = "xxxxx";

# Create the result table. There is no IF NOT EXISTS clause, so the script
# stops here when the table is already present.
my $query = "CREATE TABLE $name("
    . " `No` int(100) NOT NULL auto_increment,"
    . " `depth` int(10) NOT NULL,"
    . " `Url` text  NOT NULL, PRIMARY KEY  (`No`) "
    . ") ENGINE=MyISAM  DEFAULT CHARSET=utf8;";
my $sth = $dbh->prepare($query)
    or die "prepare create table $name error: " . $dbh->errstr();
$sth->execute() or die "create table $name error: " . $sth->errstr();
# Host matcher for the crawl boundary: accepts $domain itself and any of its
# subdomains. The domain text is quoted so "." is literal, and the match is
# anchored so hosts that merely contain the text are rejected. Without a
# usable $domain nothing matches, which keeps the crawl from leaving the site.
my $host_re = (defined $domain && length $domain)
    ? qr/(?:\A|\.)\Q$domain\E\z/i
    : qr/(?!)/;

# One INSERT statement, prepared once and reused for every stored URL.
# Placeholders keep scraped URLs (untrusted text) out of the SQL string.
my $insert_sth = $dbh->prepare("insert into $name(depth,Url) values(?,?)")
    or die "prepare insert into $name error: " . $dbh->errstr();

# Response handler for every crawled page.
#
# Params:
#   $ua - the Mojo::UserAgent that performed the request
#   $tx - the finished transaction
#
# Reads the page's depth from the custom 'dept' request header, extracts all
# <a href> links, keeps http(s) links on the crawl domain that do not point at
# media/archive files, stores not-yet-seen links of depth 3..5 in the database
# and schedules them for crawling with depth + 1.
#
# NOTE(review): `success` and `attrs` are the calls this script was written
# against; confirm they exist in the installed Mojolicious before upgrading.
my $callback;
$callback = sub {
    my ($ua, $tx) = @_;
    return if !$tx->success;

    # A request that carries no 'dept' header counts as depth 0.
    my $dept = $tx->req->headers->header('dept') || 0;
    return if $dept > $dept_level;
    ++$dept;    # depth of the links found on this page

    my $page_url = $tx->req->url;

    $tx->res->dom->find("a[href]")->each(sub {
        my $attrs = shift->attrs;

        # Resolve against the page URL so relative paths, protocol-relative
        # links and non-default ports all become correct absolute URLs.
        my $newUrl = Mojo::URL->new($attrs->{href})->to_abs($page_url);
        $newUrl->fragment(undef);

        # `return` leaves this per-element sub, i.e. skips to the next link.
        my $scheme = lc($newUrl->scheme || '');
        return if $scheme ne 'http' && $scheme ne 'https';

        my $host = $newUrl->host;
        return if !defined $host || $host !~ $host_re;

        return if $newUrl->path =~ /\.(?:jpg|png|bmp|mp3|wma|wmv|gz|zip|rar|iso|pdf)$/i;

        # Use the plain string form for the filter, the log and the database.
        my $key = "$newUrl";
        return if $filter->check($key);

        # Progress line each time the number of known URLs is a multiple of 1000.
        if ($filter->key_count() % 1000 == 0) {
            print $filter->key_count(), " $dept ", $key, "\n";
        }

        # Only links at depth 3, 4 or 5 are recorded.
        if ($dept == 3 || $dept == 4 || $dept == 5) {
            $insert_sth->execute($dept, $key);
        }

        $filter->add($key);

        # A page deeper than $dept_level would be discarded by this handler
        # right away, so do not spend a request on it.
        $ua->get($newUrl => { dept => $dept } => $callback)
            if $dept <= $dept_level;
    });
};
 
# Seed the crawl: request the start URL tagged as depth 1 (the handler reads
# the depth back from the 'dept' request header), then run the event loop so
# the queued requests and their callbacks are processed.
my %seed_headers = (dept => 1);
$ua->get($baseUrl, \%seed_headers, $callback);
Mojo::IOLoop->start;