一个抓取页面的Perl程序

#!/usr/bin/perl
#***************************************
#	先获取日志记录
#	如果为空 {
#		则把页面上获取的时间和当前时间对比,然后筛选和当前时间差距最小的时间,发送邮件
#		然后记录到日志中
#	} else {
#		把日志中的时间和页面上获取的时间进行比较,如果页面上有大于这个日志记录的时间,
#		发送邮件和短信
#	}
#*************************************
use strict;
use CGI;
use URI::Escape; 
use LWP; 
use Encode; 
use Digest::MD5 qw(md5 md5_hex md5_base64);
use LWP::ConnCache;
use LWP::UserAgent;
use HTML::TreeBuilder;
# Reference date used on the first run, when no state file exists yet.
# NOTE: despite its name, $now_date is the local calendar date 24 hours
# before the script starts (time() - 86400), formatted as YYYY-MM-DD.
#******************************************************
my @day_ago = localtime(time() - 86400);
my ($day, $month, $year) = @day_ago[3, 4, 5];
$year  = $year + 1900;    # localtime() counts years from 1900
$month = $month + 1;      # localtime() months are 0-based
my $now_date = join '-',
    sprintf('%04d', $year),
    sprintf('%02d', $month),
    sprintf('%02d', $day);
# Debug aid: print "$now_date\n"; exit;
#*******************************************************

# Fetch the monitored page and open the error log.
#
# Sets up: $url (page to watch), $browser (shared user agent), $response
# (result of the GET), $path / $file (state-file settings) and the LOGFH
# append handle on log.txt. The script stops with exit status 1 when the
# page cannot be fetched.
my $url = "http://www.bjjs.gov.cn/publish/portal0/tab4021";
my $browser = LWP::UserAgent->new;
$browser->agent("AgentName/0.1");
$browser->timeout(10);
$browser->env_proxy;
$browser->conn_cache(LWP::ConnCache->new());

# Request the page.
my $response = $browser->get($url);
my $path = qq[/data0/xwdata];    # not referenced in the visible part of the script
my $file = qq[data.txt];         # state file holding the last notified notice date

# Error log. Low-precedence "or" is required here: with "||" the die was
# attached to the file name string and could never fire.
open(LOGFH, '>>', 'log.txt') or die "Open log file faild:$!";

# A bare "next" is illegal outside a loop, so a failed fetch is logged
# (with the HTTP status and a line terminator) and the script stops.
unless ($response->is_success) {
	print LOGFH "无法获取$url", " (", $response->status_line, ")\n";
	close LOGFH;
	exit 1;
}

## Build the HTML tree from the fetched page.

# Flush STDOUT after every print so progress messages appear immediately.
$| = 1;

#************************
# Recipient phone number for SMS notifications (placeholder value).
my $sms_to = '手机号';

# Response body: decode as UTF-8, re-encode as GB2312, then undo any
# percent-escapes that remain in the text.
my $html = uri_unescape(
	encode( 'gb2312', decode( 'utf8', $response->content ) )
);

# Parse the converted markup into an element tree.
my $root = HTML::TreeBuilder->new_from_content($html);

#----------------------------------------------------------------------
# Change detection and notification.
#
# Two page fragments are watched:
#   * the notice date cell (td align=right width=95): an SMS is sent when a
#     date on the page is greater than the one stored in $file, or, when no
#     date is stored yet, when it is not older than $now_date;
#   * the project cell (td width=50%): an SMS is sent when its text differs
#     from the text stored in data_xm.txt, or when nothing is stored yet.
#
# The state file is rewritten only AFTER the SMS call succeeded, so a failed
# call (which dies) keeps the previous state and is retried on the next run.
#----------------------------------------------------------------------

# _read_state($state_file)
#   Returns the content of $state_file without trailing line breaks, or
#   undef when the file is missing or empty. Dies when an existing,
#   non-empty file cannot be opened.
sub _read_state {
	my ($state_file) = @_;
	return unless -s $state_file;
	open(my $in, '<', $state_file)
		or die "open $state_file for reading failed: $!";
	my $content = do { local $/; <$in> };    # slurp: the value may span lines
	close $in;
	return unless defined $content;
	$content =~ s/[\r\n]+\z//;
	return unless length $content;
	return $content;
}

# _write_state($state_file, $value)
#   Replaces the content of $state_file with $value. Dies on open or close
#   failure so a lost state update never goes unnoticed.
sub _write_state {
	my ($state_file, $value) = @_;
	open(my $out, '>', $state_file)
		or die "open $state_file for writing failed: $!";
	print {$out} $value;
	close($out) or die "close $state_file failed: $!";
	return;
}

# _send_sms($to, $text)
#   Calls the SMS gateway with message $text for recipient $to, prints the
#   gateway reply, and dies with the HTTP status line when the call fails.
sub _send_sms {
	my ($to, $text) = @_;
	print "正在调用接口\n";
	my $ua = LWP::UserAgent->new;
	$ua->agent("AgentName/0.1");
	$ua->timeout(50);
	$ua->env_proxy;
	# NOTE(review): transcoding GB2312 -> UTF-8 presumes this source file is
	# saved as GB2312 -- TODO confirm.
	my $sms_msg = encode('utf8', decode('gb2312', $text));
	# NOTE(review): the credential is hard-coded and sent in the query string
	# next to its MD5-derived key; consider moving it to configuration.
	my $pwd = qq[fanfan];
	my $ckkey = md5_hex($pwd . $to);
	# Every value is percent-escaped so reserved characters (&, =, #, %)
	# cannot break the query string.
	my $api_url = sprintf 'http://mail.xxx.xx.com?c=%s&t=%s&pwd=%s&key=%s',
		map { uri_escape($_) } $sms_msg, $to, $pwd, $ckkey;
	my $reply = $ua->get($api_url);
	unless ($reply->is_success) {
		print "调用失败\n";
		die $reply->status_line;
	}
	my $result = $reply->content;
	print "$result\n";
	print "调用成功\n";
	return;
}

# --- Notices: compare the dates shown on the page with the stored date ---
my $td_tzgg = $root->look_down("_tag","td","align","right","width","95");
if ($td_tzgg) {
	foreach my $date ($td_tzgg->content_refs_list) {
		next if ref $$date;    # skip child elements, keep plain text nodes
		my $date_ym = $$date;
		my $last_sent = _read_state($file);
		if (defined $last_sent) {
			# A date newer than the stored one means the page was updated.
			if ($date_ym gt $last_sent) {
				_send_sms($sms_to, qq[通知公告更新了]);
				_write_state($file, $date_ym);
			} else {
				print "已经发送过邮件1\n";
			}
		} elsif ($date_ym ge "$now_date") {
			# First run: notify only for dates not older than $now_date.
			print "发送一次邮件1";
			_send_sms($sms_to, qq[通知公告更新了]);
			_write_state($file, $date_ym);
		}
	}
} else {
	# The expected cell is missing (page layout changed?): log it instead of
	# crashing on a method call on undef.
	print LOGFH "notice date cell not found in $url\n";
}

# --- Projects: compare the cell text with the stored text ---
my $td_xmgs = $root->look_down("_tag","td","width","50%");
my $con = $td_xmgs ? $td_xmgs->as_text() : undef;
my $file_xm = qq[data_xm.txt];
if (defined $con) {
	my $last_con = _read_state($file_xm);
	if (defined $last_con && $con eq $last_con) {
		print "已经发送过邮件2\n";
	} else {
		print "发送邮件一次2" unless defined $last_con;
		_send_sms($sms_to, qq[项目公示更新了]);
		_write_state($file_xm, $con);
	}
} else {
	print LOGFH "project cell not found in $url\n";
}

# Release the parse tree and close the error log. State files use lexical
# handles that are closed inside the helpers above.
$root->delete;
close LOGFH;

  

posted @ 2021-01-22 19:39  pebblecome  阅读(93)  评论(0)    收藏  举报