利用Python自动识别目标网页是否有注释
本代码用于识别网页源代码中是否包含注释,并将找到的注释显示出来。注释有时具有一定的信息收集价值。主要用到的模块包括:
1. requests请求模块
2. re正则表达式模块
import optparse
import re
import sys
from typing import List, Optional

try:
    import termcolor
except ImportError:  # colour is purely cosmetic; fall back to plain output
    termcolor = None


class CommentFinder:
    """Fetch the page named on the command line and print its HTML comments.

    The target URL is read from the ``-u/--url`` option when the object is
    constructed; ``run()`` then downloads the page and lists every
    ``<!-- ... -->`` comment found in its source.
    """

    # Non-greedy so that two comments on the same line are not merged into
    # one match; DOTALL so that comments spanning several lines are found.
    COMMENT_PATTERN = re.compile(r'<!--(.*?)-->', re.DOTALL)

    # Seconds to wait for the server; without a timeout requests.get can
    # block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 10

    def __init__(self) -> None:
        self.url = self.url_prefix_formatter(self.get_params())
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0'
        }

    def get_params(self) -> str:
        """Return the URL passed with ``-u/--url``.

        Prints the usage string and exits with status 0 when the option is
        missing.
        """
        parser = optparse.OptionParser('Usage: < Program > -u website url ')
        parser.add_option('-u', '--url', dest='url', type='string', help='Specify website url')
        options, _args = parser.parse_args()
        if options.url is None:
            print(parser.usage)
            sys.exit(0)
        return options.url

    def url_prefix_formatter(self, url: str) -> str:
        """Return *url* unchanged if it has an http(s) scheme, else prepend ``http://``."""
        if url.startswith(('http://', 'https://')):
            return url
        return 'http://' + url

    def retrieve_webpage(self) -> Optional[str]:
        """Download ``self.url`` and return the response body.

        Returns:
            The page text on HTTP 200, otherwise ``None`` (after reporting
            the unexpected status code).

        Exits with status 1 after printing the error when the request
        itself fails (DNS failure, refused connection, timeout, ...).
        """
        # Imported here rather than at module level so the URL and comment
        # parsing helpers remain importable without the HTTP dependency.
        import requests

        try:
            response = requests.get(
                url=self.url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as e:
            print(e)
            sys.exit(1)

        if response.status_code == 200:
            return response.text
        # Previously a non-200 answer produced no output at all.
        print("Unexpected HTTP status %d from %s" % (response.status_code, self.url))
        return None

    @classmethod
    def extract_comments(cls, html: str) -> List[str]:
        """Return the inner text of every ``<!-- ... -->`` comment in *html*, in order."""
        return cls.COMMENT_PATTERN.findall(html)

    @staticmethod
    def _colorize(text: str) -> str:
        """Return *text* coloured blue when termcolor is available, else unchanged."""
        if termcolor is None:
            return text
        return termcolor.colored(text, 'blue')

    def run(self) -> None:
        """Fetch the target page and print a numbered list of its comments."""
        page = self.retrieve_webpage()
        if not page:
            return

        comments = self.extract_comments(page)
        if not comments:
            print("No comment found on the target")
            return

        print("Has found the following comments on the target: %s" % self.url)
        for index, comment in enumerate(comments, start=1):
            print('\t%d' % index, self._colorize(comment))


if __name__ == '__main__':
    commentfinder = CommentFinder()
    commentfinder.run()
STRIVE FOR PROGRESS,NOT FOR PERFECTION

浙公网安备 33010602011771号