1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # author:么么哒
4 import requests
5 import re
6 from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED, FIRST_COMPLETED
# Silence the InsecureRequestWarning that verify=False requests would emit.
requests.packages.urllib3.disable_warnings()

# All traffic is routed through a local forwarder on port 1080
# (needed to reach Google from behind a firewall).
proxy = {scheme: '127.0.0.1:1080' for scheme in ('http', 'https')}

# Headers sent with every search request; a real desktop Chrome UA string
# reduces the chance of being served a bot-check page.
heads = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    'Content-Type': 'text/xml; charset=utf-8',
}
17
18
def Reptile(text):
    """Search Google for *text* and append each result title to google-title.txt.

    text: the query string (one line read from test.txt, presumably a domain
          or keyword — TODO confirm against the input file's contents).

    Any exception (network failure, proxy down, etc.) is caught and reported
    so one bad query does not kill the worker thread.
    """
    try:
        target = 'https://www.google.com/search?q={}'.format(text)
        # verify=False because traffic goes through the local proxy; the
        # resulting warnings are disabled at module import time.
        r = requests.get(target, headers=heads, timeout=30, proxies=proxy, verify=False)
        # Titles live in <h3 class="LC20lb MBeuO DKV0Md">...</h3> in the
        # current Google SERP markup; this breaks if Google changes classes.
        pattern = re.compile(r'class="LC20lb MBeuO DKV0Md">(.*?)</h3><div class=')
        titles = pattern.findall(r.text)
        print(target)
        if titles:
            # Open the output file once and append every title, instead of
            # reopening it for each individual result.
            with open("google-title.txt", 'a+', encoding='utf-8') as f:
                f.writelines(title + "\n" for title in titles)
    except Exception as e:
        # Say which query failed and why, instead of a bare "error".
        print("error: {} -> {}".format(text, e))
35
# Read one query per line from test.txt, closing the file deterministically
# (the original iterated an unclosed open() handle).
domain = []
with open(r'test.txt', 'r', encoding='utf-8') as infile:
    for line in infile:
        domain.append(line.strip('\n'))

# Fan the queries out over 20 worker threads.  Wait for ALL tasks: the
# original passed FIRST_COMPLETED, which returns after a single task even
# though every query was submitted (ALL_COMPLETED was imported but unused —
# clearly the intent).  The `with` block also shuts the pool down cleanly.
with ThreadPoolExecutor(max_workers=20) as executor:
    all_task = [executor.submit(Reptile, text) for text in domain]
    wait(all_task, return_when=ALL_COMPLETED)