学习代码-----

#coding = utf-8
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time

class driver_1(object):
    """Small Selenium helper that drives Chrome through Google searches."""

    def __init__(self):
        # Raw string: the Windows path contains backslashes, and "\c" is not a
        # valid escape sequence (newer Python versions warn about it).
        self.driver = webdriver.Chrome(r'D:\chromedriver_win32\chromedriver.exe')

    def logload_Chrome(self):
        """Navigate the browser to the Google home page."""
        self.driver.get('https://www.google.com/')

    def Search(self):
        """Open Google and submit the fixed demo query ``aaaa``."""
        self.logload_Chrome()
        search_box = self.driver.find_element_by_name('q')
        search_box.send_keys('aaaa')
        search_box.send_keys(Keys.ENTER)

    def Search_site(self, site):
        """Run a ``site:'<site>'`` Google query and collect the listed results.

        The query string is stored on ``self.site``. After submitting it, every
        open window is visited; for each window whose title contains ``site``
        the result headings and links are paired up.

        Args:
            site: Domain to restrict the search to (e.g. ``'suda.edu.cn'``).

        Returns:
            dict mapping result heading text to its ``href``. Empty when no
            window title contains ``site`` or no results were found.
        """
        self.driver.implicitly_wait(10)
        self.site = 'site:' + '\'' + site + '\''
        self.logload_Chrome()
        search_box = self.driver.find_element_by_name('q')
        search_box.send_keys(self.site)
        search_box.send_keys(Keys.ENTER)

        # Bound before the loop so that "no matching window" yields an empty
        # dict instead of an UnboundLocalError at the return statement.
        dict_name_url = {}
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            # Match on the requested site rather than a hard-coded domain.
            if str(site) not in self.driver.title:
                continue
            print('当前位置:%s' % self.driver.title)
            headings = self.driver.find_elements_by_css_selector('#search .g .r h3')
            links = self.driver.find_elements_by_css_selector('#search .g .r>a[href]')
            # zip() stops at the shorter list, so a heading without a matching
            # link can no longer raise IndexError.
            for heading, link in zip(headings, links):
                dict_name_url[str(heading.text)] = str(link.get_attribute('href'))

        return dict_name_url



if __name__ == "__main__":
    # Prompt for the domain, run one site-restricted search and dump the
    # (heading, url) pairs that were found.
    site = str(input('输入需要搜索的站点:'))
    browser = driver_1()
    found = browser.Search_site(site)
    print(found.items())

 https://pan.baidu.com/s/1JHzdTqfWMH_wqPJUjSA_cw

#coding = utf-8
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pymysql

class driver_1(object):
    """Selenium helper that runs a site-restricted Google search and pages
    through the results, collecting heading/URL pairs."""

    def __init__(self):
        # Raw string: the Windows path contains backslashes, and "\c" is not a
        # valid escape sequence (newer Python versions warn about it).
        self.driver = webdriver.Chrome(r'D:\chromedriver_win32\chromedriver.exe')

    def logload_Chrome(self):
        """Navigate the browser to the Google home page."""
        self.driver.get('https://www.google.com/')

    def Search_site(self, site):
        """Submit the query ``site:'<site>'`` on Google.

        The full query is stored on ``self.site``; the raw ``site`` value is
        remembered as well so that ``Cyclic_Search`` can reuse it.
        """
        self.driver.implicitly_wait(10)
        self._raw_site = site
        self.site = 'site:' + '\'' + site + '\''
        self.logload_Chrome()
        search_box = self.driver.find_element_by_name('q')
        search_box.send_keys(self.site)
        search_box.send_keys(Keys.ENTER)

    def Save_name_url(self, site):
        """Collect heading/URL pairs from every window whose title contains ``site``.

        Returns:
            dict mapping result heading text to its ``href``. Empty when no
            window title matches or the page lists no results.
        """
        # Bound before the loop so a missing match returns {} rather than
        # raising UnboundLocalError at the return statement.
        dict_name_url = {}
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            if str(site) not in self.driver.title:
                continue
            headings = self.driver.find_elements_by_css_selector('#search .g .r h3')
            links = self.driver.find_elements_by_css_selector('#search .g .r>a[href]')
            # zip() stops at the shorter list, avoiding IndexError when a
            # heading has no matching link element.
            for heading, link in zip(headings, links):
                dict_name_url[str(heading.text)] = str(link.get_attribute('href'))
        return dict_name_url

    def Cyclic_Search(self, num, site=None):
        """Click the pager's last cell ``num`` times, scraping after each click.

        Args:
            num: Number of additional result pages to visit.
            site: Site string used to recognise the results window. Defaults to
                the value last passed to ``Search_site``.

        Returns:
            dict with the heading/URL pairs of all visited pages combined
            (empty when ``num`` is 0 or less).

        Raises:
            ValueError: if pages must be visited but no site is known.
        """
        if site is None:
            # getattr: the attribute only exists once Search_site has run.
            site = getattr(self, '_raw_site', None)
        if site is None and num > 0:
            raise ValueError('site is unknown: call Search_site() first or pass site=')

        collected = {}
        visited = 0
        while visited < num:
            self.driver.find_element_by_css_selector('[valign=top] td:nth-last-child(1)').click()
            # Accumulate, so earlier pages are not discarded by later ones.
            collected.update(self.Save_name_url(site))
            visited += 1
        return collected

    def Save_file(self):
        """Placeholder for saving the collected pairs to a file (not implemented)."""
        pass

class db(object):
    """Writes scraped rows into the ``information_site`` table of the ``suda`` database."""

    def connect(self, i, key, value):
        """Insert one ``(i, key, value)`` row, commit it and close the connection.

        Args:
            i: Row number; converted with ``int()`` before being sent.
            key: First text column (the callers in this file pass the result heading).
            value: Second text column (the callers in this file pass the result URL).

        Raises:
            pymysql.MySQLError: if connecting or inserting fails (after one
                reconnect-and-retry on an operational error).
        """
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration or environment variables.
        # Keyword arguments: PyMySQL 1.x no longer accepts these positionally.
        conn = pymysql.connect(host='localhost', user='root',
                               password='Password@2020', database='suda')
        # Placeholders let the driver escape the values, so quotes inside a
        # page title can neither break the statement nor inject SQL.
        sql = "insert into information_site values(%s,%s,%s);"
        params = (int(i), key, value)
        try:
            try:
                with conn.cursor() as cursor:
                    cursor.execute(sql, params)
            except pymysql.OperationalError:
                # Connection-level failure: reconnect once and retry.
                conn.ping()
                with conn.cursor() as cursor:
                    cursor.execute(sql, params)
            # Without an explicit commit the insert is discarded on close.
            conn.commit()
        finally:
            if conn.open:
                conn.close()


if __name__ == "__main__":
    site = str(input('输入需要搜索的站点:'))
    # The first page is scraped on its own below, so only (pages - 1)
    # further pages have to be visited afterwards.
    extra_pages = int(input('请输入需要搜索的页数:')) - 1

    # Run the search and gather the results of every page.
    crawler = driver_1()
    crawler.Search_site(site)
    first_page = crawler.Save_name_url(site)
    later_pages = crawler.Cyclic_Search(extra_pages)

    # Later pages win when the same heading shows up twice.
    merged = dict(first_page)
    merged.update(later_pages)

    print('信息收集完成%s' % merged.items())
    print('提取信息数目%s' % len(merged))
    print('准备存入数据')
    # The string literal below is the disabled database-persistence step; it is
    # kept as an inert expression and never executed.
    '''
    #保存数据
    b = db()
    i = 1
    for key,value in now_name.items():
        b.connect(i,key,value)
        i += 1
    print('数据存入完成')
    '''

 

posted @ 2020-04-20 21:48  天象独行  阅读(204)  评论(0)  编辑  收藏  举报