1 # -*-coding:utf-8-*-
2 from bs4 import BeautifulSoup
3 import requests, time
4 url = 'https://knewone.com/discover?page='
5
def get_page(url, data=None):  # Fetch one page
    """Fetch a KnewOne discovery page and print each product's image, title and link.

    Args:
        url: Full URL of the page to fetch.
        data: When not None, printing is skipped (original behaviour preserved);
              callers normally leave this as None.
    """
    # timeout guards against the request hanging forever on a stalled connection
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'html.parser')

    imgs = soup.select('a.cover-inner > img')
    titles = soup.select('section.content > h4 >a')
    links = soup.select('section.content > h4 > a')
    # Print the scraped data
    if data is None:
        for img, title, link in zip(imgs, titles, links):
            data = {
                'img': img.get('src'),
                'title': title.get('title'),
                'link': link.get('href')
            }
            print(data)  # was a Python 2 `print data` statement
22
def get_more_pages(start, end):
    """Scrape discovery pages numbered [start, end).

    Args:
        start: First page number (inclusive).
        end: Last page number (exclusive), per `range` semantics.
    """
    for one in range(start, end):
        get_page(url + str(one))
        time.sleep(2)  # pause between requests to be polite to the server
27
# Guard the entry point so importing this module does not trigger scraping.
if __name__ == '__main__':
    get_more_pages(1, 10)  # scrape pages 1 through 9
# -*-coding:utf-8-*-
from bs4 import BeautifulSoup
import requests, time
url = 'https://knewone.com/discover?page='
def get_page(url, data=None):  # Fetch one page
    """Fetch a KnewOne discovery page and print each product's image, title and link.

    Args:
        url: Full URL of the page to fetch.
        data: When not None, printing is skipped (original behaviour preserved);
              callers normally leave this as None.
    """
    # timeout guards against the request hanging forever on a stalled connection
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'html.parser')

    imgs = soup.select('a.cover-inner > img')
    titles = soup.select('section.content > h4 >a')
    links = soup.select('section.content > h4 > a')
    # Print the scraped data
    if data is None:
        for img, title, link in zip(imgs, titles, links):
            data = {
                'img': img.get('src'),
                'title': title.get('title'),
                'link': link.get('href')
            }
            print(data)  # was a Python 2 `print data` statement
def get_more_pages(start, end):
    """Crawl the discovery listing for every page number in [start, end).

    Builds each page URL from the module-level `url` prefix and hands it to
    `get_page`, sleeping briefly after each request.
    """
    for page_number in range(start, end):
        page_url = url + str(page_number)
        get_page(page_url)
        time.sleep(2)  # throttle: one request every two seconds
# Guard the entry point so importing this module does not trigger scraping.
if __name__ == '__main__':
    get_more_pages(1, 10)  # scrape pages 1 through 9