import re
'''
1.用正则表达式判断邮箱是否输入正确
2.用正则表达式识别出全部的电话号码(这里用的是固话模式)
3.用正则表达式进行英文单词的分词
'''
# Local part: one ASCII letter/digit followed by letters, digits or underscores.
# Domain: lowercase letters, a dot, then a suffix built from the letters
# c, o, m, n (so "com" and "cn" are accepted).
_EMAIL_PATTERN = re.compile(r'[0-9a-zA-Z][0-9a-zA-Z_]*@[a-z]+\.[comn]+')


def em_match(e):
    """Check whether ``e`` is laid out like an e-mail address and print the verdict.

    Args:
        e: The value to validate; it is converted with ``str()`` before matching.

    Returns:
        None. Prints ``'Success!'`` when the entire text matches the e-mail
        pattern and ``'Wrong layout'`` otherwise.
    """
    # fullmatch (not findall/search) so that the whole input has to be an
    # address; a valid address embedded in other text is rejected.
    if _EMAIL_PATTERN.fullmatch(str(e)):
        print('Success!')
    else:
        print('Wrong layout')
# Landline layout: 3-5 digit area code, a hyphen, then a 6-8 digit number.
_TEL_PATTERN = re.compile(r'[0-9]{3,5}-[0-9]{6,8}')


def tel_match(n):
    """Check whether ``n`` is laid out like a landline number and print the verdict.

    Args:
        n: The value to validate; it is converted with ``str()`` before matching.

    Returns:
        None. Prints ``'Success!'`` when the entire text is
        ``<3-5 digits>-<6-8 digits>`` and ``'Wrong layout'`` otherwise.
    """
    # fullmatch returns None on failure, so no exception handling is needed to
    # detect "no match", and surrounding text or extra digits are rejected.
    if _TEL_PATTERN.fullmatch(str(n)):
        print('Success!')
    else:
        print('Wrong layout')
def word_split(mm):
    """Split English text into words and print the resulting list.

    A word is a run of ASCII letters/digits, optionally joined by an inner
    apostrophe or hyphen (``It's``, ``well-known``). Whitespace and
    punctuation act as separators and never appear in the output, so no
    empty tokens are produced for leading, trailing or repeated whitespace.

    Args:
        mm: The text to tokenize; it is converted with ``str()`` first.

    Returns:
        None. The list of words is printed.
    """
    # Collect the words themselves instead of splitting on separators: this
    # avoids '' entries and strips commas/periods attached to words.
    words = re.findall(r"[A-Za-z0-9]+(?:['-][A-Za-z0-9]+)*", str(mm))
    print(words)
# Interactive demo, executed at module level.
# Prompt for an e-mail address and report whether its layout is accepted.
e = input("请输入您的email:")
em_match(e)
# Prompt for a phone number and report whether its layout is accepted.
n = input("请输入您的电话:")
tel_match(n)
# Sample English passage passed to word_split below.
m = '''
Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation.
This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the
flames of withering injustice. It came as a joyous daybreak to end the long night of bad captivity.
'''
word_split(m)
import re
import requests
from bs4 import BeautifulSoup
def get_ncoding(url):
    """Return the numeric article id embedded in a news URL.

    The id is the digit run between ``_<digits>/`` and ``.html``, e.g.
    ``'9183'`` for ``.../xiaoyuanxinwen_0404/9183.html``.

    Args:
        url: The article URL; it is converted with ``str()`` before matching.

    Returns:
        The id of the first match, as a string.

    Raises:
        IndexError: If the URL contains no matching segment.
    """
    article_id_pattern = re.compile(r'_[0-9]+/(\d+)\.html')
    article_ids = article_id_pattern.findall(str(url))
    # Plain indexing keeps the IndexError for URLs without a match.
    return str(article_ids[0])
def click_count(num, timeout=10):
    """Fetch the click counter of a news article from the counter endpoint.

    Args:
        num: Article id; converted with ``str()`` when building the URL.
        timeout: Seconds to wait for the server before ``requests`` aborts
            the call. Without a timeout the request could block forever.

    Returns:
        The digits captured from the last ``.html('<digits>');`` occurrence
        in the response, as a string.

    Raises:
        IndexError: If the response contains no such occurrence.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    c_url = 'http://oa.gzcc.cn/api.php?op=count&id=' + str(num) + '&modelid=80'
    resc = requests.get(c_url, timeout=timeout)
    resc.encoding = 'utf-8'
    soupc = BeautifulSoup(resc.text, 'html.parser')
    # The last match is used, in case the response carries several
    # .html('...') calls.
    matches = re.findall(r'.html\(\'(\d+)\'\);', str(soupc))
    return str(matches[-1])
def get_new_details(url, timeout=10):
    """Download a news page and extract its title, info line and body text.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the server before ``requests`` aborts
            the call. Without a timeout the request could block forever.

    Returns:
        A ``(title, info, content)`` tuple of strings taken from the first
        elements matching ``.show-title``, ``.show-info`` and
        ``.show-content``; ``content`` has surrounding whitespace stripped.

    Raises:
        IndexError: If any of the three selectors matches nothing.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    res = requests.get(url, timeout=timeout)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    title = soup.select('.show-title')[0].get_text()
    info = soup.select('.show-info')[0].get_text()
    content = soup.select('.show-content')[0].get_text().strip()
    return title, info, content
if __name__ == '__main__':
    # Demo: look up one article's id, click count and page details.
    n_url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0404/9183.html'
    num_news = int(get_ncoding(n_url))
    cl_times = int(click_count(num_news))
    # Download and parse the page once and unpack the tuple, instead of
    # calling get_new_details separately for each field.
    tit, inf, cont = get_new_details(n_url)
    print(num_news, '\n', cl_times, '\n', tit, '\n', inf, '\n', cont)