Python: download file
def unGz(file_name):
    """
    Decompress a gzip file to disk (the module must ``import gzip``).

    The output is written next to the input. Its name is ``file_name`` with
    a trailing ``.gz`` removed; when ``file_name`` does not end in ``.gz``
    the output is ``file_name + ".out"`` so the input is never overwritten.

    :param file_name: path of the gzip-compressed file
    :return: None
    """
    # Strip only the trailing suffix; a ".gz" elsewhere in the path
    # (for example in a directory name) must be left untouched.
    if file_name.endswith(".gz"):
        out_name = file_name[:-len(".gz")]
    else:
        out_name = file_name + ".out"
    # GzipFile yields bytes, so the destination must be opened in binary
    # mode. Both handles are closed by the with-statement even on error.
    with gzip.GzipFile(file_name) as g_file, open(out_name, "wb") as out_file:
        # Copy in fixed-size chunks so large archives are not held in memory.
        for chunk in iter(lambda: g_file.read(64 * 1024), b""):
            out_file.write(chunk)
def unZip(file_name):
    """
    Extract every member of a zip archive (the module must ``import zipfile``
    and ``import os``).

    Members are extracted into the directory ``file_name + "_files"``, which
    is created when it does not exist yet.

    :param file_name: path of the zip archive
    :return: None
    """
    target_dir = file_name + "_files"
    # The with-statement closes the archive even if extraction fails.
    with zipfile.ZipFile(file_name) as zip_file:
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(target_dir, exist_ok=True)
        zip_file.extractall(target_dir)
def getLink(url:str):
    """
    Request ``url`` and return the link of the first article in the JSON reply.

    The response body is parsed as JSON and the value at
    ``['getalbum_resp']['article_list'][0]['url']`` is returned.

    :param url: address of the album endpoint that answers with JSON
    :return: the ``url`` field of the first article, or ``None`` when the
        article list is empty
    :raises requests.RequestException: on timeout, connection failure or a
        non-success HTTP status
    :raises KeyError: when the JSON lacks the expected keys
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = json.loads(response.text)
    articles = data['getalbum_resp']['article_list']
    # The caller tests the result for truthiness, so report "no article"
    # with None rather than an IndexError.
    if not articles:
        return None
    return articles[0]['url']
def getZipUrl(link:str):
    """
    Download the page at ``link`` and collect the zip URLs found in its text.

    Only the text of the ``<div id="js_content">`` element is searched, using
    a non-greedy match for strings that start with ``https://`` and end with
    ``.zip``.

    :param link: address of the article page
    :return: list of matching URL strings in order of appearance; empty when
        nothing matches or the content element is missing
    :raises requests.RequestException: on timeout, connection failure or a
        non-success HTTP status
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    content_div = soup.find('div', {'id': 'js_content'})
    # find() returns None when the element is absent; the caller already
    # handles an empty result, so return one instead of raising.
    if content_div is None:
        return []
    return re.findall(r'https://.*?\.zip', content_div.get_text())
def requestsDownload(url:str,newfile:str):
    """
    Download ``url`` and save the response body to ``newfile``.

    The destination file is only created after the download has completed
    with a success status, so a failed request leaves no file behind.

    :param url: address of the resource to download
    :param newfile: path of the file to write (overwritten if it exists)
    :return: None
    :raises requests.RequestException: on timeout, connection failure or a
        non-success HTTP status
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=60)
    # Refuse to save an HTTP error page under the target file name.
    response.raise_for_status()
    with open(newfile, 'wb') as file:
        file.write(response.content)
def print_hi(name):
    """
    Print a one-line greeting that embeds ``name``.

    :param name: value interpolated into the greeting
    :return: None
    """
    greeting = f'Hi, {name} world,geovindu,涂聚文'
    print(greeting)
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # print_hi('PyCharm,python language')
    # Album endpoint that returns JSON; getLink() takes the first article
    # link from it (per the original note, the latest IP database release).
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl = ""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if zip_url:
                # List every zip link found; only the first one is downloaded.
                for found in zip_url:
                    print(found)
                downurl = zip_url[0]
            else:
                print("没有找到zip链接")
        else:
            print("没有找到微信推文链接")
    except Exception as e:
        print("出现错误:", e)
    # Download only when a link was found; requesting an empty URL would
    # raise after the problem has already been reported above.
    if downurl:
        requestsDownload(downurl, "geovindu.zip")
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
浙公网安备 33010602011771号