Python连接MongoDB
pip install pymongo
pymongo 官方文档: https://pymongo.readthedocs.io/en/stable/
建立连接
import pymongo
def get_db(database, host, port, user, pwd):
    """Connect to a MongoDB server and return a handle to one database.

    Args:
        database: Name of the database to select on the client.
        host: Server hostname or IP address.
        port: Server port.
        user: Username handed to ``MongoClient``.
        pwd: Password handed to ``MongoClient``.

    Returns:
        The object obtained by indexing the client with ``database``.
    """
    connection = pymongo.MongoClient(
        host=host,
        port=port,
        username=user,
        password=pwd,
    )
    return connection[database]
# get_db() already returns the database handle for "test_db1" (it indexes the
# client before returning), so the value is bound to `db` rather than `client`.
# Indexing that handle again with "test_db1" would select a *collection* of
# that name instead of switching databases, so that step is dropped.
db = get_db("test_db1", "192.168.4.63", 20001, "root", "mima123456,")

# Print every document stored in the "mycol1" collection.
for doc in db["mycol1"].find():
    print(doc)
使用 pymongo 完成增删改查
import pymongo
def get_db(database, host, port, user, pwd):
    """Open a MongoDB connection and select a database on it.

    Args:
        database: Name of the database to return.
        host: Server hostname or IP address.
        port: Server port.
        user: Username used for authentication.
        pwd: Password used for authentication.

    Returns:
        The database handle ``client[database]``.
    """
    credentials = {"username": user, "password": pwd}
    mongo_client = pymongo.MongoClient(host=host, port=port, **credentials)
    selected = mongo_client[database]
    return selected
# Module-level database handle that the CRUD helpers below read.
db = get_db(
    "test_db1",
    "192.168.4.63",
    20001,
    "root",
    "mima123456,",
)
# Create / read / update / delete helpers
# Insert documents
def add_one(table, data):
    """Insert a single document into a collection.

    Args:
        table: Name of the collection inside the module-level ``db``.
        data: The document (a dict) to insert.

    Returns:
        The result object produced by ``insert_one``.
    """
    collection = db[table]
    return collection.insert_one(data)
def add_many(table, data_list):
    """Insert several documents into a collection in one call.

    Args:
        table: Name of the collection inside the module-level ``db``.
        data_list: List of documents (dicts) to insert.

    Returns:
        The list of inserted ids; an empty list when ``data_list`` is empty.
    """
    # PyMongo's insert_many rejects an empty document list, so an empty batch
    # is treated as a no-op instead of an error.
    if not data_list:
        return []
    result = db[table].insert_many(data_list)
    return result.inserted_ids
def upd(table, condition, data):
    """Set fields on every document that matches a filter.

    Args:
        table: Name of the collection inside the module-level ``db``.
        condition: Filter document selecting which documents to update.
        data: Mapping of field names to the new values to ``$set``.

    Returns:
        The result object produced by ``update_many``.
    """
    # Use the caller's `data` as-is; the previous version replaced it with a
    # hard-coded dict, so the requested update was never applied.
    return db[table].update_many(condition, {'$set': data})
def delete(table, condition):
    """Delete every document that matches a filter.

    Args:
        table: Name of the collection inside the module-level ``db``.
        condition: Filter document selecting which documents to delete.

    Returns:
        The result object produced by ``delete_many`` (its ``deleted_count``
        attribute holds the number of removed documents).
    """
    # Collection.remove() was deprecated in PyMongo 3 and no longer exists in
    # PyMongo 4; delete_many() is the replacement for a multi-document remove.
    return db[table].delete_many(condition)
if __name__ == '__main__':
    # Other helpers can be tried the same way, for example:
    #   add_one("stu", {"name": "西瓜", "age": 18}).inserted_id
    #   add_many("stu", [{"name": "嘎嘎"}, {"name": "咔咔"}])
    #   upd("stu", {"name": 99999}, {"age": 100})
    condition = {"name": "哈哈"}
    result = delete("stu", condition)
    print(result)
实战:抓取链家二手房数据并存入 MongoDB
import requests
from lxml import etree
import pymongo
from concurrent.futures import ThreadPoolExecutor
def get_db(database, host, port, user, pwd):
    """Return a database handle from a freshly created MongoDB client.

    Args:
        database: Name of the database to select.
        host: Server hostname or IP address.
        port: Server port.
        user: Username for the connection.
        pwd: Password for the connection.

    Returns:
        The result of indexing the new client with ``database``.
    """
    return pymongo.MongoClient(
        host=host, port=port, username=user, password=pwd
    )[database]
# Module-level database handle read by add_many() when storing listings.
db = get_db(
    "test_db1",
    "192.168.4.63",
    20001,
    "root",
    "mima123456,",
)
def add_many(table, data_list):
    """Insert a batch of documents into a collection.

    Args:
        table: Name of the collection inside the module-level ``db``.
        data_list: List of documents (dicts) to insert.

    Returns:
        The list of inserted ids; an empty list when ``data_list`` is empty.
    """
    # A page that yields no listings produces an empty batch; PyMongo's
    # insert_many rejects an empty list, so skip the call in that case.
    if not data_list:
        return []
    result = db[table].insert_many(data_list)
    return result.inserted_ids
def get_page_source(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout by default, which would let a stalled
            connection block a worker thread indefinitely.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    resp = requests.get(url, timeout=timeout)
    return resp.text
def _split_house_info(raw):
    """Split the " | "-separated summary text into its individual fields."""
    fields = raw.split(" | ")
    # Heuristic padding/trimming so the usual variants end up with seven
    # fields; the result may still be wrong for unusual listings.
    if len(fields) == 6:
        # Leave the sixth slot (unpacked as "nianfen") blank.
        fields.insert(5, "")
    elif len(fields) == 8:
        fields.pop()
    return fields


def _parse_listing(li):
    """Build the document for one ``<li>`` listing element.

    Raises IndexError when an expected node is missing and ValueError when
    the text does not split into the expected number of parts.
    """
    title = li.xpath("./div[1]/div[1]/a/text()")[0]
    position_info = "-".join(
        s.strip() for s in li.xpath("./div[1]/div[2]/div/a/text()")
    )
    house_fields = _split_house_info(li.xpath("./div[1]/div[3]/div/text()")[0])
    huxing, mianji, chaoxiang, zhangxiu, louceng, nianfen, jiegou = house_fields
    guanzhu, fabushijian = li.xpath("./div[1]/div[4]/text()")[0].split(" / ")
    tags = li.xpath("./div[1]/div[5]/span/text()")
    return {
        "title": title,
        "position": position_info,
        "huxing": huxing,
        "mianji": mianji,
        "chaoxiang": chaoxiang,
        "zhangxiu": zhangxiu,
        "louceng": louceng,
        "nianfen": nianfen,
        "jiegou": jiegou,
        "guanzhu": guanzhu,
        "fabushijian": fabushijian,
        "tags": tags,
    }


def parse_html(html):
    """Extract the listings from one result page and store them in MongoDB.

    Every ``<li>`` under ``ul.sellListContent`` is parsed into a dict. A
    listing that cannot be parsed is reported on stdout and skipped, so one
    malformed entry no longer discards the rest of the page. The parsed
    documents are then inserted into the "ershoufang" collection.

    Args:
        html: Page source as a string.

    Returns:
        None.
    """
    tree = etree.HTML(html)
    # NOTE(review): etree.HTML appears to return None for an empty document;
    # guard so an empty response does not fail on ``.xpath`` below.
    if tree is None:
        return
    li_list = tree.xpath("//ul[@class='sellListContent']/li")

    lst = []
    for li in li_list:
        try:
            lst.append(_parse_listing(li))
        except (IndexError, ValueError) as e:
            print(e)
            # Show the raw summary text of the listing that failed. It is
            # re-queried here so the diagnostic is always defined, even when
            # the failure happened before the summary was read.
            print(li.xpath("./div[1]/div[3]/div/text()"))

    # Nothing parsed (e.g. an empty or blocked page): skip the insert rather
    # than sending an empty batch to the database.
    if not lst:
        return

    # Store in MongoDB; keep the original best-effort behaviour of reporting
    # a storage failure instead of raising it.
    try:
        add_many("ershoufang", lst)
    except Exception as e:
        print(e)
def main(url):
    """Fetch one listing page and pass its HTML to ``parse_html``.

    Args:
        url: Address of the page to process.
    """
    parse_html(get_page_source(url))
if __name__ == '__main__':
    # Process result pages 1 through 9 on a pool of 10 worker threads; the
    # ``with`` block waits for all submitted pages before exiting.
    page_urls = [
        f"https://bj.lianjia.com/ershoufang/pg{page}/" for page in range(1, 10)
    ]
    with ThreadPoolExecutor(10) as pool:
        for page_url in page_urls:
            pool.submit(main, page_url)
浙公网安备 33010602011771号