Python连接MongoDB

用 Python 操作 MongoDB, 首选的库就是 pymongo. 首先, 安装这个模块:

pip install pymongo

 

pymongo 官方文档: https://pymongo.readthedocs.io/en/stable/

 

建立连接

import pymongo

def get_db(database, host, port, user, pwd):
    """Connect to a MongoDB server and return a handle to one database.

    Args:
        database: Name of the database to select.
        host: Server host name or IP address.
        port: Server port.
        user: User name used for authentication.
        pwd: Password used for authentication.

    Returns:
        The database object obtained by indexing the client with ``database``.
    """
    mongo = pymongo.MongoClient(
        host=host,
        port=port,
        username=user,
        password=pwd,
    )
    return mongo[database]

# get_db() already returns the selected database (not the client), so the
# handle is named `db` and collections are looked up on it directly.
# NOTE: credentials are hard-coded only for this demo.
db = get_db("test_db1", "192.168.4.63", 20001, "root", "mima123456,")

# Print every document stored in the "mycol1" collection.
for doc in db["mycol1"].find():
    print(doc)

 

完成增删改查

import pymongo

def get_db(database, host, port, user, pwd):
    """Open a MongoDB connection and return the requested database handle.

    Args:
        database: Name of the database to select.
        host: Server host name or IP address.
        port: Server port.
        user: User name used for authentication.
        pwd: Password used for authentication.

    Returns:
        The database object obtained by indexing the client with ``database``.
    """
    connection = pymongo.MongoClient(
        host=host,
        port=port,
        username=user,
        password=pwd,
    )
    return connection[database]

# Shared database handle used by every CRUD helper below.
db = get_db(
    database="test_db1",
    host="192.168.4.63",
    port=20001,
    user="root",
    pwd="mima123456,",
)

# CRUD helpers: insert, update, delete.
def add_one(table, data):
    """Insert a single document into collection ``table``.

    Args:
        table: Name of the collection on the module-level ``db``.
        data: The document (dict) to insert.

    Returns:
        Whatever ``insert_one`` returns for the inserted document.
    """
    collection = db[table]
    return collection.insert_one(data)

def add_many(table, data_list):
    """Insert several documents into collection ``table``.

    Args:
        table: Name of the collection on the module-level ``db``.
        data_list: List of documents (dicts) to insert.

    Returns:
        The ``inserted_ids`` attribute of the ``insert_many`` result.
    """
    collection = db[table]
    return collection.insert_many(data_list).inserted_ids

def upd(table, condition, data):
    """Update every document in ``table`` that matches ``condition``.

    Args:
        table: Name of the collection on the module-level ``db``.
        condition: Filter document selecting the documents to update.
        data: Mapping of field names to new values; applied with ``$set``.

    Returns:
        The result object returned by ``update_many``.
    """
    # Apply the caller's `data`; it was previously overwritten by a
    # hard-coded dict, so the requested update was silently ignored.
    return db[table].update_many(condition, {"$set": data})

def delete(table, condition):
    """Delete every document in ``table`` that matches ``condition``.

    Args:
        table: Name of the collection on the module-level ``db``.
        condition: Filter document selecting the documents to delete.

    Returns:
        The result object returned by ``delete_many``.
    """
    # Collection.remove() was deprecated in PyMongo 3 and removed in
    # PyMongo 4; delete_many() is its replacement for removing all matches.
    return db[table].delete_many(condition)

if __name__ == '__main__':
    # Usage examples for the other helpers (uncomment to try them):
    #   r = add_one("stu", {"name": "西瓜", "age": 18})
    #   print(r.inserted_id)
    #   ids = add_many("stu", [{"name": "嘎嘎"}, {"name": "咔咔"}])
    #   print(ids)  # add_many already returns the list of inserted ids
    #   print(upd("stu", {"name": 99999}, {"age": 100}))
    outcome = delete("stu", {"name": "哈哈"})
    print(outcome)

 

抓链家!!!

import requests
from lxml import etree
import pymongo
from concurrent.futures import ThreadPoolExecutor

def get_db(database, host, port, user, pwd):
    """Create a MongoDB client and return the database named ``database``.

    Args:
        database: Name of the database to select.
        host: Server host name or IP address.
        port: Server port.
        user: User name used for authentication.
        pwd: Password used for authentication.

    Returns:
        The database object obtained by indexing the client with ``database``.
    """
    mongo_client = pymongo.MongoClient(
        host=host,
        port=port,
        username=user,
        password=pwd,
    )
    return mongo_client[database]

# Shared database handle that the scraper stores its results in.
db = get_db(
    database="test_db1",
    host="192.168.4.63",
    port=20001,
    user="root",
    pwd="mima123456,",
)

def add_many(table, data_list):
    """Bulk-insert ``data_list`` into collection ``table``.

    Args:
        table: Name of the collection on the module-level ``db``.
        data_list: List of documents (dicts) to insert.

    Returns:
        The ``inserted_ids`` attribute of the ``insert_many`` result.
    """
    target = db[table]
    return target.insert_many(data_list).inserted_ids

def get_page_source(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block its worker indefinitely.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    resp.raise_for_status()
    return resp.text

def _split_house_info(raw):
    """Split the ' | '-separated house-info line into exactly seven fields.

    Raises:
        ValueError: If the line cannot be normalised to seven fields.
    """
    fields = raw.split(" | ")
    # Heuristic normalisation (may not always be right): a 6-field line gets
    # an empty value inserted at index 5, an 8-field line loses its last
    # value.
    if len(fields) == 6:
        fields.insert(5, "")
    elif len(fields) == 8:
        fields.pop()
    if len(fields) != 7:
        raise ValueError(f"unexpected house info layout: {fields!r}")
    return fields


def _parse_listing(li):
    """Build the document for one listing ``<li>`` element.

    Raises:
        IndexError: If an expected text node is missing.
        ValueError: If a text line does not split into the expected parts.
    """
    title = li.xpath("./div[1]/div[1]/a/text()")[0]
    position_info = "-".join(
        s.strip() for s in li.xpath("./div[1]/div[2]/div/a/text()")
    )
    huxing, mianji, chaoxiang, zhangxiu, louceng, nianfen, jiegou = (
        _split_house_info(li.xpath("./div[1]/div[3]/div/text()")[0])
    )
    guanzhu, fabushijian = li.xpath("./div[1]/div[4]/text()")[0].split(" / ")
    tags = li.xpath("./div[1]/div[5]/span/text()")

    return {
        "title": title,
        "position": position_info,
        "huxing": huxing,
        "mianji": mianji,
        "chaoxiang": chaoxiang,
        "zhangxiu": zhangxiu,
        "louceng": louceng,
        "nianfen": nianfen,
        "jiegou": jiegou,
        "guanzhu": guanzhu,
        "fabushijian": fabushijian,
        "tags": tags,
    }


def parse_html(html):
    """Extract the listings from one result page and store them in MongoDB.

    Each ``<li>`` under ``ul.sellListContent`` is parsed independently: a
    listing that does not match the expected layout is reported and skipped
    instead of discarding the whole page. The parsed documents are inserted
    into the "ershoufang" collection in a single batch.

    Args:
        html: Page source of a listing page.
    """
    tree = etree.HTML(html)
    li_list = tree.xpath("//ul[@class='sellListContent']/li")

    lst = []
    for li in li_list:
        try:
            lst.append(_parse_listing(li))
        except (IndexError, ValueError) as e:
            # Only this listing is malformed; keep the rest of the page.
            print(e)

    if not lst:
        # insert_many() rejects an empty list, so there is nothing to store.
        return

    try:
        # Store in MongoDB.
        add_many("ershoufang", lst)
    except Exception as e:
        # Runs inside a worker thread whose exceptions are never retrieved,
        # so report storage failures here rather than lose them silently.
        print(e)

def main(url):
    """Fetch the page at ``url`` and pass its source to ``parse_html``."""
    parse_html(get_page_source(url))

if __name__ == '__main__':
    # Pages 1 through 9 of the listing, each handled by a pool worker.
    page_urls = [
        f"https://bj.lianjia.com/ershoufang/pg{page}/" for page in range(1, 10)
    ]
    with ThreadPoolExecutor(10) as pool:
        for page_url in page_urls:
            pool.submit(main, page_url)

 

posted @ 2022-05-27 00:23  屠魔的少年  阅读(5)  评论(0)    收藏  举报