import pymysql
import urllib3
from lxml import etree
import logging
import requests
import queue
import time
import threading
from threading import RLock
import re
thread_num = 0   # legacy counter from an earlier threaded version; appears unused here
lock = RLock()   # legacy lock; only referenced in commented-out code below
import multiprocessing
# Configure file logging for the whole script.
logging.basicConfig(
    level=logging.INFO,  # messages at INFO level and above are written to the file
    format='%(asctime)s %(filename)s %(levelname)s : %(message)s',  # log line layout
    datefmt='%Y-%m-%d %H:%M:%S',  # timestamp format
    filename='druginfoError.log',  # destination log file
    filemode='a')  # append mode ('w' would truncate on each run)
class yaoyuan(object):
    """Demo crawler harness: fans 1000 tasks across a pool of 10 worker
    processes; each task inserts one test row into the remote
    ``druginfo.text`` table.

    NOTE(review): database credentials are hard-coded below — move them to
    configuration or environment variables before this leaves development.
    """

    def __init__(self):
        # Record range this crawler is meant to cover.
        # ('strat_record' misspelling kept for backward compatibility.)
        self.strat_record = 1
        self.end_record = 10000001
        self.db = pymysql.connect(host='rm-bp195i4u0w1066u709o.mysql.rds.aliyuncs.com', port=3306, database='druginfo',
                                  user='qygwroot', password='kangcenet@123', charset='utf8')
        self.cursor = self.db.cursor()
        # Running the whole workload from __init__ is a heavy side effect;
        # kept because existing callers rely on `yaoyuan()` doing the work.
        self.parse_page()

    def parse_page(self):
        """Submit 1000 tasks to a 10-process pool and print elapsed seconds."""
        star_time = time.time()
        mypool = multiprocessing.Pool(10)  # 10 concurrent worker processes
        for i in range(1000):
            # NOTE(review): apply_async on a bound method pickles `self`
            # (including the open pymysql connection), which relies on a
            # fork start method; worker exceptions are silently dropped
            # because the AsyncResult is never .get()-ed — TODO confirm
            # this is acceptable for a throughput test.
            mypool.apply_async(self.parse_page_data, (i,))
        mypool.close()  # no further task submissions
        mypool.join()   # block until every worker finishes
        times = time.time() - star_time
        print(times)

    def parse_page_data(self, a):
        """Worker body: open a fresh connection and insert one test row.

        A new connection is created per task because pymysql connections
        cannot be shared across processes.

        :param a: task index, printed only for progress tracing.
        """
        print(a)
        db = pymysql.connect(host='rm-bp195i4u0w1066u709o.mysql.rds.aliyuncs.com', port=3306, database='druginfo',
                             user='qygwroot', password='kangcenet@123', charset='utf8')
        try:
            cursor = db.cursor()
            # Parameterized query instead of str.format(): avoids SQL
            # injection and quoting bugs if the value ever becomes dynamic.
            cursor.execute("insert into text(name) values(%s)",
                           ('这是一行mysql的测试数据',))
            db.commit()
            cursor.close()
        finally:
            # Always release the connection, even if the insert fails
            # (the original leaked one connection per task).
            db.close()
if __name__ == '__main__':
    # Instantiating yaoyuan connects to the database and immediately runs
    # the entire parse_page() workload as a side effect of __init__.
    a = yaoyuan()
# 1. Example: inter-process communication with Process and Manager().Queue()
import time
from multiprocessing import Process, Queue, Pool, Manager, Pipe
def producer(queue):
    """Put a single item ("a") on *queue*, then idle for two seconds."""
    item = "a"
    queue.put(item)
    time.sleep(2)
def consumer(queue):
    """Wait two seconds, then pop one item from *queue* and print it."""
    time.sleep(2)
    print(queue.get())
if __name__ == "__main__":
    queue = Manager().Queue(10)       # inter-process communication channel
    progress_list = Manager().list()  # shared list between processes (created but unused in this demo)
    progress_dict = Manager().dict()  # shared dict between processes (created but unused in this demo)
    my_producer = Process(target=producer, args=(queue,))
    my_consumer = Process(target=consumer, args=(queue,))
    my_producer.start()
    my_consumer.start()
    my_producer.join()
    my_consumer.join()
# multiprocessing中的queue不能用于pool进程池
# pool中的进程间通信需要使用manager中的queue
from queue import Queue # 线程间可以使用
from multiprocessing import Queue
from multiprocessing import Manager
def producer(queue):
    """Enqueue one message onto *queue*, then block for two seconds."""
    msg = "a"
    queue.put(msg)
    time.sleep(2)
def consumer(queue):
    """Sleep two seconds, then dequeue one item from *queue* and print it."""
    time.sleep(2)
    received = queue.get()
    print(received)
if __name__ == "__main__":
    # Manager().Queue() is required here: a plain multiprocessing.Queue
    # cannot be passed into Pool workers.
    queue = Manager().Queue(10)
    pool = Pool(2)
    for i in range(10):
        pool.apply_async(producer, args=(queue,))
        pool.apply_async(consumer, args=(queue,))
    pool.close()  # close() must be called before join(), otherwise join() raises
    pool.join()