import requests
import re
import time
from redis import Redis
import threading
# Redis connection used as the shared URL work queue.
REDIS_HOST, REDIS_PORT, PASSWORD = '192.168.2.51', '6379', 'mypwd'
rds = Redis(host=REDIS_HOST, port=REDIS_PORT, password=PASSWORD)
f, url_l, filter_replace_l = 'kwaddress_address_20180227.json', [], ['\n', '\t', ' ']

# Seed the 'chk_url_all' set: one address per line of the input file.
with open(f, 'r', encoding='utf-8') as fr:
    for line in fr:
        try:
            # Keep the text after the last 'address":"' marker and before the
            # first '"}' that follows it.
            tail = line.rpartition('address":"')[2]
            address = tail.partition('"}')[0]
            rds.sadd('chk_url_all', address)
        except Exception as e:
            # Best effort: report the failure and move on to the next line.
            print(e)
去内存,持久化
import pymysql
from redis import Redis
import time
# MySQL connection settings (host, port, user, password, database) read by
# mysql_fetch() and mysql_write() below.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration or the environment instead.
h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'nfwt&2016', 'xl_product_DONOT_REMOVE'
def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query on a fresh MySQL connection and return all rows.

    Connection settings come from the module-level ``h``, ``pt``, ``u``,
    ``p`` and ``db`` values.

    Args:
        sql: The SQL statement to execute.
        res_type: ``'dic'`` to get each row as a dict (``DictCursor``);
            any other value uses the connection's default cursor.
        args: Optional parameters bound by the driver into ``sql``. Prefer
            this over formatting values into the SQL string yourself.

    Returns:
        The result of ``cursor.fetchall()``, or an empty tuple when the
        connection could not be opened (the error is printed).

    Raises:
        Any exception raised while executing the statement; the cursor and
        connection are closed before it propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor and connection are still open.
            rows = cursor.fetchall()
            conn.commit()
            return rows
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()
def mysql_write(sql, args=None):
    """Execute a write statement on a fresh MySQL connection and commit it.

    Connection settings come from the module-level ``h``, ``pt``, ``u``,
    ``p`` and ``db`` values.

    Args:
        sql: The SQL statement to execute.
        args: Optional parameters bound by the driver into ``sql``. Prefer
            this over formatting values into the SQL string yourself.

    Returns:
        ``0`` after a successful commit, ``1`` when the connection could not
        be opened (the error is printed).

    Raises:
        Any exception raised while executing or committing; the cursor and
        connection are closed before it propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0
# Redis connection holding the 'chk_url_404' set filled by the URL checker.
REDIS_HOST, REDIS_PORT, PASSWORD = '192.168.2.51', '6379', 'mypwd'
rds = Redis(host=REDIS_HOST, port=REDIS_PORT, password=PASSWORD)

# Every 15 minutes, copy URLs from the Redis set into the chk_url_404 MySQL
# table, inserting only those that have no row yet.
# NOTE(review): the SQL below is built by formatting the URL into the string;
# a URL containing a double quote will break the statement.
while True:
    urls_404 = [member.decode('utf-8') for member in rds.smembers('chk_url_404')]
    for url in urls_404:
        select_sql = 'SELECT id FROM chk_url_404 WHERE url="{}" '.format(url)
        try:
            rows = mysql_fetch(select_sql)
            print(select_sql)
        except Exception as e:
            print(e)
            continue
        if rows:
            # Already persisted; nothing to insert.
            continue
        insert_sql = 'INSERT INTO chk_url_404 (url,indb_time) VALUES ("{}","{}")'.format(url, int(time.time()))
        try:
            mysql_write(insert_sql)
            print(insert_sql)
        except Exception as e:
            print(e)
    time.sleep(60 * 15)
多线程
import requests
import re
import time
from redis import Redis
import threading
# Redis connection shared by all checker threads started below.
REDIS_HOST, REDIS_PORT, PASSWORD = '192.168.2.51', '6379', 'mypwd'
rds = Redis(host=REDIS_HOST, port=REDIS_PORT, password=PASSWORD)
# Disabled one-off loader, kept for reference: it strips newlines, tabs and
# spaces from each line of DISTINCT_url.txt and adds the result to the
# 'chk_url_all' set.
# f, url_l, filter_replace_l = 'DISTINCT_url.txt', [], ['\n', '\t', ' ']
# with open(f, 'r', encoding='utf-8') as fr:
#     for i in fr:
#         try:
#             for ii in filter_replace_l:
#                 i = i.replace(ii, '')
#             rds.sadd('chk_url_all', i)
#         except Exception as e:
#             print(e)
def tf():
    """Worker loop: pop URLs from Redis, fetch them, and record 404s.

    Repeatedly pops one entry from the 'chk_url_all' set, issues a GET
    request for it, and adds the original entry to the 'chk_url_404' set
    when the response status is 404. Per-URL failures are printed and the
    loop moves on. The function returns once the set is empty.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
    while True:
        try:
            raw = rds.spop('chk_url_all')
            if raw is None:
                # The set is drained: stop instead of spinning on errors.
                return
            url_ori = raw.decode('utf-8')
            # Always bind `url` for this iteration so a stale value from a
            # previous entry is never requested; leave entries that already
            # carry a scheme (http or https) untouched.
            if url_ori.startswith(('http://', 'https://')):
                url = url_ori
            else:
                url = 'http://{}'.format(url_ori)
            print(url)
            r = requests.get(url, headers=headers, timeout=50)
            sc = r.status_code
            print(sc)
            if sc == 404:
                print(sc)
                # Store the entry exactly as it was queued, not the
                # normalized URL.
                rds.sadd('chk_url_404', url_ori)
        except Exception as e:
            # Best effort: one bad URL must not kill the worker.
            print(e)
class MyThread(threading.Thread):
    """Thread that runs ``func`` with an optional single argument.

    Args:
        func: Callable executed in the new thread.
        args: Passed to ``func`` as its one positional argument; when
            ``None`` (the default), ``func`` is called with no arguments.
        name: Optional thread name; when ``None`` the default name
            generated by ``threading.Thread`` is kept.
    """

    def __init__(self, func, args=None, name=None):
        # Hand the name to Thread.__init__ so that None keeps the generated
        # default instead of being stored as the string 'None'.
        threading.Thread.__init__(self, name=name)
        self.func, self.args = func, args

    def run(self):
        """Invoke ``func``, passing ``args`` only when it was supplied."""
        if self.args is None:
            self.func()
        else:
            self.func(self.args)
# Start ten checker threads and wait for all of them to finish.
tl = [MyThread(tf) for _ in range(10)]
for t in tl:
    # Set the daemon flag through the attribute; assigning to `setDaemon`
    # would only overwrite the method without changing the flag.
    t.daemon = False
    t.start()
for t in tl:
    t.join()
发布代码至多台机器
redis   字典型  支持更新 
# Obtain a Redis client via return_redis() (defined outside this snippet).
rds = return_redis(redis_key)
# Write uid_info under field `uid` of the hash named my_dict_name; HSET
# replaces the value if the field already exists.
rds.hset(my_dict_name, uid, uid_info)