python json url转义相关

#coding=utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from MyClient import MyClient
import os,time,json,urllib
import requests

reload(sys)
sys.setdefaultencoding('utf-8')


'''
python CommentLoader.py comment
python CommentLoader.py reply
'''


# Module-wide MyClient connection; the helpers below reach it via `global sql`.
# NOTE(review): the arguments look like host, port, user, password, database --
# confirm against the MyClient constructor.
sql = MyClient("10.40.73.12", 19030, 'comment', '123456', 'storage2')
sql.connect()

# The bare string literal below holds disabled sample statements; it is never
# executed.
'''
cmd = 'insert into newtab(id, at) values(NULL, now()),(NULL, now());'
r = sql.execute(cmd)
print r

cmd = 'select * from newtab limit 5 offset 5'
r = sql.execute(cmd)
print r
'''
# Row count used as the LIMIT of each SELECT in load_comment()/load_reply().
batch_size = 5000
# In-memory row offsets; overwritten from the offset files when a loader starts.
offset_c = 0L
offset_r = 0L
# Names of the files that persist the comment / reply offsets between runs.
offset_file_c = 'offset_c'
offset_file_r = 'offset_r'
# Base URL of the comment service that receives the migrated rows.
domain = 'http://10.40.87.142:38850/'
#domain = 'http://ip:port/'
#domain = 'http://10.40.73.12:38851/'
# The *_url_format templates interpolate raw values into the query string; in
# the visible code add_url_format and reply_url_format are referenced only from
# commented-out lines, the live code urlencodes onto add_url / reply_url.
add_url_format = domain + 'comments?op=add&lang=%s&comment_ref_id=%s&author_id=%s'
add_url = domain + 'comments?'
reply_url_format = domain + 'comments?op=reply&lang=%s&comment_ref_id=%s&author_id=%s&comment_id=%d'
reply_url = domain + 'comments?'
# Like-count URLs: the first targets a comment, the second a reply of a comment.
like_url_format = domain + 'comments?op=manage_like_num&lang=%s&comment_ref_id=%s&author_id=setnumuser&comment_id=%d&set_num=%d'
like_url_format2 = domain + 'comments?op=manage_like_num&lang=%s&comment_ref_id=%s&author_id=setnumuser&comment_id=%d&reply_id=%d&set_num=%d'

def read_offset_from_file(fn):
    """Return the integer stored on the first line of the offset file *fn*.

    Args:
        fn: Path of the offset file.

    Returns:
        The first line of the file converted with ``int``.

    Raises:
        IOError: If the file cannot be opened.
        ValueError: If the first line is not an integer (e.g. empty file).
    """
    # `with` closes the handle even when readline() raises.
    with open(fn) as f:
        line = f.readline()
    return int(line.strip('\n'))

def flush_offset_to_file(fn, offset):
    """Persist *offset* to the file *fn* by writing a sibling file and renaming.

    The value is first written to ``fn + 'new'`` and then moved over ``fn``.

    Args:
        fn: Path of the offset file; it does not have to exist yet.
        offset: Value to store; written as ``str(offset)``.

    Raises:
        IOError / OSError: If the temporary file cannot be written or the
            rename fails.
    """
    tmp = fn + 'new'
    with open(tmp, 'w') as f:
        f.write(str(offset))
    try:
        # On POSIX rename() replaces an existing destination in one step, so
        # there is no moment at which the offset file is missing.
        os.rename(tmp, fn)
    except OSError:
        # Some platforms (Windows) refuse to rename onto an existing file;
        # only then fall back to remove-then-rename. Any other failure is
        # re-raised untouched.
        if not os.path.exists(fn):
            raise
        os.remove(fn)
        os.rename(tmp, fn)


def write_new_id(id, new_id):
    """Store *new_id* in the ``new_id`` column of the ``comment`` row *id*.

    The statement is built with ``%`` formatting (``%d`` for *new_id*, ``%s``
    for *id*) and run on the module-level ``sql`` connection.
    """
    global sql
    statement = "update comment set new_id='%d' where id='%s'" % (new_id, id)
    sql.execute(statement)

def get_new_comment_id(id):
    """Look up the ``new_id`` recorded for the legacy comment *id*.

    *id* is formatted with ``%d``, so it must be numeric.

    Returns:
        The first column of the single matching row, or None when the query
        result is empty/falsy or does not contain exactly one row.
    """
    global sql
    query = "select new_id from comment where id='%d'" % (id,)
    rows = sql.execute(query)
    if not rows or len(rows) != 1:
        return None
    return rows[0][0]

def napi_comment_map(comment_ref_id, lang, id, new_id):
    """Register the legacy-id -> new-id mapping of a comment via ``op=napi_map``.

    Args:
        comment_ref_id: Thread reference, sent as ``comment_ref_id``.
        lang: Sent as the ``lang`` query parameter.
        id: Legacy comment id, sent as ``comment_id``.
        new_id: New comment id, sent in the JSON body as the string ``new_id``.

    Returns:
        None. Success or failure (HTTP status != 200) is reported with print.
    """
    # A list of pairs keeps the parameter order of the original URL while
    # urlencode percent-escapes the values, so '&', '=', spaces or non-ASCII
    # text in an argument can no longer corrupt the query string.
    query = urllib.urlencode([
        ('op', 'napi_map'),
        ('lang', lang),
        ('comment_ref_id', comment_ref_id),
        ('comment_id', id),
    ])
    map_url = "http://10.40.87.141:38880/comments?" + query
    # json.dumps escapes quotes/backslashes that hand-formatting would not.
    pl = json.dumps({'new_id': '%s' % (new_id,)})
    r = requests.post(url=map_url, data=pl)
    if r.status_code != 200:
        print('Failed to map %s %s %s %s' % (comment_ref_id,lang,id,new_id))
    else:
        print('Success to map %s %s %s %s' % (comment_ref_id,lang,id,new_id))

def napi_reply_map(comment_ref_id,lang,old_comment_id,old_id,comment_id,id):
    """Register the legacy-id -> new-id mapping of a reply via ``op=napi_map``.

    Args:
        comment_ref_id: Thread reference, sent as ``comment_ref_id``.
        lang: Sent as the ``lang`` query parameter.
        old_comment_id: Legacy id of the parent comment (``comment_id`` param).
        old_id: Legacy id of the reply (``reply_id`` param).
        comment_id: New id of the parent comment (body ``new_comment_id``).
        id: New id of the reply (body ``new_id``).

    Returns:
        None. Success or failure (HTTP status != 200) is reported with print.
    """
    # A list of pairs keeps the parameter order of the original URL while
    # urlencode percent-escapes the values, so '&', '=', spaces or non-ASCII
    # text in an argument can no longer corrupt the query string.
    query = urllib.urlencode([
        ('op', 'napi_map'),
        ('lang', lang),
        ('comment_ref_id', comment_ref_id),
        ('comment_id', old_comment_id),
        ('reply_id', old_id),
    ])
    map_url = "http://10.40.87.141:38880/comments?" + query
    # Both ids are sent as JSON strings, as before; json.dumps takes care of
    # escaping instead of relying on the values being quote-free.
    pl = json.dumps({
        'new_id': '%s' % (id,),
        'new_comment_id': '%s' % (comment_id,),
    })
    r = requests.post(url=map_url, data=pl)
    if r.status_code != 200:
        print('Failed to map %s %s %s %s' % (comment_ref_id,lang,id,old_id))
    else:
        print('Success to map %s %s %s %s' % (comment_ref_id,lang,id,old_id))

def napi_msg(id):
    """Fetch the message text of comment *id* from the napi discuss endpoint.

    Returns:
        ``data.message`` of the JSON response with every backslash doubled,
        or None when the HTTP status is not 200.
    """
    prefix = 'http://iflow-in.napi.ucweb.com/3/discuss/comments/'
    suffix = '?_app_id=a14ab4f776074435956a5819ec01ca40'
    response = requests.get(prefix + id + suffix)
    if response.status_code != 200:
        return None
    payload = response.json()
    text = payload['data']['message']
    return text.replace('\\','\\\\')

def napi_reply_msg(comment_ref_id, lang, comment_id, id):
    """Fetch the message text of reply *id* from the napi replies listing.

    The listing for (*lang*, *comment_ref_id*, *comment_id*) is requested and
    scanned for the first entry whose ``_id`` equals *id*.

    Returns:
        That entry's ``message`` with every backslash doubled, or None when
        the HTTP status is not 200 or no entry matches.
    """
    endpoint = 'http://iflow-in.napi.ucweb.com/3/discuss/categories/'+lang+'/threads/'+comment_ref_id+'/comments/'+comment_id+'/replies?_app_id=a14ab4f776074435956a5819ec01ca40'
    response = requests.get(endpoint)
    if response.status_code != 200:
        return None
    listing = response.json()
    for entry in listing['data']:
        if entry['_id'] == id:
            return entry['message'].replace('\\','\\\\')
    return None

def add_comment(id, created_at, comment):
    """Replay one legacy comment row against the new comment service.

    Steps:
      1. Parse *comment* as JSON; on ``ValueError`` retry once after doubling
         backslashes and escaping the quotes found inside the "message" and
         "author" values.
      2. Build the request body from the parsed entry. Entries without an
         ``extra`` member or whose status is not ``'approved'`` are skipped.
      3. POST the body with ``curl`` to ``add_url``. On HTTP 200 the returned
         id is stored with ``write_new_id``, registered with
         ``napi_comment_map`` and, if the entry has likes, the like count is
         set through ``like_url_format``.

    Args:
        id: Legacy comment id; concatenated into log messages, so a string.
        created_at: Timestamp text in ``%Y-%m-%d %H:%M:%S`` form.
        comment: Raw JSON text of the legacy entry.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    retry = False
    napimsg = None
    try:
        napimsg = json.loads(comment, strict=False)
    except ValueError:
        # Invalid JSON text: attempt the quote/backslash repair below.
        retry = True
    except Exception as e:
        print("loads error first" + id)
        print(e)
        return
    if retry:
        # Bound before the try so the error report in the handler can always
        # print it, even if the failure happens before it is assigned.
        new_comment = None
        try:
            comment = comment.replace('\\','\\\\')
            # Heuristic repair: everything between '"message": "' and the next
            # '", "' is taken as the value and the quotes inside are escaped.
            s = comment.find('"message": "')
            end = comment.find('", "', s)
            head = comment[:s]
            msg_val = comment[s+len('"message": "'):end]
            msg_val = msg_val.replace('"', '\\"')
            tail = comment[end:]
            new_comment = head + '"message": "' + msg_val + tail

            # Same repair for the "author" value.
            s = new_comment.find('"author": "')
            end = new_comment.find('", "', s)
            head = new_comment[:s]
            author_val = new_comment[s+len('"author": "'):end]
            author_val = author_val.replace('"', '\\"')
            tail = new_comment[end:]
            new_new = head + '"author": "' + author_val + tail

            print('quoto ' + id)
            napimsg = json.loads(new_new, strict=False)
        except Exception:
            # `except Exception` rather than a bare except, so Ctrl-C still
            # stops the loader instead of being reported as a parse error.
            print("loads error " + id)
            print('%s %s' % (comment, new_comment))
            return
    # A JSON value that is not an object cannot carry 'extra' either.
    if not isinstance(napimsg, dict) or 'extra' not in napimsg:
        print("No extra:" + id)
        return

    body = {}
    try:
        body['app_id'] = napimsg['app_id']
        body['lang'] = napimsg['category']
        body['comment_ref_id'] = napimsg['thread']
        body['author'] = napimsg['author']
        body['author_id'] = napimsg['author_id']
        body['created_at'] = int(time.mktime(time.strptime(created_at, '%Y-%m-%d %H:%M:%S')))
        body['extra'] = napimsg['extra']
        body['message'] = napimsg['message']
        # A run of '?' in the stored text triggers a re-fetch of the message
        # from napi (logged as 'emoj').
        if napimsg['message'].find('????') != -1:
            tmp = napi_msg(id)
            if tmp:
                print('emoj ' + id)
                body['message'] = tmp
        if not 'user_image' in napimsg['extra']:
            body['user_image'] = ""
        else:
            body['user_image'] = napimsg['extra']['user_image']
        if not 'item_id' in napimsg['extra']:
            # Fall back to the part of the thread id before '_comment'.
            p = body['comment_ref_id'].find('_comment')
            body['item_id'] = body['comment_ref_id'][:p]
        else:
            body['item_id'] = napimsg['extra']['item_id']
        if napimsg['status'] == 'approved':
            body['status'] = 1
        else:
            return
        if 'from' in napimsg['extra']:
            body['user_type'] = napimsg['extra']['from']
        body['utdid'] = napimsg['author_id']
        body['floor_num'] = napimsg['floor_num']
    except Exception as e:
        # NOTE(review): a failure here leaves `body` partially filled and the
        # row is still posted with whatever was collected -- confirm that this
        # best-effort behaviour is intended.
        print(e)

    # The request URL needs these three fields; without them the original code
    # died with KeyError and stopped the whole loader. Skip the row instead.
    missing = [k for k in ('lang', 'comment_ref_id', 'author_id') if k not in body]
    if missing:
        print('add comment %s skipped, missing %s' % (id, ','.join(missing)))
        return

    # call add interface
    p = {'op':'add','lang':body['lang'], 'comment_ref_id':body['comment_ref_id'], 'author_id':body['author_id']}
    p = urllib.urlencode(p)
    url = add_url + p
    rbody=json.dumps(body,ensure_ascii=False)
    # The payload is passed to the shell inside single quotes, so each single
    # quote in it is rewritten as '"'"' (close quote, quoted ', reopen quote).
    rbody = rbody.replace("'", '\'"\'"\'')
    pipe = os.popen('curl -XPOST "%s" -s -w %%{http_code} -d \'%s\'' %(url,rbody))
    try:
        result = pipe.read()
    finally:
        pipe.close()
    # `-w %{http_code}` appends the 3-digit status after the response body.
    status_code = result[-3:]
    resp = result[0:-3]
    if status_code == '200':
        o = json.loads(resp)
        new_id = o['data']['_id']
        write_new_id(id, new_id)
        napi_comment_map(body['comment_ref_id'],body['lang'],id,str(new_id))
        # set num of like; an entry without a 'like' member counts as zero
        # instead of raising KeyError after the comment was already created.
        likes = napimsg.get('like') or 0
        if likes > 0:
            url = like_url_format % (body['lang'], body['comment_ref_id'], new_id, likes)
            like_body = {'author':'setnumuser', 'author_id':'setnumuser', 'user_type':'admin', 'user_image':'http://alibaba-inc.com/admin.png'}
            requests.post(url, data=like_body)
    else:
        print('add comment %s  %s failed %s' % (body['comment_ref_id'], id, status_code))
        print(result)

def load_comment():
    """Feed rows of the ``comment`` table to ``add_comment`` forever.

    Starts at the offset stored in ``offset_file_c``, reads ``batch_size``
    rows per query ordered by ``created_at``, and after every batch advances
    ``offset_c`` and writes it back to the offset file. When a batch comes back
    short the loop sleeps 5 seconds before polling again; it never exits on
    its own.
    """
    global offset_c
    global sql
    offset_c = read_offset_from_file(offset_file_c)
    query_template = 'select id, created_at, entry from comment order by created_at asc limit %d offset %d;'
    while True:
        rows = sql.execute(query_template % (batch_size, offset_c))
        for row in rows:
            add_comment(row[0], row[1], row[2])
        fetched = len(rows)
        if fetched < batch_size:
            # Fewer rows than requested: the table is exhausted for now.
            print('Catched up new data\n')
            time.sleep(5)
        offset_c += fetched
        flush_offset_to_file(offset_file_c, offset_c)

def add_reply(id, created_at, comment):
    """Replay one legacy reply row against the new comment service.

    Steps:
      1. Parse *comment* as JSON; on ``ValueError`` retry once after doubling
         backslashes and escaping the quotes found inside the "message" and
         "author" values.
      2. Build the request body. The legacy parent ``comment_id`` is translated
         with ``get_new_comment_id``; replies whose parent is unknown, that
         lack ``extra`` or are not ``'approved'`` are skipped.
      3. POST the body with ``curl`` to ``reply_url``. On HTTP 200 the new id
         is registered with ``napi_reply_map`` and, if the entry has likes,
         the like count is set through ``like_url_format2``.

    Args:
        id: Legacy reply id; concatenated into log messages, so a string.
        created_at: Timestamp text in ``%Y-%m-%d %H:%M:%S`` form.
        comment: Raw JSON text of the legacy entry.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    retry = False
    napimsg = None
    try:
        napimsg = json.loads(comment, strict=False)
    except ValueError:
        # Invalid JSON text: attempt the quote/backslash repair below.
        retry = True
    except Exception as e:
        print("loads error first" + id)
        print(e)
        return
    if retry:
        # Bound before the try so the error report in the handler can always
        # print it, even if the failure happens before it is assigned.
        new_comment = None
        try:
            comment = comment.replace('\\','\\\\')
            # Heuristic repair: everything between '"message": "' and the next
            # '", "' is taken as the value and the quotes inside are escaped.
            s = comment.find('"message": "')
            end = comment.find('", "', s)
            head = comment[:s]
            msg_val = comment[s+len('"message": "'):end]
            msg_val = msg_val.replace('"', '\\"')
            tail = comment[end:]
            new_comment = head + '"message": "' + msg_val + tail
            # Same repair for the "author" value.
            s = new_comment.find('"author": "')
            end = new_comment.find('", "', s)
            head = new_comment[:s]
            author_val = new_comment[s+len('"author": "'):end]
            author_val = author_val.replace('"', '\\"')
            tail = new_comment[end:]
            new_new = head + '"author": "' + author_val + tail
            print('quoto ' + id)
            napimsg = json.loads(new_new, strict=False)
        except Exception:
            # `except Exception` rather than a bare except, so Ctrl-C still
            # stops the loader instead of being reported as a parse error.
            print("loads error " + id)
            print('%s %s' % (comment, new_comment))
            return

    # A JSON value that is not an object cannot carry 'extra' either.
    if not isinstance(napimsg, dict) or 'extra' not in napimsg:
        print("No extra:" + id)
        return

    body = {}
    old_comment_id = None
    try:
        body['app_id'] = napimsg['app_id']
        body['lang'] = napimsg['category']
        body['comment_ref_id'] = napimsg['thread']
        body['author'] = napimsg['author']
        body['author_id'] = napimsg['author_id']
        body['created_at'] = int(time.mktime(time.strptime(created_at, '%Y-%m-%d %H:%M:%S')))
        body['extra'] = napimsg['extra']
        old_comment_id = napimsg['comment_id'] ## comment_id from reply msg is number. but comment mesg _id is string
        new_comment_id = get_new_comment_id(old_comment_id)
        if not new_comment_id:
            print('Do not find comment %d for reply %s' % (old_comment_id,id))
            return
        body['comment_id'] = new_comment_id
        body['message'] = napimsg['message']
        # A run of '?' in the stored text triggers a re-fetch of the message
        # from napi (logged as 'emoj').
        if napimsg['message'].find('????') != -1:
            tmp = napi_reply_msg(body['comment_ref_id'], body['lang'],str(old_comment_id), id)
            if tmp:
                print('emoj ' + id)
                body['message'] = tmp
        if not 'user_image' in napimsg['extra']:
            body['user_image'] = ""
        else:
            body['user_image'] = napimsg['extra']['user_image']
        if not 'item_id' in napimsg['extra']:
            # Fall back to the part of the thread id before '_comment'.
            p = body['comment_ref_id'].find('_comment')
            body['item_id'] = body['comment_ref_id'][:p]
        else:
            body['item_id'] = napimsg['extra']['item_id']
        if napimsg['status'] == 'approved':
            body['status'] = 1
        else:
            print("Not approved " + id)
            return

        if 'from' in napimsg['extra']:
            body['user_type'] = napimsg['extra']['from']
        body['utdid'] = napimsg['author_id']
    except Exception as e:
        # NOTE(review): a failure here leaves `body` partially filled and the
        # row is still posted with whatever was collected -- confirm that this
        # best-effort behaviour is intended.
        print(e)

    # The request URL needs these four fields; without them the original code
    # died with KeyError and stopped the whole loader. Skip the row instead.
    missing = [k for k in ('lang', 'comment_ref_id', 'author_id', 'comment_id') if k not in body]
    if missing:
        print('reply comment %s skipped, missing %s' % (id, ','.join(missing)))
        return

    # call reply interface
    p = {'op':'reply','lang':body['lang'], 'comment_ref_id':body['comment_ref_id'], 'author_id':body['author_id'], 'comment_id':body['comment_id']}
    p = urllib.urlencode(p)
    url = reply_url + p

    rbody=json.dumps(body,ensure_ascii=False)
    # The payload is passed to the shell inside single quotes, so each single
    # quote in it is rewritten as '"'"' (close quote, quoted ', reopen quote).
    rbody = rbody.replace("'", '\'"\'"\'')
    pipe = os.popen('curl -XPOST "%s" -s -w %%{http_code} -d \'%s\'' %(url,rbody))
    try:
        result = pipe.read()
    finally:
        pipe.close()
    # `-w %{http_code}` appends the 3-digit status after the response body.
    status_code = result[-3:]
    resp = result[0:-3]
    if status_code != '200':
        print('reply comment %s failed %s' % (body['comment_ref_id'], status_code))
    else:
        print('reply comment ok  %s' % (id,))
        o = json.loads(resp)
        new_id = o['data']['_id']
        napi_reply_map(body['comment_ref_id'],body['lang'],old_comment_id,id,body['comment_id'],str(new_id))
        # set num of like; an entry without a 'like' member counts as zero
        # instead of raising KeyError after the reply was already created.
        likes = napimsg.get('like') or 0
        if likes > 0:
            url = like_url_format2 % (body['lang'], body['comment_ref_id'], int(body['comment_id']),new_id, likes)
            like_body = {'author':'setnumuser', 'author_id':'setnumuser', 'user_type':'admin', 'user_image':'http://alibaba-inc.com/admin.png'}
            requests.post(url, data=like_body)

def load_reply():
    """Feed rows of the ``reply`` table to ``add_reply`` forever.

    Starts at the offset stored in ``offset_file_r``, reads ``batch_size``
    rows per query ordered by ``created_at``, and after every batch advances
    ``offset_r`` and writes it back to the offset file. When a batch comes back
    short the loop sleeps 3 seconds before polling again; it never exits on
    its own.
    """
    global offset_r
    global sql
    offset_r = read_offset_from_file(offset_file_r)
    query_template = 'select id, created_at, entry from reply order by created_at asc limit %d offset %d;'
    while True:
        rows = sql.execute(query_template % (batch_size, offset_r))
        for row in rows:
            add_reply(row[0], row[1], row[2])
        fetched = len(rows)
        if fetched < batch_size:
            # Fewer rows than requested: the table is exhausted for now.
            print('Catched up new data\n')
            time.sleep(3)
        offset_r += fetched
        flush_offset_to_file(offset_file_r, offset_r)

if __name__ == '__main__':
    # Entry point: `python CommentLoader.py comment|reply`.
    try:
        # A missing argument is treated like an unknown mode instead of
        # raising IndexError on sys.argv[1].
        mode = sys.argv[1] if len(sys.argv) > 1 else None
        if mode == 'comment':
            load_comment()
        elif mode == 'reply':
            load_reply()
        else:
            print('Load comment or reply')
    finally:
        # The loaders only stop by raising (including Ctrl-C), so closing the
        # connection has to happen in `finally` to run at all.
        sql.close()

  

posted on 2017-08-22 14:22  不忘初衷,方能致远  阅读(1850)  评论(0)    收藏  举报

导航