python json url转义相关
#coding=utf-8
"""Load rows from the ``comment`` / ``reply`` tables and re-post them to the
comment service, recording the old-id -> new-id mapping along the way.

Usage::

    python CommentLoader.py comment
    python CommentLoader.py reply

NOTE: this is a Python 2 script (``reload(sys)``, ``urllib.urlencode``,
``unicode``).  Syntax is kept parseable by Python 3 where that is free, but
the runtime target is unchanged.
"""
import sys
reload(sys)
# Python 2 only: make implicit str <-> unicode conversions use UTF-8 so that
# non-ASCII comment text can be concatenated and printed.
sys.setdefaultencoding('utf-8')

import json
import os
import subprocess
import time
import urllib

import requests

from MyClient import MyClient

# NOTE(review): connection parameters (including the password) are hard-coded;
# consider moving them to configuration.
sql = MyClient("10.40.73.12", 19030, 'comment', '123456', 'storage2')
sql.connect()

# Scratch example of MyClient usage kept from the original file:
#   cmd = 'insert into newtab(id, at) values(NULL, now()),(NULL, now());'
#   r = sql.execute(cmd)
#   print r
#   cmd = 'select * from newtab limit 5 offset 5'
#   r = sql.execute(cmd)
#   print r

batch_size = 5000
offset_c = 0
offset_r = 0
offset_file_c = 'offset_c'
offset_file_r = 'offset_r'

domain = 'http://10.40.87.142:38850/'
#domain = 'http://ip:port/'
#domain = 'http://10.40.73.12:38851/'
add_url_format = domain + 'comments?op=add&lang=%s&comment_ref_id=%s&author_id=%s'
add_url = domain + 'comments?'
reply_url_format = domain + 'comments?op=reply&lang=%s&comment_ref_id=%s&author_id=%s&comment_id=%d'
reply_url = domain + 'comments?'
like_url_format = domain + 'comments?op=manage_like_num&lang=%s&comment_ref_id=%s&author_id=setnumuser&comment_id=%d&set_num=%d'
like_url_format2 = domain + 'comments?op=manage_like_num&lang=%s&comment_ref_id=%s&author_id=setnumuser&comment_id=%d&reply_id=%d&set_num=%d'

_MESSAGE_KEY = '"message": "'
_AUTHOR_KEY = '"author": "'
_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'


def read_offset_from_file(fn):
    """Return the integer stored on the first line of file *fn*.

    Raises IOError if the file cannot be opened and ValueError if the first
    line is not an integer.  A missing file is deliberately NOT treated as
    offset 0: silently restarting from 0 would re-post every row.
    """
    with open(fn) as f:
        line = f.readline()
    return int(line.strip('\n'))


def flush_offset_to_file(fn, offset):
    """Persist *offset* to file *fn* by writing ``fn + 'new'`` and renaming it.

    The live file is no longer deleted before the rename, so a crash between
    the two steps cannot leave the loader without an offset file.
    """
    tmp = fn + 'new'
    with open(tmp, 'w') as f:
        f.write(str(offset))
    try:
        os.rename(tmp, fn)
    except OSError:
        # Platforms where rename refuses to overwrite an existing file:
        # fall back to the original remove-then-rename sequence.
        os.remove(fn)
        os.rename(tmp, fn)


def write_new_id(id, new_id):
    """Store *new_id* in the ``new_id`` column of the comment row *id*."""
    # NOTE(review): SQL is built by string formatting because MyClient's
    # parameter-binding support (if any) is not visible from this file.
    cmd = "update comment set new_id='%d' where id='%s'" % (new_id, id)
    sql.execute(cmd)


def get_new_comment_id(id):
    """Return the ``new_id`` of comment *id*, or None unless exactly one row matches."""
    cmd = "select new_id from comment where id='%d'" % (id,)
    ret = sql.execute(cmd)
    if ret and len(ret) == 1:
        return ret[0][0]
    return None


def napi_comment_map(comment_ref_id, lang, id, new_id):
    """POST the old-id -> new-id mapping of a comment to the ``napi_map`` endpoint."""
    map_url = "http://10.40.87.141:38880/comments?op=napi_map&lang=%s&comment_ref_id=%s&comment_id=%s" % (lang,comment_ref_id,id)
    pl = '{"new_id": "%s"}' % (new_id,)
    r = requests.post(url=map_url, data=pl)
    if r.status_code != 200:
        print('Failed to map %s %s %s %s' % (comment_ref_id,lang,id,new_id))
    else:
        print('Success to map %s %s %s %s' % (comment_ref_id,lang,id,new_id))


def napi_reply_map(comment_ref_id,lang,old_comment_id,old_id,comment_id,id):
    """POST the old -> new id mapping of a reply (and its parent comment)."""
    map_url = "http://10.40.87.141:38880/comments?op=napi_map&lang=%s&comment_ref_id=%s&comment_id=%s&reply_id=%s" % (lang,comment_ref_id,old_comment_id,old_id)
    pl = '{"new_id": "%s", "new_comment_id":"%s"}' % (id, comment_id)
    r = requests.post(url=map_url, data=pl)
    if r.status_code != 200:
        print('Failed to map %s %s %s %s' % (comment_ref_id,lang,id,old_id))
    else:
        print('Success to map %s %s %s %s' % (comment_ref_id,lang,id,old_id))


def napi_msg(id):
    """Fetch the message text of comment *id* from the NAPI service.

    Returns the message with every backslash doubled, or None when the
    response status is not 200.
    """
    url = 'http://iflow-in.napi.ucweb.com/3/discuss/comments/'+ id +'?_app_id=a14ab4f776074435956a5819ec01ca40'
    r = requests.get(url)
    if r.status_code != 200:
        return None
    msg = r.json()['data']['message']
    return msg.replace('\\','\\\\')


def napi_reply_msg(comment_ref_id, lang, comment_id, id):
    """Fetch the message text of reply *id* from the NAPI replies listing.

    Returns the message with every backslash doubled, or None when the
    response status is not 200 or no reply in the listing has ``_id == id``.
    """
    url = 'http://iflow-in.napi.ucweb.com/3/discuss/categories/'+lang+'/threads/'+comment_ref_id+'/comments/'+comment_id+'/replies?_app_id=a14ab4f776074435956a5819ec01ca40'
    r = requests.get(url)
    if r.status_code != 200:
        return None
    for reply in r.json()['data']:
        if reply['_id'] == id:
            return reply['message'].replace('\\','\\\\')
    return None


def _escape_quotes_in_field(text, key):
    """Backslash-escape double quotes inside the string value that follows *key*.

    The value is assumed to run from the end of *key* to the next ``", "``
    (a heuristic inherited from the original code).  *text* is returned
    unchanged when *key* is absent.
    """
    start = text.find(key)
    if start == -1:
        return text
    end = text.find('", "', start)
    value = text[start + len(key):end]
    return text[:start] + key + value.replace('"', '\\"') + text[end:]


def _parse_entry(id, comment):
    """Decode the stored JSON *comment*; return the decoded value or None.

    If the first decode fails with ValueError, one repair attempt is made:
    backslashes are doubled and bare quotes inside the "message" and
    "author" values are escaped.  Failures are printed, not raised.
    """
    try:
        return json.loads(comment, strict=False)
    except ValueError:
        pass
    except Exception as e:
        print("loads error first" + id)
        print(e)
        return None

    doubled = comment.replace('\\','\\\\')
    msg_fixed = _escape_quotes_in_field(doubled, _MESSAGE_KEY)
    candidate = _escape_quotes_in_field(msg_fixed, _AUTHOR_KEY)
    print('quoto ' + id)
    try:
        return json.loads(candidate, strict=False)
    except ValueError:
        print("loads error " + id)
        print('%s %s' % (doubled, msg_fixed))
        return None


def _fill_identity(body, napimsg, created_at):
    """Copy app/thread/author fields and the parsed timestamp into *body*."""
    body['app_id'] = napimsg['app_id']
    body['lang'] = napimsg['category']
    body['comment_ref_id'] = napimsg['thread']
    body['author'] = napimsg['author']
    body['author_id'] = napimsg['author_id']
    body['created_at'] = int(time.mktime(time.strptime(created_at, _TIME_FORMAT)))
    body['extra'] = napimsg['extra']


def _fill_profile(body, napimsg):
    """Set ``user_image`` and ``item_id`` in *body*, with fallbacks."""
    extra = napimsg['extra']
    if 'user_image' in extra:
        body['user_image'] = extra['user_image']
    else:
        body['user_image'] = ""
    if 'item_id' in extra:
        body['item_id'] = extra['item_id']
    else:
        # Derive the item id by cutting the '_comment' suffix off the thread
        # id; keep the whole id when the suffix is absent (slicing with
        # find()'s -1 would silently drop the last character).
        ref = body['comment_ref_id']
        cut = ref.find('_comment')
        body['item_id'] = ref[:cut] if cut != -1 else ref


def _fill_origin(body, napimsg):
    """Set the optional ``user_type`` and the ``utdid`` in *body*."""
    if 'from' in napimsg['extra']:
        body['user_type'] = napimsg['extra']['from']
    #if 'ds' in napimsg['extra']:
    #    body['ut_did'] = napimsg['extra']['ds']
    body['utdid'] = napimsg['author_id']


def _has_keys(body, keys):
    """Return True when every name in *keys* is present in *body*."""
    return all(k in body for k in keys)


def _to_utf8(text):
    """Return *text* as a UTF-8 byte string (Python 2 ``unicode`` -> ``str``)."""
    if isinstance(text, unicode):
        return text.encode('utf-8')
    return text


def _curl_post(url, payload):
    """POST *payload* to *url* with curl; return the body followed by the 3-char HTTP code.

    curl is started from an argument list (no shell), so comment text can
    never be interpreted as shell syntax.  Returns '' if curl cannot be run.
    """
    cmd = ['curl', '-XPOST', _to_utf8(url), '-s',
           '-w', '%{http_code}', '-d', _to_utf8(payload)]
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    except OSError as e:
        print(e)
        return ''
    out, _ = proc.communicate()
    return out


def _like_body():
    """Return the form body sent with every ``manage_like_num`` request."""
    return {'author':'setnumuser', 'author_id':'setnumuser', 'user_type':'admin', 'user_image':'http://alibaba-inc.com/admin.png'}


def add_comment(id, created_at, comment):
    """Re-post one stored comment and record its new id.

    id         -- primary key of the row in the ``comment`` table (string).
    created_at -- timestamp string in '%Y-%m-%d %H:%M:%S' format.
    comment    -- stored JSON text of the comment.

    Rows that cannot be decoded, have no 'extra', or are not 'approved' are
    skipped.  On HTTP 200 the new id is written back, the id mapping is
    posted and, when the like count is positive, the like count is set.
    """
    napimsg = _parse_entry(id, comment)
    if napimsg is None:
        return
    if 'extra' not in napimsg:
        print("No extra:" + id)
        return

    body = {}
    try:
        _fill_identity(body, napimsg, created_at)
        body['message'] = napimsg['message']
        # presumably '????' marks characters (emoji) lost in storage, so the
        # text is re-fetched from NAPI -- confirm
        if napimsg['message'].find('????') != -1:
            tmp = napi_msg(id)
            if tmp:
                print('emoj ' + id)
                body['message'] = tmp
        _fill_profile(body, napimsg)
        if napimsg['status'] != 'approved':
            return
        body['status'] = 1
        _fill_origin(body, napimsg)
        body['floor_num'] = napimsg['floor_num']
    except Exception as e:
        # Best effort, as in the original: a partially filled body is still
        # posted as long as the fields required for the URL exist.
        print(e)

    if not _has_keys(body, ('lang', 'comment_ref_id', 'author_id')):
        # Previously this fell through to an uncaught KeyError.
        print('Incomplete comment, skipped: ' + id)
        return

    # call add interface
    query = urllib.urlencode({'op':'add','lang':body['lang'], 'comment_ref_id':body['comment_ref_id'], 'author_id':body['author_id']})
    result = _curl_post(add_url + query, json.dumps(body,ensure_ascii=False))
    status_code = result[-3:]
    resp = result[0:-3]
    if status_code != '200':
        print('add comment %s %s failed %s' % (body['comment_ref_id'], id, status_code))
        print(result)
        return

    new_id = json.loads(resp)['data']['_id']
    write_new_id(id, new_id)
    napi_comment_map(body['comment_ref_id'],body['lang'],id,str(new_id))
    # set num of like
    likes = napimsg.get('like', 0)
    if likes > 0:
        url = like_url_format % (body['lang'], body['comment_ref_id'], new_id, likes)
        requests.post(url, data=_like_body())


def load_comment():
    """Poll the ``comment`` table forever, re-posting rows in batches.

    The offset is read from ``offset_file_c`` at start-up and flushed after
    every batch.  This function does not return normally.
    """
    global offset_c
    offset_c = read_offset_from_file(offset_file_c)
    while True:
        cmd = 'select id, created_at, entry from comment order by created_at asc limit %d offset %d;' % (batch_size, offset_c)
        rows = sql.execute(cmd) or []
        for row in rows:
            add_comment(row[0], row[1], row[2])
        if len(rows) < batch_size:
            print('Catched up new data\n')
            time.sleep(5)
        offset_c = offset_c + len(rows)
        flush_offset_to_file(offset_file_c, offset_c)


def add_reply(id, created_at, comment):
    """Re-post one stored reply under its already-migrated parent comment.

    id         -- primary key of the row in the ``reply`` table (string).
    created_at -- timestamp string in '%Y-%m-%d %H:%M:%S' format.
    comment    -- stored JSON text of the reply.

    Rows that cannot be decoded, have no 'extra', are not 'approved', or
    whose parent comment has no new id yet are skipped.
    """
    napimsg = _parse_entry(id, comment)
    if napimsg is None:
        return
    if 'extra' not in napimsg:
        print("No extra:" + id)
        return

    body = {}
    old_comment_id = None
    try:
        _fill_identity(body, napimsg, created_at)
        old_comment_id = napimsg['comment_id']
        ## comment_id from reply msg is number. but comment mesg _id is string
        new_comment_id = get_new_comment_id(old_comment_id)
        if not new_comment_id:
            print('Do not find comment %d for reply %s' % (old_comment_id,id))
            return
        body['comment_id'] = new_comment_id
        body['message'] = napimsg['message']
        if napimsg['message'].find('????') != -1:
            tmp = napi_reply_msg(body['comment_ref_id'], body['lang'],str(old_comment_id), id)
            if tmp:
                print('emoj ' + id)
                body['message'] = tmp
        _fill_profile(body, napimsg)
        if napimsg['status'] != 'approved':
            print("Not approved " + id)
            return
        body['status'] = 1
        _fill_origin(body, napimsg)
        #floor_num
    except Exception as e:
        # Best effort, as in the original (see add_comment).
        print(e)

    if not _has_keys(body, ('lang', 'comment_ref_id', 'author_id', 'comment_id')):
        # Previously this fell through to an uncaught KeyError.
        print('Incomplete reply, skipped: ' + id)
        return

    # call reply interface
    query = urllib.urlencode({'op':'reply','lang':body['lang'], 'comment_ref_id':body['comment_ref_id'], 'author_id':body['author_id'], 'comment_id':body['comment_id']})
    result = _curl_post(reply_url + query, json.dumps(body,ensure_ascii=False))
    status_code = result[-3:]
    resp = result[0:-3]
    if status_code != '200':
        print('reply comment %s failed %s' % (body['comment_ref_id'], status_code))
        return

    print('reply comment ok %s' % (id,))
    new_id = json.loads(resp)['data']['_id']
    napi_reply_map(body['comment_ref_id'],body['lang'],old_comment_id,id,body['comment_id'],str(new_id))
    # set num of like
    likes = napimsg.get('like', 0)
    if likes > 0:
        url = like_url_format2 % (body['lang'], body['comment_ref_id'], int(body['comment_id']),new_id, likes)
        requests.post(url, data=_like_body())


def load_reply():
    """Poll the ``reply`` table forever, re-posting rows in batches.

    The offset is read from ``offset_file_r`` at start-up and flushed after
    every batch.  This function does not return normally.
    """
    global offset_r
    offset_r = read_offset_from_file(offset_file_r)
    while True:
        cmd = 'select id, created_at, entry from reply order by created_at asc limit %d offset %d;' % (batch_size, offset_r)
        rows = sql.execute(cmd) or []
        for row in rows:
            add_reply(row[0], row[1], row[2])
        if len(rows) < batch_size:
            print('Catched up new data\n')
            time.sleep(3)
        offset_r = offset_r + len(rows)
        flush_offset_to_file(offset_file_r, offset_r)


def main(argv):
    """Run the loader selected by ``argv[1]`` ('comment' or 'reply').

    A missing or unknown mode prints a usage hint instead of raising.  The
    SQL connection is closed on every exit path, including Ctrl-C.
    """
    loaders = {'comment': load_comment, 'reply': load_reply}
    mode = argv[1] if len(argv) > 1 else None
    try:
        if mode in loaders:
            loaders[mode]()
        else:
            print('Load comment or reply')
    finally:
        sql.close()


if __name__ == '__main__':
    main(sys.argv)