文章编辑器 文本替换 操作dom 发帖 富文本 今日头条发布富文本的实现 键盘化的html


用 JS 修改 iframe 内编辑器的内容(在浏览器控制台中执行):
// Console snippet: replace the content shown by the rich-text editor that is
// hosted inside the iframe with id "ueditor_0".
// `it` is assigned without a declaration (implicit global), so it stays
// available in the console after the snippet has run.
it = document
    .getElementById('ueditor_0')
    .contentWindow
    .document
    .getElementsByTagName("body")[0];

// Overwrite the iframe's <body> with the sample markup.
it.innerHTML = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>';
from selenium import webdriver from time import sleep import time from selenium.webdriver.common.keys import Keys import os import requests import time import threading import logging import random start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) os_sep = os.sep this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[ -1] logf = this_file_name + '.log' try: logging.basicConfig(level=logging.INFO, format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]', datefmt='%a, %d %b %Y %H:%M:%S', filename=logf, filemode='a') except Exception as e: s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e) with open(logf, 'a') as fo: fo.write(s) print(s) os._exit(4002) logging.info('START') img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png' img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\' def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'): r = '%s%s' % (img_dir, local_default) try: bytes = requests.get(img_url)._content r = '%s%s%s%s%s' % ( img_dir, time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())), str(threading.get_ident()), img_url.replace('/', '_xl_').replace(':', '_fxl_').replace('?', '_fxlquestion_').replace('=', '_fxlequal_').replace( '&', '_fxland_'), '.png') if bytes != 0: with open(r, 'wb')as f: f.write(bytes) except Exception as e: print(e) return r import pymysql h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke_', 'star_media_helper' def mysql_fetch(sql, res_type='tuple'): global h, pt, u, p, db try: conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8') except Exception as e: print(e) return () if res_type == 'dic': cursor = conn.cursor(pymysql.cursors.DictCursor) else: cursor = 
conn.cursor() cursor.execute(sql) conn.commit() cursor.close() conn.close() return cursor.fetchall() def mysql_write(sql): global h, pt, u, p, db try: conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8') except Exception as e: print(e) return 1 cursor = conn.cursor() cursor.execute(sql) conn.commit() cursor.close() conn.close() return 0 import random while True: logging.info('LOOP----') sql = 'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT (toutiaoid IS NULL OR toutiaoid="" )' sql = 'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )' res = mysql_fetch(sql) ac_l = [{'u': i[0], 'p': i[1], 'toutiao_uid': i[2]} for i in res] for ac in ac_l: myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid'] # 发布限制条件逻辑 sql = "SELECT * FROM joke__star_helper_relation_wukong_question WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format( toutiao_uid, int(time.time())); sql = "SELECT * FROM joke__helper_article_publish WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format( toutiao_uid, int(time.time())); print(sql) logging.info(sql) res_content = mysql_fetch(sql, 'dic') if len(res_content) == 0: continue id_article_list = [i['id_article_list'] for i in res_content] sql = 'SELECT * FROM joke__helper_article WHERE id IN ({}) AND id NOT IN (SELECT article_id FROM joke__helper_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; '.format( ','.join([i['id_article_list'] for i in res_content]), toutiao_uid) # sql = 'SELECT * FROM joke__star_helper_wukong_question WHERE id NOT IN (SELECT toutiao_uid FROM joke__star_helper_toutiaouser_wukong_question) LIMIT 1' logging.info(sql) res_content = mysql_fetch(sql, 'dic') if len(res_content) == 0: 
continue browser = webdriver.Chrome() f_url_l = ['https://www.toutiao.com/group/1589657566362638/', 'https://www.wukong.com/question/6388670742287876353/', 'https://www.wukong.com/tag/6215497898671475202/'] f_url_l += ['https://www.wukong.com/question/6512777037948649741/', 'https://www.wukong.com/question/6469247721038414093/', 'https://www.wukong.com/question/6481502080249889037/'] # f_url_l = [] f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514661446876398088/', 'https://www.toutiao.com/a6514778729951003150/'] f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/', 'https://www.toutiao.com/a6513334304318161411/'] f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)] # browser.get(random.choice(f_url_l)) browser.get(f_url_l_a) time.sleep(random.randint(10, 20)) js = 'window.location.href="https://sso.toutiao.com/login/";' js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";' browser.execute_script(js) time.sleep(random.randint(10, 20)) # js = 'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";' browser.execute_script(js) ac_type = 'qq' if ac_type == 'qq': myid, mypwd = ac['u'], ac['p'] xp = '/html/body/div/div/div[2]/div/div/div/ul/li[3]' browser.find_element_by_xpath(xp).click() time.sleep(10) js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"') browser.execute_script(js) js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"') browser.execute_script(js) time.sleep(random.randint(5, 15)) xp_newpage = '//*[@id="go"]' browser.find_element_by_xpath(xp_newpage).click() time.sleep(random.randint(10, 20)) elif ac_type == 'mail_qq': continue time.sleep(5) browser.refresh() js = 'window.location.href="https://www.toutiao.com/";' browser.execute_script(js) browser.refresh() time.sleep(6) js = 
'window.location.href="https://www.wukong.com/";' js = 'window.location.href="https://mp.toutiao.com/profile_v2/publish/";' js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";' browser.execute_script(js) time.sleep(6) # title js = '%s%s%s' % ('document.getElementById("title").value="', '林志玲捐款记录被翻出 单笔高达千万', '"') js = 'document.getElementById("title").value="{}"'.format('林志玲捐款记录被翻出 单笔高达千万') browser.execute_script(js) time.sleep(2) fhtml, dbhtml_str = 'toutaio.db.html', '' with open(fhtml, 'r', encoding='utf-8') as fr: for hi in fr: dbhtml_str = '{}{}'.format(dbhtml_str, hi.replace('\n', '')) db_html = dbhtml_str # db_html = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>' js = 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format( db_html) browser.execute_script(js) time.sleep(2) xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]' xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]' browser.find_element_by_xpath(xp).click() dd = 9 # js = 'document.getElementsByClassName("ask")[0].click();' # browser.execute_script(js) # time.sleep(12) # time.sleep(random.randint(10, 20)) # # 需要键盘事件 反爬虫 # tmp_target = browser.find_element_by_class_name('input-box').find_element_by_tag_name('input') # tmp_target.send_keys(Keys.SPACE) # tmp_target.send_keys(Keys.CONTROL, 'a') # tmp_target.send_keys(Keys.CONTROL, 'x') # tmp_target.send_keys(Keys.CONTROL, 'v') # tmp_target.send_keys(Keys.BACK_SPACE) # time.sleep(random.randint(10, 20)) # res_content = [] for i in res_content[0:1]: dbid, content, img_list = i['id'], i['content'], i['img_list'] tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏'] tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生'] tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格'] s = 
'{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么', random.choice(tmp_l_2), '的', random.choice(tmp_l)) js = 'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";'.format(s) browser.execute_script(js) time.sleep(12) # # tmp_target.send_keys(Keys.SPACE) js = 'document.getElementsByClassName("step-btn next")[0].click();' browser.execute_script(js) # step-btn submit js = 'document.getElementsByClassName("step-btn submit")[0].click();' browser.execute_script(js) time.sleep(12) # js = 'window.location.href="https://www.wukong.com/user/?uid={}&type=1";'.format(toutiao_uid) browser.execute_script(js) time.sleep(12) res_url = browser.find_element_by_class_name('question-title').find_elements_by_tag_name('a')[ 0].get_attribute('href') # print(i) # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea' # try: # browser.find_element_by_xpath(xp_newpage) # except Exception as e: # print(e) # break # browser.find_element_by_xpath(xp_newpage).click() # words = content # # Message: SyntaxError: unterminated string literal # mytxt = words.replace('\n', ' ').replace('\r', ' ').replace('\\br', ' ').replace('"', '“').replace("'", '‘') # # Message: SyntaxError: missing ; before statement # mytxt = mytxt.replace("'", '‘') # # 2000 头条 # mytxt = mytxt[0:2000] # mytxt = '好消息' if len(mytxt.replace(' ', '')) == 0 else mytxt # # # 需要键盘事件 反爬虫 # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE) # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'a') # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'x') # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'v') # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE) # time.sleep(random.randint(2, 5)) # # try: # # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', '', '"') # # browser.execute_script(js) # js = '%s%s%s' % 
('document.getElementsByTagName("textarea")[0].value="', mytxt, '"') # browser.execute_script(js) # time.sleep(3) # except Exception as jse: # print('.getElementsByTagName("textarea")--log-', jse) # continue # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE) # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span' # browser.find_element_by_xpath(xp_newpage).click() # time.sleep(3) # try: # upload = browser.find_element_by_id('fileElem') # # logs_img = '' # img_url_list = img_list.split(',') # # for imgid in img_url_list: # img_url = 'http://192.168.2.212:83/file/get?type=star_helper&id=199'.replace('199', str(imgid)) # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url, # local_default='default.DONOT_REMOVE.png') # print(local_img_path) # time.sleep(random.randint(2, 4)) # logs_img += img_url # logs_img += local_img_path # upload.send_keys(local_img_path) # time.sleep(random.randint(3, 7)) # except Exception as ee: # img_url_default = '' # img_url = img_url_default # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url, # local_default='default.DONOT_REMOVE.png') # sleep(2) # logs_img += img_url # logs_img += local_img_path # # upload.send_keys(local_img_path) # logging.exception(ee) # # try: # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/ul' # browser.find_element_by_xpath(xp_newpage).click() # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a' # browser.find_element_by_xpath(xp_newpage).click() # # time.sleep(random.randint(8, 20)) # js = 'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"' # browser.execute_script(js) # # time.sleep(random.randint(2, 5)) # xp_newpage = '/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a' # browser.find_element_by_xpath(xp_newpage).click() # time.sleep(random.randint(3, 6)) # url_curr = browser.current_url # # with 
open('toutiao_success.log', 'a', encoding='utf-8') as f: # logs = '%s%s%s%s%s\n' % ( # time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), ac_type, myid[0:4], mytxt, # logs_img) # print(logs) # f.write(logs) sql = 'INSERT INTO joke__helper_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");' % ( dbid, res_url, int(time.time()), toutiao_uid) mysql_write(sql) print(sql) time.sleep(random.randint(20, 30)) js = 'window.location.href="https://www.wukong.com/"' js = 'window.location.href="https://www.toutiao.com/"' browser.execute_script(js) # except Exception as e_url_jump: # print('e_url_jump', e_url_jump) try: browser.quit() except Exception as e1: print(e1) logging.exception(e1) time.sleep(random.randint(120, 300))
# Fill in the article title by sending real key events to the title input.
# `browser` (a Selenium WebDriver) and `Keys` must already be in scope.
# XPath selecting the element whose id is "title".
xp_newpage = '//*[@id="title"]'
# Text typed into the title field.
mytxt = '林志玲捐款记录被翻出 单笔高达千万'
# A SPACE key press is sent first, then the title text; the element is looked
# up again for each call rather than cached.
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
<div class='article' id='artibody'>
<div class='img_wrapper'>
<img alt='林志玲' src='http://n.sinaimg.cn/ent/transform/703/w253h450/20180416/77p2-fzcyxmv1344655.jpg'>
<span class='img_descr'>林志玲</span>
</div>
<div class='img_wrapper'>
<img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/CVWm-fzcyxmv1342897.jpg'>
<span class='img_descr'>林志玲捐款记录</span>
</div>
<div class='img_wrapper'>
<img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/hXMn-fzcyxmv1342914.jpg'>
<span class='img_descr'>林志玲捐款记录</span>
</div> <!--video-list-->
<div class='video-2017' id='videoList0'></div>
<!--/video-list-->
<p> 新浪娱乐讯 据台湾媒体报道,林志玲
自出道以来,热心公益,甚至创立了自己的基金会,每年固定发行公益年历。近日明星从事公益的话题发烧,她也被网友挖出,几乎每个月都在转帐捐款,且其中一笔高达1000万人民币,更让网友惊呼连连。
</p>
<div id='ad_44124' class='otherContent_01'
style='display: block; margin: 10px 20px 10px 0px; float: left; overflow: hidden; clear: both; padding: 4px; width: 300px; height: 250px;'>
</div>
<p>
林志玲被网友翻出,2016年至2018年间的捐款纪录,几乎每个月都有记录,且最低都是人民币万元起跳,其中甚至有一笔高达1000万人民币,捐款项目是“筑巢行动”,不少人看到明细,都惊讶表示,原来志玲姐姐私下默默捐了这么多善款,还有人笑称:“她是不是拿着手机,无聊就转帐的那种人?”、“真的人美心也美”、“太圈粉了”、“志玲姐姐真的太低调了”。</p>
<p>
43岁的林志玲1998年出道,从伸展台转战影视圈,尚未出名前就热心公益,更在2011年,主动以个人名义,成立“志玲姐姐慈善基金会”。她也固定每年拍摄公益写真年历,所得全数捐给儿福机构,或是帮助弱势孩童急难救助等,多年从不间断,且义卖期间,她从不公开做宣传,低调行善,受到不少人赞赏。</p>
<p>
林志玲2016年受访曾透露,投入公益的契机,是因为身边罹癌友人的一句话,才让她下定决心。当时这位好友问她:“你希望离开后,怎样被大家记得?”她想了一想,认为既然是公众人物,就应该让大家记得自己微笑的样子,要用这样的身分,做些有影响力的事,从此将公益当做自我赋予的使命,一做就是好多年。ETtoday/文</p>
<p class='article-editor'>(责编:kita)</p>
<div style='font-size: 0px; height: 0px; clear: both;'></div>
</div>
<!-- 非定向300*250按钮 end -->
</div>
from selenium import webdriver
from time import sleep
import time
from selenium.webdriver.common.keys import Keys
import os
import requests
import time
import threading
import logging
import random
# ---------------------------------------------------------------------------
# Start-up: remember when the script started, work out the log file name and
# configure the root logger to append to that file.
# ---------------------------------------------------------------------------
# Start timestamp, formatted as YYYYmmdd_HHMMSS.
start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
os_sep = os.sep
# Directory containing this script and the script's bare file name.
this_file_abspath, this_file_name = os.path.split(os.path.abspath(__file__))
# Relative path, so the log file is created in the current working directory.
logf = this_file_name + '.log'
try:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename=logf,
        filemode='a',
    )
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    # Best effort: record the failure in the log file as well.  The file may be
    # the very thing that could not be opened, so a second failure here must
    # not hide the message or change the exit path.
    try:
        with open(logf, 'a') as fo:
            # Terminate the record so later entries start on their own line.
            fo.write(s + '\n')
    except OSError as write_err:
        print(write_err, flush=True)
    # os._exit() terminates without flushing stdio buffers, so flush explicitly
    # or the message can be lost when stdout is not a terminal.
    print(s, flush=True)
    os._exit(4002)
logging.info('START')

# Sample image URL and the local directory used for downloaded images.
img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'
def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png', timeout=30):
    """Download an image and return the path of the local copy.

    The file is stored in ``img_dir`` under a name built from the current
    timestamp, the calling thread id and a sanitised form of ``img_url``,
    always with a ``.png`` suffix.

    Args:
        img_dir: Target directory as a string prefix (it is concatenated
            directly, so it must end with a path separator).
        img_url: URL of the image to download.
        local_default: File name inside ``img_dir`` returned when the download
            or the write fails.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Path of the downloaded file, or ``img_dir + local_default`` when the
        request fails, returns an HTTP error status, returns an empty body,
        or the file cannot be written.
    """
    fallback_path = '%s%s' % (img_dir, local_default)
    try:
        response = requests.get(img_url, timeout=timeout)
        # Do not save an HTTP error page as if it were the image.
        response.raise_for_status()
        content = response.content
        if not content:
            return fallback_path
        # Replace characters that are unsafe in file names with fixed markers.
        safe_name = (img_url.replace('/', '_xl_')
                     .replace(':', '_fxl_')
                     .replace('?', '_fxlquestion_')
                     .replace('=', '_fxlequal_')
                     .replace('&', '_fxland_'))
        local_path = '%s%s%s%s%s' % (
            img_dir,
            time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())),
            str(threading.get_ident()),
            safe_name,
            '.png')
        with open(local_path, 'wb') as f:
            f.write(content)
    except Exception as e:
        # Best effort by design: report the problem and fall back to the
        # default image instead of returning a path that was never written.
        print(e)
        return fallback_path
    return local_path
import pymysql
# MySQL connection settings (host, port, user, password, database) read by
# mysql_fetch() and mysql_write().
# Each value can be overridden through an environment variable so credentials
# do not have to be edited in the source; the defaults are the original values.
h = os.environ.get('STAR_MEDIA_DB_HOST', '192.168.2.210')
pt = int(os.environ.get('STAR_MEDIA_DB_PORT', '3306'))
u = os.environ.get('STAR_MEDIA_DB_USER', 'root')
p = os.environ.get('STAR_MEDIA_DB_PASSWORD', 'joke')
db = os.environ.get('STAR_MEDIA_DB_NAME', 'star_media_joke')
def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query on the configured MySQL database and return all rows.

    Args:
        sql: SQL statement to execute.
        res_type: ``'dic'`` to get each row as a dict (``DictCursor``); any
            other value uses the connection's default cursor.
        args: Optional parameters passed to ``cursor.execute`` so values can
            be bound by the driver instead of being formatted into ``sql``.

    Returns:
        The result of ``cursor.fetchall()``, or an empty tuple when the
        connection cannot be established.

    Raises:
        Whatever ``cursor.execute`` raises; the cursor and connection are
        closed before the exception propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor is still open instead of relying
            # on the result staying available after close().
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()
    return rows
def mysql_write(sql, args=None):
    """Execute a write statement on the configured MySQL database and commit.

    Args:
        sql: SQL statement to execute.
        args: Optional parameters passed to ``cursor.execute`` so values can
            be bound by the driver instead of being formatted into ``sql``.

    Returns:
        ``0`` after a successful commit, ``1`` when the connection cannot be
        established.

    Raises:
        Whatever ``cursor.execute`` / ``commit`` raises; the cursor and
        connection are closed before the exception propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0
import random
# Main publishing loop: read account(s) from MySQL, look up the articles
# scheduled for each account, then drive a Chrome session through the QQ login
# and the mp.toutiao.com publish page, and finally record the result in MySQL.
# NOTE(review): indentation was lost in this copy of the script, so the exact
# nesting of the loops/branches below cannot be read from the text — confirm
# against the original file.
while True:
logging.info('LOOP----')
# The first query is immediately overwritten; only the second one (a single
# account selected by id) is executed.
sql = 'SELECT username,password,toutiaoid FROM joke_star_joke_joke_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
sql = 'SELECT username,password,toutiaoid FROM joke_star_joke_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
res = mysql_fetch(sql)
# One dict per account row: login name, password and Toutiao user id.
ac_l = [{'u': i[0], 'p': i[1], 'toutiao_uid': i[2]} for i in res]
for ac in ac_l:
myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
# Publishing restriction / eligibility logic
# Again only the second statement is used: publish rules whose comma-separated
# id_toutiao_uid_list contains this uid and whose time_effective has passed.
sql = "SELECT * FROM joke_star_joke_relation_wukong_question WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format(
toutiao_uid, int(time.time()));
sql = "SELECT * FROM joke_joke_article_publish WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format(
toutiao_uid, int(time.time()));
print(sql)
logging.info(sql)
res_content = mysql_fetch(sql, 'dic')
if len(res_content) == 0:
continue
# NOTE(review): id_article_list is not referenced again in the visible code;
# the same comprehension is rebuilt inline in the next statement.
id_article_list = [i['id_article_list'] for i in res_content]
# Articles listed by the rules above that have no publish-result row for this
# uid yet (at most two).
sql = 'SELECT * FROM joke_joke_article WHERE id IN ({}) AND id NOT IN (SELECT article_id FROM joke_joke_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; '.format(
','.join([i['id_article_list'] for i in res_content]), toutiao_uid)
# sql = 'SELECT * FROM joke_star_joke_wukong_question WHERE id NOT IN (SELECT toutiao_uid FROM joke_star_joke_toutiaouser_wukong_question) LIMIT 1'
logging.info(sql)
res_content = mysql_fetch(sql, 'dic')
if len(res_content) == 0:
continue
# Start a new Chrome session.
browser = webdriver.Chrome()
# The list built here is replaced a few lines below by the toutiao.com article
# URLs; only that second list is used.
f_url_l = ['https://www.toutiao.com/group/1589657566362638/',
'https://www.wukong.com/question/6388670742287876353/',
'https://www.wukong.com/tag/6215497898671475202/']
f_url_l += ['https://www.wukong.com/question/6512777037948649741/',
'https://www.wukong.com/question/6469247721038414093/',
'https://www.wukong.com/question/6481502080249889037/']
# f_url_l = []
f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514778729951003150/']
f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/',
'https://www.toutiao.com/a6513334304318161411/']
# Pick the first page to open from the current Unix time modulo the list length.
f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
# browser.get(random.choice(f_url_l))
browser.get(f_url_l_a)
time.sleep(random.randint(10, 20))
# Navigate to the SSO login page; the first assignment is overwritten.
js = 'window.location.href="https://sso.toutiao.com/login/";'
js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
browser.execute_script(js)
time.sleep(random.randint(10, 20))
# js = 'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";'
# The same navigation script is executed a second time.
browser.execute_script(js)
# ac_type is fixed to 'qq', so the 'mail_qq' branch below is never taken.
ac_type = 'qq'
if ac_type == 'qq':
myid, mypwd = ac['u'], ac['p']
xp = '/html/body/div/div/div[2]/div/div/div/ul/li[3]'
browser.find_element_by_xpath(xp).click()
time.sleep(10)
# Fill the elements with ids "u" and "p" by assigning .value through JS.
# NOTE(review): the values are placed inside a double-quoted JS string without
# escaping, so a '"' or backslash in them would break the script.
js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
browser.execute_script(js)
js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
browser.execute_script(js)
time.sleep(random.randint(5, 15))
# Click the element with id "go".
xp_newpage = '//*[@id="go"]'
browser.find_element_by_xpath(xp_newpage).click()
time.sleep(random.randint(10, 20))
elif ac_type == 'mail_qq':
continue
time.sleep(5)
browser.refresh()
js = 'window.location.href="https://www.toutiao.com/";'
browser.execute_script(js)
browser.refresh()
time.sleep(6)
# Only the last of these three assignments (the profile_v3 publish page) is used.
js = 'window.location.href="https://www.wukong.com/";'
js = 'window.location.href="https://mp.toutiao.com/profile_v2/publish/";'
js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
browser.execute_script(js)
time.sleep(6)
# title
# The JS-based ways of setting the title are disabled; the title is typed with
# key events instead.
# js = '%s%s%s' % ('document.getElementById("title").value="', '林志玲捐款记录被翻出 单笔高达千万', '"')
# js = 'document.getElementById("title").value="{}"'.format('林志玲捐款记录被翻出 单笔高达千万')
# browser.execute_script(js)
xp_newpage = '//*[@id="title"]'
mytxt = '林志玲捐款记录被翻出 单笔高达千万'
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
time.sleep(2)
# Read the article HTML from a local file into one string, dropping newlines.
fhtml, dbhtml_str = 'toutaio.db.html', ''
with open(fhtml, 'r', encoding='utf-8') as fr:
for hi in fr:
dbhtml_str = '{}{}'.format(dbhtml_str, hi.replace('\n', ''))
# Click an editor toolbar element, then an element inside "#images"
# (presumably opening and dismissing the image dialog — TODO confirm).
xp = '//*[@id="edui18_body"]/div[1]'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
# //*[@id="images"]/div[1]/div
xp = '//*[@id="images"]/div[1]/div'
xp = '//*[@id="images"]/div[1]/div/span'
browser.find_element_by_xpath(xp).click()
time.sleep(1)
db_html = dbhtml_str
# db_html = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>'
# Set the <body> of the "ueditor_0" iframe to the article HTML.
# NOTE(review): db_html is embedded in a double-quoted JS string without
# escaping, so any '"' in the HTML would break the script.
js = 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(
db_html)
browser.execute_script(js)
# The same two clicks as before the injection are repeated.
xp = '//*[@id="edui18_body"]/div[1]'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
# //*[@id="images"]/div[1]/div
xp = '//*[@id="images"]/div[1]/div'
xp = '//*[@id="images"]/div[1]/div/span'
browser.find_element_by_xpath(xp).click()
time.sleep(1)
# xp='//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[2]/div[1]/div[2]/i'
# browser.find_element_by_xpath(xp)
# xp='//*[@id="pgc-text-img"]/div/div[1]/div[1]'
# browser.find_element_by_xpath(xp)
# The next two lookups discard their result: the elements are located but
# nothing is clicked.
xp = '//*[@id="pgc-text-img"]/div/div[2]/div/button[1]'
browser.find_element_by_xpath(xp)
xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div/div/label[3]/div/input'
browser.find_element_by_xpath(xp)
time.sleep(2)
time.sleep(2)
# Click an element in the lower part of the "#graphic" form (presumably the
# publish/save button — TODO confirm). The XPath is assigned twice, unchanged.
xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
browser.find_element_by_xpath(xp).click()
# NOTE(review): dd is not used anywhere in the visible code.
dd = 9
# js = 'document.getElementsByClassName("ask")[0].click();'
# browser.execute_script(js)
# time.sleep(12)
# time.sleep(random.randint(10, 20))
# # Keyboard events are needed (anti-crawler)
# tmp_target = browser.find_element_by_class_name('input-box').find_element_by_tag_name('input')
# tmp_target.send_keys(Keys.SPACE)
# tmp_target.send_keys(Keys.CONTROL, 'a')
# tmp_target.send_keys(Keys.CONTROL, 'x')
# tmp_target.send_keys(Keys.CONTROL, 'v')
# tmp_target.send_keys(Keys.BACK_SPACE)
# time.sleep(random.randint(10, 20))
# res_content = []
# Only the first fetched article is processed.
for i in res_content[0:1]:
# content and img_list are only used by the commented-out code further down.
dbid, content, img_list = i['id'], i['content'], i['img_list']
# Word lists used to assemble a random question-style sentence.
tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']
s = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
random.choice(tmp_l_2), '的', random.choice(tmp_l))
# Put the sentence into the first child of the first "input-box" element.
js = 'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";'.format(s)
browser.execute_script(js)
time.sleep(12)
#
# tmp_target.send_keys(Keys.SPACE)
# Click the "next" step button, then the "submit" step button.
js = 'document.getElementsByClassName("step-btn next")[0].click();'
browser.execute_script(js)
# step-btn submit
js = 'document.getElementsByClassName("step-btn submit")[0].click();'
browser.execute_script(js)
time.sleep(12)
#
# Open the wukong.com user page for this uid and read the href of the first
# link inside the first "question-title" element as the result URL.
js = 'window.location.href="https://www.wukong.com/user/?uid={}&type=1";'.format(toutiao_uid)
browser.execute_script(js)
time.sleep(12)
res_url = browser.find_element_by_class_name('question-title').find_elements_by_tag_name('a')[
0].get_attribute('href')
# print(i)
# xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea'
# try:
# browser.find_element_by_xpath(xp_newpage)
# except Exception as e:
# print(e)
# break
# browser.find_element_by_xpath(xp_newpage).click()
# words = content
# # Message: SyntaxError: unterminated string literal
# mytxt = words.replace('\n', ' ').replace('\r', ' ').replace('\\br', ' ').replace('"', '“').replace("'", '‘')
# # Message: SyntaxError: missing ; before statement
# mytxt = mytxt.replace("'", '‘')
# # 2000 (Toutiao) — presumably a 2000-character limit
# mytxt = mytxt[0:2000]
# mytxt = '好消息' if len(mytxt.replace(' ', '')) == 0 else mytxt
#
# # Keyboard events are needed (anti-crawler)
# browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'a')
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'x')
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'v')
# # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE)
# time.sleep(random.randint(2, 5))
#
# try:
# # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', '', '"')
# # browser.execute_script(js)
# js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', mytxt, '"')
# browser.execute_script(js)
# time.sleep(3)
# except Exception as jse:
# print('.getElementsByTagName("textarea")--log-', jse)
# continue
#
# browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
# xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span'
# browser.find_element_by_xpath(xp_newpage).click()
# time.sleep(3)
# try:
# upload = browser.find_element_by_id('fileElem')
#
# logs_img = ''
# img_url_list = img_list.split(',')
#
# for imgid in img_url_list:
# img_url = 'http://192.168.2.212:83/file/get?type=star_joke&id=199'.replace('199', str(imgid))
# local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
# local_default='default.DONOT_REMOVE.png')
# print(local_img_path)
# time.sleep(random.randint(2, 4))
# logs_img += img_url
# logs_img += local_img_path
# upload.send_keys(local_img_path)
# time.sleep(random.randint(3, 7))
# except Exception as ee:
# img_url_default = ''
# img_url = img_url_default
# local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
# local_default='default.DONOT_REMOVE.png')
# sleep(2)
# logs_img += img_url
# logs_img += local_img_path
# # upload.send_keys(local_img_path)
# logging.exception(ee)
#
# try:
# xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/ul'
# browser.find_element_by_xpath(xp_newpage).click()
# xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a'
# browser.find_element_by_xpath(xp_newpage).click()
#
# time.sleep(random.randint(8, 20))
# js = 'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"'
# browser.execute_script(js)
#
# time.sleep(random.randint(2, 5))
# xp_newpage = '/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a'
# browser.find_element_by_xpath(xp_newpage).click()
# time.sleep(random.randint(3, 6))
# url_curr = browser.current_url
#
# with open('toutiao_success.log', 'a', encoding='utf-8') as f:
# logs = '%s%s%s%s%s\n' % (
# time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), ac_type, myid[0:4], mytxt,
# logs_img)
# print(logs)
# f.write(logs)
# Record the publish result (article id, URL, Unix time, uid).
# NOTE(review): the values are formatted straight into the SQL text.
sql = 'INSERT INTO joke_joke_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");' % (
dbid, res_url, int(time.time()), toutiao_uid)
mysql_write(sql)
print(sql)
time.sleep(random.randint(20, 30))
# Only the second assignment (toutiao.com) is used.
js = 'window.location.href="https://www.wukong.com/"'
js = 'window.location.href="https://www.toutiao.com/"'
browser.execute_script(js)
# except Exception as e_url_jump:
# print('e_url_jump', e_url_jump)
# Close the browser; a failure to quit is printed and logged.
try:
browser.quit()
except Exception as e1:
print(e1)
logging.exception(e1)
# Pause for a random 120-300 seconds.
time.sleep(random.randint(120, 300))
<img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" alt="pgc-image/152385934210854ceb909ec" _src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" buttonadded="true">

'''
<img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" _src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" alt="pgc-image/15238623686755f9e3c409a" buttonadded="true">
'''
# Rewrite every <img ...> tag in dbhtml_str to the markup template below, then
# open the publish page, type the title and inject the HTML into the editor.
# Relies on names defined outside this snippet: dbhtml_str,
# pgc_img_url_l_toutiao, d, browser, Keys, time.
# NOTE(review): indentation was lost in this copy, so the exact extent of the
# loop body below cannot be read from the text — confirm against the original.
dbhtml_str_ = dbhtml_str
# Number of image tags, counted on the unmodified copy.
img_n = dbhtml_str_.count('<img')
# Replacement tag template; 'TTimgCode' is substituted per image. The same
# value is assigned twice.
s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
# s = "<img onload='editor.fireEvent(\'contentchange\')' src='https://p1.pstatp.com/large/pgc-image/TTimgCode' _src='https://p1.pstatp.com/large/pgc-image/TTimgCode' alt='pgc-image/TTimgCode' buttonadded='true'>"
ss = ''
# l is an alias, not a copy: the `del l[0]` below also shortens
# pgc_img_url_l_toutiao.
l = pgc_img_url_l_toutiao
for i in range(img_n):
# p1: start of the next '<img ' tag (searching past the previous one).
# NOTE(review): the count above matches '<img' while index() needs '<img '
# (with a space); a tag without that space would raise ValueError here.
if i == 0:
p1 = dbhtml_str.index('<img ', 0)
else:
p1 = dbhtml_str.index('<img ', p1 + 3)
# Mask every '>' before p1 with 'X' (same length) so that index('>') returns
# the position of the first '>' at or after p1, i.e. the end of this tag.
tmp = '{}{}'.format(dbhtml_str[0:p1].replace('>', 'X'), dbhtml_str[p1:])
p2 = tmp.index('>')
# The image code is the last path segment of the first remaining entry.
ss = s.replace('TTimgCode', l[0].split('/')[-1])
dbhtml_str = '{}{}{}'.format(dbhtml_str[0:p1], ss, dbhtml_str[p2 + 1:])
del l[0]
print('-----------------')
print(dbhtml_str)
time.sleep(2)
# Open the publish page.
js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
browser.execute_script(js)
time.sleep(6)
# Type the title (a SPACE key first, then the text) into the "title" element.
xp_newpage = '//*[@id="title"]'
mytxt = d['title']
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
time.sleep(2)
# SAVE NOT DEL
# The triple-quoted block below is a bare string expression, so none of it is
# executed; it keeps the earlier UEditor-iframe implementation for reference.
"""
xp = '//*[@id="edui18_body"]/div[1]'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
xp = '//*[@id="images"]/div[1]/div/span'
browser.find_element_by_xpath(xp).click()
time.sleep(3)
'''
'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(dbhtml_str.replace('onload="editor.fireEvent(\'contentchange\')"','').replace('"',"'").replace('\n',''))
'''
# 结合浏览器控制台,拼接符合语法的js字符串
r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
dbhtml_str_py_js = dbhtml_str
for k in r_d:
dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
js = 'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"'.format(
dbhtml_str_py_js)
browser.execute_script(js)
"""
# Activate the editing area
browser.find_element_by_class_name('ql-container').click()
# Click the image-upload (circle) tool
browser.find_element_by_class_name('icon-pic_tool').click()
# Activate the target upload tab (the last tab in the tab list)
browser.find_element_by_class_name('tui-tab-list').find_elements_by_class_name('tui-tab')[-1].click()
# Close the upload panel (last button inside the active tab panel)
browser.find_element_by_class_name('tui-tab-panel-active').find_elements_by_class_name('tui-btn')[
-1].click()
# Build a JS string that is syntactically valid (worked out with the browser console)
# Replacements applied in dict order: drop the onload attribute, turn double
# quotes into single quotes, remove newlines. The onload key contains double
# quotes, so it has to be applied before the quote swap.
r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
dbhtml_str_py_js = dbhtml_str
for k in r_d:
dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
# NOTE(review): this replaces 'nbsp;' without the leading '&', so an '&nbsp;'
# entity becomes '& '.
dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
# Pass in the "keyboard-ized" HTML
# document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML = "44"
# Set innerHTML of the first element carrying both classes "ql-editor" and
# "ql-blank".
js = 'document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML ="{}"'.format(
dbhtml_str_py_js)
browser.execute_script(js)

浙公网安备 33010602011771号