# coding=utf-8
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pg
import datetime
import time
# browser = webdriver.Chrome()
# browser.maximize_window()  # maximize the browser window
# clear_cache(browser)
# browser.get('http://www.customs.go.th/statistic_report.php?show_search=1')  # open the Thai customs statistics report page in the current browser
# wait = WebDriverWait(browser, 2)  # maximum time to wait
# browser.implicitly_wait(10)
#
# # print(browser.current_url)
# # Open a new window by executing JavaScript
# js = 'window.open("http://www.baidu.com");'
# browser.execute_script(js)
# handles = browser.window_handles
# for item in handles:
#     print(item)
# Comma-separated two-letter country codes. scraw_tailand_Data_11 splits this
# string on ',' and crawls the codes in the order listed, sending each one as
# the request's country_code field.
# NOTE(review): the list starts at CC and ends at CL, which looks like a list
# ordered by country name that was rotated to resume an earlier run - confirm.
# 'SD','TJ','CN'  (these three codes do not appear in the list below;
# presumably they were handled separately - confirm)
country_codes = '''CC,CX,CO,KM,CG,CD,CK,CR,CI,HR,CU,CW,CY,CZ,DD,DK,DJ,DM,DO,TP,EC,EG,SV,GQ,ER,EE,ET,FK,FO,FJ,FI,YY,FR,FX,GF,PF,TF,GA,GM,GE,DE,GH,GI,GR,GL,GD,GP,GU,GT,GG,GN,GW,GY,HT,HM,HN,HK,HU,IS,IN,ID,IR,IQ,IE,IM,IL,IT,JM,JP,JE,JO,KZ,KE,KI,KP,KR,KW,KG,LA,LV,LB,LS,LR,LY,LI,LT,LU,MO,MK,MG,MW,MY,MV,ML,MT,MH,MQ,MR,MU,YT,MX,FM,MD,MC,MN,ME,MS,MA,MZ,MM,NA,NR,NP,NL,AN,NC,NZ,NI,NE,NG,NU,NF,MP,NO,OM,OT,PK,PW,PS,PA,PG,PY,PE,PH,PN,PL,PT,PR,QA,RE,RO,RU,RW,BL,SH,KN,LC,MF,PM,VC,WS,SM,ST,SA,SN,RS,CS,SC,SL,SG,SX,SK,SI,SB,SO,ZA,GS,SS,ES,LK,SR,SJ,SZ,SE,CH,SY,TW,TZ,TH,TL,TG,TK,TO,TT,TN,TR,TM,TC,TV,UG,UA,AE,GB,US,UM,UY,UZ,VU,VA,VE,VN,VG,VI,WF,EH,YE,YU,ZR,ZM,ZW,ZZ,FZ,EZ,AX,AF,AL,DZ,AS,AD,AO,AI,AQ,AG,AR,AM,AW,AU,AT,AZ,BS,BH,BD,BB,BY,BE,BZ,BJ,BM,BT,BO,BQ,BA,BW,BV,BR,IO,BN,BG,BF,BI,KH,CM,CA,CV,KY,CF,TD,CL'''
import requests
# Search endpoint of the Thai customs statistics report.
_STATISTIC_REPORT_URL = 'http://www.customs.go.th/statistic_report.php?show_search=1'


def _clean_cell(text):
    """Return *text* without CR/LF/TAB characters; a missing text becomes ''."""
    return (text or '').replace('\r', '').replace('\n', '').replace('\t', '')


def getEqual11(hscode=None, imtype=None, year_parm=None, month_parm=None, country='',
               output_path=r'27071000001.txt', timeout=60):
    """Fetch one statistics report and append its rows to a tab-separated file.

    Posts the search form to the customs statistics page, parses the result
    table out of the returned HTML and appends one line per data row to
    ``output_path``.  Each line holds ten tab-separated fields: the row's
    first two cells, ``imtype``, ``hscode``, ``year_parm``, ``month_parm`` and
    the row's next four cells with thousands separators removed.

    Args:
        hscode: Value sent as the ``tariff_code`` form field.
        imtype: Value sent as the ``imex_type`` form field
            (the crawler in this file passes 'import' or 'export').
        year_parm: Value sent as the ``year`` form field.
        month_parm: Value sent as the ``month`` form field.
        country: Value sent as the ``country_code`` form field.
        output_path: File the records are appended to.  Defaults to the
            file name the original implementation always used.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        None.  When the response has no result table, or the table has no
        data rows, nothing is written.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the request times out.
    """
    info = dict(imex_type=imtype, tariff_code=hscode, country_code=country,
                month=month_parm, year=year_parm)
    print(info)
    response = requests.post(_STATISTIC_REPORT_URL, data=info, timeout=timeout)
    doc = pg(response.text)
    table = doc('.table-responsive .table')
    print(table.text())

    # The data rows live in the table's second child; a response without a
    # result table has fewer children and must not raise IndexError.
    sections = table.children()
    if len(sections) < 2:
        return

    records = []
    # Walk every row of the table body.
    for row in sections[1]:
        cells = row.getchildren()
        if len(cells) < 6:
            continue
        # Walk every column: the first two cells are kept as they are, the
        # remaining ones have their line breaks and tabs removed.  A cell
        # whose text is missing yields '' instead of raising on None.
        item = [
            (cell.text or '') if index <= 1 else _clean_cell(cell.text)
            for index, cell in enumerate(cells)
        ]
        print(item)
        # Country code (short), country code, import/export, tariff code,
        # year, month, monthly quantity, monthly CIF, cumulative quantity,
        # cumulative CIF.
        record = (item[0], item[1], imtype, hscode, year_parm, month_parm,
                  item[2].replace(",", ""), item[3].replace(",", ""),
                  item[4].replace(",", ""), item[5].replace(",", ""))
        print(record)
        records.append(record)

    if not records:
        return
    # Explicit encoding keeps the output independent of the platform default.
    with open(output_path, 'a', encoding='utf-8') as myfile:
        myfile.writelines(
            "\t".join(str(field) for field in record) + "\n"
            for record in records)
def scraw_tailand_Data_11(hscode='27071000001', first_year=2007, last_year=2018,
                          last_month=8, delay_seconds=5):
    """Crawl monthly import and export statistics for every listed country.

    For each code in ``country_codes``, each year from ``first_year`` to
    ``last_year`` and each month of that year, calls ``getEqual11`` once for
    'import' and once for 'export'.  The final year is only crawled up to
    ``last_month``.  The defaults reproduce the original hard-coded crawl:
    tariff code 27071000001, January 2007 through August 2018.

    Args:
        hscode: Tariff code passed to ``getEqual11``.
        first_year: First year to crawl (inclusive).
        last_year: Last year to crawl (inclusive).
        last_month: Last month crawled in ``last_year`` (inclusive).
        delay_seconds: Pause before every request.

    Returns:
        None.
    """
    for country in country_codes.split(','):
        for year in range(first_year, last_year + 1):
            # Only the final year is cut short; every other year runs 1-12.
            final_month = min(last_month, 12) if year == last_year else 12
            for month in range(1, final_month + 1):
                for trade_type in ('import', 'export'):
                    time.sleep(delay_seconds)
                    getEqual11(hscode, trade_type, str(year), str(month), str(country))
# Script entry point: start the full crawl as soon as this file is executed.
# NOTE(review): the call is unguarded, so importing this module also starts
# the crawl; consider an `if __name__ == "__main__":` guard.
scraw_tailand_Data_11()