1 import json
2 from collections import Counter
3 from json import JSONDecodeError
4
5 import mysql
6 import requests
7 from lxml import etree
8
9 # 定义远程 SVG 文件的 URL
10 file = r'D:\tmp_files\jmx\0919_3568.txt'
11 data_to_insert=[]
12 with open(file, 'r', encoding='utf-8') as file:
13 # 逐行读取文件
14 for line in file:
15 try:
16 # print(line.strip()) # 使用 strip() 去掉每行末尾的换行符
17 url = line.strip()
18 response = requests.get(url)
19 start_index = url.rfind('/') + 1 # 从最后一个 '/' 开始
20 end_index = url.find('_', start_index) # 找到第一个 '_'
21 # 提取子字符串
22 pos_id = url[start_index:end_index]
23 print("pos_id===", pos_id)
24 # 发送 HTTP GET 请求获取 SVG 文件内容
25 response = requests.get(url)
26 # 打印响应内容(XML 格式)
27
28 svg_content = response.content # 使用 .content 获取字节类型的内容
29 # print(svg_content)
30 # print(svg_content.decode('utf-8')) # 打印内容
31 # 将字节类型的 SVG 内容解析为 XML 解析树
32 root = etree.fromstring(svg_content)
33 # print(svg_content)
34 # print(root.xpath('//@class'))
35 class_items = root.xpath('//@class')
36 dataTitle_items = root.xpath('//@data-title')
37 dictx = {}
38 counter = Counter(class_items)
39 store_btn_info = {}
40 for obj, count in counter.items():
41 # print(f"{obj}: {count}; ")
42 store_btn_info[obj] = count
43 btn_table = store_btn_info.get("btn_table")
44 btn_poster = store_btn_info.get("btn_poster")
45 btn_mainEntry = store_btn_info.get("btn_mainEntry")
46 # print(btn_table,btn_poster,btn_mainEntry)
47 btn_poster_elements = root.xpath("//*[@class='btn_poster']")
48 btn_table_elements = root.xpath("//*[@class='btn_table']")
49 btn_mainEntry_elements = root.xpath("//*[@class='btn_mainEntry']")
50
51 dictx1 = {}
52 btn_poster_title_list = []
53 for element in btn_poster_elements:
54 class_attr = str(element.get('class'))
55 data_title = str(element.get('data-title'))
56 # btn_poster_title_list.append(data_title)
57 data_pictures = json.loads(str(element.get('data-pictures')).replace("'", '"'))
58 # 打印提取到的属性
59 # print(f"class_attr: {class_attr}",f"data-title: {data_title}",f"data-pictures: {data_pictures}")
60 picture = data_pictures.get("picture")
61 backPicture = data_pictures.get("backPicture")
62 if picture != '' and backPicture != '':
63 dictx1[data_title] = [picture, backPicture]
64 btn_poster_title_list.append(str(data_title + '-E'))
65 btn_poster_title_list.append(str(data_title + '-I'))
66
67 elif picture != '':
68 dictx1[data_title] = [picture]
69 btn_poster_title_list.append(str(data_title + '-E'))
70 elif backPicture != '':
71 dictx1[data_title] = [backPicture]
72 btn_poster_title_list.append(str(data_title + '-I'))
73 # print("-"*100)
74 # print(dictx1)
75 # print("-"*100)
76 btn_poster = sum([len(v) for v in dictx1.values()])
77 # print(sum([ len(v) for v in dictx1.values()]))
78 btn_poster_title_list.sort()
79
80 dictx2 = {}
81 btn_table_dataId_list = []
82 for element in btn_table_elements:
83 class_attr = element.get('class')
84 data_table_id = str(element.get('data-table-id')).replace(' ', '')
85 # print(data_table_id)
86 btn_table_dataId_list.append(data_table_id)
87 data_title = str(element.get('data-title'))
88 data_sku = str(element.get('data-sku'))
89 # 打印提取到的属性
90 # print(f"class_attr: {class_attr}",f"data-title: {data_table_id}",f"data-pictures: {data_title}",f"data-sku:{data_sku}")
91 dictx2[data_table_id] = (data_sku, data_title)
92 # print(dictx2)
93 btn_table_dataId_list.sort()
94 # print(btn_table_dataId_list)
95
96 dictx3 = {}
97 btn_poster_title_list_v1 = []
98 for element in btn_mainEntry_elements:
99 class_attr = element.get('class')
100 data_items = json.loads(str(element.get('data-items')).replace("'", '"'))
101 data_title = element.get('data-title')
102 data_posters = json.loads(str(element.get('data-posters')).replace("'", '"'))
103 # 打印提取到的属性
104 dictx3[
105 "btn_mainEntryx"] = f"class_attr: {class_attr}\n", f"data_items: {data_items}\n", f"data_title: {data_title}\n", f"data_posters:{data_posters}"
106 # print(f"class_attr: {class_attr}\n",f"data_items: {data_items}\n",f"data_title: {data_title}\n",f"data_posters:{data_posters}")
107 data_items_titles = [item['title'] for item in data_items]
108 data_posters_titles = [item['title'] for item in data_posters]
109 data_posters_tit = [item['title'] for item in data_posters][0] if len(
110 [item['title'] for item in data_posters]) > 0 else ''
111 picture = [item['picture'] for item in data_posters][0] if len(
112 [item['picture'] for item in data_posters]) else ''
113 backPicture = [item['backPicture'] for item in data_posters][0] if len(
114 [item['backPicture'] for item in data_posters]) else ''
115 if picture != '' and backPicture != '':
116 data_posters_titles[0] = str(data_posters_tit) + "-E"
117 data_posters_titles.append(str(data_posters_tit) + "-I")
118 elif picture != '':
119 data_posters_titles[0] = str(data_posters_tit) + "-E"
120
121 elif backPicture != '':
122 data_posters_titles[0] = str(data_posters_tit) + "-I"
123
124 import itertools
125
126 btn_poster_title_list_v1.extend(data_items_titles)
127 btn_poster_title_list_v1.extend(data_posters_titles)
128
129 btn_poster_title_list_v1 = btn_poster_title_list + btn_poster_title_list_v1
130 btn_poster_title_list_v1.sort()
131
132 # print(dictx3)
133
134 dictx["btn_table"] = dictx2
135 dictx["btn_poster"] = dictx1
136 dictx["btn_mainEntry"] = dictx3
137 # print(dictx)
138
139 from mysql.connector import Error
140
141 try:
142 # 连接到 MySQL 数据库
143 connection = mysql.connector.connect(
144 host='', # 数据库主机地址
145 user='', # 数据库用户名
146 password=', # 数据库密码
147 database='' # 数据库名称
148 )
149 if connection.is_connected():
150 # 创建游标对象
151 cursor = connection.cursor()
152 # cursor.execute(create_table_query)
153 # 插入数据
154 insert_query = '''
155 INSERT INTO tmp_store_svg_info_1 (
156 url,
157 store_id,
158 btn_table,
159 btn_poster,
160 btn_mainEntry,
161 btn_table_dataId_list,
162 btn_poster_title_list,
163 svg_btn_info,
164 svg_xml,
165 create_time
166 ) VALUES (
167 %s, %s, %s, %s, %s,%s,%s, %s, %s, NOW()
168 )
169 '''
170
171 # data_to_insert = [(
172 # url
173 # , pos_id
174 # , str(btn_table)
175 # , str(len(btn_poster_title_list_v1))
176 # , str(btn_mainEntry)
177 # , str(btn_table_dataId_list).replace("'", '"')
178 # , str(btn_poster_title_list_v1).replace("'", '"')
179 # , json.dumps(dictx)
180 # , str(svg_content)
181 # )]
182
183
184 print("data_to_insert=",len(data_to_insert))
185 if len(data_to_insert)==100:
186 cursor.executemany(insert_query, data_to_insert)
187 connection.commit()
188 data_to_insert.clear()
189 cursor.close()
190 connection.close()
191 print("数据插入成功")
192 else:
193 data_to_insert.append(
194 (
195 url
196 , pos_id
197 , str(btn_table)
198 , str(len(btn_poster_title_list_v1))
199 , str(btn_mainEntry)
200 , str(btn_table_dataId_list).replace("'", '"')
201 , str(btn_poster_title_list_v1).replace("'", '"')
202 , json.dumps(dictx)
203 , str(svg_content)
204 )
205
206 )
207 except Error as e:
208 print(f"Error: {e}")
209 if connection.is_connected():
210 connection.rollback() # 回滚事务
211 finally:
212 # 关闭游标和连接
213 if connection.is_connected():
214 cursor.close()
215 connection.close()
216 print("数据库连接已关闭")
217 except JSONDecodeError as e:
218 print(url)
219 continue
220
221
222 def save(connection,li):
223 try:
224 insert_query = '''
225 INSERT INTO tmp_store_svg_info_1 (
226 url,
227 store_id,
228 btn_table,
229 btn_poster,
230 btn_mainEntry,
231 btn_table_dataId_list,
232 btn_poster_title_list,
233 svg_btn_info,
234 svg_xml,
235 create_time
236 ) VALUES (
237 %s, %s, %s, %s, %s,%s,%s, %s, %s, NOW()
238 )
239 '''
240 if connection.is_connected():
241 # 创建游标对象
242 cursor = connection.cursor()
243 if len(data_to_insert) == 100:
244 cursor.executemany(insert_query, data_to_insert)
245 connection.commit()
246 data_to_insert.clear()
247 cursor.close()
248 connection.close()
249 print("数据插入成功")
250
251 except Error as e:
252 print(f"Error: {e}")
253 if connection.is_connected():
254 connection.rollback() # 回滚事务
255 finally:
256 # 关闭游标和连接
257 if connection.is_connected():
258 cursor.close()
259 connection.close()
260 print("数据库连接已关闭")