观影数据集分析:
json格式化去name:
# -*- coding = UTF-8 -*-
# @Time : 2021/12/9 14:43
# @Author : 伏珊瑞
# @File : dojson.py
# @Software : PyCharm
#json格式化
import pymysql
import json
conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie")
cur = conn.cursor()
cur.execute("select cast,movie_id from tmdb_5000_credits;")
alldate = []
sql = "update tmdb_5000_credits set cast= %s where movie_id=%s"
while 1:
res = cur.fetchone()
if res is None:
# 表示已经取完结果集
break
js = json.loads(res[0])
date = []
for i in js:
date.append(i['name'])
newdate = ",".join(str(i) for i in date)
can = (newdate, res[1])
alldate.append(can)
cur.executemany(sql, alldate)
cur.close()
conn.commit()
conn.close()
统计不同电影数量:
# -*- coding = UTF-8 -*-
# @Time : 2021/12/9 16:19
# @Author : 伏珊瑞
# @File : getalltype.py
# @Software : PyCharm
#统计一个字段的频率求和
import pymysql
conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie")
cur = conn.cursor()
cur2 = conn.cursor()
cur.execute("select genres,budget,revenue from tmdb_5000_movies;")
date = []
count = []
budget=[]
revenue=[]
while 1:
res = cur.fetchone()
if res is None:
# 表示已经取完结果集
break
for i in res[0].split(','):
if i not in date:
date.append(i)
count.append(1)
budget.append(res[1])
revenue.append(res[2])
else:
count[date.index(i)] += 1
budget[date.index(i)]+=res[1]
revenue[date.index(i)] += res[2]
temp = 0
alldate = []
for i in date:
if i.strip()!='':
if count[temp] > 0:
can = (i, count[temp],budget[temp],int(revenue[temp]))
alldate.append(can)
temp += 1
sql = "insert into genres values(%s,%s,%s,%s)"
insert = cur2.executemany(sql, alldate)
cur.close()
cur2.close()
conn.commit()
conn.close()
浙公网安备 33010602011771号