观影数据集分析:
获取电影长度区间数量:
# -*- coding = UTF-8 -*-
# @Time : 2021/12/9 16:19
# @Author : 伏珊瑞
# @File : getalltype.py
# @Software : PyCharm
#把数据写进电影长度统计表
import pymysql
conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie")
cur = conn.cursor()
cur2 = conn.cursor()
cur.execute("select runtime from tmdb_5000_movies;")
date = []
count = []
while 1:
res = cur.fetchone()
if res is None:
# 表示已经取完结果集
break
if res[0] not in date:
date.append(res[0])
count.append(1)
else:
count[date.index(res[0])] += 1
temp = 0
alldate = []
for i in date:
if i.strip()!='':
if count[temp] > 0:
can = (i, count[temp])
alldate.append(can)
temp += 1
sql = "insert into length values(%s,%s)"
insert = cur2.executemany(sql, alldate)
cur.close()
cur2.close()
conn.commit()
conn.close()
获取所有导演信息:
# -*- coding = UTF-8 -*-
# @Time : 2021/12/9 16:19
# @Author : 伏珊瑞
# @File : getalltype.py
# @Software : PyCharm
#把演员表的导演信息写到导演表
import pymysql
conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie")
cur = conn.cursor()
cur2 = conn.cursor()
cur.execute("select director,revenue from tmdb_5000_credits;")
date = []
count = []
revenue=[]
while 1:
res = cur.fetchone()
if res is None:
# 表示已经取完结果集
break
for i in res[0].split(','):
if i not in date:
date.append(i)
count.append(1)
revenue.append(res[1])
else:
try:
count[date.index(i)] += 1
revenue[date.index(i)] += res[1]
except TypeError :
print(res[1])
temp = 0
alldate = []
for i in date:
if i.strip()!='':
if count[temp] > 0:
can = (i, revenue[temp])
alldate.append(can)
temp += 1
sql = "insert into director values(%s,%s)"
insert = cur2.executemany(sql, alldate)
cur.close()
cur2.close()
conn.commit()
conn.close()
浙公网安备 33010602011771号