观影数据集分析:
获取电影长度区间数量:
# -*- coding = UTF-8 -*- # @Time : 2021/12/9 16:19 # @Author : 伏珊瑞 # @File : getalltype.py # @Software : PyCharm #把数据写进电影长度统计表 import pymysql conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie") cur = conn.cursor() cur2 = conn.cursor() cur.execute("select runtime from tmdb_5000_movies;") date = [] count = [] while 1: res = cur.fetchone() if res is None: # 表示已经取完结果集 break if res[0] not in date: date.append(res[0]) count.append(1) else: count[date.index(res[0])] += 1 temp = 0 alldate = [] for i in date: if i.strip()!='': if count[temp] > 0: can = (i, count[temp]) alldate.append(can) temp += 1 sql = "insert into length values(%s,%s)" insert = cur2.executemany(sql, alldate) cur.close() cur2.close() conn.commit() conn.close()
获取所有导演信息:
# -*- coding = UTF-8 -*- # @Time : 2021/12/9 16:19 # @Author : 伏珊瑞 # @File : getalltype.py # @Software : PyCharm #把演员表的导演信息写到导演表 import pymysql conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie") cur = conn.cursor() cur2 = conn.cursor() cur.execute("select director,revenue from tmdb_5000_credits;") date = [] count = [] revenue=[] while 1: res = cur.fetchone() if res is None: # 表示已经取完结果集 break for i in res[0].split(','): if i not in date: date.append(i) count.append(1) revenue.append(res[1]) else: try: count[date.index(i)] += 1 revenue[date.index(i)] += res[1] except TypeError : print(res[1]) temp = 0 alldate = [] for i in date: if i.strip()!='': if count[temp] > 0: can = (i, revenue[temp]) alldate.append(can) temp += 1 sql = "insert into director values(%s,%s)" insert = cur2.executemany(sql, alldate) cur.close() cur2.close() conn.commit() conn.close()