观影数据集分析:

json格式化去name:

# -*- coding = UTF-8 -*-
# @Time : 2021/12/9 14:43
# @Author : 伏珊瑞
# @File : dojson.py
# @Software : PyCharm
#json格式化

import pymysql
import json

conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie")
cur = conn.cursor()
cur.execute("select cast,movie_id from tmdb_5000_credits;")
alldate = []
sql = "update tmdb_5000_credits set cast= %s where movie_id=%s"
while 1:
    res = cur.fetchone()
    if res is None:
        # 表示已经取完结果集
        break
    js = json.loads(res[0])
    date = []
    for i in js:
        date.append(i['name'])
    newdate = ",".join(str(i) for i in date)
    can = (newdate, res[1])
    alldate.append(can)
cur.executemany(sql, alldate)
cur.close()
conn.commit()
conn.close()

  统计不同电影数量:

# -*- coding = UTF-8 -*-
# @Time : 2021/12/9 16:19
# @Author : 伏珊瑞
# @File : getalltype.py
# @Software : PyCharm
#统计一个字段的频率求和

import pymysql

conn = pymysql.connect(host='localhost', user="root", passwd="123456", db="movie")
cur = conn.cursor()
cur2 = conn.cursor()
cur.execute("select genres,budget,revenue from tmdb_5000_movies;")
date = []
count = []
budget=[]
revenue=[]
while 1:
    res = cur.fetchone()
    if res is None:
        # 表示已经取完结果集
        break
    for i in res[0].split(','):
        if i not in date:
            date.append(i)
            count.append(1)
            budget.append(res[1])
            revenue.append(res[2])
        else:
            count[date.index(i)] += 1
            budget[date.index(i)]+=res[1]
            revenue[date.index(i)] += res[2]
temp = 0
alldate = []
for i in date:
    if i.strip()!='':
        if count[temp] > 0:
            can = (i, count[temp],budget[temp],int(revenue[temp]))
            alldate.append(can)
        temp += 1
sql = "insert into genres values(%s,%s,%s,%s)"
insert = cur2.executemany(sql, alldate)
cur.close()
cur2.close()
conn.commit()
conn.close()

  

 

posted on 2021-12-08 16:28  sean1246  阅读(109)  评论(0)    收藏  举报