大数据分析与可视化 之 随堂练习

随堂练习，随手记录。

2.ipynb

#%%

import numpy as np
import csv

#%%

# Load the CSV into iris_data as a list of rows (each row is a list of strings).
# '文件地址' is a placeholder path; point it at the real CSV file before running.
iris_data = []
# newline='' is what the csv module documentation asks for on files handed to
# csv.reader, so that line breaks inside quoted fields are parsed correctly.
with open(r'文件地址', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    # The first record is the column header; keep it out of the data rows.
    # The None default avoids a StopIteration when the file is empty.
    birth_header = next(csv_reader, None)
    iris_data.extend(csv_reader)
print(iris_data)

#%%

# Drop the first column of every row and freeze the remaining fields into a
# tuple (one tuple per record).
iris_list = [tuple(fields[1:]) for fields in iris_data]
iris_list

#%%

# Structured dtype: five named fields, each a unicode string of up to
# 40 characters.
field_names = (
    "Sepal.Length",
    "Sepal.Width",
    "Petal.Length",
    "Petal.Width",
    "Species",
)
datatype = np.dtype([(field, np.str_, 40) for field in field_names])
print(datatype)

#%%

# Pack the row tuples into a structured array. Note that this rebinds
# iris_data from the list of CSV rows to the array.
iris_data = np.array(iris_list, dtype=datatype)
iris_data

#%%

# The fields are stored as strings, so convert the column to float before
# doing any arithmetic on it.
petal_text = iris_data['Petal.Length']
PetalLength = petal_text.astype(float)
PetalLength

#%%

# Sorted copy; PetalLength itself is left unchanged.
np.sort(PetalLength)

#%%

# Distinct values, in ascending order.
np.unique(PetalLength)

#%%

# Total.
PetalLength.sum()

#%%

# Arithmetic mean.
PetalLength.mean()

#%%

# Standard deviation (NumPy default, ddof=0).
PetalLength.std()

#%%

# Variance (NumPy default, ddof=0).
PetalLength.var()

#%%

# Smallest value.
PetalLength.min()

#%%

# Largest value.
PetalLength.max()

3.ipynb

#%%

import numpy as np
import pandas as pd
# Series holding the integers 1..9 with the default 0-based index.
values = list(range(1, 10))
s = pd.Series(values)
print(s)

#%%

# 10x4 frame of random integers drawn from [1, 20); row and column labels
# are the default 0-based integers.
random_values = np.random.randint(1, 20, size=(10, 4))
df = pd.DataFrame(random_values)
print(df)

#%%

# First row, selected by position.
first_row = df.iloc[0]
temp1 = first_row
print(temp1)

#%%

# Column labelled 0.
print(df.loc[:, 0])

#%%

# First three rows.
print(df.head(n=3))

#%%

# Last three rows.
print(df.tail(n=3))

#%%

# Rows at positions 1 and 2 (the end of the slice is exclusive).
print(df.iloc[1:3])

#%%

# Single cell: row label 1, column label 1.
print(df.at[1, 1])

#%%

# Boolean filtering: keep the rows whose column 1 is positive.
positive_mask = df[1] > 0
print(df.loc[positive_mask])

#%%

# Append a new column named 'add' holding 0..9.
df['add'] = list(range(10))
print(df)

4.ipynb

#%%

import pandas as pd
# Build a DataFrame from a dict of equal-length lists; each key becomes a column.
# The first key is 'student' so that it matches the column name requested in
# the reordering cell below.
data = {'student': ['zhao', 'qian', 'sun', 'li', 'zhou', 'wu', 'zheng', 'wang'],
        'class': [1, 2, 2, 3, 3, 4, 1, 5],
        'score': [90, 80, 85, 75, 95, 70, 70, 85]}
df = pd.DataFrame(data)
print(df)
print(df.columns)

#%%

# Same data with an explicit column order. A name listed in `columns` that is
# not a key of `data` would show up as an all-NaN column.
pd.DataFrame(data, columns=['class', 'student', 'score'])

#%%

# A column can be read by key or, when its name is a valid identifier, as an
# attribute.
print(df['class'])
print(df.score)

#%%

# Default row index.
print(df.index)

#%%

# Assigning a Series aligns on the index: rows 2, 3 and 4 receive the values,
# every other row gets NaN.
val = pd.Series([10, 9, 8], index=[2, 3, 4])
df['year'] = val
print(df)

#%%

# Boolean helper column: True where 'year' is missing.
df['isnull'] = df.year.isnull()
print(df)

#%%

# Remove the helper column again.
del df['isnull']
print(df)

5.ipynb

#%%

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Plot text uses the SimHei font (needed for the Chinese labels used below);
# with axes.unicode_minus off, negative tick labels use an ASCII hyphen
# instead of the Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
%matplotlib inline

#%%

# Load the tips CSV and preview the first rows.
tips_path = r'E:\Job\code\BigData\chapter05\tips.csv'
fdata = pd.read_csv(tips_path)
fdata.head()

#%%

# Summary statistics of the frame.
fdata.describe()

#%%

# Replace the English column names with Chinese labels, in place.
column_labels = {
    'total_bill': '消费总额',
    'tip': '小费',
    'sex': '性别',
    'smoker': '是否抽烟',
    'day': '星期',
    'time': '聚餐时间段',
    'size': '人数',
}
fdata.rename(columns=column_labels, inplace=True)
fdata.head()

#%%

# Scatter plot of tip against total bill.
fdata.plot.scatter(x='消费总额', y='小费')

#%%

# Mean tip per value of the '性别' column.
tips_by_sex = fdata.groupby('性别')
tips_by_sex['小费'].mean()

#%%

# Mean tip per weekday, drawn as a bar chart.
print(fdata['星期'].unique())
r = fdata.groupby('星期')['小费'].mean()
# r is a Series, so the bars come from its index and values; x=/y= are only
# used when plotting a DataFrame and are therefore not passed.
fig = r.plot(kind='bar', fontsize=12, rot=30)
# NOTE: despite its name, fig is the Axes returned by plot(). No title text is
# set in this cell, so the size applies to a still-empty title.
fig.title.set_size(16)

#%%

# Mean tip per (sex, smoker) combination; the bars are labelled by the
# two-level group index.
r = fdata.groupby(['性别', '是否抽烟'])['小费'].mean()
fig = r.plot(kind='bar', fontsize=12, rot=30)
fig.title.set_size(16)


#%%

# Mean tip per meal time.
r = fdata.groupby(['聚餐时间段'])['小费'].mean()
fig = r.plot(kind='bar', fontsize=15, rot=30)
fig.title.set_size(16)

posted @ 2023-12-30 20:28  Ivan丶ky  阅读(149)  评论(0)    收藏  举报