摘要: feature_dict = {i:label for i,label in zip(range(4),('sepal length in cm','sepal width in cm','petal length in cm','petal width in cm'))} import pandas as pd df = pd.io.parsers.read_csv(filepath_... 阅读全文
posted @ 2017-11-23 22:57 天下一杰 阅读(281) 评论(0) 推荐(0)
摘要: import pandas as pd import jieba df_news = pd.read_table('./data/val.txt',names=['category','theme','URL','content'],encoding='utf-8') df_news = df_news.dropna() # 缺失值直接drop掉 df_news.head() content = d... 阅读全文
posted @ 2017-11-22 21:57 天下一杰 阅读(1248) 评论(0) 推荐(0)
摘要: import re,collections # 把语料中的单词全部抽取出来,转成小写,并且去除单词中间的特殊符号 def words(text): return re.findall('[a-z]+',text.lower()) def train(features): model = collections.defaultdict(lambda:1) # 词频的默认出现数为1 f... 阅读全文
posted @ 2017-11-22 17:28 天下一杰 阅读(516) 评论(0) 推荐(0)
摘要: import pandas titanic = pandas.read_csv("titanic_train.csv") # 读取数据 # titanic.head() print titanic.describe() # 每一列的统计 从上图中可以看到,age字段有缺少值,可以用平均值进行填充 titanic["Age"] = titanic["Age"].fillna(titani... 阅读全文
posted @ 2017-11-22 11:27 天下一杰 阅读(1383) 评论(0) 推荐(0)
摘要: %matplotlib inline import matplotlib.pyplot as plt import pandas as pd from sklearn.datasets.california_housing import fetch_california_housing housing = fetch_california_housing() # 内置的数据集 housing.d... 阅读全文
posted @ 2017-11-21 23:51 天下一杰 阅读(270) 评论(0) 推荐(0)
摘要: data = pd.read_csv("creditcard.csv") data.head() count_classes = pd.value_counts(data['class'],sort = True).sort_index() # value_counts:计算数值的个数 count_classes.plot(kind = 'bar') # 绘制条形图 plt.title("Fraud c... 阅读全文
posted @ 2017-11-21 18:39 天下一杰 阅读(2899) 评论(0) 推荐(0)
摘要: import numpy as np import pandas as pd import matplotlib.pyplot as plt %matplotlib inline import os path = 'data' + os.sep + 'LogiReg_data.txt' pdData = pd.read_csv(path,header=None,names=['Exam 1',... 阅读全文
posted @ 2017-11-21 00:16 天下一杰 阅读(823) 评论(1) 推荐(0)
摘要: 单变量分析绘图 %matplotlib inline import numpy as np import pandas as pd from scipy import stats,integrate import matplotlib.pyplot as plt import seaborn as sns sns.set(color_codes=True) np.random.seed(sum(... 阅读全文
posted @ 2017-11-20 12:12 天下一杰 阅读(1706) 评论(0) 推荐(0)
摘要: 折线图的绘制 import pandas as pd unrate = pd.read_csv('unrate.csv') unrate['DATE'] = pd.to_datetime(unrate['DATE']) # 转换成datetime的格式 import matplotlib.pyplot as plt first_twelve = unrate[0:12] plt.plot(first_... 阅读全文
posted @ 2017-11-20 00:12 天下一杰 阅读(769) 评论(0) 推荐(0)
摘要: series 阅读全文
posted @ 2017-11-19 22:37 天下一杰 阅读(8851) 评论(0) 推荐(0)