爬虫 美国政治献金案例分析
美国2012年总统候选人政治献金数据分析
导入包
In [4]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
为方便后续操作，先定义月份、重点关注的参选人以及各参选人所属的政党
In [1]:
# Month abbreviation -> month number (1-12); looked up by transform_date
# when it rewrites the contribution dates.
months = {
    abbr: number
    for number, abbr in enumerate(
        ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'],
        start=1,
    )
}

# Candidates singled out for closer inspection.
of_interest = [
    'Obama, Barack',
    'Romney, Mitt',
    'Santorum, Rick',
    'Paul, Ron',
    'Gingrich, Newt',
]

# Candidate name -> party; used to derive the `party` column from `cand_nm`.
parties = {
    'Bachmann, Michelle': 'Republican',
    'Romney, Mitt': 'Republican',
    'Obama, Barack': 'Democrat',
    "Roemer, Charles E. 'Buddy' III": 'Reform',
    'Pawlenty, Timothy': 'Republican',
    'Johnson, Gary Earl': 'Libertarian',
    'Paul, Ron': 'Republican',
    'Santorum, Rick': 'Republican',
    'Cain, Herman': 'Republican',
    'Gingrich, Newt': 'Republican',
    'McCotter, Thaddeus G': 'Republican',
    'Huntsman, Jon': 'Republican',
    'Perry, Rick': 'Republican',
}
读取文件
In [5]:
# Load the contribution records from the CSV-formatted text file into `df`,
# then preview the first two rows.
df = pd.read_csv(filepath_or_buffer='./data/usa_election.txt')
df.iloc[:2]
Out[5]:
In [7]:
# Add a `party` column: each candidate name in `cand_nm` is looked up in the
# `parties` dict defined earlier in the notebook.
df['party'] = df.cand_nm.map(parties)
df.iloc[:2]
Out[7]:
In [8]:
# Distinct values present in the `party` column.
df.party.unique()
Out[8]:
In [9]:
# How many rows (contribution records) fall under each party.
df.party.value_counts()
Out[9]:
In [10]:
# Total contribution amount (`contb_receipt_amt`) received by each party.
df['contb_receipt_amt'].groupby(df['party']).sum()
Out[10]:
In [11]:
# Preview the first two rows of the frame.
df.iloc[:2]
Out[11]:
In [14]:
# Total contribution amount received by each party on each receipt date.
# The result is indexed by (contb_receipt_dt, party).
df['contb_receipt_amt'].groupby([df['contb_receipt_dt'], df['party']]).sum()
. . .
In [15]:
def transform_date(d, month_map=None):
    """Convert a 'DD-MON-YY' date string to a zero-padded 'YYYY-MM-DD' string.

    Parameters
    ----------
    d : str
        Hyphen-separated date made of day, month abbreviation and two-digit
        year, e.g. '20-JUN-11'.
    month_map : dict, optional
        Mapping from month abbreviation to month number. When omitted, the
        notebook-level ``months`` dict is used.

    Returns
    -------
    str
        The date as 'YYYY-MM-DD', e.g. '2011-06-20'. The two-digit year is
        prefixed with '20'.

    Raises
    ------
    ValueError
        If ``d`` does not split into exactly three hyphen-separated parts.
    KeyError
        If the month abbreviation is not a key of the month mapping.
    """
    if month_map is None:
        month_map = months
    day, month, year = d.split('-')
    # Zero-pad month and day so the result really is 'yyyy-mm-dd' and sorts
    # chronologically as a plain string ('2011-06-20', not '2011-6-20').
    return '20{}-{:02d}-{}'.format(year, month_map[month], day.zfill(2))
In [16]:
# Rewrite every value of `contb_receipt_dt` into year-month-day order by
# passing it through transform_date; the column is overwritten in place.
df['contb_receipt_dt'] = [transform_date(raw) for raw in df['contb_receipt_dt']]
In [18]:
# Preview the first row to check the rewritten date column.
df.iloc[:1]
Out[18]:
In [19]:
# Which candidates do contributors whose occupation is 'DISABLED VETERAN'
# mainly support?
# 1. Keep only the rows for that occupation. The mask is built once, inside
#    the selection; no separate throw-away comparison is needed.
old_bing_df = df.loc[df['contbr_occupation'] == 'DISABLED VETERAN']
# 2. Total the contribution amounts per candidate, largest first, so the
#    best-supported candidate is at the top.
old_bing_df.groupby(by='cand_nm')['contb_receipt_amt'].sum().sort_values(ascending=False)
Out[19]:
In [ ]:
# Find the occupation of the contributor with the largest single contribution, and the amount, using df.query("<condition>")
In [22]:
# Largest single contribution amount in the data set.
df.contb_receipt_amt.max()
Out[22]:
In [23]:
# Show the row(s) holding the largest contribution (including the
# contributor's occupation). The maximum is computed from the data rather
# than pasted in as a float literal, so the lookup stays correct if the data
# changes and does not depend on how the value was printed.
max_contribution = df['contb_receipt_amt'].max()
df.query('contb_receipt_amt == @max_contribution')
Out[23]:

浙公网安备 33010602011771号