import numpy as np
import pandas as pd
# --- Series basics: construction, attributes, indexing, assignment ---
# The outputs shown in comments are the results recorded in the original
# interactive session; they are kept as comments so the file stays valid Python.

# Without an explicit index the values are labelled 0, 1, 2, ...
s = pd.Series([12, -4, 4, 8])
print(s)
# 0    12
# 1    -4
# 2     4
# 3     8
# dtype: int64

# The same values with explicit string labels.
s = pd.Series([12, -4, 4, 8], index=['a', 'b', 'c', 'd'])
print(s)
# a    12
# b    -4
# c     4
# d     8
# dtype: int64

# The two halves of a Series: the underlying array and the index.
print(s.values)  # the values 12, -4, 4, 8 as a NumPy array
print(s.index)   # the labels 'a', 'b', 'c', 'd'

# Positional access is spelled with .iloc: on a string-labelled Series a
# plain integer key (s[2]) relies on a positional fallback that pandas has
# deprecated, so .iloc keeps the intent explicit.
print(s.iloc[2])  # 4
# Label access.
print(s['b'])  # -4

# Positional slice of the first two elements.
print(s.iloc[0:2])
# a    12
# b    -4
# dtype: int64

# A list of labels selects several elements at once.
print(s[['b', 'c']])
# b   -4
# c    4
# dtype: int64

# Assignment by position ...
s.iloc[1] = 0
print(s)
# a    12
# b     0
# c     4
# d     8
# dtype: int64

# ... and by label.
s['a'] = 10
print(s)
# a    10
# b     0
# c     4
# d     8
# dtype: int64
# --- Series built from a NumPy array; filtering and element-wise maths ---
# Outputs in comments are the results recorded in the original session.

arr = np.array([1, 2, 3, 4])
s = pd.Series(arr)
print(s)
# 0    1
# 1    2
# 2    3
# 3    4
# (the recorded session reported dtype int32; the integer dtype is inherited
#  from the NumPy array, so it may differ on another platform/NumPy version)

# Boolean mask: keep only the elements greater than 2.
print(s[s > 2])
# 2    3
# 3    4

# Arithmetic operators apply element by element; division yields floats.
print(s / 2)
# 0    0.5
# 1    1.0
# 2    1.5
# 3    2.0
# dtype: float64

# NumPy functions also work element by element on a Series.
print(np.log(s))
# 0    0.000000
# 1    0.693147
# 2    1.098612
# 3    1.386294
# dtype: float64
# --- Duplicate labels/values: unique, value_counts, isin ---
# Outputs in comments are the results recorded in the original session.

# Index labels do not have to be unique ('white' and 'green' appear twice).
serd = pd.Series(
    [1, 0, 2, 1, 2, 3],
    index=['white', 'white', 'blue', 'green', 'green', 'yellow'],
)
print(serd)
# white     1
# white     0
# blue      2
# green     1
# green     2
# yellow    3
# dtype: int64

# Distinct values, in order of first appearance.
print(serd.unique())  # 1, 0, 2, 3

# How many times each value occurs.
print(serd.value_counts())
# 2    2
# 1    2
# 3    1
# 0    1
# NOTE(review): rows with equal counts may print in a different order on
# another pandas version; only the counts themselves are meaningful.

# Test, element by element, whether each value is one of the given values.
print(serd.isin([0, 3]))
# white     False
# white      True
# blue      False
# green     False
# green     False
# yellow     True
# dtype: bool

# The boolean result can be used directly as a filter.
print(serd[serd.isin([0, 3])])
# white     0
# yellow    3
# dtype: int64
# --- Missing values (NaN) ---
# Outputs in comments are the results recorded in the original session.

# The missing-value marker is spelled `np.nan`.
# In the recorded session `np.Nan` failed with
#   AttributeError: module 'numpy' has no attribute 'Nan'
# and the `np.NaN` alias used afterwards was removed in NumPy 2.0, so the
# lowercase spelling is the one that works on every NumPy version.
s2 = pd.Series([5, 3, np.nan, 14])
print(s2)
# 0     5.0
# 1     3.0
# 2     NaN
# 3    14.0
# dtype: float64   (the NaN turns the integer inputs into floats)

# True where a value is missing ...
print(s2.isnull())
# 0    False
# 1    False
# 2     True
# 3    False
# dtype: bool

# ... and the complement: True where a value is present.
print(s2.notnull())
# 0     True
# 1     True
# 2    False
# 3     True
# dtype: bool

# Either mask can be used as a filter; here only the missing entry is kept.
print(s2[s2.isnull()])
# 2   NaN
# dtype: float64
# --- Series from a dict, and label-aligned arithmetic ---
# Outputs in comments are the results recorded in the original session.

# Dict keys become the index labels, dict values become the data.
mydict = {'red': 1000, 'blue': 1500, 'yellow': 450, 'orange': 800}
myseries = pd.Series(mydict)
print(myseries)
# blue      1500
# orange     800
# red       1000
# yellow     450
# dtype: int64
# NOTE(review): the recorded session listed the keys in sorted order; newer
# pandas releases presumably keep the dict's insertion order instead - verify
# against the installed version.

# An explicit index selects and orders the entries; a label that is not a
# key of the dict ('green') is filled with NaN, which makes the Series float.
colors = ['red', 'yellow', 'orange', 'blue', 'green']
myseries = pd.Series(mydict, index=colors)
print(myseries)
# red       1000.0
# yellow     450.0
# orange     800.0
# blue      1500.0
# green        NaN
# dtype: float64

# Adding two Series matches elements by label, not by position. Labels that
# are missing (or NaN) in either operand give NaN in the result.
mydict2 = {'red': 1000, 'yellow': 450, 'black': 800}
myseries2 = pd.Series(mydict2)
print(myseries + myseries2)
# black        NaN
# blue         NaN
# green        NaN
# orange       NaN
# red       2000.0
# yellow     900.0
# dtype: float64
# --- The DataFrame object ---
# Outputs in comments are the results recorded in the original session.
# (The session's first attempt, a lone `data = {`, was an incomplete statement
# and raised "SyntaxError: unexpected EOF while parsing"; the literal must be
# closed before it can be evaluated.)

# A dict of equal-length lists: each key becomes a column, and the rows get
# the default labels 0..4.
data = {
    'color': ['blue', 'green', 'yellow', 'red', 'white'],
    'object': ['ball', 'pen', 'pencil', 'paper', 'mug'],
    'price': [1.2, 1.4, 0.6, 1.3, 2],
}
frame = pd.DataFrame(data)
print(frame)
#     color  object  price
# 0    blue    ball    1.2
# 1   green     pen    1.4
# 2  yellow  pencil    0.6
# 3     red   paper    1.3
# 4   white     mug    2.0