Data
Stay hungry,Stay foolish!

导航

 

重塑和轴向旋转

Se

import pandas as pd
import numpy as np
from pandas import Series

# 2x3 frame holding the integers 0..5. The row axis is named "state" and the
# column axis "number" so that later stack/unstack calls can refer to the
# levels by name.
data = pd.DataFrame(
    np.arange(6).reshape(2, 3),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)
data

number	   one	two	three
   state			
   Ohio	    0	1	2
Colorado	3	4	5


# stack() pivots the column labels into the innermost level of the row index,
# turning the frame into a Series with one value per (state, number) pair.
result = data.stack(level=-1)
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

# unstack() rearranges the stacked Series back into a DataFrame
# (the innermost index level becomes the columns).
result.unstack(level=-1)

number	one	two	three
   state			
   Ohio	    0	1	2
Colorado	3	4	5

# By default unstack works on the innermost level (number); a different level
# can be chosen by passing its position or its name - here, by name.
result.unstack(level='state')

state	  Ohio Colorado
number		
  one	    0	3
  two	    1	4
  three	    2	5
# Selecting the same level by position instead of by name: level 0 is 'state',
# so its labels become the columns of the resulting DataFrame.
result.unstack(level=0)

state	   Ohio	Colorado
number		
 one	    0	3
 two	    1	4
 three	    2	5

# 当各分组中的值不全时,unstack会引入缺失数据(NaN)
# Two Series whose labels only partly overlap ('c' and 'd' are shared),
# concatenated under the outer keys 'one' and 'two' into a single Series
# with a two-level index.
s1 = Series(range(4), index=list('abcd'))
s2 = Series(range(4, 7), index=list('cde'))
data2 = pd.concat([s1, s2], keys=['one', 'two'])
data2

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

# Unstack the inner level into columns; labels absent from a group show up as NaN.
data2.unstack(level=-1)

    a	b	c	d	e
one	0.0	1.0	2.0	3.0	NaN
two	NaN	NaN	4.0	5.0	6.0

# Stacking the unstacked frame again leaves out the missing entries (see the
# output below); pass dropna=False to stack() to keep them.
data2.unstack(level=-1).stack(level=-1)
one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

# The same round trip without filtering: dropna=False keeps the NaN entries.
data2.unstack(level=-1).stack(level=-1, dropna=False)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

# stack and unstack on a DataFrame

# Display the stacked Series again.
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

# Two-column frame built from the stacked Series: 'left' holds the original
# values and 'right' the same values plus 5; the column axis is named 'side'.
df = pd.DataFrame(
    {'left': result, 'right': result + 5},
    columns=pd.Index(['left', 'right'], name='side'),
)
df

          side	   left	   right
state	  number		
Ohio	  one	     0	      5
          two	     1	      6
          three	     2	      7
Colorado  one	     3	      8
          two	     4	      9
          three	     5	      10

# Unstacking a DataFrame moves the chosen index level ('state') into the
# columns, where it becomes the lowest level of the column hierarchy.
df.unstack(level='state')

side	left	            right
state	Ohio	Colorado	Ohio	Colorado
number				
one	     0	  3	               5	  8
two	     1	  4	               6	  9
three	 2	  5	               7	  10

# Stacking 'side' afterwards folds it out of the columns; it likewise ends up
# as the lowest (innermost) level, this time of the row index.
df.unstack(level='state').stack(level='side')

state	       Colorado	Ohio
number	side		
one	    left	3	      0
        right	8	      5
two	    left	4	      1
        right	9	      6
three	left	5	      2
        right	10	      7

时间序列数据的堆叠格式

# Long ("stacked") format: one row per observation. The first column (labelled
# 'data') holds the date string, 'item' the indicator name and 'value' the reading.
data_c = [
    ['1959-03-31', 'realgdb', 2710.349],
    ['1959-03-31', 'infl', 0.0],
    ['1959-03-31', 'unemp', 5.8],
    ['1959-06-30', 'realgdb', 2778.801],
    ['1959-06-30', 'infl', 2.34],
    ['1959-06-30', 'unemp', 5.1],
    ['1959-09-30', 'realgdb', 2775.488],
    ['1959-09-30', 'infl', 2.74],
    ['1959-09-30', 'unemp', 5.3],
]
ldata = pd.DataFrame(data=data_c, columns=['data', 'item', 'value'])
ldata

        data	item	value
0	1959-03-31	realgdb	2710.349
1	1959-03-31	infl	0.000
2	1959-03-31	unemp	5.800
3	1959-06-30	realgdb	2778.801
4	1959-06-30	infl	2.340
5	1959-06-30	unemp	5.100
6	1959-09-30	realgdb	2775.488
7	1959-09-30	infl	2.740
8	1959-09-30	unemp	5.300


# Reshape the long table to wide form with the pivot shortcut: one row per
# 'data' value, one column per 'item', cells taken from 'value'.
# The arguments are passed by keyword because DataFrame.pivot accepts them as
# keyword-only from pandas 2.0 on; the positional form raises TypeError there.
ldata.pivot(index='data', columns='item', values='value')

item	    infl	realgdb	    unemp
      data			
1959-03-31	0.00	2710.349	5.8
1959-06-30	2.34	2778.801	5.1
1959-09-30	2.74	2775.488	5.3


# pivot amounts to the following two steps; underneath it is still a
# stack/unstack style reshape.
# Step 1: move 'data' and 'item' into a two-level row index.
ldata.set_index(keys=['data', 'item'])


	                value
    data	item	
1959-03-31	realgdb	2710.349
            infl	0.000
            unemp	5.800
1959-06-30	realgdb	2778.801
            infl	2.340
            unemp	5.100
1959-09-30	realgdb	2775.488
            infl	2.740
            unemp	5.300

# Step 2: unstack the innermost index level ('item') into the columns.
ldata.set_index(keys=['data', 'item']).unstack(level=-1)


value
item	    infl	realgdb	   unemp
   data			
1959-03-31	0.00	2710.349	5.8
1959-06-30	2.34	2778.801	5.1
1959-09-30	2.74	2775.488	5.3
posted on 2018-11-29 16:28  进击中的青年  阅读(110)  评论(0)  编辑  收藏  举报