Python pandas 中 groupby() 函数讲解
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# Demo of pandas groupby(): counting rows per group and averaging a column
# per group, keyed by one or by two columns.
#
# print() is called with a single argument throughout, so the script runs
# unchanged on both Python 2 and Python 3.

# The numeric columns are stored as strings, hence the astype(float)
# conversions before any mean() below.
df = pd.DataFrame({
    'key1': list('aabba'),
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
})
print(df)

# Group by the values of key1 and count the rows in each group.
grouped = df.groupby(['key1']).size()
print(grouped)
print('++++++++++++++')

# Convert data1 to float first, then group by key1 and take the mean.
grouped1 = df['data1'].astype(float).groupby(df['key1']).mean()
print(grouped1)
print(type(grouped1))  # a Series
print('++++++++++++++++++')

# Append a new column to the DataFrame.
df['add'] = ['AA', 'BB', "CC", 'DD', 'EE']
print(df)

# Group by two columns. When groupby() is called on the DataFrame itself,
# the keys can be given simply as column names, e.g. ['key1', 'key2'].
grouped2 = df.groupby(['key1', 'key2']).size()
print(grouped2)
print(type(grouped2))
print('++++++++++++++++++')

# Group by key1 and add, then take the mean of data1. When groupby() is
# called on a single column (df['data1']), the keys are passed as the key
# columns themselves (df['key1'], df['add']) rather than as column names.
grouped3 = df['data1'].astype(float).groupby([df['key1'], df['add']]).mean()
print(grouped3)
print(type(grouped3))  # a Series
运行结果如下:
  data1 data2 key1 key2
0     1     2    a  one
1     3     4    a  two
2     5     6    b  one
3     7     8    b  two
4     9    10    a  one
key1
a 3
b 2
dtype: int64
++++++++++++++
key1
a 4.333333
b 6.000000
Name: data1, dtype: float64
<class 'pandas.core.series.Series'>
++++++++++++++++++
  data1 data2 key1 key2 add
0     1     2    a  one  AA
1     3     4    a  two  BB
2     5     6    b  one  CC
3     7     8    b  two  DD
4     9    10    a  one  EE
key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64
<class 'pandas.core.series.Series'>
++++++++++++++++++
key1  add
a     AA     1.0
      BB     3.0
      EE     9.0
b     CC     5.0
      DD     7.0
Name: data1, dtype: float64
<class 'pandas.core.series.Series'>
相关文章