Python中pandas的groupby()函数讲解

2023-01-31 05:01:05 python 函数 讲解
# -*- coding: utf-8 -*-
import pandas as pd
import  numpy as np

# Demonstration of pandas groupby(): counting rows per group and averaging a
# column per group, keyed by one or by several columns.
#
# print is always called with a single parenthesised argument so the script
# produces the same output on Python 2 and Python 3.

# Sample frame; data1/data2 are deliberately stored as strings so the
# astype(float) conversion below is required before computing a mean.
df = pd.DataFrame({
    'key1': list('aabba'),
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': ['1', '3', '5', '7', '9'],
    'data2': ['2', '4', '6', '8', '10'],
})
print(df)

# Group by the values of key1 and count the rows in each group.
grouped = df.groupby(['key1']).size()
print(grouped)
print('++++++++++++++')

# Convert data1 to float first, then group by key1 and take the mean.
grouped1 = df['data1'].astype(float).groupby(df['key1']).mean()
print(grouped1)
print(type(grouped1))       # the result is a Series
print('++++++++++++++++++')

# Append a new column to the DataFrame.
df['add'] = ['AA', 'BB', "CC", 'DD', 'EE']
print(df)

# Group by two columns and count the rows in each (key1, key2) group.
# Note: when groupby is called on the DataFrame itself, the keys can be given
# directly as column names, e.g. ['key1'].
grouped2 = df.groupby(['key1', 'key2']).size()
print(grouped2)
print(type(grouped2))
print('++++++++++++++++++')

# Group by key1 and add (not key2), then take the mean of data1.
# Note: when groupby is called on a single column such as df['data1'], the
# keys are passed as Series (df['key1'], df['add']) rather than as column
# names, because the selected Series does not carry the other columns.
grouped3 = df['data1'].astype(float).groupby([df['key1'], df['add']]).mean()
print(grouped3)
print(type(grouped3))          # the result is a Series


运行结果如下:
  data1 data2 key1 key2
0     1     2    a  one
1     3     4    a  two
2     5     6    b  one
3     7     8    b  two
4     9    10    a  one
key1
a    3
b    2
dtype: int64
++++++++++++++
key1
a    4.333333
b    6.000000
Name: data1, dtype: float64
<class 'pandas.core.series.Series'>
++++++++++++++++++
  data1 data2 key1 key2 add
0     1     2    a  one  AA
1     3     4    a  two  BB
2     5     6    b  one  CC
3     7     8    b  two  DD
4     9    10    a  one  EE
key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64
<class 'pandas.core.series.Series'>
++++++++++++++++++
key1  add
a     AA     1.0
      BB     3.0
      EE     9.0
b     CC     5.0
      DD     7.0
Name: data1, dtype: float64
<class 'pandas.core.series.Series'>

相关文章