tidyfst处理多列同时运算

2020-06-02 00:00:00 数据 语言 统计 科学 复旦大学

作者:黄天元,复旦大学博士在读,热爱数据科学与开源工具(R),致力于利用数据科学迅速积累行业经验优势并发现科学知识,涉猎内容包括但不限于信息计量、机器学习、数据可视化、应用统计建模、知识图谱等,著有《R语言数据高效处理指南》(京东图书有售)。知乎专栏:R语言数据挖掘。邮箱:huang.tian-yuan@qq.com。欢迎合作交流。

dplyr 1.0.0 版本已经发布,这里针对其基于 across() 的多列同时汇总操作(示例参考自 dplyr 的 colwise 文档:https://cran.r-project.org/web/packages/dplyr/vignettes/colwise.html)进行演示,并给出 tidyfst 中相对应的写法。

library(pacman)
p_load(tidyfst,dplyr)

# dplyr: count the distinct values (NA included) in every character column
starwars %>%
  summarise(across(where(is.character), n_distinct))
#> # A tibble: 1 x 8
#>    name hair_color skin_color eye_color   sex gender homeworld species
#>   <int>      <int>      <int>     <int> <int>  <int>     <int>   <int>
#> 1    87         13         31        15     5      3        49      38
# tidyfst: same summary; is.character picks the columns, uniqueN is applied
# to each of them
summarise_vars(starwars, is.character, uniqueN)
#>     name hair_color skin_color eye_color   sex gender homeworld species
#>    <int>      <int>      <int>     <int> <int>  <int>     <int>   <int>
#> 1:    87         13         31        15     5      3        49      38

# dplyr: keep only species with more than one row, then count the distinct
# values of sex, gender and homeworld within each remaining species
starwars %>%
  group_by(species) %>%
  filter(n() > 1) %>%
  summarise(across(c(sex, gender, homeworld), n_distinct))
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 9 x 4
#>   species    sex gender homeworld
#>   <chr>    <int>  <int>     <int>
#> 1 Droid        1      2         3
#> 2 Gungan       1      1         1
#> 3 Human        2      2        16
#> 4 Kaminoan     2      2         1
#> 5 Mirialan     1      1         1
#> 6 Twi'lek      2      2         1
#> 7 Wookiee      1      1         1
#> 8 Zabrak       1      1         2
#> 9 <NA>         1      1         3
# tidyfst: the same per-species summary as the dplyr pipeline above.
# The pipeline given to group_dt() is applied to each `species` group:
# filter_dt(.N > 1) keeps groups with more than one row, and
# summarise_vars() then applies uniqueN to the columns named by the string.
# NOTE(review): the string appears to be matched against column names as a
# pattern, so "sex|gender|homeworld" would also pick up any other column
# whose name contains one of these words -- confirm against the tidyfst docs.
starwars %>% 
  group_dt(
    by = species,
    filter_dt(.N > 1) %>% 
      summarise_vars("sex|gender|homeworld",uniqueN)
  ) 
#>     species   sex gender homeworld
#>      <char> <int>  <int>     <int>
#> 1:    Human     2      2        16
#> 2:    Droid     1      2         3
#> 3:  Wookiee     1      1         1
#> 4:   Gungan     1      1         1
#> 5:     <NA>     1      1         3
#> 6:   Zabrak     1      1         2
#> 7:  Twi'lek     2      2         1
#> 8: Mirialan     1      1         1
#> 9: Kaminoan     2      2         1

# dplyr: for homeworlds with more than one row, average every numeric
# column, ignoring missing values
starwars %>%
  group_by(homeworld) %>%
  filter(n() > 1) %>%
  summarise(
    across(
      where(is.numeric),
      function(col) mean(col, na.rm = TRUE)
    )
  )
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 10 x 4
#>    homeworld height  mass birth_year
#>    <chr>      <dbl> <dbl>      <dbl>
#>  1 Alderaan    176.  64         43  
#>  2 Corellia    175   78.5       25  
#>  3 Coruscant   174.  50         91  
#>  4 Kamino      208.  83.1       31.5
#>  5 Kashyyyk    231  124        200  
#>  6 Mirial      168   53.1       49  
#>  7 Naboo       175.  64.2       55  
#>  8 Ryloth      179   55         48  
#>  9 Tatooine    170.  85.4       54.6
#> 10 <NA>        139.  82        334.
# tidyfst: the same per-homeworld means as the dplyr pipeline above.
# The pipeline given to group_dt() is applied to each `homeworld` group:
# filter_dt(.N > 1) keeps groups with more than one row, and
# summarise_vars() then averages each numeric column. The wrapper function
# is needed so that mean() is called with na.rm = TRUE.
starwars %>% 
  group_dt(
    by = homeworld,
    filter_dt(.N > 1) %>% 
      summarise_vars(is.numeric,function(x) mean(x,na.rm = TRUE))
  )
#>     homeworld   height      mass birth_year
#>        <char>    <num>     <num>      <num>
#>  1:  Tatooine 169.8000  85.37500   54.64444
#>  2:     Naboo 175.4545  64.16667   55.00000
#>  3:  Alderaan 176.3333  64.00000   43.00000
#>  4:  Kashyyyk 231.0000 124.00000  200.00000
#>  5:  Corellia 175.0000  78.50000   25.00000
#>  6:      <NA> 138.7500  82.00000  334.33333
#>  7:    Kamino 208.3333  83.10000   31.50000
#>  8: Coruscant 173.6667  50.00000   91.00000
#>  9:    Ryloth 179.0000  55.00000   48.00000
#> 10:    Mirial 168.0000  53.10000   49.00000

# dplyr: a small toy data frame; sum the numeric columns within each value
# of g (the grouping column itself is not summed, as the output shows)
df <- data.frame(
  g = c(1, 1, 2),
  x = c(-1, 1, 3),
  y = c(-1, -4, -9)
)
df %>%
  group_by(g) %>%
  summarise(across(where(is.numeric), sum))
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 2 x 3
#>       g     x     y
#>   <dbl> <dbl> <dbl>
#> 1     1     0    -5
#> 2     2     3    -9
# tidyfst: the grouping column is supplied through `by`, so no separate
# grouping step is needed
summarise_vars(df, is.numeric, sum, by = g)
#>        g     x     y
#>    <num> <num> <num>
#> 1:     1     0    -5
#> 2:     2     3    -9

相关文章