在 R 中将不同的表组合在一起

2022-01-15 00:00:00 r format data-manipulation html

我正在使用 R.

针对这个问题,我生成了以下数据:

For this problem, I generated the following data :

set.seed(123)

v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015") 
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )

data_1 = data.frame(var_1 = rnorm(871, 10,10), var_2 = rnorm(871, 5,5))

data_1$dates <- as.factor(sample(v1, 871, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))

data_1$types <- as.factor(sample(v2, 871, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))

data_1$types2 <- as.factor(sample(v3, 871, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))


data_2 = data.frame(var_1 = rnorm(412, 10,10), var_2 = rnorm(412, 5,5))

data_2$dates <- as.factor(sample(v1, 412, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))

data_2$types <- as.factor(sample(v2, 412, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))

data_2$types2 <- as.factor(sample(v3, 412, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))

data_3 = data.frame(var_1 = rnorm(332, 10,10), var_2 = rnorm(332, 5,5))

data_3$dates <- as.factor(sample(v1, 332, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))

data_3$types <- as.factor(sample(v2, 332, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))

data_3$types2 <- as.factor(sample(v3, 332, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))

然后我使用这 3 个数据集创建了一个数据集:

I then created a single data set using these 3 data sets:

data_1 <- data.frame(name="data_1", data_1)
data_2 <- data.frame(name="data_2", data_2)
data_3 <- data.frame(name="data_3", data_3)
problem_data <- rbind(data_1, data_2, data_3)

根据以上数据,我可以制作下表:

Based on the above data, I was then able to make the following table:

summary <- xtabs(~name+types+types2, problem_data)
ftable(summary, row.vars=1, col.vars=2:3)

       types    A               B               C               D               E            
       types2   W   X   Y   Z   W   X   Y   Z   W   X   Y   Z   W   X   Y   Z   W   X   Y   Z
name                                                                                         
data_1         26  29 172 104  27  20 111  48  12  10  64  32  12  10  43  33  15   9  56  38
data_2         13  14  80  54   9  12  56  35   5   4  25  18   3   2  16  14   8   4  27  13
data_3          6  11  62  48   7  12  38  24   6   2  20   8   6   5  19  14   7   3  27   7

我正在尝试修改上表,使其看起来像这样:

到目前为止我尝试了什么:

library(memisc) 
summary <- xtabs(~dates+name+types+types2, problem_data)
t = ftable(summary, row.vars=1, col.vars=2:4)
show_html(t)

虽然上表不是我最初想要的,但它看起来确实不错.我唯一要补充的是分隔边界线".这使得更容易看到新类别何时开始(例如 data_1、data_2、data_3),以及total"和总计"行.

Although the above table is not what I was originally looking for, it does seem to appear quite nice. The only thing I would like to add are "separating borders lines" which make it easier to see when new categories begin (e.g. data_1, data_2, data_3), as well as "total" and "grand total" rows.

有人可以告诉我怎么做吗?

Can someone please show me how to do this?

谢谢!

注意:没有线条的表格

推荐答案

瞧:最终版本:删除了第一个版本:注意:我删除了 Version1 行.添加行和格式没有问题(注释掉):

Voila: Final version: Removed first version: Note: I removed the lines Version1. It is no problem to add lines and formatting(commented out):

没有线条:

带线:


library(tidyverse)
#install.packages("ftExtra")
library(ftExtra)
library(flextable)
library(janitor)
#library(officer)

# ##############################################################################
# Calculating the Grand total
my_func_grand_total <- function(df){
  df %>% select(-c(var_1, var_2)) %>% 
    as_tibble() %>% 
    group_by(dates, types2, types) %>% 
    count() %>% 
    arrange(types) %>% 
    mutate(types = paste(types, types2, sep = "_")) %>% 
    ungroup() %>% 
    select(-types2) %>% 
    pivot_wider(
      names_from = types,
      values_from = n,
      values_fill = 0
    )
}

# list of dataframes
df_list_grand_total <- list(data_1 = data_1, data_2 = data_2, data_3 = data_3)

# apply my_func_grand_total to the list
df_list_grand_total <- purrr::map(df_list_grand_total, my_func_grand_total)

# get the row with Grand total
Grand_total <- bind_rows(df_list_grand_total, .id = "name") %>%
  adorn_totals() %>% 
  slice(16) # last row
###############################################################################

###############################################################################
# Calculating the other things:

df_list <- list(data_1 = data_1, data_2 = data_2, data_3 = data_3)

my_func <- function(df){
  df %>% select(-c(var_1, var_2)) %>% 
    as_tibble() %>% 
    group_by(dates, types2, types) %>% 
    count() %>% 
    arrange(types) %>% 
    mutate(types = paste(types, types2, sep = "_")) %>% 
    ungroup() %>% 
    select(-types2) %>% 
    pivot_wider(
      names_from = types,
      values_from = n,
      values_fill = 0
    ) %>% 
    adorn_totals()
}

df_list <- purrr::map(df_list, my_func)


big_border = fp_border(color="black", width = 2)
#std_border = fp_border(color="orange", width = 1)

bind_rows(df_list, .id = "name") %>%
  bind_rows(Grand_total) %>% 
  group_by(name) %>%
  as_flextable(groups_to = "merged") %>% 
  delete_part(part = "header") %>% 
  add_header(name = "name", dates="types2", A_W="W", A_X="X", A_Y="Y", A_Z="Z", 
             B_W="W", B_X="X", B_Y="Y", B_Z="Z", 
             C_W="W", C_X="X", C_Y="Y", C_Z="Z",
             D_W="W", D_X="X", D_Y="Y", D_Z="Z", 
             E_W="W", E_X="X", E_Y="Y", E_Z="Z", top=FALSE) %>%  
  add_header(dates="types", A_W="A", A_X="A", A_Y="A", A_Z="A", 
             B_W="B", B_X="B", B_Y="B", B_Z="B", 
             C_W="C", C_X="C", C_Y="C", C_Z="C",
             D_W="D", D_X="D", D_Y="D", D_Z="D", 
             E_W="E", E_X="E", E_Y="E", E_Z="E", top=TRUE) %>% 
  merge_h(part = "header") %>% 
  #align(align = "center", part = "all") %>% 
  align(align = "left", part = "body") %>% 
  autofit() %>% 
  border_remove()
  #hline_top(part="all", border = big_border) %>% 
  #hline_bottom(part="body", border = big_border)
###############################################################################

相关文章