基于 R base 语法的 50 幅图之二

2022-04-08 00:00:00 专区 方法 我不 棒棒糖 这幅

Deviation

Diverging Bars

一种非常有意思,并且非常直观的图形,本质上有点类似直方图。

df <- fread("data/mtcars.csv")x <- df[, mpg]df[, mpg_z := (x - mean(x)) / sd(x)]df[, colors := fifelse(mpg_z < , "red", "green", ), keyby = mpg_z]df[, n := 1:.N]
op <- par()par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))plot.new()plot.window(xlim = c(-2, 2.5), ylim = c(, 32),)box()grid(32, 32)abline(v = , col = "gray", lty = 3, lwd = 4)with(df, segments( x0 = rep(, max(n)), y0 = n, x1 = mpg_z, y1 = n, col = colors, lwd = 4 ))axis( 1, lwd = 1, las = , col = 'gray', tick = FALSE, cex.axis = 0.5)axis( 2, lwd = 1, las = 1, at = df$n, labels = df$cars, col = 'gray', tick = FALSE, cex.axis = 0.5)mtext(side = 1, 'Mileage', line = 1)mtext(side = 2, 'Model', line = 3.4)title('Diverging bars of car mileage')par(op)

Diverging Texts

和上幅图相比,就是添加了点的数值。

df_green <- df[mpg_z >= ]df_red <- df[mpg_z < ]
op <- par()par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))plot.new()plot.window(xlim = c(-2, 2.5), ylim = c(, 32), )box()grid(32, 32)abline(v = , col = "gray", lty = 3, lwd = 4)with(df, segments( x0 = rep(, max(n)), y0 = n, x1 = mpg_z, y1 = n, col = colors, lwd = 4 ))
with(df_green, text( x = mpg_z + 0.2, y = n, labels = as.character(round(mpg_z, 2)), col = "green", cex = 0.6 ))with(df_red, text( x = mpg_z - 0.2, y = n, labels = as.character(round(mpg_z, 2)), col = "red", cex = 0.6 ))
axis( 1, lwd = 1, las = , col = 'gray', tick = FALSE, cex.axis = 0.5)axis( 2, lwd = 1, las = 1, at = df$n, labels = df$cars, col = 'gray', tick = FALSE, cex.axis = 0.5)mtext(side = 1, 'Mileage', line = 1)mtext(side = 2, 'Model', line = 3.4)title('Diverging bars of car mileage')par(op)

  

作图结束后才发现原来线条的颜色作者改了黑色,这倒是无关紧要,我觉得有颜色的更清晰。

Diverging Dot Plot

这幅图实在太好做了,几乎不费功夫。

op <- par()par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))plot.new()plot.window(xlim = c(-2, 2.5), ylim = c(, 32), )box()grid(32, 32)points( x = df_green$mpg_z, y = df_green$n, col = 'green', pch = 20, cex = 3.5)
points( x = df_red$mpg_z, y = df_red$n, col = 'red', pch = 20, cex = 3.5)
with(df, text( x = mpg_z, y = n, labels = as.character(round(mpg_z, 1)), col = "white", cex = 0.5 ))
axis( 1, lwd = 1, las = , col = 'gray', tick = FALSE, cex.axis = 0.5)axis( 2, lwd = 1, las = 1, at = df$n, labels = df$cars, col = 'gray', tick = FALSE, cex.axis = 0.5)mtext(side = 1, 'Mileage', line = 1)mtext(side = 2, 'Model', line = 3.4)title('Diverging bars of car mileage')par(op)

Diverging Lollipop Chart with Markers

这个棒棒糖图也是上面的变种。稍微修改下代码即可。

op <- par()par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))df <- fread("data/mtcars.csv")x <- df[, mpg]df[, mpg_z := (x - mean(x)) / sd(x)]df[, colors := alpha('black', 0.8), keyby = mpg_z]df[, n := 1:.N]df[cars == 'Fiat X1-9']$colors = alpha('darkorange', 0.8)df[, size := 2.8]df[cars == 'Fiat X1-9']$size = 6
ytop <- df[cars == 'Merc 280C']$nybot <- df[cars == 'Merc 450SE']$n
plot.new()plot.window(xlim = c(-2, 2.5), ylim = c(, 32), )box()grid(32, 32)with(df,points( x = mpg_z, y = n, col = colors, pch = 20, cex = size))
with(df, segments( x0 = rep(, max(n)), y0 = n, x1 = mpg_z, y1 = n, col = colors, lwd = 4 ))with(df, points( x = mpg_z, y = n, col = colors, pch = 20, cex = size))rect(-2.1, 0.5, -1.5, 2.6, col = alpha("red", 0.2))rect(1.5, 27.7, 2.3, 32.9, col = alpha("green", 0.2))rect(0.5, ybot+0.5, 2, ytop-0.5, col = 'darkred')
lines(x = c(, ), y = c(ybot-0.5, ytop + 0.5), col = "darkblue")lines(x = c(-0.3, ), y = c(ybot-0.5, ybot-0.5), col = "darkblue")lines(x = c(-0.3, ), y = c(ytop + 0.5, ytop + 0.5), col = "darkblue")lines(x = c(, 0.5), y = c(ytop-1, ytop -1), col = "darkblue")text(1.3, ytop-1, "Mercedes Models", col = 'white', cex = 0.65)
axis( 1, lwd = 1, las = , col = 'gray', tick = FALSE, cex.axis = 0.5)axis( 2, lwd = 1, las = 1, at = df$n, labels = df$cars, col = 'gray', tick = FALSE, cex.axis = 0.5)mtext(side = 1, 'Mileage', line = 1)mtext(side = 2, 'Model', line = 3.4)title('Diverging bars of car mileage')par(op)

Area Chart

这幅图我放弃,没有找到 matplotlib.pyplot.fill_between 的简单的方法,看了一下 python 的源代码,这个挺长的,有空再看看怎么实现,ggplot2 无对应的功能,但可以通过 https://stackoverflow.com/questions/54687321/fill-area-between-lines-using-ggplot-in-r 的方法实现。不过实在要用,又不想太费劲,那么 reticulate 或者直接 matplotlib 解决。

Ranking

Ordered Bar Chart

就是将条形图按大小排序,有意思的是图形边缘的上色。R base 里无直接可以使用的方法,可以利用 fig 和画两个图填充一下背景色,来实现,不过我不准备大费周章的来搞点颜色。在图例设置 alpha 透明度上色也没问题。

另外就是关于坐标轴的旋转,这个关闭 barplot 确实不好操作,网上找了一个方法 https://stackoverflow.com/questions/10286473/rotating-x-axis-labels-in-r-for-barplot, 就是将条形图的间隔设置为 1,然后在设置一个 endpoint,也就是条形图的宽度来实现,其实让我来操作,我会直接让 las = 2 即可,我不觉得垂直的丑。

op <- par()df <- data.table(ggplot2::mpg)df_new <- df[, mean(cty), by = manufacturer]df_new <- df_new[order(V1)]end_point = 0.5 + nrow(df_new) + nrow(df_new) - 1barplot( df_new$V1, col = alpha("firebrick", 0.7), ylim = c(, 30), ylab = 'Mileage per Gallon', tcl = , space = 1, main = 'Bar chart of highway per mileage', cex.axis = 0.7)text( seq(1.5, end_point, by = 2), par("usr")[3] - 0.25, srt = 60, adj = 1, xpd = TRUE, labels = df_new$manufacturer, cex = 0.7)text( seq(1.5, end_point, by = 2), df_new$V1+1, labels = as.character(round(df_new$V1, 2)), cex = 0.7)box(lty = 1)

  

Lollipop Chart

这个棒棒糖图本质上还是上面的图,不过就是画的比较棒棒糖了。我觉得直接做这幅图要比上面的图操心少一些

df_new[, n := 1: .N]colors <- alpha("firebrick", 0.7)with( df_new, plot(n, V1, pch = 20, cex = 3, col = colors, ylim = c(, 30), xlab = 'Model', ylab = 'Mileage per Gallon', tcl = , main = 'Lollipop chart of highway mileage', cex.axis = 0.7, xaxt = 'n' ))with(df_new, segments(n, rep(-1, max(n)), n, V1, col = colors, lwd = 3.5))text( x =df_new$n, par("usr")[3] - 0.25, srt = 60, adj = 1, xpd = TRUE, labels = df_new$manufacturer, cex = 0.7)text( x =df_new$n, df_new$V1+2, labels = as.character(round(df_new$V1, 2)), cex = 0.7)box(lty = 1)

Dot Plot

这就是个手动加参考线的散点图。

par(mar = c(5.1, 6.5, 4.1, 0.8))with( df_new, plot( V1, n, pch = 20, cex = 2.2, col = colors, xlim = c(10, 26), xlab = 'Mileage per Gallon', tcl = , main = 'Dot plot of highway mileage', cex.axis = 0.7, yaxt = 'n', ylab = '' ))with(df_new, segments(rep(10,max(n)), n, rep(26,max(n)), n, col = "lightgray", lwd = 1, lty = 3))axis(2, at = df_new$n, las = 2, labels = df_new$manufacturer, cex = 0.7,)mtext("Model", side=2, line=5.5)par(op)

Slope Chart

这种图生态学领域用的少。

df <- fread( "data/gdppercap.csv", skip = 1, col.names = c('continent', '1952', '1957') )df[, `:=`( left = paste(continent, ',', as.character(round(`1952`))), right = paste(continent, ',', as.character(round(`1957`))))]
df[, colors := fifelse(continent == "Asia", "red", "green")]
plot.new()plot.window(x = c(1948, 1961), y = c(500, 12500),)
segments( c(1952, 1957), c(, ), c(1952, 1957), c(12500, 12500), lty = 3, cex = 2, col = 'gray')
with(df, points( rep(1952, 5), `1952`, col = colors, pch = 20, cex = 2.5 ))with(df, points( rep(1957, 5), `1957`, col = colors, pch = 20, cex = 2.5 ))
with(df, segments( rep(1952, 5), `1952`, rep(1957, 5), `1957`, col = colors, lwd = 2.5 ))with(df, text( rep(1952 - 1.5, 5), `1952`, labels = left, col = colors, cex = 0.8 ))
with(df, text( rep(1957 + 1.5, 5), `1957`, labels = right, col = colors, cex = 0.8))
text( par("usr")[1] - 0.25, seq(500, 12500, 2000), xpd = TRUE, labels = as.character('1952', '1957'), cex = 0.9)
text( x = c(1952, 1957), par("usr")[3] - 0.25, xpd = TRUE, labels = as.character('1952', '1957'), cex = 0.9)
mtext("Mean GDP per Capita", side = 2, line = 2)

Dumbbell Plot

这个图也是非常简单的一幅图。

df <- fread('data/health.csv')df[, n:= 1:.N]with(df, plot( pct_2014, n, pch = 20, cex = 2.5, col = alpha('#a3c4dc', 0.7), xlim = c(0.03, 0.26), axes = FALSE, xlab = '', ylab = "Mean GDP per Capita", main = "Dumbbell Chart" ))
with(df, points( pct_2013, n, pch = 20, cex = 2.5, col = alpha('#0e668b', 0.7) ))
with(df, segments( pct_2013, n, pct_2014, n, col = alpha('#a3c4dc', 0.7), lwd = 2 ))

with(df, segments( seq(0.05, 0.25, 0.05), rep(0.1, 4), seq(0.05, 0.25, 0.05), rep(25.5, 4), col = "lightgray", lwd = 2, lty =3 ))axis(1, at = seq(0.05, 0.25, 0.05), labels = c("5%", "10%", "15%", "20%", "25%"), col.ticks = NA)axis(2, col.ticks = NA)box(lwd = 1)

就是这些内容

来源 https://mp.weixin.qq.com/s/1gcj4Dz8McTnU3r4SSQTfQ

相关文章