基于 R base 语法的 50 幅图之二
Deviation
Diverging Bars
一种非常有意思,并且非常直观的图形,本质上有点类似直方图。
df <- fread("data/mtcars.csv")
x <- df[, mpg]
df[, mpg_z := (x - mean(x)) / sd(x)]
df[, colors := fifelse(mpg_z < , "red", "green", ), keyby = mpg_z]
df[, n := 1:.N]
op <- par()
par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))
plot.new()
plot.window(xlim = c(-2, 2.5),
ylim = c(, 32),)
box()
grid(32, 32)
abline(v = ,
col = "gray",
lty = 3,
lwd = 4)
with(df,
segments(
x0 = rep(, max(n)),
y0 = n,
x1 = mpg_z,
y1 = n,
col = colors,
lwd = 4
))
axis(
1,
lwd = 1,
las = ,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
axis(
2,
lwd = 1,
las = 1,
at = df$n,
labels = df$cars,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
mtext(side = 1, 'Mileage', line = 1)
mtext(side = 2, 'Model', line = 3.4)
title('Diverging bars of car mileage')
par(op)
Diverging Texts
和上幅图相比,就是添加了点的数值。
df_green <- df[mpg_z >= ]
df_red <- df[mpg_z < ]
op <- par()
par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))
plot.new()
plot.window(xlim = c(-2, 2.5),
ylim = c(, 32), )
box()
grid(32, 32)
abline(v = ,
col = "gray",
lty = 3,
lwd = 4)
with(df,
segments(
x0 = rep(, max(n)),
y0 = n,
x1 = mpg_z,
y1 = n,
col = colors,
lwd = 4
))
with(df_green,
text(
x = mpg_z + 0.2,
y = n,
labels = as.character(round(mpg_z, 2)),
col = "green",
cex = 0.6
))
with(df_red,
text(
x = mpg_z - 0.2,
y = n,
labels = as.character(round(mpg_z, 2)),
col = "red",
cex = 0.6
))
axis(
1,
lwd = 1,
las = ,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
axis(
2,
lwd = 1,
las = 1,
at = df$n,
labels = df$cars,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
mtext(side = 1, 'Mileage', line = 1)
mtext(side = 2, 'Model', line = 3.4)
title('Diverging bars of car mileage')
par(op)
作图结束后才发现原来线条的颜色作者改了黑色,这倒是无关紧要,我觉得有颜色的更清晰。
Diverging Dot Plot
这幅图实在太好做了,几乎不费功夫。
op <- par()
par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))
plot.new()
plot.window(xlim = c(-2, 2.5),
ylim = c(, 32), )
box()
grid(32, 32)
points(
x = df_green$mpg_z,
y = df_green$n,
col = 'green',
pch = 20,
cex = 3.5
)
points(
x = df_red$mpg_z,
y = df_red$n,
col = 'red',
pch = 20,
cex = 3.5
)
with(df,
text(
x = mpg_z,
y = n,
labels = as.character(round(mpg_z, 1)),
col = "white",
cex = 0.5
))
axis(
1,
lwd = 1,
las = ,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
axis(
2,
lwd = 1,
las = 1,
at = df$n,
labels = df$cars,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
mtext(side = 1, 'Mileage', line = 1)
mtext(side = 2, 'Model', line = 3.4)
title('Diverging bars of car mileage')
par(op)
Diverging Lollipop Chart with Markers
这个棒棒糖图也是上面的变种。稍微修改下代码即可。
op <- par()
par(mar = c(4, 4.5, 2, 0.3), mgp = c(3, 0.1, ))
df <- fread("data/mtcars.csv")
x <- df[, mpg]
df[, mpg_z := (x - mean(x)) / sd(x)]
df[, colors := alpha('black', 0.8), keyby = mpg_z]
df[, n := 1:.N]
df[cars == 'Fiat X1-9']$colors = alpha('darkorange', 0.8)
df[, size := 2.8]
df[cars == 'Fiat X1-9']$size = 6
ytop <- df[cars == 'Merc 280C']$n
ybot <- df[cars == 'Merc 450SE']$n
plot.new()
plot.window(xlim = c(-2, 2.5),
ylim = c(, 32), )
box()
grid(32, 32)
with(df,
points(
x = mpg_z,
y = n,
col = colors,
pch = 20,
cex = size)
)
with(df,
segments(
x0 = rep(, max(n)),
y0 = n,
x1 = mpg_z,
y1 = n,
col = colors,
lwd = 4
))
with(df,
points(
x = mpg_z,
y = n,
col = colors,
pch = 20,
cex = size)
)
rect(-2.1, 0.5, -1.5, 2.6, col = alpha("red", 0.2))
rect(1.5, 27.7, 2.3, 32.9, col = alpha("green", 0.2))
rect(0.5, ybot+0.5, 2, ytop-0.5, col = 'darkred')
lines(x = c(, ), y = c(ybot-0.5, ytop + 0.5), col = "darkblue")
lines(x = c(-0.3, ), y = c(ybot-0.5, ybot-0.5), col = "darkblue")
lines(x = c(-0.3, ), y = c(ytop + 0.5, ytop + 0.5), col = "darkblue")
lines(x = c(, 0.5), y = c(ytop-1, ytop -1), col = "darkblue")
text(1.3, ytop-1, "Mercedes Models", col = 'white', cex = 0.65)
axis(
1,
lwd = 1,
las = ,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
axis(
2,
lwd = 1,
las = 1,
at = df$n,
labels = df$cars,
col = 'gray',
tick = FALSE,
cex.axis = 0.5
)
mtext(side = 1, 'Mileage', line = 1)
mtext(side = 2, 'Model', line = 3.4)
title('Diverging bars of car mileage')
par(op)
Area Chart
这幅图我放弃,没有找到 matplotlib.pyplot.fill_between
的简单的方法,看了一下 python 的源代码,这个挺长的,有空再看看怎么实现,ggplot2
无对应的功能,但可以通过 https://stackoverflow.com/questions/54687321/fill-area-between-lines-using-ggplot-in-r 的方法实现。不过实在要用,又不想太费劲,那么 reticulate
或者直接 matplotlib
解决。
Ranking
Ordered Bar Chart
就是将条形图按大小排序,有意思的是图形边缘的上色。R base 里无直接可以使用的方法,可以利用 fig
和画两个图填充一下背景色,来实现,不过我不准备大费周章的来搞点颜色。在图例设置 alpha 透明度上色也没问题。
另外就是关于坐标轴的旋转,这个关闭 barplot 确实不好操作,网上找了一个方法 https://stackoverflow.com/questions/10286473/rotating-x-axis-labels-in-r-for-barplot, 就是将条形图的间隔设置为 1,然后在设置一个 endpoint,也就是条形图的宽度来实现,其实让我来操作,我会直接让 las = 2 即可,我不觉得垂直的丑。
op <- par()
df <- data.table(ggplot2::mpg)
df_new <- df[, mean(cty), by = manufacturer]
df_new <- df_new[order(V1)]
end_point = 0.5 + nrow(df_new) + nrow(df_new) - 1
barplot(
df_new$V1,
col = alpha("firebrick", 0.7),
ylim = c(, 30),
ylab = 'Mileage per Gallon',
tcl = ,
space = 1,
main = 'Bar chart of highway per mileage',
cex.axis = 0.7
)
text(
seq(1.5, end_point, by = 2),
par("usr")[3] - 0.25,
srt = 60,
adj = 1,
xpd = TRUE,
labels = df_new$manufacturer,
cex = 0.7
)
text(
seq(1.5, end_point, by = 2),
df_new$V1+1,
labels = as.character(round(df_new$V1, 2)),
cex = 0.7
)
box(lty = 1)
Lollipop Chart
这个棒棒糖图本质上还是上面的图,不过就是画的比较棒棒糖了。我觉得直接做这幅图要比上面的图操心少一些
df_new[, n := 1: .N]
colors <- alpha("firebrick", 0.7)
with(
df_new,
plot(n,
V1,
pch = 20,
cex = 3,
col = colors,
ylim = c(, 30),
xlab = 'Model',
ylab = 'Mileage per Gallon',
tcl = ,
main = 'Lollipop chart of highway mileage',
cex.axis = 0.7,
xaxt = 'n'
)
)
with(df_new, segments(n, rep(-1, max(n)), n, V1, col = colors, lwd = 3.5))
text(
x =df_new$n,
par("usr")[3] - 0.25,
srt = 60,
adj = 1,
xpd = TRUE,
labels = df_new$manufacturer,
cex = 0.7
)
text(
x =df_new$n,
df_new$V1+2,
labels = as.character(round(df_new$V1, 2)),
cex = 0.7
)
box(lty = 1)
Dot Plot
这就是个手动加参考线的散点图。
par(mar = c(5.1, 6.5, 4.1, 0.8))
with(
df_new,
plot(
V1,
n,
pch = 20,
cex = 2.2,
col = colors,
xlim = c(10, 26),
xlab = 'Mileage per Gallon',
tcl = ,
main = 'Dot plot of highway mileage',
cex.axis = 0.7,
yaxt = 'n',
ylab = ''
)
)
with(df_new, segments(rep(10,max(n)), n, rep(26,max(n)), n, col = "lightgray", lwd = 1, lty = 3))
axis(2,
at = df_new$n,
las = 2,
labels = df_new$manufacturer,
cex = 0.7,
)
mtext("Model", side=2, line=5.5)
par(op)
Slope Chart
这种图生态学领域用的少。
df <-
fread(
"data/gdppercap.csv",
skip = 1,
col.names = c('continent', '1952', '1957')
)
df[, `:=`(
left = paste(continent, ',', as.character(round(`1952`))),
right = paste(continent, ',', as.character(round(`1957`)))
)]
df[, colors := fifelse(continent == "Asia", "red", "green")]
plot.new()
plot.window(x = c(1948, 1961),
y = c(500, 12500),)
segments(
c(1952, 1957),
c(, ),
c(1952, 1957),
c(12500, 12500),
lty = 3,
cex = 2,
col = 'gray'
)
with(df,
points(
rep(1952, 5),
`1952`,
col = colors,
pch = 20,
cex = 2.5
))
with(df,
points(
rep(1957, 5),
`1957`,
col = colors,
pch = 20,
cex = 2.5
))
with(df,
segments(
rep(1952, 5),
`1952`,
rep(1957, 5),
`1957`,
col = colors,
lwd = 2.5
))
with(df,
text(
rep(1952 - 1.5, 5),
`1952`,
labels = left,
col = colors,
cex = 0.8
))
with(df, text(
rep(1957 + 1.5, 5),
`1957`,
labels = right,
col = colors,
cex = 0.8
))
text(
par("usr")[1] - 0.25,
seq(500, 12500, 2000),
xpd = TRUE,
labels = as.character('1952', '1957'),
cex = 0.9
)
text(
x = c(1952, 1957),
par("usr")[3] - 0.25,
xpd = TRUE,
labels = as.character('1952', '1957'),
cex = 0.9
)
mtext("Mean GDP per Capita", side = 2, line = 2)
Dumbbell Plot
这个图也是非常简单的一幅图。
df <- fread('data/health.csv')
df[, n:= 1:.N]
with(df,
plot(
pct_2014,
n,
pch = 20,
cex = 2.5,
col = alpha('#a3c4dc', 0.7),
xlim = c(0.03, 0.26),
axes = FALSE,
xlab = '',
ylab = "Mean GDP per Capita",
main = "Dumbbell Chart"
))
with(df,
points(
pct_2013,
n,
pch = 20,
cex = 2.5,
col = alpha('#0e668b', 0.7)
))
with(df,
segments(
pct_2013,
n,
pct_2014,
n,
col = alpha('#a3c4dc', 0.7),
lwd = 2
))
with(df,
segments(
seq(0.05, 0.25, 0.05),
rep(0.1, 4),
seq(0.05, 0.25, 0.05),
rep(25.5, 4),
col = "lightgray",
lwd = 2,
lty =3
))
axis(1, at = seq(0.05, 0.25, 0.05), labels = c("5%", "10%", "15%", "20%", "25%"), col.ticks = NA)
axis(2, col.ticks = NA)
box(lwd = 1)
就是这些内容
来源 https://mp.weixin.qq.com/s/1gcj4Dz8McTnU3r4SSQTfQ
相关文章