pandas 和Matplotlib-需要按国家接种疫苗的百分比,并使用下拉菜单绘制特定国家首选疫苗的条形图
问题描述
这是数据集。
location date vaccine total_vaccinations
0 Austria 2021-01-08 Johnson&Johnson 0
1 Austria 2021-01-08 Moderna 0
2 Austria 2021-01-08 Oxford/AstraZeneca 0
3 Austria 2021-01-08 Pfizer/BioNTech 30938
4 Austria 2021-01-15 Johnson&Johnson 0
... ... ... ... ...
8633 Uruguay 2021-07-05 Pfizer/BioNTech 1024793
8634 Uruguay 2021-07-05 Sinovac 3045997
8635 Uruguay 2021-07-06 Oxford/AstraZeneca 43245
8636 Uruguay 2021-07-06 Pfizer/BioNTech 1038942
8637 Uruguay 2021-07-06 Sinovac 3079853
8638 rows × 4 columns
我在Jupyter笔记本上工作。
- 需要按国家/地区列出的疫苗接种百分比
- 使用下拉菜单(交互式绘图小工具)在特定国家/地区绘制首选疫苗的条形图
解决方案
- 您可以从OWID 获取包含人口数据的COVID数据
- 您似乎就是在这里按制造商获取数据的
- 数据可以与整体COVID数据合并,这样您记录的所有属性都可用
- 使用了 Plotly 绘图,因此可以交互式地隐藏/显示各条轨迹(trace)
- 注意:按制造商发布数据的国家并不多
import requests, io
import pandas as pd
def _read_csv_url(url, timeout=30):
    """Download *url* with ``requests`` and parse the body as CSV.

    Args:
        url: Address of the CSV file to fetch.
        timeout: Seconds to wait for the server to connect/respond before
            giving up, so a stalled connection cannot hang the notebook.

    Returns:
        A ``pandas.DataFrame`` parsed from the response text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Without this check an error page would be handed to
            ``read_csv`` and fail later with a confusing parse error.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.text))


# get data by manufacturer
dfm = _read_csv_url(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations-by-manufacturer.csv"
)
# get all COVID data
dfall = _read_csv_url(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
)
# Combine the two datasets. The manufacturer rows are reshaped so that every
# vaccine becomes its own column, keyed by (location, date).
# NB not all countries publish this data, hence the inner join.
_by_vaccine = dfm.set_index(["location", "date", "vaccine"]).unstack("vaccine")
# unstack leaves a two-level column index; drop the outer level so the
# columns are named after the vaccines only.
_by_vaccine = _by_vaccine.droplevel(0, 1)
_covid_indexed = dfall.set_index(["location", "date"])
dfv = _covid_indexed.join(_by_vaccine, how="inner").reset_index()
# Keep only the latest data per country: order rows by date within each
# iso_code, then let GroupBy.last() pick the final entry of every column.
_chronological = dfv.sort_values(["iso_code", "date"])
_latest_per_country = _chronological.groupby("iso_code", as_index=False).last()
# Most fully-vaccinated countries first, which fixes the x-axis order of the plot.
dfplot = _latest_per_country.sort_values(
    "people_fully_vaccinated_per_hundred", ascending=False
)
import plotly.express as px
import plotly.graph_objects as go
# use plotly so it's interactive
_vaccines = dfm["vaccine"].unique()
# Rebase each vaccine's dose count by the country's population so the bars
# are expressed per person rather than as absolute totals.
_per_capita = {name: dfplot[name] / dfplot["population"] for name in _vaccines}
fig = px.bar(dfplot.assign(**_per_capita), x="location", y=_vaccines)
# Overlay a line of people fully vaccinated. The per-hundred figure is divided
# by 100 so it is a fraction of the population, like the bars.
_fully_vaccinated = go.Scatter(
    x=dfplot["location"],
    y=dfplot["people_fully_vaccinated_per_hundred"] / 100,
    name="Fully vaccinated",
    mode="lines",
    line={"color": "purple", "width": 4},
)
fig.add_trace(_fully_vaccinated)
已更新
- 原始需求要求给出接种人数的百分比;根据评论,该要求已被取消
- 需求实际上已被重新表述为一个交互式仪表板,因此这里使用了 Dash
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State
import requests, io
import pandas as pd
import plotly.express as px
def _read_csv_url(url, timeout=30):
    """Download *url* with ``requests`` and parse the body as CSV.

    Args:
        url: Address of the CSV file to fetch.
        timeout: Seconds to wait for the server to connect/respond before
            giving up, so a stalled connection cannot hang the notebook.

    Returns:
        A ``pandas.DataFrame`` parsed from the response text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Without this check an error page would be handed to
            ``read_csv`` and fail later with a confusing parse error.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.text))


# get data by manufacturer
dfm = _read_csv_url(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations-by-manufacturer.csv"
)
def buildTab(col="location"):
    """Build a multi-select DataTable listing the distinct values of ``dfm[col]``.

    Args:
        col: Name of the column in the module-level ``dfm`` frame to list.
            It is also used as the component id, so callbacks can address
            the table by the column name.

    Returns:
        A ``dash_table.DataTable`` with one row per unique value.
    """
    distinct_values = pd.DataFrame({col: dfm[col].unique()})
    column_specs = [{"name": name, "id": name} for name in distinct_values.columns]
    rows = distinct_values.to_dict("records")
    return dash_table.DataTable(
        id=col,
        columns=column_specs,
        data=rows,
        row_selectable="multi",
        style_header={"fontWeight": "bold"},
        style_as_list_view=True,
        css=[{"selector": ".dash-spreadsheet tr", "rule": "height: 5px;"}],
    )
# Build App
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# One narrow, scrollable selector column per filterable field.
_selector_columns = [
    dbc.Col(
        buildTab(col=field),
        width=3,
        style={"height": "20vh", "overflow-y": "auto"},
    )
    for field in ("location", "vaccine")
]

# Selectors on top; the "graphs" container below is filled in by the callback.
app.layout = html.Div(
    [
        dbc.Row(_selector_columns),
        html.Div(id="graphs"),
    ],
    style={
        "font-family": "Arial",
        "font-size": "0.9em",
    },
)
@app.callback(
    Output("graphs", "children"),
    Input("location", "selected_rows"),
    Input("vaccine", "selected_rows"),
    State("location", "data"),
    State("vaccine", "data"),
)
def updateGraphs(selected_location, selected_vaccine, location, vaccine):
    """Render a bar chart for the selected locations and vaccines.

    Args:
        selected_location: Row positions selected in the "location" table.
        selected_vaccine: Row positions selected in the "vaccine" table.
        location: Full row data of the "location" table.
        vaccine: Full row data of the "vaccine" table.

    Returns:
        A ``dcc.Graph`` with the latest ``total_vaccinations`` per
        (location, vaccine) pair, or ``None`` when either selection is empty.
    """
    # Nothing to draw until at least one row is picked in both tables.
    if not (selected_location and selected_vaccine):
        return None

    # Translate the selected row positions back into the values they show.
    chosen_locations = pd.DataFrame(location).iloc[selected_location]
    chosen_vaccines = pd.DataFrame(vaccine).iloc[selected_vaccine]

    # Inner merges act as filters on the module-level manufacturer data.
    subset = dfm.merge(chosen_locations, on="location", how="inner")
    subset = subset.merge(chosen_vaccines, on="vaccine", how="inner")

    # Keep the most recent entry for every (location, vaccine) pair.
    latest = (
        subset.sort_values(["location", "vaccine", "date"])
        .groupby(["location", "vaccine"], as_index=False)
        .last()
    )
    bar_chart = px.bar(
        latest,
        x="location",
        y="total_vaccinations",
        color="vaccine",
    )
    return dcc.Graph(figure=bar_chart)
# Start the Dash app; mode="inline" displays the result inline in the
# notebook's output cell.
app.run_server(mode="inline")
相关文章