如何从宽数据帧创建堆叠条形图

2022-03-04 00:00:00 python pandas matplotlib bar-chart

问题描述

您能帮我找出这段代码有什么问题吗?我一直收到相同的错误消息:"ufunc 'add' did not contain a loop with signature matching types (dtype('<U32'), dtype('<U32')) -> dtype('<U32')"

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

# Wide-format sample data: one list of job counts per sector, plus the
# seniority level that each row belongs to.
data = {
    'Information Technology': [217.88, 581.07, 247.28, 200.17],
    'Industrials': [142.33, 324.59, 159.60, 163.68],
    'Health Care': [126.63, 258.50, 135.77, 134.90],
    'Communication': [142.33, 324.59, 159.60, 163.68],
    'Financials': [212.54, 330.87, 165.56, 203.98],
    'Consumer Staples': [561.36, 390.29, 117.85, 93.67],
    'Seniority': [1.0, 2.0, 3.0, 4.0],
}

# One column per dict key, one row per list position.
frame = pd.DataFrame(data)

   Information Technology  Industrials  Health Care  Communication  Financials  Consumer Staples  Seniority
0                  217.88       142.33       126.63         142.33      212.54            561.36        1.0
1                  581.07       324.59       258.50         324.59      330.87            390.29        2.0
2                  247.28       159.60       135.77         159.60      165.56            117.85        3.0
3                  200.17       163.68       134.90         163.68      203.98             93.67        4.0

# Question's reproduction script, kept verbatim because the traceback quoted
# further down is raised by exactly this code.
# NOTE: set_index() returns a new DataFrame by default; the result is not
# assigned here, so `frame` still holds 'Seniority' as an ordinary column.
frame.set_index('Seniority')
legend = ['Information Technology','Industrials','Health Care', 'Communication','Financials', 'Consumer Staples']
# Pull every column out by position as its own Series.
Information_Technology = frame.iloc[:,0]
Industrials = frame.iloc[:,1]
Health_Care = frame.iloc[:,2]
Communication = frame.iloc[:,3]
Financials = frame.iloc[:,4]
Consumer_Staples = frame.iloc[:,5]
Seniority = frame.iloc[:,6]


# First layer of the stack: no `bottom` is passed for these bars.
graphInformationTechnology = plt.bar(Seniority, Information_Technology ,0.35 ,color = '#FF0000' ,label = "Information Technology") 

# Every later layer passes `bottom` so that it is drawn above the previous ones.
graphIndustrials = plt.bar(Seniority, Industrials, 0.35 , bottom = Information_Technology ,color = '#ffff00' ,label ="Industrials")
# NOTE: this `bottom` includes Health_Care itself and leaves out Industrials.
graphHealthCare = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care ,color = '#0000FF' ,label ="Health Care")
# NOTE: the next three calls pass Health_Care as the bar heights although
# their labels name other sectors, and they reuse the same colour.
graphCommunication = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care + Industrials ,color = '#0000FF' ,label ="Communication")
graphFinancials = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care + Industrials + Financials ,color = '#0000FF' ,label ="Financials")
# NOTE: adding the str 'consumer staples' to a float Series is the operation
# that raises the UFuncTypeError.
graphconsumerstaples = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care + Financials + Financials + 'consumer staples' ,color = '#0000FF' ,label ="Consumer Staples")


# Recolour the tick labels of the current axes: x labels first, then y labels.
current_axes = plt.gca()
for tick_text in current_axes.get_xticklabels():
    tick_text.set_color('#CC3300')
for tick_text in current_axes.get_yticklabels():
    tick_text.set_color('#9932CC')

# Title and axis captions.
plt.title('Seniority and Job Count', size=20, fontweight="bold")
plt.xlabel('Seniority')
plt.ylabel('Job Count')

# Legend entries come from the explicit `legend` list; loc=(1.0, 0) is given
# in axes coordinates, which puts the legend box just right of the plot area.
plt.legend(legend, loc=(1.0, 0))

plt.show()

错误

---------------------------------------------------------------------------
UFuncTypeError                            Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22704/1999813345.py in <module>
     19 graphCommunication = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care + Industrials ,color = '#0000FF' ,label ="Communication")
     20 graphFinancials = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care + Industrials + Financials ,color = '#0000FF' ,label ="Financials")
---> 21 graphconsumerstaples = plt.bar(Seniority,Health_Care, 0.35 , bottom = Information_Technology + Health_Care + Financials + Financials + 'consumer staples' ,color = '#0000FF' ,label ="Consumer Staples")
     22 
     23 

D:\anaconda3\envs\py39\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
     67         other = item_from_zerodim(other)
     68 
---> 69         return method(self, other)
     70 
     71     return new_method

D:\anaconda3\envs\py39\lib\site-packages\pandas\core\arraylike.py in __add__(self, other)
     90     @unpack_zerodim_and_defer("__add__")
     91     def __add__(self, other):
---> 92         return self._arith_method(other, operator.add)
     93 
     94     @unpack_zerodim_and_defer("__radd__")

D:\anaconda3\envs\py39\lib\site-packages\pandas\core\series.py in _arith_method(self, other, op)
   5524 
   5525         with np.errstate(all="ignore"):
-> 5526             result = ops.arithmetic_op(lvalues, rvalues, op)
   5527 
   5528         return self._construct_result(result, name=res_name)

D:\anaconda3\envs\py39\lib\site-packages\pandas\core\ops\array_ops.py in arithmetic_op(left, right, op)
    222         _bool_arith_check(op, left, right)
    223 
--> 224         res_values = _na_arithmetic_op(left, right, op)
    225 
    226     return res_values

D:\anaconda3\envs\py39\lib\site-packages\pandas\core\ops\array_ops.py in _na_arithmetic_op(left, right, op, is_cmp)
    164 
    165     try:
--> 166         result = func(left, right)
    167     except TypeError:
    168         if is_object_dtype(left) or is_object_dtype(right) and not is_cmp:

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('float64'), dtype('<U16')) -> None

解决方案

pandas 堆叠条形图-正确方式

  • 将 pandas.DataFrame.plot 与 kind='bar' 和 stacked=True 一起使用
  • 这将实现从26行代码减少到9行
  • 对于自定义颜色,请为每个部分指定颜色列表,并在ax = frame.plot(..., color=color)内部添加color=color
    • color = ['red', 'yellow', 'blue', 'orange', 'green', 'purple']
  • 测试环境:python 3.8.12、pandas 1.3.4、matplotlib 3.4.3
import pandas as pd
import matplotlib.pyplot as plt

# Build the wide DataFrame from the `data` dict defined in the question.
frame = pd.DataFrame(data)
# Move 'Seniority' into the index so that only the sector columns are left
# to be drawn as the segments of each stacked bar.
frame.set_index('Seniority', inplace=True)

# `stacked` is a boolean flag. The string 'True' only worked because any
# non-empty string is truthy ('False' would have stacked the bars as well).
ax = frame.plot(kind='bar', stacked=True, figsize=(8, 6), rot=0, ylabel='Job Count')
ax.set_title('Seniority and Job Count', size=20, fontweight="bold")
# Anchor the legend just outside the right edge of the axes.
ax.legend(loc=(1.01, 0))

# Colour the ticks and tick labels on each axis.
ax.tick_params(axis='x', colors='red', which='both')
ax.tick_params(axis='y', colors='purple', which='both')

plt.show()

修复现有代码

  • 绘图创建有许多问题
    1. 许多绘图调用中指定的 y(柱高)不正确
    2. bottom 没有根据前面各段的 y 正确累加
      • Financials + 'consumer staples' 是导致错误的直接原因。Consumer_Staples 不应添加到 graphconsumerstaples 绘图的 bottom 中
# Make 'Seniority' the index in place (the question's call discarded the result).
# NOTE(review): once 'Seniority' is the index, `frame` has only six columns, so
# the question's `frame.iloc[:,6]` lookup has to run before this line - confirm
# the intended ordering.
frame.set_index('Seniority', inplace=True)

# Each layer must start where the layers beneath it end, so the running
# totals are built once and reused as the `bottom` of the next layer.
bar_width = 0.35
base_industrials = Information_Technology
base_health_care = base_industrials + Industrials
base_communication = base_health_care + Health_Care
base_financials = base_communication + Communication
base_consumer_staples = base_financials + Financials

# One plt.bar call per sector; heights, colours and labels now match the sector.
graphInformationTechnology = plt.bar(Seniority, Information_Technology, bar_width, color='#FF0000', label="Information Technology")
graphIndustrials = plt.bar(Seniority, Industrials, bar_width, bottom=base_industrials, color='#ffff00', label="Industrials")
graphHealthCare = plt.bar(Seniority, Health_Care, bar_width, bottom=base_health_care, color='#0000FF', label="Health Care")
graphCommunication = plt.bar(Seniority, Communication, bar_width, bottom=base_communication, color='orange', label="Communication")
graphFinancials = plt.bar(Seniority, Financials, bar_width, bottom=base_financials, color='green', label="Financials")
graphconsumerstaples = plt.bar(Seniority, Consumer_Staples, bar_width, bottom=base_consumer_staples, color='purple', label="Consumer Staples")

相关文章