是否有一个python函数可以对多个索引级别进行颜色/样式设置?

问题描述

我定义了一个函数,对每一列计算 value_counts,并给出计数(Count)和百分比(Percentage %),如下所示:

import pandas as pd
import seaborn as sns
import numpy as np

from IPython.display import display


# Load the "diamonds" example dataset through seaborn; it is the input
# passed to valueCountDF below.
df = sns.load_dataset("diamonds")

def valueCountDF(df):
    """Display per-value counts and percentages for every non-numeric column.

    For each column of ``df`` whose dtype is not numeric, the frequency of
    every distinct value (missing values included) is computed.  The result
    is a table indexed by (column name, value) with the columns ``"Count"``
    and ``"Percentage %"``; it is shown with ``display``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose non-numeric columns are summarised.

    Returns
    -------
    None
        The table is displayed, not returned.
    """
    object_cols = list(df.select_dtypes(exclude=np.number).columns)

    # One row per source column, one column per distinct value.  A value that
    # never occurs in a given source column shows up as NaN in that row.
    counts_wide = df[object_cols].apply(
        lambda x: x.value_counts(dropna=False)
    ).T
    shares_wide = df[object_cols].apply(
        lambda x: x.value_counts(normalize=True, dropna=False)
    ).T

    # Drop the NaN fillers explicitly: the legacy stack() discarded them
    # implicitly, the newer implementation keeps them, and astype(int)
    # cannot cast NaN.
    c = counts_wide.stack().dropna().astype(int)
    p = (shares_wide.stack().dropna() * 100).round(2)

    cp = pd.concat([c, p], axis=1, keys=["Count", "Percentage %"])
    display(cp)

# Summarise the diamonds frame; this version of valueCountDF displays the
# table itself and has no return value.
valueCountDF(df)

此代码输出:

                   Count  Percentage %
cut     Fair        1610          2.98
        Good        4906          9.10
        Ideal      21551         39.95
        Premium    13791         25.57
        Very Good  12082         22.40
color   D           6775         12.56
        E           9797         18.16
        F           9542         17.69
        G          11292         20.93
        H           8304         15.39
        I           5422         10.05
        J           2808          5.21
clarity I1           741          1.37
        IF          1790          3.32
        SI1        13065         24.22
        SI2         9194         17.04
        VS1         8171         15.15
        VS2        12258         22.73
        VVS1        3655          6.78
        VVS2        5066          9.39

对于大型数据集,在白色背景的 Jupyter Notebook 中很难看清上述输出。

所以我想尝试使用 pandas 的 DataFrame Styler,为每个行索引设置背景色。

# Shade the cell values with the 'viridis' colormap (uses the full color range).
# NOTE(review): `cp` is assigned inside valueCountDF above and is not returned,
# so it has to be made available in this scope for the line to run.
display(cp.style.background_gradient(cmap='viridis'))

上面的代码生成的 background_gradient(背景渐变)不包括 DataFrame 的索引。我需要为每个行索引(cut、color、clarity)及其所属分组着色。

准确地说,我想用颜色来区分各个分组,例如 "cut" 及其分组使用一种颜色,"color" 及其分组使用另一种颜色。有办法做到这一点吗?

更新:

感谢@r-beginners

使用以下css样式器

# Extra table CSS for Styler.set_table_styles: header cells matching
# "th.row_heading.level0" (the level-0 row index) get a dark-sea-green
# background with white text.
table_css = [
    dict(
        selector="th.row_heading.level0",
        props=[("background-color", "darkseagreen"), ("color", "white")],
    ),
]
def valueCountDF(df):
    """Return a styled count/percentage table for every non-numeric column.

    For each column of ``df`` whose dtype is not numeric, the frequency of
    every distinct value (missing values included) is computed.  The table
    is indexed by (column name, value) and has the columns ``"Count"`` and
    ``"Percentage %"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose non-numeric columns are summarised.

    Returns
    -------
    pandas.io.formats.style.Styler
        The table with a 'viridis' background gradient on the values and
        the module-level ``table_css`` rules applied.
    """
    object_cols = list(df.select_dtypes(exclude=np.number).columns)

    # One row per source column, one column per distinct value.  A value that
    # never occurs in a given source column shows up as NaN in that row.
    counts_wide = df[object_cols].apply(
        lambda x: x.value_counts(dropna=False)
    ).T
    shares_wide = df[object_cols].apply(
        lambda x: x.value_counts(normalize=True, dropna=False)
    ).T

    # Drop the NaN fillers explicitly: the legacy stack() discarded them
    # implicitly, the newer implementation keeps them, and astype(int)
    # cannot cast NaN.
    c = counts_wide.stack().dropna().astype(int)
    p = (shares_wide.stack().dropna() * 100).round(2)

    cp = pd.concat([c, p], axis=1, keys=["Count", "Percentage %"])

    styled = cp.style.background_gradient(cmap='viridis')
    return styled.set_table_styles(table_css)

# This version returns the Styler instead of displaying it; presumably the
# notebook renders it as the cell's last expression.
valueCountDF(df)

这样只能用同一种颜色为 level 0 索引上色,如下所示。


解决方案

加载数据

import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import colors

# Load the "diamonds" example dataset through seaborn; it is the input
# passed to valueCountDF below.
df = sns.load_dataset("diamonds")

重置索引并重命名轴。更新函数 valueCountDF 中的代码:

def valueCountDF(df):
    """Return a flat count/percentage table for every non-numeric column.

    For each column of ``df`` whose dtype is not numeric, the frequency of
    every distinct value (missing values included) is computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose non-numeric columns are summarised.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Variable"``, ``"Class"``, ``"Count"`` and
        ``"Percentage %"`` on a default integer index.  ``"Variable"`` holds
        the source column name on the first row of each group and an empty
        string on the following rows.
    """
    object_cols = list(df.select_dtypes(exclude=np.number).columns)

    # One row per source column, one column per distinct value.  A value that
    # never occurs in a given source column shows up as NaN in that row.
    counts_wide = df[object_cols].apply(
        lambda x: x.value_counts(dropna=False)
    ).T
    shares_wide = df[object_cols].apply(
        lambda x: x.value_counts(normalize=True, dropna=False)
    ).T

    # Drop the NaN fillers explicitly: the legacy stack() discarded them
    # implicitly, the newer implementation keeps them, and astype(int)
    # cannot cast NaN.
    c = counts_wide.stack().dropna().astype(int)
    p = (shares_wide.stack().dropna() * 100).round(2)

    cp = pd.concat([c, p], axis=1, keys=["Count", "Percentage %"])

    # Reset index and name the axis
    cp = cp.rename_axis(['Variable', 'Class']).reset_index()
    # Keep the variable name only on the first row of each group, so a
    # non-empty cell marks where a new group starts.
    cp['Variable'] = np.where(cp['Variable'].duplicated(), '', cp['Variable'])

    return cp

使用 np.broadcast_to 和 radd():

def colr(x):
    """Build a per-cell CSS frame giving every variable group one colour.

    Meant to be passed to ``Styler.apply(colr, axis=None)``, which hands over
    the whole frame in one call.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with a ``'Variable'`` column in which a non-empty string marks
        the first row of a group and ``""`` marks the rows that follow.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``x``; every cell of a row holds the same
        ``'background-color:<name>'`` string, one colour name per group.
    """
    # Each non-empty 'Variable' cell starts a new group, so the running count
    # of those cells numbers the groups 1, 2, 3, ...
    group_no = x['Variable'].ne("").cumsum()
    # Position -> colour name, in the iteration order of `colors.cnames`.
    d = dict(enumerate(colors.cnames))
    # Column vector of CSS strings, one per row.
    css = group_no.map(d).radd('background-color:').to_numpy()[:, None]
    # Build the result directly instead of overwriting a copy of `x`: this
    # avoids assigning strings into numeric columns and the positional
    # `axis` argument of drop(), which pandas 2.0 no longer accepts.
    return pd.DataFrame(
        np.broadcast_to(css, x.shape),
        index=x.index,
        columns=x.columns,
        copy=True,  # broadcast_to yields a read-only view
    )

应用样式

# Build the flat summary table, then style it: axis=None makes Styler.apply
# pass the whole frame to colr in a single call, and format() renders the
# 'Percentage %' column with one decimal place.
val_count = valueCountDF(df)
val_count.style.apply(colr,axis=None).format({'Percentage %': '{:.1f}'})

相关文章