如何在写入EXCEL时删除透视DF中的NULL/空列

2022-03-03 00:00:00 python-3.x pandas dataframe openpyxl pivot-table

问题描述

我想把下表的透视视图写入Excel。程序先读取现有工作簿(input.xlsx)的Index工作表，再按其中的ID在Excel数据库(db1.xlsx)中筛选记录，然后把每个ID对应的透视视图写入input.xlsx中新建的工作表。

输入工作簿索引表：-

db1.xlsx：-

ID  NAME SEGMENT    LEVEL   PARAMETER   VALUE   REMARKS
11  NAME2   set1    L       ball    32000   GREEN
11  NAME2   set1    M       ball    30000   GREEN
11  NAME2   set1    H       ball    29000   GREEN
11  NAME2   set1    L       egg     68      GREEN
11  NAME2   set1    M       egg     67      GREEN
11  NAME2   set1    H       egg     62      GREEN
11  NAME2   set1    L       tin     67667   GREEN
11  NAME2   set1    M       tin     60852   GREEN
11  NAME2   set1    H       tin     50434   GREEN
11  NAME2   set1    L       rat     6       GREEN
11  NAME2   set1    M       rat     5       GREEN
11  NAME2   set1    H       rat     4       GREEN
11  NAME2   set1    L       pen     3       GREEN
11  NAME2   set1    M       pen     2       GREEN
11  NAME2   set1    H       pen     1       GREEN
11  NAME2   set2    L       ball    2000    GREEN
11  NAME2   set2    M       ball    2000    GREEN
11  NAME2   set2    H       ball    2000    GREEN
11  NAME2   set2    L       egg     67      GREEN
11  NAME2   set2    M       egg     67      GREEN
11  NAME2   set2    H       egg     66      GREEN
11  NAME2   set2    L       tin     11078   GREEN
11  NAME2   set2    M       tin     10633   GREEN
11  NAME2   set2    H       tin     10271   GREEN
11  NAME2   set2    L       rat     5       GREEN
11  NAME2   set2    M       rat     4       GREEN
11  NAME2   set2    H       rat     3       GREEN
11  NAME2   set2    L       pen     4       GREEN
11  NAME2   set2    M       pen     3       GREEN
11  NAME2   set2    H       pen     2       GREEN
11  NAME2   set1    L       ball    32000   TSS
11  NAME2   set1    M       ball    30000   TSS
11  NAME2   set1    H       ball    29000   TSS
11  NAME2   set1    L       egg     68      TSS
11  NAME2   set1    M       egg     67      TSS
11  NAME2   set1    H       egg     62      TSS
11  NAME2   set1    L       tin     100     TSS
11  NAME2   set1    M       tin     100     TSS
11  NAME2   set1    H       tin     100     TSS
11  NAME2   set1    L       rat     6       TSS
11  NAME2   set1    M       rat     5       TSS
11  NAME2   set1    H       rat     4       TSS
11  NAME2   set1    L       pen     1       TSS
11  NAME2   set1    M       pen     1       TSS
11  NAME2   set1    H       pen     1       TSS
11  NAME2   set2    L       ball    2000    TSS
11  NAME2   set2    M       ball    2000    TSS
11  NAME2   set2    H       ball    2000    TSS
11  NAME2   set2    L       egg     67      TSS
11  NAME2   set2    M       egg     67      TSS
11  NAME2   set2    H       egg     66      TSS
11  NAME2   set2    L       tin     100     TSS
11  NAME2   set2    M       tin     100     TSS
11  NAME2   set2    H       tin     100     TSS
11  NAME2   set2    L       rat     5       TSS
11  NAME2   set2    M       rat     4       TSS
11  NAME2   set2    H       rat     3       TSS
11  NAME2   set2    L       pen     1       TSS
11  NAME2   set2    M       pen     1       TSS
11  NAME2   set2    H       pen     1       TSS
11  NAME2   set1    NOT APPLICABLE  max 800 GREEN
11  NAME2   set2    NOT APPLICABLE  max 300 GREEN
11  NAME2   set1    NOT APPLICABLE  max 1300 TSS
11  NAME2   set2    NOT APPLICABLE  max 500 TSS

我为获取透视视图而编写的代码：-

from openpyxl import load_workbook
from openpyxl.styles import Alignment
import pandas as pd,os,sys


# NOTE(review): the path is left blank here, apparently as a placeholder —
# point it at the folder that holds input.xlsx and db1.xlsx before running,
# because both files are opened below by relative name.
os.chdir(r'')


def fetchDatabaseRowsAsDataFrame(country_id, remark, database_table_name):
    """Return the database rows that match one ID and one remark.

    Args:
        country_id: Scalar compared with the ``ID`` column. A row matches
            when the values are equal or when their text forms are equal,
            so an ID stored as text still matches its numeric counterpart.
        remark: Value whose string form is compared with the ``REMARKS``
            column.
        database_table_name: Path of the Excel workbook to read, or an
            already loaded DataFrame with ``ID`` and ``REMARKS`` columns
            (lets a caller avoid re-reading the workbook on every call).

    Returns:
        A new DataFrame holding only the matching rows (empty when nothing
        matches). It is a copy, so editing it never touches the source table.
    """
    if isinstance(database_table_name, pd.DataFrame):
        database_table_df = database_table_name
    else:
        database_table_df = pd.read_excel(database_table_name)

    # A missing ID cannot identify any row; return an empty frame with the
    # same columns instead of failing.
    if pd.isna(country_id):
        return database_table_df.iloc[0:0].copy()

    # Filter with boolean masks rather than a concatenated query string:
    # a remark containing a quote, or a non-numeric ID, would otherwise
    # produce an invalid (or differently parsed) query expression.
    id_column = database_table_df['ID']
    id_matches = (id_column == country_id) | (id_column.astype(str) == str(country_id))
    remark_matches = database_table_df['REMARKS'] == str(remark)

    return database_table_df[id_matches & remark_matches].copy()

# Build one worksheet per ID listed on the 'Index' sheet of input.xlsx.
# For every REMARKS column of that ID, the matching database rows are laid
# out as a pivot-style table: one row per SEGMENT, one merged header per
# PARAMETER, and below it only the LEVEL columns that actually hold data
# for that parameter (so no empty columns are written).
wb = load_workbook('input.xlsx')
try:
    # Looking the sheet up first makes a workbook without an 'Index' sheet
    # fail before any new sheet is created.
    index_sheet = wb['Index']
    input_df = pd.read_excel('input.xlsx', sheet_name='Index')

    remarks_array = ['REMARKS1', 'REMARKS2', 'REMARKS3']
    db_array = ['db1.xlsx', 'db1.xlsx', 'db1.xlsx']
    segment_column = 3       # column C holds the segment names
    first_value_column = 4   # pivot values start in column D

    for ind in input_df.index:
        sheet_title = str(input_df['ID'][ind])
        wb.create_sheet(sheet_title)
        current_sheet = wb[sheet_title]

        # Link back to the index sheet from the top-left cell.
        back_link = current_sheet.cell(1, 1)
        back_link.value = "Index sheet"
        back_link.hyperlink = "input.xlsx#Index!A1"
        back_link.style = "Hyperlink"

        heading_row = 3
        empty = True

        for remark_column, db_name in zip(remarks_array, db_array):
            remark_value = input_df[remark_column][ind]
            db_rows = fetchDatabaseRowsAsDataFrame(input_df['ID'][ind], remark_value, db_name)
            if db_rows is None or len(db_rows) == 0:
                continue

            empty = False
            # f-string formatting also copes with a remark that is not a string.
            current_sheet.cell(heading_row - 1, segment_column).value = f"{remark_column}-{remark_value}"

            # Rows without a real level are grouped under the label 'NA';
            # a missing LEVEL would otherwise be dropped by the pivot.
            db_rows = db_rows.copy()
            db_rows['LEVEL'] = db_rows['LEVEL'].astype(object)
            no_level = db_rows['LEVEL'].isna() | (db_rows['LEVEL'] == 'NOT APPLICABLE')
            db_rows.loc[no_level, 'LEVEL'] = 'NA'

            pivot_db_rows = pd.pivot_table(
                db_rows,
                values='VALUE',
                index=['SEGMENT', 'PARAMETER'],
                columns='LEVEL',
            ).reset_index()

            # Level labels, with 'NA' moved to the front (stable sort keeps
            # the remaining labels in pivot order).
            level_columns = sorted(list(pivot_db_rows)[2:], key=lambda level: level != 'NA')

            pivot_segments = pivot_db_rows['SEGMENT'].unique()
            segment_row = {
                segment: heading_row + 2 + offset
                for offset, segment in enumerate(pivot_segments)
            }

            # For each parameter keep only the levels that have at least
            # one value; this is what removes the empty columns.
            levels_by_parameter = {}
            for parameter in pivot_db_rows['PARAMETER'].unique():
                parameter_rows = pivot_db_rows[pivot_db_rows['PARAMETER'] == parameter]
                levels_by_parameter[parameter] = [
                    level for level in level_columns
                    if parameter_rows[level].notna().any()
                ]

            # Parameters that carry the 'NA' level come first.
            pivot_parameters = sorted(
                levels_by_parameter,
                key=lambda parameter: 'NA' not in levels_by_parameter[parameter],
            )

            # Header rows: parameter name (merged across its levels) and,
            # one row below, the level labels.
            column_of = {}
            next_column = first_value_column
            for parameter in pivot_parameters:
                levels = levels_by_parameter[parameter]
                if not levels:
                    continue
                last_column = next_column + len(levels) - 1
                if last_column > next_column:
                    # Only merge when the header really spans several cells.
                    current_sheet.merge_cells(
                        start_row=heading_row,
                        start_column=next_column,
                        end_row=heading_row,
                        end_column=last_column,
                    )
                header_cell = current_sheet.cell(heading_row, next_column)
                header_cell.value = parameter
                header_cell.alignment = Alignment(horizontal='center')
                for offset, level in enumerate(levels):
                    current_sheet.cell(heading_row + 1, next_column + offset).value = level
                    column_of[(parameter, level)] = next_column + offset
                next_column = last_column + 1

            current_sheet.cell(heading_row + 1, segment_column).value = 'SEGMENT'
            for segment, row_number in segment_row.items():
                current_sheet.cell(row_number, segment_column).value = segment

            # Body: one value per (segment, parameter, level); missing
            # combinations are left as blank cells.
            for row_label in pivot_db_rows.index:
                segment = pivot_db_rows.at[row_label, 'SEGMENT']
                parameter = pivot_db_rows.at[row_label, 'PARAMETER']
                for level in levels_by_parameter[parameter]:
                    value = pivot_db_rows.at[row_label, level]
                    if pd.notna(value):
                        current_sheet.cell(segment_row[segment], column_of[(parameter, level)]).value = value

            # Leave a gap before the table of the next remark.
            heading_row = heading_row + len(pivot_segments) + 4

        # Drop the sheet again when no remark produced any rows.
        if empty:
            wb.remove(current_sheet)

except Exception as e:
    print("Exception occured "+str(e))
finally:
    # Whatever was built is saved even after a failure, as before.
    wb.save('input.xlsx')
    wb.close()

输出：-

预期输出：-

我无法去掉空列。另外，我希望LEVEL为NA的参数出现在透视视图的第一列，如预期输出所示。

适用的ID。

解决方案

尝试：

# `df` is the db1.xlsx table, already loaded as a DataFrame.
# Blank out the placeholder level so it sorts ahead of the real levels.
df = df.replace("NOT APPLICABLE", "")

# One row per SEGMENT and one column per (PARAMETER, LEVEL) pair present in
# the data. The arguments are passed by keyword because DataFrame.pivot no
# longer accepts them positionally in pandas 2.0 and later.
x = df[df.REMARKS.eq("GREEN")].pivot(
    index="SEGMENT", columns=["PARAMETER", "LEVEL"], values="VALUE"
)

# Reorder the columns: parameters with a blank level first, then by
# parameter name and level.
x = x.reindex(
    pd.MultiIndex.from_tuples(
        sorted(x.columns, key=lambda k: (k[1] != "", k[0], k[1]))
    ),
    axis=1,
)
print(x)

打印：

         max   ball               egg         pen       rat          tin              
                  H      L      M   H   L   M   H  L  M   H  L  M      H      L      M
SEGMENT                                                                               
set1     800  29000  32000  30000  62  68  67   1  3  2   4  6  5  50434  67667  60852
set2     300   2000   2000   2000  66  67  67   2  4  3   3  5  4  10271  11078  10633

相关文章