如何在写入EXCEL时删除透视DF中的NULL/空列
问题描述
我想把下表的透视视图写入Excel。我读取现有工作簿(input.xlsx)中的Index工作表,按其中的ID在Excel数据库(db1.xlsx)里筛选记录,然后把这些动态ID各自的透视视图写入input.xlsx。
输入工作簿索引表:-
db1.xlsx:-
ID NAME SEGMENT LEVEL PARAMETER VALUE REMARKS
11 NAME2 set1 L ball 32000 GREEN
11 NAME2 set1 M ball 30000 GREEN
11 NAME2 set1 H ball 29000 GREEN
11 NAME2 set1 L egg 68 GREEN
11 NAME2 set1 M egg 67 GREEN
11 NAME2 set1 H egg 62 GREEN
11 NAME2 set1 L tin 67667 GREEN
11 NAME2 set1 M tin 60852 GREEN
11 NAME2 set1 H tin 50434 GREEN
11 NAME2 set1 L rat 6 GREEN
11 NAME2 set1 M rat 5 GREEN
11 NAME2 set1 H rat 4 GREEN
11 NAME2 set1 L pen 3 GREEN
11 NAME2 set1 M pen 2 GREEN
11 NAME2 set1 H pen 1 GREEN
11 NAME2 set2 L ball 2000 GREEN
11 NAME2 set2 M ball 2000 GREEN
11 NAME2 set2 H ball 2000 GREEN
11 NAME2 set2 L egg 67 GREEN
11 NAME2 set2 M egg 67 GREEN
11 NAME2 set2 H egg 66 GREEN
11 NAME2 set2 L tin 11078 GREEN
11 NAME2 set2 M tin 10633 GREEN
11 NAME2 set2 H tin 10271 GREEN
11 NAME2 set2 L rat 5 GREEN
11 NAME2 set2 M rat 4 GREEN
11 NAME2 set2 H rat 3 GREEN
11 NAME2 set2 L pen 4 GREEN
11 NAME2 set2 M pen 3 GREEN
11 NAME2 set2 H pen 2 GREEN
11 NAME2 set1 L ball 32000 TSS
11 NAME2 set1 M ball 30000 TSS
11 NAME2 set1 H ball 29000 TSS
11 NAME2 set1 L egg 68 TSS
11 NAME2 set1 M egg 67 TSS
11 NAME2 set1 H egg 62 TSS
11 NAME2 set1 L tin 100 TSS
11 NAME2 set1 M tin 100 TSS
11 NAME2 set1 H tin 100 TSS
11 NAME2 set1 L rat 6 TSS
11 NAME2 set1 M rat 5 TSS
11 NAME2 set1 H rat 4 TSS
11 NAME2 set1 L pen 1 TSS
11 NAME2 set1 M pen 1 TSS
11 NAME2 set1 H pen 1 TSS
11 NAME2 set2 L ball 2000 TSS
11 NAME2 set2 M ball 2000 TSS
11 NAME2 set2 H ball 2000 TSS
11 NAME2 set2 L egg 67 TSS
11 NAME2 set2 M egg 67 TSS
11 NAME2 set2 H egg 66 TSS
11 NAME2 set2 L tin 100 TSS
11 NAME2 set2 M tin 100 TSS
11 NAME2 set2 H tin 100 TSS
11 NAME2 set2 L rat 5 TSS
11 NAME2 set2 M rat 4 TSS
11 NAME2 set2 H rat 3 TSS
11 NAME2 set2 L pen 1 TSS
11 NAME2 set2 M pen 1 TSS
11 NAME2 set2 H pen 1 TSS
11 NAME2 set1 NOT APPLICABLE max 800 GREEN
11 NAME2 set2 NOT APPLICABLE max 300 GREEN
11 NAME2 set1 NOT APPLICABLE max 1300 TSS
11 NAME2 set2 NOT APPLICABLE max 500 TSS
我为获取透视视图而编写的代码:-
from openpyxl import load_workbook
from openpyxl.styles import Alignment
import pandas as pd,os,sys
# NOTE(review): the directory path is blank in this listing; an empty path is
# not a valid directory, so set it to the folder that holds input.xlsx and
# db1.xlsx (both are opened by relative name below) before running.
os.chdir(r'')
def fetchDatabaseRowsAsDataFrame(country_id, remark, database_table_name):
    """Return the database rows whose ID and REMARKS match the given values.

    Args:
        country_id: Value compared against the ``ID`` column.
        remark: Value compared, as text, against the ``REMARKS`` column.
        database_table_name: Path of the Excel workbook to read, or an
            already-loaded ``DataFrame`` (lets a caller avoid re-reading the
            same workbook for every lookup).

    Returns:
        A new ``DataFrame`` with the matching rows (a copy, so the caller may
        modify it freely); empty when nothing matches.
    """
    if isinstance(database_table_name, pd.DataFrame):
        database_table_df = database_table_name
    else:
        database_table_df = pd.read_excel(database_table_name)
    # Boolean masks instead of a concatenated query() string: a remark that
    # contains a double quote can no longer break or alter the expression.
    id_matches = database_table_df['ID'] == country_id
    remark_matches = database_table_df['REMARKS'] == str(remark)
    return database_table_df[id_matches & remark_matches].copy()
# Build one worksheet per ID listed on the 'Index' sheet of input.xlsx. For each
# remark column of that ID, the matching database rows are pivoted
# (SEGMENT rows x PARAMETER/LEVEL columns) and written as a table; tables for
# successive remarks are stacked vertically on the same sheet.
wb = load_workbook('input.xlsx')
# Index-sheet columns holding the remarks, and the database workbook searched
# for each of them (paired by position).
remarks_array = ['REMARKS1', 'REMARKS2', 'REMARKS3']
db_array = ['db1.xlsx', 'db1.xlsx', 'db1.xlsx']
try:
    # Looked up only to fail early (KeyError) when there is no 'Index' sheet.
    index_sheet = wb['Index']
    input_df = pd.read_excel('input.xlsx', sheet_name='Index')
    for ind in input_df.index:
        # Keep the sheet object returned by create_sheet: openpyxl renames the
        # new sheet when the title is already taken, so a lookup by title
        # could hand back an older sheet instead of the one just created.
        current_sheet = wb.create_sheet(str(input_df['ID'][ind]))
        back_link = current_sheet.cell(1, 1)
        back_link.value = "Index sheet"
        back_link.hyperlink = "input.xlsx#Index!A1"
        back_link.style = "Hyperlink"
        heading_row = 3
        empty = True
        for remark_column, db_file in zip(remarks_array, db_array):
            remark = input_df[remark_column][ind]
            db_rows = fetchDatabaseRowsAsDataFrame(input_df['ID'][ind], remark, db_file)
            if db_rows is None or len(db_rows) == 0:
                continue
            empty = False
            current_sheet.cell(heading_row - 1, 3).value = remark_column + "-" + str(remark)

            # Work on a copy and normalise missing / 'NOT APPLICABLE' levels
            # to the single label 'NA'.
            db_rows = db_rows.copy()
            level = db_rows['LEVEL'].astype(object)
            db_rows['LEVEL'] = level.where(level.notna() & (level != 'NOT APPLICABLE'), 'NA')

            pivot = pd.pivot_table(db_rows, values='VALUE', index=['SEGMENT', 'PARAMETER'], columns='LEVEL')
            pivot_segments = list(pivot.index.get_level_values('SEGMENT').unique())

            # One sub-table per parameter, keeping only the LEVEL columns that
            # actually hold a value for it. This is what removes the empty
            # columns (e.g. H/L/M under a parameter that only has level NA).
            parameter_blocks = {}
            for parameter in pivot.index.get_level_values('PARAMETER').unique():
                parameter_blocks[parameter] = pivot.xs(parameter, level='PARAMETER').dropna(axis=1, how='all')

            # Parameters that have an 'NA' level go first; sorted() is stable,
            # so the remaining order is unchanged.
            ordered_parameters = sorted(
                parameter_blocks,
                key=lambda name: 'NA' not in parameter_blocks[name].columns,
            )

            column = 4
            for parameter in ordered_parameters:
                block = parameter_blocks[parameter]
                levels = list(block.columns)
                if not levels:
                    continue
                if len(levels) > 1:
                    current_sheet.merge_cells(
                        start_row=heading_row,
                        start_column=column,
                        end_row=heading_row,
                        end_column=column + len(levels) - 1,
                    )
                header_cell = current_sheet.cell(heading_row, column)
                header_cell.value = parameter
                header_cell.alignment = Alignment(horizontal='center')
                for level_offset, level_name in enumerate(levels):
                    current_sheet.cell(heading_row + 1, column + level_offset).value = level_name
                    for segment_offset, segment in enumerate(pivot_segments):
                        if segment not in block.index:
                            continue
                        value = block.at[segment, level_name]
                        # Leave the cell blank rather than writing NaN.
                        if pd.notna(value):
                            current_sheet.cell(heading_row + 2 + segment_offset, column + level_offset).value = value
                column += len(levels)

            current_sheet.cell(heading_row + 1, 3).value = 'SEGMENT'
            for segment_offset, segment in enumerate(pivot_segments):
                current_sheet.cell(heading_row + 2 + segment_offset, 3).value = segment

            # Leave a gap before the table of the next remark.
            heading_row = heading_row + len(pivot_segments) + 4
        if empty:
            # No remark produced any rows for this ID: drop its sheet again.
            wb.remove(current_sheet)
except Exception as e:
    # Best effort: report the problem and still save what was built so far.
    print("Exception occured "+str(e))
finally:
    wb.save('input.xlsx')
    wb.close()
输出:-
预期输出:-
我无法删除空列。我还希望对于适用的ID,LEVEL为NA的参数作为透视视图表中的第一列出现,如预期输出中所示。
解决方案
尝试:
# Blank out the placeholder level so its parameters can be told apart from
# those with real levels when the columns are sorted below.
df = df.replace("NOT APPLICABLE", "")
# Keyword arguments are required: DataFrame.pivot no longer accepts positional
# arguments in pandas 2.0 and later.
x = df[df.REMARKS.eq("GREEN")].pivot(
    index="SEGMENT", columns=["PARAMETER", "LEVEL"], values="VALUE"
)
# Reorder the (PARAMETER, LEVEL) columns: blank-level parameters first, then
# alphabetically by parameter and level.
x = x.reindex(
    pd.MultiIndex.from_tuples(
        sorted(x.columns, key=lambda k: (k[1] != "", k[0], k[1]))
    ),
    axis=1,
)
print(x)
打印:
max ball egg pen rat tin
H L M H L M H L M H L M H L M
SEGMENT
set1 800 29000 32000 30000 62 68 67 1 3 2 4 6 5 50434 67667 60852
set2 300 2000 2000 2000 66 67 67 2 4 3 3 5 4 10271 11078 10633
相关文章