How do I concatenate multiple xlsx files with the same sheet_names. For example,
I have 3 xlsx files, Rob_schedule.xlsx, Mike_schdule.xlsx and Jerome_schedule.xlsx.
Each file has the following sheet/tab names : home, office & school.
The code below generates the 3 xlsx files ( you can copy + paste and run to generate the excel files)
##############################Generating the data for Rob_schedule.xlsx########################
import pandas as pd
import numpy as np
df= {
'Date':[10232020,10242020,10252020,10262020],
'Class':['AP_Bio','AP_Chem','Physics','History'],
'Period':[3,1,2,4]}
school = pd.DataFrame(df,columns = ['Date','Class','Period'])
school
df2= {
'Date':[10232020,10242020,10252020,10262020],
'Meeting':['MQ1','MQ6','MQ2','MQ8'],
'Lunch':[1,1,1,3],
'code':['java','python','C','C++']}
office = pd.DataFrame(df2,columns = ['Date','Meeting','Lunch','code'])
office
df3= {
'cooking':['C','B','D','B'],
'Laundry':['color','white','White','color'],
'cleaning':['balcony','garage','restroom','bathroom']}
home = pd.DataFrame(df3,columns = ['cooking','Laundry','cleaning'])
home
import pandas as pd
#initialze the excel writer
writer = pd.ExcelWriter('Rob_schedule.xlsx', engine='xlsxwriter')
#store your dataframes in a dict, where the key is the sheet name you want
frames = {'home':home, 'office':office,
'school':school}
#now loop thru and put each on a specific sheet
for sheet, frame in frames.items():
frame.to_excel(writer, sheet_name = sheet,index = False)
#critical last step
writer.save()
################################ generating Mike_schedule.xlsx###################################
import pandas as pd
import numpy as np
df= {
'Date':[10232020,10242020,10252020,10262020],
'Class':['AP_Bio','AP_Chem','Physics','History'],
'Period':[3,1,2,4]}
school = pd.DataFrame(df,columns = ['Date','Class','Period'])
school
df2= {
'Date':[10232020,10242020,10252020,10262020],
'Meeting':['MQ1','MQ2','MQ4','MQ5'],
'Lunch':[1,1,1,3],
'code':['javascript','R','C','C++']}
office = pd.DataFrame(df2,columns = ['Date','Meeting','Lunch','code'])
office
df3= {
'cooking':['A','B','D','B'],
'Laundry':['color','white','white','color'],
'cleaning':['patio','garage','living_room','bathroom']}
home = pd.DataFrame(df3,columns = ['cooking','Laundry','cleaning'])
home
#initialze the excel writer
writer = pd.ExcelWriter('Mike_schedule.xlsx', engine='xlsxwriter')
#store your dataframes in a dict, where the key is the sheet name you want
frames = {'home':home, 'office':office,
'school':school}
#now loop thru and put each on a specific sheet
for sheet, frame in frames.items(): # .use .items for python 3.X
frame.to_excel(writer, sheet_name = sheet,index = False)
#critical last step
writer.save()
######################### Generate Jerome schedule###########################################
df= {
'Date':[10232020,10242020,10252020,10262020],
'Class':['French','Math','Physics','History'],
'Period':[3,1,2,4]}
school = pd.DataFrame(df,columns = ['Date','Class','Period'])
school
df2= {
'Date':[10232020,10242020,10252020,10262020],
'Meeting':['MQ1','MQ2','MQ4','MQ5'],
'Lunch':[1,1,1,3],
'code':['javascript','python','R','C++']}
office = pd.DataFrame(df2,columns = ['Date','Meeting','Lunch','code'])
office
df3= {
'cooking':['X','B','D','C'],
'Laundry':['color','white','white','color'],
'cleaning':['patio','garage','living_room','bathroom']}
home = pd.DataFrame(df3,columns = ['cooking','Laundry','cleaning'])
home
import pandas as pd
#initialze the excel writer
writer = pd.ExcelWriter('Jerome_schedule.xlsx', engine='xlsxwriter')
#store your dataframes in a dict, where the key is the sheet name you want
frames = {'home':home, 'office':office,
'school':school}
#now loop thru and put each on a specific sheet
for sheet, frame in frames.items(): # .use .items for python 3.X
frame.to_excel(writer, sheet_name = sheet,index = False)
#critical last step
writer.save()
I want to
concatenatethe corresponding sheets/tabs :home,office, andschoolforRob_schedule.xlsx,Mike_schedule.xlsx&Jerome_schedule.xlsxexport the concatenated dataframes as
family_schedule.xlsxwithhome,officeandschooltabs
My attempt:
# This code concatenates all the tabs into one tab, but what I want is to concatenate all by their corresponding sheet/tab names
import pandas as pd
path = os.chdir(r'mypath\\')
files = os.listdir(path)
files
# pull files with `.xlsx` extension
excel_files = [file for file in files if '.xlsx' in file]
excel_files
def create_df_from_excel(file_name):
file = pd.ExcelFile(file_name)
names = file.sheet_names
return pd.concat([file.parse(name) for name in names])
df = pd.concat(
[create_df_from_excel(xl) for xl in excel_files]
)
# save the data frame
writer = pd.ExcelWriter('family_reschedule.xlsx')
df.to_excel(writer, '')
writer.save()