wb1 = openpyxl.load_workbook(file1_path)
wb2 = openpyxl.load_workbook(file2_path)
# Initialize list to store differences and sheet info
differences = []
sheets_info = []
# Compare the number of sheets and their labels
sheets1 = wb1.sheetnames
sheets2 = wb2.sheetnames
if sheets1 != sheets2:
differences.append(f"Sheet names or order differ: {sheets1} vs {sheets2}")
# Iterate over the sheets in file 1
for sheet_name in sheets1:
if sheet_name in sheets2:
df1 = pd.read_excel(file1_path, sheet_name=sheet_name)
df2 = pd.read_excel(file2_path, sheet_name=sheet_name)
# Compare the shape of the sheets
if df1.shape != df2.shape:
differences.append(f"Different shape in sheet '{sheet_name}': {df1.shape} (File 1) vs {df2.shape} (File 2)")
# Identify added or removed rows
if df1.shape[0] != df2.shape[0]:
row_diff = abs(df1.shape[0] - df2.shape[0])
if df1.shape[0] > df2.shape[0]:
differences.append(f"{row_diff} rows removed in sheet '{sheet_name}' in File 2")
removed_rows = df1[~df1.isin(df2)].dropna(how='all')
differences.append(f"Removed rows in sheet '{sheet_name}':\n{removed_rows}")
else:
differences.append(f"{row_diff} rows added in sheet '{sheet_name}' in File 2")
added_rows = df2[~df2.isin(df1)].dropna(how='all')
differences.append(f"Added rows in sheet '{sheet_name}':\n{added_rows}")
# Identify added or removed columns
if df1.shape[1] != df2.shape[1]:
col_diff = abs(df1.shape[1] - df2.shape[1])
if df1.shape[1] > df2.shape[1]:
differences.append(f"{col_diff} columns removed in sheet '{sheet_name}' in File 2")
removed_cols = df1.columns.difference(df2.columns)
differences.append(f"Removed columns in sheet '{sheet_name}': {removed_cols.tolist()}")
else:
differences.append(f"{col_diff} columns added in sheet '{sheet_name}' in File 2")
added_cols = df2.columns.difference(df1.columns)
differences.append(f"Added columns in sheet '{sheet_name}': {added_cols.tolist()}")
else:
# Compare the data cell by cell
for row in range(len(df1)):
for col in df1.columns:
value1 = df1.at[row, col]
value2 = df2.at[row, col]
if pd.isna(value1) and pd.isna(value2):
continue # Skip NaN vs NaN comparisons
if value1 != value2:
differences.append(f"Difference in sheet '{sheet_name}', row {row+1}, column '{col}': {value1} (File 1) vs {value2} (File 2)")
else:
differences.append(f"Sheet '{sheet_name}' is missing in File 2")
# Check for sheets that are in file 2 but not in file 1
for sheet_name in sheets2:
if sheet_name not in sheets1:
differences.append(f"Sheet '{sheet_name}' is missing in File 1")
return differences
except Exception as e:
print(f"An error occurred: {str(e)}")
return None
if differences:
file.write("Differences between files:\n")
for diff in differences:
file.write(diff + '\n')
else:
file.write("No differences found between files.")
print("Analysis complete. Output saved to", output_path)
except Exception as e:
print(f"An error occurred while writing output: {str(e)}")
if name == "main":
try:
Input file paths
file1_path = input("Enter path to the first Excel file: ")
file2_path = input("Enter path to the second Excel file: ")
file1_name = file1_path.split('/')[-1] # Extracting file name with extension
file2_name = file2_path.split('/')[-1] # Extracting file name with extension
# Analyze files and get differences
differences = compare_excel_files(file1_path, file2_path)
# Output to txt file
output_path = input("Enter the path to save the output txt file: ")
username = getpass.getuser() # Get username of the user running the program
write_output_to_txt(output_path, differences, file1_name, file2_name, username)
except KeyboardInterrupt:
print("\nProgram interrupted by user.")
except Exception as e:
print(f"An unexpected error occurred: {str(e)}")
import pandas as pd import openpyxl import getpass
def compare_excel_files(file1_path, file2_path): try:
Load both Excel files
def write_output_to_txt(output_path, differences, file1_name, file2_name, username): try: with open(output_path, 'w') as file: file.write(f"Username: {username}\n") file.write(f"Input File 1: {file1_name}\n") file.write(f"Input File 2: {file2_name}\n\n")
if name == "main": try:
Input file paths