import pandas as pd
import getpass
def _rows_with_key(frame, key):
    """Return the rows of *frame* whose column 'A' equals *key*.

    A blank (NaN) key is matched against blank cells explicitly, because
    NaN never compares equal to itself with ``==``.
    """
    if pd.isna(key):
        return frame[frame['A'].isna()]
    return frame[frame['A'] == key]


def _cells_differ(left, right):
    """Return True when two cell values differ; two blank cells count as equal."""
    if pd.isna(left) and pd.isna(right):
        return False
    return left != right


def compare_excel_files(file1_path, file2_path):
    """Compare two Excel sheets keyed on column 'A' and collect differences.

    Rows are matched on the value in column 'A'. For every pair of rows that
    share a key, the remaining columns of file 1 are compared in order and
    the first differing column is reported. Keys present in only one file are
    reported with the first matching row's values from the file that has them.

    Blank cells are treated as equal to each other, and blank keys are matched
    against blank keys instead of aborting the comparison.

    Args:
        file1_path: Path of the first Excel file (passed to ``pd.read_excel``).
        file2_path: Path of the second Excel file.

    Returns:
        A tuple ``(rows_in_file1, rows_in_file2, differences)`` where
        ``differences`` is a DataFrame with one row per reported difference.
        On any error the message is printed and ``(None, None, None)`` is
        returned.
    """
    try:
        df1 = pd.read_excel(file1_path)
        df2 = pd.read_excel(file2_path)

        # Compare every column of file 1 except the key, wherever 'A' sits.
        compare_cols = [col for col in df1.columns if col != 'A']
        differences = []

        for value in df1['A'].unique():
            rows_file1 = _rows_with_key(df1, value)
            rows_file2 = _rows_with_key(df2, value)

            if rows_file2.empty:
                # Key exists only in file 1: report its first row's values.
                diff_row = {'A': value}
                for col in compare_cols:
                    value_file1 = rows_file1[col].iloc[0]
                    diff_row[col] = f"{value_file1} (File 1) | Missing in File 2"
                differences.append(diff_row)
                continue

            # Duplicate keys are compared as every file-1 row against every
            # file-2 row sharing that key.
            for _, row_file1 in rows_file1.iterrows():
                for _, row_file2 in rows_file2.iterrows():
                    for col in compare_cols:
                        value_file1 = row_file1[col]
                        value_file2 = row_file2[col]
                        if _cells_differ(value_file1, value_file2):
                            differences.append({
                                'A': value,
                                col: f"{value_file1} (File 1) vs {value_file2} (File 2)",
                            })
                            break  # Only the first differing column is reported per pair

        # Keys that appear only in file 2.
        for value in df2['A'].unique():
            if not _rows_with_key(df1, value).empty:
                continue
            rows_file2 = _rows_with_key(df2, value)
            diff_row = {'A': value}
            for col in compare_cols:
                diff_row[col] = f"Missing in File 1 | {rows_file2[col].iloc[0]} (File 2)"
            differences.append(diff_row)

        return len(df1), len(df2), pd.DataFrame(differences)
    except Exception as e:
        # Boundary handler: callers rely on the (None, None, None) sentinel.
        print(f"An error occurred: {str(e)}")
        return None, None, None
def write_output_to_txt(output_path, num_lines_file1, num_lines_file2, differences, file1_name, file2_name, username):
    """Write the comparison report to a text file.

    The report starts with the username, both input file names and both row
    counts, followed by one of: the de-duplicated differences table, a note
    that no differences were found, or a note that the comparison failed.

    Args:
        output_path: Destination path of the report; overwritten if present.
        num_lines_file1: Row count of the first file (``None`` on failure).
        num_lines_file2: Row count of the second file (``None`` on failure).
        differences: DataFrame of differences, or ``None`` when the
            comparison could not be carried out.
        file1_name: Display name of the first input file.
        file2_name: Display name of the second input file.
        username: Name of the user running the program.

    Returns:
        None. Errors while writing are printed, not raised.
    """
    try:
        # Explicit UTF-8 so non-ASCII cell text is written the same on every platform.
        with open(output_path, 'w', encoding='utf-8') as file:
            file.write(f"Username: {username}\n")
            file.write(f"Input File 1: {file1_name}\n")
            file.write(f"Input File 2: {file2_name}\n")
            file.write(f"Number of lines in File 1: {num_lines_file1}\n")
            file.write(f"Number of lines in File 2: {num_lines_file2}\n\n")
            if differences is None:
                # A failed comparison must not be reported as "no differences".
                file.write("Comparison failed; no differences could be computed.")
            elif differences.empty:
                file.write("No differences found between files.")
            else:
                file.write("Differences between files:\n")
                file.write(differences.drop_duplicates().to_string(index=False))
        print("Analysis complete. Output saved to", output_path)
    except Exception as e:
        # Best-effort reporting: surface the problem without crashing the script.
        print(f"An error occurred while writing output: {str(e)}")
if __name__ == "__main__":
    # Local import: only the script entry point needs it.
    import os

    try:
        # Ask for the two workbooks to compare.
        file1_path = input("Enter path to the first Excel file: ")
        file2_path = input("Enter path to the second Excel file: ")

        # os.path.basename understands the platform's separators, so Windows
        # backslash paths yield the bare file name too.
        file1_name = os.path.basename(file1_path)
        file2_name = os.path.basename(file2_path)

        # Analyze files and get differences; all three values are None on failure.
        num_lines_file1, num_lines_file2, differences = compare_excel_files(file1_path, file2_path)

        # Write the report, tagged with the account running the program.
        output_path = input("Enter the path to save the output txt file: ")
        username = getpass.getuser()
        write_output_to_txt(output_path, num_lines_file1, num_lines_file2, differences, file1_name, file2_name, username)
    except KeyboardInterrupt:
        print("\nProgram interrupted by user.")
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        print(f"An unexpected error occurred: {str(e)}")