nourou4them / python-learning

Learn to code in Python
0 stars 0 forks source link

Compare 2 #20

Open nourou4them opened 2 months ago

nourou4them commented 2 months ago

import pandas as pd import openpyxl import getpass

def compare_excel_files(file1_path, file2_path): try:

Load both Excel files

    wb1 = openpyxl.load_workbook(file1_path)
    wb2 = openpyxl.load_workbook(file2_path)

    # Initialize list to store differences and sheet info
    differences = []
    sheets_info = []

    # Compare the number of sheets and their labels
    sheets1 = wb1.sheetnames
    sheets2 = wb2.sheetnames
    if sheets1 != sheets2:
        differences.append(f"Sheet names or order differ: {sheets1} vs {sheets2}")

    # Iterate over the sheets in file 1
    for sheet_name in sheets1:
        if sheet_name in sheets2:
            df1 = pd.read_excel(file1_path, sheet_name=sheet_name)
            df2 = pd.read_excel(file2_path, sheet_name=sheet_name)

            # Compare the shape of the sheets
            if df1.shape != df2.shape:
                differences.append(f"Different shape in sheet '{sheet_name}': {df1.shape} (File 1) vs {df2.shape} (File 2)")

                # Identify added or removed rows
                if df1.shape[0] != df2.shape[0]:
                    row_diff = abs(df1.shape[0] - df2.shape[0])
                    if df1.shape[0] > df2.shape[0]:
                        differences.append(f"{row_diff} rows removed in sheet '{sheet_name}' in File 2")
                        removed_rows = df1[~df1.isin(df2)].dropna(how='all')
                        differences.append(f"Removed rows in sheet '{sheet_name}':\n{removed_rows}")
                    else:
                        differences.append(f"{row_diff} rows added in sheet '{sheet_name}' in File 2")
                        added_rows = df2[~df2.isin(df1)].dropna(how='all')
                        differences.append(f"Added rows in sheet '{sheet_name}':\n{added_rows}")

                # Identify added or removed columns
                if df1.shape[1] != df2.shape[1]:
                    col_diff = abs(df1.shape[1] - df2.shape[1])
                    if df1.shape[1] > df2.shape[1]:
                        differences.append(f"{col_diff} columns removed in sheet '{sheet_name}' in File 2")
                        removed_cols = df1.columns.difference(df2.columns)
                        differences.append(f"Removed columns in sheet '{sheet_name}': {removed_cols.tolist()}")
                    else:
                        differences.append(f"{col_diff} columns added in sheet '{sheet_name}' in File 2")
                        added_cols = df2.columns.difference(df1.columns)
                        differences.append(f"Added columns in sheet '{sheet_name}': {added_cols.tolist()}")
            else:
                # Compare the data cell by cell
                for row in range(len(df1)):
                    for col in df1.columns:
                        value1 = df1.at[row, col]
                        value2 = df2.at[row, col]
                        if pd.isna(value1) and pd.isna(value2):
                            continue  # Skip NaN vs NaN comparisons
                        if value1 != value2:
                            differences.append(f"Difference in sheet '{sheet_name}', row {row+1}, column '{col}': {value1} (File 1) vs {value2} (File 2)")
        else:
            differences.append(f"Sheet '{sheet_name}' is missing in File 2")

    # Check for sheets that are in file 2 but not in file 1
    for sheet_name in sheets2:
        if sheet_name not in sheets1:
            differences.append(f"Sheet '{sheet_name}' is missing in File 1")

    return differences
except Exception as e:
    print(f"An error occurred: {str(e)}")
    return None

def write_output_to_txt(output_path, differences, file1_name, file2_name, username): try: with open(output_path, 'w') as file: file.write(f"Username: {username}\n") file.write(f"Input File 1: {file1_name}\n") file.write(f"Input File 2: {file2_name}\n\n")

        if differences:
            file.write("Differences between files:\n")
            for diff in differences:
                file.write(diff + '\n')
        else:
            file.write("No differences found between files.")
    print("Analysis complete. Output saved to", output_path)
except Exception as e:
    print(f"An error occurred while writing output: {str(e)}")

if name == "main": try:

Input file paths

    file1_path = input("Enter path to the first Excel file: ")
    file2_path = input("Enter path to the second Excel file: ")

    file1_name = file1_path.split('/')[-1]  # Extracting file name with extension
    file2_name = file2_path.split('/')[-1]  # Extracting file name with extension

    # Analyze files and get differences
    differences = compare_excel_files(file1_path, file2_path)

    # Output to txt file
    output_path = input("Enter the path to save the output txt file: ")
    username = getpass.getuser()  # Get username of the user running the program
    write_output_to_txt(output_path, differences, file1_name, file2_name, username)
except KeyboardInterrupt:
    print("\nProgram interrupted by user.")
except Exception as e:
    print(f"An unexpected error occurred: {str(e)}")