nourou4them / python-learning

Learn to code in Python
0 stars 0 forks source link

central #21

Open nourou4them opened 2 weeks ago

nourou4them commented 2 weeks ago
pip install pandas openpyxl
import pandas as pd

# Load the Excel file
file_path = 'input_file.xlsx'
sheet_name = 'safir'
df = pd.read_excel(file_path, sheet_name=sheet_name, header=2)  # assuming headers start at row 3
# Get unique values from column E
unique_values = df['E'].unique()
# Create a new Excel file
output_file = 'output_file.xlsx'
writer = pd.ExcelWriter(output_file, engine='openpyxl')

# Iterate over unique values in column E
for value in unique_values:
    # Filter rows where column E equals current value
    filtered_df = df[df['E'] == value]

    # Write filtered data to a new sheet
    filtered_df.to_excel(writer, sheet_name=value, index=False)

# Save the Excel file
writer.save()
nourou4them commented 2 weeks ago
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Font
from openpyxl.writer.excel import save_virtual_workbook

def filter_and_create_excel(file_path, sheet_name, output_file):
    # Load the DataFrame, specifying the header row correctly
    print("Loading Excel file...")
    df = pd.read_excel(file_path, sheet_name=sheet_name, header=2)  # headers start at row 3 (index 2)

    # Print column names for debugging
    print("Columns in the DataFrame:", df.columns)

    # Exclude specified columns
    excluded_columns = ['B', 'C', 'D'] + list(df.loc[:, 'R':'AL'].columns) + list(df.loc[:, 'AS':'AV'].columns)
    df_filtered = df.drop(columns=excluded_columns)

    # Check if the "Filter" column exists
    if 'Filter' not in df_filtered.columns:
        raise KeyError(f"Column 'Filter' not found in the DataFrame. Available columns: {df_filtered.columns}")

    # Get unique values from the "Filter" column
    unique_values = df_filtered['Filter'].unique()

    # Create a new Excel file
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        # Create the "Index" sheet
        index_df = pd.DataFrame(columns=["Sheet Name"])
        index_df.to_excel(writer, sheet_name="Index", index=False)

        # Iterate over unique values in the "Filter" column
        for value in unique_values:
            # Filter rows where the "Filter" column equals the current value
            filtered_df = df_filtered[df_filtered['Filter'] == value]

            # Convert value to string to use as sheet name
            sheet_name = str(value)

            # Write filtered data to a new sheet
            filtered_df.to_excel(writer, sheet_name=sheet_name, index=False)

        writer.save()

    # Reopen the workbook to add hyperlinks
    wb = load_workbook(output_file)
    index_ws = wb['Index']

    # Add hyperlinks to the "Index" sheet
    for i, value in enumerate(unique_values, start=2):  # Start at row 2 to leave space for headers
        sheet_name = str(value)
        cell = index_ws.cell(row=i, column=1)
        cell.value = sheet_name
        cell.hyperlink = f"#{sheet_name}!A1"
        cell.style = "Hyperlink"

    # Add a bold font to the header
    index_ws.cell(row=1, column=1).font = Font(bold=True)

    # Save the workbook
    wb.save(output_file)
    print(f"Filtered data has been written to {output_file}")

if __name__ == "__main__":
    # Input file paths
    file_path = input("Enter path to the first Excel file: ")
    sheet_name = 'safir'
    output_file = 'output_file.xlsx'

    # Run the function
    filter_and_create_excel(file_path, sheet_name, output_file)