This is to prevent duplicates. Upsert is equivalent to INSERT ... ON CONFLICT DO UPDATE. Pandas does not directly support this with data frames but it can be implemented roughly as follows:
import pandas as pd
from sqlalchemy import create_engine, text
# Create a database engine
engine = create_engine("postgresql+psycopg2://username:password@localhost:5432/mydatabase")
# Sample data
data = {'user_id': [1, 2, 3], 'username': ['alice', 'bob', 'charlie'], 'email': ['alice@example.com', 'bob@example.com', 'charlie@example.com']}
df = pd.DataFrame(data)
# Define the table name
table_name = 'users'
# Method to upsert
def upsert_dataframe(df, table_name, engine):
# Convert the DataFrame to a list of dictionaries for SQLAlchemy to use in the raw SQL statement
records = df.to_dict(orient='records')
# Define the insert statement with an ON CONFLICT clause for upsert functionality
insert_stmt = f"""
INSERT INTO {table_name} (user_id, username, email)
VALUES (:user_id, :username, :email)
ON CONFLICT (user_id) DO UPDATE SET
username = EXCLUDED.username,
email = EXCLUDED.email;
"""
# Execute the upsert
with engine.connect() as conn:
conn.execute(text(insert_stmt), records)
# Perform the upsert
upsert_dataframe(df, table_name, engine)
This is to prevent duplicates. Upsert is equivalent to
INSERT ... ON CONFLICT DO UPDATE
. Pandas does not directly support this with data frames but it can be implemented roughly as follows: