Open vperluzzo opened 3 years ago
@jkrobicki and @AdamSulek any updates?
Created table in postgres: And trying to put excel into this db:
Trying on this code to work:
import pandas as pd from sqlalchemy import create_engine import psycopg2
engine=create_engine("postresql+psycopg2://sqlpad:sqlpad/localhost:3001/Postgres demo")
def sheets(data, file): if(data=="Sheet1"): df=pd.read_excel(file, "Sheet1") df.to_sql(name="timelogs_excel", con=engine,if_exists="append", index=False)
with pd.ExcelFile("extract_timelogs_may_july.xlsx") as xls: for sheet_name in xls.sheet_names: sheets(sheet_name,xls)
Changed code to: import pandas as pd from sqlalchemy import create_engine
engine = create_engine("postgresql://sqlpad:sqlpad@localhost:3001/Postgres_sqlpad", pool_pre_ping=True)
def sheets(file): df=pd.read_excel(file) print(df) df.to_sql(name="timelogs_excel", con=engine, if_exists="append", index=False)
Working on a problem with connection to database:
Changing database to SQLite and trying to put two column table first
Managed to insert specific columns into SQLite database
Working code for importing "extract_timelogs_may_july.xlsx" into SQLite database via csv: import sqlite3 import pandas as pd from sqlalchemy import create_engine from sqlalchemy import Table, Column, Integer, String, MetaData
meta = MetaData() file = "extract_timelogs_may_july.xlsx" output = "output.xlsx"
df = pd.read_excel(file) dfc = df.to_csv("dfc.csv") readcsv = pd.read_csv("dfc.csv")
dfr = readcsv.drop( labels=None, axis=0, index=None, columns="Unnamed: 0", level=None, inplace=False,errors='raise')
engine = create_engine('sqlite:///timelogs.db', echo = True) meta = MetaData()
timelogs = Table('timelogs', meta,
Column('contractor', String(60)),
Column('date', String(60)),
Column('start', String(60)),
Column('end', String(60)),
Column('work_element', String(60)),
Column('calendar_month', String(60)),
Column('work_area', String(60)),
Column('client', String(60)),
Column('count_hours', String(60)),
Column('count_days', String(60)),
Column('client_rate', String(60)),
Column('client_charges', String(60)),
Column('invoice_month', String(60))
)
meta.create_all(engine)
x = dfr.to_sql(name="timelogs", con=engine, if_exists="append", index=False)
results = engine.execute("select * from timelogs")
final = pd.DataFrame(data=results, columns=dfr.columns) final
Final code for Prefect Flow:
import sqlite3 import pandas as pd from sqlalchemy import create_engine from sqlalchemy import Table, Column, Integer, String, MetaData import prefect from prefect import task, Flow engine = create_engine('sqlite:///timelogs.db', echo = True)
@task def read_excel(file): df = pd.read_excel(file) return df @task def to_csv(df): dfc = df.to_csv("dfc.csv") return dfc @task def read_csv(): readcsv = pd.read_csv("dfc.csv") dfr = readcsv.drop( labels=None, axis=0, index=None, columns="Unnamed: 0", level=None, inplace=False,errors='raise') return dfr
@task def create_table(): meta = MetaData()
timelogs = Table('timelogs', meta,
Column('contractor', String(60)),
Column('date', String(60)),
Column('start', String(60)),
Column('end', String(60)),
Column('work_element', String(60)),
Column('calendar_month', String(60)),
Column('work_area', String(60)),
Column('client', String(60)),
Column('count_hours', String(60)),
Column('count_days', String(60)),
Column('client_rate', String(60)),
Column('client_charges', String(60)),
Column('invoice_month', String(60))
)
table = meta.create_all(engine)
@task def to_sql(dfr): tosql = dfr.to_sql(name="timelogs", con=engine, if_exists="append", index=False) return tosql
@task def read_table(): table = pd.read_sql_table("timelogs", con=engine) return table
with Flow("excel_to_database") as flow: read_excel = read_excel("extract_timelogs_may_july.xlsx") to_csv = to_csv(read_excel) read_csv = read_csv() create_table() to_sql = to_sql(read_csv) read_table() flow.visualize() flow.run()
print(tabulate(df, headers = 'keys', tablefmt = 'psql'))