Closed RobinL closed 4 years ago
Intended usage:
# df is a Spark DataFrame
pmeta_json = df.schema.json()
db = get_existing_database_from_glue_catalogue("test_data_types")
tab = tablemeta_from_parquet_meta(pmeta_json, name="parquet_test_table", location="database/test/test_parquet/")
db.add_table(tab)
db.update_glue_database()
or
from pyarrow.parquet import ParquetFile
pmeta_json = ParquetFile("test_nest.parquet").metadata.metadata[b"org.apache.spark.sql.parquet.row.metadata"]
db = get_existing_database_from_glue_catalogue("test_data_types")
tab = tablemeta_from_parquet_meta(pmeta_json, name="parquet_test_table", location="database/test/test_parquet/")
db.add_table(tab)
db.update_glue_database()
Closes #123