Need to provide the ability to add file metadata (source file path, name, modification time, etc.) to the DataFrame, e.g.:
import dlt

@dlt.table
def bronze():
    return (
        spark.readStream.format("cloudFiles")
        # Define the schema for the ~6 common columns shared across files; all
        # other input fields are "rescued" into a JSON string column that can
        # be queried with JSON path expressions.
        # _file_path is spoofed here: the rescued-data JSON otherwise records
        # the source file path under a _file_path key, so declaring it in the
        # schema keeps it out of the rescue column. The always-null spoofed
        # column is dropped below.
        .schema("Common1 string, Common2 string, _file_path string")
        .option("cloudFiles.format", "csv")
        .option("cloudFiles.schemaEvolutionMode", "rescue")
        # Rename the default _rescued_data column to whatever you want.
        .option("rescuedDataColumn", "extraFields")
        .option("header", "true")
        .load("/Volumes/vol/data/*.txt")
        .select("*", "_metadata")  # add file metadata to the output
        .drop("_file_path")  # keep _file_path out of the extraFields rescue column
    )
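As an alternative to spoofing _file_path in the schema, the file-path key can be kept out of the rescued data column entirely via a Spark conf. A minimal sketch based on the documented rescuedDataColumn file-path setting; worth verifying against your runtime version, and in a DLT pipeline it would go in the pipeline's Spark configuration rather than inline:

# Stop recording the source file path inside the rescued-data JSON.
spark.conf.set("spark.databricks.sql.rescuedDataColumn.filePath.enabled", "false")

With that set, the schema only needs the common columns and the .drop("_file_path") step goes away.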
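Once the bronze table carries _metadata and extraFields, a downstream table can flatten them. A minimal sketch, assuming the bronze table above; the silver table name and the ExtraCol field pulled from the rescued JSON are illustrative:

import dlt
from pyspark.sql import functions as F

@dlt.table
def silver():
    return (
        dlt.read_stream("bronze")
        # Promote useful _metadata subfields to top-level columns.
        .withColumn("source_file", F.col("_metadata.file_path"))
        .withColumn("file_modified", F.col("_metadata.file_modification_time"))
        # The rescue column is a JSON string; extract a field with get_json_object.
        .withColumn("ExtraCol", F.get_json_object(F.col("extraFields"), "$.ExtraCol"))
        .drop("_metadata")
    )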