Open MrPowers opened 1 year ago
Useful code snippet to leverage:
import itertools
from datetime import datetime, timedelta
import pyarrow as pa
import pyarrow.compute as pc
from deltalake import DeltaTable, write_deltalake
def record_observations(date: datetime, nrows: int = 1000) -> pa.Table:
    """Build a table of synthetic observations for a single datetime.

    Args:
        date: Datetime stamped on every row; its calendar date fills the
            ``date`` column and the full value fills ``timestamp``.
        nrows: Number of rows to generate. Defaults to 1000, the batch
            size this snippet originally hard-coded.

    Returns:
        A ``pa.Table`` with columns ``date``, ``timestamp`` and ``value``,
        where ``value`` holds ``nrows`` random numbers from ``pc.random``.
    """
    return pa.table(
        {
            # Every row of a batch shares the same date/timestamp; only the
            # random "value" column varies within the batch.
            "date": pa.array([date.date()] * nrows),
            "timestamp": pa.array([date] * nrows),
            "value": pc.random(nrows),
        }
    )
# Preview one generated batch by converting it with ``to_pandas()``.
record_observations(datetime(2021, 1, 1, 12)).to_pandas()

# Lazy, unbounded stream of hourly timestamps starting at 2021-01-01 00:00.
hours_iter = map(
    lambda offset: datetime(2021, 1, 1) + timedelta(hours=offset),
    itertools.count(),
)

# Take the first 100 hours from the stream and append one batch per hour
# to the "observation_data" table, partitioned on the "date" column.
for timestamp in itertools.islice(hours_iter, 100):
    write_deltalake(
        "observation_data",
        record_observations(timestamp),
        partition_by=["date"],
        mode="append",
    )
Goal: make it simple to generate time-series data like this.