Open chanelcolgate opened 3 years ago
import logging, urllib3, shutil
# Show INFO-level messages so the logging.info() calls in this script are emitted.
logging.basicConfig(level=logging.INFO)
def download_dataset(url, LOCAL_FILE_NAME):
    """Download ``url`` and save the response body to ``LOCAL_FILE_NAME``.

    The body is streamed to disk (``preload_content=False`` plus
    ``shutil.copyfileobj``) instead of being loaded into memory first.

    Args:
        url: Address of the resource to fetch with an HTTP GET.
        LOCAL_FILE_NAME: Path of the local file, opened in ``"wb"`` mode
            (an existing file is overwritten).

    Raises:
        urllib3.exceptions.HTTPError: If the server answers with a
            non-2xx status.
        OSError: If the local file cannot be opened or written.
    """
    # Kept from the original: silences urllib3's InsecureRequestWarning
    # for the whole process.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    http = urllib3.PoolManager()
    with http.request("GET", url, preload_content=False) as res:
        # Validate the status BEFORE touching the output file, so an error
        # page (404, 500, ...) is never saved as if it were the dataset and
        # no empty file is left behind on failure.
        if not 200 <= res.status < 300:
            raise urllib3.exceptions.HTTPError(
                f"Download of {url} failed with HTTP status {res.status}"
            )
        with open(LOCAL_FILE_NAME, "wb") as out_file:
            shutil.copyfileobj(res, out_file)
    logging.info("Download completed.")
# Script body: fetch both CSV files, saving each under a relative file name.
logging.info("Started download script")

URL_1 = 'https://raw.githubusercontent.com/chanelcolgate/hydroelectric-project/master/data/SMP2020-2021_48h.csv'
LOCAL_FILE_NAME_1 = 'SMP2020-2021_48h.csv'
URL_2 = 'https://raw.githubusercontent.com/chanelcolgate/hydroelectric-project/master/data/SMP_5678_2021.csv'
LOCAL_FILE_NAME_2 = 'SMP_5678_2021.csv'

# Downloads run one after the other, in the same order as before.
for _source_url, _target_name in (
    (URL_1, LOCAL_FILE_NAME_1),
    (URL_2, LOCAL_FILE_NAME_2),
):
    download_dataset(_source_url, _target_name)
Description
Actions
tf.Example
data structures.
with tf.io.TFRecordWriter("test.tfrecord") as w:
    w.write(b"First record")
    w.write(b"Second record")
# Iterate over the records stored in test.tfrecord and print each one.
dataset = tf.data.TFRecordDataset("test.tfrecord")
for record in dataset:
    print(record)
# Build the input location from the current working directory and the
# "data" sub-directory, then run CsvExampleGen on it.
base_dir = os.getcwd()
data_dir = "data"
# Define the data path
examples = external_input(os.path.join(base_dir, data_dir))
# Instantiate the pipeline component.
example_gen = CsvExampleGen(input=examples)
# Execute the component interactively
context.run(example_gen)
6:2:2
. The ratio settings are defined through the hash_buckets
# Same CsvExampleGen setup as before, but with an explicit output
# configuration passed through ``output_config``.
base_dir = os.getcwd()
data_dir = 'data'
# NOTE(review): no split definition is visible inside Output(...) here, so
# as written this passes an empty Output message. The surrounding text
# mentions 6:2:2 ratios set via hash_buckets -- presumably a split_config
# argument was lost; confirm against the original snippet.
output = example_gen_pb2.Output(
    # Define preferred splits
)
examples = external_input(os.path.join(base_dir, data_dir))
# Add output_config argument
example_gen = CsvExampleGen(input=examples, output_config=output)
context.run(example_gen)
tf.Example
def _bytes_feature(value):
    """Return a ``tf.train.Feature`` whose ``bytes_list`` holds ``value``.

    ``value`` is wrapped in a one-element list, so a single item is
    expected rather than a sequence.
    """
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _float_feature(value):
    """Return a ``tf.train.Feature`` whose ``float_list`` holds ``value``.

    ``value`` is wrapped in a one-element list before being stored.
    """
    wrapped = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=wrapped)
def _int64_feature(value):
    """Return a ``tf.train.Feature`` whose ``int64_list`` holds ``value``.

    ``value`` is wrapped in a one-element list before being stored.
    """
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)
Estimate
Tests