I have a Kubeflow notebook server that houses large files (100 GB+), and I was trying to read one into my pipeline, but I am getting a file-not-found error. I'm not sure what the best next step is, since most tutorials I see download their data, which isn't feasible in my case.
`import kfp
import kfp.dsl as dsl
import kfp.components as comp
from kubernetes import config, client
from kubernetes.client import CoreV1Api, V1PodList,V1Volume
def data_loader(path: str = '/home/jovyan/workspace/sandbox/data.csv'):
    """Read a headerless CSV into a pandas DataFrame and return it.

    Args:
        path: Filesystem path of the CSV file. Defaults to the original
            hard-coded notebook-server location.

    Returns:
        pandas.DataFrame holding the file contents (``header=None``, so
        the first row is data, not column names).

    Raises:
        FileNotFoundError: if ``path`` does not exist in the pod that
            runs this component.
    """
    # Import inside the function body so func_to_container_op can ship
    # this function into its own container image (unused numpy/sys
    # imports removed).
    import pandas as pd

    # NOTE(review): as a pipeline component this runs in a *different*
    # pod from the notebook server, so a notebook-local path is not
    # visible unless the backing volume is mounted into the component
    # pod — confirm the volume setup; this is the likely cause of the
    # file-not-found error.
    data = pd.read_csv(path, sep=',', header=None)
    # Return the frame instead of discarding it, so the component has a
    # usable output.
    return data
I have a Kubeflow notebook server that houses large files (100 GB+), and I was trying to read one into my pipeline, but I am getting a file-not-found error. I'm not sure what the best next step is, since most tutorials I see download their data, which isn't feasible in my case.
import kfp
import kfp.dsl as dsl
import kfp.components as comp
from kubernetes import config, client
from kubernetes.client import CoreV1Api, V1PodList, V1Volume


def data_loader():
    """Load the sandbox dataset CSV from the notebook-server filesystem."""
    import pandas as pd
    import numpy as np
    import sys

    # NOTE(review): this path lives on the notebook-server pod; a pipeline
    # component executes in its own pod, so the file will not be found
    # unless the backing volume is mounted there — verify the volume mount.
    data = pd.read_csv('/home/jovyan/workspace/sandbox/data.csv', sep=',', header=None)
# Wrap data_loader as a containerized pipeline op built from the function.
# NOTE(review): the base image is tensorflow/tensorflow:1.11.0-py3 — confirm
# it ships pandas, since the component imports pandas at runtime.
data_loading_op = comp.func_to_container_op(data_loader, base_image='tensorflow/tensorflow:1.11.0-py3')
@dsl.pipeline(
    name='DataLoading Pipeline',
    description='Test.',
)
def phenology_pipeline():
    """Single-step pipeline that runs the data-loading component."""
    data_loading_task = data_loading_op()