Open atberium opened 1 day ago
Could you share the first several lines of the files on S3, so that I can try to reproduce it myself? Thank you.
Could you try to use client_kwargs instead of endpoint_url? Just like this example:
d34 = Loader("s3://datafiles/group.e", key='access-id', secret='secret-access-key', client_kwargs={'region_name': 'us-east-1'})
ref: https://graphscope.io/docs/loading_graphs#loader-variants
Hi! Yes, please:
id|firstName|lastName|gender
933|Mahinda|Perera|male
6597069767117|Eli|Peretz|female
10995116278700|Joseph|Anderson|female
21990232556027|Yacine|Abdelli|male
26388279066636|Jose|Alonso|female
28587302322727|Steve|Moore|male
...
src|dst|creationDate
933|4398046511628|2010-07-30T15:19:53.298+0000
933|8796093023017|2010-10-27T02:33:06.288+0000
933|28587302322537|2012-04-12T13:56:58.931+0000
6597069767117|15393162789274|2011-03-25T14:49:23.134+0000
6597069767117|17592186044425|2011-06-16T11:28:39.623+0000
6597069767117|17592186044897|2011-06-30T06:45:20.777+0000
6597069767117|17592186045408|2011-06-10T17:47:19.432+0000
6597069767117|19791209300796|2011-07-27T15:43:14.869+0000
21990232556027|24189255811876|2012-01-25T11:51:27.348+0000
21990232556027|28587302322870|2012-04-25T21:31:05.259+0000
26388279066636|26388279067534|2012-02-22T05:30:13.683+0000
28587302322727|32985348833314|2012-08-12T19:27:16.894+0000
...
I got them from https://github.com/apache/incubator-graphar-testing/tree/955596c325ceba7b607e285738e3dd0ce4ff424e/ldbc_sample
Could you try to use client_kwargs instead of endpoint_url? Just like this example:
d34 = Loader("s3://datafiles/group.e", key='access-id', secret='secret-access-key', client_kwargs={'region_name': 'us-east-1'})
ref: https://graphscope.io/docs/loading_graphs#loader-variants
We have a custom host, so a region is not suitable for us.
Using the parameters this way: client_kwargs={'endpoint_url': '{{ s3_endpoint_url }}'}
shows the same error.
Thanks for the input, I can reproduce it now.
Describe the bug: I have a k8s cluster. When I try to load a graph from data stored in S3, I get an error.
To Reproduce Steps to reproduce the behavior:
session = graphscope.session() # depends on your setup, you could have some parameters set
session.close()
graph = session.g() graph = graph.add_vertices(Loader('s3://bucket/vertices.csv', key='{{ s3_access_key }}', secret='{{ s3_secret_key }}', endpoint_url='{{ s3_endpoint_url }}', delimiter='|'), label='vertex') graph = graph.add_edges(Loader('s3://bucket/edges.csv', key='{{ s3_access_key }}', secret='{{ s3_secret_key }}', endpoint_url='{{ s3_endpoint_url }}', delimiter='|'), src_label='vertex', dst_label='vertex', label='knows')
E1106 17:01:31.000000 481 /tmp/gs-local-deps/v6d-0.24.2/modules/graph/loader/arrow_fragment_loader.cc:432] Failed to read from stream o04c02d48a740008a: Object not exists: failed to get metadata for 'o04c02d48a740008a': failed to read get_data reply: {"content":null,"type":"get_data_reply"} E1106 17:01:31.000000 435 /tmp/gs-local-deps/v6d-0.24.2/modules/graph/loader/arrow_fragment_loader.cc:432] Failed to read from stream o04c02d48a740008a: Object not exists: failed to get metadata for 'o04c02d48a740008a': failed to read get_data reply: {"content":null,"type":"get_data_reply"} E1106 17:01:31.000000 114 /home/graphscope/GraphScope/analytical_engine/core/server/dispatcher.cc:153] Worker 0: VineyardError occurred on worker 0: VineyardError occurred on worker 0: /tmp/gs-local-deps/v6d-0.24.2/modules/graph/loader/fragment_loader_utils.cc:218: SyncSchema -> Assertion failed: field_num > 0: Empty table list cannot be used for normalizing schema vineyard::SyncSchema(std::shared_ptr const&, grape::CommSpec const&) + 0x7BC
vineyard::sync_gs_error<gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::loadVertexTables(std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > const&, int, int)::{lambda(std::shared_ptr const&)#2}&, std::shared_ptr const&>(grape::CommSpec const&, gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::loadVertexTables(std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > const&, int, int)::{lambda(std::shared_ptr const&)#2}&, std::shared_ptr const&)::{lambda()#2}::operator()() const + 0x49
gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::loadVertexTables(std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > const&, int, int) + 0x1845
vineyard::sync_gs_error<gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::LoadVertexTables()::{lambda()#2}&>(grape::CommSpec const&, gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::LoadVertexTables()::{lambda()#2}&)::{lambda()#2}::operator()() const + 0x52
gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::LoadVertexTables() + 0x35D
gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::LoadVertexEdgeTables() + 0x2D1
gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::AddLabelsToFragment(unsigned long) + 0x47
gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::AddLabelsToFragmentAsFragmentGroup(unsigned long) + 0x3B
AddLabelsToGraph + 0x485
gs::GrapeInstance::addLabelsToGraph(gs::rpc::GSParams const&) + 0x83B
gs::GrapeInstance::OnReceive(std::shared_ptr) + 0x1357
gs::Dispatcher::processCmd(std::shared_ptr) + 0xEA
gs::Dispatcher::publisherLoop() + 0x246
std::error_code::default_error_condition() const + 0x33
pthread_condattr_setpshared + 0x513
2024-11-06 09:01:31,956 [ERROR][rpc:189]: Runstep failed with code: ANALYTICAL_ENGINE_INTERNAL_ERROR, message: Error occurred during RunStep, The traceback is: Traceback (most recent call last):
File "/home/graphscope/.local/lib/python3.10/site-packages/gscoordinator/op_executor.py", line 106, in run_step
for response in responses:
File "/home/graphscope/.local/lib/python3.10/site-packages/grpc/_channel.py", line 543, in next
return self._next()
File "/home/graphscope/.local/lib/python3.10/site-packages/grpc/_channel.py", line 969, in _next
raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.INTERNAL
details = "VineyardError occurred on worker 0: VineyardError occurred on worker 0: /tmp/gs-local-deps/v6d-0.24.2/modules/graph/loader/fragment_loader_utils.cc:218: SyncSchema -> Assertion failed: field_num > 0: Empty table list cannot be used for normalizing schema
vineyard::SyncSchema(std::shared_ptr const&, grape::CommSpec const&) + 0x7BC
vineyard::sync_gs_error<gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::loadVertexTables(std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > const&, int, int)::{lambda(std::shared_ptr const&)#2}&, std::shared_ptr const&>(grape::CommSpec const&, gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::loadVertexTables(std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > const&, int, int)::{lambda(std::shared_ptr const&)#2}&, std::shared_ptr const&)::{lambda()#2}::operator()() const + 0x49
gs::ArrowFragmentLoader<long, unsigned long, vineyard::ArrowVertexMap>::loadVertexTables(std::vector<std::shared_ptr, std::allocator<std::shared_ptr > > const&, int, int) + 0x1845
...
session = graphscope.session( k8s_volumes={ "data": { "type": "hostPath", "field": { "path": os.path.expanduser("~/examples/"), "type": "Directory" }, "mounts": { "mountPath": "/examples/" } } } )
graph = session.g() graph = graph.add_vertices(Loader('/examples/vertices.csv', delimiter='|'), label='vertex') graph = graph.add_edges(Loader('/examples/edges.csv', delimiter='|'), src_label='vertex', dst_label='vertex', label='knows')