Closed sataliulan closed 3 years ago
Dear @sataliulan, I will move this issue to the related repo: nebula-spark-utils :) 您好,我把这个挪到 spark utils 那个 repo了哈。
OK, I will try it again.
@sataliulan could you please share your neo4j_application.conf? Thanks.
@sataliulan could you please share your neo4j_application.conf? Thanks.
Here is my conf file:
{
# Spark相关配置
spark: {
app: {
name: Nebula Exchange 2.0
}
driver: {
cores: 1
maxResultSize: 2G
}
executor: {
memory:2G
}
cores:{
max: 16
}
}
# Nebula Graph相关配置
nebula: {
address:{
graph:["192.168.10.17:9669"]
meta:["192.168.10.17:9559"]
}
user: admin
pswd: admin
space: Medical
connection {
timeout: 3000
retry: 3
}
execution {
retry: 3
}
error: {
max: 32
output: /tmp/errors
}
rate: {
limit: 1024
timeout: 1000
}
}
# 处理点
tags: [
# 设置标签Disease相关信息。
{
name: Disease
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Disease) with id(n) as _id, n.prevent as prevent, n.cureWay as cure_way, n.name as name, n.cure_lasttime as cure_last_time, n.cure_prob as cure_prob, n.cause as cause, n.cureDepartment as cure_department,n.desc as desc, n.easy_get as easy_get return _id, prevent, cure_way, name, cure_last_time, cure_prob, cause, cure_department, desc, easy_get order by _id"
fields: [prevent,cureWay,name,cure_lasttime,cure_prob,cause,cureDepartment,desc,easy_get]
nebula.fields: [prevent,cure_way,name,cure_last_time,cure_prob,cause,cure_department,desc,easy_get]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签Symptom相关信息。
{
name: Symptom
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Symptom) with id(n) as _id, n.name as name return _id,name order by _id"
fields: [name]
nebula.fields: [name]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签Department相关信息。
{
name: Department
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Department) with id(n) as _id, n.name as name return _id,name order by _id"
fields: [name]
nebula.fields: [name]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签Check相关信息。
{
name: Check
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Check) with id(n) as _id, n.name as name return _id,name order by _id"
fields: [name]
nebula.fields: [name]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签Drug相关信息。
{
name: Drug
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Drug) with id(n) as _id, n.name as name return _id,name order by _id"
fields: [name]
nebula.fields: [name]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签Food相关信息。
{
name: Food
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Food) with id(n) as _id, n.name as name return _id,name order by _id"
fields: [name]
nebula.fields: [name]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签Producer相关信息。
{
name: Producer
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:Producer) with id(n) as _id, n.name as name return _id,name order by _id"
fields: [name]
nebula.fields: [name]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置标签BasePrescription相关信息。
{
name: BasePrescription
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (n:BasePrescription) with id(n) as _id, n.name as name, n.note as note, n.physical_condition as physical_condition, n.age_min as age_min, n.style as style, n.time as time, n.type as type, n.frequency as frequency, n.age_max as age_max return _id,name,note,physical_condition,age_min,style,time,type,frequency,age_max order by _id"
fields: [name,note,physical_condition,age_min,style,time,type,frequency,age_max]
nebula.fields: [name,note,physical_condition,age_min,style,time,type,frequency,age_max]
vertex: {
field:_id
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
]
# 处理边数据
edges: [
# 设置边类型acompany with相关信息
{
name: acompany_with
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:acompany_with]->(b:Disease) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型belongs to相关信息
{
name: belongs_to
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Department)-[r:belongs_to]-(b:Department) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型recommand eat相关信息
{
name: recommand_eat
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:recommand_eat]-(b:Food) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型drugs of相关信息
{
name: drugs_of
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Producer)-[r:drugs_of]-(b:Drug) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型exercise_prescrition相关信息
{
name: exercise_prescrition
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:BasePrescription)-[r:exercise_prescrition]-(b:Disease) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型common_drug相关信息
{
name: common_drug
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:common_drug]-(b:Drug) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型do_eat相关信息
{
name: do_eat
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:do_eat]-(b:Food) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型no_eat相关信息
{
name: no_eat
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:no_eat]-(b:Food) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型recommand_drug相关信息
{
name: recommand_drug
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:recommand_drug]-(b:Drug) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型has_symptom相关信息
{
name: has_symptom
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:has_symptom]-(b:Symptom) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
# 设置边类型need_check相关信息
{
name: need_check
type: {
source: neo4j
sink: client
}
server: "bolt://127.0.0.1:7687"
user: neo4j
password:111111
database:graph.db
exec: "match (a:Disease)-[r:need_check]-(b:Check) return id(a) as src, id(b) as dst, r.name as name order by id(r)"
fields: [name]
nebula.fields: [name]
source: {
field: src
}
target: {
field: dst
}
partition: 10
batch: 1000
check_point_path: /tmp/medical
}
]
}
By the way, I checked neo4j.conf; here is the Bolt connector setting:
dbms.connector.bolt.enabled=true
dbms.connector.bolt.listen_address=:7687
@Nicole00 is helping via the forum on the same topic https://discuss.nebula-graph.com.cn/t/topic/4506 (thanks!!)
By the way, I checked neo4j.conf; here is the Bolt connector setting:
Bolt connector
dbms.connector.bolt.enabled=true
dbms.connector.bolt.tls_level=OPTIONAL
dbms.connector.bolt.listen_address=:7687
Please check your available database name graph.db.
By the way, I checked neo4j.conf; here is the Bolt connector setting:
Bolt connector
dbms.connector.bolt.enabled=true
dbms.connector.bolt.tls_level=OPTIONAL
dbms.connector.bolt.listen_address=:7687
Please check your available database name graph.db.
Well, thanks for your reply. I've checked my Neo4j connection status; it shows: Version: 3.5.4, Edition: Enterprise, Name: graph.db.
Moreover, should I change the database name from graph.db to graph?
This post has resolved the issue: https://discuss.nebula-graph.com.cn/t/topic/4506/7?u=nicole
版本:Nebula Exchange 2.0,Nebula Graph 2.0.1,Spark 2.4,OS:Ubuntu 18.04。按照文档说明编写了 neo4j_application.conf,执行命令 ${SPARK_HOME}/bin/spark-submit --master "local" --class com.vesoft.nebula.exchange.Exchange /root/nebula-spark-utils/nebula-exchange/target/nebula-exchange-2.0.0.jar -c /root/nebula-spark-utils/nebula-exchange/target/classes/neo4j_application.conf 后报错: Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.neo4j.driver.exceptions.ClientException: Database name parameter for selecting database is not supported in Bolt Protocol Version 3. Database name: 'graph.db' 再次参考文档,文档上说:Exchange 使用 Neo4j Driver 4.0.1 实现对 Neo4j 数据的读取。在 Neo4j 官方未找到 Neo4j Driver,但找到了 neo4j-spark-connector,不知道是不是这个,请确认下;若不是,希望给出 Neo4j Driver 4.0.1 的地址。