alibaba / GraphScope

🔨 🍇 💻 🚀 GraphScope: A One-Stop Large-Scale Graph Computing System from Alibaba | 一站式图计算系统
https://graphscope.io
Apache License 2.0
3.24k stars 439 forks source link

[BUG] graphscope-client: graph add_edges failed to add edge for label with same name but src_label and dst_label different #1995

Open dzhiwei opened 2 years ago

dzhiwei commented 2 years ago

Describe the bug A clear and concise description of what the bug is.

To Reproduce Steps to reproduce the behavior: Adding an edge with label "knows" fails when that label is reused with a different (src_label, dst_label) pair, as in the code below (error: label "knows" already exists):

def test():
    """Reproduce the ``add_edges`` failure for a reused edge label.

    Builds a graph incrementally with ``add_vertices`` / ``add_edges`` and
    adds the edge label "knows" twice: first as person -> person, then as
    person -> comment. According to the report, the second "knows" call
    fails with ``label "knows" already exists``.

    Any exception is printed with its traceback rather than propagated, and
    the session is always closed.
    """
    sess: graphscope.Session = get_default_session()
    try:
        # NOTE(review): this call takes no arguments and its result is
        # discarded; it looks like a leftover in the repro -- confirm whether
        # it is needed.
        sess.load_from()
        graph = sess.g(directed=False, oid_type="int64_t")
        #  id|firstName|lastName|gender|birthday|creationDate|locationIP|browserUsed
        data1 = pandas.read_csv("/tmp/testingdata/0001/person_0_0.csv", sep="|")
        data2 = pandas.read_csv("/tmp/testingdata/0001/person_0_1.csv", sep="|")
        comment = pandas.read_csv("/tmp/testingdata/0001/comment_0_1.csv", sep="|")

        # Combine both person shards into a single frame keyed on "id".
        person = pandas.merge(data1, data2, on="id", how="outer")

        data4 = pandas.read_csv("/tmp/testingdata/0001/person_knows_person_0_1.csv", sep="|")
        data5 = pandas.read_csv("/tmp/testingdata/0001/person_likes_comment_0_0.csv", sep="|")
        graph = graph.add_vertices(person, label="person")
        graph = graph.add_vertices(comment, label="comment")
        graph = graph.add_edges(data4, label="knows", src_label="person", dst_label="person")

        graph = graph.add_edges(data5, label="likes", src_label="person", dst_label="comment")

        # Same label "knows", different (src_label, dst_label) pair: this is
        # the call the report says is rejected.
        graph = graph.add_edges(data5, label="knows", src_label="person", dst_label="comment")

        # Analytical engine
        # Project the property graph down to a simple graph.
        simple_g = graph.project(vertices={"person": []}, edges={"knows": []})

        context = graphscope.pagerank(simple_g, delta=0.8)

        dataframe = context.to_dataframe(selector={'id': 'v.id', "result": 'r'})
        print(dataframe)

        print('this is the end of the program.. ')
    except Exception:
        # print_exc() reads the active exception itself; its first positional
        # parameter is `limit`, so passing the exception object there raises
        # TypeError on Python 3 and hides the real error.
        traceback.print_exc()
    finally:
        sess.close()

However, the same schema loads fine with session.load_from, as shown below:

def test2():
    """Load the same schema in one ``sess.load_from`` call (reported to work).

    Declares the "person" and "comment" vertex labels and the "knows" and
    "likes" edge labels up front, with "knows" listed for two
    (src_label, dst_label) pairs, then runs PageRank on the
    person/"knows" projection and prints the result.

    Any exception is printed with its traceback rather than propagated, and
    the session is always closed.
    """
    sess: graphscope.Session = get_default_session()
    try:

        # graph = sess.g(directed=False, oid_type="int64_t")
        #  id|firstName|lastName|gender|birthday|creationDate|locationIP|browserUsed
        data1 = pandas.read_csv("/tmp/testingdata/0001/person_0_0.csv", sep="|")
        data2 = pandas.read_csv("/tmp/testingdata/0001/person_0_1.csv", sep="|")
        comment = pandas.read_csv("/tmp/testingdata/0001/comment_0_1.csv", sep="|")

        # Combine both person shards into a single frame keyed on "id".
        person = pandas.merge(data1, data2, on="id", how="outer")

        data4 = pandas.read_csv("/tmp/testingdata/0001/person_knows_person_0_1.csv", sep="|")
        data5 = pandas.read_csv("/tmp/testingdata/0001/person_likes_comment_0_0.csv", sep="|")

        # Each vertex label maps to (loader, property columns, id column).
        vertices = {
            "person": (
                Loader(
                    person, header_row=True, delimiter="|"
                ),
                [
                    "firstName",
                    "lastName",
                    "gender",
                    "birthday",
                    "creationDate",
                    "locationIP",
                    "browserUsed",
                ],
                "id",
            ),
            "comment": (
                Loader(
                    comment, header_row=True, delimiter="|"
                ),
                None,
                "id",
            )
        }
        # Each edge label maps to a list of
        # (loader, property columns, (src column, src label), (dst column, dst label)).
        edges = {
            "knows": [
                (
                    Loader(
                        data4,
                        header_row=True,
                        delimiter="|",
                    ),
                    ["creationDate"],
                    ("Person.id", "person"),
                    ("Person.id2", "person"),
                ),
                # NOTE(review): this person -> comment entry reuses data4
                # (person_knows_person), while test() passes data5 for the
                # same label pair -- confirm which frame is intended.
                (
                    Loader(
                        data4,
                        header_row=True,
                        delimiter="|",
                    ),
                    ["creationDate"],
                    ("Person.id", "person"),
                    ("Person.id2", "comment"),
                )
            ],
            "likes": [
                (
                    Loader(
                        data5,
                        header_row=True,
                        delimiter="|",
                    ),
                    None,
                    ("Person.id", "person"),
                    ("Comment.id", "comment"),
                )
            ]
        }
        graph = sess.load_from(vertices=vertices, edges=edges)
        # Analytical engine
        # Project the property graph down to a simple graph.
        simple_g = graph.project(vertices={"person": []}, edges={"knows": []})

        context = graphscope.pagerank(simple_g, delta=0.8)

        dataframe = context.to_dataframe(selector={'id': 'v.id', "result": 'r'})
        print(dataframe)

        print('this is the end of the program.. ')
    except Exception:
        # print_exc() reads the active exception itself; its first positional
        # parameter is `limit`, so passing the exception object there raises
        # TypeError on Python 3 and hides the real error.
        traceback.print_exc()
    finally:
        sess.close()

Expected behavior A clear and concise description of what you expected to happen.

Screenshots If applicable, add screenshots to help explain your problem.

Environment (please complete the following information):

Additional context Add any other context about the problem here.

sighingnow commented 2 years ago

Have put it to my queue.

No ETA. load_from is recommended for such cases: https://graphscope.io/docs/reference/graph.html?highlight=load_from#graphscope.framework.graph_builder.load_from