Kanaries / Rath

Next-generation platform for automated exploratory data analysis and visualization.
https://kanaries.net
GNU Affero General Public License v3.0
4.29k stars 335 forks source link

should be treated the same as bgKnowledge #182

Closed github-actions[bot] closed 2 years ago

github-actions[bot] commented 2 years ago

https://github.com/Kanaries/Rath/blob/d2cabfef63f845df85e23b3d306f6ac455cef76e/services/causal-service/algorithms/causallearn/XLearner.py#L142


    anc = []
    S = []
    G_fd = set()
    # for (u, v) in background_knowledge.required_rules_specs:
    #     src, dest = NodeId.get(u.get_name(), None), NodeId.get(v.get_name(), None)
    #     if src is None:
    #         src = NodeId[u.get_name()] = cur_id
    #         FDNodes.append(u)
    #         G_fd.add(u.get_attribute('id'))
    #         adj.append(set())
    #         anc.append(set())
    #         attr_id.append(u.get_attribute('id'))
    #         cur_id += 1
    #     if dest is None:
    #         dest = NodeId[v.get_name()] = cur_id
    #         FDNodes.append(v)
    #         G_fd.add(v.get_attribute('id'))
    #         adj.append(set())
    #         anc.append(set())
    #         attr_id.append(v.get_attribute('id'))
    #         cur_id += 1
    #     adj[src].add(dest)
    #     anc[dest].add(src)
    """
    NodeId: Dict[int, int] 原始图中对应点的局域编号
    FDNode: List[int]: 在Gfd中的causallearn格式的graphnodes,全局编号
    attr_id: Gfd中每个点在原始图中对应的点编号
    adj, anc: Gfd的邻接表
    G_fd: Gfd中的点集,原图编号
    """
    for dep in functional_dependencies:
        if len(dep.params) == 1: # TODO: dep.fid depends only on dep.params[0]:
            param, f = dep.params[0].fid, dep.fid
            u, v = f_ind[dep.params[0].fid], f_ind[dep.fid]
            src, dest = NodeId.get(u, None), NodeId.get(v, None)
            if src is None:
                src = NodeId[u] = cur_id
                node = FCI.GraphNode(f"X{u+1}")
                node.add_attribute('id', u)
                G_fd.add(u), adj.append(set()), anc.append(set()), FDNodes.append(node)
                attr_id.append(u)
                cur_id += 1
            if dest is None:
                dest = NodeId[v] = cur_id
                node = FCI.GraphNode(f"X{v+1}")
                node.add_attribute('id', v)
                G_fd.add(v), adj.append(set()), anc.append(set()), FDNodes.append(node)
                attr_id.append(v)
                cur_id += 1
            adj[src].add(dest)
            anc[dest].add(src)
        else:
            # TODO: should be treated the same as bgKnowledge
            pass
    topo = toposort(adj)

    fake_knowledge = BackgroundKnowledge()
    skeleton_knowledge = set()
    for t in topo[::-1]:
        mxvcnt, y = 0, -1
        for a in anc[t]:
            print("a = ", a, attr_id[a])
            vcnt = np.unique(dataset[:, attr_id[a]]).size
            if vcnt > mxvcnt:
                y = a
                mxvcnt = vcnt
        if y == -1: continue
        # S.append((attr_id[t], attr_id[y]))
        # fake_knowledge.add_required_by_node(FDNodes[t], FDNodes[y])
        fake_knowledge.add_required_by_node(FDNodes[y], FDNodes[t])
        skeleton_knowledge.add((attr_id[y], attr_id[t]))
        # remove X and connected edges from G_FD
        G_fd.remove(attr_id[t])
        for a in anc[t]:
            adj[a].remove(t)
    GfdNodes = []
    for i, v in enumerate(G_fd):
        node = FCI.GraphNode(f"X{v + 1}")
        node.add_attribute("id", v)
        GfdNodes.append(node)
    FDgraph, FD_sep_sets = FCI.fas(dataset, GfdNodes, independence_test_method=independence_test_method, alpha=alpha,
                          knowledge=None, depth=depth, verbose=verbose)
github-actions[bot] commented 2 years ago

Closed in fe41774375c795ea0335014cc8577d8e5c372691