Tomorrowdawn / EvolveGCN

Semester project for social computing of USTC.
0 stars 3 forks source link

create subgraph fix #6

Open leen-stt1 opened 2 months ago

leen-stt1 commented 2 months ago
def create_subgraph(self, similarity_fn=None):
        """Build one weighted DGL graph per meeting and store them in ``self.subgraph``.

        For every meeting the vote slice is cleaned (``-inf`` entries are
        treated as 0), each bill's row is divided by that bill's maximum vote
        count so every bill contributes on the same scale, and a
        member-by-member similarity matrix is computed.  Every unordered pair
        of members ``(i, j)`` with ``i < j`` whose similarity is finite becomes
        an edge carrying that similarity in the ``'weight'`` edge feature.
        The meeting's raw votes are attached as the ``'vote'`` node feature.

        The vote tensor is indexed as ``[meeting, bill, member]``: the code
        divides each row by a per-bill count and transposes the slice before
        using it as per-node features, which only lines up with that layout.

        Args:
            similarity_fn: Optional callable that replaces the default
                similarity computation.  It receives the normalized vote
                matrix of one meeting (bills x members, ``-inf`` already
                replaced by 0) and must return an array of shape
                ``(member_num, member_num)``.  When ``None`` the dot product
                between the members' normalized vote vectors is used.

        Raises:
            ValueError: If the similarity matrix is not
                ``(member_num, member_num)``.

        Returns:
            None.  The list of graphs is written to ``self.subgraph``.
        """
        vote_tensor = self.meeting_bill_member_tensor_voteResult
        member_count = self.member_num

        # The candidate edge list (all member pairs above the diagonal, so no
        # self-loops and no duplicates) is the same for every meeting, so it
        # is computed once instead of once per meeting.
        pair_src, pair_dst = np.triu_indices(member_count, k=1)
        pair_src = pair_src.astype(np.int64)
        pair_dst = pair_dst.astype(np.int64)

        subgraphs = []
        for meeting_index in tqdm(range(self.meeting_num), desc="Generating subgraphs"):
            raw_votes = vote_tensor[meeting_index, :, :]

            # Work on a copy so the stored tensor keeps its -inf markers.
            cleaned_votes = raw_votes.copy()
            cleaned_votes[cleaned_votes == -np.inf] = 0

            # Per-bill maximum vote count; zeros become 1 to avoid dividing
            # by zero.  Reshaped to a column so it divides row by row.
            max_vote_count = self.meeting_bill_tensor_maxVoteCount[meeting_index, :]
            safe_max_vote_count = np.where(max_vote_count == 0, 1, max_vote_count)
            normalized_votes = cleaned_votes / safe_max_vote_count[:, np.newaxis]

            if similarity_fn is None:
                # Rows are bills and columns are members, so the member-by-
                # member product is votes.T @ votes.  (votes @ votes.T would
                # compare bills with each other instead.)
                similarity_matrix = np.dot(normalized_votes.T, normalized_votes)
            else:
                similarity_matrix = np.asarray(similarity_fn(normalized_votes))

            if similarity_matrix.shape != (member_count, member_count):
                raise ValueError(
                    "similarity matrix has shape "
                    f"{similarity_matrix.shape}, expected "
                    f"{(member_count, member_count)}"
                )

            # Keep only pairs with a finite weight; this drops NaN as well as
            # +/-inf.
            edge_weights = similarity_matrix[pair_src, pair_dst]
            finite_edges = np.isfinite(edge_weights)
            src_list = pair_src[finite_edges]
            dst_list = pair_dst[finite_edges]
            edge_weights = edge_weights[finite_edges]

            g = dgl.DGLGraph()
            # Create every member node up front.  Otherwise nodes only appear
            # as a side effect of add_edges, and the node-feature assignment
            # below fails when no edge reaches the last member.
            g.add_nodes(member_count)

            if src_list.size > 0:
                edge_weights_tensor = torch.from_numpy(edge_weights).float()
                # Add all edges and their weights in a single call.
                g.add_edges(src_list, dst_list, {'weight': edge_weights_tensor})

            # Node features: one row per member, one column per bill.
            # NOTE(review): this uses the raw slice, so the features may still
            # contain -inf -- confirm that downstream code expects that.
            g.ndata['vote'] = torch.from_numpy(raw_votes.T).float()

            subgraphs.append(g)

        self.subgraph = subgraphs

Furthermore, the generator class should be able to switch between different methods of computing the similarity matrix.