Open leen-stt1 opened 2 months ago
def create_subgraph(self, similarity_fn=None):
    """Build one weighted member graph per meeting and store the list on ``self.subgraph``.

    For each meeting index ``i`` the slice
    ``self.meeting_bill_member_tensor_voteResult[i]`` is used with axis 0
    aligned to ``self.meeting_bill_tensor_maxVoteCount[i]`` (one entry per
    bill) and axis 1 indexing the graph nodes (members): the transposed
    slice becomes the per-node ``'vote'`` feature.

    Every unordered pair of distinct members is a candidate edge; its
    ``'weight'`` is the corresponding entry of the member-by-member
    similarity matrix. Pairs whose weight is infinite are dropped.

    Args:
        similarity_fn: Optional callable ``(votes, max_vote_count) -> matrix``
            used instead of the built-in similarity. ``votes`` is a copy of
            the meeting slice with ``-inf`` entries replaced by 0 and
            ``max_vote_count`` is the matching row of
            ``self.meeting_bill_tensor_maxVoteCount``. It must return an
            array of shape ``(self.member_num, self.member_num)``. When
            ``None`` (the default), each bill row is divided by its maximum
            vote count and the similarity is the dot product between member
            columns.

    Returns:
        None. The generated graphs are stored in ``self.subgraph``.

    Raises:
        ValueError: If the similarity matrix is not
            ``(self.member_num, self.member_num)``.
    """
    vote_tensor = self.meeting_bill_member_tensor_voteResult
    member_num = self.member_num

    # The candidate edge list is the same for every meeting: all pairs from
    # the strict upper triangle (k=1 excludes the diagonal, so no self-loops).
    pair_src, pair_dst = np.triu_indices(member_num, k=1)
    pair_src = pair_src.astype(np.int64)
    pair_dst = pair_dst.astype(np.int64)

    subgraphs = []  # one graph per meeting, in meeting order
    for meeting_index in tqdm(range(self.meeting_num), desc="Generating subgraphs"):
        raw_votes = vote_tensor[meeting_index, :, :]

        # Work on a copy so the stored tensor keeps its -inf markers; for the
        # similarity computation those entries contribute 0.
        votes = raw_votes.copy()
        votes[votes == -np.inf] = 0

        # Maximum vote count of each bill in this meeting.
        max_vote_count = self.meeting_bill_tensor_maxVoteCount[meeting_index, :]

        if similarity_fn is None:
            # Replace zero counts by 1 to avoid division by zero, then scale
            # each bill row so every bill contributes equally.
            safe_count = np.where(max_vote_count == 0, 1, max_vote_count)
            normalized_votes = votes / safe_count[:, np.newaxis]
            # Members are the columns, so the member-by-member similarity is
            # X^T X (X X^T would be bill-by-bill).
            similarity_matrix = np.dot(normalized_votes.T, normalized_votes)
        else:
            similarity_matrix = np.asarray(similarity_fn(votes, max_vote_count))

        if similarity_matrix.shape != (member_num, member_num):
            raise ValueError(
                f"similarity matrix for meeting {meeting_index} has shape "
                f"{similarity_matrix.shape}, expected {(member_num, member_num)}"
            )

        # Keep only the pairs whose weight is not infinite.
        edge_weights = similarity_matrix[pair_src, pair_dst]
        keep = ~np.isinf(edge_weights)
        src = pair_src[keep]
        dst = pair_dst[keep]
        edge_weights = edge_weights[keep]

        g = dgl.DGLGraph()
        # Create every member node up front so the node-feature assignment
        # below always matches the node count, even when a member ends up
        # with no edges (or the graph has no edges at all).
        g.add_nodes(member_num)
        if src.size > 0:
            # Add all edges and their weights in a single call.
            g.add_edges(
                torch.from_numpy(src),
                torch.from_numpy(dst),
                {'weight': torch.from_numpy(edge_weights).float()},
            )

        # Node features come from the untouched slice (its -inf entries are
        # preserved), transposed so that each row belongs to one member.
        g.ndata['vote'] = torch.from_numpy(raw_votes.T).float()

        subgraphs.append(g)

    self.subgraph = subgraphs
Furthermore, the generator class should be able to switch between different methods of computing the similarity matrix.
Furthermore, the generator class should be able to switch between different methods of computing the similarity matrix.