Accessing aligned pairs of trained model

nju-websoft / OpenEA

A Benchmarking Study of Embedding-based Entity Alignment for Knowledge Graphs, VLDB 2020

GNU General Public License v3.0

517 stars 80 forks source link

I am following the entity alignment tutorial tutorial/entity_alignment/main.py and would like to store all closest alignments for each entity in model.kgs.kg1.entities_set.

Question: How can I access them?

Currently, I am using the following repurposed code (function added to tutorial/entity_alignment/mtranse.py):

def get_matches(self, remove_dangling=False):
    """Run the alignment test between all KG1 entities and the KG2 candidates.

    Args:
        remove_dangling: When true, the first element of every pair in
            ``self.kgs.train_unlinked_entities2`` and
            ``self.kgs.valid_unlinked_entities2`` is removed from the KG2
            candidate set before the embeddings are looked up.

    Returns:
        The first of the four values returned by ``test`` (the alignment
        result for the KG1 -> KG2 direction).

    NOTE(review): the integers in the returned pairs appear to be positions
    into the two candidate lists built below (rows of ``embeds1`` /
    ``embeds2``), not entity IDs. Both lists come from iterating a set and
    are not kept after the call, so the positions cannot be mapped back to
    IDs afterwards -- confirm against the implementation of ``test``.
    """
    kg2_pool = self.kgs.kg2.entities_set
    if remove_dangling:
        train_dangling = {ent for ent, _ in self.kgs.train_unlinked_entities2}
        valid_dangling = {ent for ent, _ in self.kgs.valid_unlinked_entities2}
        kg2_pool = kg2_pool - train_dangling - valid_dangling

    kg2_pool = list(kg2_pool)
    kg1_pool = list(self.kgs.kg1.entities_set)

    # Materialise the embeddings (and the optional mapping matrix) as arrays.
    session = self.session
    embeds1 = tf.nn.embedding_lookup(self.ent_embeds, kg1_pool).eval(session=session)
    embeds2 = tf.nn.embedding_lookup(self.ent_embeds, kg2_pool).eval(session=session)
    if self.mapping_mat is None:
        mapping = None
    else:
        mapping = self.mapping_mat.eval(session=session)

    # Only the first of the four results is needed here.
    aligned_pairs, _, _, _ = test(
        embeds1,
        embeds2,
        mapping,
        self.args.top_k,
        self.args.test_threads_num,
        metric=self.args.eval_metric,
        normalize=self.args.eval_norm,
        csls_k=0,
        accurate=True,
    )
    return aligned_pairs

But the problem is that the tuples in alignment_rest_12 cannot be mapped to the entity URIs. The integer values do not seem to refer to entity IDs, since

# Sanity check: the first element of every returned pair should be a KG1 entity ID.
assert all(
    pair[0] in model.kgs.kg1.entities_id_dict.values()
    for pair in alignment_rest_12
)

fails.

def get_matches(self, remove_dangling: bool = False) -> list:
    """Return the predicted KG1 -> KG2 alignment as pairs of entity names.

    The alignment test reports matches as positions into the two candidate
    lists passed to it, so both lists are kept (sorted, for a reproducible
    order) and used afterwards to translate positions back to entity IDs and
    then to entity names.

    Args:
        remove_dangling: When true, the first element of every pair in
            ``self.kgs.train_unlinked_entities2`` and
            ``self.kgs.valid_unlinked_entities2`` is removed from the KG2
            candidate set before matching.

    Returns:
        A list of ``[kg1_name, kg2_name]`` pairs, one per aligned pair
        reported by ``test``. The list is empty when nothing was aligned. A
        name is ``None`` if its ID is missing from the corresponding
        ``entities_id_dict``.
    """
    candidate_set_kg2 = self.kgs.kg2.entities_set
    if remove_dangling:
        candidate_set_kg2 = (
            candidate_set_kg2
            - {x for x, _ in self.kgs.train_unlinked_entities2}
            - {x for x, _ in self.kgs.valid_unlinked_entities2}
        )

    # Fixed ordering: position i in each list is row i of the embedding
    # matrix handed to `test`, which is what the returned pairs refer to.
    candidate_list_kg1 = sorted(self.kgs.kg1.entities_set)
    candidate_list_kg2 = sorted(candidate_set_kg2)

    embeds1 = tf.nn.embedding_lookup(self.ent_embeds, candidate_list_kg1).eval(session=self.session)
    embeds2 = tf.nn.embedding_lookup(self.ent_embeds, candidate_list_kg2).eval(session=self.session)
    mapping = self.mapping_mat.eval(session=self.session) if self.mapping_mat is not None else None

    alignment_rest_12, _, _, _ = test(
        embeds1,
        embeds2,
        mapping,
        self.args.top_k,
        self.args.test_threads_num,
        metric=self.args.eval_metric,
        normalize=self.args.eval_norm,
        csls_k=0,
        accurate=True,
    )

    # Map from ID to name
    mapping_kg1_int_name = {v: k for k, v in self.kgs.kg1.entities_id_dict.items()}
    mapping_kg2_int_name = {v: k for k, v in self.kgs.kg2.entities_id_dict.items()}

    # Map from position (index) to ID, then from ID to name. Building the
    # pairs directly (instead of np.dstack(...).squeeze()) keeps the result
    # two-dimensional even for zero or one match, where squeeze() would drop
    # the pair axis and the subsequent [:, 0] indexing would raise.
    return [
        [
            mapping_kg1_int_name.get(candidate_list_kg1[pair[0]]),
            mapping_kg2_int_name.get(candidate_list_kg2[pair[1]]),
        ]
        for pair in alignment_rest_12
    ]

nju-websoft / OpenEA

Accessing aligned pairs of trained model #48