Open snash4 opened 5 years ago
Hi @snash4, you can try adapting the snippet below to your uses:
import types
from functools import partial

import numpy as np
import tensorflow as tf
from sklearn.utils import check_random_state
from tqdm import tqdm

from ampligraph.datasets import AmpligraphDatasetAdapter, NumpyDatasetAdapter
from ampligraph.evaluation import generate_corruptions_for_fit, to_idx, generate_corruptions_for_eval, \
    hits_at_n_score, mrr_score


def generate_corruptions(self, X_pos, batches_count, epochs):
    try:
        # Rebuild the graph from the trained parameters with a fixed seed.
        tf.reset_default_graph()
        self.rnd = check_random_state(self.seed)
        tf.random.set_random_seed(self.seed)
        self._load_model_from_trained_params()

        # Feed the positive triples in batches through a NumpyDatasetAdapter.
        dataset_handle = NumpyDatasetAdapter()
        dataset_handle.use_mappings(self.rel_to_idx, self.ent_to_idx)
        dataset_handle.set_data(X_pos, "pos")
        batch_size_pos = int(np.ceil(dataset_handle.get_size("pos") / batches_count))

        gen_fn = partial(dataset_handle.get_next_train_batch,
                         batch_size=batch_size_pos, dataset_type="pos")
        dataset = tf.data.Dataset.from_generator(gen_fn,
                                                 output_types=tf.int32,
                                                 output_shapes=(None, 3))
        dataset = dataset.repeat().prefetch(1)
        dataset_iter = tf.data.make_one_shot_iterator(dataset)
        x_pos_tf = dataset_iter.get_next()

        # Score the positive triples (not strictly needed to generate corruptions).
        e_s, e_p, e_o = self._lookup_embeddings(x_pos_tf)
        scores_pos = self._fn(e_s, e_p, e_o)

        # Generate one corruption per positive, corrupting either subject or object,
        # and score the corruptions with the trained model.
        x_neg_tf = generate_corruptions_for_fit(x_pos_tf,
                                                entities_list=None,
                                                eta=1,
                                                corrupt_side='s+o',
                                                entities_size=0,
                                                rnd=self.seed)
        e_s_neg, e_p_neg, e_o_neg = self._lookup_embeddings(x_neg_tf)
        scores = self._fn(e_s_neg, e_p_neg, e_o_neg)

        epoch_iterator_with_progress = tqdm(range(1, epochs + 1),
                                            disable=(not self.verbose), unit='epoch')
        scores_list = []
        with tf.Session(config=self.tf_config) as sess:
            sess.run(tf.global_variables_initializer())
            for _ in epoch_iterator_with_progress:
                for _ in range(batches_count):
                    scores_list.append(sess.run(scores))
        dataset_handle.cleanup()
        return np.concatenate(scores_list)
    except Exception as e:
        dataset_handle.cleanup()
        raise e


# Attach the new method to an existing (trained) model instance.
model.generate_corruptions = types.MethodType(generate_corruptions, model)
So, what I am doing above is adding a new method to some model object (e.g. a TransE model) that scores newly generated corruptions.
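For orientation, here is a minimal usage sketch of that pattern; the toy triples, hyperparameters, and variable names below are made up for illustration and are not from this thread:

import types
import numpy as np
from ampligraph.latent_features import TransE

# Toy knowledge graph: (subject, predicate, object) triples as a numpy array.
X = np.array([['a', 'likes', 'b'],
              ['b', 'likes', 'c'],
              ['c', 'likes', 'a']])

# Train a model as usual, then attach the method defined above to the instance.
model = TransE(batches_count=1, epochs=20, k=10, seed=0, verbose=False)
model.fit(X)
model.generate_corruptions = types.MethodType(generate_corruptions, model)

# Scores of the generated corruptions (one corruption per positive per batch).
neg_scores = model.generate_corruptions(X, batches_count=1, epochs=1)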
In your case, you don't need the model scores themselves, so you can delete all the lines related to the embeddings and to the scores. Then you only need to change the output of the function from sess.run(scores) to something like sess.run(x_neg_tf). You can control the memory utilisation by changing the batches_count argument.
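Put together, that adaptation could look roughly like the sketch below (untested; the name generate_corruption_triples is just for illustration). It drops the epoch loop and the scoring lines, and returns the corrupted triples as entity/relation indices:

def generate_corruption_triples(self, X_pos, batches_count):
    # Same setup as generate_corruptions above, but without embeddings and scores.
    try:
        tf.reset_default_graph()
        self.rnd = check_random_state(self.seed)
        tf.random.set_random_seed(self.seed)
        self._load_model_from_trained_params()

        dataset_handle = NumpyDatasetAdapter()
        dataset_handle.use_mappings(self.rel_to_idx, self.ent_to_idx)
        dataset_handle.set_data(X_pos, "pos")
        batch_size_pos = int(np.ceil(dataset_handle.get_size("pos") / batches_count))

        gen_fn = partial(dataset_handle.get_next_train_batch,
                         batch_size=batch_size_pos, dataset_type="pos")
        dataset = tf.data.Dataset.from_generator(gen_fn, output_types=tf.int32,
                                                 output_shapes=(None, 3))
        dataset = dataset.repeat().prefetch(1)
        x_pos_tf = tf.data.make_one_shot_iterator(dataset).get_next()

        x_neg_tf = generate_corruptions_for_fit(x_pos_tf, entities_list=None, eta=1,
                                                corrupt_side='s+o', entities_size=0,
                                                rnd=self.seed)

        corruptions = []
        with tf.Session(config=self.tf_config) as sess:
            sess.run(tf.global_variables_initializer())
            for _ in range(batches_count):
                # Each run yields corrupted triples as int32 indices, shape (batch_size * eta, 3).
                corruptions.append(sess.run(x_neg_tf))
        dataset_handle.cleanup()
        return np.concatenate(corruptions)
    except Exception as e:
        dataset_handle.cleanup()
        raise e

model.generate_corruption_triples = types.MethodType(generate_corruption_triples, model)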
If you face any problems, please do say so. If you need interactive help, visit us at our Slack channel.
Hi tabacof, thanks a lot for the code. It worked after a few modifications. While generating the negative triplets, I noticed two things:
One more thing I couldn't understand is why the negative samples are generated per batch?
Thanks.
@snash4 can I go ahead and close this?
Hi, I am trying to generate negative samples using generate_corruptions_for_eval. Since it returns a tensor, I try to convert it to numpy using tensor.eval(), but it throws an out-of-memory exception. I tried it even on a small sample, and it raises the same exception.
Please suggest a solution. Thanks.
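For scale, a rough back-of-envelope (purely illustrative numbers) of why materialising every evaluation corruption at once exhausts memory: generate_corruptions_for_eval corrupts each triple against the whole entity set, so the tensor grows with n_triples * n_entities.

# Illustrative numbers only: evaluation corruptions replace the subject and the
# object of every triple with every entity, so the tensor grows very quickly.
n_triples = 100_000
n_entities = 50_000
n_corruptions = n_triples * n_entities * 2   # corrupt both subject and object
bytes_needed = n_corruptions * 3 * 4         # 3 int32 indices per corrupted triple
print(bytes_needed / 1e9, "GB")              # ~120 GB for the indices alone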