Open francolq opened 5 years ago
I have fixed it like this, and it seems to work correctly:
couples = []
labels = []
if seed is None:
seed = random.randint(0, 10e6)
random.seed(seed)
for i, wi in enumerate(sequence):
if not wi:
continue
if sampling_table is not None:
if sampling_table[wi] < random.random():
continue
window_start = max(0, i - window_size)
window_end = min(len(sequence), i + window_size + 1)
for j in range(window_start, window_end):
if j != i:
wj = sequence[j]
if not wj:
continue
couples.append([wi, wj])
if categorical:
labels.append([0, 1])
else:
labels.append(1)
if negative_samples > 0:
num_negative_samples = int(len(labels) * negative_samples)
words = [c[0] for c in couples]
random.seed(seed)
random.shuffle(words)
couples += [[words[i % len(words)],
random.randint(1, vocabulary_size - 1)]
for i in range(num_negative_samples)]
if categorical:
labels += [[1, 0]] * num_negative_samples
else:
labels += [0] * num_negative_samples
if shuffle:
random.seed(seed)
random.shuffle(couples)
random.seed(seed)
random.shuffle(labels)
return couples, labels
The random seed should be set before this call to `random.random()`:
https://github.com/keras-team/keras-preprocessing/blob/6f679b06d10d39edcb066142eec9e3bcd6d9de4b/keras_preprocessing/sequence.py#L199
Sorry for ignoring the guidelines.