Closed ahmedfgad closed 1 year ago
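The issue is solved by making a deep copy of the model (`copy.deepcopy(model)`) inside each call, before loading the weights and making predictions, so that parallel workers never load weights into the same shared model. https://stackoverflow.com/a/75606666/5426539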
import torch
import numpy
import concurrent.futures
import copy
# Seed NumPy's global RNG so the random weights generated by
# create_rand_weights() are the same on every run.
numpy.random.seed(1)
def create_rand_weights(model, num_models):
    """Build ``num_models`` random state dicts shaped like ``model``'s.

    Every entry of ``model.state_dict()`` is replaced by a tensor of the
    same shape filled with samples drawn uniformly from ``[0, 1)`` using
    NumPy's global random generator (so ``numpy.random.seed`` controls the
    result). Entries with more than two dimensions, such as convolution
    kernels, keep their full shape.

    Args:
        model: A ``torch.nn.Module`` whose state dict provides the keys and
            tensor shapes to imitate. The model's own parameters are not
            modified.
        num_models: Number of random state dicts to generate.

    Returns:
        A list of ``num_models`` state dicts. Each value is a float64
        tensor created with ``torch.from_numpy``.
    """
    random_model_weights = []
    for _ in range(num_models):
        # state_dict() returns a new mapping on each call, so overwriting
        # its entries does not alias the dicts built for other candidates.
        weights_dict = model.state_dict()
        # Snapshot the items so the mapping can be updated while looping.
        for key, tensor in list(weights_dict.items()):
            # random_sample(shape) consumes the RNG exactly like
            # rand(*shape) but is not limited to one or two dimensions.
            layer_weights = numpy.random.random_sample(tuple(tensor.shape))
            weights_dict[key] = torch.from_numpy(layer_weights)
        random_model_weights.append(weights_dict)
    return random_model_weights
def model_error(model_weights):
    """Evaluate one candidate state dict against the module-level dataset.

    Reads the module-level ``model``, ``data_inputs``, ``data_outputs`` and
    ``loss_function``.

    Args:
        model_weights: A state dict compatible with ``model``.

    Returns:
        The loss between the predictions and ``data_outputs`` as a NumPy
        value, with ``0.00000001`` added.
    """
    # Load the weights into a private deep copy, never into the shared
    # model: load_state_dict() changes the module it is called on, and
    # this function is run from several worker threads at once.
    candidate = copy.deepcopy(model)
    candidate.load_state_dict(model_weights)
    loss = loss_function(candidate(data_inputs), data_outputs)
    # NOTE(review): the small offset presumably keeps the error non-zero
    # for callers that divide by it - confirm against the consumer.
    return loss.detach().numpy() + 0.00000001
# Small feed-forward network: 3 inputs -> 2 hidden units -> ReLU -> 1 output.
input_layer = torch.nn.Linear(in_features=3, out_features=2)
relu_layer = torch.nn.ReLU()
output_layer = torch.nn.Linear(in_features=2, out_features=1)
model = torch.nn.Sequential(input_layer, relu_layer, output_layer)

# L1 (absolute-error) loss used to score predictions against targets.
loss_function = torch.nn.L1Loss()

# Four samples, one per row, with three features each.
data_inputs = torch.tensor(
    [
        [0.02, 0.1, 0.15],
        [0.7, 0.6, 0.8],
        [1.5, 1.2, 1.7],
        [3.2, 2.9, 3.1],
    ]
)

# One target value per sample, in the same row order as data_inputs.
data_outputs = torch.tensor(
    [
        [0.1],
        [0.6],
        [1.3],
        [2.5],
    ]
)
# Number of random candidate weight sets to generate and score.
num_models = 10
random_model_weights = create_rand_weights(model, num_models)

# Score every candidate on a pool of two workers. Executor.map() yields
# the results in the same order as the inputs, so thread_output[i]
# corresponds to random_model_weights[i].
ExecutorClass = concurrent.futures.ThreadPoolExecutor
with ExecutorClass(max_workers=2) as executor:
    thread_output = list(executor.map(model_error, random_model_weights))
thread_output = numpy.array(thread_output)

print("Wrong Outputs using Threads")
print(thread_output)
print("\n\n")
# Reference run: score the same candidates one after another in the main
# thread, in the same order as the pooled run above.
correct_output = numpy.array(list(map(model_error, random_model_weights)))

print("Correct Outputs without Threads")
print(correct_output)
# Element-wise difference between the sequential and the pooled results;
# all zeros means both runs agree.
print(correct_output - thread_output)
PyTorch gives wrong results when used with parallel processing. This is similar to this issue: https://github.com/ahmedfgad/GeneticAlgorithmPython/issues/145