Open rogeriochaves opened 2 years ago
For now I managed to work around it by redefining `start_multi_process_pool` and quantizing the model inside each worker, after the process has been spawned:
def start_multi_process_pool(self, target_devices=None):
    """
    Starts multi process to process the encoding with several, independent processes.
    This method is recommended if you want to encode on multiple GPUs. It is advised
    to start only one process per GPU. This method works together with encode_multi_process

    :param target_devices: PyTorch target devices, e.g. cuda:0, cuda:1... If None, all available CUDA devices
        will be used; if CUDA is not available, four CPU workers are started instead
    :return: Returns a dict with the target processes, an input queue and an output queue.
    """
    if target_devices is None:
        if torch.cuda.is_available():
            target_devices = ['cuda:{}'.format(i) for i in range(torch.cuda.device_count())]
        else:
            target_devices = ['cpu'] * 4

    # Workers and the queues they share are all created from the 'spawn' context.
    ctx = mp.get_context('spawn')
    input_queue = ctx.Queue()
    output_queue = ctx.Queue()
    processes = []

    for device in target_devices:
        # `self` (the model) is passed to the worker as a process argument;
        # one daemon process is started per entry in target_devices.
        p = ctx.Process(
            target=_encode_multi_process_worker,
            args=(device, self, input_queue, output_queue),
            daemon=True,
        )
        p.start()
        processes.append(p)

    return {'input': input_queue, 'output': output_queue, 'processes': processes}
def _encode_multi_process_worker(target_device: str, model, input_queue, results_queue):
    """
    Internal working process to encode sentences in multi-process setup.

    Workaround: the model is quantized here, inside the worker, i.e. after the
    worker process has been started, instead of before it is handed over.

    :param target_device: Device passed to ``model.encode`` (e.g. cuda:0 or cpu)
    :param model: Model to quantize and then use for encoding
    :param input_queue: Queue delivering ``(id, batch_size, sentences)`` jobs
    :param results_queue: Queue that receives ``[id, embeddings]`` for every job
    """
    # Added line of the workaround: quantize once per worker, before the job loop.
    model = quantize_dynamic(model)

    while True:
        try:
            job_id, batch_size, sentences = input_queue.get()
            embeddings = model.encode(
                sentences,
                device=target_device,
                show_progress_bar=False,
                convert_to_numpy=True,
                batch_size=batch_size,
            )
            results_queue.put([job_id, embeddings])
        except queue.Empty:
            # NOTE(review): `get()` is called without a timeout, so this branch is
            # presumably only reached if the queue implementation raises Empty on
            # its own -- confirm how workers are expected to shut down.
            break
Hello, I'm trying to run a quantized model in multi-process mode. This is my model:
however I get this error:
what can I do? thanks in advance!