"""
serializes the tensors (copies GPU tensors to CPU), and
persists the snapshot to disk.
Fsync the file after writing to guarantee persistence.
Call this in a new process to run it in the background.
"""
def _serialize_and_persist(
self,
filepath,
snapshot,
active_snapshot,
lock,
linkpath=None,
iter_chk = None,
epoch_chk = None,
overwrite = True):
print("[{}] START ASYNC".format(time.time()))
with lock:
if active_snapshot.value == 0:
self.logger.error("Cannot persist. Empty snapshot")
return
#Create new stream
s = torch.cuda.Stream()
torch.cuda.stream(s)
#print("Saving : {}".format(filepath))
torch.save(snapshot, filepath)
#print("Saved : {}".format(filepath))
# Clear the snapshot.
with lock:
active_snapshot.value = 0
# Ensure its persisted
f = open(filepath, 'a+')
os.fsync(f.fileno())
f.close()
update_stats(
filepath,
iter_chk=iter_chk,
overwrite=overwrite,
epoch_chk = epoch_chk,
linkpath=linkpath)
print("[{}] END ASYNC".format(time.time()))
**这一段代码中:**

```python
# Create new stream
s = torch.cuda.Stream()
torch.cuda.stream(s)
torch.save(snapshot, filepath)
```

**我实际测试发现,这段代码并没有实现计算和 save 的重叠,CheckFreq 似乎也没有调用这个函数。请教一下,这个函数究竟是否可行呢?**
""" serializes the tensors (copies GPU tensors to CPU), and persists the snapshot to disk.
**这一段代码中
Create new stream
s = torch.cuda.Stream() torch.cuda.stream(s) torch.save(snapshot, filepath) 我实际测试发现并没有实现计算和save的重叠,CheckFreq似乎也没有调用整个函数,请教一下这个函数究竟是否可行呢**