Open oSallyo opened 4 months ago
我也是只有cpu,然后我用GitHub Copilot询问修改后可以运行,它说在代码中,pynvml
库被用于监控GPU内存使用情况。如果想在使用CPU时修改代码,可以简单地移除与pynvml
相关的代码,因为在CPU模式下,不需要监控GPU内存。除此之外还有一个大问题就是gym的版本修改等,你可以看另一个问题,有大佬发了教程。
目前修改完test-cpu如下:
import copy
import json
import os
import random
import time as time

import gym
import numpy as np
import pandas as pd
import torch

import PPO_model
from env.load_data import nums_detec
def setup_seed(seed):
    """Seed every RNG used by the project so runs are reproducible.

    Parameters
    ----------
    seed : int
        Seed applied to torch (CPU and CUDA), NumPy, and the stdlib RNG.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # no-op on CPU-only machines; safe to keep
    np.random.seed(seed)
    random.seed(seed)
    # Force deterministic cuDNN kernels (irrelevant on CPU, but harmless)
    torch.backends.cudnn.deterministic = True
def tabulate_rows(rows, headers):
    """Render *rows* as a text table, or report when there is nothing to show.

    NOTE(review): relies on a module-level ``tabulate`` import that is not
    present in this paste — confirm ``import tabulate`` exists upstream.
    """
    if rows:
        return tabulate.tabulate(rows, headers=headers)
    else:
        print("No data to tabulate.")


def main():
    """Test trained PPO scheduling models on FJSP benchmark instances.

    CPU-only variant: all pynvml/GPU-memory monitoring has been removed and
    checkpoints are loaded with ``map_location='cpu'``.
    """
    # PyTorch initialization — pin everything to the CPU
    device = torch.device("cpu")
    torch.set_default_dtype(torch.float32)
    torch.set_default_device(device)
    print("PyTorch device: ", device.type)
    torch.set_printoptions(precision=None, threshold=np.inf, edgeitems=None,
                           linewidth=None, profile=None, sci_mode=False)

    # Load config and init objects
    with open("./config.json", 'r') as load_f:
        load_dict = json.load(load_f)
    env_paras = load_dict["env_paras"]
    model_paras = load_dict["model_paras"]
    train_paras = load_dict["train_paras"]
    test_paras = load_dict["test_paras"]
    env_paras["device"] = device
    model_paras["device"] = device
    env_test_paras = copy.deepcopy(env_paras)
    num_ins = test_paras["num_ins"]
    if test_paras["sample"]:
        env_test_paras["batch_size"] = test_paras["num_sample"]
    else:
        env_test_paras["batch_size"] = 1
    model_paras["actor_in_dim"] = model_paras["out_size_ma"] * 2 + model_paras["out_size_ope"] * 2
    model_paras["critic_in_dim"] = model_paras["out_size_ma"] + model_paras["out_size_ope"]

    data_path = "./data_test/{0}/".format(test_paras["data_path"])
    test_files = os.listdir(data_path)
    test_files.sort(key=lambda x: x[:-4])  # sort by file name without extension
    test_files = test_files[:num_ins]
    mod_files = os.listdir('./model/')[:]

    memories = PPO_model.Memory()
    model = PPO_model.PPO(model_paras, train_paras)
    rules = test_paras["rules"]
    envs = []  # Store multiple environments

    # Detect and add models to "rules"
    if "DRL" in rules:
        for root, ds, fs in os.walk('./model/'):
            for f in fs:
                if f.endswith('.pt'):
                    rules.append(f)
    if len(rules) != 1 and "DRL" in rules:
        rules.remove("DRL")

    # Generate data files and fill in the header.
    str_time = time.strftime("%Y%m%d_%H%M%S", time.localtime(time.time()))
    save_path = './save/test_{0}'.format(str_time)
    os.makedirs(save_path)
    # BUGFIX: the writers were previously saved AND closed right here and then
    # written to again inside the rule loop below. Writing to a closed
    # ExcelWriter is what triggers "ValueError: index must be as long as the
    # number of data rows". Keep both writers open for the whole run and close
    # them exactly once at the end (pandas 2.x: close() saves; save() removed).
    writer = pd.ExcelWriter('{0}/makespan_{1}.xlsx'.format(save_path, str_time))  # Makespan data storage path
    writer_time = pd.ExcelWriter('{0}/time_{1}.xlsx'.format(save_path, str_time))  # time data storage path
    file_name = [test_files[i] for i in range(num_ins)]
    data_file = pd.DataFrame(file_name, columns=["file_name"])
    data_file.to_excel(writer, sheet_name='Sheet1', index=False)
    data_file.to_excel(writer_time, sheet_name='Sheet1', index=False)

    # Rule-by-rule (model-by-model) testing
    start = time.time()
    for i_rules in range(len(rules)):
        rule = rules[i_rules]
        # Load trained model
        if rule.endswith('.pt'):
            if device.type == 'cuda':
                model_CKPT = torch.load('./model/' + mod_files[i_rules])
            else:
                # CPU-only machine: remap any CUDA tensors in the checkpoint
                model_CKPT = torch.load('./model/' + mod_files[i_rules], map_location='cpu')
            print('\nloading checkpoint:', mod_files[i_rules])
            model.policy.load_state_dict(model_CKPT)
            model.policy_old.load_state_dict(model_CKPT)
        print('rule:', rule)

        # Schedule instance by instance
        step_time_last = time.time()
        makespans = []
        times = []
        for i_ins in range(num_ins):
            test_file = data_path + test_files[i_ins]
            with open(test_file) as file_object:
                line = file_object.readlines()
            ins_num_jobs, ins_num_mas, _ = nums_detec(line)
            env_test_paras["num_jobs"] = ins_num_jobs
            env_test_paras["num_mas"] = ins_num_mas
            # Environment object already exists
            if len(envs) == num_ins:
                env = envs[i_ins]
            # Create environment object
            else:
                # DRL-S, each env contains multiple (=num_sample) copies of one instance
                if test_paras["sample"]:
                    env = gym.make('fjsp-v0', case=[test_file] * test_paras["num_sample"],
                                   env_paras=env_test_paras, data_source='file')
                # DRL-G, each env contains one instance
                else:
                    env = gym.make('fjsp-v0', case=[test_file], env_paras=env_test_paras, data_source='file')
                envs.append(copy.deepcopy(env))
                print("Create env[{0}]".format(i_ins))

            # Schedule an instance/environment
            # DRL-S
            if test_paras["sample"]:
                makespan, time_re = schedule(env, model, memories, flag_sample=test_paras["sample"])
                makespans.append(torch.min(makespan))
                times.append(time_re)
            # DRL-G
            else:
                time_s = []
                makespan_s = []  # In fact, the results obtained by DRL-G do not change
                for j in range(test_paras["num_average"]):
                    makespan, time_re = schedule(env, model, memories)
                    makespan_s.append(makespan)
                    time_s.append(time_re)
                    env.reset()
                makespans.append(torch.mean(torch.tensor(makespan_s)))
                times.append(torch.mean(torch.tensor(time_s)))
            print("finish env {0}".format(i_ins))
        print("rule_spend_time: ", time.time() - step_time_last)

        # Save makespan and time data to files (writers stay open across rules)
        data = pd.DataFrame(torch.tensor(makespans).t().tolist(), columns=[rule])
        data.to_excel(writer, sheet_name='Sheet1', index=False, startcol=i_rules + 1)
        data = pd.DataFrame(torch.tensor(times).t().tolist(), columns=[rule])
        data.to_excel(writer_time, sheet_name='Sheet1', index=False, startcol=i_rules + 1)

        for env in envs:
            env.reset()

    # Close (and thereby save) the writers exactly once, after all rules.
    writer.close()
    writer_time.close()
    print("total_spend_time: ", time.time() - start)
def schedule(env, model, memories, flag_sample=False):
    """Roll the (frozen) policy on *env* until every instance in the batch is done.

    Parameters
    ----------
    env : gym environment exposing ``state``, ``done_batch``, ``step``,
        ``validate_gantt`` and ``makespan_batch``.
    model : PPO wrapper; only ``model.policy_old.act`` is used (no grad).
    memories : replay memory passed through to ``act``.
    flag_sample : bool, keyword-only in spirit — True for DRL-S sampling.

    Returns
    -------
    tuple
        (deep copy of ``env.makespan_batch``, wall-clock seconds spent).
    """
    # Get state and completion signal
    state = env.state
    dones = env.done_batch
    done = False  # Unfinished at the beginning
    last_time = time.time()
    # BUGFIX: was `while ~done` — bitwise NOT of a Python bool (~False == -1)
    # only worked by accident; `not` is the correct logical negation.
    while not done:
        with torch.no_grad():
            actions = model.policy_old.act(state, memories, dones, flag_sample=flag_sample, flag_train=False)
        state, rewards, dones = env.step(actions)  # environment transit
        done = dones.all()
    spend_time = time.time() - last_time  # time taken to solve this environment (instance)
    # Verify the solution; an invalid Gantt chart is only reported, not raised
    gantt_result = env.validate_gantt()[0]
    if not gantt_result:
        print("Scheduling Error!!!!!!")
    return copy.deepcopy(env.makespan_batch), spend_time
# Script entry point. BUGFIX: the paste had `if name == 'main':` because
# GitHub markdown swallowed the double underscores; the guard must compare
# the module-level `__name__` against '__main__'.
if __name__ == '__main__':
    main()
我也是只有cpu,然后我用GitHub Copilot询问修改后可以运行,它说在代码中,`pynvml` 库被用于监控GPU内存使用情况。如果想在使用CPU时修改代码,可以简单地移除与 `pynvml` 相关的代码,因为在CPU模式下,不需要监控GPU内存。除此之外还有一个大问题就是gym的版本修改等,你可以看另一个问题,有大佬发了教程。 目前修改完test-cpu如下: import copy import json import os import random import time as time
import gym import pandas as pd import torch import numpy as np
from env.load_data import nums_detec import PPO_model
def setup_seed(seed): torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) np.random.seed(seed) random.seed(seed) torch.backends.cudnn.deterministic = True
def tabulate_rows(rows, headers): if rows: return tabulate.tabulate(rows, headers=headers) else: print("No data to tabulate.") def main(): # PyTorch initialization device = torch.device("cpu") torch.set_default_dtype(torch.float32) torch.set_default_device(device) print("PyTorch device: ", device.type) torch.set_printoptions(precision=None, threshold=np.inf, edgeitems=None, linewidth=None, profile=None, sci_mode=False)
# Load config and init objects # ... rest of your code ... # Load config and init objects with open("./config.json", 'r') as load_f: load_dict = json.load(load_f) env_paras = load_dict["env_paras"] model_paras = load_dict["model_paras"] train_paras = load_dict["train_paras"] test_paras = load_dict["test_paras"] env_paras["device"] = device model_paras["device"] = device env_test_paras = copy.deepcopy(env_paras) num_ins = test_paras["num_ins"] if test_paras["sample"]: env_test_paras["batch_size"] = test_paras["num_sample"] else: env_test_paras["batch_size"] = 1 model_paras["actor_in_dim"] = model_paras["out_size_ma"] * 2 + model_paras["out_size_ope"] * 2 model_paras["critic_in_dim"] = model_paras["out_size_ma"] + model_paras["out_size_ope"] data_path = "./data_test/{0}/".format(test_paras["data_path"]) test_files = os.listdir(data_path) test_files.sort(key=lambda x: x[:-4]) test_files = test_files[:num_ins] mod_files = os.listdir('./model/')[:] memories = PPO_model.Memory() model = PPO_model.PPO(model_paras, train_paras) rules = test_paras["rules"] envs = [] # Store multiple environments # Detect and add models to "rules" if "DRL" in rules: for root, ds, fs in os.walk('./model/'): for f in fs: if f.endswith('.pt'): rules.append(f) if len(rules) != 1: if "DRL" in rules: rules.remove("DRL") # Generate data files and fill in the header str_time = time.strftime("%Y%m%d_%H%M%S", time.localtime(time.time())) save_path = './save/test_{0}'.format(str_time) os.makedirs(save_path) writer = pd.ExcelWriter( '{0}/makespan_{1}.xlsx'.format(save_path, str_time)) # Makespan data storage path writer_time = pd.ExcelWriter('{0}/time_{1}.xlsx'.format(save_path, str_time)) # time data storage path file_name = [test_files[i] for i in range(num_ins)] data_file = pd.DataFrame(file_name, columns=["file_name"]) data_file.to_excel(writer, sheet_name='Sheet1', index=False) writer._save() writer.close() data_file.to_excel(writer_time, sheet_name='Sheet1', index=False) writer_time._save() 
writer_time.close() # Rule-by-rule (model-by-model) testing start = time.time() for i_rules in range(len(rules)): rule = rules[i_rules] # Load trained model if rule.endswith('.pt'): if device.type == 'cuda': model_CKPT = torch.load('./model/' + mod_files[i_rules]) else: model_CKPT = torch.load('./model/' + mod_files[i_rules], map_location='cpu') print('\nloading checkpoint:', mod_files[i_rules]) model.policy.load_state_dict(model_CKPT) model.policy_old.load_state_dict(model_CKPT) print('rule:', rule) # Schedule instance by instance step_time_last = time.time() makespans = [] times = [] for i_ins in range(num_ins): test_file = data_path + test_files[i_ins] with open(test_file) as file_object: line = file_object.readlines() ins_num_jobs, ins_num_mas, _ = nums_detec(line) env_test_paras["num_jobs"] = ins_num_jobs env_test_paras["num_mas"] = ins_num_mas # Environment object already exists if len(envs) == num_ins: env = envs[i_ins] # Create environment object else: # DRL-S, each env contains multiple (=num_sample) copies of one instance if test_paras["sample"]: env = gym.make('fjsp-v0', case=[test_file] * test_paras["num_sample"], env_paras=env_test_paras, data_source='file') # DRL-G, each env contains one instance else: env = gym.make('fjsp-v0', case=[test_file], env_paras=env_test_paras, data_source='file') envs.append(copy.deepcopy(env)) print("Create env[{0}]".format(i_ins)) # Schedule an instance/environment # DRL-S if test_paras["sample"]: makespan, time_re = schedule(env, model, memories, flag_sample=test_paras["sample"]) makespans.append(torch.min(makespan)) times.append(time_re) # DRL-G else: time_s = [] makespan_s = [] # In fact, the results obtained by DRL-G do not change for j in range(test_paras["num_average"]): makespan, time_re = schedule(env, model, memories) makespan_s.append(makespan) time_s.append(time_re) env.reset() makespans.append(torch.mean(torch.tensor(makespan_s))) times.append(torch.mean(torch.tensor(time_s))) print("finish env 
{0}".format(i_ins)) print("rule_spend_time: ", time.time() - step_time_last) # Save makespan and time data to files data = pd.DataFrame(torch.tensor(makespans).t().tolist(), columns=[rule]) data.to_excel(writer, sheet_name='Sheet1', index=False, startcol=i_rules + 1) writer._save() writer._close() data = pd.DataFrame(torch.tensor(times).t().tolist(), columns=[rule]) data.to_excel(writer_time, sheet_name='Sheet1', index=False, startcol=i_rules + 1) writer_time.save() writer_time.close() for env in envs: env.reset() print("total_spend_time: ", time.time() - start)
def schedule(env, model, memories, flag_sample=False): # Get state and completion signal state = env.state dones = env.done_batch done = False # Unfinished at the beginning last_time = time.time() i = 0 while ~done: i += 1 with torch.no_grad(): actions = model.policy_old.act(state, memories, dones, flag_sample=flag_sample, flag_train=False) state, rewards, dones = env.step(actions) # environment transit done = dones.all() spend_time = time.time() - last_time # The time taken to solve this environment (instance) # print("spend_time: ", spend_time)
# Verify the solution gantt_result = env.validate_gantt()[0] if not gantt_result: print("Scheduling Error!!!!!!") return copy.deepcopy(env.makespan_batch), spend_time
if __name__ == '__main__': main()
能运行创建环境,
但rule_spend_time之后遇到了这样的问题:ValueError: index must be as long as the number of data rows: len(index)=7 len(rows)=0
求助大佬,哭泣
没有gpu,不能用pynvml包,请问test.py代码应该如何修改