jaekyoungkim opened 1 year ago
I am trying to change your HBM model into a multilabel HBM, but it is not that easy. Do you have any ideas, or have you tried a multilabel HBM yourself?
Here is my code.
I erased this part: `if gradient_clipping > 0.0: nn.utils.clip_grad_norm_(model.parameters(), gradient_clipping)` (it is commented out in the loop below).
```python
for e in range(no_epochs):                      # start a new epoch; tqdm(range(no_epochs)) also works
    print('\n epoch ', e)
    cost = 0.0
    for i, data in enumerate(trainloader):      # enumerate(tqdm(trainloader))
        model.train(True)                       # train mode
        opt.zero_grad()                         # reset gradients
        inputs, labels = data                   # unpack the batch
        if inputs.size(1) > config.seq_length:  # truncate over-long sequences
            inputs = inputs[:, :config.seq_length, :]
        if torch.cuda.is_available():           # move the batch to the GPU
            inputs = inputs.cuda(args['cuda_num'])
            labels = labels.cuda(args['cuda_num'])
        out = model(inputs)                     # forward pass

        # criterion = nn.BCELoss()              # for single-label prediction
        criterion = nn.BCEWithLogitsLoss()      # criterion(logits, targets); applies the sigmoid internally
        loss_output = criterion(out[0], labels.type(torch.float))
        cost += loss_output.item()              # accumulate as a float, not a graph node

        # print('epoch ', e, 'step ', i, 'loss:', loss_output.item(), 'num of positives', labels.sum())
        train_loss_tol = float(loss_output.cpu())  # scalar copy for logging
        loss_output.backward()                  # back-propagation
        # if gradient_clipping > 0.0:
        #     nn.utils.clip_grad_norm_(model.parameters(), gradient_clipping)
        opt.step()                              # weight update
        # del inputs, labels, out, loss_output
        torch.cuda.empty_cache()                # free cached GPU memory
        losses.append(train_loss_tol)

    cost = cost / len(trainloader)              # average loss over the epoch
    print('train loss output :', loss_output.item())
    print('train cost : ', cost)
```
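For reference, here is a minimal sketch of how multilabel predictions could be read out of these logits at inference time. It reuses the `model` and `inputs` names and the `out[0]` indexing from the loop above; the 0.5 decision threshold is my own assumption, not something from the repo:

```python
import torch

model.eval()                          # switch off dropout etc. for evaluation
with torch.no_grad():
    logits = model(inputs)[0]         # same output indexing as in the training loop
    probs = torch.sigmoid(logits)     # independent per-label probabilities
    preds = (probs > 0.5).int()       # multi-hot predictions, one decision per label
```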
I think you should change your loss from binary cross-entropy to cross-entropy, i.e. change the line `criterion = nn.BCEWithLogitsLoss()` to `criterion = nn.CrossEntropyLoss()`.
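Note that the two losses expect different target formats, so this is not a drop-in line swap: `nn.BCEWithLogitsLoss` takes float multi-hot targets (one independent sigmoid decision per label), while `nn.CrossEntropyLoss` takes one class index per sample (classes compete through a softmax). A minimal sketch with made-up tensors:

```python
import torch
import torch.nn as nn

logits = torch.randn(4, 3)  # 4 samples, 3 labels (or classes)

# BCEWithLogitsLoss: multilabel; float multi-hot targets, one sigmoid per label
multi_hot = torch.tensor([[1., 0., 1.],
                          [0., 1., 0.],
                          [1., 1., 0.],
                          [0., 0., 1.]])
bce = nn.BCEWithLogitsLoss()(logits, multi_hot)

# CrossEntropyLoss: single-label multiclass; one class index per sample
class_idx = torch.tensor([0, 1, 0, 2])
ce = nn.CrossEntropyLoss()(logits, class_idx)
```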