https://www.kaggle.com/competitions/commonlitreadabilityprize/discussion/257844
evaluate multiple times per epoch

pytorch

def train_one_epoch(model, optimizer, scheduler, dataloader, valid_loader,
                    device, epoch, best_score, valid_labels):
    model.train()
    for step, data in enumerate(dataloader):
        # ... forward / backward / optimizer step on the training batch ...
        # run validation every CFG.eval_freq training steps, not only at the end of the epoch
        if step > 0 and step % CFG.eval_freq == 0:
            valid_epoch_loss, pred = valid_one_epoch(model, valid_loader, device, epoch)
            model.train()  # valid_one_epoch switches to eval mode, so switch back
    return epoch_loss, valid_epoch_loss, pred, best_score
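tensorflow

A rough TensorFlow/Keras counterpart of the same idea (a hedged sketch, not code from the discussion): run validation every eval_freq training batches instead of only at the end of the epoch. model, train_ds, valid_ds and eval_freq are assumed names for a compiled Keras model (single loss, no extra metrics) and tf.data datasets.

best_score = float('inf')
for step, (x, y) in enumerate(train_ds):
    train_loss = model.train_on_batch(x, y)  # one optimization step
    if step > 0 and step % eval_freq == 0:
        # periodic in-epoch validation; returns the scalar loss for this compile setup
        valid_loss = model.evaluate(valid_ds, verbose=0)
        best_score = min(best_score, valid_loss)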
gradient clipping

pytorch
for i, d in enumerate(train_loader):
    # input_ids, mask, target come from the batch dict d (exact keys depend on the Dataset)
    optimizer.zero_grad()
    output, _ = model(input_ids, mask)
    loss = criterion(output, target)
    loss.backward()
    # clip the global gradient norm to 1.0 before the optimizer step
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()
    scheduler.step()
tensorflow
**kwargs (quoted from the tfa.optimizers.AdamW documentation): keyword arguments. Allowed to be {clipnorm, clipvalue, lr, decay, exclude_from_weight_decay}. clipnorm clips gradients by norm; clipvalue clips gradients by value. decay is included for backward compatibility to allow time-inverse decay of the learning rate. lr is included for backward compatibility; learning_rate is recommended instead. exclude_from_weight_decay accepts a list of regex patterns of variables excluded from weight decay.
import tensorflow_addons as tfa

optimizer = tfa.optimizers.AdamW(
    lr=Config.lr,
    weight_decay=Config.weight_decay,
    clipnorm=1.0,  # clip gradients by norm, like torch.nn.utils.clip_grad_norm_ above
)
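The docs quoted above also list exclude_from_weight_decay. A hedged sketch of using it to keep bias and LayerNorm variables out of weight decay; the regex patterns here are assumptions and should be matched against your model's actual variable names.

optimizer = tfa.optimizers.AdamW(
    lr=Config.lr,
    weight_decay=Config.weight_decay,
    clipnorm=1.0,
    # regex patterns; variables whose names match get no weight decay
    exclude_from_weight_decay=['bias', 'LayerNorm'],
)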
disable dropout in the backbone

pytorch
self.bert = transformers.BertModel.from_pretrained(
    BERT_MODEL,
    hidden_dropout_prob=0,
    attention_probs_dropout_prob=0,
)
tensorflow
from transformers import TFAutoModel, AutoConfig
cfg = AutoConfig.from_pretrained('roberta-base')
cfg.hidden_dropout_prob = 0
cfg.attention_probs_dropout_prob = 0
roberta = TFAutoModel.from_pretrained('roberta-base', config=cfg)
freeze layers

pytorch
def freeze(module):
    """
    Freezes module's parameters.
    """
    for parameter in module.parameters():
        parameter.requires_grad = False


self.model = AutoModel.from_pretrained(model_name, config=self.config)

# freeze the embeddings and the first two encoder layers
if self.cfg.freezing:
    freeze(self.model.embeddings)
    freeze(self.model.encoder.layer[:2])
tensorflow (see https://note.nkmk.me/python-tensorflow-keras-trainable-freeze-unfreeze/)
transformer = TFAutoModel.from_pretrained(Config.model, config=cfg)

# freeze the embeddings and the first two encoder layers
transformer.roberta.embeddings.trainable = False
for layer in transformer.roberta.encoder.layer[:2]:
    layer.trainable = False
re-initialize layers
https://www.kaggle.com/competitions/commonlitreadabilityprize/discussion/257302
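A minimal PyTorch sketch of re-initializing the top encoder layers before fine-tuning (an illustration under assumptions, not the code from the linked discussion): model is assumed to be a Hugging Face encoder such as AutoModel.from_pretrained('roberta-base'), and num_reinit_layers is a hypothetical parameter.

import torch.nn as nn

def reinit_last_layers(model, num_reinit_layers=2):
    # re-initialize the weights of the last `num_reinit_layers` encoder layers
    for layer in model.encoder.layer[-num_reinit_layers:]:
        for module in layer.modules():
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.LayerNorm):
                module.weight.data.fill_(1.0)
                module.bias.data.zero_()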