Hello! Thank you for your work. It's a great contribution.
I’m training the model using the code in train.py. I’m doing it on a small subset of the CLWD dataset because I’m trying to overfit the model (it’s just a sanity check: I want to see that the model learns).
If I use a small sample of 10 examples (so 10 images in each of the dedicated folders: Watermarked_images, Watermark_free_images, etc.) and run the training loop, I get an error when the batching occurs. It’s an OpenCV error which seems to be related to the train_loader generator.
If I use 200 examples (as I'm using in #31), I don’t get the problem.
class DictArgs(object):
    """Expose the entries of a dict as instance attributes.

    Lets a plain options dict be consumed by code that reads its settings
    with attribute access (e.g. ``args.freq``) instead of ``d["freq"]``.

    Args:
        d: Mapping of option name to value. Every key becomes an attribute
            of the instance holding the corresponding value.
    """

    # The constructor must be spelled ``__init__``; a method named ``init``
    # is never invoked by ``DictArgs(d)``, which then fails with
    # ``TypeError`` because ``object()`` accepts no arguments.
    def __init__(self, d):
        for k, v in d.items():
            setattr(self, k, v)
ARGS = DictArgs(ARGS)
In `media/10CLWD-train-equal-test` I have a copy of the CLWD folder structure with just 10 examples, and with the test and train folders being identical. I'm not interested in testing performance on unseen data at this point; the test folder is there just so the code in the repository doesn't break.
- TRAINING LOOP
from future import print_function, absolute_import
import argparse
import torch,time,os
torch.backends.cudnn.benchmark = True
from src.utils.misc import save_checkpoint, adjust_learning_rate
import src.models as models
import datasets as datasets
from options import Options
import numpy as np
model = models.dict[args.models](datasets=data_loaders, args=args)
model = models.__dict__[MODEL_NAME](datasets=data_loaders, args=ARGS)
print('============================ Initization Finish && Training Start =============================================')
print(f"It will start in epoch: {model.args.start_epoch}\nIt will end in epoch: {model.args.epochs}")
for epoch in range(model.args.start_epoch, model.args.epochs):
lr = adjust_learning_rate(data_loaders, model, epoch, lr, args)
print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))
model.record('lr',lr, epoch)
model.train(epoch)
# save model
save_epochs = {1,10,20,50,100,200,300,400}
if epoch in save_epochs:
model.validate(epoch)
model.flush()
print(f"Saving checkpoint of epoch {epoch}")
model.save_checkpoint(filename=f"checkpoint{epoch}.pth.tar")
# model.validate(epoch)
if args.freq < 0:
model.validate(epoch)
model.flush()
model.save_checkpoint()
return model
final_model = train(ARGS)
I get the following output with the error as soon as training starts
==> creating model
==> creating model [Finish]
==> Total params: 21.39M
==> Total devices: 1
==> Current Checkpoint: checkpoint/slbr_v1
============================ Initization Finish && Training Start =============================================
It will start in epoch: 0
It will end in epoch: 21
/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/src/models/SLBR.py in train(self, epoch)
104 end = time.time()
105 bar = Bar('Processing {} '.format(self.args.nets), max=len(self.train_loader))
--> 106 for i, batches in enumerate(self.train_loader):
107 current_index = len(self.train_loader) * epoch + i
108
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in next(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
1083 else:
1084 del self._task_info[idx]
-> 1085 return self._process_data(data)
1086
1087 def _try_put_index(self):
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
1109 self._try_put_index()
1110 if isinstance(data, ExceptionWrapper):
-> 1111 data.reraise()
1112 return data
1113
/usr/local/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
426 # have message field
427 raise self.exc_type(message=msg)
--> 428 raise self.exc_type(msg)
429
430
error: Caught error in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 82, in getitem
sample = self.get_sample(index)
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 63, in get_sample
img_J = cv2.cvtColor(img_J, cv2.COLOR_BGR2RGB)
cv2.error: OpenCV(4.2.0) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
I've narrowed it down to a problem in the batching, as the stack trace suggests. I tried a minimal example that just initialises the `train_loader` and iterates over the batches, printing the path being processed. The same error occurs.
error Traceback (most recent call last)
/tmp/ipykernel_32332/1400065821.py in
3 num_workers=args.workers, pin_memory=True)
4
----> 5 for i, batch in enumerate(train_loader):
6 print(i,batch["img_path"],"\n")
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in next(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
1083 else:
1084 del self._task_info[idx]
-> 1085 return self._process_data(data)
1086
1087 def _try_put_index(self):
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
1109 self._try_put_index()
1110 if isinstance(data, ExceptionWrapper):
-> 1111 data.reraise()
1112 return data
1113
/usr/local/lib/python3.7/site-packages/torch/_utils.py in reraise(self)
426 # have message field
427 raise self.exc_type(message=msg)
--> 428 raise self.exc_type(msg)
429
430
error: Caught error in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 82, in getitem
sample = self.get_sample(index)
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 63, in get_sample
img_J = cv2.cvtColor(img_J, cv2.COLOR_BGR2RGB)
cv2.error: OpenCV(4.2.0) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
Some images do get processed, but it fails on the last batch. The images missing in this example (`7.jpg` and `9.jpg`) are in the folder; I double-checked. Have you run into anything similar?
Thanks for the support
Hello! Thank you for your work. It's a great contribution.
I’m training the model using the code in train.py. I’m doing it on a small subset of the CLWD dataset because I’m trying to overfit the model (it’s just a sanity check: I want to see that the model learns).
If I use a small sample of 10 examples (so 10 images in each of the dedicated folders: Watermarked_images, Watermark_free_images, etc.) and run the training loop, I get an error when the batching occurs. It’s an OpenCV error which seems to be related to the train_loader generator.
If I use 200 examples (as I'm using in #31), I don’t get the problem.
CODE
ARGS = dict(checkpoint=CHECKPOINT_DIR, crop_size=CROP_SIZE, dataset='clwd', dataset_dir=DATASET_DIR, debug=False, epochs=21, evaluate=False,
freq=-1, lr=0.01, schedule=[100,200,300], gamma=0.1,
sigma_decay=0,
resume=RESUME_DIR, start_epoch=0, nets='slbr', test_dir=TEST_DIR, train_batch=2, dlr=0.001, data='', data_augumentation=False, finetune='', flip=False, alpha=0.5, beta1=0.9, beta2=0.999, bg_mode='res_mask', gan_norm=False, gpu=True, gpu_id='0', hl=False, input_size=256, k_center=2, k_refine=3, k_skip_stage=3, lambda_content=0, lambda_iou=0, lambda_l1=4, lambda_mask=1, lambda_primary=0.01, lambda_style=0, loss_type='l2', mask_mode='res', masked=False, momentum=0, name='slbr_v1', no_flip=True, normalized_input=False, preprocess='resize', project_mode='simple', requires_grad=False, res=False, sim_metric='cos', sltype='vggx', test_batch=1,
use_refine=True, weight_decay=0, workers=1)
class DictArgs(object):
    """Expose the entries of a dict as instance attributes.

    Lets a plain options dict be consumed by code that reads its settings
    with attribute access (e.g. ``args.freq``) instead of ``d["freq"]``.

    Args:
        d: Mapping of option name to value. Every key becomes an attribute
            of the instance holding the corresponding value.
    """

    # The constructor must be spelled ``__init__``; a method named ``init``
    # is never invoked by ``DictArgs(d)``, which then fails with
    # ``TypeError`` because ``object()`` accepts no arguments.
    def __init__(self, d):
        for k, v in d.items():
            setattr(self, k, v)
ARGS = DictArgs(ARGS)
from future import print_function, absolute_import
import argparse import torch,time,os
torch.backends.cudnn.benchmark = True
from src.utils.misc import save_checkpoint, adjust_learning_rate import src.models as models
import datasets as datasets from options import Options import numpy as np
def train(args): args.seed = 1 np.random.seed(args.seed) torch.manual_seed(args.seed) MODEL_NAME = "SLBR"
model = models.dict[args.models](datasets=data_loaders, args=args)
if args.freq < 0:
model.validate(epoch)
model.flush()
model.save_checkpoint()
final_model = train(ARGS)
==> creating model ==> creating model [Finish] ==> Total params: 21.39M ==> Total devices: 1 ==> Current Checkpoint: checkpoint/slbr_v1 ============================ Initization Finish && Training Start ============================================= It will start in epoch: 0 It will end in epoch: 21
Epoch: 1 | LR: 0.01000000
error Traceback (most recent call last) /tmp/ipykernel_32332/965729503.py in
----> 1 final_model = train(ARGS)
/tmp/ipykernel_32332/1760790972.py in train(args) 46 47 model.record('lr',lr, epoch) ---> 48 model.train(epoch) 49 50
/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/src/models/SLBR.py in train(self, epoch) 104 end = time.time() 105 bar = Bar('Processing {} '.format(self.args.nets), max=len(self.train_loader)) --> 106 for i, batches in enumerate(self.train_loader): 107 current_index = len(self.train_loader) * epoch + i 108
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in next(self) 433 if self._sampler_iter is None: 434 self._reset() --> 435 data = self._next_data() 436 self._num_yielded += 1 437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self) 1083 else: 1084 del self._task_info[idx] -> 1085 return self._process_data(data) 1086 1087 def _try_put_index(self):
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data) 1109 self._try_put_index() 1110 if isinstance(data, ExceptionWrapper): -> 1111 data.reraise() 1112 return data 1113
/usr/local/lib/python3.7/site-packages/torch/_utils.py in reraise(self) 426 # have message field 427 raise self.exc_type(message=msg) --> 428 raise self.exc_type(msg) 429 430
error: Caught error in DataLoader worker process 0. Original Traceback (most recent call last): File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop data = fetcher.fetch(index) File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch data = [self.dataset[idx] for idx in possibly_batched_index] File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 82, in getitem
sample = self.get_sample(index)
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 63, in get_sample
img_J = cv2.cvtColor(img_J, cv2.COLOR_BGR2RGB)
cv2.error: OpenCV(4.2.0) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
args = ARGS train_loader = torch.utils.data.DataLoader(datasets.CLWDDataset("train", args),batch_size=args.train_batch, shuffle=True, num_workers=args.workers, pin_memory=True)
for i, batch in enumerate(train_loader): print(i,batch["img_path"],"\n")
0 ['media/10CLWD-train-equal-test/train/Watermarked_image/8.jpg', 'media/10CLWD-train-equal-test/train/Watermarked_image/10.jpg']
1 ['media/10CLWD-train-equal-test/train/Watermarked_image/5.jpg', 'media/10CLWD-train-equal-test/train/Watermarked_image/3.jpg']
2 ['media/10CLWD-train-equal-test/train/Watermarked_image/1.jpg', 'media/10CLWD-train-equal-test/train/Watermarked_image/6.jpg']
3 ['media/10CLWD-train-equal-test/train/Watermarked_image/4.jpg', 'media/10CLWD-train-equal-test/train/Watermarked_image/2.jpg']
error Traceback (most recent call last) /tmp/ipykernel_32332/1400065821.py in
3 num_workers=args.workers, pin_memory=True)
4
----> 5 for i, batch in enumerate(train_loader):
6 print(i,batch["img_path"],"\n")
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in next(self) 433 if self._sampler_iter is None: 434 self._reset() --> 435 data = self._next_data() 436 self._num_yielded += 1 437 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self) 1083 else: 1084 del self._task_info[idx] -> 1085 return self._process_data(data) 1086 1087 def _try_put_index(self):
/usr/local/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _process_data(self, data) 1109 self._try_put_index() 1110 if isinstance(data, ExceptionWrapper): -> 1111 data.reraise() 1112 return data 1113
/usr/local/lib/python3.7/site-packages/torch/_utils.py in reraise(self) 426 # have message field 427 raise self.exc_type(message=msg) --> 428 raise self.exc_type(msg) 429 430
error: Caught error in DataLoader worker process 0. Original Traceback (most recent call last): File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop data = fetcher.fetch(index) File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch data = [self.dataset[idx] for idx in possibly_batched_index] File "/usr/local/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 82, in getitem
sample = self.get_sample(index)
File "/alloc/data/fury_watermark-remover-fda/discovery/SLBR-Visible-Watermark-Removal-master/datasets/clwd_dataset.py", line 63, in get_sample
img_J = cv2.cvtColor(img_J, cv2.COLOR_BGR2RGB)
cv2.error: OpenCV(4.2.0) /io/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'