Visual-Behavior / detr-tensorflow

Tensorflow implementation of DETR : Object Detection with Transformers

TypeError: load_tfcsv_dataset() got multiple values for argument 'augmentation' #45

Open husnejahan opened 1 year ago

husnejahan commented 1 year ago

Hi, I am getting this error when loading the dataset for fine-tuning.

from detr_tf.data import load_tfcsv_dataset

train_iterator, class_names = load_tfcsv_dataset("train", config.batch_size, config, augmentation=True, exclude=["person"])
valid_iterator, class_names = load_tfcsv_dataset("test", config.batch_size, config, augmentation=False, exclude=["person"])
print("class_names", class_names)
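This TypeError usually means a positional argument is also filling the augmentation slot: if the installed version of load_tfcsv_dataset takes config as its first positional parameter, the third positional argument here (config) lands on augmentation, which is then passed a second time as a keyword. A hypothetical reordered call, assuming that signature (verify against detr_tf/data/tfcsv.py in your install):

# Hypothetical call order, assuming load_tfcsv_dataset(config, batch_size, augmentation=False, ...)
# in the installed version; check the actual signature in detr_tf/data/tfcsv.py.
train_iterator, class_names = load_tfcsv_dataset(config, config.batch_size, augmentation=True, exclude=["person"])
valid_iterator, class_names = load_tfcsv_dataset(config, config.batch_size, augmentation=False, exclude=["person"])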

ni3s commented 1 year ago

Follow the steps below; it should work.

Step 1: config creation

from detr_tf.training_config import TrainingConfig
from os.path import expanduser
import os

class CustomConfig(TrainingConfig):

    def __init__(self):
        super().__init__()

        # Dataset info
        self.datadir = os.path.join(expanduser("~"), "PATH_Training_Files")
        # The model is trained using fixed-size images.
        # The following is the desired target image size
        self.image_size = (512, 512)

config = CustomConfig()
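Since the loading call in the question reads the batch size from the config, it can be overridden on the same object (a sketch; pick whatever value fits your GPU memory):

config.batch_size = 1  # the loader call in the question reads config.batch_size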

Step 2: create a function for loading the data

# Imports used by the two functions below; the module paths assume the
# detr_tf package layout (verify against your local install).
import os
from random import shuffle

import imageio
import numpy as np
import pandas as pd
import tensorflow as tf

from detr_tf import bbox
from detr_tf.data import processing
from detr_tf.data.transformation import detr_transform

def load_hardhat_data_from_index(index, filenames, train_val, anns, config, augmentation, class_names):
    # Open the image
    image = imageio.imread(os.path.join(config.datadir, f"{train_val}", filenames[index]))
    print(filenames[index])

    # Select all the annotations (bbox and class) on this image
    image_anns = anns[anns["filename"] == filenames[index]]

    # Convert each string class to a number (the target class).
    # class_names is passed in explicitly so the function does not rely on a global.
    t_class = image_anns["class"].map(lambda x: class_names.index(x)).to_numpy()

    # Select the width & height of each image (they should all be the same,
    # since every annotation belongs to the same image)
    width = image_anns["width"].to_numpy()
    height = image_anns["height"].to_numpy()

    # Select the xmin, ymin, xmax and ymax of each bbox, then normalize the
    # bbox to be between 0 and 1. Finally, convert the bbox from
    # xmin,ymin,xmax,ymax to x_center,y_center,width,height
    bbox_list = image_anns[["xmin", "ymin", "xmax", "ymax"]].to_numpy()
    bbox_list = bbox_list / [width[0], height[0], width[0], height[0]]
    t_bbox = bbox.xy_min_xy_max_to_xcycwh(bbox_list)

    # Transform and augment the image with its bboxes and classes if needed
    image, t_bbox, t_class = detr_transform(image, t_bbox, t_class, config, augmentation=augmentation)

    # Normalize the image
    image = processing.normalized_images(image, config)

    return image.astype(np.float32), t_bbox.astype(np.float32), np.expand_dims(t_class, axis=-1)
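A quick eager call of this function on one sample can catch path or CSV problems before wiring up tf.data. The folder and file names below are placeholders for your own layout:

# Hypothetical single-sample check; adjust the folder/file names to your data.
anns = pd.read_csv(os.path.join(config.datadir, "W2Forms/train_Annotation_File.csv"))
CLASS_NAMES = ["background"] + anns["class"].unique().tolist()
filenames = anns["filename"].unique().tolist()

img, t_bbox, t_class = load_hardhat_data_from_index(
    0, filenames, "W2Forms", anns, config, augmentation=False, class_names=CLASS_NAMES)
print(img.shape, img.dtype)         # expected to match config.image_size, float32
print(t_bbox.shape, t_class.shape)  # (n_boxes, 4) and (n_boxes, 1)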

def load_hardhat(train_val, batch_size, config, augmentation=False):
    """ Load the hardhat dataset """
    anns = pd.read_csv(os.path.join(config.datadir, f'{train_val}/train_Annotation_File.csv'))
    CLASS_NAMES = ["background"] + anns["class"].unique().tolist()
    print(CLASS_NAMES, len(CLASS_NAMES))
    filenames = anns["filename"].unique().tolist()
    print(filenames)
    indexes = list(range(0, len(filenames)))
    print("indexes", indexes)

    shuffle(indexes)

    dataset = tf.data.Dataset.from_tensor_slices(indexes)
    dataset = dataset.map(lambda idx: processing.numpy_fc(
        idx, load_hardhat_data_from_index,
        filenames=filenames, train_val=train_val, anns=anns, config=config,
        augmentation=augmentation, class_names=CLASS_NAMES),
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Filter the samples to be sure to keep only those with at least one bbox
    dataset = dataset.filter(lambda imgs, tbbox, tclass: tf.shape(tbbox)[0] > 0)
    # Pad bboxes and labels
    dataset = dataset.map(processing.pad_labels, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Batch images
    dataset = dataset.batch(batch_size, drop_remainder=True)

    return dataset, CLASS_NAMES

Step 3: data loading

dataset, CLASS_NAMES = load_hardhat('W2Forms', 1, config)
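To verify the full pipeline, pull a batch from the resulting dataset (a sketch; the exact target shapes depend on config.image_size and on how processing.pad_labels pads the targets):

for images, t_bbox, t_class in dataset.take(1):
    print("images:", images.shape)   # (batch_size, H, W, 3)
    print("t_bbox:", t_bbox.shape)
    print("t_class:", t_class.shape)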

The data format used for fine-tuning is attached. The training data (images) and the annotation file are stored under /data/W2Forms.

train_W2FORMS (1).csv
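For reference, these are the columns the loader above reads from the annotation CSV; the example row is made up:

filename,width,height,class,xmin,ymin,xmax,ymax
form_0001.png,1024,768,w2form,120,80,560,300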