Closed buptLwz closed 8 months ago
Hi,
Thanks for your interest. We use either V100 or P40 depending on availability.
Please let me know if you have further questions.
What about the specific number of GPUs? BTW, I'm also interested in your other work. Regarding your proposed new dataset R2-YTVOS, how can I obtain it?
We typically use 4 GPUs for an experiment.
For the R2-VOS dataset, unfortunately, because more than a year passed between the paper's first submission and its final acceptance, we didn't keep the instantiated dataset. If you want to create a similar-style dataset, you can use the following code and make the necessary adjustments.
BTW, we are launching a universal benchmark (R^2-bench) for robust referring tasks and will release it shortly; please keep an eye on arXiv if you are interested.
import glob
import re

import cv2
import mmcv
import nltk
import numpy as np
from tqdm import tqdm
# Fetch the NLTK resources used by the functions below: 'punkt' backs
# nltk.sent_tokenize / nltk.word_tokenize and 'averaged_perceptron_tagger'
# backs nltk.pos_tag. These calls run on every import of this module.
# NOTE(review): recent NLTK releases may instead require 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' -- confirm against the installed version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
def inspect_json(val_type):
    """Load one validation meta-expression file and print its whole content.

    Args:
        val_type: Suffix selecting the file to inspect; the file read is
            ``meta_expressions_{val_type}.json`` (e.g. ``'random'``).
    """
    json_path = f'/mnt/data/refvos/meta_expressions/valid/meta_expressions_{val_type}.json'
    print(mmcv.load(json_path))
def generate_random():
    """Write a validation split whose expressions are shuffled across videos.

    The expression dicts of all videos are collected, shuffled as whole
    units, and handed back to the videos in their original order, so a
    video usually ends up with the expressions of some other video. The
    result is saved as ``meta_expressions_random.json``.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    videos = results['videos']
    # A dict view cannot be indexed in Python 3; materialise the ids so that
    # they can be paired positionally with the shuffled expression dicts.
    video_list = list(videos)
    # shuffle exps
    exps_list = [videos[video]['expressions'] for video in video_list]
    np.random.shuffle(exps_list)
    for video, expressions in zip(tqdm(video_list), exps_list):
        videos[video]['expressions'] = expressions
    mmcv.dump(results, '/mnt/data/refvos/meta_expressions/valid/meta_expressions_random.json')
def generate_object():
    """Write a validation split in which the first noun of each expression is swapped.

    For every expression, the first token tagged ``NN`` in its first sentence
    is replaced by a different word drawn at random from the ``obj`` list in
    ``parse_exp.json``. Expressions without an ``NN`` token are kept verbatim.
    Each expression entry is rewritten as ``{'exp': <sentence>}`` and the
    result is saved as ``meta_expressions_object.json``.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    obj = mmcv.load('/mnt/data/refvos/meta_expressions/valid/parse_exp.json')['obj']
    video_list = list(results['videos'].keys())
    with tqdm(total=len(video_list)) as pbar:
        for video in video_list:
            expressions = results['videos'][video]['expressions']
            # Only values of existing keys are reassigned, so iterating
            # items() while writing back is safe.
            for k, v in expressions.items():
                sentence = v['exp']
                first_sentence = nltk.sent_tokenize(sentence)[0]
                tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
                # Swap the first noun only; later nouns stay untouched.
                first_noun = next((token for token, tag in tagged if tag == 'NN'), None)
                if first_noun is not None:
                    new_obj = np.random.choice(obj)
                    while new_obj == first_noun:
                        new_obj = np.random.choice(obj)
                    replacement = str(new_obj)
                    # Match the noun as a whole word so that e.g. "man" is
                    # not rewritten inside "woman".
                    pattern = r'(?<!\w)' + re.escape(first_noun) + r'(?!\w)'
                    # A callable replacement keeps backslashes in the new
                    # word from being read as regex escapes.
                    sentence = re.sub(pattern, lambda _match: replacement, sentence)
                expressions[k] = {'exp': sentence}
            pbar.update(1)
    mmcv.dump(results, '/mnt/data/refvos/meta_expressions/valid/meta_expressions_object.json')
def generate_action():
    """Write a validation split in which every ``-ing`` verb is swapped.

    For every expression, each distinct token tagged ``VBG`` in its first
    sentence is mapped to a different word drawn at random from the ``act``
    list in ``parse_exp.json``, and all whole-word occurrences are replaced
    in a single pass. Expressions without a ``VBG`` token are kept verbatim.
    Each expression entry is rewritten as ``{'exp': <sentence>}`` and the
    result is saved as ``meta_expressions_action.json``.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    act = mmcv.load('/mnt/data/refvos/meta_expressions/valid/parse_exp.json')['act']
    video_list = list(results['videos'].keys())
    with tqdm(total=len(video_list)) as pbar:
        for video in video_list:
            expressions = results['videos'][video]['expressions']
            # Only values of existing keys are reassigned, so iterating
            # items() while writing back is safe.
            for k, v in expressions.items():
                sentence = v['exp']
                first_sentence = nltk.sent_tokenize(sentence)[0]
                tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
                # dict.fromkeys de-duplicates while keeping first-seen order.
                verbs = list(dict.fromkeys(token for token, tag in tagged if tag == 'VBG'))
                if verbs:
                    swaps = {}
                    for verb in verbs:
                        new_act = np.random.choice(act)
                        while new_act == verb:
                            new_act = np.random.choice(act)
                        swaps[verb] = str(new_act)
                    # One whole-word pass over the original text: a freshly
                    # inserted verb can never be replaced a second time, and
                    # substrings of other words are left alone.
                    alternation = '|'.join(re.escape(verb) for verb in verbs)
                    pattern = r'(?<!\w)(?:' + alternation + r')(?!\w)'
                    sentence = re.sub(pattern, lambda match: swaps[match.group(0)], sentence)
                expressions[k] = {'exp': sentence}
            pbar.update(1)
    mmcv.dump(results, '/mnt/data/refvos/meta_expressions/valid/meta_expressions_action.json')
def process_col():
    """Extract colour entries from ``color.csv`` into ``color.json``.

    Every other line of the CSV (the 1st, 3rd, 5th, ...) is kept in full,
    without its line terminator, and the resulting list is stored under the
    ``'color'`` key of ``color.json``.
    """
    with open('color.csv') as file:
        # rstrip('\n') instead of slicing off the last character: the final
        # line may lack a trailing newline and must not lose a real letter.
        color = [line.rstrip('\n') for i, line in enumerate(file) if i % 2 == 0]
    mmcv.dump({'color': color}, 'color.json')
def generate_color():
    """Write paired validation splits with swapped and original colour words.

    An expression is selected when its first sentence contains a token tagged
    ``JJ`` that is listed in ``color.json``. Each such colour word is mapped
    to a different colour drawn at random from that list and replaced as a
    whole word in a single pass.

    Two files are written, both restricted to videos with at least one
    selected expression and with expressions renumbered from ``'0'``:

    * ``meta_expressions_color.json`` -- the colour-swapped expressions;
    * ``meta_expressions_old_color.json`` -- the matching original ones.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    color = mmcv.load('color.json')['color']
    known_colors = set(color)  # O(1) membership tests inside the token loop
    video_list = list(results['videos'].keys())
    video_with_old_color = {}
    video_with_new_color = {}
    with tqdm(total=len(video_list)) as pbar:
        for video in video_list:
            video_meta = results['videos'][video]
            new_exp = []
            old_exp = []
            for v in video_meta['expressions'].values():
                sentence = v['exp']
                first_sentence = nltk.sent_tokenize(sentence)[0]
                tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
                # Distinct colour adjectives, in first-seen order.
                found = list(dict.fromkeys(
                    token for token, tag in tagged
                    if tag == 'JJ' and token in known_colors
                ))
                if not found:
                    continue
                swaps = {}
                for old_color in found:
                    new_color = np.random.choice(color)
                    while new_color == old_color:
                        new_color = np.random.choice(color)
                    swaps[old_color] = str(new_color)
                # One whole-word pass over the original text: a freshly
                # inserted colour can never be replaced a second time, and
                # substrings of other words are left alone.
                alternation = '|'.join(re.escape(old_color) for old_color in found)
                pattern = r'(?<!\w)(?:' + alternation + r')(?!\w)'
                new_exp.append(re.sub(pattern, lambda match: swaps[match.group(0)], sentence))
                old_exp.append(sentence)
            if new_exp:
                # Build two independent shallow copies of the video entry.
                # Reusing the same dict for both outputs would make the
                # "new colour" file silently contain the old expressions.
                video_with_new_color[video] = dict(
                    video_meta,
                    expressions={str(i): {'exp': exp} for i, exp in enumerate(new_exp)},
                )
                video_with_old_color[video] = dict(
                    video_meta,
                    expressions={str(i): {'exp': exp} for i, exp in enumerate(old_exp)},
                )
            pbar.update(1)
    mmcv.dump({'videos': video_with_new_color},
              '/mnt/data/refvos/meta_expressions/valid/meta_expressions_color.json')
    mmcv.dump({'videos': video_with_old_color},
              '/mnt/data/refvos/meta_expressions/valid/meta_expressions_old_color.json')
def parse_expression():
    """Build action / object / adjective vocabularies from the training expressions.

    Each training expression is POS-tagged (first sentence only) and scanned:

    * every ``VBG`` token is recorded as an action;
    * the first ``NN`` token that is not already a recorded action is
      recorded as the object of that expression;
    * every ``JJ`` token is recorded as an adjective.

    The three vocabularies are printed and then saved, in first-seen order,
    as lists under ``'act'``, ``'obj'`` and ``'adj'`` in ``parse_exp.json``.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/train/meta_expressions.json')
    videos = results['videos']
    exp_list = []
    for video in videos:
        for entry in videos[video]['expressions'].values():
            exp_list.append(entry['exp'])

    # Plain dicts are used as insertion-ordered sets (the values are unused).
    act, obj, adj = {}, {}, {}
    with tqdm(total=len(exp_list)) as pbar:
        for sentence in exp_list:
            first_sentence = nltk.sent_tokenize(sentence)[0]
            noun_taken = False
            for token, tag in nltk.pos_tag(nltk.word_tokenize(first_sentence)):
                if tag == 'VBG':
                    act.setdefault(token, 0)
                if tag == 'NN' and not noun_taken and token not in act:
                    obj[token] = 0
                    noun_taken = True
                if tag == 'JJ':
                    adj.setdefault(token, 0)
            pbar.update(1)
            pbar.set_description(f'act: {len(act)}, obj: {len(obj)}, adj: {len(adj)}')

    for vocabulary in (act, obj, adj):
        print(vocabulary)
    vocabularies = {'act': list(act), 'obj': list(obj), 'adj': list(adj)}
    mmcv.dump(vocabularies, '/mnt/data/refvos/meta_expressions/valid/parse_exp.json')
if __name__ == '__main__':
    # Uncomment the step(s) to run. process_col() writes the color.json that
    # generate_color() reads, and parse_expression() writes the parse_exp.json
    # that generate_object() / generate_action() read, so run those first.
    # inspect_json('old_color')
    # process_col()
    # parse_expression()
    # generate_object()
    # generate_action()
    # generate_color()
    pass  # keeps the block valid Python while every step is commented out
Thank you for your patient reply! I’m looking forward to your new work. It undoubtedly fills the gap in this field!
Hello, could you share information about the hardware you are using? I couldn't find it in the paper.