lxa9867 / QSD

[CVPR 2024] "Towards Robust Audiovisual Segmentation in Complex Environments with Quantization-based Semantic Decomposition"
11 stars 1 forks source link

Hardware Information #1

Closed buptLwz closed 8 months ago

buptLwz commented 8 months ago

Hello, could you share information about the hardware you are using? I couldn't find it in the paper.

lxa9867 commented 8 months ago

Hi,

Thanks for your interest. We use either V100 or P40 depending on availability.

Please let me know if you have further questions.

buptLwz commented 8 months ago

What about the specific number of GPUs? BTW, I'm also interested in your other work. Regarding your proposed new dataset R2-YTVOS, how can I obtain it?

lxa9867 commented 8 months ago

We typically use 4 GPUs for an experiment.

For the R2-VOS dataset, unfortunately, since there was a gap of more than one year between the paper's first submission and its final acceptance, we didn't keep the instantiated dataset. If you want to create a similar-style dataset, you may use the following code and make the necessary adjustments.

BTW, we are launching a universal benchmark (R^2-bench) for robust referring tasks and will release it shortly; please keep an eye on recent arXiv postings if you are interested.

import mmcv
import numpy as np
import glob
import cv2
from tqdm import tqdm
import nltk
# Fetch the NLTK resources the functions below rely on: 'punkt' for
# sent_tokenize/word_tokenize and 'averaged_perceptron_tagger' for pos_tag.
# These calls sit at module level, so they run every time the script is imported.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def inspect_json(val_type):
    """Load one validation meta-expression file and print its whole content.

    Args:
        val_type: Suffix selecting the file to read, i.e.
            ``meta_expressions_{val_type}.json`` (for example ``'random'``).
    """
    json_path = f'/mnt/data/refvos/meta_expressions/valid/meta_expressions_{val_type}.json'
    print(mmcv.load(json_path))

def generate_random():
    """Write a validation split whose expressions are shuffled across videos.

    Loads the validation ``meta_expressions.json``, randomly permutes the
    per-video ``expressions`` entries among the videos with
    ``np.random.shuffle`` and dumps the result to
    ``meta_expressions_random.json``. The permutation is not forced to move
    every entry, so a video may occasionally keep its own expressions.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    # A dict view cannot be indexed in Python 3, so materialize the video ids.
    video_list = list(results['videos'])
    # Shuffle the expression dicts, then hand them back out in video order.
    exps_list = [results['videos'][video]['expressions'] for video in video_list]
    np.random.shuffle(exps_list)
    for video, exps in zip(tqdm(video_list), exps_list):
        results['videos'][video]['expressions'] = exps
    mmcv.dump(results, '/mnt/data/refvos/meta_expressions/valid/meta_expressions_random.json')

def generate_object():
    """Write a validation split in which the first noun of each expression is swapped.

    For every expression in the validation ``meta_expressions.json`` the first
    sentence is POS-tagged with NLTK. The first token tagged ``NN`` is replaced
    by a different word drawn at random from the ``obj`` list stored in
    ``parse_exp.json``. Every whole-word occurrence of that noun in the
    expression is replaced with the same new word. Expressions without an
    ``NN`` token are written back unchanged. The result is dumped to
    ``meta_expressions_object.json``; each expression entry is rewritten as
    ``{'exp': sentence}``.
    """
    import re  # local import keeps this block self-contained

    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    obj = mmcv.load('/mnt/data/refvos/meta_expressions/valid/parse_exp.json')['obj']
    video_list = list(results['videos'].keys())
    with tqdm(total=len(video_list)) as pbar:
        for video in video_list:
            expressions = results['videos'][video]['expressions']
            for k, v in expressions.items():
                sentence = v['exp']
                first_sentence = nltk.sent_tokenize(sentence)[0]
                tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
                # Only the first noun is swapped; later nouns are left alone.
                first_noun = next((token for token, tag in tagged if tag == 'NN'), None)
                # The rejection loop below needs at least one candidate that
                # differs from the noun, otherwise it would never terminate.
                if first_noun is not None and any(cand != first_noun for cand in obj):
                    new_obj = np.random.choice(obj)
                    while new_obj == first_noun:
                        new_obj = np.random.choice(obj)
                    replacement = str(new_obj)
                    # Match whole words only so that e.g. "man" does not
                    # rewrite the tail of "woman". A callable replacement
                    # avoids backslash/group interpretation of the new word.
                    sentence = re.sub(
                        r'(?<!\w)' + re.escape(first_noun) + r'(?!\w)',
                        lambda _match: replacement,
                        sentence,
                    )
                expressions[k] = {'exp': sentence}
            pbar.update(1)
        mmcv.dump(results, '/mnt/data/refvos/meta_expressions/valid/meta_expressions_object.json')

def generate_action():
    """Write a validation split in which every ``-ing`` verb is swapped.

    For every expression in the validation ``meta_expressions.json`` the first
    sentence is POS-tagged with NLTK. Each distinct token tagged ``VBG`` is
    mapped to a different word drawn at random from the ``act`` list stored in
    ``parse_exp.json``. All mappings are applied in one whole-word
    substitution pass, so a freshly inserted word is never replaced again by a
    later token. The result is dumped to ``meta_expressions_action.json``;
    each expression entry is rewritten as ``{'exp': sentence}``.
    """
    import re  # local import keeps this block self-contained

    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    act = mmcv.load('/mnt/data/refvos/meta_expressions/valid/parse_exp.json')['act']
    video_list = list(results['videos'].keys())
    with tqdm(total=len(video_list)) as pbar:
        for video in video_list:
            expressions = results['videos'][video]['expressions']
            for k, v in expressions.items():
                sentence = v['exp']
                first_sentence = nltk.sent_tokenize(sentence)[0]
                tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
                # original verb -> replacement, one entry per distinct VBG token
                swaps = {}
                for token, tag in tagged:
                    if tag != 'VBG' or token in swaps:
                        continue
                    # Without an alternative the rejection loop would spin forever.
                    if not any(cand != token for cand in act):
                        continue
                    new_act = np.random.choice(act)
                    while new_act == token:
                        new_act = np.random.choice(act)
                    swaps[token] = str(new_act)
                if swaps:
                    # Longest alternatives first so a longer token wins over
                    # a shorter one that is its prefix.
                    alternatives = '|'.join(
                        re.escape(token) for token in sorted(swaps, key=len, reverse=True)
                    )
                    # Whole-word match: "eating" must not rewrite "seating".
                    sentence = re.sub(
                        r'(?<!\w)(?:' + alternatives + r')(?!\w)',
                        lambda match: swaps[match.group(0)],
                        sentence,
                    )
                expressions[k] = {'exp': sentence}
            pbar.update(1)
        mmcv.dump(results, '/mnt/data/refvos/meta_expressions/valid/meta_expressions_action.json')

def process_col():
    """Convert ``color.csv`` into ``color.json``.

    Keeps every other line of ``color.csv`` (line indices 0, 2, 4, ...) and
    stores them, without their line terminator, under the ``'color'`` key of
    ``color.json``. Both files are resolved relative to the current working
    directory.
    """
    with open('color.csv') as file:
        # rstrip('\n') rather than line[:-1]: a final line without a trailing
        # newline must not lose its last character.
        color = [line.rstrip('\n') for i, line in enumerate(file) if i % 2 == 0]
    mmcv.dump({'color': color}, 'color.json')

def generate_color():
    """Write paired validation splits with swapped and original color words.

    For every expression in the validation ``meta_expressions.json`` the first
    sentence is POS-tagged with NLTK. Each distinct token that is tagged
    ``JJ`` and appears in the ``color`` list of ``color.json`` is mapped to a
    different color drawn at random from that list. All mappings are applied
    in one whole-word substitution pass. Only expressions that contain such a
    color are kept, and only videos with at least one kept expression are
    written.

    Two files are produced, both with expressions re-indexed from ``"0"``:

    * ``meta_expressions_color.json`` - the expressions with swapped colors.
    * ``meta_expressions_old_color.json`` - the same expressions, unmodified.
    """
    import re  # local import keeps this block self-contained

    results = mmcv.load('/mnt/data/refvos/meta_expressions/valid/meta_expressions.json')
    color = mmcv.load('color.json')['color']
    color_set = set(color)  # O(1) membership test inside the token loop
    video_list = list(results['videos'].keys())
    video_with_old_color = {}
    video_with_new_color = {}
    with tqdm(total=len(video_list)) as pbar:
        for video in video_list:
            new_exp = []
            old_exp = []
            for v in results['videos'][video]['expressions'].values():
                sentence = v['exp']
                first_sentence = nltk.sent_tokenize(sentence)[0]
                tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
                # original color -> replacement, one entry per distinct color adjective
                swaps = {}
                for token, tag in tagged:
                    if tag != 'JJ' or token not in color_set or token in swaps:
                        continue
                    # Without an alternative the rejection loop would spin forever.
                    if not any(cand != token for cand in color):
                        continue
                    new_color = np.random.choice(color)
                    while new_color == token:
                        new_color = np.random.choice(color)
                    swaps[token] = str(new_color)
                if swaps:
                    alternatives = '|'.join(
                        re.escape(token) for token in sorted(swaps, key=len, reverse=True)
                    )
                    # Whole-word, single-pass substitution: "red" must not
                    # rewrite "steered", and a color that was just inserted
                    # must not be replaced again by a later token.
                    sentence = re.sub(
                        r'(?<!\w)(?:' + alternatives + r')(?!\w)',
                        lambda match: swaps[match.group(0)],
                        sentence,
                    )
                    new_exp.append(sentence)
                    old_exp.append(v['exp'])
            if new_exp:
                source_video = results['videos'][video]
                # Copy the video entry for each output. Reusing the same dict
                # for both would make the second assignment of 'expressions'
                # overwrite the first, leaving both files with the old text.
                new_video = dict(source_video)
                new_video['expressions'] = {
                    str(i): {'exp': exp} for i, exp in enumerate(new_exp)
                }
                video_with_new_color[video] = new_video
                old_video = dict(source_video)
                old_video['expressions'] = {
                    str(i): {'exp': exp} for i, exp in enumerate(old_exp)
                }
                video_with_old_color[video] = old_video
            pbar.update(1)
        mmcv.dump({'videos': video_with_new_color},
                  '/mnt/data/refvos/meta_expressions/valid/meta_expressions_color.json')
        mmcv.dump({'videos': video_with_old_color},
                  '/mnt/data/refvos/meta_expressions/valid/meta_expressions_old_color.json')

def parse_expression():
    """Collect action, object and adjective vocabularies from the training split.

    Every expression of the training ``meta_expressions.json`` has its first
    sentence POS-tagged with NLTK. Tokens tagged ``VBG`` are collected as
    actions, tokens tagged ``JJ`` as adjectives, and for each expression the
    first ``NN`` token that is not already a known action is collected as an
    object. The three vocabularies are printed and then written, as lists in
    first-seen order, to ``parse_exp.json`` in the validation directory.
    """
    results = mmcv.load('/mnt/data/refvos/meta_expressions/train/meta_expressions.json')
    exp_list = [
        entry['exp']
        for video_meta in results['videos'].values()
        for entry in video_meta['expressions'].values()
    ]
    # Dicts are used as insertion-ordered sets; the 0 values carry no meaning.
    act, obj, adj = {}, {}, {}
    with tqdm(total=len(exp_list)) as pbar:
        for sentence in exp_list:
            first_sentence = nltk.sent_tokenize(sentence)[0]
            tagged = nltk.pos_tag(nltk.word_tokenize(first_sentence))
            noun_taken = False
            for token, tag in tagged:
                if tag == 'VBG':
                    act.setdefault(token, 0)
                # NOTE(review): the noun is tested against `act`, not `obj` -
                # confirm that skipping nouns already seen as actions is intended.
                if tag == 'NN' and not noun_taken and token not in act:
                    obj[token] = 0
                    noun_taken = True
                if tag == 'JJ':
                    adj.setdefault(token, 0)
            pbar.update(1)
            pbar.set_description(f'act: {len(act)}, obj: {len(obj)}, adj: {len(adj)}')
        for vocabulary in (act, obj, adj):
            print(vocabulary)
        mmcv.dump({'act': list(act), 'obj': list(obj), 'adj': list(adj)},
                  '/mnt/data/refvos/meta_expressions/valid/parse_exp.json')

if __name__ == '__main__':
    # Uncomment the step(s) to run. parse_expression() writes the
    # parse_exp.json that generate_object()/generate_action() read, and
    # process_col() writes the color.json that generate_color() reads.
    # inspect_json('old_color')
    # process_col()
    # parse_expression()
    # generate_object()
    # generate_action()
    # generate_color()
    pass  # an `if` body made only of comments is a syntax error
buptLwz commented 8 months ago

Thank you for your patient reply! I’m looking forward to your new work. It undoubtedly fills the gap in this field!