Closed lixumin-zai closed 5 months ago
def multimodal_processor(self, sources):
    """Expand the image placeholder in every conversation turn, in place.

    Each occurrence of ``DEFAULT_IMAGE_TOKEN`` in a turn's ``"value"`` is
    replaced by ``DEFAULT_IMAGE_PATCH_TOKEN`` repeated
    ``self.multimodal_cfg['image_token_len']`` times. When
    ``self.multimodal_cfg['use_im_start_end']`` is truthy, that run is
    wrapped in ``DEFAULT_IM_START_TOKEN`` / ``DEFAULT_IM_END_TOKEN``.

    Args:
        sources: Iterable of conversations. Each conversation is an
            indexable, non-empty sequence of dict-like turns whose
            ``"value"`` entry is a string.

    Returns:
        The same ``sources`` object; its turns are mutated in place.

    Raises:
        IndexError: If a conversation has no turns (``source[0]`` fails).
        KeyError: If ``multimodal_cfg`` lacks ``'image_token_len'`` or
            ``'use_im_start_end'``.
    """
    cfg = self.multimodal_cfg

    # The expansion depends only on the config, so build it once instead of
    # once per sentence.
    replace_token = DEFAULT_IMAGE_PATCH_TOKEN * cfg['image_token_len']
    if cfg['use_im_start_end']:
        replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN

    for source in sources:
        # NOTE(review): this discards whatever text the first turn held and
        # leaves only the image placeholder. Presumably intentional (the
        # first turn carries nothing but the image) -- confirm with the
        # dataset format before changing it.
        source[0]['value'] = DEFAULT_IMAGE_TOKEN
        for sentence in source:
            sentence["value"] = sentence["value"].replace(DEFAULT_IMAGE_TOKEN, replace_token)
    return sources
这一步 source[0]['value'] = DEFAULT_IMAGE_TOKEN
这一步 source[0]['value'] = DEFAULT_IMAGE_TOKEN