Open AHPUymhd opened 1 week ago
from openai import OpenAI import os import base64 from PIL import Image
# OpenAI-compatible client for Alibaba Cloud DashScope.
# SECURITY: never hard-code an API key in source (the original embedded an
# "sk-..." key) -- read it from the environment instead, e.g.
#   export DASHSCOPE_API_KEY=sk-...
client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
def compress_image(image_path, factor=3):
    """Shrink the image at *image_path* by *factor* per dimension, in place.

    The original file is overwritten with a JPEG-encoded result.

    Args:
        image_path: Path of the image file to compress (will be overwritten).
        factor: Integer downscale factor (>= 1); each side is floor-divided by it.
    """
    img = Image.open(image_path)
    # Calculate the target size; max(1, ...) guards against a zero-sized
    # target when factor exceeds one of the dimensions.
    target_size = (max(1, img.size[0] // factor), max(1, img.size[1] // factor))
    # Resize the image
    img_resized = img.resize(target_size, Image.LANCZOS)
    # JPEG cannot store alpha or palette modes (e.g. RGBA PNGs) -- Pillow
    # raises OSError on save; convert to RGB first.
    if img_resized.mode not in ("RGB", "L"):
        img_resized = img_resized.convert("RGB")
    # Save the compressed image, overwriting the original
    img_resized.save(image_path, "JPEG")
def encode_all_images_in_folder(folder_path): encoded_images = [] for filename in os.listdir(folder_path): if filename.endswith(".jpg") or filename.endswith(".png"): full_file_path = os.path.join(folder_path, filename) encoded_image = encode_image(full_file_path) encoded_images.append(encoded_image) return encoded_images
def encode_image(image_path): with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode("utf-8")
# Directory of extracted video frames (first-person driving footage).
# NOTE(review): hard-coded local path -- assumes it exists and holds .jpg/.png frames.
folder_path = "/home/robot/图片/44-62/44-62/44/004067/images"
# Base64-encode every frame for embedding in the API request.
encoded_images = encode_all_images_in_folder(folder_path)
messages_content = [] for encoded_image in encoded_images: messages_content.append( { "type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}, } ) messages_content.append({"type": "text", "text": "这些图片是一个第一视角的驾驶视频的连续帧,并且时间顺序是从前往后的,车辆发生的事故是什么,发生在哪一年哪一月的具体时间之间,车辆的车型和颜色是什么样的,第一视角的驾驶员的行为又是什么样的,你可以详细回答但不能胡编乱造一些答案"},)
completion = client.chat.completions.create( model="qwen-vl-max-0809", temperature= 0.4, # 较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化 0-2 top_p=0.3, # 取值越大,生成的随机性越高 0-1 messages=[ { "role": "user", "content": messages_content, } ], )
print(completion.choices[0].message.content)
我的代码是将本地的视频分成一个个视频帧,然后将这些帧转换成base64格式的数据输入给qwen2-vl-72b进行视频推理(因为官网的72b的api调用只能输入网络图像,而我希望输入的是本地图像),我尝试将"type": "image_url"修改成"type": "video",但是他告诉我不能回答视频,"type": "video"的回答如图: 而"type": "image_url"的回答如图 那么请问这image_url和video两种方式进行推理的效果是一样的吗,如果不一样我如何才能调用api对本地视频正确地进行理解推理("type": "video"),官网的教程在"type": "video"模式下只有对网络图像的推理示例
使用官网的例子是这样的,是可以正常推理的。但是我将网络图片改成本地图片之后就不可以了,报错如下,请问我怎么推理本地视频呢?
@kq-chen
from openai import OpenAI import os import base64 from PIL import Image
# OpenAI-compatible client for DashScope. The original paste lost the call
# arguments and the '#' of the comment below; restored to a working form.
# If the environment variable is not configured, replace the api_key line
# with your Model Studio (Bailian) API key: api_key="sk-xxx".
client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
# Compress the resolution of a single image (original comment; '#' lost in paste).
def compress_image(image_path, factor=3):
    img = Image.open(image_path)
    # Calculate the target size
    # NOTE(review): the resize-and-save body of this function is missing from
    # this paste; see the complete version earlier in the file.
压缩一个文件夹内图片的分辨率
def encode_all_images_in_folder(folder_path): encoded_images = [] for filename in os.listdir(folder_path): if filename.endswith(".jpg") or filename.endswith(".png"): full_file_path = os.path.join(folder_path, filename) encoded_image = encode_image(full_file_path) encoded_images.append(encoded_image) return encoded_images
将图片编码成base64格式数据
def encode_image(image_path): with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode("utf-8")
# Directory of extracted video frames (first-person driving footage).
# NOTE(review): hard-coded local path -- assumes it exists and holds .jpg/.png frames.
folder_path = "/home/robot/图片/44-62/44-62/44/004067/images"
# Base64-encode every frame for embedding in the API request.
encoded_images = encode_all_images_in_folder(folder_path)
messages_content = [] for encoded_image in encoded_images: messages_content.append( { "type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}, } ) messages_content.append({"type": "text", "text": "这些图片是一个第一视角的驾驶视频的连续帧,并且时间顺序是从前往后的,车辆发生的事故是什么,发生在哪一年哪一月的具体时间之间,车辆的车型和颜色是什么样的,第一视角的驾驶员的行为又是什么样的,你可以详细回答但不能胡编乱造一些答案"},)
completion = client.chat.completions.create( model="qwen-vl-max-0809", temperature= 0.4, # 较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化 0-2 top_p=0.3, # 取值越大,生成的随机性越高 0-1 messages=[ { "role": "user", "content": messages_content, } ], )
print(completion.choices[0].message.content)
我的代码是将本地的视频分成一个个视频帧,然后将这些帧转换成base64格式的数据输入给qwen2-vl-72b进行视频推理(因为官网的72b的api调用只能输入网络图像,而我希望输入的是本地图像),我尝试将"type": "image_url"修改成"type": "video",但是他告诉我不能回答视频,"type": "video"的回答如图: 而"type": "image_url"的回答如图 那么请问这image_url和video两种方式进行推理的效果是一样的吗,如果不一样我如何才能调用api对本地视频正确地进行理解推理("type": "video"),官网的教程在"type": "video"模式下只有对网络图像的推理示例