airockchip / rknn-toolkit2

Other
991 stars 104 forks source link

T5-xxl Encoder 运行报错 `parseRKNN: exportDataSize large then model size` #206

Open happyme531 opened 2 weeks ago

happyme531 commented 2 weeks ago

如题

rknn-toolkit2 版本:2.0.0b17(更高版本在转换时会报 `invalid tensor malloc size, tensor name: , target: CPU, size: 0` 错误);librknnrt.so 版本:2.2.0

导出onnx:

import torch
from transformers import T5EncoderModel
from pathlib import Path
import os

# Switch to the directory containing this script so that the relative model
# and output paths used further down resolve no matter where it is launched.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

def export_t5_encoder_to_onnx(
    model_path: str,
    output_path: str,
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
):
    """Export a T5 encoder checkpoint to an ONNX file.

    The model is loaded with ``T5EncoderModel.from_pretrained``, moved to
    ``device``, put in eval mode and traced with all-ones ``(1, 8)`` int64
    example inputs. Batch and sequence dimensions are declared dynamic for
    both inputs and for the ``last_hidden_state`` output.

    Args:
        model_path: Checkpoint location passed to ``from_pretrained``.
        output_path: Destination ``.onnx`` file; missing parent directories
            are created.
        device: Torch device used for the model and the example inputs.
            The default is chosen once, when the function is defined.
    """
    encoder = T5EncoderModel.from_pretrained(model_path)
    encoder.to(device)
    encoder.eval()

    # Example inputs for tracing: batch_size=1, sequence_length=8.
    example_shape = (1, 8)
    input_ids = torch.ones(*example_shape, dtype=torch.long).to(device)
    attention_mask = torch.ones(*example_shape, dtype=torch.long).to(device)

    # Make sure the destination directory exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    input_names = ["input_ids", "attention_mask"]
    output_names = ["last_hidden_state"]
    # Every tensor shares the same two dynamic axes.
    batch_and_sequence = {0: "batch_size", 1: "sequence_length"}
    dynamic_axes = {
        tensor_name: dict(batch_and_sequence)
        for tensor_name in input_names + output_names
    }

    torch.onnx.export(
        encoder,
        (input_ids, attention_mask),
        output_path,
        opset_version=17,
        input_names=input_names,
        output_names=output_names,
        dynamic_axes=dynamic_axes,
        do_constant_folding=True,   # fold constants during export
        export_params=True          # embed the trained weights
    )

    print(f"模型已成功导出到: {output_path}")
    del encoder

if __name__ == "__main__":
    # Example run: load the checkpoint from the current directory (replace
    # "." with any local model path) and trace on the CPU.
    export_t5_encoder_to_onnx(
        model_path=".",
        output_path="t5-v1_1-xxl-encoder.onnx",
        device="cpu",
    )

转换代码:

#!/usr/bin/env python
# coding: utf-8

import datetime
from rknn.api import RKNN
from sys import exit
import os

# Run from the script's own directory so the relative model paths resolve.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

rknn = RKNN(verbose=True)
ONNX_MODEL = "t5-v1_1-xxl-encoder.onnx"
RKNN_MODEL = ONNX_MODEL.replace(".onnx", ".rknn")
# Calibration list; only consulted by the toolkit when QUANTIZE is True.
DATASET = "/home/zt/rk3588-nn/rknn_model_zoo/datasets/COCO/coco_subset_20.txt"
QUANTIZE = False
detailed_performance_log = True  # currently unused

timedate_iso = datetime.datetime.now().isoformat()  # currently unused


def _abort_on_failure(ret, step):
    """Release the toolkit and exit with ``ret`` if ``step`` did not return 0."""
    if ret != 0:
        print(f"{step} failed! (ret={ret})")
        rknn.release()
        exit(ret)


rknn.config(
    # mean_values=[0.485, 0.456, 0.406],
    # std_values=[0.229, 0.224, 0.225],
    quantized_dtype='w8a8',
    quantized_algorithm='normal',
    quantized_method='channel',
    quantized_hybrid_level=0,
    target_platform='rk3588',
    quant_img_RGB2BGR=False,
    float_dtype='float16',
    optimization_level=3,
    remove_weight=False,
    compress_weight=False,
    inputs_yuv_fmt=None,
    single_core_mode=False,
    dynamic_input=None,
    model_pruning=False,
    op_target=None,
    quantize_weight=False,
    remove_reshape=False,
    sparse_infer=False,
    # enable_flash_attention=False,
    )

# Each step reports failure through its return code; stop at the first failing
# step instead of feeding a broken state into the next one.
# Both inputs are fixed to a static [1, 128] shape here, so callers must feed
# exactly 128 tokens at inference time.
ret = rknn.load_onnx(model=ONNX_MODEL, inputs=["input_ids", "attention_mask"], input_size_list=[[1, 128], [1, 128]])
_abort_on_failure(ret, "Load ONNX model")

ret = rknn.build(do_quantization=QUANTIZE, dataset=DATASET, rknn_batch_size=None)
_abort_on_failure(ret, "Build RKNN model")

# NOTE(review): with float16 weights and no quantization the exported file is
# very large for T5-XXL. The accompanying runtime log rejects a
# 9550357619-byte model while reporting its size as 960423027, which equals
# 9550357619 mod 2**32 -- this suggests the runtime cannot address models of
# 4 GiB or more. TODO confirm with the vendor.
ret = rknn.export_rknn(RKNN_MODEL)
_abort_on_failure(ret, "Export RKNN model")

# Free the toolkit's resources once the conversion has finished.
rknn.release()

测试代码:

import numpy as np
from transformers import T5Tokenizer
from rknnlite.api import RKNNLite

class TextSimilarityCalculator:
    """Text similarity from mean-pooled T5 encoder states run through RKNNLite.

    Each text is tokenized, padded to a fixed length, passed through the RKNN
    model, mean-pooled over the real (non-padding) tokens and L2-normalised,
    so the dot product of two embeddings is their cosine similarity.
    """

    # Fixed token length fed to the model. Presumably this has to equal the
    # static sequence length the .rknn file was converted with -- verify
    # against the conversion settings.
    max_length: int = 128

    def __init__(self, model_path: str, tokenizer_path: str, max_length: int = 128):
        """Load the RKNN model, start the runtime and load the tokenizer.

        Args:
            model_path: Path to the ``.rknn`` model file.
            tokenizer_path: Location passed to ``T5Tokenizer.from_pretrained``.
            max_length: Number of tokens every input is padded/truncated to.

        Raises:
            RuntimeError: If loading the model or initialising the runtime
                returns a non-zero code.
        """
        self.max_length = max_length

        # Create the RKNNLite handle first so __del__ can always release it.
        self.rknn_lite = RKNNLite(verbose=True)

        ret = self.rknn_lite.load_rknn(model_path)
        if ret != 0:
            raise RuntimeError('Load RKNN model failed')

        ret = self.rknn_lite.init_runtime()
        if ret != 0:
            raise RuntimeError('Init runtime failed')

        self.tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)

    def __del__(self):
        """Release the RKNNLite handle when the object is destroyed."""
        # The attribute is missing if __init__ failed before creating it.
        runtime = getattr(self, 'rknn_lite', None)
        if runtime is not None:
            runtime.release()

    def get_embedding(self, text: str) -> np.ndarray:
        """Return the L2-normalised, mask-weighted mean embedding of ``text``.

        Args:
            text: Input text to embed.

        Returns:
            Array with one row per input text (a single row here).

        Raises:
            RuntimeError: If the runtime returns no outputs.
        """
        # Pad to a fixed length: a model converted with a static input shape
        # cannot accept the variable lengths produced by padding=True.
        inputs = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="np"
        )

        input_data = [
            inputs["input_ids"],
            inputs["attention_mask"]
        ]

        outputs = self.rknn_lite.inference(inputs=input_data)
        if outputs is None:
            raise RuntimeError('Inference failed')

        hidden_states = np.asarray(outputs[0])

        # Average only over real tokens so padding does not dilute the
        # embedding; with an all-ones mask this equals a plain mean over axis 1.
        mask = np.asarray(inputs["attention_mask"]).astype(hidden_states.dtype)
        mask = mask[..., np.newaxis]
        token_count = np.maximum(mask.sum(axis=1), 1)
        embeddings = (hidden_states * mask).sum(axis=1) / token_count

        # L2-normalise; the floor avoids dividing by zero for an all-zero row.
        norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
        norm = np.maximum(norm, np.finfo(np.float32).tiny)
        return embeddings / norm

    def compute_similarity(self, text1: str, text2: str) -> float:
        """Return the cosine similarity between two texts."""
        embedding1 = self.get_embedding(text1)
        embedding2 = self.get_embedding(text2)

        # Both embeddings are unit length, so the dot product is the cosine.
        return np.dot(embedding1, embedding2.T).item()

    def find_most_similar(self, query: str, text_list: list[str]) -> list[tuple[str, float]]:
        """Rank ``text_list`` by cosine similarity to ``query``.

        Args:
            query: Text to compare against.
            text_list: Candidate texts.

        Returns:
            ``(text, similarity)`` pairs sorted by similarity, highest first.
            An empty ``text_list`` yields an empty list.
        """
        query_embedding = self.get_embedding(query)

        similarities = [
            (text, np.dot(query_embedding, self.get_embedding(text).T).item())
            for text in text_list
        ]

        similarities.sort(key=lambda pair: pair[1], reverse=True)
        return similarities

def main():
    """Demo: compare two text pairs, then rank a small corpus against a query.

    Any exception raised along the way is caught and reported on stdout.
    """
    try:
        # Build the calculator from the RKNN model and tokenizer directories.
        calculator = TextSimilarityCalculator(
            "text_encoder_3/t5-v1_1-xxl-encoder.rknn",
            "tokenizer_3/"
        )

        # Example 1: pairwise similarity of text1 against two other texts.
        text1 = "今天天气真不错,阳光明媚"
        text2 = "今天是个好天气,太阳很大"
        text3 = "我最近在学习人工智能技术"

        for label, other in (("文本2", text2), ("文本3", text3)):
            score = calculator.compute_similarity(text1, other)
            print(f"\n文本1: {text1}")
            print(f"{label}: {other}")
            print(f"相似度: {score:.4f}")

        # Example 2: find the corpus entries closest to a query.
        text_database = [
            "今天阳光很好,适合出门",
            "这个周末我要去爬山",
            "人工智能正在改变世界",
            "今天下着大雨,天气不好",
            "机器学习是人工智能的一个分支"
        ]
        query = "今天是个晴天,很适合户外活动"

        print(f"\n查询文本: {query}")
        print("在文本库中搜索相似内容:")

        for text, score in calculator.find_most_similar(query, text_database):
            print(f"相似度 {score:.4f}: {text}")

    except Exception as error:
        print(f"发生错误: {str(error)}")

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

报错日志:

firefly@zt-firefly ~/m/z/stable-diffusion-3.5-medium> ls -l text_encoder_3/t5-v1_1-xxl-encoder.rknn
-rw-r--r-- 1 firefly firefly 9550357619 11月  8 22:31 text_encoder_3/t5-v1_1-xxl-encoder.rknn
firefly@zt-firefly ~/m/z/stable-diffusion-3.5-medium> python ./text_similarity_rknn.py
W rknn-toolkit-lite2 version: 2.2.0
W Verbose file path is invalid, debug info will not dump to file.
D target set by user is: None
D Starting ntp or adb, target soc is RK3588, device id is: None
I RKNN: [23:21:46.537] RKNN Runtime Information, librknnrt version: 2.2.0 (c195366594@2024-09-14T12:18:56)
I RKNN: [23:21:46.537] RKNN Driver Information, version: 0.9.8
E RKNN: [23:21:46.539] parseRKNN: exportDataSize large then model size: 9550356224 vs 960423027!
E RKNN: [23:21:46.539] parseRKNN from buffer: Invalid RKNN format!
E RKNN: [23:21:46.539] rknn_init, load model failed!
E Catch exception when init runtime!
E Traceback (most recent call last):
  File "/home/firefly/.local/lib/python3.9/site-packages/rknnlite/api/rknn_lite.py", line 157, in init_runtime
    self.rknn_runtime.build_graph(self.rknn_data, self.load_model_in_npu)
  File "rknnlite/api/rknn_runtime.py", line 921, in rknnlite.api.rknn_runtime.RKNNRuntime.build_graph
Exception: RKNN init failed. error code: RKNN_ERR_MODEL_INVALID

发生错误: Init runtime failed
yuyun2000 commented 1 week ago

不要为难rk了 哥