xenova / transformers.js

State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!
https://huggingface.co/docs/transformers.js
Apache License 2.0

Unsupported model type: whisper and CORS error #314

Open rojitdhakal opened 11 months ago

rojitdhakal commented 11 months ago

const transcribe = async (
    audio,
    model,
    multilingual,
    quantized,
    subtask,
    language,
) => {
    // TODO use subtask and language

    // If multilingual is true, leave the model name unchanged; otherwise append ".en"
    // to select the English-only variant of the model.

    const modelName = `Xenova/whisper-${model}${multilingual ? "" : ".en"}`;
    console.log('modelName',modelName)

    const p = AutomaticSpeechRecognitionPipelineFactory;

    // Check if the current model settings are different from the new settings
    if (p.model !== modelName || p.quantized !== quantized) {
        // Invalidate model if different
        // Update the model name and quantized status
        p.model = modelName;
        p.quantized = quantized;

        // Check if there is an existing instance of the ASR pipeline
        if (p.instance !== null) {

            // Dispose of the existing instance (clean up resources)
            (await p.getInstance()).dispose();

            // Set the instance to null (indicating that it needs to be recreated)
            p.instance = null;
        }
    }

    // Load transcriber model
    let transcriber = await p.getInstance((data) => {
        self.postMessage(data);
    });

    const time_precision =
        transcriber.processor.feature_extractor.config.chunk_length /
        transcriber.model.config.max_source_positions;

    // Storage for chunks to be processed. Initialise with an empty chunk.
    let chunks_to_process = [
        {
            tokens: [],
            finalised: false,
        },
    ];

    // TODO: Storage for fully-processed and merged chunks
    // let decoded_chunks = [];

    function chunk_callback(chunk) {
        let last = chunks_to_process[chunks_to_process.length - 1];

        // Overwrite last chunk with new info
        Object.assign(last, chunk);
        last.finalised = true;

        // Create an empty chunk after, if it is not the last chunk
        if (!chunk.is_last) {
            chunks_to_process.push({
                tokens: [],
                finalised: false,
            });
        }
    }

    // Inject custom callback function to handle merging of chunks
    function callback_function(item) {
        let last = chunks_to_process[chunks_to_process.length - 1];

        // Update tokens of last chunk
        last.tokens = [...item[0].output_token_ids];

        // Merge text chunks
        // TODO optimise so we don't have to decode all chunks every time
        let data = transcriber.tokenizer._decode_asr(chunks_to_process, {
            time_precision: time_precision,
            return_timestamps: true,
            force_full_sequences: false,
        });

        self.postMessage({
            status: "update",
            task: "automatic-speech-recognition",
            data: data,
        });
    }

    // Actually run transcription
    let output = await transcriber(audio, {
        // Greedy
        top_k: 0,
        do_sample: false,

        // Sliding window
        chunk_length_s: 30,
        stride_length_s: 5,

        // Language and task
        language: language,
        task: subtask,

        // Return timestamps
        return_timestamps: true,
        force_full_sequences: false,

        // Callback functions
        callback_function: callback_function, // after each generation step
        chunk_callback: chunk_callback, // after each chunk is processed
    }).catch((error) => {

        self.postMessage({
            status: "error",
            task: "automatic-speech-recognition",
            data: error,
        });
        return null;
    });

    return output;
};       
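For context, the AutomaticSpeechRecognitionPipelineFactory referenced above is presumably a static factory class along these lines; this is a sketch based on the whisper-web example, not code confirmed in this thread:

class AutomaticSpeechRecognitionPipelineFactory {
    // Static settings checked and updated by transcribe() above
    static task = "automatic-speech-recognition";
    static model = null;
    static quantized = null;
    static instance = null;
    // getInstance(progress_callback) lazily creates the pipeline (see the next comment)
}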
rojitdhakal commented 11 months ago
  static async getInstance(progress_callback = null) {

        if (this.instance === null) {
            this.instance = pipeline(this.task, this.model, {
                quantized: this.quantized,
                progress_callback,
            });
        }
        console.log("inside",this.instance)
        return this.instance;
    }

When logging this.instance, it shows:

Promise {<pending>}
    [[Prototype]]: Promise
    [[PromiseState]]: "rejected"
    [[PromiseResult]]: Error: Unsupported model type: whisper
        at AutoModelForCTC.from_pretrained (webpack-internal:///./node_modules/.pnpm/@xenova+transformers@2.6.0/node_modules/@xenova/transformers/src/models.js:3550:19)
        at async eval (webpack-internal:///./node_modules/.pnpm/@xenova+transformers@2.6.0/node_modules/@xenova/transformers/src/pipelines.js:2087:33)
xenova commented 11 months ago

Hi there. I believe this is due to an issue we just fixed in v2.6.1 (related to minification). Could you please upgrade to v2.6.1 and try again? Thanks!
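For example, with pnpm (the .pnpm path in the stack trace suggests pnpm is the package manager in use):

pnpm add @xenova/transformers@2.6.1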

rojitdhakal commented 11 months ago

I just upgraded to v2.6.1, but the same error persists.

rojitdhakal commented 11 months ago

[screenshot]

rojitdhakal commented 11 months ago

[screenshot]

xenova commented 11 months ago

Could you please post information about your environment, e.g., OS, browser, build tools?

I am aware of a similar issue with users who use create-react-app; if this is the case, please switch to a more up-to-date build tool like Vite.

rojitdhakal commented 11 months ago

OS: Windows 11
Browser: Chrome 117.0.5938.89
Build tool: create-next-app

xenova commented 11 months ago

build tools: create-next-app

Please try using Vite for your project. CRA has been removed from the React documentation. See here for more information.

rojitdhakal commented 11 months ago

We are using Next.js? There is no Vite support for a Next.js application.

xenova commented 11 months ago

Oh my apologies, I misread "create-next-app" as "create-react-app". Sorry about that!

Could you post any information about your build process, such as any minification taking place?

rojitdhakal commented 11 months ago

I am facing this locally in the development server, without minification.

xenova commented 11 months ago

Do you perhaps have a repo where I can try to reproduce this? Or could you post your next.config.js? Thanks!

rojitdhakal commented 11 months ago

We are currently working in a private repo. We can share it later if required (we'd need to prepare for that), but for now, here's the Next.js config:

/** @type {import('next').NextConfig} */

const nextConfig = {
  reactStrictMode: true,
  compress: false,

  images: {
    loader: "akamai",
    path: "",
  },
  compiler: {
    // Enables the styled-components SWC transform
    styledComponents: true,
  },
  // lessLoaderOptions: {
  //   lessOptions: {
  //     javascriptEnabled: true,
  //   },
  // },
  webpack(config) {
    config.module.rules.push({
      test: /\.svg$/,
      use: ["@svgr/webpack"],
    });
    return config;
  },
};

module.exports = nextConfig;
xenova commented 11 months ago

And which version of node / next.js / npm are you using?

rojitdhakal commented 11 months ago

next: 13.4.13
node: 16.15.0
pnpm: 7.23.0

xenova commented 11 months ago

node-version:16.15.0

This might be the issue. In the docs, we recommend a minimum Node version of 18; 16.x has reached EOL. Could you try upgrading?
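For example, with a Node version manager such as nvm-windows (assuming one is installed; 18.18.0 is just an example Node 18 release):

nvm install 18.18.0
nvm use 18.18.0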

szprytny commented 11 months ago

[screenshot]

I tried to run a whisper model via the automatic-speech-recognition pipeline and got the same error, caused by the unsupported AutoModelForCTC. This PR might have introduced the bug: https://github.com/xenova/transformers.js/pull/220/files?file-filters%5B%5D=.js&show-viewed-files=true#diff-2f6b66f61363f7b45e1b165f81d3ce15b3768da43e40410085aee8bd8666a629R1739

xenova commented 11 months ago

@szprytny Could you provide more information about your environment? Are you using the latest version of Transformers.js?

szprytny commented 11 months ago

I have node 18.9.1 and transformers.js 2.6.2. When I removed the declaration of AutoModelForCTC from https://github.com/xenova/transformers.js/blob/main/src/pipelines.js#L1953, the pipeline went further. I then got the error "Unsupported model IR version: 9", which I was able to get past by overriding onnxruntime-node in my project's package.json.
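For reference, such an override can be declared in package.json when using pnpm; the exact onnxruntime-node version below is an assumption, as it was not specified in the thread:

{
  "pnpm": {
    "overrides": {
      "onnxruntime-node": "1.16.0"
    }
  }
}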

xenova commented 11 months ago

And which bundler are you using? I am aware of issues with create-react-app. I haven't had any problems with vite, for example.

I got error Unsupported model IR version: 9

Yes, this is because you exported with onnx >= 1.14, and Transformers.js still uses onnxruntime-web v1.14 (which only supports a max IR version of 8). See here for an issue I filed a while ago.

szprytny commented 11 months ago

I did not run it as a web app; I just tried to do inference using a plain Node script run with npx tsx.

xenova commented 11 months ago

@szprytny Can you provide some sample code which resulted in this error?

szprytny commented 11 months ago

It seems that error

Unsupported model type: whisper

is misleading, as the real problem was that my model has a newer IR version. It seems the error related to this is not handled well enough, and it results in calling from_pretrained of the AutoModelForCTC class in the loadItems function.

Here is the script I used to run it

import { WaveFile } from "wavefile";
import path from "path";
import { readFileSync } from "fs";
import { pipeline, env } from "@xenova/transformers";

env.localModelPath = "c:/model/onnx/";

const prepareAudio = (filePath: string): Float64Array => {
  // Load the WAV file and convert it to 32-bit float samples at 16 kHz,
  // the sampling rate expected by Whisper.
  const wav = new WaveFile(readFileSync(path.normalize(filePath)));
  wav.toBitDepth("32f");
  wav.toSampleRate(16000);
  let audioData = wav.getSamples();

  return audioData;
};

const test = async () => {
  let pipe = await pipeline("automatic-speech-recognition", "shmisper", {
    local_files_only: true,
  });

  let out = await pipe(prepareAudio("c:/content/01_0.wav"));
  console.log(out);
};

test();
xenova commented 11 months ago

I see... Indeed, that error message would be quite misleading. Could you try downgrading to onnx==1.13.1 and re-exporting your model? See https://github.com/xenova/transformers.js/blob/main/scripts/requirements.txt for the other recommended versions.
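A rough sketch of that re-export flow, assuming the conversion script from the transformers.js repo and using openai/whisper-base purely as a placeholder model id:

pip install -r scripts/requirements.txt   # pinned versions recommended by the repo
pip install onnx==1.13.1
python -m scripts.convert --quantize --model_id openai/whisper-base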

Keith-Hon commented 10 months ago

I have the exact same problem. I changed the onnx version to 1.13.1. The small model works, but not the medium and large-v2 models.

dmmagdal commented 9 months ago

Having the same issue as the main thread:

xenova commented 9 months ago

Yes, we use optimum behind the scenes. The purpose of the conversion script is to also perform quantization afterwards, but if this is not necessary for your use case, you can use optimum directly and just structure the repo like the other transformers.js models on the HF Hub.
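If quantization isn't needed, a minimal sketch of using optimum directly (the model id and output folder are placeholders):

optimum-cli export onnx --model openai/whisper-base whisper-base-onnx/

The exported *.onnx files then go into an onnx/ subfolder next to the tokenizer/config JSON files, mirroring the layout of the existing whisper repos under the Xenova organisation on the Hub.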

dmmagdal commented 8 months ago

I converted the whisper-base model to ONNX using optimum-cli, moved the model files to the onnx folder locally, and verified my environment had the same module versions as your requirements.txt. When I try to run my inference script (Node.js), I still end up with errors: output.txt

itsyoboieltr commented 1 month ago

@xenova I could reproduce this error on the v3 branch with the whisper-word-timestamps example. If I go to worker.js and change the model_id from onnx-community/whisper-base_timestamped to Xenova/whisper-large-v3, I get the error: Unsupported model type: whisper

nsenkevich commented 1 week ago

Same here.

[screenshot: Screenshot 2024-08-26 at 18 36 21]

When trying to use distil-whisper/distil-medium.en on Whisper WebGPU, I get "Unsupported model type: whisper" with "@huggingface/transformers": "^3.0.0-alpha.9".