acrcloud / acrcloud_sdk_python

124 stars 45 forks source link

Audio Recognition Python SDK

Overview

ACRCloud provides services such as Music Recognition, Broadcast Monitoring, Custom Audio Recognition, Copyright Compliance & Data Deduplication, Live Channel Detection, and Offline Recognition etc.

This audio recognition Python SDK supports most audio / video file formats.

Audio: mp3, wav, m4a, flac, aac, amr, ape, ogg ...
Video: mp4, mkv, wmv, flv, ts, avi ...

Requirements

Follow one of the tutorials to create a project and get your host, access_key and access_secret.

Install

Linux and macOS

python3 -m pip install pyacrcloud

Other platform

You can run "python -m pip install git+https://github.com/acrcloud/acrcloud_sdk_python", or go to the sub-directory for your platform and run "sudo python setup.py install".

Windows Runtime Library

If you run the SDK on Windows, you must install this library.
X86: download and install Library(windows/vcredist_x86.exe)
x64: download and install Library(windows/vcredist_x64.exe)

Note

  1. If you run the SDK on Windows, you must install library(vcredist).
  2. All versions support humming.
  3. If you use Docker Alpine, you need to install libstdc++ by running "apk add --update libstdc++".

Functions

Introduction to all APIs.

recognizer.py

class ACRCloudRecognizer:
    def recognize_by_file(self, file_path, start_seconds, rec_length=10):
      #@param file_path : query file path
      #@param start_seconds : skip (start_seconds) seconds from the beginning of (file_path)
      #@param rec_length: use rec_length seconds of data to recognize
      #@return result metainfos

    def recognize_by_filebuffer(self, file_buffer, start_seconds, rec_length=10):
      #@param file_buffer : query buffer holding the contents of the file
      #@param start_seconds : skip (start_seconds) seconds from the beginning of (file_buffer)
      #@param rec_length: use rec_length seconds of data to recognize
      #@return result metainfos

    def recognize(self, wav_audio_buffer):
      #@param wav_audio_buffer : query buffer(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz)
      #@return result metainfos

Module acrcloud_extr_tool

def create_fingerprint_by_file(file_name, start_time_seconds, audio_len_seconds, is_db_fingerprint, opt):
      #file_name: Path of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. If you create a recognition fingerprint, the default is 12 seconds; if you create a db fingerprint, it is not used; 
      #is_db_fingerprint: If it is True, it will create a db fingerprint (fingerprint for a bucket, not for recognition); 
      #opt = {
               'filter_energy_min': 0,
               'silence_energy_threshold': 100,
               'silence_rate_threshold': 1
           }

def create_humming_fingerprint_by_file(file_name, start_time_seconds, audio_len_seconds):
      #file_name: Path of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. If you create a recognition fingerprint, the default is 12 seconds; if you create a db fingerprint, it is not used; 

def create_fingerprint_by_filebuffer(data_buffer, start_time_seconds, audio_len_seconds, is_db_fingerprint, opt):
      #data_buffer: data buffer of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. If you create a recognition fingerprint, the default is 12 seconds; if you create a db fingerprint, it is not used; 
      #is_db_fingerprint: If it is True, it will create a db fingerprint (fingerprint for a bucket, not for recognition); 
      #opt = {
               'filter_energy_min': 0,
               'silence_energy_threshold': 100,
               'silence_rate_threshold': 1
           }

def create_humming_fingerprint_by_filebuffer(data_buffer, start_time_seconds, audio_len_seconds):
      #data_buffer: data buffer of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need. If you create a recognition fingerprint, the default is 12 seconds; if you create a db fingerprint, it is not used; 

def create_fingerprint(data_buffer, is_db_fingerprint):
      #data_buffer: audio data buffer(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 
      #is_db_fingerprint: If it is True, it will create a db fingerprint (fingerprint for a bucket, not for recognition); 

def create_humming_fingerprint(data_buffer):
      #data_buffer: audio data buffer(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 

def decode_audio_by_file(file_name, start_time_seconds, audio_len_seconds):
      #It will return the audio data(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 
      #file_name: Path of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need, if it is 0, will decode all the audio; 

def decode_audio_by_filebuffer(data_buffer, start_time_seconds, audio_len_seconds):
      #It will return the audio data(RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, mono 8000 Hz); 
      #data_buffer: data buffer of input file; 
      #start_time_seconds: Start time of input file, default is 0; 
      #audio_len_seconds: Length of audio data you need, if it is 0, will decode all the audio; 

def version() 
      #return the version of this module

Example

Run the test: python test.py test.mp3

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import os, sys
from acrcloud.recognizer import ACRCloudRecognizer

if __name__ == '__main__':
    # Example: recognize the audio/video file given as the first command-line
    # argument, once by file path and once from an in-memory copy of the file.
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of an IndexError on sys.argv[1].
        sys.exit('Usage: python %s <audio_or_video_file>' % sys.argv[0])

    config = {
        # Replace "XXXXXXXX" below with your project's host, access_key and access_secret.
        'host': 'XXXXXXXX',
        'access_key': 'XXXXXXXX',
        'access_secret': 'XXXXXXXX',
        'timeout': 10,  # seconds
    }

    # The recognizer accepts most audio/video files.
    #   Audio: mp3, wav, m4a, flac, aac, amr, ape, ogg ...
    #   Video: mp4, mkv, wmv, flv, ts, avi ...
    recognizer = ACRCloudRecognizer(config)
    media_path = sys.argv[1]

    # Recognize by file path, skipping 0 seconds from the beginning of the file.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(recognizer.recognize_by_file(media_path, 0))

    # Recognize from a buffer read from the same file, again skipping 0 seconds.
    # The context manager closes the file handle as soon as it has been read.
    with open(media_path, 'rb') as media_file:
        buf = media_file.read()
    print(recognizer.recognize_by_filebuffer(buf, 0))