berstend / puppeteer-extra

💯 Teach puppeteer new tricks through plugins.
https://extra.community
MIT License
6.51k stars 742 forks source link

Are there plans to add a speech recognition plugin? #96

Closed humorwang closed 3 years ago

humorwang commented 4 years ago

Are there plans to add a speech recognition plugin?

humorwang commented 4 years ago

More reliable speech recognition rate

berstend commented 4 years ago

@humorwang can you provide more context what you mean?

If you refer to recaptcha (?) then I'll quote myself from the readme documentation:

image

humorwang commented 4 years ago

@berstend Yes, I've seen the readme documentation, but I still want to use the audio challenge because it's free. With Baidu speech recognition, the accuracy is very good.

berstend commented 4 years ago

I recommend looking into this then:

https://github.com/ecthros/uncaptcha2/issues/7 https://github.com/dessant/buster

Happy to accept PRs, but I won't implement it myself as 2captcha & friends are cheap enough (for my use cases). :)

PS: The main reason I don't implement it is that the audio challenge will eventually be blocked when you use it too often, so the potential savings are minuscule.

berstend commented 4 years ago

@humorwang can you point me to the public Baidu speech-to-text API? I can only find pre-trained tensorflow models but no hosted API

berstend commented 4 years ago

image

https://ai.baidu.com/tech/speech

Is it this? Doesn't seem to be free?

berstend commented 4 years ago

Buster seems to primarily use this API (which is free): https://wit.ai/faq

humorwang commented 4 years ago

Yes, it is https://ai.baidu.com/tech/speech. It has a QPS limit, but that's enough for me.

humorwang commented 4 years ago

I can't access this website (https://wit.ai/faq) in my country. hahaha (>_<). PS: I can use more IP proxies to get around the Google block.

humorwang commented 4 years ago

This is my code; if you could help me revise it, that would be great too.

const puppeteer = require('puppeteer');
const utility = require("utility");
const request = require('request');
const fs = require('fs');

// Script entry point: run a single attempt and report any failure.
// The original also awaited `page.waitFor(15000)` here, but `page` only exists
// inside `ones()` (which has already closed the browser by the time it
// returns), so that line could only ever throw a ReferenceError.
;(async () => {
  await ones();
})().catch((error) => {
  // Surface the failure and signal it through the exit code instead of
  // leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});

/**
 * Runs one attempt against the account-creation page: launches a browser,
 * clicks the reCAPTCHA checkbox and, if the checkbox is not accepted right
 * away, switches to the audio challenge, sends the audio download link to the
 * local transcription service at http://127.0.0.1:8888/audio and types the
 * returned text into the challenge.
 *
 * The browser is always closed before the function settles, including when a
 * step throws.
 *
 * @returns {Promise<void>}
 * @throws {Error} when an expected reCAPTCHA frame or element is missing.
 */
async function ones(){
  const browser = await puppeteer.launch({
    headless: false,
    slowMo: 250, // slow down by 250ms
    devtools: true,
    userDataDir:"D:/temporatry",
    args: [
      '--cryptauth-http-host',
      '--disable-accelerated-2d-canvas',
      '--disable-background-networking',
      '--disable-background-timer-throttling',
      '--disable-browser-side-navigation',
      '--disable-client-side-phishing-detection',
      '--disable-default-apps',
      '--disable-dev-shm-usage',
      '--disable-device-discovery-notifications',
      '--disable-extensions',
      '--disable-features=site-per-process',
      '--disable-hang-monitor',
      '--disable-java',
      '--disable-popup-blocking',
      '--disable-prompt-on-repost',
      '--disable-setuid-sandbox',
      '--disable-sync',
      '--disable-translate',
      '--disable-web-security',
      '--disable-webgl',
      '--metrics-recording-only',
      '--no-first-run',
      '--safebrowsing-disable-auto-update',
      '--no-sandbox',
      '--enable-automation',
      '--password-store=basic',
      '--use-mock-keychain',
      '--proxy-server=http://127.0.0.1:1080'
    ]
  });

  // try/finally guarantees the browser process is released on every path.
  try {
    const page = await browser.newPage();
    await page.setBypassCSP(true);
    await page.goto('https://www.pathofexile.com/account/create');

    const hasRecaptchaScriptTag = await page.$(
      `script[src="/recaptcha/api.js"]`
    );

    console.log('hasRecaptchaScriptTag', !!hasRecaptchaScriptTag);
    if (hasRecaptchaScriptTag) {
      console.log('waitForRecaptchaClient - start', new Date());
      await page.waitForFunction(
      `
      (function() {
        return window.___grecaptcha_cfg && window.___grecaptcha_cfg.count
      })()
    `,
        { polling: 200, timeout: 10 * 1000 }
      );
      console.log('waitForRecaptchaClient - end', new Date());
    }

    // Locate the two reCAPTCHA iframes by their URL fragments.
    let checkbox_frame;
    let image_frame;
    for (const frame of page.frames()) {
      const url = frame.url();
      if (url.includes("api2/anchor")) {
        checkbox_frame = frame;
      }
      if (url.includes("api2/bframe")) {
        image_frame = frame;
      }
    }
    if (!checkbox_frame) {
      throw new Error('reCAPTCHA checkbox frame (api2/anchor) not found');
    }

    // Resolves a selector inside a frame, failing with a readable message
    // instead of a later "cannot read property of null".
    const requireElement = async (frame, selector) => {
      const handle = await frame.$(selector);
      if (!handle) {
        throw new Error(`reCAPTCHA element not found: ${selector}`);
      }
      return handle;
    };

    // Reads the checkbox's aria-checked attribute (undefined when absent).
    const readAriaChecked = async () => {
      const values = await checkbox_frame.$$eval(
        '#recaptcha-anchor',
        nodes => nodes.map(node => node.getAttribute("aria-checked"))
      );
      return values[0];
    };

    const checkbox = await requireElement(checkbox_frame, "#recaptcha-anchor");
    await click_button(page, checkbox);

    // Passed directly: the checkbox was accepted without a challenge.
    if ((await readAriaChecked()) === 'true') {
      return;
    }

    if (!image_frame) {
      throw new Error('reCAPTCHA challenge frame (api2/bframe) not found');
    }

    // Click a few random image tiles before switching to the audio challenge.
    // The index is bounded by the real tile count, so a grid with fewer than
    // nine tiles can no longer yield an undefined element.
    const tiles = await image_frame.$$(".rc-imageselect-tile");
    for (let index = 0; index < 6 && tiles.length > 0; index++) {
      await click_button(page, tiles[Math.floor(Math.random() * tiles.length)]);
    }

    const audio_button = await requireElement(image_frame, "#recaptcha-audio-button");
    await click_button(page, audio_button);
    await page.waitFor(5000);
    const play_button = await requireElement(image_frame, ".goog-inline-block");
    await click_button(page, play_button);
    await page.waitFor(5000);

    // Plain DOM lookup; no third-party script has to be injected into the
    // frame just to read one attribute.
    const audio_href = await image_frame.evaluate(() => {
      const link = document.querySelector(".rc-audiochallenge-tdownload-link");
      return link ? link.getAttribute("href") : null;
    });
    console.info("audio_href:" + audio_href);

    if (audio_href){
      const res = await syncBody("http://127.0.0.1:8888/audio", {'url':audio_href});
      console.info("--------------------");
      console.info(res.result);

      await page.waitFor(5000);

      if (typeof res.result === 'string' && res.result !== ''){
        console.info("input value");
        const response_input = await requireElement(image_frame, "#audio-response");
        // The per-key delay must be passed as an options object to take effect.
        await response_input.type(res.result, { delay: randomNum(70, 130) });
        const verify_button = await requireElement(image_frame, "#recaptcha-verify-button");

        await click_button(page, verify_button);
        await page.waitFor(6000);
        const aria_checked_2 = await readAriaChecked();
        console.info(aria_checked_2);
        if (aria_checked_2 === 'true'){
          console.info("success");
        }
      }
      await page.waitFor(10000);
    }
  } finally {
    await browser.close();
  }
}

/**
 * Moves the mouse through three random waypoints to the element's bounding-box
 * origin, then hovers and clicks the element with a random press duration.
 *
 * The original passed `steps=...` / `delay=...` as arguments, which assigned
 * implicit globals and handed a bare number to parameters that expect an
 * options object; they are now passed as `{ steps }` / `{ delay }`.
 *
 * NOTE(review): now that the step counts are actually honoured, each move is
 * split into hundreds of intermediate events. If the browser is launched with
 * a `slowMo` delay this presumably makes every click far slower — confirm the
 * step ranges are still what you want.
 *
 * @param {object} page - Puppeteer page whose `mouse` is driven.
 * @param {object} button - Puppeteer element handle to click.
 * @returns {Promise<void>}
 * @throws {Error} when the element has no bounding box.
 */
async function click_button(page, button) {
  const box = await button.boundingBox();
  if (!box) {
    throw new Error('click_button: element has no bounding box (detached or not visible)');
  }
  await page.mouse.move(randomNum(300, 800), randomNum(400, 900), { steps: randomNum(500, 600) });
  await page.mouse.move(randomNum(100, 800), randomNum(200, 900), { steps: randomNum(300, 500) });
  await page.mouse.move(randomNum(0, 800), randomNum(0, 600), { steps: randomNum(200, 500) });
  await page.mouse.move(box.x, box.y, { steps: randomNum(300, 480) });
  await button.hover();
  // Random delay between mouse-down and mouse-up.
  await button.click({ delay: randomNum(25, 165) });
}

/**
 * Returns a random integer; the range depends on how many arguments are passed.
 *
 * - `randomNum(max)`      -> integer in [1, max]
 * - `randomNum(min, max)` -> integer in [min, max]
 * - any other arity       -> 0
 *
 * Uses `Math.floor` instead of `parseInt`: `parseInt` first converts the float
 * to a string, so tiny values rendered in exponent notation (e.g. "5.5e-7")
 * were parsed as 5, and negative values were truncated toward zero, skewing
 * the distribution.
 *
 * @param {number} minNum - upper bound (one argument) or lower bound (two).
 * @param {number} [maxNum] - inclusive upper bound.
 * @returns {number}
 */
function randomNum(minNum, maxNum) {
  // Dispatch on the number of arguments actually passed, as the original did.
  switch (arguments.length) {
    case 1:
      return Math.floor(Math.random() * minNum + 1);
    case 2:
      return Math.floor(Math.random() * (maxNum - minNum + 1) + minNum);
    default:
      return 0;
  }
}

/**
 * Sends `params` to `url` through `synchronous_post`, logs the raw response
 * body and returns it parsed as JSON.
 *
 * @param {string} url - endpoint to call.
 * @param {object} params - form fields to send.
 * @returns {Promise<*>} the parsed JSON response.
 */
let syncBody = async (url, params) => {
  const rawBody = await synchronous_post(url, params);
  console.info(rawBody);
  const parsed = JSON.parse(rawBody);
  return parsed;
};

/**
 * Sends `params` as a form-encoded POST to `url` and resolves with the raw
 * response body.
 *
 * The original issued the request with `request.get` even though it carries a
 * form body and the function is named "post"; it now uses `request.post`.
 *
 * @param {string} url - endpoint to call.
 * @param {object} params - form fields to send.
 * @returns {Promise<string>} resolves with the response body; rejects with the
 *   transport error reported by `request`.
 */
let synchronous_post = function (url, params) {
  const options = {
    url,
    form: params,
  };
  return new Promise((resolve, reject) => {
    request.post(options, (error, response, body) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(body);
    });
  });
};
berstend commented 4 years ago

I'm gonna re-open this issue and make it a potential future feature as implementing a self-contained reCAPTCHA cracker based on the audio challenge sounds like a fun challenge (with the option to use the existing, reliable 2captcha method as a fallback).

Would be great if you could provide a quick tutorial how to make an account/get a token at https://ai.baidu.com/tech/speech for us westerners. :)

humorwang commented 4 years ago

sorry,No matter how I try, I use the link https://ai.baidu.com/tech/speech . always visit the official website of China. so i can't provide a quick tutorial for us westerners. maybe you can use my token to test. this is my python service code . maybe you can use the node.js sdk (npm install baidu-aip-sdk) the doc link and my APP_ID=17887138 API_Key=9iEGZg7UPylQtRxFy3kTi6ua SECRET_KEY=mMsFps8dYpH15t3FzGtFiuDbEoqEyb6N

from sanic import Sanic
from sanic import response
from sanic.response import json
import aiofiles
import hashlib
from pydub import AudioSegment
from aip import AipSpeech
import asyncio
import requests

# Module-level Sanic application; the @app.route handlers below register on it.
app = Sanic()

async def get_page_win(
        url,
        proxy=None,
        proxy_auth=None,
        binary=False,
        verify=False,
        timeout=300):
    """Fetch ``url`` with ``requests`` and return the response body.

    The HTTP call is blocking, so it runs in the event loop's default executor
    instead of directly inside the coroutine; the original stalled the whole
    server for the duration of the download.

    Args:
        url: Address to fetch.
        proxy: Optional proxy address used for both http and https.
        proxy_auth: Optional dict with ``username`` and ``password`` keys;
            when given, the credentials are embedded in the proxy URL.
        binary: Return ``resp.content`` (bytes) when true, else ``resp.text``.
        verify: Passed through to ``requests`` as ``verify``.
        timeout: Passed through to ``requests`` as ``timeout``.

    Returns:
        The response body as bytes or str, depending on ``binary``.
    """
    proxies = None
    if proxy:
        if proxy_auth:
            # Strip the scheme so the credentials can be placed before the host.
            host = proxy.replace("http://", "")
            username = proxy_auth['username']
            password = proxy_auth['password']
            proxy_url = f"http://{username}:{password}@{host}"
        else:
            proxy_url = proxy
        proxies = {"http": proxy_url, "https": proxy_url}

    def _fetch():
        with requests.Session() as session:
            resp = session.get(
                url,
                proxies=proxies,
                verify=verify,
                timeout=timeout)
            return resp.content if binary else resp.text

    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(None, _fetch)

async def save_file(file, data, binary=False):
    """Write ``data`` to the path ``file`` using aiofiles.

    The file is opened in ``"wb"`` mode when ``binary`` is true and in ``"w"``
    mode otherwise.
    """
    open_mode = "wb" if binary else "w"
    async with aiofiles.open(file, mode=open_mode) as handle:
        await handle.write(data)

def get_md(url):
    """Return the hexadecimal MD5 digest of ``url`` (a bytes-like object)."""
    return hashlib.md5(url).hexdigest()

async def mp3_to_wav(mp3_filename):
    """Convert an MP3 file to a mono 16 kHz WAV file and return the WAV path.

    Only the file extension is swapped to build the output path. The original
    used ``str.replace("mp3", "wav")``, which rewrote every "mp3" in the path,
    including directory names such as ``.../mp3/``.

    Only the middle part of the audio is exported: ``len(sound) / 3.1`` is cut
    from both the start and the end.
    NOTE(review): presumably this trims noise around the spoken digits —
    confirm the 3.1 factor against real challenge audio.
    """
    import os  # local import: only this function needs os.path

    base, _ext = os.path.splitext(mp3_filename)
    wav_filename = base + ".wav"
    segment = AudioSegment.from_mp3(mp3_filename)
    sound = segment.set_channels(1).set_frame_rate(16000)
    garbage = len(sound) / 3.1
    sound = sound[+garbage:len(sound) - garbage]
    sound.export(wav_filename, format="wav")
    return wav_filename

async def get_file_content(filePath):
    """Read the whole file at ``filePath`` in binary mode and return its bytes."""
    with open(filePath, mode='rb') as handle:
        data = handle.read()
    return data

@app.route('/')
async def index(request):
    """Root route: respond with a fixed JSON welcome message."""
    return json({'message': 'Welcom to 猿人学Python'}, ensure_ascii=False)

@app.route('/audio', methods=['POST','GET'])
async def audio(request):
    """Download the audio at the posted ``url``, transcribe it, return the text.

    Expects a form field ``url``. The audio is fetched through the local proxy,
    saved as ``<md5(url)>.mp3``, converted to WAV and sent to Baidu speech
    recognition. Responds with ``{"result": <text>}``; ``result`` is an empty
    string when recognition did not succeed, and the status is 400 when no
    ``url`` was supplied.
    """
    url = request.form.get('url')
    if not url:
        # The original raised KeyError (HTTP 500) when the field was missing.
        return response.json({"result": ""}, status=400)
    print(url)
    file_path = "C:/Users/wangyan/Desktop/recaptcha/pyp/mp3/"
    filename = get_md(url.encode("utf-8")) + ".mp3"
    # requests timeouts are expressed in seconds; the original 30*1000 meant
    # a 30000-second timeout rather than the presumably intended 30 seconds.
    audio_data = await get_page_win(url, proxy="127.0.0.1:1080", binary=True, timeout=30)
    print(type(audio_data))
    await save_file(file_path + filename, data=audio_data, binary=True)

    wav_filename = await mp3_to_wav(file_path + filename)
    wav_data = await get_file_content(wav_filename)
    # NOTE(review): API credentials are hard-coded here; they should be moved
    # to configuration/environment and rotated.
    client = AipSpeech("17887138", "9iEGZg7UPylQtRxFy3kTi6ua", "mMsFps8dYpH15t3FzGtFiuDbEoqEyb6N")
    res = client.asr(wav_data, 'wav', 16000, {
        'dev_pid': 1737,
    })
    print(res)
    answer = ''
    # Only read 'result' after checking for success: the original printed
    # type(res['result']) unconditionally, which raises KeyError whenever the
    # response carries no 'result' key.
    if res.get('err_no') == 0 and res.get('result'):
        answer = res['result'][0]

    return response.json({"result": answer})

# When executed as a script, serve the app on 127.0.0.1:8888 with debug enabled.
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8888,debug=True)
berstend commented 4 years ago

Thanks for providing more info :) Adding speech recognition is an interesting but non-trivial task. I realistically won't have time to tackle this anytime soon, but PRs are definitely welcome.

MoraxCloud commented 4 years ago

@berstend You can use IBM (500 minutes per month) or Microsoft (5 hours per month) speech-to-text API.

berstend commented 3 years ago

Closing due to inactivity.

ramzimaalej commented 3 years ago

I have tried this, but it did not allow me to use the audio as it detected that I'm doing automated requests.