gospider007 / gospider

🚀Gospider is a powerful Golang web crawler that includes all the necessary libraries for transitioning from Python to Golang. It provides a fast and seamless transition for Python web crawlers to Golang.
GNU Lesser General Public License v3.0
91 stars 22 forks source link

两次handshake耗时比较久 #7

Closed jianmaikj closed 11 months ago

jianmaikj commented 11 months ago

同样使用代理的情况下,总体耗时是正常使用net/http或fasthttp请求耗时的2到3倍, 看了下好像是这里两次handshake耗时比较久,

if err = utlsConn.HandshakeContext(ctx); err != nil {
        if strings.HasSuffix(err.Error(), "bad record MAC") {
            err = tools.WrapError(err, "检测到22扩展异常,请删除此扩展后重试")
        }
    }

有什么优化方案吗

gospider007 commented 11 months ago

同样使用代理的情况下,总体耗时是正常使用net/http或fasthttp请求耗时的2到3倍, 看了下好像是这里两次handshake耗时比较久,

if err = utlsConn.HandshakeContext(ctx); err != nil {
      if strings.HasSuffix(err.Error(), "bad record MAC") {
          err = tools.WrapError(err, "检测到22扩展异常,请删除此扩展后重试")
      }
  }

有什么优化方案吗

我测试了下并没有出现你说的这种情况,你可以提供具体的测试代码吗

jianmaikj commented 11 months ago

同样使用代理的情况下,总体耗时是正常使用net/http或fasthttp请求耗时的2到3倍, 看了下好像是这里两次handshake耗时比较久,

if err = utlsConn.HandshakeContext(ctx); err != nil {
        if strings.HasSuffix(err.Error(), "bad record MAC") {
            err = tools.WrapError(err, "检测到22扩展异常,请删除此扩展后重试")
        }
    }

有什么优化方案吗

我测试了下并没有出现你说的这种情况,你可以提供具体的测试代码吗

这是我的测试代码

// myProxy is the proxy URL that both doHttp and doGospider send their request through.
// NOTE(review): host and port look like redacted placeholders — substitute a real SOCKS5 endpoint before running.
const myProxy = "socks5://xxxxxx:xxxx"

// TestGospider runs the gospider request and then the net/http request, once
// each, so the "reqTime" values they print can be compared by eye. It makes no
// assertions on the results.
func TestGospider(t *testing.T) {
    doGospider()
    doHttp()
}
func doHttp() {
    t0 := time.Now()
    var dialer proxy.Dialer
    if u, err := url.Parse(myProxy); err == nil {
        dialer, err = proxy.FromURL(u, proxy.Direct)
    }
    transport := &http.Transport{
        Dial: dialer.Dial,
    }

    client := &http.Client{
        Transport: transport,
        Timeout:   6 * time.Second,
    }

    resp, err := client.Get("https://tools.scrapfly.io/api/fp/ja3?extended=1")

        fmt.Println("reqTime http:", time.Now().Sub(t0).Milliseconds())

    if err != nil {
        fmt.Println("err:", err)
    }

    body, _ := io.ReadAll(resp.Body)
    fmt.Println(string(body))
}

// doGospider is the gospider side of the timing comparison: it builds a
// randomized JA3 spec, creates a requests client with H2Ja3 enabled, issues one
// GET to the JA3 fingerprint endpoint through myProxy, prints the elapsed
// milliseconds (measured from function entry, so spec and client setup are
// included), and then prints the decoded JSON response.
//
// Any failure is printed and ends the function early; in particular the
// response is not touched when the request itself failed.
func doGospider() {
	t0 := time.Now()

	ja3Spec, err := ja3.CreateSpecWithId(ja3.HelloRandomized)
	if err != nil {
		fmt.Println("err:", err)
		return
	}

	reqCli, err := requests.NewClient(nil, requests.ClientOption{
		H2Ja3:   true,
		Ja3Spec: ja3Spec,
	})
	if err != nil {
		fmt.Println("err2:", err)
		return
	}

	apiUrl := "https://tools.scrapfly.io/api/fp/ja3?extended=1"
	response, err := reqCli.Get(nil, apiUrl, requests.RequestOption{
		Timeout: 6 * time.Second,
		Proxy:   myProxy,
	})

	// Report the timing before the error check so failed requests are timed too.
	fmt.Println("reqTime gospider:", time.Since(t0).Milliseconds())

	if err != nil {
		fmt.Println("err3:", err)
		return
	}
	fmt.Println(response.Json())
}
gospider007 commented 11 months ago

同样使用代理的情况下,总体耗时是正常使用net/http或fasthttp请求耗时的2到3倍, 看了下好像是这里两次handshake耗时比较久,

if err = utlsConn.HandshakeContext(ctx); err != nil {
      if strings.HasSuffix(err.Error(), "bad record MAC") {
          err = tools.WrapError(err, "检测到22扩展异常,请删除此扩展后重试")
      }
  }

有什么优化方案吗

我测试了下并没有出现你说的这种情况,你可以提供具体的测试代码吗

这是我的测试代码

// myProxy is the proxy URL that both doHttp and doGospider send their request through.
// NOTE(review): host and port look like redacted placeholders — substitute a real SOCKS5 endpoint before running.
const myProxy = "socks5://xxxxxx:xxxx"

// TestGospider runs the gospider request and then the net/http request, once
// each, so the "reqTime" values they print can be compared by eye. It makes no
// assertions on the results.
func TestGospider(t *testing.T) {
  doGospider()
  doHttp()
}
func doHttp() {
  t0 := time.Now()
  var dialer proxy.Dialer
  if u, err := url.Parse(myProxy); err == nil {
      dialer, err = proxy.FromURL(u, proxy.Direct)
  }
  transport := &http.Transport{
      Dial: dialer.Dial,
  }

  client := &http.Client{
      Transport: transport,
      Timeout:   6 * time.Second,
  }

  resp, err := client.Get("https://tools.scrapfly.io/api/fp/ja3?extended=1")

        fmt.Println("reqTime http:", time.Now().Sub(t0).Milliseconds())

  if err != nil {
      fmt.Println("err:", err)
  }

  body, _ := io.ReadAll(resp.Body)
  fmt.Println(string(body))
}

// doGospider is the gospider side of the timing comparison: it builds a
// randomized JA3 spec, creates a requests client with H2Ja3 enabled, issues one
// GET to the JA3 fingerprint endpoint through myProxy, prints the elapsed
// milliseconds (measured from function entry, so spec and client setup are
// included), and then prints the decoded JSON response.
//
// Any failure is printed and ends the function early; in particular the
// response is not touched when the request itself failed.
func doGospider() {
	t0 := time.Now()

	ja3Spec, err := ja3.CreateSpecWithId(ja3.HelloRandomized)
	if err != nil {
		fmt.Println("err:", err)
		return
	}

	reqCli, err := requests.NewClient(nil, requests.ClientOption{
		H2Ja3:   true,
		Ja3Spec: ja3Spec,
	})
	if err != nil {
		fmt.Println("err2:", err)
		return
	}

	apiUrl := "https://tools.scrapfly.io/api/fp/ja3?extended=1"
	response, err := reqCli.Get(nil, apiUrl, requests.RequestOption{
		Timeout: 6 * time.Second,
		Proxy:   myProxy,
	})

	// Report the timing before the error check so failed requests are timed too.
	fmt.Println("reqTime gospider:", time.Since(t0).Milliseconds())

	if err != nil {
		fmt.Println("err3:", err)
		return
	}
	fmt.Println(response.Json())
}

I fixed this bug and you can try updating it

jianmaikj commented 11 months ago

Thanks! It works now, but I found that the handshake takes almost 2/3 of the total request time. Is that normal?

gospider007 commented 11 months ago

谢谢!它现在可以工作了,但我发现握手将花费几乎 2/3 的请求时间,正常吗?

normal