go-rod / rod

A Chrome DevTools Protocol driver for web automation and scraping.
https://go-rod.github.io
MIT License
5k stars 328 forks source link

并发page pool, context deadline exceeded,page为什么没有释放? #1044

Closed see72changes closed 2 months ago

see72changes commented 2 months ago

// Img visits the URL of every entry in list with a headless browser and
// returns the image sources collected by Job, keyed by the entry's Id
// formatted as a base-10 string. Entries whose Job call returns an error are
// logged and left out of the result.
func Img(list []model.SpiderList) (imgList map[string][]string) {
	controlURL := launcher.New().Set("--no-sandbox").MustLaunch()
	browser := rod.New().ControlURL(controlURL).MustConnect()
	defer browser.MustClose()

	// The pool is created with size 2, so at most two jobs hold a page at
	// the same time; the remaining goroutines wait inside pool.Get.
	pool := rod.NewPagePool(2)

	// Factory handed to pool.Get; each page comes from its own incognito
	// browser so that pages are isolated from each other.
	newPage := func() *rod.Page {
		return browser.MustIncognito().MustPage()
	}

	imgList = make(map[string][]string)

	var (
		mu sync.Mutex // guards imgList
		wg sync.WaitGroup
	)

	// Start one goroutine per list entry.
	for _, item := range list {
		wg.Add(1)

		go func(entry model.SpiderList) {
			defer wg.Done()

			p := pool.Get(newPage)
			defer pool.Put(p)

			fmt.Println("采集图片:", entry.Id)

			srcs, jobErr := Job(entry.Url, p)
			if jobErr != nil {
				fmt.Println(entry.Url)
				fmt.Println(jobErr) // e.g. context deadline exceeded
				return
			}

			key := strconv.FormatInt(entry.Id, 10)

			mu.Lock()
			imgList[key] = srcs
			mu.Unlock()

			//time.Sleep(time.Second * time.Duration(utils.RandInt(1, 3)))
		}(item)
	}

	wg.Wait()

	// Close every page handed to the cleanup callback by the pool.
	pool.Cleanup(func(p *rod.Page) { p.MustClose() })

	//rodUtils.Pause() // pause goroutine

	return imgList
}

// Job navigates page to url and returns the "data-src" attribute values of
// the <img> elements found inside the element matching "#js_content", passed
// through utils.RemoveRepByMap.
//
// A non-nil error is returned when navigation, the "#js_content" lookup, or
// the <img> lookup fails. Images whose attribute cannot be read, or that have
// no "data-src" attribute, are skipped.
//
// Timeout handling: per rod's documentation, Timeout returns a clone whose
// context is inherited by everything derived from it, and CancelTimeout
// returns a clone bound to the parent context again. Without CancelTimeout,
// the 5s and 30s timeouts below would be nested inside the 10s deadline of
// the "#js_content" lookup instead of being independent per-step limits.
func Job(url string, page *rod.Page) ([]string, error) {
	fmt.Println("采集地址:", url)

	//page.MustNavigate(url).MustWaitLoad()
	var imgSrc []string

	// Navigate on a timed clone and release its timer once the call returns.
	nav := page.Timeout(time.Second * 20)
	err := nav.Navigate(url)
	nav.CancelTimeout()
	if err != nil {
		fmt.Println(err.Error())
		fmt.Println("打开链接失败")
		return imgSrc, err
	}

	jsContentElement, err := page.Timeout(time.Second * 10).Element("#js_content")
	if err != nil {
		fmt.Println("获取js_content超时")
		return imgSrc, err
	}
	// Drop the 10s deadline the element inherited from the timed page clone,
	// so the timeouts applied below start from a clean context.
	jsContentElement = jsContentElement.CancelTimeout()

	// Images
	jsContentImgs, err := jsContentElement.Timeout(time.Second * 5).Elements("img")
	if err != nil {
		fmt.Println("获取img超时")
		return imgSrc, err
	}

	for _, img := range jsContentImgs {
		// Every img carries the 5s context of the lookup above; cancel it
		// before applying the per-attribute timeout, then release that too.
		timed := img.CancelTimeout().Timeout(time.Second * 30)
		dataSrcImg, err := timed.Attribute("data-src")
		timed.CancelTimeout()

		// Best effort: skip images that fail or have no data-src attribute.
		if err != nil || dataSrcImg == nil {
			continue
		}

		imgSrc = append(imgSrc, *dataSrcImg)
	}

	return utils.RemoveRepByMap(imgSrc), nil
}

出现context deadline exceeded错误,程序异常挂起,排查下来,page没有释放。 Current version is v0.114.5

github-actions[bot] commented 2 months ago

Please add a valid Rod Version: v0.0.0 to your issue. Current version is v0.115.0

generated by check-issue

ysmood commented 2 months ago

322