Open feeops opened 1 month ago
Create a custom proxy with golang?
I use this code
package main
import (
"fmt"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/stealth"
"time"
)
// main launches a headless Chromium through rod's launcher, opens a stealth
// page inside an incognito context, sets extra request headers, navigates to
// httpbin and prints the resulting page HTML.
func main() {
	l := launcher.New()
	l.Devtools(false)
	l.Headless(true)
	l.NoSandbox(true)
	u, err := l.Launch()
	if err != nil {
		// Without a control URL there is nothing to connect to; continuing
		// would only panic later inside MustConnect.
		fmt.Printf("launcher error: %v\n", err)
		return
	}
	defer l.Kill()

	mainBrowser := rod.New().ControlURL(u).NoDefaultDevice().MustConnect()
	defer mainBrowser.MustClose()

	browser := mainBrowser.MustIncognito()
	page := stealth.MustPage(browser)

	// Set every extra header in a single call: each call sends its own header
	// set to the browser, so a second call would drop the headers of the first.
	// NOTE(review): for the top-level navigation the Referer may additionally
	// need to be passed as the referrer of the navigate command itself - confirm.
	page.MustSetExtraHeaders(
		"Referer", "https://www.google.com",
		"X-Header", "my request header",
	)

	err = page.Timeout(60 * time.Second).Navigate("https://httpbin.org/get")
	if err != nil {
		fmt.Println("page.Timeout", err)
		return
	}

	if err = page.WaitLoad(); err != nil {
		fmt.Println("page.WaitLoad", err)
		return
	}

	// Give the page a moment to settle before dumping the HTML.
	time.Sleep(3 * time.Second)
	fmt.Println(page.MustHTML())
}
Output
<html><head><meta name="color-scheme" content="light dark"><meta charset="utf-8"></head><body><pre>{
"args": {},
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Accept-Language": "zh-CN",
"Host": "httpbin.org",
"Priority": "u=0, i",
"Sec-Ch-Ua": "\"Not;A=Brand\";v=\"24\", \"Chromium\";v=\"128\"",
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": "\"Windows\"",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/128.0.0.0 Safari/537.36",
"X-Amzn-Trace-Id": "Root=1-66e14b6f-3a84e23251d5d8aa0b031acf",
"X-Header": "my request header"
},
"origin": "42.2.127.62",
"url": "https://httpbin.org/get"
}
</pre><div class="json-formatter-container"></div></body></html>
The X-Header header works.
The Referer header does not work.
Not sure why it doesn't work. The cdp api doesn't have other options to set extra headers.
Playwright, browserless, and Puppeteer can all do it.
This is the Playwright code:
package main
import (
stealth "github.com/jonfriesen/playwright-go-stealth"
"github.com/playwright-community/playwright-go"
"log"
"os"
)
// main starts Playwright, launches Chromium with a custom user agent, injects
// the stealth script, navigates to httpbin with an explicit Referer and saves
// a screenshot of the page to screenshot.png.
//
// Every failure is fatal: log.Fatalf exits the process immediately, so the
// deferred Stop/Close calls only run on the successful path.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatalf("could not start playwright: %v", err)
	}
	defer pw.Stop()

	opts := playwright.BrowserTypeLaunchOptions{}
	// opts.ExecutablePath = playwright.String("C:\\Users\\liyi\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe")
	ua := `--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.3`
	opts.Args = []string{ua}

	browser, err := pw.Chromium.Launch(opts)
	if err != nil {
		log.Fatalf("could not launch browser: %v", err)
	}
	defer browser.Close()

	page, err := browser.NewPage()
	if err != nil {
		log.Fatalf("could not create page: %v", err)
	}

	if err = stealth.Inject(page); err != nil {
		log.Fatalf("could not inject stealth script: %v", err)
	}

	// The Referer is supplied as a navigation option rather than as an extra header.
	gotoOpts := playwright.PageGotoOptions{Referer: playwright.String("https://www.baidu.com")}
	if _, err = page.Goto("https://httpbin.org/get", gotoOpts); err != nil {
		log.Fatalf("could not goto: %v", err)
	}

	// Wait until the network is idle so the screenshot shows the final page.
	waitOpts := playwright.PageWaitForLoadStateOptions{State: playwright.LoadStateNetworkidle}
	if err = page.WaitForLoadState(waitOpts); err != nil {
		log.Fatalf("could not wait for load state: %v", err)
	}

	content, err := page.Screenshot()
	if err != nil {
		log.Fatalf("could not get screenshot: %v", err)
	}

	if err = os.WriteFile("screenshot.png", content, 0600); err != nil {
		log.Fatalf("could not write screenshot: %v", err)
	}
}
You can log the cdp calls of puppeteer and compare it with rod's to check what makes the difference.
Does go-rod have an option like PageGotoOptions in Playwright or Puppeteer?
I added one and will push it later. This is a bug in rod.
page.Navigate()
clears the referrer when calling proto.PageNavigate()
, but calling proto.PageNavigate()
directly seems to work.
// Issue the navigate command directly, passing the referrer explicitly,
// instead of going through page.Navigate().
url := "https://foo.com"
ref := "https://bar.com"
_, err = proto.PageNavigate{URL: url, Referrer: ref}.Call(page)
page.Navigate()
clears the referrer when calling proto.PageNavigate()
, but calling proto.PageNavigate()
directly seems to work.
url := "https://foo.com"
ref := "https://bar.com"
_, err = proto.PageNavigate{URL: url, Referrer: ref}.Call(page)
It works. Thanks.
Rod Version: v0.116.2
The code to demonstrate your question
I want to set the Referrer when web scraping. I can use HijackRequests, but it is very slow.
Is there a simpler way to do this?