ulixee / hero

The web browser built for scraping
MIT License
800 stars 41 forks source link

All urls received by tab.on 'resource' are the same #152

Closed Baker68 closed 2 years ago

Baker68 commented 2 years ago
import Hero, {LocationStatus} from "@ulixee/hero-playground";

/**
 * Reproduction script: opens the Hero docs page, logs every `resource`
 * event emitted by the active tab, then reloads the page.
 *
 * The Hero session is closed in a `finally` block so the browser is
 * released even when one of the awaited steps rejects.
 */
(async () => {
    const hero = new Hero({
        showChrome: true,
        disableDevtools: false,
        disableMitm: false
    });
    try {
        await hero.goto("https://ulixee.org/docs/hero/plugins/core-plugins");
        // The listener is attached after the initial navigation has been issued;
        // the whole resource object is printed on every event.
        await hero.activeTab.on('resource', (resource) => {
            console.log(resource);
        });
        await hero.waitForPaintingStable();
        await hero.waitForLoad(LocationStatus.AllContentLoaded);
        await hero.reload();
        // Give the reloaded page a chance to settle before the session is closed
        // below, so the script does not tear the tab down right after reload().
        await hero.waitForPaintingStable();
    } finally {
        // Always release the browser session, even if a step above throws.
        await hero.close();
    }
})();

Output:

Resource {
  url: 'https://ulixee.org/css/chunk-vendors.43c1d780.css',
  documentUrl: 'https://ulixee.org/docs/hero/plugins/core-plugins',
  type: 'Stylesheet',
  isRedirect: false,
  request: ResourceRequest {
    headers: {
      ':path': '/css/chunk-vendors.43c1d780.css',
      ':scheme': 'https',
      ':authority': 'ulixee.org',
      ':method': 'GET',
      'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
      'sec-ch-ua-mobile': '?0',
      'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.106 Safari/537.36',
      'sec-ch-ua-platform': '"Windows"',
      accept: 'text/css,*/*;q=0.1',
      'sec-fetch-site': 'same-origin',
      'sec-fetch-mode': 'no-cors',
      'sec-fetch-dest': 'style',
      referer: 'https://ulixee.org/docs/hero/plugins/core-plugins',
      'accept-encoding': 'gzip, deflate, br',
      'accept-language': 'en-US,en;q=0.9'
    },
    url: 'https://ulixee.org/css/chunk-vendors.43c1d780.css',
    timestamp: 2022-09-18T09:15:06.587Z,
    method: 'GET',
    postData: [Getter]
  },
  response: ResourceResponse {
    url: 'https://ulixee.org/css/chunk-vendors.43c1d780.css',
    timestamp: 2022-09-18T09:15:06.629Z,
    headers: {
      'cache-control': 'max-age=3600',
      'content-encoding': 'br',
      'content-type': 'text/css; charset=utf-8',
      etag: '"20847d5a88c80f90f4ef87868c9f9801a7b302f384362cb9b0c464d5ef497c6c-br"',
      'last-modified': 'Thu, 01 Sep 2022 14:47:02 GMT',
      'strict-transport-security': 'max-age=31556926',
      'accept-ranges': 'bytes',
      date: 'Sun, 18 Sep 2022 09:15:06 GMT',
      'x-served-by': 'cache-hhn4050-HHN',
      'x-cache': 'HIT',
      'x-cache-hits': '1',
      'x-timer': 'S1663492507.606459,VS0,VE1',
      vary: 'x-fh-requested-host, accept-encoding',
      'alt-svc': 'h3=":443";ma=86400,h3-29=":443";ma=86400,h3-27=":443";ma=86400',
      'content-length': '5473'
    },
    trailers: undefined,
    browserServedFromCache: undefined,
    browserLoadedTime: null,
    browserLoadFailure: undefined,
    statusCode: 200,
    statusMessage: undefined,
    remoteAddress: '151.101.65.195:443',
    buffer: [Getter],
    text: [Getter],
    json: [Getter]
  },
  buffer: [Getter],
  text: [Getter],
  json: [Getter]
}
Resource {
  url: 'https://ulixee.org/css/chunk-vendors.43c1d780.css',
  documentUrl: 'https://ulixee.org/docs/hero/plugins/core-plugins',
  type: 'Stylesheet',
  isRedirect: false,
  request: ResourceRequest {
    headers: {
      ':path': '/css/chunk-vendors.43c1d780.css',
      ':scheme': 'https',
      ':authority': 'ulixee.org',
      ':method': 'GET',
      'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
      'sec-ch-ua-mobile': '?0',
      'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.106 Safari/537.36',
      'sec-ch-ua-platform': '"Windows"',
      accept: 'text/css,*/*;q=0.1',
      'sec-fetch-site': 'same-origin',
      'sec-fetch-mode': 'no-cors',
      'sec-fetch-dest': 'style',
      referer: 'https://ulixee.org/docs/hero/plugins/core-plugins',
      'accept-encoding': 'gzip, deflate, br',
      'accept-language': 'en-US,en;q=0.9'
    },
    url: 'https://ulixee.org/css/chunk-vendors.43c1d780.css',
    timestamp: 2022-09-18T09:15:06.587Z,
    method: 'GET',
    postData: [Getter]
  },
  response: ResourceResponse {
    url: 'https://ulixee.org/css/chunk-vendors.43c1d780.css',
    timestamp: 2022-09-18T09:15:06.629Z,
    headers: {
      'cache-control': 'max-age=3600',
      'content-encoding': 'br',
      'content-type': 'text/css; charset=utf-8',
      etag: '"20847d5a88c80f90f4ef87868c9f9801a7b302f384362cb9b0c464d5ef497c6c-br"',
      'last-modified': 'Thu, 01 Sep 2022 14:47:02 GMT',
      'strict-transport-security': 'max-age=31556926',
      'accept-ranges': 'bytes',
      date: 'Sun, 18 Sep 2022 09:15:06 GMT',
      'x-served-by': 'cache-hhn4050-HHN',
      'x-cache': 'HIT',
      'x-cache-hits': '1',
      'x-timer': 'S1663492507.606459,VS0,VE1',
      vary: 'x-fh-requested-host, accept-encoding',
      'alt-svc': 'h3=":443";ma=86400,h3-29=":443";ma=86400,h3-27=":443";ma=86400',
      'content-length': '5473'
    },
    trailers: undefined,
    browserServedFromCache: undefined,
    browserLoadedTime: null,
    browserLoadFailure: undefined,
    statusCode: 200,
    statusMessage: undefined,
    remoteAddress: '151.101.65.195:443',
    buffer: [Getter],
    text: [Getter],
    json: [Getter]
  },
  buffer: [Getter],
  text: [Getter],
  json: [Getter]
}

Currently using "@ulixee/hero-playground": "^2.0.0-alpha.10"

blakebyrnes commented 2 years ago

The URLs are actually correct if you access the properties directly (e.g. `resource.url`); only the printed output is wrong, because it is printed from a cache of the first resource seen. This will be fixed in the next version.