as driver for getting the rendered HTML.
*/
const browserless = require('browserless')()
/**
 * Fetch the content for `url` through `getHTML`, using a dedicated browser
 * context created from the shared `browserless` instance.
 *
 * The context is destroyed once `getHTML` settles, on failure as well as on
 * success, so a failed fetch neither leaks the context nor leaves a rejected
 * promise without a handler.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<*>} Resolves with whatever `getHTML` resolves with and
 *   rejects with its error.
 */
const getContent = async url => {
  // create a browser context inside the main Chromium process
  const browserContext = browserless.createContext()
  try {
    return await getHTML(url, { getBrowserless: () => browserContext })
  } finally {
    // close browser resources before handing back the result or the error;
    // `createContext()` is awaited because it may return a promise
    const browser = await browserContext
    await browser.destroyContext()
  }
}
/**
metascraper is a collection of tiny packages,
so you can just use what you actually need.
*/
// Load the metascraper factory first, then hand it the packages to use.
const createMetascraper = require('metascraper')
// Every package is a factory that is invoked here without options.
const metascraper = createMetascraper([
  require('metascraper-instagram')(),
  require('metascraper-author')(),
  require('metascraper-date')(),
  require('metascraper-description')(),
  require('metascraper-image')(),
  require('metascraper-logo')(),
  require('metascraper-publisher')(),
  require('metascraper-title')(),
  require('metascraper-url')()
])
/**
The main logic
*/
// Scrape the post, print its metadata, then always release the browser.
getContent('https://www.instagram.com/p/Cv_Lbq1ygce')
  .then(metascraper)
  .then(metadata => console.log(metadata))
  .catch(error => {
    // report the failure and flag it in the exit status instead of leaving
    // an unhandled rejection with the browser still open
    console.error(error)
    process.exitCode = 1
  })
  // close the browser whether scraping succeeded or failed
  .finally(() => browserless.close())
  // exit without arguments so the value `close()` resolves with is not used
  // as the exit code; `process.exitCode` set above is honoured instead
  .then(
    () => process.exit(),
    error => {
      console.error(error)
      process.exit(1)
    }
  )
Prerequisites
package.json
Subject of the issue
I tried scraping an Instagram post with metascraper, and some fields, such as author and logo, came back as null even though they should have values according to the snapshots on this page: https://github.com/microlinkhq/metascraper/blob/master/packages/metascraper-instagram/test/snapshots/index.js.md
Steps to reproduce
/**
browserless
will be passed to html-get
/**
 * Fetch the content for `url` through `getHTML`, using a dedicated browser
 * context created from the shared `browserless` instance.
 *
 * The context is destroyed once `getHTML` settles, on failure as well as on
 * success.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<*>} Resolves with whatever `getHTML` resolves with and
 *   rejects with its error.
 */
const getContent = async url => {
  // create a browser context inside the main Chromium process
  const browserContext = browserless.createContext()
  try {
    return await getHTML(url, { getBrowserless: () => browserContext })
  } finally {
    // close browser resources before handing back the result or the error;
    // `createContext()` is awaited because it may return a promise
    const browser = await browserContext
    await browser.destroyContext()
  }
}
/**
metascraper
is a collection of tiny packages,/**
Expected behaviour
All scraped fields should have values, as in the snapshot.
Actual behaviour
{ author: null, video: null, title: 'KRITIS, INFORMATIF, EDUKATIF on Instagram: “Sebab Kerusuhan Dago Elos, Kepolisan Tolak Laporan Dugaan Data Palsu dan Penipuan...”', date: '2023-08-21T07:47:48.000Z', image: 'https://scontent-cgk1-1.cdninstagram.com/v/t51.29350-15/368080123_864654158412825_2083724060336667766_n.webp?stp=c216.0.648.648a_dst-jpg_s640x640&_nc_cat=103&ccb=1-7&_nc_sid=8ae9d6&_nc_ohc=NLt-IeCY_owAX98WBsr&_nc_oc=AQmtubtZ85gSxAW0UM_2HHn5c-kvgnyhKKVPkRFSpIYIJ81ew6bIcU9_GKU_5ebG3Nw&_nc_ht=scontent-cgk1-1.cdninstagram.com&oh=00_AfAeHSaNPnDOcpTOs2j9tikXyxlLlJRwA44ygDiJlagrZw&oe=64E8B60B', description: '12K likes, 644 comments - undercover.id on August 15, 2023: “Sebab Kerusuhan Dago Elos, Kepolisan Tolak Laporan Dugaan Data Palsu dan Penipuan...”', publisher: 'Instagram', logo: null, url: 'https://www.instagram.com/p/Cv_Lbq1ygce/' }