spinlud / linkedin-jobs-scraper

151 stars 41 forks source link

Can we filter date while storing it to json file? #12

Closed testcon closed 4 years ago

testcon commented 4 years ago

I tried using the `filter` option, but it gives an error. Can you demonstrate how to use the filter?

const { 
    events,
    IData,
    LinkedinScraper,
    ERelevanceFilterOptions,
    ETimeFilterOptions
} = require('linkedin-jobs-scraper');

(async () => {
    const fs = require("fs");

    // Each scraper instance is associated with one browser.
    // Concurrent queries will run on different pages within the same browser instance.
    const scraper = new LinkedinScraper({
        headless: true,
        slowMo: 10,
    });

    // Accumulates one record per scraped job; serialized to disk on the end event.
    const res = { table: [] };

    // NOTE(review): the listeners below mix the `events.custom` and
    // `events.scraper` namespaces — confirm both exist in the installed
    // version of linkedin-jobs-scraper.

    // Store every scraped job. The description is deliberately left out of the output.
    scraper.on(events.custom.data, ({
        query,
        location,
        link,
        title,
        company,
        place,
        date,
        senorityLevel,
        jobFunction,
        employmentType,
        industries,
        time,
    }) => {
        res.table.push({
            query,
            location,
            title,
            company,
            place,
            date,
            link,
            senorityLevel,
            function: jobFunction,
            employmentType,
            industries,
            time,
        });
    });

    scraper.on(events.scraper.error, (err) => {
        console.error(err);
    });

    // When scraping ends, write a timestamp plus all collected jobs as one JSON document.
    scraper.on(events.scraper.end, () => {
        console.log("All done!");

        // Serialize a single object so every value is escaped by JSON.stringify
        // rather than spliced into a hand-built string.
        const output = JSON.stringify({
            time: new Date().toLocaleString(),
            data: res.table,
        });

        fs.writeFile("src/linkedin_output.json", output, "utf8", (err) => {
            // Report write failures (e.g. missing "src" directory) instead of ignoring them.
            if (err) {
                console.error(err);
            }
        });
    });

    // Add listeners for puppeteer browser events (intentionally no-ops)
    scraper.on(events.puppeteer.browser.targetcreated, () => {
    });
    scraper.on(events.puppeteer.browser.targetchanged, () => {
    });
    scraper.on(events.puppeteer.browser.targetdestroyed, () => {
    });
    scraper.on(events.puppeteer.browser.disconnected, () => {
    });

    // Custom function executed on browser side to extract job description
    const descriptionProcessor = () => document.querySelector(".description__text")
        .innerText
        .replace(/[\s\n\r]+/g, " ")
        .trim();

    try {
        // Run queries concurrently
        await Promise.all([
            scraper.run(
                "Graphic Designer",
                "London",
                {
                    paginationMax: 2,
                }
            ),
            scraper.run(
                ["Cloud Engineer"],
                ["San Francisco", "New York"],
                {
                    paginationMax: 1,
                    descriptionProcessor,
                    filter: {
                        relevance: ERelevanceFilterOptions.RECENT,
                        time: ETimeFilterOptions.DAY,
                    },
                    optimize: true, // Block resources such as images, fonts etc to improve bandwidth usage
                }
            ),
        ]);
    } finally {
        // Close browser even when a query rejects, so it is not left running.
        await scraper.close();
    }
})().catch((err) => {
    // Surface failures explicitly instead of leaving an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
});
spinlud commented 4 years ago

Hi, could you share the error and stack trace, please?

testcon commented 4 years ago

Error at Object. (/home/testcon/Desktop/jobs/works/LinkedIn-Scraper-master/test.js:1:13) at Module._compile (internal/modules/cjs/loader.js:1138:30) at Object.Module._extensions..js (internal/modules/cjs/loader.js:1158:10) at Module.load (internal/modules/cjs/loader.js:986:32) at Function.Module._load (internal/modules/cjs/loader.js:879:14) at Function.executeUserEntryPoint [as runMain] (internal/modules/run_main.js:71:12) at internal/main/run_main_module.js:17:47 (node:7802) UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'error' of undefined at /home/testcon/Desktop/jobs/works/LinkedIn-Scraper-master/test.js:55:29 at Object. (/home/testcon/Desktop/jobs/works/LinkedIn-Scraper-master/test.js:97:3) at Module._compile (internal/modules/cjs/loader.js:1138:30) at Object.Module._extensions..js (internal/modules/cjs/loader.js:1158:10) at Module.load (internal/modules/cjs/loader.js:986:32) at Function.Module._load (internal/modules/cjs/loader.js:879:14) at Function.executeUserEntryPoint [as runMain] (internal/modules/run_main.js:71:12) at internal/main/run_main_module.js:17:47 (node:7802) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag --unhandled-rejections=strict (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1) (node:7802) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

Sorry, I'm fairly new to Node.js and am just trying to get some tasks done.

spinlud commented 4 years ago

Please try the new version (mind there are breaking changes so look at the documentation). You can find usage examples here

testcon commented 4 years ago

okay