spinlud / py-linkedin-jobs-scraper


Where is the download folder in Debian GNU/Linux? #9

Closed marcelolaia closed 3 years ago

marcelolaia commented 3 years ago

Please help: I ran the code below, but I cannot find where it saved the downloaded pages.

import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters

# Change root logger level (default is WARN)
logging.basicConfig(level=logging.INFO)

def on_data(data: EventData):
    print('[ON_DATA]', data.title, data.company, data.date, data.link, len(data.description))

def on_error(error):
    print('[ON_ERROR]', error)

def on_end():
    print('[ON_END]')

scraper = LinkedinScraper(
    chrome_executable_path='/home/myuser/bin/chromedriver/chromedriver', # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver) 
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=1.3,  # Slow down the scraper to avoid 'Too many requests (429)' errors
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
#    Query(
#        options=QueryOptions(
#            optimize=True,  # Blocks requests for resources like images and stylesheets
#            limit=400  # Limit the number of jobs to scrape
#        )
#    ),
    Query(
        query='Florestal',
        options=QueryOptions(
            locations=['Brasil'],  # **Should I use 'Brasil' or 'Brazil' here?**
            optimize=False,
            limit=500,
            filters=QueryFilters(
                company_jobs_url=None,  # Filter by companies
                relevance=RelevanceFilters.RECENT,
                time=TimeFilters.MONTH,
                type=[TypeFilters.FULL_TIME, TypeFilters.INTERNSHIP],
                experience=None,
            )
        )
    ),
]

scraper.run(queries)
spinlud commented 3 years ago

It doesn't download anything; it just fetches the data and prints it to the console. It is up to you to save the data in the location and format you want.
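For example, here is a minimal sketch of persisting the results from the on_data callback (the jobs.csv filename and the column choice are my own for illustration, not something the library dictates):

import csv

def on_data(data: EventData):
    # Append each scraped job as one CSV row; the file (hypothetical name)
    # is created in the current working directory on the first write.
    with open('jobs.csv', 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([data.title, data.company, data.date, data.link, data.description])

scraper.on(Events.DATA, on_data)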