spinlud / py-linkedin-jobs-scraper

MIT License

Scraper not searching with the keyword entered #3

Closed shikharvaish28 closed 3 years ago

shikharvaish28 commented 3 years ago

Thanks for such a wonderful tool @spinlud !

I tried running the scraper, but it is not working with keywords: if I search for a position like 'Performance Marketing', it does not search for that keyword and instead returns general job postings, irrespective of the keyword. I am using the code from the examples folder. Please have a look!

(screenshot of the scraper output attached)

from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters
from csv import writer

def on_data(data: EventData):
    with open('linkedinFile.csv', 'a+', newline='') as write_obj:
        # Create a writer object from csv module
        csv_writer = writer(write_obj)
        # Add contents of list as last row in the csv file
        csv_writer.writerow((data.title, data.company, data.date, data.link, data.description))
        print('[ON_DATA]', data.title, data.company, data.date, data.link, len(data.description))

def on_error(error):
    print('[ON_ERROR]', error)

def on_end():
    print('[ON_END]')

scraper = LinkedinScraper(
    chrome_executable_path='/Users/shikharvaish/Downloads/chromedriver',
    chrome_options=None,  # You can pass your custom Chrome options here
    max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=1,  # Slow down the scraper to avoid 'Too many requests (429)' errors
)

# Add event listeners
scraper.on(Events.DATA, on_data)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)

queries = [
    Query(
        options=QueryOptions(
            optimize=False,  # Blocks requests for resources like images and stylesheets
            limit=207  # Limit the number of jobs to scrape
        )
    ),
    Query(
        query='Performance Marketing',
        options=QueryOptions(
            locations=['Worldwide'],
            optimize=True,
            limit=5,
            filters=QueryFilters(
                # company_jobs_url="https://www.linkedin.com/jobs/search/?f_E=3%2C4%2C5%2C6&f_F=mrkt&f_JT=F&geoId=92000000&keywords=performance%20marketing&location=Worldwide",  # Filter by companies
                relevance=RelevanceFilters.RELEVANT,
                time=TimeFilters.MONTH,
                type=[TypeFilters.FULL_TIME],
                experience=[ExperienceLevelFilters.ASSOCIATE, ExperienceLevelFilters.DIRECTOR, ExperienceLevelFilters.MID_SENIOR],
            )
        )
    ),
]

scraper.run(queries)
spinlud commented 3 years ago

Hi, you are passing two queries to the scraper: the first Query has no query keyword, so it returns generic job postings; only the second one actually searches for 'Performance Marketing'.

(screenshot highlighting the two queries in the code attached)
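
If the intent is to search only for that keyword, a minimal sketch based on the snippet above would drop the first, keyword-less Query and keep just the second one (the scraper setup and event listeners stay the same):

from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters

# A single Query with the keyword set, so every result matches the search term
queries = [
    Query(
        query='Performance Marketing',
        options=QueryOptions(
            locations=['Worldwide'],
            optimize=True,
            limit=5,
            filters=QueryFilters(
                relevance=RelevanceFilters.RELEVANT,
                time=TimeFilters.MONTH,
                type=[TypeFilters.FULL_TIME],
                experience=[ExperienceLevelFilters.ASSOCIATE, ExperienceLevelFilters.DIRECTOR, ExperienceLevelFilters.MID_SENIOR],
            )
        )
    ),
]

scraper.run(queries)  # reuses the scraper configured earlier in the script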

shikharvaish28 commented 3 years ago

Thanks a lot @spinlud. It worked :)