DboukAli98 opened 2 years ago
You should be able to just write the file out as CSV or JSON using Python's native methods.
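For example, a minimal sketch of dumping collected results with the standard json module (the jobs list and file name here are just placeholders for whatever you collect):

import json

jobs = [{'title': 'Data Engineer', 'company': 'Acme'}]  # placeholder data

with open('jobs.json', 'w') as f:
    json.dump(jobs, f, indent=2)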
Hi, actually no. You have to write your own code to export the results to CSV.
Hello,
This is how I did it.
import logging
import csv
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData, EventMetrics
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters, RemoteFilters
# Change root logger level (default is WARN)
logging.basicConfig(level=logging.INFO)
job_data = []
# Fired once for each successfully processed job
def on_data(data: EventData):
    job_data.append([data.title, data.company])
# Fired once for each page (25 jobs)
def on_metrics(metrics: EventMetrics):
    print('[ON_METRICS]', str(metrics))

def on_error(error):
    print('[ON_ERROR]', error)

def on_end():
    print('[ON_END]')
scraper = LinkedinScraper(
    chrome_executable_path='/Users/voi/chromedriver',  # Custom Chrome executable path (e.g. /foo/bar/bin/chromedriver)
    chrome_options=None,  # Custom Chrome options here
    headless=True,  # Overrides headless mode only if chrome_options is None
    max_workers=2,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
    slow_mo=3,  # Slow down the scraper to avoid 'Too many requests 429' errors (in seconds)
    page_load_timeout=25  # Page load timeout (in seconds)
)
# Add event listeners (on_metrics was defined above, so register it too)
scraper.on(Events.DATA, on_data)
scraper.on(Events.METRICS, on_metrics)
scraper.on(Events.ERROR, on_error)
scraper.on(Events.END, on_end)
queries = [
    Query(
        options=QueryOptions(
            optimize=True,
            limit=2000  # Limit the number of jobs to scrape
        )
    ),
    Query(
        query='',
        options=QueryOptions(
            locations=['United States', 'California', 'Texas', 'New York', 'Michigan'],
            apply_link=False,  # Try to extract apply link (easy applies are skipped). Default to False.
            limit=500,
            filters=QueryFilters(
                relevance=RelevanceFilters.RECENT,
                time=TimeFilters.ANY,
                type=[TypeFilters.FULL_TIME, TypeFilters.INTERNSHIP, TypeFilters.PART_TIME],
                experience=None,
            )
        )
    ),
]
scraper.run(queries)
fields = ['Job', 'Company']
with open('jobs.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(fields)
    writer.writerows(job_data)
I changed the on_data callback to just save the job title and company name (as a list) to the job_data list.
Then, at the very end, I added some code that writes the job_data information to a CSV file.
Keep in mind that this only works if you run the script to completion; if you stop it, or it crashes, nothing gets saved to the CSV file.
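If you need the data to survive a crash, one alternative (just a sketch, not a library feature) is to open the CSV before scraper.run() and have on_data write and flush each row immediately; the file name is arbitrary:

import csv
from linkedin_jobs_scraper.events import EventData

# Open the file once, before scraper.run()
csv_file = open('jobs.csv', 'w', newline='')
writer = csv.writer(csv_file)
writer.writerow(['Job', 'Company'])

def on_data(data: EventData):
    # Write each job as soon as it arrives instead of buffering in a list
    writer.writerow([data.title, data.company])
    csv_file.flush()  # push rows to disk so a crash loses at most the last row

With this approach you can drop the job_data list and the final write-out; just remember to close csv_file in on_end.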
Please, have you added a feature to save the scraped data to some file, CSV or TXT?