2020-08-21 21:58:26 +00:00
|
|
|
"""
|
|
|
|
Scrape job titles and company names from the Indeed website for a given
location (the job-title search query is fixed in the module-level ``url``).
|
|
|
|
"""
|
2024-03-13 06:52:41 +00:00
|
|
|
|
2020-09-23 11:30:13 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2022-07-11 08:19:52 +00:00
|
|
|
from collections.abc import Generator
|
2020-08-21 21:58:26 +00:00
|
|
|
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
# Base Indeed search URL: the query (q) is fixed to "mobile app development"
# and the location parameter (l=) is left empty so fetch_jobs can append it.
url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l="
|
|
|
|
|
|
|
|
|
2020-09-23 11:30:13 +00:00
|
|
|
def fetch_jobs(location: str = "mumbai") -> Generator[tuple[str, str], None, None]:
    """
    Yield ``(job_title, company_name)`` pairs scraped from an Indeed results page.

    The page requested is the module-level ``url`` with ``location`` appended
    as the value of its trailing ``l=`` query parameter.  The request uses a
    10 second timeout.

    :param location: place name appended verbatim to the search URL.
    :return: generator of ``(job_title, company_name)`` tuples, whitespace
        stripped, in the order the listings appear in the page.  Listings
        that lack either the title link or the company span are skipped.
    """
    response = requests.get(url + location, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    # Every job listing is a <div> carrying data-tn-component="organicJob".
    for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}):
        title_tag = job.find("a", attrs={"data-tn-element": "jobTitle"})
        company_tag = job.find("span", {"class": "company"})

        # find() returns None when the element is absent; skip such listings
        # instead of failing with AttributeError on ``None.text`` and
        # aborting the whole scrape.
        if title_tag is None or company_tag is None:
            continue

        yield title_tag.text.strip(), company_tag.text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: print a numbered line for every job found for Bangalore.
    for position, (title, company) in enumerate(fetch_jobs("Bangalore"), start=1):
        print(f"Job {position:>2} is {title} at {company}")
|