Fix get amazon product data erroring due to whitespace in headers (#9009)

* updating DIRECTORY.md

* fix(get-amazon-product-data): Remove whitespace in headers

* refactor(get-amazon-product-data): Don't print to_csv

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
Caeden Perelli-Harris 2023-09-05 05:49:00 +01:00 committed by GitHub
parent 79b043d35c
commit 72f6000365
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -19,11 +19,13 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
"""
url = f"https://www.amazon.in/laptop/s?k={product}"
header = {
"User-Agent": """Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36
(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36""",
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
"(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36"
),
"Accept-Language": "en-US, en;q=0.5",
}
soup = BeautifulSoup(requests.get(url, headers=header).text)
soup = BeautifulSoup(requests.get(url, headers=header).text, features="lxml")
# Initialize a Pandas dataframe with the column titles
data_frame = DataFrame(
columns=[
@@ -74,8 +76,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
except ValueError:
discount = float("nan")
except AttributeError:
pass
data_frame.loc[len(data_frame.index)] = [
continue
data_frame.loc[str(len(data_frame.index))] = [
product_title,
product_link,
product_price,