Fix get amazon product data erroring due to whitespace in headers (#9009)

* updating DIRECTORY.md

* fix(get-amazon-product-data): Remove whitespace in headers

* refactor(get-amazon-product-data): Don't print to_csv

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
Caeden Perelli-Harris 2023-09-05 05:49:00 +01:00 committed by GitHub
parent 79b043d35c
commit 72f6000365
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -19,11 +19,13 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
""" """
url = f"https://www.amazon.in/laptop/s?k={product}" url = f"https://www.amazon.in/laptop/s?k={product}"
header = { header = {
"User-Agent": """Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "User-Agent": (
(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36""", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
"(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36"
),
"Accept-Language": "en-US, en;q=0.5", "Accept-Language": "en-US, en;q=0.5",
} }
soup = BeautifulSoup(requests.get(url, headers=header).text) soup = BeautifulSoup(requests.get(url, headers=header).text, features="lxml")
# Initialize a Pandas dataframe with the column titles # Initialize a Pandas dataframe with the column titles
data_frame = DataFrame( data_frame = DataFrame(
columns=[ columns=[
@@ -74,8 +76,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
except ValueError: except ValueError:
discount = float("nan") discount = float("nan")
except AttributeError: except AttributeError:
pass continue
data_frame.loc[len(data_frame.index)] = [ data_frame.loc[str(len(data_frame.index))] = [
product_title, product_title,
product_link, product_link,
product_price, product_price,