mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-23 21:11:08 +00:00
Fix get amazon product data erroring due to whitespace in headers (#9009)
* updating DIRECTORY.md
* fix(get-amazon-product-data): Remove whitespace in headers
* refactor(get-amazon-product-data): Don't print to_csv

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
parent
79b043d35c
commit
72f6000365
|
@ -19,11 +19,13 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
|
|||
"""
|
||||
url = f"https://www.amazon.in/laptop/s?k={product}"
|
||||
header = {
|
||||
"User-Agent": """Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36
|
||||
(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36""",
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
|
||||
"(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36"
|
||||
),
|
||||
"Accept-Language": "en-US, en;q=0.5",
|
||||
}
|
||||
soup = BeautifulSoup(requests.get(url, headers=header).text)
|
||||
soup = BeautifulSoup(requests.get(url, headers=header).text, features="lxml")
|
||||
# Initialize a Pandas dataframe with the column titles
|
||||
data_frame = DataFrame(
|
||||
columns=[
|
||||
|
@ -74,8 +76,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
|
|||
except ValueError:
|
||||
discount = float("nan")
|
||||
except AttributeError:
|
||||
pass
|
||||
data_frame.loc[len(data_frame.index)] = [
|
||||
continue
|
||||
data_frame.loc[str(len(data_frame.index))] = [
|
||||
product_title,
|
||||
product_link,
|
||||
product_price,
|
||||
|
|
Loading…
Reference in New Issue
Block a user