mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-17 06:48:09 +00:00
Fix get_amazon_product_data error caused by whitespace in headers (#9009)
* updating DIRECTORY.md
* fix(get-amazon-product-data): Remove whitespace in headers
* refactor(get-amazon-product-data): Don't print to_csv

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
parent
79b043d35c
commit
72f6000365
|
@ -19,11 +19,13 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
|
||||||
"""
|
"""
|
||||||
url = f"https://www.amazon.in/laptop/s?k={product}"
|
url = f"https://www.amazon.in/laptop/s?k={product}"
|
||||||
header = {
|
header = {
|
||||||
"User-Agent": """Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36
|
"User-Agent": (
|
||||||
(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36""",
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
|
||||||
|
"(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36"
|
||||||
|
),
|
||||||
"Accept-Language": "en-US, en;q=0.5",
|
"Accept-Language": "en-US, en;q=0.5",
|
||||||
}
|
}
|
||||||
soup = BeautifulSoup(requests.get(url, headers=header).text)
|
soup = BeautifulSoup(requests.get(url, headers=header).text, features="lxml")
|
||||||
# Initialize a Pandas dataframe with the column titles
|
# Initialize a Pandas dataframe with the column titles
|
||||||
data_frame = DataFrame(
|
data_frame = DataFrame(
|
||||||
columns=[
|
columns=[
|
||||||
|
@ -74,8 +76,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame:
|
||||||
except ValueError:
|
except ValueError:
|
||||||
discount = float("nan")
|
discount = float("nan")
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
continue
|
||||||
data_frame.loc[len(data_frame.index)] = [
|
data_frame.loc[str(len(data_frame.index))] = [
|
||||||
product_title,
|
product_title,
|
||||||
product_link,
|
product_link,
|
||||||
product_price,
|
product_price,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user