Python/web_programming/crawl_google_scholar_citation.py
Christian Clauss a2fa32c7ad
Lukazlim: Replace dependency requests with httpx (#12744)
* Replace dependency `requests` with `httpx`

Fixes #12742
Signed-off-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com>

* updating DIRECTORY.md

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com>
Co-authored-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com>
Co-authored-by: cclauss <cclauss@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-05-14 04:42:11 +03:00

43 lines
1.1 KiB
Python

"""
Get the citation from google scholar
using title and year of publication, and volume and pages of journal.
"""
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "beautifulsoup4",
# "httpx",
# ]
# ///
import httpx
from bs4 import BeautifulSoup
def get_citation(base_url: str, params: dict) -> str:
    """
    Return the text of the citation link of the first search result.

    Sends a GET request to ``base_url`` with ``params`` as the query string,
    parses the response body as HTML and returns the text of the third link
    found in the ``div.gs_fl`` row of the first ``div.gs_ri`` element.
    NOTE(review): that third link is presumably the "Cited by N" entry --
    confirm against the live page markup.

    Args:
        base_url: URL of the lookup endpoint to query.
        params: Query parameters describing the publication.

    Returns:
        The text content of the third anchor in the result's link row.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        ValueError: If the response does not contain the expected markup.
    """
    # Unlike requests, httpx does not follow redirects unless asked to, so a
    # redirecting lookup URL would otherwise yield a body without any results.
    response = httpx.get(
        base_url,
        params=params,
        timeout=10,
        follow_redirects=True,
    )
    # Fail loudly on error statuses instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    result = soup.find("div", attrs={"class": "gs_ri"})
    if result is None:
        raise ValueError("no search result (div.gs_ri) found in the response")

    link_row = result.find("div", attrs={"class": "gs_fl"})
    if link_row is None:
        raise ValueError("search result has no link row (div.gs_fl)")

    anchors = link_row.find_all("a")
    if len(anchors) < 3:
        raise ValueError(
            f"expected at least 3 links in the result row, found {len(anchors)}"
        )
    return anchors[2].get_text()
if __name__ == "__main__":
    # Example run: look up one specific paper and print its citation link text.
    lookup_url = "https://scholar.google.com/scholar_lookup"
    paper_title = (
        "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
        "capacitance, volumetric capacitance, and energy density"
    )
    # Keyword form keeps the same keys, values and insertion order as a literal.
    params = dict(
        title=paper_title,
        journal="Chem. Mater.",
        volume=30,
        pages="3979-3990",
        year=2018,
        hl="en",
    )
    citation_text = get_citation(lookup_url, params=params)
    print(citation_text)