diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 01da6cad0..a3288e1c5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: codespell
         args:
-          - --ignore-words-list=ans,fo,followings,hist,iff,secant,som,tim
+          - --ignore-words-list=ans,fo,followings,hist,iff,mater,secant,som,tim
           - --skip="./.*,./other/dictionary.txt,./other/words,./project_euler/problem_022/p022_names.txt"
           - --quiet-level=2
         exclude: |
diff --git a/web_programming/crawl_google_scholar_citation.py b/web_programming/crawl_google_scholar_citation.py
new file mode 100644
index 000000000..d023380c0
--- /dev/null
+++ b/web_programming/crawl_google_scholar_citation.py
@@ -0,0 +1,57 @@
+"""
+Get the citation from google scholar
+using title and year of publication, and volume and pages of journal.
+"""
+
+import requests
+from bs4 import BeautifulSoup
+
+
+def get_citation(base_url: str, params: dict) -> str:
+    """
+    Return the text of the citation link of the first search result.
+
+    Sends a GET request to ``base_url`` with ``params`` as the query string,
+    parses the response as HTML and returns the text of the third link in the
+    footer (``div.gs_fl``) of the first result block (``div.gs_ri``).
+    NOTE(review): that link is expected to be the "Cited by N" link; confirm
+    against the current Google Scholar markup.
+
+    Raises:
+        requests.RequestException: if the request fails, times out or the
+            server answers with an HTTP error status.
+        ValueError: if the page does not contain the expected elements.
+    """
+    # A timeout keeps the call from hanging forever on an unresponsive server.
+    response = requests.get(base_url, params=params, timeout=10)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    result = soup.find("div", attrs={"class": "gs_ri"})
+    if result is None:
+        raise ValueError("no search result (div.gs_ri) found in the response")
+
+    footer = result.find("div", attrs={"class": "gs_fl"})
+    if footer is None:
+        raise ValueError("search result has no footer links (div.gs_fl)")
+
+    anchors = footer.find_all("a")
+    # The citation link is taken by position, so make sure it actually exists.
+    if len(anchors) < 3:
+        raise ValueError("citation link not found in the search result footer")
+    return anchors[2].get_text()
+
+
+if __name__ == "__main__":
+    params = {
+        "title": (
+            "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
+            "capacitance, volumetric capacitance, and energy density"
+        ),
+        "journal": "Chem. Mater.",
+        "volume": 30,
+        "pages": "3979-3990",
+        "year": 2018,
+        "hl": "en",
+    }
+    print(get_citation("http://scholar.google.com/scholar_lookup", params=params))