add crawl_google_scholar_citation.py (#3879)

* add crawl_google_scholar_citation.py

* pass flake8

* pass isort

* pass isort

* change comment in main

* modify main code

* delete file

* change how to build url

* add a key 'hl' in params dict

* Update crawl_google_scholar_citation.py

* Create crawl_google_results.py

* codespell: Mater

Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
boyuuuun 2020-11-13 22:55:23 +09:00 committed by GitHub
parent 9f6188cc40
commit 32def4b3c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 1 deletions

View File

@ -42,7 +42,7 @@ repos:
hooks:
- id: codespell
args:
- --ignore-words-list=ans,fo,followings,hist,iff,secant,som,tim
- --ignore-words-list=ans,fo,followings,hist,iff,mater,secant,som,tim
- --skip="./.*,./other/dictionary.txt,./other/words,./project_euler/problem_022/p022_names.txt"
- --quiet-level=2
exclude: |

View File

@ -0,0 +1,32 @@
"""
Get the citation from Google Scholar
using the title and year of publication, and the volume and pages of the journal.
"""
import requests
from bs4 import BeautifulSoup
def get_citation(base_url: str, params: dict, timeout: float = 10.0) -> str:
    """
    Return the citation number.

    The page at ``base_url`` is requested with ``params`` as the query string.
    The text of the third link inside the ``gs_fl`` div of the first ``gs_ri``
    div is returned (presumably the "Cited by N" link -- this depends on
    Google Scholar's markup; verify if the page layout changes).

    :param base_url: URL to request.
    :param params: query-string parameters sent with the request.
    :param timeout: seconds to wait for the server before giving up.
    :return: text of the third anchor found in the ``gs_fl`` div.
    :raises requests.exceptions.Timeout: if the server does not answer within
        ``timeout`` seconds.
    :raises AttributeError: if the ``gs_ri`` or ``gs_fl`` div is missing.
    :raises IndexError: if the ``gs_fl`` div holds fewer than three links.
    """
    # requests waits forever by default; bound the wait so a stalled
    # connection cannot hang the caller.
    response = requests.get(base_url, params=params, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    div = soup.find("div", attrs={"class": "gs_ri"})
    anchors = div.find("div", attrs={"class": "gs_fl"}).find_all("a")
    return anchors[2].get_text()
if __name__ == "__main__":
    # Example run: look up a single article by its bibliographic details and
    # print whatever get_citation() extracts from the returned page.
    article_title = (
        "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
        "capacitance, volumetric capacitance, and energy density"
    )
    lookup_url = "http://scholar.google.com/scholar_lookup"
    params = {
        "title": article_title,
        "journal": "Chem. Mater.",
        "volume": 30,
        "pages": "3979-3990",
        "year": 2018,
        # presumably selects the interface language of the result page
        "hl": "en",
    }
    print(get_citation(lookup_url, params=params))