mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-30 16:31:08 +00:00
add crawl_google_scholar_citation.py (#3879)
* add crawl_google_scholar_citation.py * pass flake8 * pass isort * pass isort * change comment in main * modify main code * delete file * change how to build url * add a key 'hl' in params dict * Update crawl_google_scholar_citation.py * Create crawl_google_results.py * codespell: Mater Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
parent
9f6188cc40
commit
32def4b3c5
|
@ -42,7 +42,7 @@ repos:
|
|||
hooks:
|
||||
- id: codespell
|
||||
args:
|
||||
- --ignore-words-list=ans,fo,followings,hist,iff,secant,som,tim
|
||||
- --ignore-words-list=ans,fo,followings,hist,iff,mater,secant,som,tim
|
||||
- --skip="./.*,./other/dictionary.txt,./other/words,./project_euler/problem_022/p022_names.txt"
|
||||
- --quiet-level=2
|
||||
exclude: |
|
||||
|
|
32
web_programming/crawl_google_scholar_citation.py
Normal file
32
web_programming/crawl_google_scholar_citation.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
"""
|
||||
Get the citation count from Google Scholar
|
||||
using title and year of publication, and volume and pages of journal.
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_citation(base_url: str, params: dict) -> str:
    """
    Return the citation text that the scholar page shows for a publication.

    ``base_url`` is requested with ``params`` sent as the URL query string.
    The response is parsed as HTML, and the text of the third ``<a>`` element
    inside the first ``div.gs_fl`` found within the first ``div.gs_ri`` is
    returned (presumably the "Cited by N" link; this depends on the markup
    Google Scholar serves at the time of the request).

    Raises:
        requests.exceptions.RequestException: if the request fails or does not
            complete within the timeout.
        AttributeError: if the page contains no ``div.gs_ri`` or no nested
            ``div.gs_fl`` (``find`` returns ``None`` in that case).
        IndexError: if the ``div.gs_fl`` holds fewer than three links.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(base_url, params=params, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")
    div = soup.find("div", attrs={"class": "gs_ri"})
    anchors = div.find("div", attrs={"class": "gs_fl"}).find_all("a")
    # The links are addressed by position, so a markup change upstream can
    # silently shift which link is returned.
    return anchors[2].get_text()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Example lookup: the query parameters below describe one journal article.
    paper_title = (
        "Precisely geometry controlled microsupercapacitors for ultrahigh areal "
        "capacitance, volumetric capacitance, and energy density"
    )
    params = {
        "title": paper_title,
        "journal": "Chem. Mater.",
        "volume": 30,
        "pages": "3979-3990",
        "year": 2018,
        "hl": "en",  # presumably the language of the returned page; confirm
    }
    lookup_url = "http://scholar.google.com/scholar_lookup"
    print(get_citation(lookup_url, params=params))
|
Loading…
Reference in New Issue
Block a user