# awesome-fastapi-projects/fastapi_projects/__main__.py
import json
import logging
import os
import re
import shutil
from datetime import datetime
from typing import List
from git import Git
from git.exc import GitCommandError
from github import Github
from github.Repository import Repository
# --- Configuration ----------------------------------------------------------
logging.basicConfig(level=logging.INFO)

# GitHub client, authenticated via the ACCESS_TOKEN_GITHUB environment variable.
github_access_token = os.getenv("ACCESS_TOKEN_GITHUB")
g = Github(github_access_token)

# repository.size is reported by the GitHub API in KB, so this is ~100 MB.
MAX_SIZE = 100 * 1000

# Working paths (relative to the current working directory).
dir = os.getcwd()  # NOTE: shadows the `dir` builtin; kept — other code reads it
clone_dir = os.path.join(dir, "tmp")
data_file = os.path.join(dir, "results.json")

# Python files under these folders are vendored/virtualenv code — ignore them.
INVALID_FOLDERS = ("site-packages", "venv")
# Functions
def clone(repository: Repository):
    """Clone *repository* into ``clone_dir`` (best effort).

    Clone failures (e.g. the target directory already exists, or the
    repository is unreachable) are logged and otherwise ignored so the
    caller's loop keeps going.
    """
    try:
        clone_url = repository.clone_url
        Git(clone_dir).clone(clone_url)
    except GitCommandError:
        # Best-effort: record the failure instead of swallowing it silently.
        logging.warning("Could not clone '%s'.", repository.clone_url)
# Matches the top-level package of both `from pkg[.sub] import ...` and
# `import pkg[.sub]` lines; compiled once instead of per line.
_IMPORT_RE = re.compile(r"^\s*(?:from\s+(\w+)|import\s+(\w+))")


def get_packages_from_file(path: str) -> List[str]:
    """Return the sorted top-level package names imported by the file at *path*.

    Both ``from pkg import ...`` and plain ``import pkg`` statements are
    recognized (the original pattern required a stray ``:`` before ``import``
    and so never matched plain import lines). Relative imports
    (``from . import x``) are skipped. Missing or non-UTF-8 files yield
    an empty list; both cases are logged.
    """
    packages = set()
    logging.info("Reading file '%s'.", path)
    try:
        with open(path, "r") as file:
            for line in file:
                match = _IMPORT_RE.search(line)
                if match:
                    # Exactly one of the two groups is populated.
                    packages.add(match.group(1) or match.group(2))
    except FileNotFoundError:
        logging.info("File not found '%s'.", path)
    except UnicodeDecodeError:
        logging.info("Invalid character on file '%s'.", path)
    # Sorted for deterministic output (set iteration order is arbitrary).
    return sorted(packages)
def extract_data(repository: Repository) -> dict:
    """Walk the cloned checkout of *repository* and collect imported packages.

    Returns ``{"packages": [...]}`` with the union of package names found in
    every eligible ``.py`` file (the original assigned per-file, so only the
    last file's packages survived), or ``{}`` when no eligible file exists.
    Files under any ``INVALID_FOLDERS`` path segment are skipped.
    """
    data = {}
    packages = set()
    found_any = False
    for (root, _, files) in os.walk(os.path.join(clone_dir, repository.name)):
        for file in files:
            path = os.path.join(root, file)
            # Skip vendored / virtualenv code.
            if file.endswith(".py") and all(
                folder not in path for folder in INVALID_FOLDERS
            ):
                found_any = True
                packages.update(get_packages_from_file(path))
    if found_any:
        # Sorted for deterministic JSON output.
        data["packages"] = sorted(packages)
    return data
def run():
    """Search GitHub for FastAPI usage, clone each hit's repository, extract
    the packages it imports, and persist everything to ``results.json``.

    Results are keyed by ``str(repository.id)`` — JSON object keys are always
    strings, so the original ``repository.id in data`` check (int vs. str)
    never matched after a reload.
    """
    # Load previous results; start fresh on the very first run.
    try:
        with open(data_file) as json_file:
            data = json.load(json_file)
    except FileNotFoundError:
        data = {}

    # GitHub code-search qualifiers belong in the query string itself, not as
    # URL parameters pasted into it.
    snippets = g.search_code('"from fastapi import FastAPI" language:Python')
    # PyGithub's PaginatedList does not support len(); totalCount is the API's
    # reported match count.
    found = snippets.totalCount
    logging.info("Found '%d' snippets.", found)
    for i, snippet in enumerate(snippets):
        repository = snippet.repository
        name = repository.name
        # .login is a plain string; the NamedUser object itself is not
        # JSON-serializable and would crash json.dump below.
        owner = repository.owner.login
        logging.info("Got repository '%s' (%d / %d).", name, i + 1, found)
        if str(repository.id) in data:
            # Only re-process stored repositories with recent activity.
            # Index [0] avoids materializing the full commit list.
            last_commit_date = repository.get_commits()[0].commit.author.date
            # NOTE(review): assumes both datetimes are naive UTC — confirm
            # against the PyGithub version in use.
            if (datetime.today() - last_commit_date).days > 7:
                logging.info("Repository '%s' already stored.", name)
                continue
        if repository.size > MAX_SIZE:
            # repository.size is in KB.
            logging.info("Repository size is '%d' MB. (SKIP)", repository.size // 1000)
            continue
        logging.info("Cloning repository '%s'.", name)
        clone(repository)
        logging.info("Extracting data from '%s'.", name)
        extracted_data = extract_data(repository)
        data[str(repository.id)] = {"name": name, "owner": owner, **extracted_data}
        logging.info("Removing repository '%s'.", name)
        # clone() is best-effort, so the checkout may not exist — don't crash.
        shutil.rmtree(os.path.join(clone_dir, name), ignore_errors=True)

    logging.info("Writing on file!")
    with open(data_file, "w") as json_file:
        json.dump(data, json_file)


# Run!
if __name__ == "__main__":
    run()