awesome-fastapi-projects/populate/main.py
Marcelo Trylesinski 3dd21d4d04 Refactor project
2021-11-16 17:44:33 +01:00

65 lines
2.0 KiB
Python

import os
from time import sleep
from github import ContentFile, Github, PaginatedList
from populate.database import SessionLocal, engine
from populate.models import Base, Repository
# from sqlalchemy import create_engine
QUERY = '"from fastapi import" language:python in:file' # size:{from_}..{to}'
MAX_SIZE = 50 * (2 ** 10) # 50 KB
g = Github(os.getenv("GITHUB_TOKEN"))
# TOKEN = os.getenv("GITHUB_TOKEN")
# token_auth = ApiTokenHeader("Authorization", f"token {TOKEN}")
# git = GitHub("https://api.github.com/", auth=token_auth)
# best_split = git.get_best_split()
# out = []
# for key, value in best_split.items():
# out.append({"from": key[0], "to": key[1], "count": value})
# with open("best_split.json", "w") as f:
# json.dump(out, f)
if __name__ == "__main__":
# TODO: Create migrations setup.
Base.metadata.create_all(engine)
# 1. Query projects within a range
interval = 2 ** 9 # 512 bytes
with SessionLocal() as session:
for from_ in range(0, MAX_SIZE, interval):
to = from_ + interval
files = g.search_code(QUERY.format(from_=from_, to=to))
for file in files:
repo = file.repository
repo_obj = Repository.get(session, full_name=repo.full_name)
if repo_obj is not None:
# TODO: Check when last commit was made.
# If it was recent, update dependencies.
continue
repo_obj = Repository(
full_name=repo.full_name,
html_url=repo.html_url,
clone_url=repo.clone_url,
stargazers=repo.stargazers_count,
)
session.add(repo_obj)
session.flush()
# TODO: Check if repository already analyzed.
print(repo.full_name)
sleep(1)
# 2. Clone repositories
# 3. Find import statements
# 4. Store project data into the database
# 5. Push the sqlite file to gcs