def clone_repository(repository: Repository) -> TemporaryDirectory:
    """Clone *repository* into a fresh temporary directory and return it.

    Args:
        repository: Object whose ``clone_url`` attribute is passed to
            ``Repo.clone_from`` as the source of the clone.

    Returns:
        The ``TemporaryDirectory`` holding the clone. The caller owns it and
        is responsible for calling ``cleanup()`` when done.

    Raises:
        Whatever ``Repo.clone_from`` raises; the temporary directory is
        removed before the exception propagates.
    """
    workdir = TemporaryDirectory()
    try:
        Repo.clone_from(repository.clone_url, workdir.name)
    except BaseException:
        # A failed or interrupted clone must not leave a partial checkout
        # behind; remove it eagerly instead of waiting for garbage collection.
        workdir.cleanup()
        raise
    return workdir
def get_python_files(dir: TemporaryDirectory) -> Generator[TextIO, None, None]:
    """Yield an open, read-only text handle for every ``.py`` file under *dir*.

    The tree rooted at ``dir.name`` is walked top-down in sorted order so the
    sequence of files is deterministic across platforms.

    Args:
        dir: Temporary directory whose ``name`` is the root of the walk.

    Yields:
        A text file object opened for reading. The caller is responsible for
        closing each handle.
    """
    for dirpath, dirnames, filenames in os.walk(dir.name):
        # Sorting in place makes os.walk descend into subdirectories in a
        # stable order.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.endswith(".py"):
                continue
            # Decode explicitly as UTF-8 (Python's default source encoding)
            # rather than the platform locale, and replace undecodable bytes
            # so one oddly-encoded file cannot abort the whole scan.
            yield open(
                os.path.join(dirpath, filename),
                "r",
                encoding="utf-8",
                errors="replace",
            )
def get_packages(file: TextIO):
    """Yield the top-level package named by each import statement in *file*.

    Lines are scanned textually (no AST), one statement at a time:

    * ``import a.b as c, d`` yields ``"a"`` and ``"d"``.
    * ``from a.b import c`` yields ``"a"``.
    * ``from .models import X`` yields ``"models"``; a bare relative import
      such as ``from . import x`` names no package and yields nothing.

    Args:
        file: Open text stream containing Python source.

    Yields:
        The first dotted component of every imported module, as a ``str``.
    """
    for line in file:
        # Drop trailing comments so "import os  # noqa" parses cleanly.
        code = line.split("#", 1)[0]
        # Allow several simple statements on one line ("import a; import b").
        for statement in code.split(";"):
            words = statement.split()
            if len(words) < 2:
                # Blank line, or a lone keyword with nothing to import.
                continue
            if words[0] == "import":
                # Comma-separated targets, each optionally "x.y as z".
                for clause in " ".join(words[1:]).split(","):
                    target = clause.split()
                    if not target:
                        continue
                    package = target[0].split(".")[0]
                    if package.isidentifier():
                        yield package
            elif words[0] == "from":
                # Require the "import" keyword so prose such as
                # "from the user" is not mistaken for an import.
                if len(words) < 3 or words[2].split("(", 1)[0] != "import":
                    continue
                # Leading dots mark a relative import; strip them and skip
                # the statement when no module name remains.
                package = words[1].lstrip(".").split(".")[0]
                if package.isidentifier():
                    yield package
+++ b/generate_table.py @@ -1,5 +1,6 @@ -from typing import List import json +from typing import List + from pytablewriter import MarkdownTableWriter from stdlib_list import stdlib_list @@ -24,6 +25,7 @@ def format_with_link(project: str) -> str: if project in link: return f"[{project}]({link})" + with open("results.json") as json_file: data = json.load(json_file) writer = MarkdownTableWriter() diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..f41588b --- /dev/null +++ b/poetry.lock @@ -0,0 +1,8 @@ +package = [] + +[metadata] +lock-version = "1.1" +python-versions = "^3.8" +content-hash = "fafb334cb038533f851c23d0b63254223abf72ce4f02987e7064b0c95566699a" + +[metadata.files] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..dcdc637 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[tool.poetry] +name = "fastapi-projects" +version = "0.1.0" +description = "" +authors = ["Marcelo Trylesinski "] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.8" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api"