awesome-fastapi-projects/app/dependencies.py
Vladyslav Fedoriuk 2fdd348a15
Web App (#25)
* Set up the web project dependencies

- Add linters, pre-commit, and GitHub Actions
- Add a Makefile
- Add a pyproject.toml

* Fix pyupgrade job

- Remove continue_on_error everywhere

* Remove old code

* Rename a GitHub Actions job

* Change README

* Adjust pre-commit and GitHub actions

* Add tables and set up alembic

* Set up tests

* Extend tests

* Add coverage config

* Adjust the GitHub Actions workflow

* Fix the GitHub Actions workflow

* Try fixing pyproject-fmt config

* Fix formatting of pyproject.toml

* Fix formatting of pyproject.toml

* Add coverage report

* Test listing the repositories

* Add a working prototype of SourceGraph client

* Add parsing of the SourceGraph SSE data

* Fix tests

* Get rid of packages replaced by ruff

* Fix waits in the SourceGraph client

* Refactor the models and add a mapper

- A new mapper allows creating database repositories from the SourceGraph data

* Add mypy

* Try fixing mypy action

* Remove redundant configs

* Exclude tests from type checking

* Fix mypy pre-commit and GitHub action

* Ignore factories

* Make upserting possible for Source Graph data

* Add logic for parsing the dependencies and populating the database

* Add a database and a cron GitHub Action job

* Try manually triggering a workflow

* Bring back the old config

* Add ReadTimeout to the errors to retry on in the SourceGraph client

* Add typer

* Adjust the docstrings

* Update the database

* Refactor and optimize scraping and dependencies parsing

* Make scraping run on push for now

* Add a unique constraint for the repo url and source graph repo id

* Change the index columns in the on_conflict statement for repo creation

* Optimize dependencies parsing

- Do not parse dependencies for a repo when the revision has not changed

* Scraped repositories from Source Graph

* Refactor scraping

* Set up frontend

* Scraped repositories from Source Graph

* Add TODOs

* Skip scraping when testing

* Fix a test with updating the repos

* Scraped repositories from Source Graph

* Add some more TODOs

* Scraped repositories from Source Graph

* Add some more TODO comments

* Add shadcn/ui

* Scraped repositories from Source Graph

* Create index.json

* Scraped repositories from Source Graph

* Add a draft of data table and display all the repos

* Scraped repositories from Source Graph

* Implement stars badges and description with overflow

* Format the links to GitHub repos

* Fix link clicking

* Scraped repositories from Source Graph

* Add simple pagination and stars column sorting

* Scraped repositories from Source Graph

* Implement basic searching

* Scraped repositories from Source Graph

* Implement a multiselect for dependencies

* Scraped repositories from Source Graph

* Implement actual filtering by dependencies

* Scraped repositories from Source Graph

* Add a workflow to deploy Next.js on GitHub Pages

* Try fixing the deployment job

* Enable static exports for app router

* Fix uploading artifacts for the Next.js job

* Set base path to properly load JS and CSS

* Fix the base path

* Scraped repositories from Source Graph

* Add header

* Remove language version

* Scraped repositories from Source Graph

* Add some more TODOs

* Scraped repositories from Source Graph

* Adjust the pre-commit config

* Fix pipelines

* Scraped repositories from Source Graph

* Add a footer

* Create the indexes

* Scraped repositories from Source Graph

* Add more TODOs

* Introduce minor footer adjustments

* Adjust the scraping actions

* Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes

* Implement query params state

* Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes

* Do not commit query state on unmount

* Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes

* Hopefully fix query states and multiselect input

* Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes

* Extend the Makefile

* Resolve most of TODOs

* Resolve the conflicts with anyio version, bring back httpx

* Adjust the Makefile and README.md

* Fix a typo in README.md

* Adjust readme

* Fix the Makefile

* Fix some stuff

* Make some adjustments

* Possibly fix failing scraping jobs

* Load the repo project URL from env

---------

Co-authored-by: vladfedoriuk <vladfedoriuk@users.noreply.github.com>
Co-authored-by: Vladyslav Fedoriuk <vladyslav.fedoriuk@deployed.pl>
2023-10-28 21:39:02 +02:00

150 lines
4.6 KiB
Python

"""Dependencies parsing."""
import asyncio
import subprocess
from collections.abc import Sequence
import aiofiles.tempfile
import stamina
from loguru import logger
from app.database import Repo
from app.models import DependencyCreateData
from app.types import RevisionHash


async def run_command(*cmd: str, cwd: str | None = None) -> str:
    """
    Run the given command in a subprocess and return the stdout as plain text.

    :param cmd: The command to run.
    :param cwd: The working directory to run the command in.
    :return: The stdout result.
    """
    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
    )
    stdout, stderr = await process.communicate()
    if process.returncode != 0:
        raise RuntimeError(
            f"Command '{cmd}' failed with exit code '{process.returncode}':\n"
            f"[stdout]: '{stdout.decode()}'\n"
            f"[stderr]: '{stderr.decode()}'"
        )
    return stdout.decode()
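

# A minimal usage sketch for run_command (an illustrative example, not part of
# the original module; the demo function name is hypothetical): run
# `git --version` and capture its stdout.
#
#     async def _demo() -> None:
#         version = await run_command("git", "--version")
#         print(version)
#
#     asyncio.run(_demo())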


async def acquire_dependencies_data_for_repository(
    repo: Repo,
) -> tuple[RevisionHash, list[DependencyCreateData]]:
    """
    Acquire dependencies for the given repository.

    The function will use the "third-party-imports" tool to
    parse the third-party dependencies of the repository.
    Since this tool is written in Rust and is essentially
    a CLI tool, the parsing happens in a subprocess.

    :param repo: A repository for which to return the dependencies.
    :return: The dependencies data required to create the dependencies in the DB.
    """
    logger.info(
        "Acquiring the dependencies data for the repo with id {repo_id}.",
        repo_id=repo.id,
        enqueue=True,
    )
    async with aiofiles.tempfile.TemporaryDirectory() as directory:
        # Clone the repository
        logger.info(
            "Cloning the repo with id {repo_id} into the directory {directory}.",
            repo_id=repo.id,
            directory=directory,
            enqueue=True,
        )
        await run_command(
            "git",
            "clone",
            "--depth",
            "1",
            repo.url,
            directory,
        )
        # Get the latest commit hash
        logger.info(
            "Getting the latest commit hash for the repo with id {repo_id}.",
            repo_id=repo.id,
            enqueue=True,
        )
        revision: str = await run_command(
            "git",
            "rev-parse",
            "HEAD",
            cwd=directory,
        )
        if repo.last_checked_revision == revision:
            # Assume there are no new dependencies to return
            # since all the repo dependencies have already
            # been parsed.
            logger.info(
                "The repo with id {repo_id} has already been updated.",
                repo_id=repo.id,
                enqueue=True,
            )
            return RevisionHash(revision), []
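        # run_command raises RuntimeError on a non-zero exit code, so the
        # retry loop below re-runs the parsing for up to 3 attempts before
        # the error propagates.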
        # Parse the dependencies
        async for attempt in stamina.retry_context(on=RuntimeError, attempts=3):
            with attempt:
                logger.info(
                    "Parsing the dependencies for the repo with id {repo_id}.",
                    repo_id=repo.id,
                    enqueue=True,
                )
                dependencies: str = await run_command(
                    "third-party-imports",
                    directory,
                )
        if dependencies:
            logger.info(
                "Successfully parsed the dependencies for the repo with id {repo_id}.",
                repo_id=repo.id,
                enqueue=True,
            )
            # Split the dependencies by newline
            dependencies_list: Sequence[str] = dependencies.split("\n")
            # Drop the first two lines (the info lines)
            dependencies_list = (
                dependencies_list[2:] if len(dependencies_list) > 2 else []
            )
            logger.info(
                "Found {count} dependencies for the repo with id {repo_id}.",
                count=len(dependencies_list),
                repo_id=repo.id,
                enqueue=True,
            )
        else:
            logger.info(
                "No dependencies found for the repo with id {repo_id}.",
                repo_id=repo.id,
                enqueue=True,
            )
            dependencies_list = []
        return (
            RevisionHash(revision),
            [
                DependencyCreateData(
                    name=dependency.strip(),
                )
                for dependency in dependencies_list
                if dependency.strip()
            ],
        )
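

# A minimal consumption sketch (an illustrative example, not part of the
# original module). The caller name and the persistence step are assumptions;
# the actual upsert logic lives elsewhere in the app.
#
#     async def _refresh_repo_dependencies(repo: Repo) -> None:
#         revision, dependencies = await acquire_dependencies_data_for_repository(repo)
#         # Persist `revision` as the repo's last checked revision and create
#         # the DependencyCreateData records in the database here.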