mirror of
https://github.com/Kludex/awesome-fastapi-projects.git
synced 2024-11-23 20:11:07 +00:00
Fix repos scraping and parsing (#28)
This commit is contained in:
parent
0610553651
commit
d4c00793b8
|
@ -1,6 +1,6 @@
|
||||||
# Awesome FastAPI Projects
|
# Awesome FastAPI Projects
|
||||||
|
|
||||||
View the website: https://Kludex.github.io/awesome-fastapi-projects/
|
View the website: https://kludex.github.io/awesome-fastapi-projects/
|
||||||
|
|
||||||
## Local Development
|
## Local Development
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ make
|
||||||
#### Frontend
|
#### Frontend
|
||||||
|
|
||||||
The frontend is built with [React](https://reactjs.org/) and [Next.js](https://nextjs.org/).
|
The frontend is built with [React](https://reactjs.org/) and [Next.js](https://nextjs.org/).
|
||||||
It is being statically built and served on GitHub Pages: https://Kludex.github.io/awesome-fastapi-projects/
|
It is being statically built and served on GitHub Pages: https://kludex.github.io/awesome-fastapi-projects/
|
||||||
|
|
||||||
To run the frontend locally, you need to install [Node.js](https://nodejs.org/en/) and [pnpm](https://pnpm.io/).
|
To run the frontend locally, you need to install [Node.js](https://nodejs.org/en/) and [pnpm](https://pnpm.io/).
|
||||||
The node version is specified in the `.node-version` file.
|
The node version is specified in the `.node-version` file.
|
||||||
|
|
|
@ -132,13 +132,12 @@ async def db_uow(
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
) -> AsyncGenerator[AsyncSession, None]:
|
) -> AsyncGenerator[AsyncSession, None]:
|
||||||
"""Provide a transactional scope around a series of operations."""
|
"""Provide a transactional scope around a series of operations."""
|
||||||
|
from app.uow import async_session_uow
|
||||||
|
|
||||||
# This context manager will start a transaction, and roll it back at the end
|
# This context manager will start a transaction, and roll it back at the end
|
||||||
# https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html#sqlalchemy.ext.asyncio.AsyncSessionTransaction
|
# https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html#sqlalchemy.ext.asyncio.AsyncSessionTransaction
|
||||||
async with db_session.begin() as transaction:
|
async with async_session_uow(db_session) as session:
|
||||||
try:
|
yield session
|
||||||
yield db_session
|
|
||||||
finally:
|
|
||||||
await transaction.rollback()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
|
|
14
app/index.py
14
app/index.py
|
@ -18,7 +18,7 @@ import aiofiles
|
||||||
import sqlalchemy.orm
|
import sqlalchemy.orm
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
from app.database import Dependency, Repo
|
from app.database import Dependency, Repo, async_session_maker
|
||||||
from app.models import DependencyDetail, RepoDetail
|
from app.models import DependencyDetail, RepoDetail
|
||||||
from app.uow import async_session_uow
|
from app.uow import async_session_uow
|
||||||
|
|
||||||
|
@ -38,9 +38,9 @@ async def create_repos_index() -> None:
|
||||||
|
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
async with async_session_uow() as session, aiofiles.open(
|
async with async_session_maker() as session, async_session_uow(
|
||||||
REPOS_INDEX_PATH, "w"
|
session
|
||||||
) as index_file:
|
), aiofiles.open(REPOS_INDEX_PATH, "w") as index_file:
|
||||||
await index_file.write(
|
await index_file.write(
|
||||||
json.dumps(
|
json.dumps(
|
||||||
{
|
{
|
||||||
|
@ -66,9 +66,9 @@ async def create_dependencies_index() -> None:
|
||||||
|
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
async with async_session_uow() as session, aiofiles.open(
|
async with async_session_maker() as session, async_session_uow(
|
||||||
DEPENDENCIES_INDEX_PATH, "w"
|
session
|
||||||
) as index_file:
|
) as session, aiofiles.open(DEPENDENCIES_INDEX_PATH, "w") as index_file:
|
||||||
dependencies = [
|
dependencies = [
|
||||||
DependencyDetail.model_validate(dependency).model_dump()
|
DependencyDetail.model_validate(dependency).model_dump()
|
||||||
async for dependency in (
|
async for dependency in (
|
||||||
|
|
101
app/scrape.py
101
app/scrape.py
|
@ -6,11 +6,11 @@ import typer
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.database import Dependency, Repo, RepoDependency
|
from app.database import Dependency, Repo, RepoDependency, async_session_maker
|
||||||
from app.dependencies import acquire_dependencies_data_for_repository
|
from app.dependencies import acquire_dependencies_data_for_repository
|
||||||
from app.source_graph.client import AsyncSourceGraphSSEClient
|
from app.source_graph.client import AsyncSourceGraphSSEClient
|
||||||
from app.source_graph.mapper import create_or_update_repos_from_source_graph_repos_data
|
from app.source_graph.mapper import create_or_update_repos_from_source_graph_repos_data
|
||||||
from app.types import RepoId
|
from app.source_graph.models import SourceGraphRepoData
|
||||||
from app.uow import async_session_uow
|
from app.uow import async_session_uow
|
||||||
|
|
||||||
|
|
||||||
|
@ -124,15 +124,48 @@ async def _create_dependencies_for_repo(session: AsyncSession, repo: Repo) -> No
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _save_scraped_repos_from_source_graph_repos_data(
|
||||||
|
source_graph_repos_data: list[SourceGraphRepoData],
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Save the scraped repos from the source graph repos data.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
This function is meant to be used in a task group.
|
||||||
|
From the SQLAlchemy documentation:
|
||||||
|
::
|
||||||
|
https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html#using-asyncsession-with-concurrent-tasks
|
||||||
|
|
||||||
|
The AsyncSession object is a mutable, stateful object
|
||||||
|
which represents a single, stateful database
|
||||||
|
transaction in progress. Using concurrent tasks with asyncio,
|
||||||
|
with APIs such as asyncio.gather() for example, should use
|
||||||
|
a separate AsyncSession per individual task.
|
||||||
|
|
||||||
|
|
||||||
|
:param source_graph_repos_data: The source graph repos data.
|
||||||
|
:return: None
|
||||||
|
""" # noqa: E501
|
||||||
|
async with async_session_maker() as session, async_session_uow(session):
|
||||||
|
saved_repos = await create_or_update_repos_from_source_graph_repos_data(
|
||||||
|
session=session,
|
||||||
|
source_graph_repos_data=source_graph_repos_data,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Saving {count} repos.",
|
||||||
|
count=len(saved_repos),
|
||||||
|
enqueue=True,
|
||||||
|
)
|
||||||
|
await session.commit()
|
||||||
|
|
||||||
|
|
||||||
async def scrape_source_graph_repos() -> None:
|
async def scrape_source_graph_repos() -> None:
|
||||||
"""
|
"""
|
||||||
Iterate over the source graph repos and create or update them in the database.
|
Iterate over the source graph repos and create or update them in the database.
|
||||||
|
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
async with AsyncSourceGraphSSEClient() as sg_client:
|
async with AsyncSourceGraphSSEClient() as sg_client, asyncio.TaskGroup() as tg:
|
||||||
async with async_session_uow() as session:
|
|
||||||
async with asyncio.TaskGroup() as tg:
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Creating or updating repos from source graph repos data.",
|
"Creating or updating repos from source graph repos data.",
|
||||||
enqueue=True,
|
enqueue=True,
|
||||||
|
@ -144,36 +177,40 @@ async def scrape_source_graph_repos() -> None:
|
||||||
enqueue=True,
|
enqueue=True,
|
||||||
)
|
)
|
||||||
tg.create_task(
|
tg.create_task(
|
||||||
create_or_update_repos_from_source_graph_repos_data(
|
_save_scraped_repos_from_source_graph_repos_data(
|
||||||
session=session,
|
source_graph_repos_data=sg_repos_data
|
||||||
source_graph_repos_data=sg_repos_data,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
await session.commit()
|
|
||||||
|
|
||||||
|
|
||||||
async def parse_dependencies_for_repo(
|
async def parse_dependencies_for_repo(semaphore: asyncio.Semaphore, repo: Repo) -> None:
|
||||||
semaphore: asyncio.Semaphore, repo_id: RepoId
|
|
||||||
) -> None:
|
|
||||||
"""
|
"""
|
||||||
Parse the dependencies for a given repo and create them in the database.
|
Parse the dependencies for a given repo and create them in the database.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
This function is meant to be used in a task group.
|
||||||
|
From the SQLAlchemy documentation:
|
||||||
|
::
|
||||||
|
https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html#using-asyncsession-with-concurrent-tasks
|
||||||
|
|
||||||
|
The AsyncSession object is a mutable, stateful object
|
||||||
|
which represents a single, stateful database
|
||||||
|
transaction in progress. Using concurrent tasks with asyncio,
|
||||||
|
with APIs such as asyncio.gather() for example, should use
|
||||||
|
a separate AsyncSession per individual task.
|
||||||
|
|
||||||
|
|
||||||
:param semaphore: A semaphore to limit the number of concurrent requests
|
:param semaphore: A semaphore to limit the number of concurrent requests
|
||||||
:param repo_id: The id of the repo for which to parse the dependencies
|
:param repo: A repo for which to create and assign the dependencies
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
""" # noqa: E501
|
||||||
async with async_session_uow() as session, semaphore:
|
async with semaphore, async_session_maker() as session, async_session_uow(session):
|
||||||
# Fetch the repo from the database
|
# Associate the repo object with a fresh session instance
|
||||||
logger.info(
|
repo = await session.merge(repo)
|
||||||
"Fetching the repo with id {repo_id}.", repo_id=repo_id, enqueue=True
|
|
||||||
)
|
|
||||||
repo = (
|
|
||||||
await session.scalars(sqlalchemy.select(Repo).where(Repo.id == repo_id))
|
|
||||||
).one()
|
|
||||||
# Create the dependencies for the repo
|
# Create the dependencies for the repo
|
||||||
logger.info(
|
logger.info(
|
||||||
"Creating the dependencies for the repo with id {repo_id}.",
|
"Creating the dependencies for the repo with id {repo_id}.",
|
||||||
repo_id=repo_id,
|
repo_id=repo.id,
|
||||||
enqueue=True,
|
enqueue=True,
|
||||||
)
|
)
|
||||||
await _create_dependencies_for_repo(session=session, repo=repo)
|
await _create_dependencies_for_repo(session=session, repo=repo)
|
||||||
|
@ -187,29 +224,25 @@ async def parse_dependencies_for_repos() -> None:
|
||||||
:return: None.
|
:return: None.
|
||||||
"""
|
"""
|
||||||
logger.info("Fetching the repos from the database.", enqueue=True)
|
logger.info("Fetching the repos from the database.", enqueue=True)
|
||||||
async with async_session_uow() as session:
|
async with async_session_maker() as session:
|
||||||
repo_ids = (
|
repos = (
|
||||||
await session.scalars(
|
await session.scalars(
|
||||||
sqlalchemy.select(Repo.id).order_by(
|
sqlalchemy.select(Repo).order_by(
|
||||||
Repo.last_checked_revision.is_(None).desc()
|
Repo.last_checked_revision.is_(None).desc()
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
).all()
|
).all()
|
||||||
logger.info("Fetched {count} repos.", count=len(repo_ids), enqueue=True)
|
logger.info("Fetched {count} repos.", count=len(repos), enqueue=True)
|
||||||
logger.info("Parsing the dependencies for the repos.", enqueue=True)
|
logger.info("Parsing the dependencies for the repos.", enqueue=True)
|
||||||
semaphore = asyncio.Semaphore(10)
|
semaphore = asyncio.Semaphore(10)
|
||||||
async with asyncio.TaskGroup() as tg:
|
async with asyncio.TaskGroup() as tg:
|
||||||
for repo_id in repo_ids:
|
for repo in repos:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Parsing the dependencies for repo {repo_id}.",
|
"Parsing the dependencies for repo {repo_id}.",
|
||||||
repo_id=repo_id,
|
repo_id=repo.id,
|
||||||
enqueue=True,
|
enqueue=True,
|
||||||
)
|
)
|
||||||
tg.create_task(
|
tg.create_task(parse_dependencies_for_repo(semaphore=semaphore, repo=repo))
|
||||||
parse_dependencies_for_repo(
|
|
||||||
semaphore=semaphore, repo_id=RepoId(repo_id)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
app = typer.Typer()
|
app = typer.Typer()
|
||||||
|
|
18
app/uow.py
18
app/uow.py
|
@ -9,19 +9,25 @@ from contextlib import asynccontextmanager
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.database import async_session_maker
|
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def async_session_uow() -> AsyncGenerator[AsyncSession, None]:
|
async def async_session_uow(
|
||||||
|
session: AsyncSession,
|
||||||
|
) -> AsyncGenerator[AsyncSession, None]:
|
||||||
"""
|
"""
|
||||||
Provide a transactional scope around a series of operations.
|
Provide a transactional scope around a series of operations.
|
||||||
|
|
||||||
|
:param session: The database session.
|
||||||
:return: a UoW instance
|
:return: a UoW instance
|
||||||
"""
|
"""
|
||||||
async with async_session_maker() as session:
|
async with session.begin():
|
||||||
async with session.begin() as transaction:
|
|
||||||
try:
|
try:
|
||||||
yield session
|
yield session
|
||||||
finally:
|
finally:
|
||||||
await transaction.rollback()
|
if session.in_transaction() and session.is_active:
|
||||||
|
# session.is_active is True if this Session is not in “partial rollback”
|
||||||
|
# state. If this Session is within a transaction, and that transaction
|
||||||
|
# has not been rolled back internally, the Session.is_active will also
|
||||||
|
# be True.
|
||||||
|
# https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html#sqlalchemy.ext.asyncio.AsyncSession.is_active
|
||||||
|
await session.rollback()
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
NEXT_PUBLIC_PROJECT_REPO_URL="https://github.com/Kludex/awesome-fastapi-projects"
|
NEXT_PUBLIC_PROJECT_REPO_URL="https://github.com/kludex/awesome-fastapi-projects"
|
||||||
|
|
|
@ -10,6 +10,7 @@ import { useVirtual } from "@tanstack/react-virtual";
|
||||||
import { useDependenciesOrama } from "@/lib/search";
|
import { useDependenciesOrama } from "@/lib/search";
|
||||||
import { search } from "@orama/orama";
|
import { search } from "@orama/orama";
|
||||||
import { Dependency } from "@/lib/schemas";
|
import { Dependency } from "@/lib/schemas";
|
||||||
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
export function MultiSelect<DataType extends { id: string; name: string }>({
|
export function MultiSelect<DataType extends { id: string; name: string }>({
|
||||||
data,
|
data,
|
||||||
|
@ -147,7 +148,7 @@ export function MultiSelect<DataType extends { id: string; name: string }>({
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="relative mt-2">
|
<div className={cn("relative", open && "mt-2")}>
|
||||||
{open && selectables.length > 0 ? (
|
{open && selectables.length > 0 ? (
|
||||||
<div className="absolute w-full z-10 top-0 rounded-md border bg-popover text-popover-foreground shadow-md outline-none animate-in">
|
<div className="absolute w-full z-10 top-0 rounded-md border bg-popover text-popover-foreground shadow-md outline-none animate-in">
|
||||||
<CommandGroup
|
<CommandGroup
|
||||||
|
|
Loading…
Reference in New Issue
Block a user