awesome-fastapi-projects/app/source_graph/client.py

124 lines
4.4 KiB
Python
Raw Normal View History

Web App (#25) * Set up the web project dependencies - Add linters, pre-commit, and GitHub Actions - Add a Makefile - Add a pyproject.toml * Fix pyupgrade job - Remove continue_on_error everywhere * Remove old code * Rename a GithubActions job * Change README * Adjust pre-commit and GitHub actions * Add tables and set up alembic * Set up tests * Extend tests * Add coverage config * Adjust the GithubActions workflow * Fix GithubActions workflow * Try fixing pyproject-fmt config * Fix formatting of pyproject.toml * Fix formatting of pyproject.toml * Add coverage report * Test listing the repositories * Add a working prototype of SourceGraph client * Add parsing of the SourceGraph SSE data * Fix tests * Ged rid of packages replaced by ruff * Fix waits in the SourceGraph client * Refactor the models and add a mapper - A new mapper allows to create database repositories from the SourceGraph data * Add mypy * Try fixing mypy action * Remove redundant configs * Exclude tests from type checking * Fix mypy pre-commit and GitHub action * Ignore factories * Make upserting possible for source graph data * Add logic for parsing the dependencies and populating the database * Add a database and a cron GitHub Action job * Try manually trigger a workflow * Bring back the old config * Add ReadTimeout for errors to retry for in SourceGraph client * Add typer * Adjust the docstrings * Update the database * Refactor and optimize scraping and dependencies parsing * Make scraping run on push for now * Add a unique constraint for the repo url and source graph repo id * Change the index columns in on_conflict statement for repo creation * Optimize dependencies parsing - Do not parse dependencies for a repo when revision did not change * Scraped repositories from Source Graph * Refactor scraping * Set up frontend * Scraped repositories from Source Graph * Add TODOs * Skip scraping when testing * Fix a test with updating the repos * Scraped repositories from Source Graph * Add some more TODOs * Scraped repositories from Source Graph * Add some more TODO comments * Add chadcn/ui * Scraped repositories from Source Graph * Create index.json * Scraped repositories from Source Graph * Add a draft of data table and display all the repos * Scraped repositories from Source Graph * Implement stars badges and description with overflow * Format the links to Github repos * Fix link clicking * Scraped repositories from Source Graph * Add simple pagination and stars column sorting * Scraped repositories from Source Graph * Implement basic searching * Scraped repositories from Source Graph * Implement a multiselect for dependencies * Scraped repositories from Source Graph * Implement actual filtering by dependencies * Scraped repositories from Source Graph * Add a workflow to deploy nextjs on github pages * Try fixing the deployment job * Enable static exports for app router * Fix uploading arifacts for nextjs job * Set base path to properly load JS and CSS * Fix the base path * Scraped repositories from Source Graph * Add header * Remove language version * Scraped repositories from Source Graph * Add some more TODOs * Scraped repositories from Source Graph * Adjust the pre-commit config * Fix pipelines * Scraped repositories from Source Graph * Add a footer * Create the indexes * Scraped repositories from Source Graph * Add more TODOs * Introduce minor footer adjustments * Adjust the scraping actions * Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes * Implement query params state * Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes * Do not commit query state on unmount * Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes * Hopefully fix query states and multiselect input * Scraped repositories from Source Graph, parsed the dependencies, and generated the indexes * Extend the Makefile * Resolve most of TODOs * Resolve the conflicts with anyio version, bring back httpx * Adjust the Makefile and README.md * Fix a typo in README.md * Adjust readme * Fix the Makefile * Fix some stuff * Make some adjustments * Possibly fix failing scraping jobs * Load the repo project URL from env --------- Co-authored-by: vladfedoriuk <vladfedoriuk@users.noreply.github.com> Co-authored-by: Vladyslav Fedoriuk <vladyslav.fedoriuk@deployed.pl>
2023-10-28 19:39:02 +00:00
"""The client for the SourceGraph API."""
import asyncio
from collections.abc import AsyncGenerator, Mapping, MutableMapping
from contextlib import asynccontextmanager
from datetime import timedelta
from types import TracebackType
from typing import Any, Final, Self
from urllib.parse import quote
import httpx
import stamina
from httpx_sse import EventSource, ServerSentEvent, aconnect_sse
from loguru import logger
from app.source_graph.models import SourceGraphRepoData, SourceGraphRepoDataListAdapter
#: The URL of the SourceGraph SSE API.
SOURCE_GRAPH_STREAM_API_URL: Final[str] = "https://sourcegraph.com/.api/search/stream"
#: The query parameters for the SourceGraph SSE API.
FASTAPI_REPOS_QUERY_PARAMS: Final[Mapping[str, str]] = {
"q": quote(
" ".join(
[
"repo:has.content(from fastapi import FastApi)",
"type:repo",
"visibility:public",
"archived:no",
"fork:no",
]
)
),
}
class AsyncSourceGraphSSEClient:
"""
A client for the SourceGraph SSE API.
To learn more about the underlying API, see the ``SourceGraph SSE API``
https://docs.sourcegraph.com/api/stream_api#sourcegraph-stream-api
"""
def __init__(self: Self) -> None:
"""Initialize the client."""
self._last_event_id: str | None = None
self._reconnection_delay: float = 0.0
self._aclient: httpx.AsyncClient = httpx.AsyncClient()
async def __aenter__(self: Self) -> Self:
"""Enter the async context manager."""
await self._aclient.__aenter__()
return self
async def __aexit__(
self: Self,
exc_type: type[BaseException] | None = None,
exc_val: BaseException | None = None,
exc_tb: TracebackType | None = None,
) -> None:
"""Exit the async context manager."""
return await self._aclient.__aexit__(exc_type, exc_val, exc_tb)
@asynccontextmanager
async def _aconnect_sse(
self: Self, **kwargs: MutableMapping[str, Any]
) -> AsyncGenerator[EventSource, None]:
"""Connect to the SourceGraph SSE API."""
headers = kwargs.pop("headers", {})
if self._last_event_id is not None:
headers["Last-Event-ID"] = self._last_event_id
async with aconnect_sse(
client=self._aclient,
url=str(SOURCE_GRAPH_STREAM_API_URL),
method="GET",
headers=headers,
**kwargs,
) as event_source:
yield event_source
async def _aiter_sse(
self: Self, **kwargs: MutableMapping[str, Any]
) -> AsyncGenerator[ServerSentEvent, None]:
"""Iterate over the SourceGraph SSE API."""
async with self._aconnect_sse(**kwargs) as event_source:
async for event in event_source.aiter_sse():
yield event
async def _aiter_sse_with_retries(
self: Self, **kwargs: MutableMapping[str, Any]
) -> AsyncGenerator[ServerSentEvent, None]:
"""Iterate over the SourceGraph SSE API with retries."""
async for attempt in stamina.retry_context(
on=(httpx.ReadError, httpx.ReadTimeout)
):
with attempt:
await asyncio.sleep(self._reconnection_delay)
async for event in self._aiter_sse(**kwargs):
self._last_event_id = event.id
if event.retry is not None:
logger.error(
"Received a retry event from the SourceGraph SSE API. "
"Schedule a reconnection in {retry} milliseconds.",
retry=event.retry,
enqueue=True,
)
self._reconnection_delay = timedelta(
milliseconds=event.retry
).total_seconds()
else:
self._reconnection_delay = 0.0
yield event
async def aiter_fastapi_repos(
self: Self,
) -> AsyncGenerator[list[SourceGraphRepoData], None]:
"""Iterate over the SourceGraph SSE API with retries."""
async for event in self._aiter_sse_with_retries(
params=dict(FASTAPI_REPOS_QUERY_PARAMS)
):
if event.event == "matches":
yield SourceGraphRepoDataListAdapter.validate_python(event.json())