"""The client for the SourceGraph API."""
import asyncio
from collections.abc import AsyncGenerator, Mapping, MutableMapping
from contextlib import asynccontextmanager
from datetime import timedelta
from types import TracebackType
from typing import Any, Final, Self
from urllib.parse import quote

import httpx
import stamina
from httpx_sse import EventSource, ServerSentEvent, aconnect_sse
from loguru import logger

from app.source_graph.models import SourceGraphRepoData, SourceGraphRepoDataListAdapter

#: The URL of the SourceGraph SSE API.
SOURCE_GRAPH_STREAM_API_URL: Final[str] = "https://sourcegraph.com/.api/search/stream"


#: The query parameters for the SourceGraph SSE API. Decoded, the query reads:
#: ``repo:has.content(from fastapi import FastApi) type:repo
#: visibility:public archived:no fork:no``
FASTAPI_REPOS_QUERY_PARAMS: Final[Mapping[str, str]] = {
    "q": quote(
        " ".join(
            [
                "repo:has.content(from fastapi import FastApi)",
                "type:repo",
                "visibility:public",
                "archived:no",
                "fork:no",
            ]
        )
    ),
}


class AsyncSourceGraphSSEClient:
    """
    A client for the SourceGraph SSE API.

    To learn more about the underlying API, see the ``SourceGraph Stream API``
    documentation: https://docs.sourcegraph.com/api/stream_api#sourcegraph-stream-api
    """

    def __init__(self: Self) -> None:
        """Initialize the client."""
        #: The ID of the last received event; sent as ``Last-Event-ID`` on reconnect.
        self._last_event_id: str | None = None
        #: The delay (in seconds) to wait before reconnecting to the stream.
        self._reconnection_delay: float = 0.0
        self._aclient: httpx.AsyncClient = httpx.AsyncClient()

    async def __aenter__(self: Self) -> Self:
        """Enter the async context manager."""
        await self._aclient.__aenter__()
        return self

    async def __aexit__(
        self: Self,
        exc_type: type[BaseException] | None = None,
        exc_val: BaseException | None = None,
        exc_tb: TracebackType | None = None,
    ) -> None:
        """Exit the async context manager."""
        return await self._aclient.__aexit__(exc_type, exc_val, exc_tb)

    @asynccontextmanager
    async def _aconnect_sse(
        self: Self, **kwargs: MutableMapping[str, Any]
    ) -> AsyncGenerator[EventSource, None]:
        """Connect to the SourceGraph SSE API."""
        headers = kwargs.pop("headers", {})
        if self._last_event_id is not None:
            # Resume the stream from the last received event, per the SSE protocol.
            headers["Last-Event-ID"] = self._last_event_id
        async with aconnect_sse(
            client=self._aclient,
            url=SOURCE_GRAPH_STREAM_API_URL,
            method="GET",
            headers=headers,
            **kwargs,
        ) as event_source:
            yield event_source

    async def _aiter_sse(
        self: Self, **kwargs: MutableMapping[str, Any]
    ) -> AsyncGenerator[ServerSentEvent, None]:
        """Iterate over the SourceGraph SSE API."""
        async with self._aconnect_sse(**kwargs) as event_source:
            async for event in event_source.aiter_sse():
                yield event

    async def _aiter_sse_with_retries(
        self: Self, **kwargs: MutableMapping[str, Any]
    ) -> AsyncGenerator[ServerSentEvent, None]:
        """Iterate over the SourceGraph SSE API with retries."""
        async for attempt in stamina.retry_context(
            on=(httpx.ReadError, httpx.ReadTimeout)
        ):
            with attempt:
                # Honor the reconnection delay requested by the server, if any.
                await asyncio.sleep(self._reconnection_delay)
                async for event in self._aiter_sse(**kwargs):
                    # Remember the event ID so a reconnection can resume the stream.
                    self._last_event_id = event.id
                    if event.retry is not None:
                        logger.error(
                            "Received a retry event from the SourceGraph SSE API. "
                            "Scheduling a reconnection in {retry} milliseconds.",
                            retry=event.retry,
                            enqueue=True,
                        )
                        self._reconnection_delay = timedelta(
                            milliseconds=event.retry
                        ).total_seconds()
                    else:
                        self._reconnection_delay = 0.0
                    yield event

    async def aiter_fastapi_repos(
        self: Self,
    ) -> AsyncGenerator[list[SourceGraphRepoData], None]:
        """Iterate over the FastAPI repositories streamed from the SourceGraph SSE API."""
        async for event in self._aiter_sse_with_retries(
            params=dict(FASTAPI_REPOS_QUERY_PARAMS)
        ):
            if event.event == "matches":
                yield SourceGraphRepoDataListAdapter.validate_python(event.json())
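

# Illustrative usage sketch: stream the FastAPI repositories and log how many
# matches arrive per ``matches`` event. The ``_example`` coroutine is a
# hypothetical helper added for demonstration; running the module directly
# assumes network access to sourcegraph.com.
if __name__ == "__main__":

    async def _example() -> None:
        """Log the number of repositories received per ``matches`` event."""
        async with AsyncSourceGraphSSEClient() as client:
            async for repos in client.aiter_fastapi_repos():
                logger.info("Received {count} repositories.", count=len(repos))

    asyncio.run(_example())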