mirror of
https://github.com/Kludex/awesome-fastapi-projects.git
synced 2025-05-16 06:07:05 +00:00
Add parsing of the SourceGraph SSE data
This commit is contained in:
parent
dae524ab0c
commit
9fa3b8e43b
2
.github/workflows/app.yaml
vendored
2
.github/workflows/app.yaml
vendored
@ -52,4 +52,4 @@ jobs:
|
|||||||
python -m pytest -v -s --failed-first --cov=app --cov-report=xml
|
python -m pytest -v -s --failed-first --cov=app --cov-report=xml
|
||||||
- name: Generate Coverage Report
|
- name: Generate Coverage Report
|
||||||
run: |
|
run: |
|
||||||
coverage report -m
|
python -m coverage report -m
|
||||||
|
@ -20,6 +20,7 @@ repos:
|
|||||||
rev: v0.0.280
|
rev: v0.0.280
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff
|
- id: ruff
|
||||||
|
args: [--fix, --exit-non-zero-on-fix]
|
||||||
- repo: https://github.com/PyCQA/isort
|
- repo: https://github.com/PyCQA/isort
|
||||||
rev: 5.12.0
|
rev: 5.12.0
|
||||||
hooks:
|
hooks:
|
||||||
|
@ -1,23 +1,77 @@
|
|||||||
"""The client for the SourceGraph API."""
|
"""The client for the SourceGraph API."""
|
||||||
|
import datetime
|
||||||
from collections.abc import AsyncGenerator, Mapping
|
from collections.abc import AsyncGenerator, Mapping
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Any, AnyStr, Final, Self
|
from typing import Any, AnyStr, Final, Literal, NewType, Self
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import stamina
|
import stamina
|
||||||
from httpx_sse import EventSource, ServerSentEvent, aconnect_sse
|
from httpx_sse import EventSource, ServerSentEvent, aconnect_sse
|
||||||
from pydantic import HttpUrl
|
from pydantic import (
|
||||||
|
BaseModel,
|
||||||
|
Field,
|
||||||
|
HttpUrl,
|
||||||
|
NonNegativeInt,
|
||||||
|
TypeAdapter,
|
||||||
|
computed_field,
|
||||||
|
)
|
||||||
|
|
||||||
#: The URL of the SourceGraph SSE API.
|
#: The URL of the SourceGraph SSE API.
|
||||||
SOURCE_GRAPH_STREAM_API_URL: Final[
|
SOURCE_GRAPH_STREAM_API_URL: Final[
|
||||||
HttpUrl
|
HttpUrl
|
||||||
] = "https://sourcegraph.com/.api/search/stream"
|
] = "https://sourcegraph.com/.api/search/stream"
|
||||||
|
|
||||||
|
#: The ID of a repository from the SourceGraph API.
|
||||||
|
SourceGraphRepoId = NewType("SourceGraphRepoId", int)
|
||||||
|
|
||||||
|
|
||||||
|
class SourceGraphRepoData(BaseModel):
    """
    A single repository entry as reported by the SourceGraph API.

    Attributes are filled from the payload keys named in each field's alias.
    """

    # Kind of the match; only the literal ``"repo"`` validates.
    type: Literal["repo"]
    # Repository identifier, read from the ``repositoryID`` key.
    repo_id: SourceGraphRepoId = Field(alias="repositoryID")
    # Repository handle, read from the ``repository`` key; ``repo_url``
    # prefixes it with ``https://``.
    repo_handle: str = Field(alias="repository")
    # Star count, read from the ``repoStars`` key; must not be negative.
    stars: NonNegativeInt = Field(alias="repoStars")
    # Timestamp read from the ``repoLastFetched`` key.
    last_fetched_at: datetime.datetime = Field(alias="repoLastFetched")
    # Optional free-text description; empty when the payload has none.
    description: str = ""

    @computed_field
    @property
    def repo_url(self: Self) -> HttpUrl:
        """Return the handle prefixed with ``https://`` as a validated URL."""
        url_adapter = TypeAdapter(HttpUrl)
        candidate = f"https://{self.repo_handle}"
        return url_adapter.validate_python(candidate)
|
||||||
|
|
||||||
|
|
||||||
|
#: Validates one raw payload entry into a ``SourceGraphRepoData``.
SourceGraphRepoDataAdapter = TypeAdapter(SourceGraphRepoData)
#: Validates a list of raw payload entries into ``SourceGraphRepoData`` objects.
SourceGraphRepoDataListAdapter = TypeAdapter(list[SourceGraphRepoData])
|
||||||
|
|
||||||
|
#: Query parameters for the SourceGraph SSE API: the search filters below are
#: joined with single spaces and percent-encoded into the ``q`` value.
#: NOTE(review): ``q`` is already percent-encoded here; confirm the HTTP client
#: does not encode it a second time when it serializes ``params``.
FASTAPI_REPOS_QUERY_PARAMS: Final[Mapping[str, str]] = {
    "q": quote(
        " ".join(
            (
                "repo:has.content(from fastapi import FastApi)",
                "type:repo",
                "visibility:public",
                "archived:no",
                "fork:no",
            )
        )
    ),
}
|
||||||
|
|
||||||
|
|
||||||
class AsyncSourceGraphSSEClient:
|
class AsyncSourceGraphSSEClient:
|
||||||
"""A client for the SourceGraph SSE API."""
|
"""
|
||||||
|
A client for the SourceGraph SSE API.
|
||||||
|
|
||||||
|
To learn more about the underlying API, see the ``SourceGraph SSE API``
|
||||||
|
https://docs.sourcegraph.com/api/stream_api#sourcegraph-stream-api
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self: Self) -> None:
|
def __init__(self: Self) -> None:
|
||||||
"""Initialize the client."""
|
"""Initialize the client."""
|
||||||
@ -68,18 +122,8 @@ class AsyncSourceGraphSSEClient:
|
|||||||
|
|
||||||
async def aiter_fastapi_repos(
    self: Self,
) -> AsyncGenerator[list[SourceGraphRepoData], None]:
    """
    Iterate over the parsed repository matches from the SourceGraph SSE API.

    Streams events through ``_aiter_sse_with_retries`` using
    ``FASTAPI_REPOS_QUERY_PARAMS`` and skips every event whose type is not
    ``"matches"``.

    Yields:
        One list of ``SourceGraphRepoData`` per ``"matches"`` event, validated
        from the event's JSON payload.
    """
    async for event in self._aiter_sse_with_retries(
        params=FASTAPI_REPOS_QUERY_PARAMS
    ):
        # Only "matches" events are parsed; every other event type is dropped.
        if event.event == "matches":
            yield SourceGraphRepoDataListAdapter.validate_python(event.json())
|
|
||||||
|
1
app/scraper/tests/__init__.py
Normal file
1
app/scraper/tests/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
"""Test the scraping of the SourceGraph API."""
|
118
app/scraper/tests/test_client.py
Normal file
118
app/scraper/tests/test_client.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
"""Test the client module of the scraper app."""
|
||||||
|
import pytest
|
||||||
|
from dirty_equals import HasLen, IsDatetime, IsInstance, IsPositiveInt
|
||||||
|
from pydantic import Json, TypeAdapter
|
||||||
|
|
||||||
|
from app.scraper.client import SourceGraphRepoData
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def source_graph_matched_repos_data() -> Json:
    """
    Provide sample repository entries for the deserialization tests.

    The four entries cover: description plus metadata (two entries),
    description without metadata, and neither description nor metadata.
    """
    sqlmodel_metadata_keys = (
        "fastapi",
        "json",
        "json-schema",
        "pydantic",
        "python",
        "sql",
        "sqlalchemy",
    )
    paddle_nlp_metadata_keys = (
        "bert",
        "embedding",
        "ernie",
        "information-extraction",
        "neural-search",
        "nlp",
        "paddlenlp",
        "pretrained-models",
        "question-answering",
        "search-engine",
        "semantic-analysis",
        "sentiment-analysis",
        "seq2seq",
        "transformer",
        "transformers",
        "uie",
    )

    sqlmodel = {
        "type": "repo",
        "repositoryID": 55636527,
        "repository": "github.com/tiangolo/sqlmodel",
        "repoStars": 10277,
        "repoLastFetched": "2023-07-31T18:47:22.875731Z",
        "description": (
            "SQL databases in Python, designed "
            "for simplicity, compatibility, "
            "and robustness."
        ),
        # Every metadata value in the sample is the string "null".
        "metadata": dict.fromkeys(sqlmodel_metadata_keys, "null"),
    }
    reflex = {
        "type": "repo",
        "repositoryID": 59434622,
        "repository": "github.com/reflex-dev/reflex",
        "repoStars": 10061,
        "repoLastFetched": "2023-07-31T08:58:42.692906Z",
        "description": "(Previously Pynecone) 🕸 Web apps in pure Python 🐍",
    }
    paddle_nlp = {
        "type": "repo",
        "repositoryID": 42982149,
        "repository": "github.com/PaddlePaddle/PaddleNLP",
        "repoStars": 9804,
        "repoLastFetched": "2023-07-31T16:48:08.839209Z",
        "description": (
            "👑 Easy-to-use and powerful NLP library with 🤗 "
            "Awesome model zoo, supporting wide-range of NLP tasks "
            "from research to industrial applications, including"
            " 🗂Text Classification, 🔍 Neural Search, ❓ Question "
            "Answering, ℹ️ Information Extraction, "
            "📄 Document Intelligence, 💌 Sentiment Analysis etc."
        ),
        "metadata": dict.fromkeys(paddle_nlp_metadata_keys, "null"),
    }
    realpython_materials = {
        "type": "repo",
        "repositoryID": 36246068,
        "repository": "github.com/realpython/materials",
        "repoStars": 4359,
        "repoLastFetched": "2023-07-31T05:15:16.993896Z",
    }

    return [sqlmodel, reflex, paddle_nlp, realpython_materials]
|
||||||
|
|
||||||
|
|
||||||
|
def test_source_graph_repo_data(source_graph_matched_repos_data: Json) -> None:
    """
    Test the SourceGraphRepoData deserialization.

    Validates the whole sample list at once, then checks each parsed model
    field by field against the raw entry it was built from.
    """
    # The sample fixture holds four repository entries.
    expected_count = 4
    assert source_graph_matched_repos_data == HasLen(expected_count)

    repo_list_adapter = TypeAdapter(list[SourceGraphRepoData])
    repos_parsed = repo_list_adapter.validate_python(source_graph_matched_repos_data)
    assert repos_parsed == HasLen(expected_count)

    # One pass with separate asserts, so a failure names the exact field
    # instead of reporting a bare ``all(...)`` as false.
    for repo, repo_data in zip(
        repos_parsed, source_graph_matched_repos_data, strict=True
    ):
        assert repo == IsInstance[SourceGraphRepoData]
        assert repo.repo_id == repo_data["repositoryID"]
        assert repo.repo_handle == repo_data["repository"]
        assert repo.stars == IsPositiveInt
        assert repo.stars == repo_data["repoStars"]
        assert str(repo.repo_url) == f"https://{repo_data['repository']}"
        assert repo.last_fetched_at == IsDatetime
|
Loading…
x
Reference in New Issue
Block a user