Optimize dependencies parsing

- Do not parse dependencies for a repo when its revision has not changed
This commit is contained in:
Vladyslav Fedoriuk 2023-08-16 23:06:55 +02:00
parent aa3ee07a90
commit 5f7aabc6e7
5 changed files with 76 additions and 10 deletions

View File

@ -78,6 +78,9 @@ class Repo(Base):
dependencies: Mapped[list["Dependency"]] = relationship(
"Dependency", secondary="repo_dependency", back_populates="repos"
)
last_checked_revision: Mapped[str | None] = mapped_column(
String(255), nullable=True
)
__table_args__ = (UniqueConstraint("url", "source_graph_repo_id"),)

View File

@ -2,6 +2,7 @@
import asyncio
import subprocess
from collections.abc import Sequence
from typing import NewType
import aiofiles.tempfile
import stamina
@ -10,17 +11,19 @@ from app.database import Repo
from app.models import DependencyCreateData
async def run_command(*cmd: str) -> str:
async def run_command(*cmd: str, cwd: str | None = None) -> str:
"""
Run the given command in a subprocess and return the stdout as plain text.
:param cmd: The command to run.
:param cwd: The working directory to run the command in.
:return: The stdout result
"""
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cwd,
)
stdout, stderr = await process.communicate()
@ -35,9 +38,12 @@ async def run_command(*cmd: str) -> str:
return stdout.decode()
RevisionHash = NewType("RevisionHash", str)
async def acquire_dependencies_data_for_repository(
repo: Repo,
) -> list[DependencyCreateData]:
) -> tuple[RevisionHash, list[DependencyCreateData]]:
"""
Acquire dependencies for the given repository.
@ -61,6 +67,20 @@ async def acquire_dependencies_data_for_repository(
directory,
)
# Get the latest commit hash
revision: str = await run_command(
"git",
"rev-parse",
"HEAD",
cwd=directory,
)
if repo.last_checked_revision == revision:
# Assume there are no new dependencies to return
# since all the repo dependencies have already
# been parsed.
return RevisionHash(revision), []
# Parse the dependencies
async for attempt in stamina.retry_context(on=RuntimeError, attempts=3):
with attempt:
@ -75,10 +95,13 @@ async def acquire_dependencies_data_for_repository(
dependencies_list = (
dependencies_list[2:] if len(dependencies_list) > 2 else []
)
return [
DependencyCreateData(
name=dependency.strip(),
)
for dependency in dependencies_list
if dependency.strip()
]
return (
RevisionHash(revision),
[
DependencyCreateData(
name=dependency.strip(),
)
for dependency in dependencies_list
if dependency.strip()
],
)

View File

@ -25,13 +25,24 @@ async def _create_dependencies_for_repo(session: AsyncSession, repo: Repo) -> No
:param repo: A repo for which to create and assign the dependencies
"""
try:
dependencies_create_data = await acquire_dependencies_data_for_repository(repo)
(
revision,
dependencies_create_data,
) = await acquire_dependencies_data_for_repository(repo)
except RuntimeError:
# If the parsing fails, just skip creating the dependencies
return
if not dependencies_create_data:
# If there are no dependencies, just skip creating the dependencies
return
# Update the repo with the revision hash
if repo.last_checked_revision != revision:
update_repo_statement = (
sqlalchemy.update(Repo)
.where(Repo.id == repo.id)
.values(last_checked_revision=revision)
)
await session.execute(update_repo_statement)
# Create dependencies - on conflict do nothing.
insert_statement = sqlalchemy.dialects.sqlite.insert(
Dependency

Binary file not shown.

View File

@ -0,0 +1,29 @@
"""Add a last_checked_revision column
Revision ID: ac7c35039d70
Revises: 90eb9d1f9267
Create Date: 2023-08-16 22:35:25.314490
"""
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision = "ac7c35039d70"
down_revision = "90eb9d1f9267"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Apply the migration: add ``last_checked_revision`` to the ``repo`` table.

    The new column is a nullable string of at most 255 characters.
    """
    revision_column = sa.Column(
        "last_checked_revision",
        sa.String(length=255),
        nullable=True,
    )
    op.add_column("repo", revision_column)
def downgrade() -> None:
    """Revert the migration: drop ``last_checked_revision`` from the ``repo`` table."""
    table, column = "repo", "last_checked_revision"
    op.drop_column(table, column)