diff --git a/app/database.py b/app/database.py
index 0eec57d..c555d35 100644
--- a/app/database.py
+++ b/app/database.py
@@ -78,6 +78,11 @@ class Repo(Base):
     dependencies: Mapped[list["Dependency"]] = relationship(
         "Dependency", secondary="repo_dependency", back_populates="repos"
     )
+    # Commit hash of the revision whose dependencies were parsed last;
+    # NULL until the repo has been checked for the first time.
+    last_checked_revision: Mapped[str | None] = mapped_column(
+        String(255), nullable=True
+    )

     __table_args__ = (UniqueConstraint("url", "source_graph_repo_id"),)

diff --git a/app/dependencies.py b/app/dependencies.py
index 9f2deba..478d245 100644
--- a/app/dependencies.py
+++ b/app/dependencies.py
@@ -2,6 +2,7 @@
 import asyncio
 import subprocess
 from collections.abc import Sequence
+from typing import NewType

 import aiofiles.tempfile
 import stamina
@@ -10,17 +11,19 @@ from app.database import Repo
 from app.models import DependencyCreateData


-async def run_command(*cmd: str) -> str:
+async def run_command(*cmd: str, cwd: str | None = None) -> str:
     """
     Run the given command in a subprocess and return the stdout as plain text.

     :param cmd: The command to run.
+    :param cwd: The working directory to run the command in.
     :return: The stdout result
     """
     process = await asyncio.create_subprocess_exec(
         *cmd,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
+        cwd=cwd,
     )

     stdout, stderr = await process.communicate()
@@ -35,9 +38,13 @@ async def run_command(*cmd: str) -> str:
     return stdout.decode()


+# A dedicated type for git commit hashes, to tell them apart from plain strings.
+RevisionHash = NewType("RevisionHash", str)
+
+
 async def acquire_dependencies_data_for_repository(
     repo: Repo,
-) -> list[DependencyCreateData]:
+) -> tuple[RevisionHash, list[DependencyCreateData]]:
     """
     Acquire dependencies for the given repository.

@@ -61,6 +68,22 @@ async def acquire_dependencies_data_for_repository(
             directory,
         )

+        # Get the latest commit hash. `git rev-parse` terminates its output
+        # with a newline, which must not end up in the stored revision.
+        rev_parse_output = await run_command(
+            "git",
+            "rev-parse",
+            "HEAD",
+            cwd=directory,
+        )
+        revision = RevisionHash(rev_parse_output.strip())
+
+        if repo.last_checked_revision == revision:
+            # Assume there are no new dependencies to return
+            # since all the repo dependencies have already
+            # been parsed.
+            return revision, []
+
         # Parse the dependencies
         async for attempt in stamina.retry_context(on=RuntimeError, attempts=3):
             with attempt:
@@ -75,10 +98,13 @@ async def acquire_dependencies_data_for_repository(
                 dependencies_list = (
                     dependencies_list[2:] if len(dependencies_list) > 2 else []
                 )
-                return [
-                    DependencyCreateData(
-                        name=dependency.strip(),
-                    )
-                    for dependency in dependencies_list
-                    if dependency.strip()
-                ]
+                return (
+                    revision,
+                    [
+                        DependencyCreateData(
+                            name=dependency.strip(),
+                        )
+                        for dependency in dependencies_list
+                        if dependency.strip()
+                    ],
+                )
diff --git a/app/scrape.py b/app/scrape.py
index 45311d0..9a86315 100644
--- a/app/scrape.py
+++ b/app/scrape.py
@@ -25,13 +25,26 @@ async def _create_dependencies_for_repo(session: AsyncSession, repo: Repo) -> No
     :param repo: A repo for which to create and assign the dependencies
     """
     try:
-        dependencies_create_data = await acquire_dependencies_data_for_repository(repo)
+        (
+            revision,
+            dependencies_create_data,
+        ) = await acquire_dependencies_data_for_repository(repo)
     except RuntimeError:
         # If the parsing fails, just skip creating the dependencies
         return
+    # Record the checked revision before the empty-result early return below,
+    # so that a revision without dependencies is not parsed again next time.
+    # NOTE(review): assumes the session still gets committed on that path.
+    if repo.last_checked_revision != revision:
+        update_repo_statement = (
+            sqlalchemy.update(Repo)
+            .where(Repo.id == repo.id)
+            .values(last_checked_revision=revision)
+        )
+        await session.execute(update_repo_statement)
     if not dependencies_create_data:
         # If there are no dependencies, just skip creating the dependencies
         return
     # Create dependencies - on conflict do nothing.
     insert_statement = sqlalchemy.dialects.sqlite.insert(
         Dependency
diff --git a/db.sqlite3 b/db.sqlite3
index efd8fa9..7ba1bd6 100644
Binary files a/db.sqlite3 and b/db.sqlite3 differ
diff --git a/migrations/versions/ac7c35039d70_add_a_last_checked_revision_column.py b/migrations/versions/ac7c35039d70_add_a_last_checked_revision_column.py
new file mode 100644
index 0000000..977996a
--- /dev/null
+++ b/migrations/versions/ac7c35039d70_add_a_last_checked_revision_column.py
@@ -0,0 +1,29 @@
+"""Add a last_checked_revision column
+
+Revision ID: ac7c35039d70
+Revises: 90eb9d1f9267
+Create Date: 2023-08-16 22:35:25.314490
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "ac7c35039d70"
+down_revision = "90eb9d1f9267"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column(
+        "repo", sa.Column("last_checked_revision", sa.String(length=255), nullable=True)
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("repo", "last_checked_revision")
+    # ### end Alembic commands ###