Refactor scraping

This commit is contained in:
Vladyslav Fedoriuk 2023-08-16 23:29:25 +02:00
parent d819ff29f8
commit a8fc5fcdc1

View File

@@ -30,13 +30,18 @@ async def _create_dependencies_for_repo(session: AsyncSession, repo: Repo) -> No
dependencies_create_data, dependencies_create_data,
) = await acquire_dependencies_data_for_repository(repo) ) = await acquire_dependencies_data_for_repository(repo)
except RuntimeError: except RuntimeError:
# If the parsing fails, just skip creating the dependencies # If the parsing fails,
# just skip creating the dependencies
return
if repo.last_checked_revision == revision:
# If the repo has already been updated,
# just skip creating the dependencies
return return
if not dependencies_create_data: if not dependencies_create_data:
# If there are no dependencies, just skip creating the dependencies # If there are no dependencies,
# just skip creating the dependencies
return return
# Update the repo with the revision hash # Update the repo with the revision hash
if repo.last_checked_revision != revision:
update_repo_statement = ( update_repo_statement = (
sqlalchemy.update(Repo) sqlalchemy.update(Repo)
.where(Repo.id == repo.id) .where(Repo.id == repo.id)
@@ -44,11 +49,11 @@ async def _create_dependencies_for_repo(session: AsyncSession, repo: Repo) -> No
) )
await session.execute(update_repo_statement) await session.execute(update_repo_statement)
# Create dependencies - on conflict do nothing. # Create dependencies - on conflict do nothing.
insert_statement = sqlalchemy.dialects.sqlite.insert( insert_dependencies_statement = sqlalchemy.dialects.sqlite.insert(
Dependency Dependency
).on_conflict_do_nothing(index_elements=[Dependency.name]) ).on_conflict_do_nothing(index_elements=[Dependency.name])
await session.execute( await session.execute(
insert_statement.returning(Dependency), insert_dependencies_statement.returning(Dependency),
[ [
{ {
"name": dependency_data.name, "name": dependency_data.name,
@@ -70,11 +75,11 @@ async def _create_dependencies_for_repo(session: AsyncSession, repo: Repo) -> No
) )
).all() ).all()
# Add the dependencies to the repo # Add the dependencies to the repo
repo_dependencies_insert_statement = sqlalchemy.dialects.sqlite.insert( insert_repo_dependencies_statement = sqlalchemy.dialects.sqlite.insert(
RepoDependency RepoDependency
).on_conflict_do_nothing([RepoDependency.repo_id, RepoDependency.dependency_id]) ).on_conflict_do_nothing([RepoDependency.repo_id, RepoDependency.dependency_id])
await session.execute( await session.execute(
repo_dependencies_insert_statement, insert_repo_dependencies_statement,
[ [
{ {
"repo_id": repo.id, "repo_id": repo.id,