Replace SQLite with JSON

This commit is contained in:
Marcelo Trylesinski 2020-11-21 00:44:10 +01:00
parent cc9dbf28c4
commit 72fa22cbac
3 changed files with 30 additions and 84 deletions

5
app.py
View File

@ -2,7 +2,7 @@ import json
import dash
import dash_html_components as html
import dash_table
import dash_table as dt
from dash.dependencies import Input, Output
HEADERS = ("name", "age", "dependencies")
@ -17,7 +17,8 @@ print(data)
app.layout = html.Div(
[
dash_table.DataTable(
html.Link(rel="stylesheet", href="/static/dash-datatable.css"),
dt.DataTable(
id="datatable-interactivity",
columns=[
{"name": i.capitalize(), "id": i, "deletable": True, "selectable": True}

View File

@ -1,19 +1,15 @@
import json
import logging
import os
import re
import shutil
from contextlib import contextmanager
from typing import Set
from datetime import datetime
from typing import List
from git import Git
from git.exc import GitCommandError
from github import Github
from github.Repository import Repository
from sqlalchemy import ForeignKey, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.sql.schema import Column, UniqueConstraint
from sqlalchemy.sql.sqltypes import Integer, String
logging.basicConfig(level=logging.INFO)
@ -26,53 +22,10 @@ MAX_SIZE = 100 * 1000 # 100 MB
# Directory
# Working paths are derived from the current working directory at import time.
# NOTE(review): the name `dir` shadows the Python builtin of the same name.
dir = os.getcwd()
# run() deletes clone_dir/<repository name> after extracting data from it.
clone_dir = os.path.join(dir, "tmp")
# JSON file that run() loads before it starts searching.
data_file = os.path.join(dir, "results.json")
# Folder names to ignore — presumably consulted while scanning a cloned
# repository; confirm at the use site (not visible in this span).
INVALID_FOLDERS = ("site-packages", "venv")
# Database
# SQLite engine for the file packages.db, plus the session factory and the
# declarative base that the ORM models inherit from.
engine = create_engine("sqlite:///packages.db")
SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()
# Link table pairing a package with a project; Project.packages names this
# table as its `secondary`.
class Association(Base):
__tablename__ = "association"
# Both foreign keys are primary-key columns, so the pair forms a composite key.
package_id = Column(Integer, ForeignKey("package.id"), primary_key=True)
project_id = Column(Integer, ForeignKey("project.id"), primary_key=True)
# Each relationship also registers a backref attribute on the target model.
package = relationship("Package", backref="package_associations")
project = relationship("Project", backref="project_associations")
# ORM model for a package, identified by its name.
class Package(Base):
__tablename__ = "package"
# A unique constraint on `name` prevents two rows for the same package.
__table_args__ = (UniqueConstraint("name"),)
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
# ORM model for a project; run() builds one per repository from its name and owner.
class Project(Base):
__tablename__ = "project"
# Uniqueness is enforced on the (name, owner) pair, not on name alone.
__table_args__ = (UniqueConstraint("name", "owner"),)
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
# Unlike `name`, `owner` is not declared nullable=False.
owner = Column(String)
# Packages linked to this project through the "association" table.
packages = relationship("Package", secondary="association")
# Create the tables for every model registered on Base; checkfirst=True skips
# tables that already exist. This runs at import time.
Base.metadata.create_all(engine, checkfirst=True)
@contextmanager
def get_session():
    """Yield a new database session and always close it afterwards.

    A session is created from ``SessionLocal`` on entry and closed when the
    ``with`` block exits, including when that block raises.

    Yields:
        The session object returned by ``SessionLocal()``.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Without try/finally the close is skipped whenever the caller's
        # block raises, leaving the session open.
        session.close()
# Functions
def clone(repository: Repository):
@ -83,7 +36,7 @@ def clone(repository: Repository):
pass
def get_packages_from_file(path: str) -> Set[str]:
def get_packages_from_file(path: str) -> List[str]:
packages = set()
logging.info("Reading file '%s'.", path)
try:
@ -99,7 +52,7 @@ def get_packages_from_file(path: str) -> Set[str]:
logging.info("File not found '%s'.", path)
except UnicodeDecodeError:
logging.info("Invalid character on file '%s'.", path)
return packages
return list(packages)
def extract_data(repository: Repository) -> dict:
@ -115,52 +68,45 @@ def extract_data(repository: Repository) -> dict:
# Entry point of the scraper: loads the stored results, searches GitHub code
# for FastAPI imports, and for each hit clones the repository, extracts its
# data, records it, and removes the clone. The results are written to
# results.json.
# NOTE(review): this span appears to interleave the SQLAlchemy-based and the
# JSON-based versions of the function (diff markers and indentation are not
# present) — confirm which lines are live before acting on the notes below.
def run():
# NOTE(review): open() raises FileNotFoundError if results.json does not exist yet.
with open(data_file) as json_file:
data = json.load(json_file)
snippets = g.search_code('l=Python&q="from+fastapi+import+FastAPI"&type=Code')
for snippet in snippets:
# NOTE(review): presumably `snippets` is a PyGithub PaginatedList — verify that
# len() is supported on it; the documented counter is `totalCount`.
found = len(snippets)
logging.info("Found '%d' snippets.", found)
for i, snippet in enumerate(snippets):
repository = snippet.repository
name = repository.name
owner = repository.owner.name
logging.info("Got repository '%s'.", name)
# NOTE(review): this `owner` is later placed in `data` and passed to json.dump;
# presumably it is a PyGithub user object rather than a string, which json
# cannot serialize — confirm (the assignment above uses `.name`).
owner = repository.owner
logging.info("Got repository '%s' (%d / %d).", name, i + 1, found)
with get_session() as session:
if (
session.query(Project)
.filter(Project.name == name, Project.owner == owner)
.first()
):
# NOTE(review): json.load always yields string keys for objects; if
# repository.id is an int this membership test never matches — confirm.
if repository.id in data:
commits = repository.get_commits()
# The comprehension walks every commit just to take element [0].
last_commit_date = [commit.commit.author.date for commit in commits][0]
# Skip repositories whose first listed commit is more than 7 days old.
# NOTE(review): datetime.today() is naive local time — confirm it is
# comparable with the commit date returned by the API.
if (datetime.today() - last_commit_date).days > 7:
logging.info("Repository '%s' already stored.", name)
continue
# NOTE: When deployed! Ignore repositories that didn't change.
# from datetime import datetime
# commits = repository.get_commits()
# last_commit_date = [commit.commit.author.date for commit in commits][0]
# if (datetime.today() - last_commit_date).days > 7:
# continue
# Skip repositories above MAX_SIZE; the log divides size by 1000 and labels
# the result MB, so `size` is presumably in KB.
if repository.size > MAX_SIZE:
logging.info("Repository size is '%d' MB. (SKIP)", repository.size // 1000)
continue
logging.info("Cloning repository '%s'.", name)
clone(repository)
logging.info("Extracting data from '%s'.", name)
# NOTE(review): assigning to `data` here would replace the dict loaded from
# results.json; the next line uses a separate name instead.
data = extract_data(repository)
extracted_data = extract_data(repository)
with get_session() as session:
project = Project(name=name, owner=owner)
for package_name in data.get("packages", {}):
package = (
session.query(Package).filter(Package.name == package_name).first()
)
if package is None:
package = Package(name=package_name)
project.packages.append(package)
session.add(project)
session.commit()
# Store the record under the repository id; json.dump writes non-string keys
# as strings, so the key comes back as a str on the next load.
data[repository.id] = {"name": name, "owner": owner, **extracted_data}
logging.info("Removing repository '%s'.", name)
shutil.rmtree(os.path.join(clone_dir, name))
logging.info("Writing on file!")
# Same path as `data_file`, rebuilt from `dir`.
with open(os.path.join(dir, "results.json"), "w") as json_file:
json.dump(data, json_file)
# Run!
# NOTE(review): run() is called unconditionally at module level; there is no
# `if __name__ == "__main__":` guard, so importing this module starts the scrape.
run()

View File

@ -1,4 +1,3 @@
dash==1.17.0
PyGithub==1.53
gitpython==3.1.11
sqlalchemy==1.3.20