Replace sqlite with json

This commit is contained in:
Marcelo Trylesinski 2020-11-21 00:44:10 +01:00
parent cc9dbf28c4
commit 72fa22cbac
3 changed files with 30 additions and 84 deletions

5
app.py
View File

@@ -2,7 +2,7 @@ import json
import dash import dash
import dash_html_components as html import dash_html_components as html
import dash_table import dash_table as dt
from dash.dependencies import Input, Output from dash.dependencies import Input, Output
HEADERS = ("name", "age", "dependencies") HEADERS = ("name", "age", "dependencies")
@@ -17,7 +17,8 @@ print(data)
app.layout = html.Div( app.layout = html.Div(
[ [
dash_table.DataTable( html.Link(rel="stylesheet", href="/static/dash-datatable.css"),
dt.DataTable(
id="datatable-interactivity", id="datatable-interactivity",
columns=[ columns=[
{"name": i.capitalize(), "id": i, "deletable": True, "selectable": True} {"name": i.capitalize(), "id": i, "deletable": True, "selectable": True}

View File

@@ -1,19 +1,15 @@
import json
import logging import logging
import os import os
import re import re
import shutil import shutil
from contextlib import contextmanager from datetime import datetime
from typing import Set from typing import List
from git import Git from git import Git
from git.exc import GitCommandError from git.exc import GitCommandError
from github import Github from github import Github
from github.Repository import Repository from github.Repository import Repository
from sqlalchemy import ForeignKey, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.sql.schema import Column, UniqueConstraint
from sqlalchemy.sql.sqltypes import Integer, String
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
@@ -26,53 +22,10 @@ MAX_SIZE = 100 * 1000 # 100 MB
# Directory # Directory
dir = os.getcwd() dir = os.getcwd()
clone_dir = os.path.join(dir, "tmp") clone_dir = os.path.join(dir, "tmp")
data_file = os.path.join(dir, "results.json")
INVALID_FOLDERS = ("site-packages", "venv") INVALID_FOLDERS = ("site-packages", "venv")
# Database
engine = create_engine("sqlite:///packages.db")
SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()
class Association(Base):
__tablename__ = "association"
package_id = Column(Integer, ForeignKey("package.id"), primary_key=True)
project_id = Column(Integer, ForeignKey("project.id"), primary_key=True)
package = relationship("Package", backref="package_associations")
project = relationship("Project", backref="project_associations")
class Package(Base):
__tablename__ = "package"
__table_args__ = (UniqueConstraint("name"),)
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
class Project(Base):
__tablename__ = "project"
__table_args__ = (UniqueConstraint("name", "owner"),)
id = Column(Integer, primary_key=True)
name = Column(String, nullable=False)
owner = Column(String)
packages = relationship("Package", secondary="association")
Base.metadata.create_all(engine, checkfirst=True)
@contextmanager
def get_session():
session = SessionLocal()
yield session
session.close()
# Functions # Functions
def clone(repository: Repository): def clone(repository: Repository):
@@ -83,7 +36,7 @@ def clone(repository: Repository):
pass pass
def get_packages_from_file(path: str) -> Set[str]: def get_packages_from_file(path: str) -> List[str]:
packages = set() packages = set()
logging.info("Reading file '%s'.", path) logging.info("Reading file '%s'.", path)
try: try:
@@ -99,7 +52,7 @@ def get_packages_from_file(path: str) -> Set[str]:
logging.info("File not found '%s'.", path) logging.info("File not found '%s'.", path)
except UnicodeDecodeError: except UnicodeDecodeError:
logging.info("Invalid character on file '%s'.", path) logging.info("Invalid character on file '%s'.", path)
return packages return list(packages)
def extract_data(repository: Repository) -> dict: def extract_data(repository: Repository) -> dict:
@@ -115,52 +68,45 @@ def extract_data(repository: Repository) -> dict:
def run(): def run():
with open(data_file) as json_file:
data = json.load(json_file)
snippets = g.search_code('l=Python&q="from+fastapi+import+FastAPI"&type=Code') snippets = g.search_code('l=Python&q="from+fastapi+import+FastAPI"&type=Code')
for snippet in snippets: found = len(snippets)
logging.info("Found '%d' snippets.", found)
for i, snippet in enumerate(snippets):
repository = snippet.repository repository = snippet.repository
name = repository.name name = repository.name
owner = repository.owner.name owner = repository.owner
logging.info("Got repository '%s'.", name) logging.info("Got repository '%s' (%d / %d).", name, i + 1, found)
with get_session() as session: if repository.id in data:
if ( commits = repository.get_commits()
session.query(Project) last_commit_date = [commit.commit.author.date for commit in commits][0]
.filter(Project.name == name, Project.owner == owner) if (datetime.today() - last_commit_date).days > 7:
.first() logging.info("Repository '%s' already stored.", name)
):
continue continue
# NOTE: When deployed! Ignore repositories that didn't change.
# from datetime import datetime
# commits = repository.get_commits()
# last_commit_date = [commit.commit.author.date for commit in commits][0]
# if (datetime.today() - last_commit_date).days > 7:
# continue
if repository.size > MAX_SIZE: if repository.size > MAX_SIZE:
logging.info("Repository size is '%d' MB. (SKIP)", repository.size // 1000)
continue continue
logging.info("Cloning repository '%s'.", name) logging.info("Cloning repository '%s'.", name)
clone(repository) clone(repository)
logging.info("Extracting data from '%s'.", name) logging.info("Extracting data from '%s'.", name)
data = extract_data(repository) extracted_data = extract_data(repository)
with get_session() as session: data[repository.id] = {"name": name, "owner": owner, **extracted_data}
project = Project(name=name, owner=owner)
for package_name in data.get("packages", {}):
package = (
session.query(Package).filter(Package.name == package_name).first()
)
if package is None:
package = Package(name=package_name)
project.packages.append(package)
session.add(project)
session.commit()
logging.info("Removing repository '%s'.", name) logging.info("Removing repository '%s'.", name)
shutil.rmtree(os.path.join(clone_dir, name)) shutil.rmtree(os.path.join(clone_dir, name))
logging.info("Writing on file!")
with open(os.path.join(dir, "results.json"), "w") as json_file:
json.dump(data, json_file)
# Run! # Run!
run() run()

View File

@@ -1,4 +1,3 @@
dash==1.17.0 dash==1.17.0
PyGithub==1.53 PyGithub==1.53
gitpython==3.1.11 gitpython==3.1.11
sqlalchemy==1.3.20