mirror of
https://github.com/Kludex/awesome-fastapi-projects.git
synced 2024-11-27 14:01:09 +00:00
feat: new script
This commit is contained in:
parent
233b94976a
commit
cc9dbf28c4
10
.gitignore
vendored
10
.gitignore
vendored
|
@ -3,3 +3,13 @@ links.txt
|
||||||
unique_links.txt
|
unique_links.txt
|
||||||
imports.txt
|
imports.txt
|
||||||
reps/
|
reps/
|
||||||
|
|
||||||
|
# Text editor
|
||||||
|
.vscode
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
tmp/*
|
||||||
|
!tmp/.gitkeep
|
||||||
|
|
||||||
|
# Python
|
||||||
|
**/__pycache__/
|
||||||
|
|
57
app.py
Normal file
57
app.py
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
"""Dash app that renders the scraped ``results.json`` in an interactive table."""
import json

import dash
import dash_html_components as html
import dash_table
from dash.dependencies import Input, Output

# Column order shown in the table; ids must match the keys in results.json.
HEADERS = ("name", "age", "dependencies")


app = dash.Dash()

# Load the scraped data once at startup.
with open("results.json") as json_file:
    data = json.load(json_file)

print(data)

app.layout = html.Div(
    [
        dash_table.DataTable(
            id="datatable-interactivity",
            columns=[
                {"name": i.capitalize(), "id": i, "deletable": True, "selectable": True}
                for i in HEADERS
            ],
            data=data,
            editable=True,
            filter_action="native",
            sort_action="native",
            sort_mode="multi",
            column_selectable="single",
            row_selectable="multi",
            row_deletable=True,
            selected_columns=[],
            selected_rows=[],
            page_action="native",
            page_current=0,
            page_size=10,
        ),
        html.Div(id="datatable-interactivity-container"),
    ]
)


@app.callback(
    Output("datatable-interactivity", "style_data_conditional"),
    [Input("datatable-interactivity", "selected_columns")],
)
def update_styles(selected_columns):
    """Highlight the currently selected columns in the data table."""
    return [
        {"if": {"column_id": i}, "background_color": "#D2F3FF"}
        for i in selected_columns
    ]


if __name__ == "__main__":
    app.run_server(debug=True)
166
fastapi_projects/__main__.py
Normal file
166
fastapi_projects/__main__.py
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
"""Scrape GitHub for FastAPI projects and record each project's imports."""
import logging
import os
import re
import shutil
from contextlib import contextmanager
from typing import Set

from git import Git
from git.exc import GitCommandError
from github import Github
from github.Repository import Repository
from sqlalchemy import ForeignKey, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.sql.schema import Column, UniqueConstraint
from sqlalchemy.sql.sqltypes import Integer, String

logging.basicConfig(level=logging.INFO)

# Github client (token read from the environment; anonymous when unset).
github_access_token = os.getenv("ACCESS_TOKEN_GITHUB")
g = Github(github_access_token)

# Repositories larger than this are skipped before cloning.
MAX_SIZE = 100 * 1000  # 100 MB

# Directory where repositories get cloned.
# (Renamed from `dir`, which shadowed the builtin of the same name.)
base_dir = os.getcwd()
clone_dir = os.path.join(base_dir, "tmp")

# Path fragments marking vendored / virtualenv code that must not be scanned.
INVALID_FOLDERS = ("site-packages", "venv")

# Database
engine = create_engine("sqlite:///packages.db")
SessionLocal = sessionmaker(bind=engine)

Base = declarative_base()
class Association(Base):
    """Many-to-many link table between projects and the packages they import."""

    __tablename__ = "association"

    # Composite primary key: one row per (package, project) pair.
    package_id = Column(Integer, ForeignKey("package.id"), primary_key=True)
    project_id = Column(Integer, ForeignKey("project.id"), primary_key=True)

    package = relationship("Package", backref="package_associations")
    project = relationship("Project", backref="project_associations")
class Package(Base):
    """A Python package name discovered in some project's imports."""

    __tablename__ = "package"
    # Package names are globally unique — one row per distinct name.
    __table_args__ = (UniqueConstraint("name"),)

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)
class Project(Base):
    """A GitHub repository that imports FastAPI."""

    __tablename__ = "project"
    # A project is identified by its (name, owner) pair.
    __table_args__ = (UniqueConstraint("name", "owner"),)

    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)
    owner = Column(String)
    # Packages linked through the `association` table.
    packages = relationship("Package", secondary="association")
# Create the tables if they do not exist yet (no-op on later runs).
Base.metadata.create_all(engine, checkfirst=True)


@contextmanager
def get_session():
    """Yield a fresh SQLAlchemy session and always close it afterwards.

    BUG FIX: the original closed the session only on a clean exit, leaking
    it whenever the `with` body raised (e.g. a failed commit); the
    try/finally guarantees cleanup on every path.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
# Functions
def clone(repository: Repository):
    """Clone *repository* into ``clone_dir``, silently skipping failed clones.

    Git errors (already cloned, auth problems, vanished repo) are
    deliberately swallowed so one bad repository cannot stop the scrape.
    """
    try:
        Git(clone_dir).clone(repository.clone_url)
    except GitCommandError:
        pass
def get_packages_from_file(path: str) -> Set[str]:
    """Return the set of top-level package names imported by the file at *path*.

    Recognizes ``from <pkg>[.sub] import ...`` lines (group 1) and grep-style
    ``<file>:import <pkg>`` lines (group 2). Missing or undecodable files are
    logged and yield an empty set.
    """
    packages: Set[str] = set()
    logging.info("Reading file '%s'.", path)
    # Compile once instead of re-scanning the pattern for every line.
    pattern = re.compile(r"from (\w+)[\.\w+]*|:[ ]*import (\w*)\n")
    try:
        with open(path, "r") as file:
            # Stream the file line by line; readlines() needlessly
            # materialized the whole file in memory.
            for line in file:
                result = pattern.search(line)
                if result:
                    if result.group(1):
                        packages.add(result.group(1))
                    if result.group(2):
                        packages.add(result.group(2))
    except FileNotFoundError:
        logging.info("File not found '%s'.", path)
    except UnicodeDecodeError:
        logging.info("Invalid character on file '%s'.", path)
    return packages
def extract_data(repository: Repository) -> dict:
    """Walk the cloned checkout of *repository* and collect imported packages.

    Returns ``{"packages": <set of names>}`` or an empty dict when no Python
    file was scanned. Vendored code (site-packages, venv) is skipped.
    """
    data: dict = {}
    for root, _, files in os.walk(os.path.join(clone_dir, repository.name)):
        for file in files:
            path = os.path.join(root, file)
            if file.endswith(".py") and all(
                folder not in path for folder in INVALID_FOLDERS
            ):
                # BUG FIX: accumulate across files — the original assigned
                # `data["packages"] = ...` per file, so only the LAST scanned
                # file's imports survived.
                data.setdefault("packages", set()).update(
                    get_packages_from_file(path)
                )
    return data
def run():
    """Search GitHub for FastAPI usages and persist each project's packages.

    For every code-search hit: skip already-recorded or oversized projects,
    clone the repository, extract its imports, store them, and remove the
    checkout again.
    """
    snippets = g.search_code('l=Python&q="from+fastapi+import+FastAPI"&type=Code')
    for snippet in snippets:
        repository = snippet.repository
        name = repository.name
        owner = repository.owner.name
        logging.info("Got repository '%s'.", name)

        # Skip projects we have already recorded in a previous run.
        with get_session() as session:
            if (
                session.query(Project)
                .filter(Project.name == name, Project.owner == owner)
                .first()
            ):
                continue

        # NOTE: When deployed! Ignore repositories that didn't change.
        # from datetime import datetime
        # commits = repository.get_commits()
        # last_commit_date = [commit.commit.author.date for commit in commits][0]
        # if (datetime.today() - last_commit_date).days > 7:
        #     continue

        if repository.size > MAX_SIZE:
            continue

        logging.info("Cloning repository '%s'.", name)
        clone(repository)

        logging.info("Extracting data from '%s'.", name)
        data = extract_data(repository)

        with get_session() as session:
            project = Project(name=name, owner=owner)
            for package_name in data.get("packages", {}):
                # Reuse an existing Package row when the name is known.
                package = (
                    session.query(Package).filter(Package.name == package_name).first()
                )
                if package is None:
                    package = Package(name=package_name)
                project.packages.append(package)
            session.add(project)
            session.commit()

        logging.info("Removing repository '%s'.", name)
        # BUG FIX: clone() swallows failures, so the checkout may not exist —
        # ignore a missing directory instead of crashing the whole run.
        shutil.rmtree(os.path.join(clone_dir, name), ignore_errors=True)


# Run!
run()
BIN
packages.db
Normal file
BIN
packages.db
Normal file
Binary file not shown.
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
dash==1.17.0
|
||||||
|
PyGithub==1.53
|
||||||
|
gitpython==3.1.11
|
||||||
|
sqlalchemy==1.3.20
|
15385
results.json
15385
results.json
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +0,0 @@
|
||||||
"""Clone every repository listed in unique_links.txt into the reps/ folder."""
import shutil

import git
from git.repo.base import Repo
from giturlparse import parse

# class Progress(git.remote.RemoteProgress):
#     def update(self, op_code, cur_count, max_count=None, message=''):
#         print(self._cur_line)

with open("unique_links.txt") as fp:
    links = fp.readlines()
for i, link in enumerate(links, start=1):
    link = link.rstrip()
    name = parse(link).name
    print(f"File num: {i}")
    Repo.clone_from(link, name)
    try:
        shutil.move(name, "reps")
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; presumably the move fails when a clone with the
    # same name already sits in reps/ — TODO confirm, then drop the copy.
    except Exception:
        shutil.rmtree(name)
|
@ -1,13 +0,0 @@
|
||||||
"""Deduplicate links.txt into unique_links.txt (order is not preserved)."""
links = set()

# BUG FIX: both files were opened without `with` and only closed on the
# happy path, leaking the handles whenever any line processing raised.
with open("links.txt", "r") as f_in:
    for line in f_in:
        links.add(line)

with open("unique_links.txt", "w") as f_out:
    for link in links:
        f_out.write(link)
|
@ -1,29 +0,0 @@
|
||||||
"""Group imports.txt (grep output) by repository and dump to results.json."""
import json
import re
from typing import Dict, Union

mp: Dict[str, Union[set, list]] = {}

# Group 1: `from <pkg>[.sub]` imports; group 2: grep-style `path:import pkg`.
# Compiled once instead of per line.
pattern = re.compile(r"from (\w+)[\.\w+]*|:[ ]*import (\w*)\n")

# BUG FIX: f_in was opened without `with` and only closed on the happy path.
with open("imports.txt", "r") as f_in:
    for line in f_in:
        # grep lines look like `reps/<repo>/<path>:<matched text>`.
        try:
            rep_name = line.split("/")[1]
        except IndexError:
            rep_name = ""
        mp[rep_name] = mp.get(rep_name, set())
        result = pattern.search(line)
        if result:
            if result.group(1):
                mp[rep_name].add(result.group(1))
            if result.group(2):
                mp[rep_name].add(result.group(2))

# Sets are not JSON-serializable; convert each to a list first.
for key in mp:
    mp[key] = list(mp[key])

with open("results.json", "w") as f:
    json.dump(mp, f, sort_keys=True, indent=2)

print(len(mp))
|
@ -1,4 +0,0 @@
|
||||||
|
|
||||||
# Collect every `import` line from the cloned repos into imports.txt.
# BUG FIX: `>` truncated imports.txt on each iteration, so only the last
# directory's matches survived; also `find` without -mindepth 1 included
# `reps` itself, duplicating every match. Truncate once, then append.
: > imports.txt
for dir in $(find reps -mindepth 1 -maxdepth 1 -type d); do
    grep -r "import" --include \*.py "$dir" >> imports.txt
done
|
|
@ -1,57 +0,0 @@
|
||||||
"""Page through the GitHub code-search API and append repo URLs to links.txt."""
import json
import os
import re  # BUG FIX: get_next_link() uses re.findall but `re` was never imported
import sys
from time import sleep

import requests
from dotenv import load_dotenv

load_dotenv()

username = os.getenv("GITHUB_USERNAME")
password = os.getenv("GITHUB_PASSWORD")
API_URL = "https://api.github.com"


def get_response(page: int) -> dict:
    """Fetch one page (100 hits) of GitHub code-search results.

    Returns the raw ``requests`` Response (the annotation says dict, but the
    original returned the Response unchanged — kept for compatibility).
    """
    res = requests.get(
        f"{API_URL}/search/code",
        auth=(username, password),
        params={"q": "fastapi language:Python", "per_page": 100, "page": page},
    )
    return res


def get_next_link(link_header: str) -> str:
    """Extract the rel="next" URL from an RFC 5988 ``Link`` header, or None.

    BUG FIX: the original called getattr() on a dict, which always returned
    the default since dicts expose keys via lookup, not attributes.
    """
    rel_links = {
        rel: link
        for (link, rel) in re.findall(r'<(http.*?)>; rel="(.*?)"', link_header)
    }
    return rel_links.get("next", None)


filename = "links.txt"
file1 = open(filename, "a")  # append mode
has_next = True
page = 1
while has_next:
    sleep(1)  # stay under the search API rate limit
    res = get_response(page)
    res_json = res.json()
    if "items" in res_json:
        for item in res_json["items"]:
            file1.write(f"{item['repository']['html_url']}\n")
    print(f"Page: {page}")
    print(res.headers)
    # print(json.dumps(res_json, indent=4, sort_keys=True))
    # print(res.headers.get('X-RateLimit-Reset', 0))
    # Stop when the rate limit is exhausted or the API refuses the page.
    if int(
        res.headers.get("X-RateLimit-Remaining", 0)
    ) == 0 or "422" in res.headers.get("Status", "422"):
        has_next = False
    page += 1

file1.close()
|
@ -1,22 +0,0 @@
|
||||||
"""Extract distinct top-level imports from a grep dump: argv[1] -> argv[2]."""
import re
import sys

filename_in = sys.argv[1]
filename_out = sys.argv[2]

imports = set()

# BUG FIX: both files were opened without `with` and never closed at all.
with open(filename_in, "r") as file_in:
    for line in file_in:
        # `from <pkg>` where <pkg> is not a relative import (no leading dot).
        match1 = re.search(r"(from *(?!\.)(.+?)(?= |\.))", line)
        # grep-style `<path>: import <pkg>` lines.
        match2 = re.search(r"(: *(import) (.+))", line)
        if match1 is not None:
            imports.add(match1.group(2))
        if match2 is not None:
            imports.add(match2.group(3))

with open(filename_out, "w") as file_out:
    for imp in sorted(list(imports)):
        file_out.write(f"{imp}\n")
0
tmp/.gitkeep
Normal file
0
tmp/.gitkeep
Normal file
Loading…
Reference in New Issue
Block a user