feat: limit number of dependencies on table

This commit is contained in:
Marcelo Trylesinski 2020-07-10 01:15:08 +02:00
parent b4343c261c
commit c0df8d6f92
6 changed files with 592 additions and 688 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,30 @@
from typing import List
import json
from pytablewriter import MarkdownTableWriter
from stdlib_list import stdlib_list
with open('results.json') as json_file:
NATIVE = ["fastapi", "starlette", "pydantic", "typing", "uvicorn"]
def filter_list(dependencies: List[str]) -> List[str]:
    """Return the dependencies that should be shown in the table.

    A dependency is dropped when any of the following holds:

    * it is listed in the module-level ``NATIVE`` collection,
    * it is reported by ``stdlib_list("3.8")`` (the Python 3.8 standard
      library module names),
    * its name starts with an underscore.

    The relative order of the remaining dependencies is preserved.
    """
    # Build the standard-library lookup once per call instead of once per
    # dependency; a set also makes every membership test O(1).
    stdlib_modules = set(stdlib_list("3.8"))
    return [
        dependency
        for dependency in dependencies
        if not (
            dependency in NATIVE
            or dependency in stdlib_modules
            or dependency.startswith("_")
        )
    ]
with open("results.json") as json_file:
data = json.load(json_file)
writer = MarkdownTableWriter()
writer.headers = ["Project", "Dependencies"]
writer.value_matrix = [
(project, dependencies) for project, dependencies in data.items()
[project, filter_list(dependencies)]
for project, dependencies in data.items()
if len(filter_list(dependencies)) > 0 and len(filter_list(dependencies)) < 20
]
writer.write_table()

View File

@ -8,14 +8,14 @@ from giturlparse import parse
# def update(self, op_code, cur_count, max_count=None, message=''):
# print(self._cur_line)
with open('unique_links.txt') as fp:
with open("unique_links.txt") as fp:
links = fp.readlines()
for i, link in enumerate(links, start=1):
link = link.rstrip()
name = parse(link).name
print(f'File num: {i}')
print(f"File num: {i}")
Repo.clone_from(link, name)
try:
shutil.move(name, 'reps')
shutil.move(name, "reps")
except:
shutil.rmtree(name)

View File

@ -8,11 +8,11 @@ mp: Dict[str, Union[set, list]] = {}
for line in f_in.readlines():
try:
rep_name = line.split('/')[1]
rep_name = line.split("/")[1]
except IndexError:
rep_name = ""
mp[rep_name] = mp.get(rep_name, set())
result = re.search(r'from (\w+)[\.\w+]*|:[ ]*import (\w*)\n', line)
result = re.search(r"from (\w+)[\.\w+]*|:[ ]*import (\w*)\n", line)
if result:
if result.group(1):
mp[rep_name].add(result.group(1))
@ -22,7 +22,7 @@ for line in f_in.readlines():
for key in mp:
mp[key] = list(mp[key])
with open('results.json', 'w') as f:
with open("results.json", "w") as f:
json.dump(mp, f, sort_keys=True, indent=2)
print(len(mp))

View File

@ -8,46 +8,49 @@ from dotenv import load_dotenv
load_dotenv()
username = os.getenv('GITHUB_USERNAME')
password = os.getenv('GITHUB_PASSWORD')
API_URL = 'https://api.github.com'
username = os.getenv("GITHUB_USERNAME")
password = os.getenv("GITHUB_PASSWORD")
API_URL = "https://api.github.com"
def get_response(page: int) -> dict:
res = requests.get(
f'{API_URL}/search/code',
f"{API_URL}/search/code",
auth=(username, password),
params={
'q': 'fastapi language:Python',
'per_page': 100,
'page': page
}
params={"q": "fastapi language:Python", "per_page": 100, "page": page},
)
return res
def get_next_link(link_header: str) -> str:
return getattr({
rel: link
for (link, rel) in re.findall(r'<(http.*?)>; rel="(.*?)"', link_header)
}, 'next', None)
return getattr(
{
rel: link
for (link, rel) in re.findall(r'<(http.*?)>; rel="(.*?)"', link_header)
},
"next",
None,
)
filename = "links.txt"
file1 = open(filename, "a") # append mode
file1 = open(filename, "a") # append mode
has_next = True
page = 1
while has_next:
sleep(1)
res = get_response(page)
res_json = res.json()
if 'items' in res_json:
for item in res_json['items']:
if "items" in res_json:
for item in res_json["items"]:
file1.write(f"{item['repository']['html_url']}\n")
print(f"Page: {page}")
print(res.headers)
# print(json.dumps(res_json, indent=4, sort_keys=True))
# print(res.headers.get('X-RateLimit-Reset', 0))
if int(res.headers.get('X-RateLimit-Remaining', 0)) == 0 or '422' in res.headers.get('Status', '422'):
if int(
res.headers.get("X-RateLimit-Remaining", 0)
) == 0 or "422" in res.headers.get("Status", "422"):
has_next = False
page += 1

View File

@ -4,14 +4,14 @@ import sys
filename_in = sys.argv[1]
filename_out = sys.argv[2]
file_in = open(filename_in, "r")
lines = file_in.readlines()
lines = file_in.readlines()
file_out = open(filename_out, "w")
imports = set()
for line in lines:
match1 = re.search(r'(from *(?!\.)(.+?)(?= |\.))', line)
match2 = re.search(r'(: *(import) (.+))', line)
match1 = re.search(r"(from *(?!\.)(.+?)(?= |\.))", line)
match2 = re.search(r"(: *(import) (.+))", line)
if match1 is not None:
imports.add(match1.group(2))
if match2 is not None: