feat: limit number of dependencies on table

Marcelo Trylesinski 2020-07-10 01:15:08 +02:00
parent b4343c261c
commit c0df8d6f92
6 changed files with 592 additions and 688 deletions

File diff suppressed because it is too large


@@ -1,11 +1,30 @@
+from typing import List
 import json
 from pytablewriter import MarkdownTableWriter
+from stdlib_list import stdlib_list
+
+NATIVE = ["fastapi", "starlette", "pydantic", "typing", "uvicorn"]
+
+
+def filter_list(dependencies: List[str]) -> List[str]:
+    return [
+        dependency
+        for dependency in dependencies
+        if not (
+            dependency in NATIVE
+            or dependency in stdlib_list("3.8")
+            or dependency.startswith("_")
+        )
+    ]
+
 
-with open('results.json') as json_file:
+with open("results.json") as json_file:
     data = json.load(json_file)
 
 writer = MarkdownTableWriter()
 writer.headers = ["Project", "Dependencies"]
 writer.value_matrix = [
-    (project, dependencies) for project, dependencies in data.items()
+    [project, filter_list(dependencies)]
+    for project, dependencies in data.items()
+    if len(filter_list(dependencies)) > 0 and len(filter_list(dependencies)) < 20
 ]
 writer.write_table()
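
filter_list drops anything in the hard-coded NATIVE list, anything in Python 3.8's standard library (stdlib_list("3.8") returns those module names), and private "_"-prefixed modules; a table row is then kept only when 1-19 dependencies survive. A minimal sketch of the filter's effect, with an illustrative input list:

    deps = ["fastapi", "json", "_ast", "sqlalchemy", "requests"]
    filter_list(deps)  # -> ["sqlalchemy", "requests"]: NATIVE, stdlib, and "_" names removed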


@@ -8,14 +8,14 @@ from giturlparse import parse
 # def update(self, op_code, cur_count, max_count=None, message=''):
 #     print(self._cur_line)
-with open('unique_links.txt') as fp:
+with open("unique_links.txt") as fp:
     links = fp.readlines()
 
 for i, link in enumerate(links, start=1):
     link = link.rstrip()
     name = parse(link).name
-    print(f'File num: {i}')
+    print(f"File num: {i}")
     Repo.clone_from(link, name)
     try:
-        shutil.move(name, 'reps')
+        shutil.move(name, "reps")
     except:
         shutil.rmtree(name)
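
The bare except: keeps the crawl moving when a fresh clone cannot be moved (typically because "reps" already contains a directory with that name), at the cost of swallowing every error, including KeyboardInterrupt. A narrower sketch of the same recovery, assuming that intent:

    try:
        shutil.move(name, "reps")
    except shutil.Error:
        # "reps" already holds this repository; discard the duplicate checkout
        shutil.rmtree(name)

shutil.Error is what shutil.move raises when the destination already exists; the committed code keeps the bare except.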


@@ -8,11 +8,11 @@ mp: Dict[str, Union[set, list]] = {}
 for line in f_in.readlines():
     try:
-        rep_name = line.split('/')[1]
+        rep_name = line.split("/")[1]
     except IndexError:
         rep_name = ""
     mp[rep_name] = mp.get(rep_name, set())
 
-    result = re.search(r'from (\w+)[\.\w+]*|:[ ]*import (\w*)\n', line)
+    result = re.search(r"from (\w+)[\.\w+]*|:[ ]*import (\w*)\n", line)
     if result:
         if result.group(1):
             mp[rep_name].add(result.group(1))
@@ -22,7 +22,7 @@ for line in f_in.readlines():
 for key in mp:
     mp[key] = list(mp[key])
 
-with open('results.json', 'w') as f:
+with open("results.json", "w") as f:
     json.dump(mp, f, sort_keys=True, indent=2)
 
 print(len(mp))
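
The search pattern handles two shapes of line, a "from X..." import and a "filename:import X" fragment, capturing the top-level module name in group 1 or group 2 respectively. A quick check of both branches, with made-up lines:

    import re

    pattern = r"from (\w+)[\.\w+]*|:[ ]*import (\w*)\n"
    re.search(pattern, "from fastapi.responses import JSONResponse").group(1)  # 'fastapi'
    re.search(pattern, "main.py:import uvicorn\n").group(2)                    # 'uvicorn'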


@@ -8,28 +8,29 @@ from dotenv import load_dotenv
 load_dotenv()
 
-username = os.getenv('GITHUB_USERNAME')
-password = os.getenv('GITHUB_PASSWORD')
+username = os.getenv("GITHUB_USERNAME")
+password = os.getenv("GITHUB_PASSWORD")
 
-API_URL = 'https://api.github.com'
+API_URL = "https://api.github.com"
 
 
 def get_response(page: int) -> dict:
     res = requests.get(
-        f'{API_URL}/search/code',
+        f"{API_URL}/search/code",
         auth=(username, password),
-        params={
-            'q': 'fastapi language:Python',
-            'per_page': 100,
-            'page': page
-        }
+        params={"q": "fastapi language:Python", "per_page": 100, "page": page},
     )
     return res
 
 
 def get_next_link(link_header: str) -> str:
-    return getattr({
-        rel: link
-        for (link, rel) in re.findall(r'<(http.*?)>; rel="(.*?)"', link_header)
-    }, 'next', None)
+    return getattr(
+        {
+            rel: link
+            for (link, rel) in re.findall(r'<(http.*?)>; rel="(.*?)"', link_header)
+        },
+        "next",
+        None,
+    )
 
 
 filename = "links.txt"
@@ -40,14 +41,16 @@ while has_next:
     sleep(1)
     res = get_response(page)
     res_json = res.json()
-    if 'items' in res_json:
-        for item in res_json['items']:
+    if "items" in res_json:
+        for item in res_json["items"]:
             file1.write(f"{item['repository']['html_url']}\n")
 
     print(f"Page: {page}")
     print(res.headers)
     # print(json.dumps(res_json, indent=4, sort_keys=True))
     # print(res.headers.get('X-RateLimit-Reset', 0))
-    if int(res.headers.get('X-RateLimit-Remaining', 0)) == 0 or '422' in res.headers.get('Status', '422'):
+    if int(
+        res.headers.get("X-RateLimit-Remaining", 0)
+    ) == 0 or "422" in res.headers.get("Status", "422"):
         has_next = False
     page += 1
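
get_next_link builds a {rel: url} dict from GitHub's RFC 5988 Link header, but getattr on a dict always falls through to the default, since dict keys are not attributes; dict.get("next") is what actually returns the link. A sketch with a made-up header:

    import re

    link_header = (
        '<https://api.github.com/search/code?page=2>; rel="next", '
        '<https://api.github.com/search/code?page=34>; rel="last"'
    )
    links = {
        rel: link
        for (link, rel) in re.findall(r'<(http.*?)>; rel="(.*?)"', link_header)
    }
    links.get("next")             # 'https://api.github.com/search/code?page=2'
    getattr(links, "next", None)  # None -- keys are not attributes

The loop above never calls get_next_link anyway; it stops on X-RateLimit-Remaining or a 422 status instead.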


@@ -10,8 +10,8 @@ file_out = open(filename_out, "w")
 imports = set()
 
 for line in lines:
-    match1 = re.search(r'(from *(?!\.)(.+?)(?= |\.))', line)
-    match2 = re.search(r'(: *(import) (.+))', line)
+    match1 = re.search(r"(from *(?!\.)(.+?)(?= |\.))", line)
+    match2 = re.search(r"(: *(import) (.+))", line)
     if match1 is not None:
         imports.add(match1.group(2))
     if match2 is not None:
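
match1 takes the first segment of an absolute "from" import (the (?!\.) lookahead rejects relative "from ." imports), while match2 catches grep-style "filename: import module" output. A sketch with illustrative lines:

    import re

    line = "app/main.py:from fastapi.routing import APIRouter"
    re.search(r"(from *(?!\.)(.+?)(?= |\.))", line).group(2)  # 'fastapi'

    line = "app/main.py: import os"
    re.search(r"(: *(import) (.+))", line).group(3)           # 'os'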