#!/usr/bin/env -S uv --quiet run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "bs4",
#     "httpx",
#     "pydantic",
#     "python-dateutil",
#     "python-frontmatter",
#     "python-slugify",
#     "pytz",
#     "rich",
#     "typer",
#     "markdown-it-py",
# ]
# ///
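# The block above is PEP 723 inline script metadata: running the file with
# `uv run <script>` resolves the listed dependencies on the fly, so no
# separate virtualenv setup is needed.
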
import os
import re
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

import frontmatter
import httpx
import typer
from bs4 import BeautifulSoup
from bs4 import Tag
from markdown_it import MarkdownIt
from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from rich import print
from rich.progress import track
from slugify import slugify


app = typer.Typer(
    add_help_option=False,
    no_args_is_help=True,
    rich_markup_mode="rich",
)


class Project(BaseModel):
    """Model representing a Django project from the awesome list."""

    model_config = ConfigDict(extra="allow")

    name: str
    description: str
    url: str
    category: str
    slug: str = Field(default="")
    tags: list[str] = Field(default_factory=list)
    github_stars: int | None = None
    github_forks: int | None = None
    github_last_update: str | None = None
    previous_urls: list[str] = Field(default_factory=list)

    def __init__(self, **data):
        super().__init__(**data)
        if not self.slug:
            self.slug = slugify(self.name)


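# Illustrative example (not executed): the custom __init__ back-fills the
# slug from the name, so a Project created as
#
#     Project(name="Django REST framework", description="Web APIs for Django.",
#             url="https://github.com/encode/django-rest-framework", category="APIs")
#
# gets slug "django-rest-framework", which later becomes its output filename.
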
def parse_project_line(line: Tag, category: str) -> Project | None:
    """Parse a project line from the markdown and return a Project object."""
    try:
        # Find the project link
        link = line.find("a")
        if not link:
            return None

        name = link.text.strip()
        url = link.get("href", "").strip()

        # Get description (text after the link)
        description = line.text.replace(name, "").strip()
        description = re.sub(r"^\s*-\s*", "", description)  # Remove leading dash
        description = re.sub(r"^\s*", "", description)  # Remove leading whitespace

        if not all([name, url, description]):
            return None

        return Project(name=name, description=description, url=url, category=category)
    except Exception as e:
        print(f"[red]Error parsing project line: {e}[/red]")
        return None


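# For reference: a typical awesome-list entry renders to HTML as
#
#     <li><a href="https://github.com/owner/repo">project-name</a> - Short description.</li>
#
# so the parser above recovers the description by stripping the link text
# and the leading " - " separator.
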
def read_readme(file_path: Path) -> str:
    """Read README content from a local file and convert it to HTML."""
    markdown_content = file_path.read_text()
    md = MarkdownIt()
    html_content = md.render(markdown_content)
    return html_content


def parse_readme(content: str) -> list[Project]:
    """Parse README content and extract projects."""
    soup = BeautifulSoup(content, "html.parser")
    projects = []
    current_category = ""

    for element in soup.find_all(["h2", "h3", "li"]):
        if element.name in ["h2", "h3"]:
            current_category = element.text.strip()
        elif element.name == "li" and current_category:
            # Skip the table-of-contents list at the top of the README
            if current_category == "Contents":
                continue

            project = parse_project_line(element, current_category)
            if project:
                projects.append(project)

    return projects


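# Minimal sketch of the pipeline these two functions form (input invented,
# but the project is a real entry in the list):
#
#     html = MarkdownIt().render(
#         "## Admin\n- [django-grappelli](https://github.com/sehmaschine/django-grappelli) - A jazzy skin for the admin."
#     )
#     parse_readme(html)
#     # -> [Project(name="django-grappelli", category="Admin", ...)]
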
def merge_project_data(existing: dict[str, Any], new: dict[str, Any]) -> dict[str, Any]:
    """
    Merge existing project data with new data, preserving existing values
    while updating with new information where appropriate.
    """
    # Start with the existing data
    merged = existing.copy()

    # Always update core fields from the README
    core_fields = {"name", "url", "category"}
    for field in core_fields:
        if field in new:
            # If the URL is changing, store the old URL in previous_urls
            if field == "url" and new["url"] != existing.get("url"):
                previous_urls = merged.get("previous_urls", [])
                old_url = existing.get("url")
                if old_url and old_url not in previous_urls:
                    previous_urls.append(old_url)
                merged["previous_urls"] = previous_urls
            merged[field] = new[field]

    # Update the description only when it differs from the stored one
    if "description" in new and new["description"] != existing.get("description", ""):
        merged["description"] = new["description"]

    # Update GitHub metrics if they exist in the new data
    github_fields = {"github_stars", "github_forks", "github_last_update"}
    for field in github_fields:
        if field in new and new[field] is not None:
            merged[field] = new[field]

    return merged


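# Illustrative merge (values invented): a URL change is recorded in
# previous_urls, while keys that only exist in the stored frontmatter
# (e.g. hand-edited ones) survive untouched:
#
#     merge_project_data(
#         {"url": "https://github.com/old/repo", "featured": True},
#         {"url": "https://github.com/new/repo"},
#     )
#     # -> {"url": "https://github.com/new/repo", "featured": True,
#     #     "previous_urls": ["https://github.com/old/repo"]}
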
def save_project(project: Project, output_dir: Path):
    """Save project as a markdown file with frontmatter, preserving and merging existing content."""
    output_file = output_dir / f"{project.slug}.md"
    project_data = project.model_dump(exclude_none=True)

    if output_file.exists():
        try:
            # Load existing file
            existing_post = frontmatter.load(output_file)
            existing_data = dict(existing_post.metadata)

            # Merge data, favoring preservation of existing content
            merged_data = merge_project_data(existing_data, project_data)

            # Create new post with merged data but keep existing content
            post = frontmatter.Post(existing_post.content, **merged_data)
        except Exception as e:
            print(
                f"[yellow]Warning: Could not load existing file {output_file}, creating new: {e}[/yellow]"
            )
            post = frontmatter.Post(project.description, **project_data)
    else:
        # Create new file
        post = frontmatter.Post(project.description, **project_data)

    output_file.write_text(frontmatter.dumps(post))


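# The files written here are Jekyll-style markdown with YAML frontmatter,
# roughly:
#
#     ---
#     category: Admin
#     name: django-grappelli
#     url: https://github.com/sehmaschine/django-grappelli
#     ---
#     A jazzy skin for the admin.
#
# (field order and extra fields depend on frontmatter.dumps and the merged
# metadata)
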
def extract_github_info(url: str) -> dict[str, str] | None:
    """Extract owner and repo from a GitHub URL."""
    parsed = urlparse(url)
    if parsed.netloc != "github.com":
        return None

    parts = parsed.path.strip("/").split("/")
    if len(parts) >= 2:
        return {"owner": parts[0], "repo": parts[1]}
    return None


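# e.g. extract_github_info("https://github.com/wsvincent/awesome-django")
#      -> {"owner": "wsvincent", "repo": "awesome-django"}
# Non-GitHub URLs return None, so only GitHub-hosted projects get metrics.
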
def get_github_metrics(
    owner: str, repo: str, client: httpx.Client
) -> tuple[dict, str | None]:
    """
    Fetch GitHub metrics for a repository.

    Returns a tuple of (metrics_dict, new_url) where new_url is set if the repo has moved.
    """
    headers = {}
    if github_token := os.environ.get("GITHUB_TOKEN"):
        headers["Authorization"] = f"token {github_token}"

    api_url = f"https://api.github.com/repos/{owner}/{repo}"
    try:
        response = client.get(
            api_url,
            headers=headers,
            timeout=10.0,
            follow_redirects=True,  # Enable following redirects
        )

        # Check whether we followed a permanent redirect, which means the
        # repository was renamed or transferred to a new owner
        new_url = None
        if len(response.history) > 0:
            for r in response.history:
                if r.status_code == 301:
                    # Get the new location from the API response
                    data = response.json()
                    new_url = data.get("html_url")
                    if new_url:
                        print(
                            f"[yellow]Repository moved: {owner}/{repo} -> {new_url}[/yellow]"
                        )
                    break

        response.raise_for_status()
        data = response.json()

        return {
            "github_stars": data["stargazers_count"],
            "github_forks": data["forks_count"],
            "github_last_update": data["updated_at"],
        }, new_url

    except httpx.HTTPError as e:
        print(f"[red]Error fetching GitHub metrics for {owner}/{repo}: {str(e)}[/red]")
        return {}, None


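# Note: unauthenticated requests to api.github.com are limited to 60 per hour,
# so set GITHUB_TOKEN in the environment for a full run. stargazers_count,
# forks_count, updated_at, and html_url are all standard fields of the
# "get a repository" API response.
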
def load_project(file_path: Path) -> Project | None:
    """Load a project from a markdown file."""
    try:
        post = frontmatter.load(file_path)
        return Project(**post.metadata)
    except Exception as e:
        print(f"[red]Error loading project from {file_path}: {str(e)}[/red]")
        return None


@app.command()
def parse(readme_path: Path = Path("README.md"), output_dir: str = "_projects"):
    """
    Parse the local Awesome Django README and create individual project files with frontmatter.

    Preserves existing file content and metadata while updating with new information from the README.
    """
    if not readme_path.exists():
        print(f"[red]Error: README file not found at {readme_path}[/red]")
        raise typer.Exit(1)

    print(f"[bold blue]Reading README from {readme_path}...[/bold blue]")

    # Create the output directory
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)

    # Read and parse the README
    content = read_readme(readme_path)
    projects = parse_readme(content)

    print(f"[green]Found {len(projects)} projects[/green]")

    # Save individual project files
    for project in projects:
        save_project(project, output_path)
        print(f"[green]Updated {project.name} in {project.slug}.md[/green]")


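# Typical invocation of the command above (script filename illustrative):
#
#     uv run scripts/projects.py parse --readme-path README.md --output-dir _projects
#
# typer derives the --readme-path/--output-dir options from the function
# parameters, since both have default values.
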
@app.command()
def update_metrics(projects_dir: Path = Path("_projects"), batch_size: int = 50):
    """
    Update GitHub metrics (stars, forks, last update) for all projects.
    """
    if not projects_dir.exists():
        print(f"[red]Error: Projects directory not found at {projects_dir}[/red]")
        raise typer.Exit(1)

    print(
        f"[bold blue]Updating GitHub metrics for projects in {projects_dir}...[/bold blue]"
    )

    # Load all projects
    project_files = list(projects_dir.glob("*.md"))
    projects = []
    for file in project_files:
        if project := load_project(file):
            projects.append((file, project))

    print(f"[green]Found {len(projects)} projects to update[/green]")

    # Update metrics in batches to avoid rate limiting
    with httpx.Client() as client:
        for i in track(
            range(0, len(projects), batch_size), description="Updating projects"
        ):
            batch = projects[i : i + batch_size]
            for file_path, project in batch:
                if github_info := extract_github_info(project.url):
                    metrics, new_url = get_github_metrics(
                        github_info["owner"], github_info["repo"], client
                    )

                    if metrics:
                        # Update project with new metrics
                        for key, value in metrics.items():
                            setattr(project, key, value)

                    # Update URL if the repository has moved
                    if new_url and new_url != project.url:
                        # Store the old URL in previous_urls (the model gives the
                        # field a default, so the hasattr guard is just a safety net)
                        if not hasattr(project, "previous_urls"):
                            project.previous_urls = []
                        project.previous_urls.append(project.url)
                        # Update to the new URL
                        project.url = new_url
                        print(
                            f"[yellow]Updated URL for {project.name}: {project.url}[/yellow]"
                        )

                    save_project(project, projects_dir)
                    print(f"[green]Updated metrics for {project.name}[/green]")

    print("[bold blue]Finished updating GitHub metrics![/bold blue]")


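# And for the metrics pass (same illustrative filename; typer exposes the
# command as "update-metrics", converting the underscore to a hyphen):
#
#     GITHUB_TOKEN=ghp_... uv run scripts/projects.py update-metrics --batch-size 50
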
if __name__ == "__main__":
    app()