61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
URL utility functions for PlatformIO and other tools.
|
|
|
|
This module provides utilities for working with URLs, particularly for
|
|
sanitizing URLs to create filesystem-safe path names.
|
|
"""
|
|
|
|
import hashlib
|
|
from pathlib import Path
|
|
|
|
|
|
def sanitize_url_for_path(url: str) -> Path:
    """
    Sanitize a URL to create a valid filesystem path name.

    The leading ``scheme://`` prefix (if any) is dropped, every character
    that is not alphanumeric is turned into an underscore, runs of
    underscores are collapsed, and leading/trailing underscores are removed.
    Names longer than 100 characters are shortened to their first 70
    characters plus ``_`` and an 8-character SHA-256 prefix of the
    scheme-less URL.

    Args:
        url: URL to sanitize

    Returns:
        Sanitized Path suitable for use as a filesystem path component.
        The result is never empty: when no alphanumeric character survives
        sanitizing, the name ``url_<8 hex digits>`` is returned instead.

    Example:
        >>> str(sanitize_url_for_path("https://github.com/owner/repo/releases/download/v1.0.0/file.zip"))
        'github_com_owner_repo_releases_download_v1_0_0_file_zip'
    """
    # Remove the protocol (http://, https://, ftp://, etc.), but only when the
    # text before "://" really is a scheme (ASCII letter followed by ASCII
    # letters, digits, "+", "-" or "."). Otherwise a scheme-less URL that merely
    # embeds another URL (e.g. "host/redirect?to=http://x") would lose its
    # own host and path.
    scheme, separator, remainder = url.partition("://")
    looks_like_scheme = (
        scheme.isascii()
        and scheme[:1].isalpha()
        and all(ch.isalnum() or ch in "+-." for ch in scheme)
    )
    if separator and looks_like_scheme:
        url = remainder

    # Every non-alphanumeric character becomes "_". This covers the characters
    # that are invalid on common filesystems (/ \ : * ? " < > |) as well as
    # dots, spaces and dashes.
    cleaned = "".join(ch if ch.isalnum() else "_" for ch in url)

    # Splitting on "_" and dropping empty pieces collapses consecutive
    # underscores and trims leading/trailing ones in a single step.
    result = "_".join(piece for piece in cleaned.split("_") if piece)

    # Ensure reasonable length (max 100 chars): keep the first 70 chars and
    # add a short hash so distinct long URLs stay distinguishable.
    if len(result) > 100:
        digest = hashlib.sha256(url.encode()).hexdigest()[:8]
        result = result[:70] + "_" + digest

    # Path("") is the same as Path("."), i.e. the current directory, which is
    # not a usable name component. Fall back to a deterministic hash name.
    if not result:
        result = "url_" + hashlib.sha256(url.encode()).hexdigest()[:8]

    return Path(result)
|