cache github downloads

2025-11-13 13:25:50 +00:00 · 2025-10-19 14:33:26 -10:00
parent 0f87e7508b
commit 0eab64ffe5
5 changed files with 870 additions and 4 deletions
--- a/esphome/github_cache.py
+++ b/esphome/github_cache.py
@@ -0,0 +1,279 @@
 """GitHub download cache for ESPHome.
 This module provides caching functionality for GitHub release downloads
 to avoid redundant network I/O when switching between platforms.
 """
 from __future__ import annotations
 import hashlib
 import json
 import logging
 from pathlib import Path
 import shutil
 import time
 import urllib.error
 import urllib.request
 _LOGGER = logging.getLogger(__name__)
 class GitHubCache:
    """Manages caching of GitHub release downloads.

    Cached archives are stored in ``cache_dir`` under the SHA-256 hex digest of
    their URL (keeping the URL's file extension). A ``cache_metadata.json`` file
    in the same directory records, per cache key, the URL, size, time of caching
    and the ``Last-Modified`` / ``ETag`` validators used to revalidate entries.
    """

    def __init__(self, cache_dir: Path | None = None):
        """Initialize the cache manager.

        Args:
            cache_dir: Directory to store cached files.
                      Defaults to ~/.esphome_cache/github
        """
        if cache_dir is None:
            cache_dir = Path.home() / ".esphome_cache" / "github"
        self.cache_dir = cache_dir
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.metadata_file = self.cache_dir / "cache_metadata.json"

    def _remove_quietly(self, path: Path) -> None:
        """Delete ``path`` if possible; a leftover temp file is never fatal."""
        try:
            path.unlink()
        except OSError:
            pass

    def _load_metadata(self) -> dict:
        """Load cache metadata from disk.

        Returns:
            The stored mapping of cache key to entry, or an empty dict when the
            file is missing, unreadable, not valid JSON, or not a JSON object.
        """
        try:
            with open(self.metadata_file, encoding="utf-8") as f:
                metadata = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or corrupt JSON: behave like an empty index.
            return {}
        # Callers index the result by cache key, so anything but a dict is unusable.
        return metadata if isinstance(metadata, dict) else {}

    def _save_metadata(self, metadata: dict) -> None:
        """Save cache metadata to disk (best effort, failures are only logged).

        The JSON is written to a temporary file in the cache directory and then
        renamed over the metadata file, so an interrupted write cannot leave a
        truncated metadata file behind.
        """
        import tempfile

        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                "w",
                encoding="utf-8",
                dir=self.cache_dir,
                prefix="cache_metadata.",
                suffix=".tmp",
                delete=False,
            ) as f:
                tmp_path = Path(f.name)
                json.dump(metadata, f, indent=2)
            tmp_path.replace(self.metadata_file)
        except (OSError, TypeError, ValueError) as e:
            _LOGGER.debug("Failed to save cache metadata: %s", e)
            if tmp_path is not None:
                self._remove_quietly(tmp_path)

    @staticmethod
    def is_github_url(url: str) -> bool:
        """Check if URL is a GitHub release download.

        A URL qualifies when its host is ``github.com`` (or a subdomain of it)
        and its path, ignoring any query string or fragment, ends in ``.zip``.
        Anything that is not a string (e.g. ``None``) is never a GitHub URL.
        """
        from urllib.parse import urlsplit

        if not isinstance(url, str):
            return False
        try:
            parts = urlsplit(url)
        except ValueError:
            return False
        host = (parts.hostname or "").lower()
        # Compare the parsed host: a substring test would also accept
        # "github.com" appearing in a path or inside another domain name.
        if host != "github.com" and not host.endswith(".github.com"):
            return False
        return parts.path.endswith(".zip")

    @staticmethod
    def _url_suffix(url: str) -> str:
        """Return the file extension of the URL, ignoring any query string."""
        return Path(url.split("?")[0]).suffix

    def _get_cache_key(self, url: str) -> str:
        """Get cache key (hash) for a URL."""
        return hashlib.sha256(url.encode()).hexdigest()

    def _get_cache_path(self, url: str) -> Path:
        """Get cache file path for a URL."""
        return self.cache_dir / f"{self._get_cache_key(url)}{self._url_suffix(url)}"

    def _check_if_modified(
        self,
        url: str,
        last_modified: str | None = None,
        etag: str | None = None,
    ) -> bool:
        """Check if a URL has been modified using a conditional HEAD request.

        Args:
            url: URL to check
            last_modified: Last-Modified header from previous response
            etag: ETag header from previous response

        Returns:
            True if the resource must be treated as modified: no validators are
            available, the server answered successfully, or it answered with an
            HTTP error other than 304.
            False if the server answered 304 Not Modified, or if the check
            itself could not be performed (e.g. no network), in which case the
            cached copy is used.
        """
        if not last_modified and not etag:
            # No cache headers available, assume modified
            return True
        try:
            request = urllib.request.Request(url, method="HEAD")
            if last_modified:
                request.add_header("If-Modified-Since", last_modified)
            if etag:
                request.add_header("If-None-Match", etag)
            # The context manager closes the response; only the status matters.
            with urllib.request.urlopen(request, timeout=10):
                # Successful response = file was modified
                return True
        except urllib.error.HTTPError as e:
            if e.code == 304:
                # Not modified
                _LOGGER.debug("File not modified (HTTP 304): %s", url)
                return False
            # Other HTTP errors, assume modified to be safe
            return True
        except Exception as e:
            # Deliberately broad: if the check fails, assume not modified (use cache)
            _LOGGER.debug("Failed to check if modified: %s", e)
            return False

    def get_cached_path(self, url: str, check_updates: bool = True) -> Path | None:
        """Get path to cached file if available and valid.

        Args:
            url: URL to check
            check_updates: Whether to check for updates using HTTP 304

        Returns:
            Path to cached file if valid, None if needs download
        """
        if not self.is_github_url(url):
            return None
        cache_path = self._get_cache_path(url)
        if not cache_path.exists():
            return None
        if not check_updates:
            _LOGGER.debug("Using cached file (no update check): %s", url)
            return cache_path
        # Load metadata and check if modified
        entry = self._load_metadata().get(self._get_cache_key(url))
        if not isinstance(entry, dict):
            # Have file but no (usable) metadata, use it anyway
            _LOGGER.debug("Using cached file (no metadata): %s", url)
            return cache_path
        if self._check_if_modified(url, entry.get("last_modified"), entry.get("etag")):
            # File was modified, need to re-download
            _LOGGER.debug("Cached file is outdated: %s", url)
            return None
        # File not modified, use cache
        _LOGGER.debug("Using cached file: %s", url)
        return cache_path

    def _copy_atomically(self, source: Path, destination: Path) -> None:
        """Copy ``source`` to ``destination`` without exposing a partial file.

        The data is copied to a uniquely named temporary file in the destination
        directory and renamed into place only once the copy has completed. The
        temporary file is removed if anything goes wrong.
        """
        import tempfile

        with tempfile.NamedTemporaryFile(
            dir=destination.parent,
            prefix=f"{destination.name}.",
            suffix=".tmp",
            delete=False,
        ) as tmp:
            tmp_path = Path(tmp.name)
        try:
            shutil.copy2(source, tmp_path)
            tmp_path.replace(destination)
        except BaseException:
            self._remove_quietly(tmp_path)
            raise

    def _fetch_validators(self, url: str) -> tuple[str | None, str | None]:
        """Return the (Last-Modified, ETag) headers of a HEAD request to ``url``.

        Returns ``(None, None)`` when the request fails; the entry is then
        stored without validators.
        """
        try:
            request = urllib.request.Request(url, method="HEAD")
            with urllib.request.urlopen(request, timeout=10) as response:
                return (
                    response.headers.get("Last-Modified"),
                    response.headers.get("ETag"),
                )
        except Exception as e:
            # Best effort only: caching must still succeed without network access.
            _LOGGER.debug("Could not fetch cache validators for %s: %s", url, e)
            return None, None

    def save_to_cache(self, url: str, source_path: Path) -> None:
        """Save a downloaded file to cache.

        Does nothing for URLs that are not GitHub downloads. Any failure is
        logged at debug level and otherwise ignored, so a caching problem never
        breaks the download that triggered it.

        Args:
            url: URL the file was downloaded from
            source_path: Path to the downloaded file
        """
        if not self.is_github_url(url):
            return
        try:
            source_path = Path(source_path)
            cache_path = self._get_cache_path(url)
            # Only copy if source and destination are different
            if source_path.resolve() != cache_path.resolve():
                self._copy_atomically(source_path, cache_path)
            # Try to get HTTP headers for caching
            last_modified, etag = self._fetch_validators(url)
            # Update metadata
            metadata = self._load_metadata()
            metadata[self._get_cache_key(url)] = {
                "url": url,
                "size": cache_path.stat().st_size,
                "cached_at": time.time(),
                "last_modified": last_modified,
                "etag": etag,
            }
            self._save_metadata(metadata)
            _LOGGER.debug("Saved to cache: %s", url)
        except Exception as e:
            _LOGGER.debug("Failed to save to cache: %s", e)

    def copy_from_cache(self, url: str, destination: Path) -> bool:
        """Copy a cached file to destination.

        Args:
            url: URL of the cached file
            destination: Where to copy the file

        Returns:
            True if successful, False otherwise
        """
        cached_path = self.get_cached_path(url, check_updates=True)
        if not cached_path:
            return False
        try:
            shutil.copy2(cached_path, destination)
        except OSError as e:
            _LOGGER.warning("Failed to use cache: %s", e)
            return False
        _LOGGER.info("Using cached download for %s", url)
        return True

    def cache_size(self) -> int:
        """Get total size of cached files in bytes (metadata file excluded)."""
        total = 0
        try:
            for file_path in self.cache_dir.glob("*"):
                if file_path.is_file() and file_path != self.metadata_file:
                    total += file_path.stat().st_size
        except OSError as e:
            # Return what could be counted so far rather than failing.
            _LOGGER.debug("Failed to compute cache size: %s", e)
        return total

    def list_cached(self) -> list[dict]:
        """List all cached files with metadata.

        Metadata entries that are malformed, or whose cached file no longer
        exists, are skipped.
        """
        cached_files = []
        for cache_key, meta in self._load_metadata().items():
            url = meta.get("url") if isinstance(meta, dict) else None
            if not isinstance(url, str):
                continue
            cache_path = self.cache_dir / f"{cache_key}{self._url_suffix(url)}"
            if not cache_path.exists():
                continue
            size = meta.get("size")
            if not isinstance(size, int):
                # Recorded size missing or unusable: fall back to the file itself.
                size = cache_path.stat().st_size
            cached_files.append(
                {
                    "url": url,
                    "path": cache_path,
                    "size": size,
                    "cached_at": meta.get("cached_at"),
                    "last_modified": meta.get("last_modified"),
                    "etag": meta.get("etag"),
                }
            )
        return cached_files

    def clear_cache(self) -> None:
        """Clear all cached files, including the metadata file."""
        try:
            for file_path in self.cache_dir.glob("*"):
                if file_path.is_file():
                    file_path.unlink()
            _LOGGER.info("Cache cleared: %s", self.cache_dir)
        except OSError as e:
            _LOGGER.warning("Failed to clear cache: %s", e)
 # Process-wide cache instance, created lazily by get_cache()
 _cache: GitHubCache | None = None


 def get_cache() -> GitHubCache:
    """Return the shared GitHubCache, constructing it on the first call."""
    global _cache  # noqa: PLW0603
    if _cache is not None:
        return _cache
    _cache = GitHubCache()
    return _cache
--- a/esphome/platformio_api.py
+++ b/esphome/platformio_api.py
@@ -5,7 +5,6 @@ import os
 from pathlib import Path
 import re
 import subprocess
 from typing import Any
 from esphome.const import CONF_COMPILE_PROCESS_LIMIT, CONF_ESPHOME, KEY_CORE
 from esphome.core import CORE, EsphomeError
@@ -44,15 +43,67 @@ def patch_structhash():
 def patch_file_downloader():
-    """Patch PlatformIO's FileDownloader to retry on PackageException errors."""
+    """Patch PlatformIO's FileDownloader to add caching and retry on PackageException errors."""
    from platformio.package.download import FileDownloader
    from platformio.package.exception import PackageException
    # Import our cache module
    from esphome.github_cache import GitHubCache
    _LOGGER.info("Applying GitHub download cache patch...")
    original_init = FileDownloader.__init__
    original_start = FileDownloader.start
-    def patched_init(self, *args: Any, **kwargs: Any) -> None:
+    # Initialize cache in .platformio directory so it benefits from GitHub Actions cache
    platformio_dir = Path.home() / ".platformio"
    cache = GitHubCache(cache_dir=platformio_dir / "esphome_download_cache")
    _LOGGER.info("GitHub download cache initialized at: %s", cache.cache_dir)
    def patched_init(self, *args, **kwargs):
        """Patched init that checks cache before making HTTP connection."""
        # Extract URL from args (first positional argument)
        url = args[0] if args else kwargs.get("url")
        dest_dir = args[1] if len(args) > 1 else kwargs.get("dest_dir")
        # Debug: Log all downloads
        _LOGGER.debug("[GitHub Cache] Download request for: %s", url)
        # Store URL for later use (original FileDownloader doesn't store it)
        self._esphome_cache_url = url if cache.is_github_url(url) else None
        # Check cache for GitHub URLs BEFORE making HTTP request
        if self._esphome_cache_url:
            _LOGGER.debug("[GitHub Cache] This is a GitHub URL, checking cache...")
            self._esphome_use_cache = cache.get_cached_path(url, check_updates=True)
            if self._esphome_use_cache:
                _LOGGER.debug(
                    "[GitHub Cache] Found in cache: %s", self._esphome_use_cache
                )
            else:
                _LOGGER.debug("[GitHub Cache] Not in cache, will download and cache")
        else:
            self._esphome_use_cache = None
            if url and str(url).startswith("http"):
                _LOGGER.debug("[GitHub Cache] Not a GitHub URL, skipping cache")
        # Only make HTTP connection if we don't have cached file
        if self._esphome_use_cache:
            # Skip HTTP connection, we'll handle this in start()
            # Set minimal attributes to satisfy FileDownloader
            self._http_session = None
            self._http_response = None
            self._fname = Path(url.split("?")[0]).name
            self._destination = self._fname
            if dest_dir:
                from os.path import join
                self._destination = join(dest_dir, self._fname)
            _LOGGER.info("Using cached download for %s", url)
            return None  # Don't call original_init
        # Normal initialization with retry logic
        max_retries = 3
        for attempt in range(max_retries):
            try:
                return original_init(self, *args, **kwargs)
@@ -69,7 +120,37 @@ def patch_file_downloader():
                    raise
        return None
    def patched_start(self, *args, **kwargs):
        """Serve the download from the cache when possible, else download and cache it."""
        import shutil

        # Both attributes are assigned by the patched __init__; getattr() keeps
        # this safe for instances that were created without them.
        cache_hit = getattr(self, "_esphome_use_cache", None)
        github_url = getattr(self, "_esphome_cache_url", None)
        if cache_hit:
            # A cached copy exists: copy it into place instead of downloading.
            try:
                shutil.copy2(cache_hit, self._destination)
            except Exception as e:
                _LOGGER.warning("Failed to copy from cache: %s", e)
                # Fall through to re-download
                # NOTE(review): on a cache hit the patched __init__ returns
                # without calling the original __init__, so the original start()
                # below presumably has no HTTP response to read from; confirm
                # this fallback can actually succeed.
            else:
                return True
        # Regular download path
        outcome = original_start(self, *args, **kwargs)
        if github_url:
            # Remember the freshly downloaded GitHub file for later builds.
            try:
                cache.save_to_cache(github_url, Path(self._destination))
            except Exception as e:
                _LOGGER.debug("Failed to save to cache: %s", e)
        return outcome
    FileDownloader.__init__ = patched_init
    FileDownloader.start = patched_start
 IGNORE_LIB_WARNINGS = f"(?:{'|'.join(['Hash', 'Update'])})"
@@ -87,6 +168,8 @@ FILTER_PLATFORMIO_LINES = [
    r"Memory Usage -> https://bit.ly/pio-memory-usage",
    r"Found: https://platformio.org/lib/show/.*",
    r"Using cache: .*",
    # Don't filter our cache messages - let users see when cache is being used
    # r"Using cached download for .*",
    r"Installing dependencies",
    r"Library Manager: Already installed, built-in library",
    r"Building in .* mode",
--- a/script/cache_platformio_downloads.py
+++ b/script/cache_platformio_downloads.py
@@ -0,0 +1,171 @@
 #!/usr/bin/env python3
 """
 Pre-cache PlatformIO GitHub Downloads
 This script extracts GitHub URLs from platformio.ini and pre-caches them
 to avoid redundant downloads when switching between ESP8266 and ESP32 builds.
 Usage:
    python3 script/cache_platformio_downloads.py [platformio.ini]
 """
 import argparse
 import configparser
 from pathlib import Path
 import re
 import sys
 # Import the cache manager
 sys.path.insert(0, str(Path(__file__).parent.parent))
 from esphome.github_cache import GitHubCache
 def extract_github_urls(platformio_ini: Path) -> list[str]:
    """Extract all GitHub URLs from platformio.ini.

    Scans the ``platform`` and ``platform_packages`` options of every section
    for ``https://github.com/...zip`` URLs.

    Args:
        platformio_ini: Path to platformio.ini file

    Returns:
        List of GitHub URLs found, without duplicates, in order of first
        appearance. Empty if the file cannot be read or contains none.
    """
    # interpolation=None makes configparser return values verbatim. With the
    # default interpolation a literal "%" in a value (for example a
    # percent-encoded URL) raises InterpolationSyntaxError on access.
    config = configparser.ConfigParser(
        inline_comment_prefixes=(";",), interpolation=None
    )
    config.read(platformio_ini, encoding="utf-8")
    github_pattern = re.compile(r"https://github\.com/[^\s;]+\.zip")
    urls: list[str] = []
    for section in config.sections():
        conf = config[section]
        # Check platform
        if "platform" in conf:
            urls.extend(github_pattern.findall(conf["platform"]))
        # Check platform_packages (one package spec per line)
        if "platform_packages" in conf:
            for line in conf["platform_packages"].splitlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                urls.extend(github_pattern.findall(line))
    # dict preserves insertion order, so this removes duplicates while keeping
    # the first occurrence of each URL in place.
    return list(dict.fromkeys(urls))
 def _build_parser():
    """Create the argument parser for the pre-caching script."""
    parser = argparse.ArgumentParser(
        description="Pre-cache PlatformIO GitHub downloads",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 This script scans platformio.ini for GitHub URLs and pre-caches them.
 This avoids redundant downloads when switching between platforms (e.g., ESP8266 and ESP32).
 Examples:
  # Cache downloads from default platformio.ini
  %(prog)s
  # Cache downloads from specific file
  %(prog)s custom_platformio.ini
  # Show what would be cached without downloading
  %(prog)s --dry-run
        """,
    )
    parser.add_argument(
        "platformio_ini",
        nargs="?",
        default="platformio.ini",
        help="Path to platformio.ini (default: platformio.ini)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be cached without downloading",
    )
    parser.add_argument(
        "--cache-dir",
        type=Path,
        help="Cache directory (default: ~/.platformio/esphome_download_cache)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force re-download even if cached",
    )
    return parser


 def main():
    """Scan a platformio.ini for GitHub URLs and cache each of them.

    Returns:
        0 when every URL was cached (or there was nothing to do / dry run),
        1 when the ini file is missing or at least one URL failed.
    """
    options = _build_parser().parse_args()
    ini_path = Path(options.platformio_ini)
    if not ini_path.exists():
        print(f"Error: {ini_path} not found", file=sys.stderr)
        return 1
    # Collect the URLs to cache
    print(f"Scanning {ini_path} for GitHub URLs...")
    found = extract_github_urls(ini_path)
    if not found:
        print("No GitHub URLs found in platformio.ini")
        return 0
    total = len(found)
    print(f"Found {total} unique GitHub URL(s):")
    for entry in found:
        print(f"  - {entry}")
    print()
    if options.dry_run:
        print("Dry run - not downloading")
        return 0
    # The cache lives in the PlatformIO directory unless --cache-dir overrides it
    target_dir = (
        options.cache_dir
        if options.cache_dir is not None
        else Path.home() / ".platformio" / "esphome_download_cache"
    )
    cache = GitHubCache(target_dir)
    # Download (or revalidate) every URL, counting the ones that succeed
    cached_ok = 0
    for position, entry in enumerate(found, 1):
        print(f"[{position}/{total}] Caching {entry}")
        try:
            # Reuse the downloader implemented by the github_download_cache CLI
            from script.github_download_cache import download_with_progress

            download_with_progress(
                cache, entry, force=options.force, check_updates=True
            )
        except Exception as e:
            print(f"Error caching {entry}: {e}", file=sys.stderr)
        else:
            cached_ok += 1
        print()
    # Summarize the state of the cache
    megabytes = cache.cache_size() / (1024 * 1024)
    print("\nCache summary:")
    print(f"  Successfully cached: {cached_ok}/{total}")
    print(f"  Total cache size: {megabytes:.2f} MB")
    print(f"  Cache location: {cache.cache_dir}")
    if cached_ok == total:
        return 0
    return 1
 # Script entry point: main()'s return value becomes the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
--- a/script/github_download_cache.py
+++ b/script/github_download_cache.py
@@ -0,0 +1,195 @@
 #!/usr/bin/env python3
 """
 GitHub Download Cache CLI
 This script provides a command-line interface to the GitHub download cache.
 The actual caching logic is in esphome/github_cache.py.
 Usage:
    python3 script/github_download_cache.py download URL
    python3 script/github_download_cache.py list
    python3 script/github_download_cache.py stats
    python3 script/github_download_cache.py clear
 """
 import argparse
 from pathlib import Path
 import sys
 import urllib.request
 # Add parent directory to path to import esphome modules
 sys.path.insert(0, str(Path(__file__).parent.parent))
 from esphome.github_cache import GitHubCache
 def _discard_partial(path: Path) -> None:
    """Best-effort removal of a partially written download."""
    try:
        path.unlink()
    except OSError:
        pass


 def download_with_progress(
    cache: GitHubCache,
    url: str,
    force: bool = False,
    check_updates: bool = True,
    timeout: float = 60.0,
 ) -> Path:
    """Download a URL with progress indicator and caching.

    The data is first written to a ``.tmp`` file next to the final cache path
    and renamed into place only after the download completed, so an aborted
    download never leaves a partial file under the cache path.

    Args:
        cache: GitHubCache instance
        url: URL to download
        force: Force re-download even if cached
        check_updates: Check for updates using HTTP 304
        timeout: Timeout in seconds passed to ``urllib.request.urlopen`` so a
            stalled connection cannot block forever

    Returns:
        Path to cached file

    Raises:
        RuntimeError: If the download or the move into the cache fails.
    """
    # If force, skip cache check
    if not force:
        cached_path = cache.get_cached_path(url, check_updates=check_updates)
        if cached_path:
            print(f"Using cached file for {url}")
            print(f"  Cache: {cached_path}")
            return cached_path
    # Need to download
    print(f"Downloading {url}")
    cache_path = cache._get_cache_path(url)
    print(f"  Cache: {cache_path}")
    # Download with progress
    temp_path = cache_path.with_suffix(cache_path.suffix + ".tmp")
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            # Without a Content-Length header no percentage can be shown.
            total_size = int(response.headers.get("Content-Length", 0))
            downloaded = 0
            with open(temp_path, "wb") as f:
                while chunk := response.read(8192):
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total_size > 0:
                        percent = (downloaded / total_size) * 100
                        print(f"\r  Progress: {percent:.1f}%", end="", flush=True)
            print()  # New line after progress
        # Move to final location
        temp_path.replace(cache_path)
        # Let cache handle metadata
        cache.save_to_cache(url, cache_path)
        return cache_path
    except Exception as e:
        _discard_partial(temp_path)
        raise RuntimeError(f"Failed to download {url}: {e}") from e
    except BaseException:
        # KeyboardInterrupt / SystemExit: still clean up, then propagate as-is.
        _discard_partial(temp_path)
        raise
 def _build_parser():
    """Create the argument parser with the download/list/stats/clear commands."""
    parser = argparse.ArgumentParser(
        description="GitHub Download Cache Manager",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 Examples:
  # Download and cache a URL
  %(prog)s download https://github.com/pioarduino/registry/releases/download/0.0.1/esptoolpy-v5.1.0.zip
  # List cached files
  %(prog)s list
  # Show cache statistics
  %(prog)s stats
  # Clear cache
  %(prog)s clear
        """,
    )
    parser.add_argument(
        "--cache-dir",
        type=Path,
        help="Cache directory (default: ~/.platformio/esphome_download_cache)",
    )
    commands = parser.add_subparsers(dest="command", help="Command to execute")
    # download URL [--force] [--no-check-updates]
    download = commands.add_parser("download", help="Download and cache a URL")
    download.add_argument("url", help="URL to download")
    download.add_argument(
        "--force", action="store_true", help="Force re-download even if cached"
    )
    download.add_argument(
        "--no-check-updates",
        action="store_true",
        help="Skip checking for updates (don't use HTTP 304)",
    )
    # Commands without arguments of their own
    commands.add_parser("list", help="List cached files")
    commands.add_parser("stats", help="Show cache statistics")
    commands.add_parser("clear", help="Clear all cached files")
    return parser


 def _run_download(cache, args):
    """Handle ``download``: fetch (or reuse) the URL and report where it is cached."""
    try:
        cache_path = download_with_progress(
            cache,
            args.url,
            force=args.force,
            check_updates=not args.no_check_updates,
        )
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    print(f"\nCached at: {cache_path}")
    return 0


 def _run_list(cache):
    """Handle ``list``: print URL, size and path of every cached file."""
    entries = cache.list_cached()
    if not entries:
        print("No cached files")
        return 0
    print(f"Cached files ({len(entries)}):")
    for entry in entries:
        megabytes = entry["size"] / (1024 * 1024)
        print(f"  {entry['url']}")
        print(f"    Size: {megabytes:.2f} MB")
        print(f"    Path: {entry['path']}")
    return 0


 def _run_stats(cache):
    """Handle ``stats``: print the cache location, entry count and total size."""
    total_bytes = cache.cache_size()
    entry_count = len(cache.list_cached())
    megabytes = total_bytes / (1024 * 1024)
    print(f"Cache directory: {cache.cache_dir}")
    print(f"Cached files: {entry_count}")
    print(f"Total size: {megabytes:.2f} MB")
    return 0


 def main():
    """CLI entry point.

    Returns:
        Process exit status: 0 on success, 1 when no command was given or the
        command failed.
    """
    parser = _build_parser()
    args = parser.parse_args()
    command = args.command
    if not command:
        parser.print_help()
        return 1
    # Use PlatformIO cache directory by default
    if args.cache_dir is None:
        args.cache_dir = Path.home() / ".platformio" / "esphome_download_cache"
    cache = GitHubCache(args.cache_dir)
    if command == "download":
        return _run_download(cache, args)
    if command == "list":
        return _run_list(cache)
    if command == "stats":
        return _run_stats(cache)
    if command == "clear":
        cache.clear_cache()
        return 0
    return 1
 # Script entry point: main()'s return value becomes the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
--- a/script/platformio_download_wrapper.py
+++ b/script/platformio_download_wrapper.py
@@ -0,0 +1,138 @@
 #!/usr/bin/env python3
 """
 PlatformIO Download Wrapper with Caching
 This script can be used as a wrapper around PlatformIO downloads to add caching.
 It intercepts download operations and uses the GitHub download cache.
 This is designed to be called from PlatformIO's extra_scripts if needed.
 """
 from pathlib import Path
 import sys
 import shutil
 import urllib.request

 # Import the cache manager. The CLI module lives next to this file, so its
 # directory is put on sys.path to make it importable from any working directory.
 sys.path.insert(0, str(Path(__file__).parent))
 # github_download_cache provides GitHubCache (re-exported from
 # esphome.github_cache) and download_with_progress; it defines no
 # GitHubDownloadCache class, so importing that name fails.
 from github_download_cache import GitHubCache, download_with_progress


 def is_github_url(url: str) -> bool:
    """Check if a URL is a GitHub URL."""
    return "github.com" in url.lower()


 def _default_cache_dir() -> Path:
    """Return the cache directory inside the user's PlatformIO directory."""
    return Path.home() / ".platformio" / "esphome_download_cache"


 def _download_direct(url: str, target_path: Path) -> None:
    """Stream ``url`` straight into ``target_path`` without touching the cache."""
    with urllib.request.urlopen(url) as response, open(target_path, "wb") as out_file:
        shutil.copyfileobj(response, out_file)


 def cached_download_handler(source, target, env):
    """PlatformIO download handler that uses caching for GitHub URLs.

    This function can be registered as a custom download handler in PlatformIO.
    Non-GitHub URLs are downloaded directly. GitHub URLs go through the download
    cache and are then copied to the target; if that fails for any reason the
    file is downloaded directly instead.

    Args:
        source: Source URL (first element is used)
        target: Target file path (first element is used)
        env: SCons environment (unused)
    """
    url = str(source[0])
    target_path = Path(str(target[0]))
    # Only cache GitHub URLs
    if not is_github_url(url):
        # Fall back to default download
        print(f"Downloading (no cache): {url}")
        _download_direct(url, target_path)
        return
    # Use cache for GitHub URLs
    print(f"Downloading with cache: {url}")
    try:
        cache = GitHubCache(_default_cache_dir())
        cached_path = download_with_progress(cache, url, check_updates=True)
        # Copy from cache to target
        shutil.copy2(cached_path, target_path)
        print(f"  Copied to: {target_path}")
    except Exception as e:
        print(f"Cache download failed, using direct download: {e}")
        # Fall back to direct download
        _download_direct(url, target_path)


 def setup_platformio_caching():
    """Setup PlatformIO to use cached downloads.

    This should be called from an extra_scripts file in platformio.ini.
    Currently it only checks that SCons is available and prints a note that
    download interception is not supported; no handler is registered.

    Example extra_scripts file (e.g., platformio_hooks.py):
        Import("env")
        from script.platformio_download_wrapper import setup_platformio_caching
        setup_platformio_caching()
    """
    try:
        from SCons.Script import DefaultEnvironment

        DefaultEnvironment()
        # Register custom download handler
        # Note: This may not work with all PlatformIO versions
        # as the download mechanism is internal
        print("Note: Direct download interception is not fully supported.")
        print("Please use the cache_platformio_downloads.py script instead.")
    except ImportError:
        print("Warning: SCons not available, cannot setup download caching")


 if __name__ == "__main__":
    # CLI mode - can be used to manually download a URL with caching
    import argparse

    parser = argparse.ArgumentParser(description="Download a URL with caching")
    parser.add_argument("url", help="URL to download")
    parser.add_argument("target", help="Target file path")
    parser.add_argument("--cache-dir", type=Path, help="Cache directory")
    args = parser.parse_args()
    target_path = Path(args.target)
    try:
        if is_github_url(args.url):
            print(f"Downloading with cache: {args.url}")
            cache_dir = args.cache_dir
            if cache_dir is None:
                cache_dir = _default_cache_dir()
            cached_path = download_with_progress(GitHubCache(cache_dir), args.url)
            # Copy to target
            target_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(cached_path, target_path)
            print(f"Copied to: {target_path}")
        else:
            print(f"Downloading directly (not a GitHub URL): {args.url}")
            target_path.parent.mkdir(parents=True, exist_ok=True)
            _download_direct(args.url, target_path)
        sys.exit(0)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)