mirror of https://github.com/esphome/esphome.git synced 2025-11-05 09:31:54 +00:00

Compare commits


8 Commits

Author          SHA1        Message                 Date
J. Nick Koston  4c9e4d30e9  tweak                   2025-10-19 14:56:40 -10:00
J. Nick Koston  db42983f0c  wip                     2025-10-19 14:54:31 -10:00
J. Nick Koston  20c65f70ed  wip                     2025-10-19 14:47:19 -10:00
J. Nick Koston  38e31e328c  wip                     2025-10-19 14:46:25 -10:00
J. Nick Koston  58cecff778  wip                     2025-10-19 14:44:04 -10:00
J. Nick Koston  1946656ea8  wip                     2025-10-19 14:40:47 -10:00
J. Nick Koston  c9700a0450  wip                     2025-10-19 14:35:09 -10:00
J. Nick Koston  0eab64ffe5  cache github downloads  2025-10-19 14:33:26 -10:00
6 changed files with 1032 additions and 21 deletions

@@ -432,6 +432,21 @@ jobs:
        with:
          python-version: ${{ env.DEFAULT_PYTHON }}
          cache-key: ${{ needs.common.outputs.cache-key }}
      - name: Cache platformio
        if: github.ref == 'refs/heads/dev'
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: ~/.platformio
          key: platformio-test-${{ hashFiles('platformio.ini') }}
      - name: Cache platformio
        if: github.ref != 'refs/heads/dev'
        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: ~/.platformio
          key: platformio-test-${{ hashFiles('platformio.ini') }}
      - name: Validate and compile components with intelligent grouping
        run: |
          . venv/bin/activate

esphome/github_cache.py (new file, 362 lines)

@@ -0,0 +1,362 @@
"""GitHub download cache for ESPHome.
This module provides caching functionality for GitHub release downloads
to avoid redundant network I/O when switching between platforms.
"""
from __future__ import annotations
import hashlib
import json
import logging
from pathlib import Path
import shutil
import time
import urllib.error
import urllib.request
_LOGGER = logging.getLogger(__name__)
class GitHubCache:
"""Manages caching of GitHub release downloads."""
# Cache expiration time in seconds (30 days)
CACHE_EXPIRATION_SECONDS = 30 * 24 * 60 * 60
def __init__(self, cache_dir: Path | None = None):
"""Initialize the cache manager.
Args:
cache_dir: Directory to store cached files.
Defaults to ~/.esphome_cache/github
"""
if cache_dir is None:
cache_dir = Path.home() / ".esphome_cache" / "github"
self.cache_dir = cache_dir
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.metadata_file = self.cache_dir / "cache_metadata.json"
# Prune old files on initialization
try:
self._prune_old_files()
except Exception as e:
_LOGGER.debug("Failed to prune old cache files: %s", e)
def _load_metadata(self) -> dict:
"""Load cache metadata from disk."""
if self.metadata_file.exists():
try:
with open(self.metadata_file) as f:
return json.load(f)
except (OSError, ValueError, json.JSONDecodeError):
return {}
return {}
def _save_metadata(self, metadata: dict) -> None:
"""Save cache metadata to disk."""
try:
with open(self.metadata_file, "w") as f:
json.dump(metadata, f, indent=2)
except OSError as e:
_LOGGER.debug("Failed to save cache metadata: %s", e)
@staticmethod
def is_github_url(url: str) -> bool:
"""Check if URL is a GitHub release download."""
return "github.com" in url.lower() and url.endswith(".zip")
def _get_cache_key(self, url: str) -> str:
"""Get cache key (hash) for a URL."""
return hashlib.sha256(url.encode()).hexdigest()
def _get_cache_path(self, url: str) -> Path:
"""Get cache file path for a URL."""
cache_key = self._get_cache_key(url)
ext = Path(url.split("?")[0]).suffix
return self.cache_dir / f"{cache_key}{ext}"
def _check_if_modified(
self,
url: str,
last_modified: str | None = None,
etag: str | None = None,
) -> bool:
"""Check if a URL has been modified using HTTP 304.
Args:
url: URL to check
last_modified: Last-Modified header from previous response
etag: ETag header from previous response
Returns:
True if modified, False if not modified (or offline/unreachable)
"""
if not last_modified and not etag:
# No cache headers available, assume modified
return True
try:
request = urllib.request.Request(url)
request.get_method = lambda: "HEAD"
if last_modified:
request.add_header("If-Modified-Since", last_modified)
if etag:
request.add_header("If-None-Match", etag)
try:
urllib.request.urlopen(request, timeout=10)
# 200 OK = file was modified
return True
except urllib.error.HTTPError as e:
if e.code == 304:
# Not modified
_LOGGER.debug("File not modified (HTTP 304): %s", url)
return False
# Other errors, assume modified to be safe
return True
except (OSError, urllib.error.URLError):
# If check fails (offline/network error), assume not modified (use cache)
_LOGGER.info("Cannot reach server (offline?), using cached file: %s", url)
return False
def get_cached_path(self, url: str, check_updates: bool = True) -> Path | None:
"""Get path to cached file if available and valid.
Args:
url: URL to check
check_updates: Whether to check for updates using HTTP 304
Returns:
Path to cached file if valid, None if needs download
"""
if not self.is_github_url(url):
return None
cache_path = self._get_cache_path(url)
if not cache_path.exists():
return None
# Load metadata
metadata = self._load_metadata()
cache_key = self._get_cache_key(url)
# Check if file should be re-downloaded
should_redownload = False
if check_updates and cache_key in metadata:
last_modified = metadata[cache_key].get("last_modified")
etag = metadata[cache_key].get("etag")
if self._check_if_modified(url, last_modified, etag):
# File was modified, need to re-download
_LOGGER.debug("Cached file is outdated: %s", url)
should_redownload = True
if should_redownload:
return None
# File is valid, update cached_at timestamp to keep it fresh
if cache_key in metadata:
metadata[cache_key]["cached_at"] = time.time()
self._save_metadata(metadata)
# Log appropriate message
if not check_updates:
_LOGGER.debug("Using cached file (no update check): %s", url)
elif cache_key not in metadata:
_LOGGER.debug("Using cached file (no metadata): %s", url)
else:
_LOGGER.debug("Using cached file: %s", url)
return cache_path
def save_to_cache(self, url: str, source_path: Path) -> None:
"""Save a downloaded file to cache.
Args:
url: URL the file was downloaded from
source_path: Path to the downloaded file
"""
if not self.is_github_url(url):
return
try:
cache_path = self._get_cache_path(url)
# Only copy if source and destination are different
if source_path.resolve() != cache_path.resolve():
shutil.copy2(source_path, cache_path)
# Try to get HTTP headers for caching
last_modified = None
etag = None
try:
request = urllib.request.Request(url)
request.get_method = lambda: "HEAD"
response = urllib.request.urlopen(request, timeout=10)
last_modified = response.headers.get("Last-Modified")
etag = response.headers.get("ETag")
except (OSError, urllib.error.URLError):
pass
# Update metadata
metadata = self._load_metadata()
cache_key = self._get_cache_key(url)
metadata[cache_key] = {
"url": url,
"size": cache_path.stat().st_size,
"cached_at": time.time(),
"last_modified": last_modified,
"etag": etag,
}
self._save_metadata(metadata)
_LOGGER.debug("Saved to cache: %s", url)
except OSError as e:
_LOGGER.debug("Failed to save to cache: %s", e)
def copy_from_cache(self, url: str, destination: Path) -> bool:
"""Copy a cached file to destination.
Args:
url: URL of the cached file
destination: Where to copy the file
Returns:
True if successful, False otherwise
"""
cached_path = self.get_cached_path(url, check_updates=True)
if not cached_path:
return False
try:
shutil.copy2(cached_path, destination)
_LOGGER.info("Using cached download for %s", url)
return True
except OSError as e:
_LOGGER.warning("Failed to use cache: %s", e)
return False
def cache_size(self) -> int:
"""Get total size of cached files in bytes."""
total = 0
try:
for file_path in self.cache_dir.glob("*"):
if file_path.is_file() and file_path != self.metadata_file:
total += file_path.stat().st_size
except OSError:
pass
return total
def list_cached(self) -> list[dict]:
"""List all cached files with metadata."""
cached_files = []
metadata = self._load_metadata()
for cache_key, meta in metadata.items():
cache_path = (
self.cache_dir / f"{cache_key}{Path(meta['url'].split('?')[0]).suffix}"
)
if cache_path.exists():
cached_files.append(
{
"url": meta["url"],
"path": cache_path,
"size": meta["size"],
"cached_at": meta.get("cached_at"),
"last_modified": meta.get("last_modified"),
"etag": meta.get("etag"),
}
)
return cached_files
def clear_cache(self) -> None:
"""Clear all cached files."""
try:
for file_path in self.cache_dir.glob("*"):
if file_path.is_file():
file_path.unlink()
_LOGGER.info("Cache cleared: %s", self.cache_dir)
except OSError as e:
_LOGGER.warning("Failed to clear cache: %s", e)
def _prune_old_files(self) -> None:
"""Remove cache files older than CACHE_EXPIRATION_SECONDS."""
current_time = time.time()
metadata = self._load_metadata()
removed_count = 0
removed_size = 0
# Check each file in metadata
for cache_key, meta in list(metadata.items()):
cached_at = meta.get("cached_at", 0)
age_seconds = current_time - cached_at
if age_seconds > self.CACHE_EXPIRATION_SECONDS:
# File is too old, remove it
cache_path = (
self.cache_dir
/ f"{cache_key}{Path(meta['url'].split('?')[0]).suffix}"
)
if cache_path.exists():
file_size = cache_path.stat().st_size
cache_path.unlink()
removed_size += file_size
removed_count += 1
_LOGGER.debug(
"Pruned old cache file (age: %.1f days): %s",
age_seconds / (24 * 60 * 60),
meta["url"],
)
# Remove from metadata
del metadata[cache_key]
# Also check for orphaned files (files without metadata)
for file_path in self.cache_dir.glob("*.zip"):
if file_path == self.metadata_file:
continue
# Check if file is in metadata
found_in_metadata = False
for cache_key in metadata:
if file_path.name.startswith(cache_key):
found_in_metadata = True
break
if not found_in_metadata:
# Orphaned file - check age by modification time
file_age = current_time - file_path.stat().st_mtime
if file_age > self.CACHE_EXPIRATION_SECONDS:
file_size = file_path.stat().st_size
file_path.unlink()
removed_size += file_size
removed_count += 1
_LOGGER.debug(
"Pruned orphaned cache file (age: %.1f days): %s",
file_age / (24 * 60 * 60),
file_path.name,
)
# Save updated metadata if anything was removed
if removed_count > 0:
self._save_metadata(metadata)
removed_mb = removed_size / (1024 * 1024)
_LOGGER.info(
"Pruned %d old cache file(s), freed %.2f MB",
removed_count,
removed_mb,
)
# Global cache instance
_cache: GitHubCache | None = None
def get_cache() -> GitHubCache:
"""Get the global GitHub cache instance."""
global _cache # noqa: PLW0603
if _cache is None:
_cache = GitHubCache()
return _cache
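
A minimal usage sketch of the cache API above (the release URL and file names are hypothetical placeholders, not real artifacts):

from pathlib import Path

from esphome.github_cache import get_cache

cache = get_cache()  # module-level singleton; defaults to ~/.esphome_cache/github
# Hypothetical release asset; any GitHub .zip release URL would do.
url = "https://github.com/example/project/releases/download/v1.0/toolchain.zip"

dest = Path("toolchain.zip")
if not cache.copy_from_cache(url, dest):
    # Cache miss: download `url` to `dest` by any means (elided in this
    # sketch), then populate the cache so the next run skips the network.
    cache.save_to_cache(url, dest)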

esphome/platformio_api.py (modified)

@@ -5,7 +5,6 @@ import os
from pathlib import Path
import re
import subprocess
-from typing import Any

from esphome.const import CONF_COMPILE_PROCESS_LIMIT, CONF_ESPHOME, KEY_CORE
from esphome.core import CORE, EsphomeError
@@ -44,15 +43,101 @@ def patch_structhash():
def patch_file_downloader():
-    """Patch PlatformIO's FileDownloader to retry on PackageException errors."""
    """Patch PlatformIO's FileDownloader to add caching and retry on PackageException errors.

    This function attempts to patch PlatformIO's internal download mechanism.
    If patching fails (due to API changes), it gracefully falls back to no caching.
    """
    try:
        from platformio.package.download import FileDownloader
        from platformio.package.exception import PackageException
    except ImportError as e:
        _LOGGER.debug("Could not import PlatformIO modules for patching: %s", e)
        return

    # Import our cache module
    from esphome.github_cache import GitHubCache

    _LOGGER.debug("Applying GitHub download cache patch...")

    # Verify the classes have the expected methods before patching
    if not hasattr(FileDownloader, "__init__") or not hasattr(FileDownloader, "start"):
        _LOGGER.warning(
            "PlatformIO FileDownloader API has changed, skipping cache patch"
        )
        return

    try:
        original_init = FileDownloader.__init__
        original_start = FileDownloader.start
-    def patched_init(self, *args: Any, **kwargs: Any) -> None:

        # Initialize cache in .platformio directory so it benefits from GitHub Actions cache
        platformio_dir = Path.home() / ".platformio"
        cache_dir = platformio_dir / "esphome_download_cache"
        cache_dir_existed = cache_dir.exists()
        cache = GitHubCache(cache_dir=cache_dir)
        if not cache_dir_existed:
            _LOGGER.info("Created GitHub download cache at: %s", cache.cache_dir)
    except Exception as e:
        _LOGGER.warning("Failed to initialize GitHub download cache: %s", e)
        return

    def patched_init(self, *args, **kwargs):
        """Patched init that checks cache before making HTTP connection."""
        try:
            # Extract URL from args (first positional argument)
            url = args[0] if args else kwargs.get("url")
            dest_dir = args[1] if len(args) > 1 else kwargs.get("dest_dir")
            # Debug: Log all downloads
            _LOGGER.debug("[GitHub Cache] Download request for: %s", url)
            # Store URL for later use (original FileDownloader doesn't store it)
            self._esphome_cache_url = url if cache.is_github_url(url) else None
            # Check cache for GitHub URLs BEFORE making HTTP request
            if self._esphome_cache_url:
                _LOGGER.debug("[GitHub Cache] This is a GitHub URL, checking cache...")
                self._esphome_use_cache = cache.get_cached_path(url, check_updates=True)
                if self._esphome_use_cache:
                    _LOGGER.info(
                        "Found %s in cache, will restore instead of downloading",
                        Path(url.split("?")[0]).name,
                    )
                    _LOGGER.debug(
                        "[GitHub Cache] Found in cache: %s", self._esphome_use_cache
                    )
                else:
                    _LOGGER.debug(
                        "[GitHub Cache] Not in cache, will download and cache"
                    )
            else:
                self._esphome_use_cache = None
                if url and str(url).startswith("http"):
                    _LOGGER.debug("[GitHub Cache] Not a GitHub URL, skipping cache")

            # Only make HTTP connection if we don't have cached file
            if self._esphome_use_cache:
                # Skip HTTP connection, we'll handle this in start()
                # Set minimal attributes to satisfy FileDownloader
                # Create a mock session that can be safely closed in __del__
                class MockSession:
                    def close(self):
                        pass

                self._http_session = MockSession()
                self._http_response = None
                self._fname = Path(url.split("?")[0]).name
                self._destination = self._fname
                if dest_dir:
                    from os.path import join

                    self._destination = join(dest_dir, self._fname)
                # Note: Actual restoration logged in patched_start
                return None  # Don't call original_init

            # Normal initialization with retry logic
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    return original_init(self, *args, **kwargs)
@@ -68,8 +153,58 @@
                    # Final attempt - re-raise
                    raise
            return None
        except Exception as e:
            # If anything goes wrong in our cache logic, fall back to normal download
            _LOGGER.debug("Cache check failed, falling back to normal download: %s", e)
            self._esphome_cache_url = None
            self._esphome_use_cache = None
            return original_init(self, *args, **kwargs)

    def patched_start(self, *args, **kwargs):
        """Patched start that uses cache when available."""
        try:
            import shutil

            # Get the cache URL and path that were set in __init__
            cache_url = getattr(self, "_esphome_cache_url", None)
            cached_file = getattr(self, "_esphome_use_cache", None)
            # If we're using cache, copy file instead of downloading
            if cached_file:
                try:
                    shutil.copy2(cached_file, self._destination)
                    _LOGGER.info(
                        "Restored %s from cache (avoided download)",
                        Path(cached_file).name,
                    )
                    return True
                except OSError as e:
                    _LOGGER.warning("Failed to copy from cache: %s", e)
                    # Fall through to re-download
            # Perform normal download
            result = original_start(self, *args, **kwargs)
            # Save to cache if it was a GitHub URL
            if cache_url:
                try:
                    cache.save_to_cache(cache_url, Path(self._destination))
                except OSError as e:
                    _LOGGER.debug("Failed to save to cache: %s", e)
            return result
        except Exception as e:
            # If anything goes wrong, fall back to normal download
            _LOGGER.debug("Cache restoration failed, using normal download: %s", e)
            return original_start(self, *args, **kwargs)

    # Apply the patches
    try:
        FileDownloader.__init__ = patched_init
        FileDownloader.start = patched_start
        _LOGGER.debug("GitHub download cache patch applied successfully")
    except Exception as e:
        _LOGGER.warning("Failed to apply GitHub download cache patch: %s", e)
IGNORE_LIB_WARNINGS = f"(?:{'|'.join(['Hash', 'Update'])})"

@@ -87,6 +222,8 @@ FILTER_PLATFORMIO_LINES = [
    r"Memory Usage -> https://bit.ly/pio-memory-usage",
    r"Found: https://platformio.org/lib/show/.*",
    r"Using cache: .*",
    # Don't filter our cache messages - let users see when cache is being used
    # r"Using cached download for .*",
    r"Installing dependencies",
    r"Library Manager: Already installed, built-in library",
    r"Building in .* mode",

script/cache_platformio_downloads.py (new file, 164 lines)

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
Pre-cache PlatformIO GitHub Downloads

This script extracts GitHub URLs from platformio.ini and pre-caches them
to avoid redundant downloads when switching between ESP8266 and ESP32 builds.

Usage:
    python3 script/cache_platformio_downloads.py [platformio.ini]
"""

import argparse
import configparser
from pathlib import Path
import re
import sys

# Import the cache manager
sys.path.insert(0, str(Path(__file__).parent.parent))

from esphome.github_cache import GitHubCache


def extract_github_urls(platformio_ini: Path) -> list[str]:
    """Extract all GitHub URLs from platformio.ini.

    Args:
        platformio_ini: Path to platformio.ini file

    Returns:
        List of GitHub URLs found
    """
    config = configparser.ConfigParser(inline_comment_prefixes=(";",))
    config.read(platformio_ini)
    urls = []
    github_pattern = re.compile(r"https://github\.com/[^\s;]+\.zip")
    for section in config.sections():
        conf = config[section]
        # Check platform
        if "platform" in conf:
            platform_value = conf["platform"]
            matches = github_pattern.findall(platform_value)
            urls.extend(matches)
        # Check platform_packages
        if "platform_packages" in conf:
            for line in conf["platform_packages"].splitlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                matches = github_pattern.findall(line)
                urls.extend(matches)
    # Remove duplicates while preserving order using dict
    return list(dict.fromkeys(urls))


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Pre-cache PlatformIO GitHub downloads",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
This script scans platformio.ini for GitHub URLs and pre-caches them.
This avoids redundant downloads when switching between platforms (e.g., ESP8266 and ESP32).

Examples:
  # Cache downloads from default platformio.ini
  %(prog)s

  # Cache downloads from specific file
  %(prog)s custom_platformio.ini

  # Show what would be cached without downloading
  %(prog)s --dry-run
""",
    )
    parser.add_argument(
        "platformio_ini",
        nargs="?",
        default="platformio.ini",
        help="Path to platformio.ini (default: platformio.ini)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be cached without downloading",
    )
    parser.add_argument(
        "--cache-dir",
        type=Path,
        help="Cache directory (default: ~/.platformio/esphome_download_cache)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force re-download even if cached",
    )
    args = parser.parse_args()

    platformio_ini = Path(args.platformio_ini)
    if not platformio_ini.exists():
        print(f"Error: {platformio_ini} not found", file=sys.stderr)
        return 1

    # Extract URLs
    print(f"Scanning {platformio_ini} for GitHub URLs...")
    urls = extract_github_urls(platformio_ini)
    if not urls:
        print("No GitHub URLs found in platformio.ini")
        return 0
    print(f"Found {len(urls)} unique GitHub URL(s):")
    for url in urls:
        print(f" - {url}")
    print()

    if args.dry_run:
        print("Dry run - not downloading")
        return 0

    # Initialize cache (use PlatformIO directory by default)
    cache_dir = args.cache_dir
    if cache_dir is None:
        cache_dir = Path.home() / ".platformio" / "esphome_download_cache"
    cache = GitHubCache(cache_dir)

    # Cache each URL
    success_count = 0
    for i, url in enumerate(urls, 1):
        print(f"[{i}/{len(urls)}] Checking {url}")
        try:
            # Use the download_with_progress from github_download_cache CLI
            from script.github_download_cache import download_with_progress

            download_with_progress(cache, url, force=args.force, check_updates=True)
            success_count += 1
            print()
        except Exception as e:
            print(f"Error caching {url}: {e}", file=sys.stderr)
            print()

    # Show cache stats
    total_size = cache.cache_size()
    size_mb = total_size / (1024 * 1024)
    print("\nCache summary:")
    print(f" Successfully cached: {success_count}/{len(urls)}")
    print(f" Total cache size: {size_mb:.2f} MB")
    print(f" Cache location: {cache.cache_dir}")
    return 0 if success_count == len(urls) else 1


if __name__ == "__main__":
    sys.exit(main())
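
As a quick sanity check of extract_github_urls(), the sketch below feeds it a made-up platformio.ini fragment (the section name, package names, and URLs are all hypothetical):

from pathlib import Path
import tempfile

# Assumes the script/ directory is on sys.path so the module above imports.
from cache_platformio_downloads import extract_github_urls

INI = """\
[env:esp32-example]
platform = https://github.com/example/platform-espressif32/releases/download/v1.0/platform.zip
platform_packages =
    framework-arduino @ https://github.com/example/arduino-core/releases/download/v2.0/core.zip
"""

with tempfile.NamedTemporaryFile("w", suffix=".ini", delete=False) as f:
    f.write(INI)

# Expect both URLs, in order, with duplicates removed.
print(extract_github_urls(Path(f.name)))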

script/github_download_cache.py (new executable file, 195 lines)

@@ -0,0 +1,195 @@
#!/usr/bin/env python3
"""
GitHub Download Cache CLI
This script provides a command-line interface to the GitHub download cache.
The actual caching logic is in esphome/github_cache.py.
Usage:
python3 script/github_download_cache.py download URL
python3 script/github_download_cache.py list
python3 script/github_download_cache.py stats
python3 script/github_download_cache.py clear
"""
import argparse
from pathlib import Path
import sys
import urllib.request
# Add parent directory to path to import esphome modules
sys.path.insert(0, str(Path(__file__).parent.parent))
from esphome.github_cache import GitHubCache
def download_with_progress(
cache: GitHubCache, url: str, force: bool = False, check_updates: bool = True
) -> Path:
"""Download a URL with progress indicator and caching.
Args:
cache: GitHubCache instance
url: URL to download
force: Force re-download even if cached
check_updates: Check for updates using HTTP 304
Returns:
Path to cached file
"""
# If force, skip cache check
if not force:
cached_path = cache.get_cached_path(url, check_updates=check_updates)
if cached_path:
print(f"Using cached file for {url}")
print(f" Cache: {cached_path}")
return cached_path
# Need to download
print(f"Downloading {url}")
cache_path = cache._get_cache_path(url)
print(f" Cache: {cache_path}")
# Download with progress
temp_path = cache_path.with_suffix(cache_path.suffix + ".tmp")
try:
with urllib.request.urlopen(url) as response:
total_size = int(response.headers.get("Content-Length", 0))
downloaded = 0
with open(temp_path, "wb") as f:
while True:
chunk = response.read(8192)
if not chunk:
break
f.write(chunk)
downloaded += len(chunk)
if total_size > 0:
percent = (downloaded / total_size) * 100
print(f"\r Progress: {percent:.1f}%", end="", flush=True)
print() # New line after progress
# Move to final location
temp_path.replace(cache_path)
# Let cache handle metadata
cache.save_to_cache(url, cache_path)
return cache_path
except (OSError, urllib.error.URLError) as e:
if temp_path.exists():
temp_path.unlink()
raise RuntimeError(f"Failed to download {url}: {e}") from e
def main():
"""CLI entry point."""
parser = argparse.ArgumentParser(
description="GitHub Download Cache Manager",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Download and cache a URL
%(prog)s download https://github.com/pioarduino/registry/releases/download/0.0.1/esptoolpy-v5.1.0.zip
# List cached files
%(prog)s list
# Show cache statistics
%(prog)s stats
# Clear cache
%(prog)s clear
""",
)
parser.add_argument(
"--cache-dir",
type=Path,
help="Cache directory (default: ~/.platformio/esphome_download_cache)",
)
subparsers = parser.add_subparsers(dest="command", help="Command to execute")
# Download command
download_parser = subparsers.add_parser("download", help="Download and cache a URL")
download_parser.add_argument("url", help="URL to download")
download_parser.add_argument(
"--force", action="store_true", help="Force re-download even if cached"
)
download_parser.add_argument(
"--no-check-updates",
action="store_true",
help="Skip checking for updates (don't use HTTP 304)",
)
# List command
subparsers.add_parser("list", help="List cached files")
# Stats command
subparsers.add_parser("stats", help="Show cache statistics")
# Clear command
subparsers.add_parser("clear", help="Clear all cached files")
args = parser.parse_args()
if not args.command:
parser.print_help()
return 1
# Use PlatformIO cache directory by default
if args.cache_dir is None:
args.cache_dir = Path.home() / ".platformio" / "esphome_download_cache"
cache = GitHubCache(args.cache_dir)
if args.command == "download":
try:
check_updates = not args.no_check_updates
cache_path = download_with_progress(
cache, args.url, force=args.force, check_updates=check_updates
)
print(f"\nCached at: {cache_path}")
return 0
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
elif args.command == "list":
cached = cache.list_cached()
if not cached:
print("No cached files")
return 0
print(f"Cached files ({len(cached)}):")
for item in cached:
size_mb = item["size"] / (1024 * 1024)
print(f" {item['url']}")
print(f" Size: {size_mb:.2f} MB")
print(f" Path: {item['path']}")
return 0
elif args.command == "stats":
total_size = cache.cache_size()
cached_count = len(cache.list_cached())
size_mb = total_size / (1024 * 1024)
print(f"Cache directory: {cache.cache_dir}")
print(f"Cached files: {cached_count}")
print(f"Total size: {size_mb:.2f} MB")
return 0
elif args.command == "clear":
cache.clear_cache()
return 0
return 1
if __name__ == "__main__":
sys.exit(main())

script/platformio_download_wrapper.py (new file, 138 lines)

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
PlatformIO Download Wrapper with Caching

This script can be used as a wrapper around PlatformIO downloads to add caching.
It intercepts download operations and uses the GitHub download cache.

This is designed to be called from PlatformIO's extra_scripts if needed.
"""

from pathlib import Path
import sys

# Make the script directory (for github_download_cache) and the repository
# root (for esphome.github_cache) importable; the cache class itself lives
# in esphome/github_cache.py, the progress-aware downloader in the CLI module.
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent.parent))

from github_download_cache import download_with_progress
from esphome.github_cache import GitHubCache


def is_github_url(url: str) -> bool:
    """Check if a URL is a GitHub URL."""
    return "github.com" in url.lower()


def cached_download_handler(source, target, env):
    """PlatformIO download handler that uses caching for GitHub URLs.

    This function can be registered as a custom download handler in PlatformIO.

    Args:
        source: Source URL
        target: Target file path
        env: SCons environment
    """
    import shutil
    import urllib.request

    url = str(source[0])
    target_path = Path(str(target[0]))

    # Only cache GitHub URLs
    if not is_github_url(url):
        # Fall back to default download
        print(f"Downloading (no cache): {url}")
        with (
            urllib.request.urlopen(url) as response,
            open(target_path, "wb") as out_file,
        ):
            shutil.copyfileobj(response, out_file)
        return

    # Use cache for GitHub URLs
    cache = GitHubCache()
    print(f"Downloading with cache: {url}")
    try:
        cached_path = download_with_progress(cache, url, check_updates=True)
        # Copy from cache to target
        shutil.copy2(cached_path, target_path)
        print(f" Copied to: {target_path}")
    except Exception as e:
        print(f"Cache download failed, using direct download: {e}")
        # Fall back to direct download
        with (
            urllib.request.urlopen(url) as response,
            open(target_path, "wb") as out_file,
        ):
            shutil.copyfileobj(response, out_file)


def setup_platformio_caching():
    """Setup PlatformIO to use cached downloads.

    This should be called from an extra_scripts file in platformio.ini.

    Example extra_scripts file (e.g., platformio_hooks.py):
        Import("env")
        from script.platformio_download_wrapper import setup_platformio_caching
        setup_platformio_caching()
    """
    try:
        from SCons.Script import DefaultEnvironment

        DefaultEnvironment()
        # Register custom download handler
        # Note: This may not work with all PlatformIO versions
        # as the download mechanism is internal
        print("Note: Direct download interception is not fully supported.")
        print("Please use the cache_platformio_downloads.py script instead.")
    except ImportError:
        print("Warning: SCons not available, cannot setup download caching")


if __name__ == "__main__":
    # CLI mode - can be used to manually download a URL with caching
    import argparse

    parser = argparse.ArgumentParser(description="Download a URL with caching")
    parser.add_argument("url", help="URL to download")
    parser.add_argument("target", help="Target file path")
    parser.add_argument("--cache-dir", type=Path, help="Cache directory")
    args = parser.parse_args()

    cache = GitHubCache(args.cache_dir)
    target_path = Path(args.target)

    try:
        if is_github_url(args.url):
            print(f"Downloading with cache: {args.url}")
            cached_path = download_with_progress(cache, args.url)
            # Copy to target
            import shutil

            target_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(cached_path, target_path)
            print(f"Copied to: {target_path}")
        else:
            print(f"Downloading directly (not a GitHub URL): {args.url}")
            import shutil
            import urllib.request

            target_path.parent.mkdir(parents=True, exist_ok=True)
            with (
                urllib.request.urlopen(args.url) as response,
                open(target_path, "wb") as out_file,
            ):
                shutil.copyfileobj(response, out_file)
        sys.exit(0)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
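
Expanding the docstring example above into a complete, hypothetical extra_scripts hook (the file name and wiring are assumptions; Import() is injected by SCons when PlatformIO executes the hook):

# platformio_hooks.py (hypothetical), referenced from extra_scripts in platformio.ini
Import("env")  # noqa: F821 - provided by the SCons runtime

# Assumes the repository root is importable; extend sys.path first if not.
from script.platformio_download_wrapper import setup_platformio_caching

setup_platformio_caching()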