From b927cea0d61795ad112c7819a9fa7d35fe704b89 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 14 Oct 2025 16:23:06 -1000 Subject: [PATCH] [git] Automatically recover from broken git repositories in external_components --- esphome/git.py | 59 +++++++++---- tests/unit_tests/test_git.py | 160 +++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 15 deletions(-) diff --git a/esphome/git.py b/esphome/git.py index 62fe37a3fe..7f023e7834 100644 --- a/esphome/git.py +++ b/esphome/git.py @@ -5,6 +5,7 @@ import hashlib import logging from pathlib import Path import re +import shutil import subprocess import urllib.parse @@ -55,6 +56,7 @@ def clone_or_update( username: str = None, password: str = None, submodules: list[str] | None = None, + _recover_broken: bool = True, ) -> tuple[Path, Callable[[], None] | None]: key = f"{url}@{ref}" @@ -80,7 +82,7 @@ def clone_or_update( if submodules is not None: _LOGGER.info( - "Initialising submodules (%s) for %s", ", ".join(submodules), key + "Initializing submodules (%s) for %s", ", ".join(submodules), key ) run_git_command( ["git", "submodule", "update", "--init"] + submodules, str(repo_dir) @@ -99,20 +101,47 @@ def clone_or_update( file_timestamp = Path(repo_dir / ".git" / "HEAD") age = datetime.now() - datetime.fromtimestamp(file_timestamp.stat().st_mtime) if refresh is None or age.total_seconds() > refresh.total_seconds: - old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir)) - _LOGGER.info("Updating %s", key) - _LOGGER.debug("Location: %s", repo_dir) - # Stash local changes (if any) - run_git_command( - ["git", "stash", "push", "--include-untracked"], str(repo_dir) - ) - # Fetch remote ref - cmd = ["git", "fetch", "--", "origin"] - if ref is not None: - cmd.append(ref) - run_git_command(cmd, str(repo_dir)) - # Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch) - run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir)) + # Try to update the repository, recovering from broken state if needed + old_sha: str | None = None + try: + old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir)) + _LOGGER.info("Updating %s", key) + _LOGGER.debug("Location: %s", repo_dir) + # Stash local changes (if any) + run_git_command( + ["git", "stash", "push", "--include-untracked"], str(repo_dir) + ) + # Fetch remote ref + cmd = ["git", "fetch", "--", "origin"] + if ref is not None: + cmd.append(ref) + run_git_command(cmd, str(repo_dir)) + # Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch) + run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir)) + except cv.Invalid as err: + # Repository is in a broken state or update failed + # Only attempt recovery once to prevent infinite recursion + if not _recover_broken: + raise + + _LOGGER.warning( + "Repository %s has issues (%s), removing and re-cloning", + key, + err, + ) + shutil.rmtree(repo_dir) + # Recursively call clone_or_update to re-clone + # Set _recover_broken=False to prevent infinite recursion + return clone_or_update( + url=url, + ref=ref, + refresh=refresh, + domain=domain, + username=username, + password=password, + submodules=submodules, + _recover_broken=False, + ) if submodules is not None: _LOGGER.info( diff --git a/tests/unit_tests/test_git.py b/tests/unit_tests/test_git.py index 6a51206ec2..748d384018 100644 --- a/tests/unit_tests/test_git.py +++ b/tests/unit_tests/test_git.py @@ -6,7 +6,10 @@ import os from pathlib import Path from unittest.mock import Mock +import pytest + from esphome import git +import esphome.config_validation as cv from esphome.core import CORE, TimePeriodSeconds @@ -244,3 +247,160 @@ def test_clone_or_update_with_none_refresh_always_updates( if len(call[0]) > 0 and "fetch" in call[0][0] ] assert len(fetch_calls) > 0 + + +@pytest.mark.parametrize( + ("fail_command", "error_message"), + [ + ( + "rev-parse", + "ambiguous argument 'HEAD': unknown revision or path not in the working tree.", + ), + ("stash", "fatal: unable to write new index file"), + ( + "fetch", + "fatal: unable to access 'https://github.com/test/repo/': Could not resolve host", + ), + ("reset", "fatal: Could not reset index file to revision 'FETCH_HEAD'"), + ], +) +def test_clone_or_update_recovers_from_git_failures( + tmp_path: Path, mock_run_git_command: Mock, fail_command: str, error_message: str +) -> None: + """Test that repos are re-cloned when various git commands fail.""" + # Set up CORE.config_path so data_dir uses tmp_path + CORE.config_path = tmp_path / "test.yaml" + + url = "https://github.com/test/repo" + ref = "main" + key = f"{url}@{ref}" + domain = "test" + + h = hashlib.new("sha256") + h.update(key.encode()) + repo_dir = tmp_path / ".esphome" / domain / h.hexdigest()[:8] + + # Create repo directory + repo_dir.mkdir(parents=True) + git_dir = repo_dir / ".git" + git_dir.mkdir() + + fetch_head = git_dir / "FETCH_HEAD" + fetch_head.write_text("test") + old_time = datetime.now() - timedelta(days=2) + fetch_head.touch() + os.utime(fetch_head, (old_time.timestamp(), old_time.timestamp())) + + # Track command call counts to make first call fail, subsequent calls succeed + call_counts: dict[str, int] = {} + + def git_command_side_effect(cmd: list[str], cwd: str | None = None) -> str: + # Determine which command this is + cmd_type = None + if "rev-parse" in cmd: + cmd_type = "rev-parse" + elif "stash" in cmd: + cmd_type = "stash" + elif "fetch" in cmd: + cmd_type = "fetch" + elif "reset" in cmd: + cmd_type = "reset" + elif "clone" in cmd: + cmd_type = "clone" + + # Track call count for this command type + if cmd_type: + call_counts[cmd_type] = call_counts.get(cmd_type, 0) + 1 + + # Fail on first call to the specified command, succeed on subsequent calls + if cmd_type == fail_command and call_counts[cmd_type] == 1: + raise cv.Invalid(error_message) + + # Default successful responses + if cmd_type == "rev-parse": + return "abc123" + return "" + + mock_run_git_command.side_effect = git_command_side_effect + + refresh = TimePeriodSeconds(days=1) + result_dir, revert = git.clone_or_update( + url=url, + ref=ref, + refresh=refresh, + domain=domain, + ) + + # Verify recovery happened + call_list = mock_run_git_command.call_args_list + + # Should have attempted the failing command + assert any(fail_command in str(c) for c in call_list) + + # Should have called clone for recovery + assert any("clone" in str(c) for c in call_list) + + # Verify the repo directory path is returned + assert result_dir == repo_dir + + +def test_clone_or_update_fails_when_recovery_also_fails( + tmp_path: Path, mock_run_git_command: Mock +) -> None: + """Test that we don't infinitely recurse when recovery also fails.""" + # Set up CORE.config_path so data_dir uses tmp_path + CORE.config_path = tmp_path / "test.yaml" + + url = "https://github.com/test/repo" + ref = "main" + key = f"{url}@{ref}" + domain = "test" + + h = hashlib.new("sha256") + h.update(key.encode()) + repo_dir = tmp_path / ".esphome" / domain / h.hexdigest()[:8] + + # Create repo directory + repo_dir.mkdir(parents=True) + git_dir = repo_dir / ".git" + git_dir.mkdir() + + fetch_head = git_dir / "FETCH_HEAD" + fetch_head.write_text("test") + old_time = datetime.now() - timedelta(days=2) + fetch_head.touch() + os.utime(fetch_head, (old_time.timestamp(), old_time.timestamp())) + + # Mock git command to fail on clone (simulating network failure during recovery) + def git_command_side_effect(cmd: list[str], cwd: str | None = None) -> str: + if "rev-parse" in cmd: + # First time fails (broken repo) + raise cv.Invalid( + "ambiguous argument 'HEAD': unknown revision or path not in the working tree." + ) + if "clone" in cmd: + # Clone also fails (recovery fails) + raise cv.Invalid("fatal: unable to access repository") + return "" + + mock_run_git_command.side_effect = git_command_side_effect + + refresh = TimePeriodSeconds(days=1) + + # Should raise after one recovery attempt fails + with pytest.raises(cv.Invalid, match="fatal: unable to access repository"): + git.clone_or_update( + url=url, + ref=ref, + refresh=refresh, + domain=domain, + ) + + # Verify we only tried to clone once (no infinite recursion) + call_list = mock_run_git_command.call_args_list + clone_calls = [c for c in call_list if "clone" in c[0][0]] + # Should have exactly one clone call (the recovery attempt that failed) + assert len(clone_calls) == 1 + # Should have tried rev-parse once (which failed and triggered recovery) + rev_parse_calls = [c for c in call_list if "rev-parse" in c[0][0]] + assert len(rev_parse_calls) == 1