1
0
mirror of https://github.com/esphome/esphome.git synced 2025-10-16 00:33:54 +01:00

Merge branch 'recover_broken_git_repo' into integration

This commit is contained in:
J. Nick Koston
2025-10-14 16:24:19 -10:00
2 changed files with 204 additions and 15 deletions

View File

@@ -5,6 +5,7 @@ import hashlib
import logging
from pathlib import Path
import re
import shutil
import subprocess
import urllib.parse
@@ -55,6 +56,7 @@ def clone_or_update(
username: str = None,
password: str = None,
submodules: list[str] | None = None,
_recover_broken: bool = True,
) -> tuple[Path, Callable[[], None] | None]:
key = f"{url}@{ref}"
@@ -80,7 +82,7 @@ def clone_or_update(
if submodules is not None:
_LOGGER.info(
"Initialising submodules (%s) for %s", ", ".join(submodules), key
"Initializing submodules (%s) for %s", ", ".join(submodules), key
)
run_git_command(
["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
@@ -99,20 +101,47 @@ def clone_or_update(
file_timestamp = Path(repo_dir / ".git" / "HEAD")
age = datetime.now() - datetime.fromtimestamp(file_timestamp.stat().st_mtime)
if refresh is None or age.total_seconds() > refresh.total_seconds:
old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir))
_LOGGER.info("Updating %s", key)
_LOGGER.debug("Location: %s", repo_dir)
# Stash local changes (if any)
run_git_command(
["git", "stash", "push", "--include-untracked"], str(repo_dir)
)
# Fetch remote ref
cmd = ["git", "fetch", "--", "origin"]
if ref is not None:
cmd.append(ref)
run_git_command(cmd, str(repo_dir))
# Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
# Try to update the repository, recovering from broken state if needed
old_sha: str | None = None
try:
old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir))
_LOGGER.info("Updating %s", key)
_LOGGER.debug("Location: %s", repo_dir)
# Stash local changes (if any)
run_git_command(
["git", "stash", "push", "--include-untracked"], str(repo_dir)
)
# Fetch remote ref
cmd = ["git", "fetch", "--", "origin"]
if ref is not None:
cmd.append(ref)
run_git_command(cmd, str(repo_dir))
# Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
except cv.Invalid as err:
# Repository is in a broken state or update failed
# Only attempt recovery once to prevent infinite recursion
if not _recover_broken:
raise
_LOGGER.warning(
"Repository %s has issues (%s), removing and re-cloning",
key,
err,
)
shutil.rmtree(repo_dir)
# Recursively call clone_or_update to re-clone
# Set _recover_broken=False to prevent infinite recursion
return clone_or_update(
url=url,
ref=ref,
refresh=refresh,
domain=domain,
username=username,
password=password,
submodules=submodules,
_recover_broken=False,
)
if submodules is not None:
_LOGGER.info(

View File

@@ -6,7 +6,10 @@ import os
from pathlib import Path
from unittest.mock import Mock
import pytest
from esphome import git
import esphome.config_validation as cv
from esphome.core import CORE, TimePeriodSeconds
@@ -244,3 +247,160 @@ def test_clone_or_update_with_none_refresh_always_updates(
if len(call[0]) > 0 and "fetch" in call[0][0]
]
assert len(fetch_calls) > 0
@pytest.mark.parametrize(
    ("fail_command", "error_message"),
    [
        (
            "rev-parse",
            "ambiguous argument 'HEAD': unknown revision or path not in the working tree.",
        ),
        ("stash", "fatal: unable to write new index file"),
        (
            "fetch",
            "fatal: unable to access 'https://github.com/test/repo/': Could not resolve host",
        ),
        ("reset", "fatal: Could not reset index file to revision 'FETCH_HEAD'"),
    ],
)
def test_clone_or_update_recovers_from_git_failures(
    tmp_path: Path, mock_run_git_command: Mock, fail_command: str, error_message: str
) -> None:
    """Test that repos are re-cloned when various git commands fail.

    For each parametrized git subcommand, the first invocation raises
    ``cv.Invalid`` and every later invocation succeeds. ``clone_or_update``
    is then expected to issue a ``clone`` and still return the repo directory.
    """
    # Set up CORE.config_path so data_dir uses tmp_path
    CORE.config_path = tmp_path / "test.yaml"

    url = "https://github.com/test/repo"
    ref = "main"
    key = f"{url}@{ref}"
    domain = "test"

    # Same directory naming as exercised by the assertion on result_dir below:
    # first 8 hex chars of sha256("<url>@<ref>") under .esphome/<domain>.
    h = hashlib.new("sha256")
    h.update(key.encode())
    repo_dir = tmp_path / ".esphome" / domain / h.hexdigest()[:8]

    # Create an existing checkout whose FETCH_HEAD is two days old so that the
    # one-day refresh period below has expired and an update is attempted.
    repo_dir.mkdir(parents=True)
    git_dir = repo_dir / ".git"
    git_dir.mkdir()
    fetch_head = git_dir / "FETCH_HEAD"
    fetch_head.write_text("test")  # creates the file; no separate touch() needed
    old_time = datetime.now() - timedelta(days=2)
    os.utime(fetch_head, (old_time.timestamp(), old_time.timestamp()))

    # Subcommands we track, in match-priority order.
    tracked_commands = ("rev-parse", "stash", "fetch", "reset", "clone")
    # Per-subcommand invocation counts, so only the first call can be failed.
    call_counts: dict[str, int] = {}

    def git_command_side_effect(cmd: list[str], cwd: str | None = None) -> str:
        # ``cmd`` is the argv list, so ``in`` is an exact-element match.
        cmd_type = next((name for name in tracked_commands if name in cmd), None)

        if cmd_type is not None:
            call_counts[cmd_type] = call_counts.get(cmd_type, 0) + 1

            # Fail on first call to the specified command, succeed afterwards
            if cmd_type == fail_command and call_counts[cmd_type] == 1:
                raise cv.Invalid(error_message)

        # Default successful responses
        if cmd_type == "rev-parse":
            return "abc123"
        return ""

    mock_run_git_command.side_effect = git_command_side_effect

    refresh = TimePeriodSeconds(days=1)
    result_dir, _ = git.clone_or_update(
        url=url,
        ref=ref,
        refresh=refresh,
        domain=domain,
    )

    # Verify recovery happened. Assert on the exact subcommand counters rather
    # than on substrings of str(call): the latter also scans path arguments,
    # and the tmp_path directory is named after this test (whose name contains
    # "clone"), so a substring check could pass without any clone being run.
    assert call_counts.get(fail_command, 0) >= 1
    assert call_counts.get("clone", 0) >= 1

    # Verify the repo directory path is returned
    assert result_dir == repo_dir
def test_clone_or_update_fails_when_recovery_also_fails(
    tmp_path: Path, mock_run_git_command: Mock
) -> None:
    """Ensure a failed recovery re-clone raises instead of recursing forever.

    ``rev-parse`` always fails (broken repo) and ``clone`` always fails
    (recovery cannot succeed), so exactly one recovery attempt is expected
    before the clone error propagates to the caller.
    """
    # Point CORE at tmp_path so the data directory lives under it
    CORE.config_path = tmp_path / "test.yaml"

    url = "https://github.com/test/repo"
    ref = "main"
    domain = "test"

    digest = hashlib.new("sha256")
    digest.update(f"{url}@{ref}".encode())
    repo_dir = tmp_path / ".esphome" / domain / digest.hexdigest()[:8]

    # Lay down an existing checkout with a stale (two-day-old) FETCH_HEAD
    repo_dir.mkdir(parents=True)
    git_dir = repo_dir / ".git"
    git_dir.mkdir()
    stale_marker = git_dir / "FETCH_HEAD"
    stale_marker.write_text("test")
    stale_stamp = (datetime.now() - timedelta(days=2)).timestamp()
    stale_marker.touch()
    os.utime(stale_marker, (stale_stamp, stale_stamp))

    # Subcommand -> error raised whenever it is invoked; checked in this order.
    failing_subcommands = {
        # Broken repository: HEAD cannot be resolved
        "rev-parse": "ambiguous argument 'HEAD': unknown revision or path not in the working tree.",
        # Recovery attempt: re-clone cannot reach the remote
        "clone": "fatal: unable to access repository",
    }

    def failing_git(cmd: list[str], cwd: str | None = None) -> str:
        for subcommand, message in failing_subcommands.items():
            if subcommand in cmd:
                raise cv.Invalid(message)
        return ""

    mock_run_git_command.side_effect = failing_git

    one_day = TimePeriodSeconds(days=1)

    # The clone error from the single recovery attempt must surface
    with pytest.raises(cv.Invalid, match="fatal: unable to access repository"):
        git.clone_or_update(
            url=url,
            ref=ref,
            refresh=one_day,
            domain=domain,
        )

    recorded = mock_run_git_command.call_args_list

    # Exactly one clone: the recovery attempt that failed (no repeat recursion)
    clone_count = sum(1 for call in recorded if "clone" in call[0][0])
    assert clone_count == 1

    # Exactly one rev-parse: the failure that triggered the recovery
    rev_parse_count = sum(1 for call in recorded if "rev-parse" in call[0][0])
    assert rev_parse_count == 1