1
0
mirror of https://github.com/esphome/esphome.git synced 2025-10-25 21:23:53 +01:00

[git] Automatically recover from broken git repositories in external_components (#11246)

This commit is contained in:
J. Nick Koston
2025-10-15 16:58:58 -10:00
committed by GitHub
parent 708f8a95e5
commit 42f1b61e31
3 changed files with 594 additions and 62 deletions

View File

@@ -5,6 +5,7 @@ import hashlib
import logging
from pathlib import Path
import re
import shutil
import subprocess
import urllib.parse
@@ -17,14 +18,60 @@ _LOGGER = logging.getLogger(__name__)
NEVER_REFRESH = TimePeriodSeconds(seconds=-1)
def run_git_command(cmd, cwd=None) -> str:
_LOGGER.debug("Running git command: %s", " ".join(cmd))
class GitException(cv.Invalid):
"""Base exception for git-related errors."""
class GitNotInstalledError(GitException):
"""Exception raised when git is not installed on the system."""
class GitCommandError(GitException):
"""Exception raised when a git command fails."""
class GitRepositoryError(GitException):
"""Exception raised when a git repository is in an invalid state."""
def run_git_command(cmd: list[str], git_dir: Path | None = None) -> str:
if git_dir is not None:
_LOGGER.debug(
"Running git command with repository isolation: %s (git_dir=%s)",
" ".join(cmd),
git_dir,
)
else:
_LOGGER.debug("Running git command: %s", " ".join(cmd))
# Set up environment for repository isolation if git_dir is provided
# Force git to only operate on this specific repository by setting
# GIT_DIR and GIT_WORK_TREE. This prevents git from walking up the
# directory tree to find parent repositories when the target repo's
# .git directory is corrupt. Without this, commands like 'git stash'
# could accidentally operate on parent repositories (e.g., the main
# ESPHome repo) instead of failing, causing data loss.
env: dict[str, str] | None = None
cwd: str | None = None
if git_dir is not None:
env = {
**subprocess.os.environ,
"GIT_DIR": str(Path(git_dir) / ".git"),
"GIT_WORK_TREE": str(git_dir),
}
cwd = str(git_dir)
try:
ret = subprocess.run(
cmd, cwd=cwd, capture_output=True, check=False, close_fds=False
cmd,
cwd=cwd,
capture_output=True,
check=False,
close_fds=False,
env=env,
)
except FileNotFoundError as err:
raise cv.Invalid(
raise GitNotInstalledError(
"git is not installed but required for external_components.\n"
"Please see https://git-scm.com/book/en/v2/Getting-Started-Installing-Git for installing git"
) from err
@@ -33,8 +80,8 @@ def run_git_command(cmd, cwd=None) -> str:
err_str = ret.stderr.decode("utf-8")
lines = [x.strip() for x in err_str.splitlines()]
if lines[-1].startswith("fatal:"):
raise cv.Invalid(lines[-1][len("fatal: ") :])
raise cv.Invalid(err_str)
raise GitCommandError(lines[-1][len("fatal: ") :])
raise GitCommandError(err_str)
return ret.stdout.decode("utf-8").strip()
@@ -55,6 +102,7 @@ def clone_or_update(
username: str = None,
password: str = None,
submodules: list[str] | None = None,
_recover_broken: bool = True,
) -> tuple[Path, Callable[[], None] | None]:
key = f"{url}@{ref}"
@@ -75,15 +123,15 @@ def clone_or_update(
# We need to fetch the PR branch first, otherwise git will complain
# about missing objects
_LOGGER.info("Fetching %s", ref)
run_git_command(["git", "fetch", "--", "origin", ref], str(repo_dir))
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
run_git_command(["git", "fetch", "--", "origin", ref], git_dir=repo_dir)
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], git_dir=repo_dir)
if submodules is not None:
_LOGGER.info(
"Initialising submodules (%s) for %s", ", ".join(submodules), key
"Initializing submodules (%s) for %s", ", ".join(submodules), key
)
run_git_command(
["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
["git", "submodule", "update", "--init"] + submodules, git_dir=repo_dir
)
else:
@@ -99,32 +147,82 @@ def clone_or_update(
file_timestamp = Path(repo_dir / ".git" / "HEAD")
age = datetime.now() - datetime.fromtimestamp(file_timestamp.stat().st_mtime)
if refresh is None or age.total_seconds() > refresh.total_seconds:
old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir))
_LOGGER.info("Updating %s", key)
_LOGGER.debug("Location: %s", repo_dir)
# Stash local changes (if any)
run_git_command(
["git", "stash", "push", "--include-untracked"], str(repo_dir)
)
# Fetch remote ref
cmd = ["git", "fetch", "--", "origin"]
if ref is not None:
cmd.append(ref)
run_git_command(cmd, str(repo_dir))
# Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
# Try to update the repository, recovering from broken state if needed
old_sha: str | None = None
try:
# First verify the repository is valid by checking HEAD
# Use git_dir parameter to prevent git from walking up to parent repos
old_sha = run_git_command(
["git", "rev-parse", "HEAD"], git_dir=repo_dir
)
_LOGGER.info("Updating %s", key)
_LOGGER.debug("Location: %s", repo_dir)
# Stash local changes (if any)
# Use git_dir to ensure this only affects the specific repo
run_git_command(
["git", "stash", "push", "--include-untracked"],
git_dir=repo_dir,
)
# Fetch remote ref
cmd = ["git", "fetch", "--", "origin"]
if ref is not None:
cmd.append(ref)
run_git_command(cmd, git_dir=repo_dir)
# Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
run_git_command(
["git", "reset", "--hard", "FETCH_HEAD"],
git_dir=repo_dir,
)
except GitException as err:
# Repository is in a broken state or update failed
# Only attempt recovery once to prevent infinite recursion
if not _recover_broken:
_LOGGER.error(
"Repository %s recovery failed, cannot retry (already attempted once)",
key,
)
raise
_LOGGER.warning(
"Repository %s has issues (%s), attempting recovery",
key,
err,
)
_LOGGER.info("Removing broken repository at %s", repo_dir)
shutil.rmtree(repo_dir)
_LOGGER.info("Successfully removed broken repository, re-cloning...")
# Recursively call clone_or_update to re-clone
# Set _recover_broken=False to prevent infinite recursion
result = clone_or_update(
url=url,
ref=ref,
refresh=refresh,
domain=domain,
username=username,
password=password,
submodules=submodules,
_recover_broken=False,
)
_LOGGER.info("Repository %s successfully recovered", key)
return result
if submodules is not None:
_LOGGER.info(
"Updating submodules (%s) for %s", ", ".join(submodules), key
)
run_git_command(
["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
["git", "submodule", "update", "--init"] + submodules,
git_dir=repo_dir,
)
def revert():
_LOGGER.info("Reverting changes to %s -> %s", key, old_sha)
run_git_command(["git", "reset", "--hard", old_sha], str(repo_dir))
run_git_command(["git", "reset", "--hard", old_sha], git_dir=repo_dir)
return repo_dir, revert