mirror of
https://github.com/esphome/esphome.git
synced 2025-10-25 21:23:53 +01:00
[git] Automatically recover from broken git repositories in external_components (#11246)
This commit is contained in:
150
esphome/git.py
150
esphome/git.py
@@ -5,6 +5,7 @@ import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import urllib.parse
|
||||
|
||||
@@ -17,14 +18,60 @@ _LOGGER = logging.getLogger(__name__)
|
||||
NEVER_REFRESH = TimePeriodSeconds(seconds=-1)
|
||||
|
||||
|
||||
def run_git_command(cmd, cwd=None) -> str:
|
||||
_LOGGER.debug("Running git command: %s", " ".join(cmd))
|
||||
class GitException(cv.Invalid):
|
||||
"""Base exception for git-related errors."""
|
||||
|
||||
|
||||
class GitNotInstalledError(GitException):
|
||||
"""Exception raised when git is not installed on the system."""
|
||||
|
||||
|
||||
class GitCommandError(GitException):
|
||||
"""Exception raised when a git command fails."""
|
||||
|
||||
|
||||
class GitRepositoryError(GitException):
|
||||
"""Exception raised when a git repository is in an invalid state."""
|
||||
|
||||
|
||||
def run_git_command(cmd: list[str], git_dir: Path | None = None) -> str:
|
||||
if git_dir is not None:
|
||||
_LOGGER.debug(
|
||||
"Running git command with repository isolation: %s (git_dir=%s)",
|
||||
" ".join(cmd),
|
||||
git_dir,
|
||||
)
|
||||
else:
|
||||
_LOGGER.debug("Running git command: %s", " ".join(cmd))
|
||||
|
||||
# Set up environment for repository isolation if git_dir is provided
|
||||
# Force git to only operate on this specific repository by setting
|
||||
# GIT_DIR and GIT_WORK_TREE. This prevents git from walking up the
|
||||
# directory tree to find parent repositories when the target repo's
|
||||
# .git directory is corrupt. Without this, commands like 'git stash'
|
||||
# could accidentally operate on parent repositories (e.g., the main
|
||||
# ESPHome repo) instead of failing, causing data loss.
|
||||
env: dict[str, str] | None = None
|
||||
cwd: str | None = None
|
||||
if git_dir is not None:
|
||||
env = {
|
||||
**subprocess.os.environ,
|
||||
"GIT_DIR": str(Path(git_dir) / ".git"),
|
||||
"GIT_WORK_TREE": str(git_dir),
|
||||
}
|
||||
cwd = str(git_dir)
|
||||
|
||||
try:
|
||||
ret = subprocess.run(
|
||||
cmd, cwd=cwd, capture_output=True, check=False, close_fds=False
|
||||
cmd,
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
check=False,
|
||||
close_fds=False,
|
||||
env=env,
|
||||
)
|
||||
except FileNotFoundError as err:
|
||||
raise cv.Invalid(
|
||||
raise GitNotInstalledError(
|
||||
"git is not installed but required for external_components.\n"
|
||||
"Please see https://git-scm.com/book/en/v2/Getting-Started-Installing-Git for installing git"
|
||||
) from err
|
||||
@@ -33,8 +80,8 @@ def run_git_command(cmd, cwd=None) -> str:
|
||||
err_str = ret.stderr.decode("utf-8")
|
||||
lines = [x.strip() for x in err_str.splitlines()]
|
||||
if lines[-1].startswith("fatal:"):
|
||||
raise cv.Invalid(lines[-1][len("fatal: ") :])
|
||||
raise cv.Invalid(err_str)
|
||||
raise GitCommandError(lines[-1][len("fatal: ") :])
|
||||
raise GitCommandError(err_str)
|
||||
|
||||
return ret.stdout.decode("utf-8").strip()
|
||||
|
||||
@@ -55,6 +102,7 @@ def clone_or_update(
|
||||
username: str = None,
|
||||
password: str = None,
|
||||
submodules: list[str] | None = None,
|
||||
_recover_broken: bool = True,
|
||||
) -> tuple[Path, Callable[[], None] | None]:
|
||||
key = f"{url}@{ref}"
|
||||
|
||||
@@ -75,15 +123,15 @@ def clone_or_update(
|
||||
# We need to fetch the PR branch first, otherwise git will complain
|
||||
# about missing objects
|
||||
_LOGGER.info("Fetching %s", ref)
|
||||
run_git_command(["git", "fetch", "--", "origin", ref], str(repo_dir))
|
||||
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
|
||||
run_git_command(["git", "fetch", "--", "origin", ref], git_dir=repo_dir)
|
||||
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], git_dir=repo_dir)
|
||||
|
||||
if submodules is not None:
|
||||
_LOGGER.info(
|
||||
"Initialising submodules (%s) for %s", ", ".join(submodules), key
|
||||
"Initializing submodules (%s) for %s", ", ".join(submodules), key
|
||||
)
|
||||
run_git_command(
|
||||
["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
|
||||
["git", "submodule", "update", "--init"] + submodules, git_dir=repo_dir
|
||||
)
|
||||
|
||||
else:
|
||||
@@ -99,32 +147,82 @@ def clone_or_update(
|
||||
file_timestamp = Path(repo_dir / ".git" / "HEAD")
|
||||
age = datetime.now() - datetime.fromtimestamp(file_timestamp.stat().st_mtime)
|
||||
if refresh is None or age.total_seconds() > refresh.total_seconds:
|
||||
old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir))
|
||||
_LOGGER.info("Updating %s", key)
|
||||
_LOGGER.debug("Location: %s", repo_dir)
|
||||
# Stash local changes (if any)
|
||||
run_git_command(
|
||||
["git", "stash", "push", "--include-untracked"], str(repo_dir)
|
||||
)
|
||||
# Fetch remote ref
|
||||
cmd = ["git", "fetch", "--", "origin"]
|
||||
if ref is not None:
|
||||
cmd.append(ref)
|
||||
run_git_command(cmd, str(repo_dir))
|
||||
# Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
|
||||
run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
|
||||
# Try to update the repository, recovering from broken state if needed
|
||||
old_sha: str | None = None
|
||||
try:
|
||||
# First verify the repository is valid by checking HEAD
|
||||
# Use git_dir parameter to prevent git from walking up to parent repos
|
||||
old_sha = run_git_command(
|
||||
["git", "rev-parse", "HEAD"], git_dir=repo_dir
|
||||
)
|
||||
|
||||
_LOGGER.info("Updating %s", key)
|
||||
_LOGGER.debug("Location: %s", repo_dir)
|
||||
|
||||
# Stash local changes (if any)
|
||||
# Use git_dir to ensure this only affects the specific repo
|
||||
run_git_command(
|
||||
["git", "stash", "push", "--include-untracked"],
|
||||
git_dir=repo_dir,
|
||||
)
|
||||
|
||||
# Fetch remote ref
|
||||
cmd = ["git", "fetch", "--", "origin"]
|
||||
if ref is not None:
|
||||
cmd.append(ref)
|
||||
run_git_command(cmd, git_dir=repo_dir)
|
||||
|
||||
# Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
|
||||
run_git_command(
|
||||
["git", "reset", "--hard", "FETCH_HEAD"],
|
||||
git_dir=repo_dir,
|
||||
)
|
||||
except GitException as err:
|
||||
# Repository is in a broken state or update failed
|
||||
# Only attempt recovery once to prevent infinite recursion
|
||||
if not _recover_broken:
|
||||
_LOGGER.error(
|
||||
"Repository %s recovery failed, cannot retry (already attempted once)",
|
||||
key,
|
||||
)
|
||||
raise
|
||||
|
||||
_LOGGER.warning(
|
||||
"Repository %s has issues (%s), attempting recovery",
|
||||
key,
|
||||
err,
|
||||
)
|
||||
_LOGGER.info("Removing broken repository at %s", repo_dir)
|
||||
shutil.rmtree(repo_dir)
|
||||
_LOGGER.info("Successfully removed broken repository, re-cloning...")
|
||||
|
||||
# Recursively call clone_or_update to re-clone
|
||||
# Set _recover_broken=False to prevent infinite recursion
|
||||
result = clone_or_update(
|
||||
url=url,
|
||||
ref=ref,
|
||||
refresh=refresh,
|
||||
domain=domain,
|
||||
username=username,
|
||||
password=password,
|
||||
submodules=submodules,
|
||||
_recover_broken=False,
|
||||
)
|
||||
_LOGGER.info("Repository %s successfully recovered", key)
|
||||
return result
|
||||
|
||||
if submodules is not None:
|
||||
_LOGGER.info(
|
||||
"Updating submodules (%s) for %s", ", ".join(submodules), key
|
||||
)
|
||||
run_git_command(
|
||||
["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
|
||||
["git", "submodule", "update", "--init"] + submodules,
|
||||
git_dir=repo_dir,
|
||||
)
|
||||
|
||||
def revert():
|
||||
_LOGGER.info("Reverting changes to %s -> %s", key, old_sha)
|
||||
run_git_command(["git", "reset", "--hard", old_sha], str(repo_dir))
|
||||
run_git_command(["git", "reset", "--hard", old_sha], git_dir=repo_dir)
|
||||
|
||||
return repo_dir, revert
|
||||
|
||||
|
||||
Reference in New Issue
Block a user