1
0
mirror of https://github.com/esphome/esphome.git synced 2025-11-17 07:15:48 +00:00

[ci] Cache component dependency graph for up to 3.4x faster determine-jobs (#11648)

This commit is contained in:
J. Nick Koston
2025-11-03 22:38:57 -06:00
committed by GitHub
parent 980098ca77
commit 060bb4159f
4 changed files with 341 additions and 3 deletions

View File

@@ -192,6 +192,11 @@ jobs:
with: with:
python-version: ${{ env.DEFAULT_PYTHON }} python-version: ${{ env.DEFAULT_PYTHON }}
cache-key: ${{ needs.common.outputs.cache-key }} cache-key: ${{ needs.common.outputs.cache-key }}
- name: Restore components graph cache
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: .temp/components_graph.json
key: components-graph-${{ hashFiles('esphome/components/**/*.py') }}
- name: Determine which tests to run - name: Determine which tests to run
id: determine id: determine
env: env:
@@ -216,6 +221,12 @@ jobs:
echo "cpp-unit-tests-run-all=$(echo "$output" | jq -r '.cpp_unit_tests_run_all')" >> $GITHUB_OUTPUT echo "cpp-unit-tests-run-all=$(echo "$output" | jq -r '.cpp_unit_tests_run_all')" >> $GITHUB_OUTPUT
echo "cpp-unit-tests-components=$(echo "$output" | jq -c '.cpp_unit_tests_components')" >> $GITHUB_OUTPUT echo "cpp-unit-tests-components=$(echo "$output" | jq -c '.cpp_unit_tests_components')" >> $GITHUB_OUTPUT
echo "component-test-batches=$(echo "$output" | jq -c '.component_test_batches')" >> $GITHUB_OUTPUT echo "component-test-batches=$(echo "$output" | jq -c '.component_test_batches')" >> $GITHUB_OUTPUT
- name: Save components graph cache
if: github.ref == 'refs/heads/dev'
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: .temp/components_graph.json
key: components-graph-${{ hashFiles('esphome/components/**/*.py') }}
integration-tests: integration-tests:
name: Run integration tests name: Run integration tests

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from collections.abc import Callable from collections.abc import Callable
from functools import cache from functools import cache
import hashlib
import json import json
import os import os
import os.path import os.path
@@ -52,6 +53,10 @@ BASE_BUS_COMPONENTS = {
"remote_receiver", "remote_receiver",
} }
# Version stamp for the on-disk components graph cache.
# It is written to the cache file under the "_version" key and compared when the
# cache is loaded; a mismatch makes the cached graph be ignored and rebuilt.
# Increment this when the cache format or graph building logic changes.
COMPONENTS_GRAPH_CACHE_VERSION = 1
def parse_list_components_output(output: str) -> list[str]: def parse_list_components_output(output: str) -> list[str]:
"""Parse the output from list-components.py script. """Parse the output from list-components.py script.
@@ -756,20 +761,71 @@ def resolve_auto_load(
return auto_load() return auto_load()
@cache
def get_components_graph_cache_key() -> str:
    """Return a digest identifying the current state of component Python files.

    The digest is the SHA256 of the raw output of ``git ls-files -s`` restricted
    to ``esphome/components/**/*.py``. Each output line has the form
    ``<mode> <sha1> <stage> <path>``, so the digest changes whenever a listed
    path or its blob hash changes. Every ``.py`` file is included because
    AUTO_LOAD, DEPENDENCIES, etc. can be defined in any Python file, not just
    ``__init__.py``.

    The result is memoized with ``functools.cache``, so git is invoked at most
    once until ``cache_clear()`` is called on this function.

    Returns:
        SHA256 hex string for the listed component files.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    listing = subprocess.run(
        ["git", "ls-files", "-s", "esphome/components/**/*.py"],
        capture_output=True,
        text=True,
        check=True,
        cwd=root_path,
        close_fds=False,
    )
    # The listing already embeds per-file blob hashes, so hashing the listing
    # text is enough; file contents never need to be read here.
    return hashlib.sha256(listing.stdout.encode()).hexdigest()
def create_components_graph() -> dict[str, list[str]]: def create_components_graph() -> dict[str, list[str]]:
"""Create a graph of component dependencies. """Create a graph of component dependencies (cached).
This function is expensive (5-6 seconds) because it imports all ESPHome components
to extract their DEPENDENCIES and AUTO_LOAD metadata. The result is cached based
on a hash of the git-tracked component Python files (see get_components_graph_cache_key), so unchanged components don't trigger a rebuild.
Returns: Returns:
Dictionary mapping parent components to their children (dependencies) Dictionary mapping parent components to their children (dependencies)
""" """
from pathlib import Path # Check cache first - use fixed filename since GitHub Actions cache doesn't support wildcards
cache_file = Path(temp_folder) / "components_graph.json"
if cache_file.exists():
try:
cached_data = json.loads(cache_file.read_text())
except (OSError, json.JSONDecodeError):
# Cache file corrupted or unreadable, rebuild
pass
else:
# Verify cache version matches
if cached_data.get("_version") == COMPONENTS_GRAPH_CACHE_VERSION:
# Verify cache is for current component state
cache_key = get_components_graph_cache_key()
if cached_data.get("_cache_key") == cache_key:
return cached_data.get("graph", {})
# Cache key mismatch - stale cache, rebuild
# Cache version mismatch - incompatible format, rebuild
from esphome import const from esphome import const
from esphome.core import CORE from esphome.core import CORE
from esphome.loader import ComponentManifest, get_component, get_platform from esphome.loader import ComponentManifest, get_component, get_platform
# The root directory of the repo # The root directory of the repo
root = Path(__file__).parent.parent root = Path(root_path)
components_dir = root / ESPHOME_COMPONENTS_PATH components_dir = root / ESPHOME_COMPONENTS_PATH
# Fake some directory so that get_component works # Fake some directory so that get_component works
CORE.config_path = root CORE.config_path = root
@@ -846,6 +902,15 @@ def create_components_graph() -> dict[str, list[str]]:
# restore config # restore config
CORE.data[KEY_CORE] = TARGET_CONFIGURATIONS[0] CORE.data[KEY_CORE] = TARGET_CONFIGURATIONS[0]
# Save to cache with version and cache key for validation
cache_data = {
"_version": COMPONENTS_GRAPH_CACHE_VERSION,
"_cache_key": get_components_graph_cache_key(),
"graph": components_graph,
}
cache_file.parent.mkdir(exist_ok=True)
cache_file.write_text(json.dumps(cache_data))
return components_graph return components_graph

View File

@@ -543,6 +543,7 @@ def test_main_filters_components_without_tests(
with ( with (
patch.object(determine_jobs, "root_path", str(tmp_path)), patch.object(determine_jobs, "root_path", str(tmp_path)),
patch.object(helpers, "root_path", str(tmp_path)), patch.object(helpers, "root_path", str(tmp_path)),
patch.object(helpers, "create_components_graph", return_value={}),
patch("sys.argv", ["determine-jobs.py"]), patch("sys.argv", ["determine-jobs.py"]),
patch.object( patch.object(
determine_jobs, determine_jobs,
@@ -640,6 +641,7 @@ def test_main_detects_components_with_variant_tests(
with ( with (
patch.object(determine_jobs, "root_path", str(tmp_path)), patch.object(determine_jobs, "root_path", str(tmp_path)),
patch.object(helpers, "root_path", str(tmp_path)), patch.object(helpers, "root_path", str(tmp_path)),
patch.object(helpers, "create_components_graph", return_value={}),
patch("sys.argv", ["determine-jobs.py"]), patch("sys.argv", ["determine-jobs.py"]),
patch.object( patch.object(
determine_jobs, determine_jobs,

View File

@@ -1,5 +1,6 @@
"""Unit tests for script/helpers.py module.""" """Unit tests for script/helpers.py module."""
from collections.abc import Generator
import json import json
import os import os
from pathlib import Path from pathlib import Path
@@ -1106,3 +1107,262 @@ def test_get_component_from_path(
"""Test extraction of component names from file paths.""" """Test extraction of component names from file paths."""
result = helpers.get_component_from_path(file_path) result = helpers.get_component_from_path(file_path)
assert result == expected_component assert result == expected_component
# Components graph cache tests
@pytest.fixture
def mock_git_output() -> str:
    """Provide sample ``git ls-files -s`` output for a handful of components.

    The listing deliberately contains ``sensor.py`` and ``binary_sensor.py``
    entries next to ``__init__.py`` files: AUTO_LOAD may live in those files,
    which is why the cache key covers all .py files, not just __init__.py.
    """
    listing_lines = (
        "100644 abc123... 0 esphome/components/wifi/__init__.py\n",
        "100644 def456... 0 esphome/components/api/__init__.py\n",
        "100644 ghi789... 0 esphome/components/xiaomi_lywsd03mmc/__init__.py\n",
        "100644 jkl012... 0 esphome/components/xiaomi_lywsd03mmc/sensor.py\n",
        "100644 mno345... 0 esphome/components/xiaomi_cgpr1/__init__.py\n",
        "100644 pqr678... 0 esphome/components/xiaomi_cgpr1/binary_sensor.py\n",
    )
    return "".join(listing_lines)
@pytest.fixture
def mock_cache_file(tmp_path: Path) -> Path:
    """Return the path of a components graph cache file inside ``tmp_path``.

    Only the path is produced; the file itself is not created.
    """
    return tmp_path.joinpath("components_graph.json")
@pytest.fixture(autouse=True)
def clear_cache_key_cache() -> Generator[None, None, None]:
    """Reset the memoized components graph cache key around every test.

    ``helpers.get_components_graph_cache_key`` is memoized, so a key computed
    while ``subprocess.run`` is mocked would otherwise stay cached. Clearing
    before the test gives it a clean slate; clearing again afterwards keeps a
    mock-derived key from leaking into tests that run later.
    """
    helpers.get_components_graph_cache_key.cache_clear()
    yield
    helpers.get_components_graph_cache_key.cache_clear()
@pytest.fixture
def mock_subprocess_run() -> Generator[Mock, None, None]:
    """Replace ``subprocess.run`` with a mock for the duration of a test.

    Yields:
        The mock standing in for ``subprocess.run``; tests set its
        ``return_value`` to simulate git output.
    """
    patcher = patch("subprocess.run")
    run_mock = patcher.start()
    try:
        yield run_mock
    finally:
        # Always restore the real subprocess.run, even if the test failed.
        patcher.stop()
def test_cache_key_generation(mock_git_output: str, mock_subprocess_run: Mock) -> None:
    """The cache key derived from git output is a SHA256 hex digest."""
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    digest = helpers.get_components_graph_cache_key()

    # SHA256 hex digests are 64 lowercase hexadecimal characters
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
def test_cache_key_consistent_for_same_files(
    mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Test that same git output produces same cache key.

    The key function is memoized, so the memo is cleared between the two calls.
    Without that, the second call would just return the stored value and the
    comparison would pass regardless of how the key is computed.
    """
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    key1 = helpers.get_components_graph_cache_key()
    # Force a genuine recomputation for the second key
    helpers.get_components_graph_cache_key.cache_clear()
    key2 = helpers.get_components_graph_cache_key()

    # Both keys must have been computed from git output, not served from the memo
    assert mock_subprocess_run.call_count == 2
    assert key1 == key2
def test_cache_key_different_for_changed_files(mock_subprocess_run: Mock) -> None:
    """A changed blob hash for the same path yields a different cache key.

    The file used here is a ``sensor.py`` rather than an ``__init__.py``:
    AUTO_LOAD can be defined in sensor.py, binary_sensor.py, etc., so a change
    to any .py file has to invalidate the cache.
    """
    listings = (
        "100644 abc123... 0 esphome/components/xiaomi_lywsd03mmc/sensor.py\n",
        # Same file, different hash - simulates a change to AUTO_LOAD
        "100644 xyz789... 0 esphome/components/xiaomi_lywsd03mmc/sensor.py\n",
    )

    keys = []
    for listing in listings:
        # Drop the memoized key so each listing is hashed afresh
        helpers.get_components_graph_cache_key.cache_clear()
        mock_subprocess_run.return_value = Mock(stdout=listing)
        keys.append(helpers.get_components_graph_cache_key())

    assert keys[0] != keys[1]
def test_cache_key_uses_git_ls_files(
    mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """The cache key is computed from one specific ``git ls-files`` invocation."""
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    helpers.get_components_graph_cache_key()

    # Exactly one git call, with the expected command line and options
    mock_subprocess_run.assert_called_once()
    positional, keywords = mock_subprocess_run.call_args
    expected_cmd = ["git", "ls-files", "-s", "esphome/components/**/*.py"]
    assert positional[0] == expected_cmd
    assert keywords["capture_output"] is True
    assert keywords["text"] is True
    assert keywords["check"] is True
    assert keywords["close_fds"] is False
def test_cache_hit_returns_cached_graph(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """A cache file with matching version and key is returned as the graph."""
    expected_graph = {"wifi": ["network"], "api": ["socket"]}
    key = "a" * 64

    # Seed a valid cache file directly in the folder used as temp_folder
    payload = json.dumps(
        {
            "_version": helpers.COMPONENTS_GRAPH_CACHE_VERSION,
            "_cache_key": key,
            "graph": expected_graph,
        }
    )
    (tmp_path / "components_graph.json").write_text(payload)

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    with (
        patch("helpers.get_components_graph_cache_key", return_value=key),
        patch("helpers.temp_folder", str(tmp_path)),
    ):
        graph = helpers.create_components_graph()

    assert graph == expected_graph
def test_cache_miss_no_cache_file(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """With no cache file present the graph is rebuilt from the components dir."""
    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # An empty components directory is the minimal tree the rebuild can walk
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    temp_dir = tmp_path / ".temp"
    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        graph = helpers.create_components_graph()

    # Nothing to discover in an empty components directory
    assert graph == {}
def test_cache_miss_version_mismatch(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """A cache file with a different ``_version`` is ignored and rebuilt."""
    temp_dir = tmp_path / ".temp"
    temp_dir.mkdir(parents=True)
    stale_payload = {
        "_version": 999,  # Wrong version
        "_cache_key": "test_key",
        "graph": {"old": ["data"]},
    }
    (temp_dir / "components_graph.json").write_text(json.dumps(stale_payload))

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # An empty components directory is the minimal tree the rebuild can walk
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        graph = helpers.create_components_graph()

    # The rebuilt (empty) graph is returned, not the cached {"old": ["data"]}
    assert graph == {}
def test_cache_miss_key_mismatch(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """A cache file whose ``_cache_key`` differs from the current key is rebuilt."""
    temp_dir = tmp_path / ".temp"
    temp_dir.mkdir(parents=True)
    stale_payload = {
        "_version": helpers.COMPONENTS_GRAPH_CACHE_VERSION,
        "_cache_key": "old_key",
        "graph": {"old": ["data"]},
    }
    (temp_dir / "components_graph.json").write_text(json.dumps(stale_payload))

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # An empty components directory is the minimal tree the rebuild can walk
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="new_key"),
    ):
        graph = helpers.create_components_graph()

    # The rebuilt (empty) graph is returned, not the data stored under the old key
    assert graph == {}
def test_cache_miss_corrupted_json(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """A cache file that is not valid JSON is treated as a miss and rebuilt."""
    temp_dir = tmp_path / ".temp"
    temp_dir.mkdir(parents=True)
    (temp_dir / "components_graph.json").write_text("{invalid json")

    mock_subprocess_run.return_value = Mock(stdout=mock_git_output)

    # An empty components directory is the minimal tree the rebuild can walk
    (tmp_path / "esphome" / "components").mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        graph = helpers.create_components_graph()

    # The unreadable cache must not raise; the rebuilt empty graph is returned
    assert graph == {}