From 060bb4159f6c638d3ce03a1e3169607a751ec1d1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 3 Nov 2025 22:38:57 -0600 Subject: [PATCH] [ci] Cache component dependency graph for up to 3.4x faster determine-jobs (#11648) --- .github/workflows/ci.yml | 11 ++ script/helpers.py | 71 +++++++- tests/script/test_determine_jobs.py | 2 + tests/script/test_helpers.py | 260 ++++++++++++++++++++++++++++ 4 files changed, 341 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1756d5b765..16837b3186 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -192,6 +192,11 @@ jobs: with: python-version: ${{ env.DEFAULT_PYTHON }} cache-key: ${{ needs.common.outputs.cache-key }} + - name: Restore components graph cache + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: .temp/components_graph.json + key: components-graph-${{ hashFiles('esphome/components/**/*.py') }} - name: Determine which tests to run id: determine env: @@ -216,6 +221,12 @@ jobs: echo "cpp-unit-tests-run-all=$(echo "$output" | jq -r '.cpp_unit_tests_run_all')" >> $GITHUB_OUTPUT echo "cpp-unit-tests-components=$(echo "$output" | jq -c '.cpp_unit_tests_components')" >> $GITHUB_OUTPUT echo "component-test-batches=$(echo "$output" | jq -c '.component_test_batches')" >> $GITHUB_OUTPUT + - name: Save components graph cache + if: github.ref == 'refs/heads/dev' + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: .temp/components_graph.json + key: components-graph-${{ hashFiles('esphome/components/**/*.py') }} integration-tests: name: Run integration tests diff --git a/script/helpers.py b/script/helpers.py index 33f95d6f8a..5b2fe6cd06 100644 --- a/script/helpers.py +++ b/script/helpers.py @@ -2,6 +2,7 @@ from __future__ import annotations from collections.abc import Callable from functools import cache +import hashlib import json import os import os.path @@ -52,6 
@cache
def get_components_graph_cache_key() -> str:
    """Return a digest that identifies the tracked component Python sources.

    The digest is the SHA256 of the text printed by ``git ls-files -s`` for
    every ``.py`` file under ``esphome/components``. All Python files are
    covered, not only ``__init__.py``, because AUTO_LOAD, DEPENDENCIES, etc.
    can be defined in any of them. Git already records a content hash per
    file, so neither file contents nor modification times are read here, and
    the key is meant to be stable across machines and CI runs.

    The result is memoized with ``functools.cache``, so git runs at most once
    per process until ``cache_clear()`` is called.

    Returns:
        64-character SHA256 hex digest of the git listing.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    # Each listed line has the form: <mode> <object hash> <stage>\t<path>
    # NOTE(review): ls-files reads the git index, so edits that have not been
    # staged presumably leave the key unchanged - confirm that is acceptable
    # for local runs.
    listing = subprocess.run(
        ["git", "ls-files", "-s", "esphome/components/**/*.py"],
        capture_output=True,
        text=True,
        check=True,
        cwd=root_path,
        close_fds=False,
    ).stdout

    # Paths and per-file hashes are both part of the listing, so the digest
    # changes when a component Python file is added, removed or modified.
    return hashlib.sha256(listing.encode()).hexdigest()
Returns: Dictionary mapping parent components to their children (dependencies) """ - from pathlib import Path + # Check cache first - use fixed filename since GitHub Actions cache doesn't support wildcards + cache_file = Path(temp_folder) / "components_graph.json" + + if cache_file.exists(): + try: + cached_data = json.loads(cache_file.read_text()) + except (OSError, json.JSONDecodeError): + # Cache file corrupted or unreadable, rebuild + pass + else: + # Verify cache version matches + if cached_data.get("_version") == COMPONENTS_GRAPH_CACHE_VERSION: + # Verify cache is for current component state + cache_key = get_components_graph_cache_key() + if cached_data.get("_cache_key") == cache_key: + return cached_data.get("graph", {}) + # Cache key mismatch - stale cache, rebuild + # Cache version mismatch - incompatible format, rebuild from esphome import const from esphome.core import CORE from esphome.loader import ComponentManifest, get_component, get_platform # The root directory of the repo - root = Path(__file__).parent.parent + root = Path(root_path) components_dir = root / ESPHOME_COMPONENTS_PATH # Fake some directory so that get_component works CORE.config_path = root @@ -846,6 +902,15 @@ def create_components_graph() -> dict[str, list[str]]: # restore config CORE.data[KEY_CORE] = TARGET_CONFIGURATIONS[0] + # Save to cache with version and cache key for validation + cache_data = { + "_version": COMPONENTS_GRAPH_CACHE_VERSION, + "_cache_key": get_components_graph_cache_key(), + "graph": components_graph, + } + cache_file.parent.mkdir(exist_ok=True) + cache_file.write_text(json.dumps(cache_data)) + return components_graph diff --git a/tests/script/test_determine_jobs.py b/tests/script/test_determine_jobs.py index a33eca5b19..e084e2e398 100644 --- a/tests/script/test_determine_jobs.py +++ b/tests/script/test_determine_jobs.py @@ -543,6 +543,7 @@ def test_main_filters_components_without_tests( with ( patch.object(determine_jobs, "root_path", str(tmp_path)), 
patch.object(helpers, "root_path", str(tmp_path)), + patch.object(helpers, "create_components_graph", return_value={}), patch("sys.argv", ["determine-jobs.py"]), patch.object( determine_jobs, @@ -640,6 +641,7 @@ def test_main_detects_components_with_variant_tests( with ( patch.object(determine_jobs, "root_path", str(tmp_path)), patch.object(helpers, "root_path", str(tmp_path)), + patch.object(helpers, "create_components_graph", return_value={}), patch("sys.argv", ["determine-jobs.py"]), patch.object( determine_jobs, diff --git a/tests/script/test_helpers.py b/tests/script/test_helpers.py index 5eb55c0722..1bfffef51c 100644 --- a/tests/script/test_helpers.py +++ b/tests/script/test_helpers.py @@ -1,5 +1,6 @@ """Unit tests for script/helpers.py module.""" +from collections.abc import Generator import json import os from pathlib import Path @@ -1106,3 +1107,262 @@ def test_get_component_from_path( """Test extraction of component names from file paths.""" result = helpers.get_component_from_path(file_path) assert result == expected_component + + +# Components graph cache tests + + +@pytest.fixture +def mock_git_output() -> str: + """Fixture for mock git ls-files output with realistic component files. + + Includes examples of AUTO_LOAD in sensor.py and binary_sensor.py files, + which is why we need to hash all .py files, not just __init__.py. + """ + return ( + "100644 abc123... 0 esphome/components/wifi/__init__.py\n" + "100644 def456... 0 esphome/components/api/__init__.py\n" + "100644 ghi789... 0 esphome/components/xiaomi_lywsd03mmc/__init__.py\n" + "100644 jkl012... 0 esphome/components/xiaomi_lywsd03mmc/sensor.py\n" + "100644 mno345... 0 esphome/components/xiaomi_cgpr1/__init__.py\n" + "100644 pqr678... 
def test_cache_key_consistent_for_same_files(
    mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Test that same git output produces same cache key.

    get_components_graph_cache_key is memoized, so a second plain call would
    return the stored value without hashing anything. The memo is cleared
    between the two calls so that both keys are really computed from the git
    output.
    """
    mock_result = Mock()
    mock_result.stdout = mock_git_output
    mock_subprocess_run.return_value = mock_result

    key1 = helpers.get_components_graph_cache_key()

    # Drop the memoized value so the second call hashes the output again
    helpers.get_components_graph_cache_key.cache_clear()
    key2 = helpers.get_components_graph_cache_key()

    # Both calls must have reached git; otherwise the comparison is vacuous
    assert mock_subprocess_run.call_count == 2
    assert key1 == key2
def test_cache_key_uses_git_ls_files(
    mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Test that git ls-files command is called correctly.

    Checks the exact command line, the subprocess options, and the working
    directory. The pathspec is relative, so it only selects the component
    files when git runs from the repository root.
    """
    mock_result = Mock()
    mock_result.stdout = mock_git_output
    mock_subprocess_run.return_value = mock_result

    helpers.get_components_graph_cache_key()

    # Verify git ls-files was called exactly once with the expected command
    mock_subprocess_run.assert_called_once()
    call_args = mock_subprocess_run.call_args
    assert call_args[0][0] == [
        "git",
        "ls-files",
        "-s",
        "esphome/components/**/*.py",
    ]

    options = call_args[1]
    assert options["capture_output"] is True
    assert options["text"] is True
    assert options["check"] is True
    assert options["close_fds"] is False
    # The relative pathspec is resolved against cwd, so it must be the repo root
    assert options["cwd"] == helpers.root_path
def test_cache_miss_no_cache_file(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Test that cache miss rebuilds graph when no cache file exists.

    Besides the returned graph, this checks that the rebuilt graph is written
    to the cache file together with the format version and the cache key, so
    that a later run can reuse it.
    """
    mock_result = Mock()
    mock_result.stdout = mock_git_output
    mock_subprocess_run.return_value = mock_result

    # Create minimal components directory structure
    components_dir = tmp_path / "esphome" / "components"
    components_dir.mkdir(parents=True)

    # The temp folder does not exist yet; the function is expected to create it
    temp_dir = tmp_path / ".temp"
    cache_file = temp_dir / "components_graph.json"

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(temp_dir)),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        result = helpers.create_components_graph()

    # Should return empty graph for empty components directory
    assert result == {}

    # The freshly built graph must have been saved for the next run
    assert json.loads(cache_file.read_text()) == {
        "_version": helpers.COMPONENTS_GRAPH_CACHE_VERSION,
        "_cache_key": "test_key",
        "graph": {},
    }
def test_cache_miss_corrupted_json(
    tmp_path: Path, mock_git_output: str, mock_subprocess_run: Mock
) -> None:
    """Test that cache miss rebuilds graph when cache file has invalid JSON.

    The unreadable cache must not raise, and after the rebuild the file must
    hold a valid cache record instead of the corrupt content.
    """
    cache_file = tmp_path / ".temp" / "components_graph.json"
    cache_file.parent.mkdir(parents=True)
    cache_file.write_text("{invalid json")

    mock_result = Mock()
    mock_result.stdout = mock_git_output
    mock_subprocess_run.return_value = mock_result

    # Create minimal components directory structure
    components_dir = tmp_path / "esphome" / "components"
    components_dir.mkdir(parents=True)

    with (
        patch("helpers.root_path", str(tmp_path)),
        patch("helpers.temp_folder", str(tmp_path / ".temp")),
        patch("helpers.get_components_graph_cache_key", return_value="test_key"),
    ):
        result = helpers.create_components_graph()

    # Should handle corruption gracefully and rebuild
    assert result == {}

    # The corrupt file must have been replaced by a well-formed cache record
    assert json.loads(cache_file.read_text()) == {
        "_version": helpers.COMPONENTS_GRAPH_CACHE_VERSION,
        "_cache_key": "test_key",
        "graph": {},
    }