1
0
mirror of https://github.com/esphome/esphome.git synced 2025-09-22 05:02:23 +01:00

CI: Centralize test determination logic to reduce unnecessary job runners (#9432)

This commit is contained in:
J. Nick Koston
2025-07-10 23:54:57 -10:00
committed by GitHub
parent 143702beef
commit 8953e53a04
6 changed files with 963 additions and 57 deletions

245
script/determine-jobs.py Executable file
View File

@@ -0,0 +1,245 @@
#!/usr/bin/env python3
"""Determine which CI jobs should run based on changed files.
This script is a centralized way to determine which CI jobs need to run based on
what files have changed. It outputs JSON with the following structure:
{
"integration_tests": true/false,
"clang_tidy": true/false,
"clang_format": true/false,
"python_linters": true/false,
"changed_components": ["component1", "component2", ...],
"component_test_count": 5
}
The CI workflow uses this information to:
- Skip or run integration tests
- Skip or run clang-tidy (and whether to do a full scan)
- Skip or run clang-format
- Skip or run Python linters (ruff, flake8, pylint, pyupgrade)
- Determine which components to test individually
- Decide how to split component tests (if there are many)
Usage:
python script/determine-jobs.py [-b BRANCH]
Options:
-b, --branch BRANCH Branch to compare against (default: dev)
"""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any
from helpers import (
CPP_FILE_EXTENSIONS,
ESPHOME_COMPONENTS_PATH,
PYTHON_FILE_EXTENSIONS,
changed_files,
get_all_dependencies,
get_components_from_integration_fixtures,
parse_list_components_output,
root_path,
)
def should_run_integration_tests(branch: str | None = None) -> bool:
    """Determine if integration tests should run based on changed files.

    Used by the CI workflow to skip integration tests when nothing they
    exercise has changed.

    Integration tests run when ANY of the following is true:

    1. A file under ``esphome/core/`` changed (any file type).
    2. A file under ``tests/integration/`` changed (tests or fixture YAML).
    3. A changed file belongs to a component that is used by an integration
       test fixture, or to a dependency / auto-loaded component of one.
       Example: if a fixture uses ``api`` and ``api`` depends on ``socket``,
       a change under ``esphome/components/socket/`` triggers the tests.

    Args:
        branch: Branch to compare against. If None, uses default.

    Returns:
        True if integration tests should run, False otherwise.
    """
    files = changed_files(branch)

    # Core and integration-test changes always trigger a run. Both prefixes are
    # anchored at the repository root so unrelated paths that merely contain
    # "tests/integration" somewhere in the middle do not match.
    if any(
        file.startswith(("esphome/core/", "tests/integration/")) for file in files
    ):
        return True

    # Collect the names of components that have changed files
    # (esphome/components/<name>/...).
    changed_component_names: set[str] = set()
    for file in files:
        if not file.startswith(ESPHOME_COMPONENTS_PATH):
            continue
        parts = file.split("/")
        if len(parts) >= 3:
            changed_component_names.add(parts[2])

    # Parsing every fixture and resolving component dependencies is the
    # expensive part; skip it entirely when no component file changed.
    if not changed_component_names:
        return False

    # Components used in integration fixtures plus everything they pull in
    fixture_components = get_components_from_integration_fixtures()
    all_required_components = get_all_dependencies(fixture_components)

    return not changed_component_names.isdisjoint(all_required_components)
def should_run_clang_tidy(branch: str | None = None) -> bool:
    """Determine if clang-tidy should run based on changed files.

    Used by the CI workflow to skip clang-tidy when it is not needed.

    Clang-tidy runs when ANY of the following is true:

    1. ``script/clang_tidy_hash.py --check`` exits with code 0, which signals
       that the clang-tidy configuration hash changed and a full scan is
       needed.
    2. The hash check could not be launched at all (run to be safe).
    3. Any changed file ends with one of ``CPP_FILE_EXTENSIONS``, anywhere in
       the repository.

    Any non-zero exit code from the hash check is treated as "configuration
    unchanged"; the decision then falls through to the changed-file check.

    Args:
        branch: Branch to compare against. If None, uses default.

    Returns:
        True if clang-tidy should run, False otherwise.
    """
    hash_script = os.path.join(root_path, "script", "clang_tidy_hash.py")
    try:
        # Launch through the current interpreter (same as main() does for
        # list-components.py) so the check does not depend on the script's
        # executable bit or shebang resolution.
        result = subprocess.run(
            [sys.executable, hash_script, "--check"],
            capture_output=True,
            check=False,
        )
    except Exception:
        # Deliberately broad: if the hash check cannot be run for any reason,
        # run clang-tidy rather than risk skipping it.
        return True

    # Exit 0 means hash changed (full scan needed)
    if result.returncode == 0:
        return True

    return _any_changed_file_endswith(branch, CPP_FILE_EXTENSIONS)
def should_run_clang_format(branch: str | None = None) -> bool:
    """Tell the CI workflow whether the clang-format job is needed.

    The job is needed exactly when at least one changed file carries one of
    the suffixes in ``CPP_FILE_EXTENSIONS``.

    Args:
        branch: Branch to compare against. If None, uses default.

    Returns:
        True if clang-format should run, False otherwise.
    """
    cpp_files_changed = _any_changed_file_endswith(branch, CPP_FILE_EXTENSIONS)
    return cpp_files_changed
def should_run_python_linters(branch: str | None = None) -> bool:
    """Tell the CI workflow whether the Python lint jobs are needed.

    Covers ruff, flake8, pylint and pyupgrade. The jobs are needed exactly
    when at least one changed file carries one of the suffixes in
    ``PYTHON_FILE_EXTENSIONS``.

    Args:
        branch: Branch to compare against. If None, uses default.

    Returns:
        True if Python linters should run, False otherwise.
    """
    python_files_changed = _any_changed_file_endswith(branch, PYTHON_FILE_EXTENSIONS)
    return python_files_changed
def _any_changed_file_endswith(branch: str | None, extensions: tuple[str, ...]) -> bool:
    """Return True if at least one changed file has a suffix in *extensions*.

    Args:
        branch: Branch passed through to ``changed_files``.
        extensions: Suffixes to look for; handed to ``str.endswith`` as a tuple.
    """
    for path in changed_files(branch):
        if path.endswith(extensions):
            return True
    return False
def main() -> None:
    """Decide which CI jobs to run and print the decision as one JSON object.

    Parses the optional ``-b/--branch`` argument, evaluates each
    ``should_run_*`` check, asks ``list-components.py --changed`` for the
    changed components and prints a JSON document with the keys
    ``integration_tests``, ``clang_tidy``, ``clang_format``,
    ``python_linters``, ``changed_components`` and ``component_test_count``.

    Raises:
        subprocess.CalledProcessError: If ``list-components.py`` exits with a
            non-zero status. Its stderr is forwarded to this process's stderr
            first so the failure is visible in the CI log.
    """
    parser = argparse.ArgumentParser(
        description="Determine which CI jobs should run based on changed files"
    )
    parser.add_argument(
        "-b", "--branch", help="Branch to compare changed files against"
    )
    args = parser.parse_args()

    # Determine what should run
    run_integration = should_run_integration_tests(args.branch)
    run_clang_tidy = should_run_clang_tidy(args.branch)
    run_clang_format = should_run_clang_format(args.branch)
    run_python_linters = should_run_python_linters(args.branch)

    # Get changed components using list-components.py for exact compatibility
    script_path = Path(__file__).parent / "list-components.py"
    cmd = [sys.executable, str(script_path), "--changed"]
    if args.branch:
        cmd.extend(["-b", args.branch])

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        # capture_output=True swallows the child's stderr and the exception
        # message does not include it; surface it before propagating.
        if err.stderr:
            sys.stderr.write(err.stderr)
        raise

    changed_components = parse_list_components_output(result.stdout)

    # Build output
    output: dict[str, Any] = {
        "integration_tests": run_integration,
        "clang_tidy": run_clang_tidy,
        "clang_format": run_clang_format,
        "python_linters": run_python_linters,
        "changed_components": changed_components,
        "component_test_count": len(changed_components),
    }

    # Output as JSON
    print(json.dumps(output))
# Run main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
from functools import cache
import json
import os
import os.path
@@ -7,6 +8,7 @@ from pathlib import Path
import re
import subprocess
import time
from typing import Any
import colorama
@@ -15,6 +17,34 @@ basepath = os.path.join(root_path, "esphome")
temp_folder = os.path.join(root_path, ".temp")
temp_header_file = os.path.join(temp_folder, "all-include.cpp")
# C++ file extensions used for clang-tidy and clang-format checks.
# Kept as a tuple so it can be passed directly to str.endswith().
# NOTE: ".tcc" must stay the LAST entry - get_changed_components() uses
# CPP_FILE_EXTENSIONS[:-1] to exclude .tcc when checking core files.
CPP_FILE_EXTENSIONS = (".cpp", ".h", ".hpp", ".cc", ".cxx", ".c", ".tcc")
# Python file extensions (tuple, usable with str.endswith())
PYTHON_FILE_EXTENSIONS = (".py", ".pyi")
# YAML file extensions (tuple, usable with str.endswith())
YAML_FILE_EXTENSIONS = (".yaml", ".yml")
# Component path prefix, relative to the repository root. Used with
# str.startswith(); the component name is the path segment right after it.
ESPHOME_COMPONENTS_PATH = "esphome/components/"
def parse_list_components_output(output: str) -> list[str]:
    """Turn the stdout of list-components.py into a list of component names.

    The script prints one component name per line. Surrounding whitespace is
    removed from each line and blank lines are dropped.

    Args:
        output: The stdout from list-components.py

    Returns:
        Component names in output order; an empty list when there is no
        output or only whitespace.
    """
    # Guard clause: nothing (or only whitespace) was printed
    if not output or not output.strip():
        return []

    names: list[str] = []
    for raw_line in output.strip().split("\n"):
        name = raw_line.strip()
        if name:
            names.append(name)
    return names
def styled(color: str | tuple[str, ...], msg: str, reset: bool = True) -> str:
prefix = "".join(color) if isinstance(color, tuple) else color
@@ -96,6 +126,7 @@ def _get_pr_number_from_github_env() -> str | None:
return None
@cache
def _get_changed_files_github_actions() -> list[str] | None:
"""Get changed files in GitHub Actions environment.
@@ -135,7 +166,7 @@ def changed_files(branch: str | None = None) -> list[str]:
return github_files
# Original implementation for local development
if branch is None:
if not branch: # Treat None and empty string the same
branch = "dev"
check_remotes = ["upstream", "origin"]
check_remotes.extend(splitlines_no_ends(get_output("git", "remote")))
@@ -183,7 +214,7 @@ def get_changed_components() -> list[str] | None:
changed = changed_files()
core_cpp_changed = any(
f.startswith("esphome/core/")
and f.endswith((".cpp", ".h", ".hpp", ".cc", ".cxx", ".c"))
and f.endswith(CPP_FILE_EXTENSIONS[:-1]) # Exclude .tcc for core files
for f in changed
)
if core_cpp_changed:
@@ -198,8 +229,7 @@ def get_changed_components() -> list[str] | None:
result = subprocess.run(
cmd, capture_output=True, text=True, check=True, close_fds=False
)
components = [c.strip() for c in result.stdout.strip().split("\n") if c.strip()]
return components
return parse_list_components_output(result.stdout)
except subprocess.CalledProcessError:
# If the script fails, fall back to full scan
print("Could not determine changed components - will run full clang-tidy scan")
@@ -249,7 +279,9 @@ def _filter_changed_ci(files: list[str]) -> list[str]:
# Action: Check only the specific non-component files that changed
changed = changed_files()
files = [
f for f in files if f in changed and not f.startswith("esphome/components/")
f
for f in files
if f in changed and not f.startswith(ESPHOME_COMPONENTS_PATH)
]
if not files:
print("No files changed")
@@ -267,7 +299,7 @@ def _filter_changed_ci(files: list[str]) -> list[str]:
# because changes in one file can affect other files in the same component.
filtered_files = []
for f in files:
if f.startswith("esphome/components/"):
if f.startswith(ESPHOME_COMPONENTS_PATH):
# Check if file belongs to any of the changed components
parts = f.split("/")
if len(parts) >= 3 and parts[2] in component_set:
@@ -326,7 +358,7 @@ def git_ls_files(patterns: list[str] | None = None) -> dict[str, int]:
return {s[3].strip(): int(s[0]) for s in lines}
def load_idedata(environment):
def load_idedata(environment: str) -> dict[str, Any]:
start_time = time.time()
print(f"Loading IDE data for environment '{environment}'...")
@@ -442,3 +474,83 @@ def get_usable_cpu_count() -> int:
return (
os.process_cpu_count() if hasattr(os, "process_cpu_count") else os.cpu_count()
)
def get_all_dependencies(component_names: set[str]) -> set[str]:
    """Get all dependencies for a set of components.

    Computes the transitive closure of ``component_names`` over each
    component's ``dependencies`` and ``auto_load`` lists. Names for which
    ``get_component`` returns nothing are kept in the result but contribute
    no further dependencies.

    Note: this resets the global ``CORE`` object and points its config path
    at the repository root so components can be loaded.

    Args:
        component_names: Set of component names to get dependencies for

    Returns:
        Set of all components including dependencies and auto-loaded components
    """
    from esphome.const import KEY_CORE
    from esphome.core import CORE
    from esphome.loader import get_component

    all_components: set[str] = set(component_names)

    # Reset CORE to ensure clean state
    CORE.reset()

    # Set up fake config path for component loading
    root = Path(__file__).parent.parent
    CORE.config_path = str(root)
    CORE.data[KEY_CORE] = {}

    # Worklist: only components discovered in the previous pass are inspected,
    # so each component is looked up once instead of once per pass.
    frontier: set[str] = set(all_components)
    while frontier:
        discovered: set[str] = set()
        for comp_name in frontier:
            comp = get_component(comp_name)
            if not comp:
                continue
            # Add dependencies (extract component name before '.')
            discovered.update(dep.split(".")[0] for dep in comp.dependencies)
            # Add auto_load components
            discovered.update(comp.auto_load)

        # Keep only names not seen before; stop when nothing new turned up
        frontier = discovered - all_components
        all_components |= frontier

    return all_components
def get_components_from_integration_fixtures() -> set[str]:
    """Extract all components used in integration test fixtures.

    Reads every ``*.yaml`` file in ``tests/integration/fixtures`` (relative to
    the repository root) and collects:

    - every top-level key of the document, and
    - the ``platform`` value of every dict found in a top-level list
      (e.g. ``template`` from an ``output:`` entry with ``platform: template``).

    Files whose root is empty or is not a mapping are skipped.

    Returns:
        Set of component names used in integration test fixtures
    """
    import yaml

    components: set[str] = set()
    fixtures_dir = Path(__file__).parent.parent / "tests" / "integration" / "fixtures"

    for yaml_file in fixtures_dir.glob("*.yaml"):
        # Explicit encoding so parsing does not depend on the platform locale
        with open(yaml_file, encoding="utf-8") as f:
            config: dict[str, Any] | None = yaml.safe_load(f)

        # Skip empty documents and anything whose root is not a mapping
        # (a list or scalar root has no component keys to collect).
        if not isinstance(config, dict):
            continue

        # Add all top-level component keys
        components.update(config.keys())

        # Add platform components (e.g., output.template)
        for value in config.values():
            if not isinstance(value, list):
                continue
            for item in value:
                if isinstance(item, dict) and "platform" in item:
                    components.add(item["platform"])

    return components

View File

@@ -20,6 +20,12 @@ def filter_component_files(str):
return str.startswith("esphome/components/") | str.startswith("tests/components/")
def get_all_component_files() -> list[str]:
    """Return the git-tracked paths that filter_component_files accepts."""
    return [path for path in git_ls_files() if filter_component_files(path)]
def extract_component_names_array_from_files_array(files):
components = []
for file in files:
@@ -165,17 +171,20 @@ def main():
if args.branch and not args.changed:
parser.error("--branch requires --changed")
files = git_ls_files()
files = filter(filter_component_files, files)
if args.changed:
if args.branch:
changed = changed_files(args.branch)
else:
changed = changed_files()
# When --changed is passed, only get the changed files
changed = changed_files(args.branch)
# If any base test file(s) changed, there's no need to filter out components
if not any("tests/test_build_components" in file for file in changed):
files = [f for f in files if f in changed]
if any("tests/test_build_components" in file for file in changed):
# Need to get all component files
files = get_all_component_files()
else:
# Only look at changed component files
files = [f for f in changed if filter_component_files(f)]
else:
# Get all component files
files = get_all_component_files()
for c in get_components(files, args.changed):
print(c)