CI: Centralize test determination logic to reduce unnecessary job runners (#9432)

2025-09-22 05:02:23 +01:00 · 2025-07-10 23:54:57 -10:00
parent 143702beef
commit 8953e53a04
6 changed files with 963 additions and 57 deletions
--- a/script/determine-jobs.py
+++ b/script/determine-jobs.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""Determine which CI jobs should run based on changed files.
+
+This script is a centralized way to determine which CI jobs need to run based on
+what files have changed. It outputs JSON with the following structure:
+
+{
+  "integration_tests": true/false,
+  "clang_tidy": true/false,
+  "clang_format": true/false,
+  "python_linters": true/false,
+  "changed_components": ["component1", "component2", ...],
+  "component_test_count": 5
+}
+
+The CI workflow uses this information to:
+- Skip or run integration tests
+- Skip or run clang-tidy (and whether to do a full scan)
+- Skip or run clang-format
+- Skip or run Python linters (ruff, flake8, pylint, pyupgrade)
+- Determine which components to test individually
+- Decide how to split component tests (if there are many)
+
+Usage:
+  python script/determine-jobs.py [-b BRANCH]
+
+Options:
+  -b, --branch BRANCH  Branch to compare against (default: dev)
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from pathlib import Path
+import subprocess
+import sys
+from typing import Any
+
+from helpers import (
+    CPP_FILE_EXTENSIONS,
+    ESPHOME_COMPONENTS_PATH,
+    PYTHON_FILE_EXTENSIONS,
+    changed_files,
+    get_all_dependencies,
+    get_components_from_integration_fixtures,
+    parse_list_components_output,
+    root_path,
+)
+
+
+def should_run_integration_tests(branch: str | None = None) -> bool:
+    """Determine if integration tests should run based on changed files.
+
+    This function is used by the CI workflow to intelligently skip integration tests when they're
+    not needed, saving significant CI time and resources.
+
+    Integration tests will run when ANY of the following conditions are met:
+
+    1. Core files changed (esphome/core/*)
+       - Any file under esphome/core/, whatever its extension (.cpp, .h, .tcc, .py, ...)
+       - These files contain fundamental functionality used throughout ESPHome
+       - Examples: esphome/core/component.cpp, esphome/core/config.py
+       - NOT included: esphome/*.py, esphome/dashboard/*.py
+
+    2. Integration test files changed
+       - Any file in the tests/integration/ directory
+       - This includes test files themselves and fixture YAML files
+       - Examples: tests/integration/test_api.py, tests/integration/fixtures/api.yaml
+       - The match is anchored at the start of the path, so a path that merely contains
+         "tests/integration" somewhere else does not count
+
+    3. Components used by integration tests (or their dependencies) changed
+       - The function parses all YAML files in tests/integration/fixtures/
+       - Extracts which components are used in integration tests
+       - Recursively finds all dependencies of those components
+       - If any of these components have changes, tests must run
+       - Example: If api.yaml uses 'sensor' and 'api' components, and 'api' depends on 'socket',
+         then changes to sensor/, api/, or socket/ components trigger tests
+
+    All checks assume changed_files() yields repository-relative paths that use
+    forward slashes.
+
+    Args:
+        branch: Branch to compare against. If None, uses default.
+
+    Returns:
+        True if integration tests should run, False otherwise.
+    """
+    files = changed_files(branch)
+
+    # Check if any core files changed (esphome/core/*)
+    if any(file.startswith("esphome/core/") for file in files):
+        return True
+
+    # Check if any integration test files changed. The prefix is anchored at the
+    # start of the path so unrelated paths that merely contain it do not match.
+    if any(file.startswith("tests/integration/") for file in files):
+        return True
+
+    # Get all components used in integration tests and their dependencies
+    fixture_components = get_components_from_integration_fixtures()
+    all_required_components = get_all_dependencies(fixture_components)
+
+    # Check if any required components changed; the component name is the third
+    # path segment of esphome/components/<name>/...
+    for file in files:
+        if not file.startswith(ESPHOME_COMPONENTS_PATH):
+            continue
+        parts = file.split("/")
+        if len(parts) >= 3 and parts[2] in all_required_components:
+            return True
+
+    return False
+
+
+def should_run_clang_tidy(branch: str | None = None) -> bool:
+    """Decide whether the clang-tidy CI job needs to run.
+
+    The CI workflow calls this so clang-tidy can be skipped when nothing relevant
+    changed, saving significant CI time and resources.
+
+    Clang-tidy will run when ANY of the following conditions are met:
+
+    1. Clang-tidy configuration changed
+       - script/clang_tidy_hash.py is executed with --check
+       - Exit code 0 from that script means the configuration hash changed
+       - A changed configuration needs a full scan so that all code is checked
+         against the new rules, no matter which files were touched
+       - Any other exit code falls through to the file check below
+
+    2. Any C++ source files changed
+       - Any changed path ending in one of CPP_FILE_EXTENSIONS
+         (.cpp, .h, .hpp, .cc, .cxx, .c, .tcc)
+       - Includes files anywhere in the repository, not just in esphome/
+       - This ensures all C++ code is checked, including tests, examples, etc.
+       - Examples: esphome/core/component.cpp, tests/custom/my_component.h
+
+    If the hash script cannot be launched at all (subprocess.run raises), the
+    function returns True so that clang-tidy runs as a safety measure.
+
+    Args:
+        branch: Branch to compare against. If None, uses default.
+
+    Returns:
+        True if clang-tidy should run, False otherwise.
+    """
+    # The configuration check comes first because a changed configuration requires
+    # a full scan regardless of which files were touched
+    try:
+        hash_check = subprocess.run(
+            [os.path.join(root_path, "script", "clang_tidy_hash.py"), "--check"],
+            capture_output=True,
+            check=False,
+        )
+    except Exception:
+        # If the hash script cannot be run, run clang-tidy to be safe
+        return True
+
+    # Exit 0 means hash changed (full scan needed); otherwise only C++ changes count
+    config_changed = hash_check.returncode == 0
+    return config_changed or _any_changed_file_endswith(branch, CPP_FILE_EXTENSIONS)
+
+
+def should_run_clang_format(branch: str | None = None) -> bool:
+    """Decide whether the clang-format CI job needs to run.
+
+    The CI workflow calls this to skip clang-format when no C++ files changed.
+    The job is needed as soon as one changed path ends in one of
+    CPP_FILE_EXTENSIONS.
+
+    Args:
+        branch: Branch to compare against. If None, uses default.
+
+    Returns:
+        True if clang-format should run, False otherwise.
+    """
+    cpp_changed = _any_changed_file_endswith(branch, CPP_FILE_EXTENSIONS)
+    return cpp_changed
+
+
+def should_run_python_linters(branch: str | None = None) -> bool:
+    """Decide whether the Python linter CI jobs (ruff, flake8, pylint, pyupgrade) need to run.
+
+    The CI workflow calls this to skip Python linting when no Python files changed.
+    The linters are needed as soon as one changed path ends in one of
+    PYTHON_FILE_EXTENSIONS.
+
+    Args:
+        branch: Branch to compare against. If None, uses default.
+
+    Returns:
+        True if Python linters should run, False otherwise.
+    """
+    python_changed = _any_changed_file_endswith(branch, PYTHON_FILE_EXTENSIONS)
+    return python_changed
+
+
+def _any_changed_file_endswith(branch: str | None, extensions: tuple[str, ...]) -> bool:
+    """Return True when at least one changed file path ends with one of the extensions."""
+    return any(path.endswith(extensions) for path in changed_files(branch))
+
+
+def main() -> None:
+    """Parse the command line, evaluate every job check and print the result as JSON.
+
+    Raises:
+        subprocess.CalledProcessError: If list-components.py exits with a non-zero status.
+    """
+    parser = argparse.ArgumentParser(
+        description="Determine which CI jobs should run based on changed files"
+    )
+    parser.add_argument(
+        "-b", "--branch", help="Branch to compare changed files against"
+    )
+    branch = parser.parse_args().branch
+
+    # Evaluate the job checks in a fixed order; the dict keeps that order for the output
+    output: dict[str, Any] = {
+        "integration_tests": should_run_integration_tests(branch),
+        "clang_tidy": should_run_clang_tidy(branch),
+        "clang_format": should_run_clang_format(branch),
+        "python_linters": should_run_python_linters(branch),
+    }
+
+    # Get changed components using list-components.py for exact compatibility,
+    # running it with the same interpreter as this script
+    list_components = Path(__file__).parent / "list-components.py"
+    branch_args = ["-b", branch] if branch else []
+    completed = subprocess.run(
+        [sys.executable, str(list_components), "--changed", *branch_args],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    changed_components = parse_list_components_output(completed.stdout)
+    output["changed_components"] = changed_components
+    output["component_test_count"] = len(changed_components)
+
+    print(json.dumps(output))
+
+
+if __name__ == "__main__":
+    main()
--- a/script/helpers.py
+++ b/script/helpers.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from functools import cache
 import json
 import os
 import os.path
@@ -7,6 +8,7 @@ from pathlib import Path
 import re
 import subprocess
 import time
+from typing import Any

 import colorama

@@ -15,6 +17,34 @@ basepath = os.path.join(root_path, "esphome")
 temp_folder = os.path.join(root_path, ".temp")
 temp_header_file = os.path.join(temp_folder, "all-include.cpp")

+# C++ file extensions used for clang-tidy and clang-format checks
+CPP_FILE_EXTENSIONS = (".cpp", ".h", ".hpp", ".cc", ".cxx", ".c", ".tcc")
+
+# Python file extensions
+PYTHON_FILE_EXTENSIONS = (".py", ".pyi")
+
+# YAML file extensions
+YAML_FILE_EXTENSIONS = (".yaml", ".yml")
+
+# Component path prefix
+ESPHOME_COMPONENTS_PATH = "esphome/components/"
+
+
+def parse_list_components_output(output: str) -> list[str]:
+    """Split the stdout of list-components.py into component names.
+
+    Each line holds one name; whitespace is stripped and blank lines are dropped.
+
+    Args:
+        output: The stdout from list-components.py
+
+    Returns:
+        List of component names, or empty list if the output is empty or None
+    """
+    # "output or ''" lets missing output flow through the same path as empty output
+    stripped = (line.strip() for line in (output or "").split("\n"))
+    return [name for name in stripped if name]
+

 def styled(color: str | tuple[str, ...], msg: str, reset: bool = True) -> str:
    prefix = "".join(color) if isinstance(color, tuple) else color
@@ -96,6 +126,7 @@ def _get_pr_number_from_github_env() -> str | None:
    return None


+@cache
 def _get_changed_files_github_actions() -> list[str] | None:
    """Get changed files in GitHub Actions environment.

@@ -135,7 +166,7 @@ def changed_files(branch: str | None = None) -> list[str]:
            return github_files

    # Original implementation for local development
-    if branch is None:
+    if not branch:  # Treat None and empty string the same
        branch = "dev"
    check_remotes = ["upstream", "origin"]
    check_remotes.extend(splitlines_no_ends(get_output("git", "remote")))
@@ -183,7 +214,7 @@ def get_changed_components() -> list[str] | None:
    changed = changed_files()
    core_cpp_changed = any(
        f.startswith("esphome/core/")
-        and f.endswith((".cpp", ".h", ".hpp", ".cc", ".cxx", ".c"))
+        and f.endswith(CPP_FILE_EXTENSIONS[:-1])  # Exclude .tcc for core files
        for f in changed
    )
    if core_cpp_changed:
@@ -198,8 +229,7 @@ def get_changed_components() -> list[str] | None:
        result = subprocess.run(
            cmd, capture_output=True, text=True, check=True, close_fds=False
        )
-        components = [c.strip() for c in result.stdout.strip().split("\n") if c.strip()]
-        return components
+        return parse_list_components_output(result.stdout)
    except subprocess.CalledProcessError:
        # If the script fails, fall back to full scan
        print("Could not determine changed components - will run full clang-tidy scan")
@@ -249,7 +279,9 @@ def _filter_changed_ci(files: list[str]) -> list[str]:
        # Action: Check only the specific non-component files that changed
        changed = changed_files()
        files = [
-            f for f in files if f in changed and not f.startswith("esphome/components/")
+            f
+            for f in files
+            if f in changed and not f.startswith(ESPHOME_COMPONENTS_PATH)
        ]
        if not files:
            print("No files changed")
@@ -267,7 +299,7 @@ def _filter_changed_ci(files: list[str]) -> list[str]:
    # because changes in one file can affect other files in the same component.
    filtered_files = []
    for f in files:
-        if f.startswith("esphome/components/"):
+        if f.startswith(ESPHOME_COMPONENTS_PATH):
            # Check if file belongs to any of the changed components
            parts = f.split("/")
            if len(parts) >= 3 and parts[2] in component_set:
@@ -326,7 +358,7 @@ def git_ls_files(patterns: list[str] | None = None) -> dict[str, int]:
    return {s[3].strip(): int(s[0]) for s in lines}


-def load_idedata(environment):
+def load_idedata(environment: str) -> dict[str, Any]:
    start_time = time.time()
    print(f"Loading IDE data for environment '{environment}'...")

@@ -442,3 +474,83 @@ def get_usable_cpu_count() -> int:
    return (
        os.process_cpu_count() if hasattr(os, "process_cpu_count") else os.cpu_count()
    )
+
+
+def get_all_dependencies(component_names: set[str]) -> set[str]:
+    """Compute the transitive closure of a set of components.
+
+    Starting from component_names, repeatedly adds each loadable component's
+    dependencies and auto_load entries until nothing new turns up. Every
+    component is resolved through get_component() only once.
+
+    Note: this resets the global CORE object and points CORE.config_path at the
+    repository root before any component is loaded.
+
+    Args:
+        component_names: Set of component names to get dependencies for
+
+    Returns:
+        Set of all components including dependencies and auto-loaded components
+    """
+    from esphome.const import KEY_CORE
+    from esphome.core import CORE
+    from esphome.loader import get_component
+
+    # Reset CORE to ensure clean state
+    CORE.reset()
+
+    # Set up fake config path for component loading
+    CORE.config_path = str(Path(__file__).parent.parent)
+    CORE.data[KEY_CORE] = {}
+
+    all_components: set[str] = set(component_names)
+    pending: set[str] = set(all_components)
+
+    # Only components discovered in the previous round still need resolving
+    while pending:
+        discovered: set[str] = set()
+        for comp_name in pending:
+            comp = get_component(comp_name)
+            if not comp:
+                continue
+            # Keep only the part of each dependency before the first '.'
+            discovered.update(dep.split(".")[0] for dep in comp.dependencies)
+            discovered.update(comp.auto_load)
+
+        # Names that are already known have been resolved; queue only the rest
+        pending = discovered - all_components
+        all_components |= pending
+
+    return all_components
+
+
+def get_components_from_integration_fixtures() -> set[str]:
+    """Extract all components used in integration test fixtures.
+
+    Every *.yaml file directly inside tests/integration/fixtures/ contributes its
+    top-level keys and the "platform" value of each dict in a top-level list.
+
+    Returns:
+        Set of component names used in integration test fixtures
+    """
+    import yaml
+
+    components: set[str] = set()
+    fixtures_dir = Path(__file__).parent.parent / "tests" / "integration" / "fixtures"
+
+    for yaml_file in fixtures_dir.glob("*.yaml"):
+        with open(yaml_file, encoding="utf-8") as f:
+            config: Any = yaml.safe_load(f)
+        # Empty documents and non-mapping top levels declare no components
+        if not isinstance(config, dict):
+            continue
+
+        components.update(config)
+        for value in config.values():
+            if not isinstance(value, list):
+                continue
+            for item in value:
+                if isinstance(item, dict) and "platform" in item:
+                    components.add(item["platform"])
+
+    return components
--- a/script/list-components.py
+++ b/script/list-components.py
@@ -20,6 +20,12 @@ def filter_component_files(str):
    return str.startswith("esphome/components/") | str.startswith("tests/components/")


+def get_all_component_files() -> list[str]:
+    """Return every path from git_ls_files() that filter_component_files accepts."""
+    tracked = git_ls_files()
+    return [path for path in tracked if filter_component_files(path)]
+
+
 def extract_component_names_array_from_files_array(files):
    components = []
    for file in files:
@@ -165,17 +171,20 @@ def main():
    if args.branch and not args.changed:
        parser.error("--branch requires --changed")

-    files = git_ls_files()
-    files = filter(filter_component_files, files)
-
    if args.changed:
-        if args.branch:
-            changed = changed_files(args.branch)
-        else:
-            changed = changed_files()
+        # When --changed is passed, only get the changed files
+        changed = changed_files(args.branch)
+
        # If any base test file(s) changed, there's no need to filter out components
-        if not any("tests/test_build_components" in file for file in changed):
-            files = [f for f in files if f in changed]
+        if any("tests/test_build_components" in file for file in changed):
+            # Need to get all component files
+            files = get_all_component_files()
+        else:
+            # Only look at changed component files
+            files = [f for f in changed if filter_component_files(f)]
+    else:
+        # Get all component files
+        files = get_all_component_files()

    for c in get_components(files, args.changed):
        print(c)