From e0b02ccfc5f6a9cb9abd209262fb5e69906da474 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 29 Oct 2025 18:43:49 -0500 Subject: [PATCH] [ci] Consolidate component splitting into determine-jobs --- .github/workflows/ci.yml | 52 +++-------------------------- script/determine-jobs.py | 22 ++++++++++++ script/split_components_for_ci.py | 4 +++ tests/script/test_determine_jobs.py | 11 ++++++ 4 files changed, 42 insertions(+), 47 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd45adb78b..c0b595b2dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -536,59 +536,18 @@ jobs: run: script/ci-suggest-changes if: always() - test-build-components-splitter: - name: Split components for intelligent grouping (40 weighted per batch) - runs-on: ubuntu-24.04 - needs: - - common - - determine-jobs - if: github.event_name == 'pull_request' && fromJSON(needs.determine-jobs.outputs.component-test-count) > 0 - outputs: - matrix: ${{ steps.split.outputs.components }} - steps: - - name: Check out code from GitHub - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - - name: Restore Python - uses: ./.github/actions/restore-python - with: - python-version: ${{ env.DEFAULT_PYTHON }} - cache-key: ${{ needs.common.outputs.cache-key }} - - name: Split components intelligently based on bus configurations - id: split - run: | - . venv/bin/activate - - # Use intelligent splitter that groups components with same bus configs - components='${{ needs.determine-jobs.outputs.changed-components-with-tests }}' - - # Only isolate directly changed components when targeting dev branch - # For beta/release branches, group everything for faster CI - if [[ "${{ github.base_ref }}" == beta* ]] || [[ "${{ github.base_ref }}" == release* ]]; then - directly_changed='[]' - echo "Target branch: ${{ github.base_ref }} - grouping all components" - else - directly_changed='${{ needs.determine-jobs.outputs.directly-changed-components-with-tests }}' - echo "Target branch: ${{ github.base_ref }} - isolating directly changed components" - fi - - echo "Splitting components intelligently..." - output=$(python3 script/split_components_for_ci.py --components "$components" --directly-changed "$directly_changed" --batch-size 40 --output github) - - echo "$output" >> $GITHUB_OUTPUT - test-build-components-split: - name: Test components batch (${{ matrix.components }}) + name: Test components batch (${{ join(matrix.components, ' ') }}) runs-on: ubuntu-24.04 needs: - common - determine-jobs - - test-build-components-splitter if: github.event_name == 'pull_request' && fromJSON(needs.determine-jobs.outputs.component-test-count) > 0 strategy: fail-fast: false max-parallel: ${{ (startsWith(github.base_ref, 'beta') || startsWith(github.base_ref, 'release')) && 8 || 4 }} matrix: - components: ${{ fromJson(needs.test-build-components-splitter.outputs.matrix) }} + components: ${{ fromJson(needs.determine-jobs.outputs.component-test-batches) }} steps: - name: Show disk space run: | @@ -596,7 +555,7 @@ jobs: df -h - name: List components - run: echo ${{ matrix.components }} + run: echo ${{ join(matrix.components, ' ') }} - name: Cache apt packages uses: awalsh128/cache-apt-pkgs-action@acb598e5ddbc6f68a970c5da0688d2f3a9f04d05 # v1.5.3 @@ -640,8 +599,8 @@ jobs: echo "Using / for build files (more space available than /mnt or /mnt unavailable)" fi - # Convert space-separated components to comma-separated for Python script - components_csv=$(echo "${{ matrix.components }}" | tr ' ' ',') + # Convert JSON array to comma-separated for Python script + components_csv=$(echo '${{ toJson(matrix.components) }}' | jq -r 'join(",")') # Only isolate directly changed components when targeting dev branch # For beta/release branches, group everything for faster CI @@ -980,7 +939,6 @@ jobs: - clang-tidy-nosplit - clang-tidy-split - determine-jobs - - test-build-components-splitter - test-build-components-split - pre-commit-ci-lite - memory-impact-target-branch diff --git a/script/determine-jobs.py b/script/determine-jobs.py index 21eb529f33..4324431ef5 100755 --- a/script/determine-jobs.py +++ b/script/determine-jobs.py @@ -43,12 +43,14 @@ from enum import StrEnum from functools import cache import json import os +from pathlib import Path import subprocess import sys from typing import Any from helpers import ( CPP_FILE_EXTENSIONS, + ESPHOME_TESTS_COMPONENTS_PATH, PYTHON_FILE_EXTENSIONS, changed_files, core_changed, @@ -65,12 +67,17 @@ from helpers import ( parse_test_filename, root_path, ) +from split_components_for_ci import create_intelligent_batches # Threshold for splitting clang-tidy jobs # For small PRs (< 65 files), use nosplit for faster CI # For large PRs (>= 65 files), use split for better parallelization CLANG_TIDY_SPLIT_THRESHOLD = 65 +# Component test batch size (weighted) +# Isolated components count as 10x, groupable components count as 1x +COMPONENT_TEST_BATCH_SIZE = 40 + class Platform(StrEnum): """Platform identifiers for memory impact analysis.""" @@ -686,6 +693,20 @@ def main() -> None: # Determine which C++ unit tests to run cpp_run_all, cpp_components = determine_cpp_unit_tests(args.branch) + # Split components into batches for CI testing + # This intelligently groups components with similar bus configurations + component_test_batches: list[list[str]] + if changed_components_with_tests: + tests_dir = Path(root_path) / ESPHOME_TESTS_COMPONENTS_PATH + component_test_batches, _ = create_intelligent_batches( + components=changed_components_with_tests, + tests_dir=tests_dir, + batch_size=COMPONENT_TEST_BATCH_SIZE, + directly_changed=directly_changed_with_tests, + ) + else: + component_test_batches = [] + output: dict[str, Any] = { "integration_tests": run_integration, "clang_tidy": run_clang_tidy, @@ -703,6 +724,7 @@ def main() -> None: "memory_impact": memory_impact, "cpp_unit_tests_run_all": cpp_run_all, "cpp_unit_tests_components": cpp_components, + "component_test_batches": component_test_batches, } # Output as JSON diff --git a/script/split_components_for_ci.py b/script/split_components_for_ci.py index 87da540d43..65d09efb9b 100755 --- a/script/split_components_for_ci.py +++ b/script/split_components_for_ci.py @@ -62,6 +62,10 @@ def create_intelligent_batches( ) -> tuple[list[list[str]], dict[tuple[str, str], list[str]]]: """Create batches optimized for component grouping. + IMPORTANT: This function is called from both split_components_for_ci.py (standalone script) + and determine-jobs.py (integrated into job determination). Be careful when refactoring + to ensure changes work in both contexts. + Args: components: List of component names to batch tests_dir: Path to tests/components directory diff --git a/tests/script/test_determine_jobs.py b/tests/script/test_determine_jobs.py index c8ef76184f..ad658cd53c 100644 --- a/tests/script/test_determine_jobs.py +++ b/tests/script/test_determine_jobs.py @@ -152,6 +152,14 @@ def test_main_all_tests_should_run( assert output["memory_impact"]["should_run"] == "false" assert output["cpp_unit_tests_run_all"] is False assert output["cpp_unit_tests_components"] == ["wifi", "api", "sensor"] + # component_test_batches should be present and be a list of lists + assert "component_test_batches" in output + assert isinstance(output["component_test_batches"], list) + # Each batch should be a list of component names + for batch in output["component_test_batches"]: + assert isinstance(batch, list) + for component in batch: + assert isinstance(component, str) def test_main_no_tests_should_run( @@ -209,6 +217,9 @@ def test_main_no_tests_should_run( assert output["memory_impact"]["should_run"] == "false" assert output["cpp_unit_tests_run_all"] is False assert output["cpp_unit_tests_components"] == [] + # component_test_batches should be empty list + assert "component_test_batches" in output + assert output["component_test_batches"] == [] def test_main_with_branch_argument(