1
0
mirror of https://github.com/esphome/esphome.git synced 2025-10-24 20:53:48 +01:00

Speed up clang-tidy CI by 80%+ with incremental checking (#9396)

This commit is contained in:
J. Nick Koston
2025-07-09 11:00:44 -10:00
committed by GitHub
parent 0ffc446315
commit 6616567b05
11 changed files with 1774 additions and 53 deletions

View File

@@ -270,7 +270,7 @@ def lint_newline(fname):
return "File contains Windows newline. Please set your editor to Unix newline mode."
@lint_content_check(exclude=["*.svg"])
@lint_content_check(exclude=["*.svg", ".clang-tidy.hash"])
def lint_end_newline(fname, content):
if content and not content.endswith("\n"):
return "File does not end with a newline, please add an empty line at the end of the file."

View File

@@ -22,6 +22,7 @@ from helpers import (
git_ls_files,
load_idedata,
print_error_for_file,
print_file_list,
root_path,
temp_header_file,
)
@@ -218,13 +219,14 @@ def main():
)
args = parser.parse_args()
idedata = load_idedata(args.environment)
options = clang_options(idedata)
files = []
for path in git_ls_files(["*.cpp"]):
files.append(os.path.relpath(path, os.getcwd()))
# Print initial file count if it's large
if len(files) > 50:
print(f"Found {len(files)} total files to process")
if args.files:
# Match against files specified on command-line
file_name_re = re.compile("|".join(args.files))
@@ -240,10 +242,28 @@ def main():
if args.split_num:
files = split_list(files, args.split_num)[args.split_at - 1]
print(f"Split {args.split_at}/{args.split_num}: checking {len(files)} files")
# Print file count before adding header file
print(f"\nTotal files to check: {len(files)}")
# Early exit if no files to check
if not files:
print("No files to check - exiting early")
return 0
# Only build header file if we have actual files to check
if args.all_headers and args.split_at in (None, 1):
build_all_include()
files.insert(0, temp_header_file)
print(f"Added all-include header file, new total: {len(files)}")
# Print final file list before loading idedata
print_file_list(files, "Final files to process:")
# Load idedata and options only if we have files to check
idedata = load_idedata(args.environment)
options = clang_options(idedata)
tmpdir = None
if args.fix:

188
script/clang_tidy_hash.py Executable file
View File

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
"""Calculate and manage hash for clang-tidy configuration."""
from __future__ import annotations
import argparse
import hashlib
from pathlib import Path
import re
import sys
# Add the script directory to path to import helpers
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir))
def read_file_lines(path: Path) -> list[str]:
"""Read lines from a file."""
with open(path) as f:
return f.readlines()
def parse_requirement_line(line: str) -> tuple[str, str] | None:
"""Parse a requirement line and return (package, original_line) or None.
Handles formats like:
- package==1.2.3
- package==1.2.3 # comment
- package>=1.2.3,<2.0.0
"""
original_line = line.strip()
# Extract the part before any comment for parsing
parse_line = line
if "#" in parse_line:
parse_line = parse_line[: parse_line.index("#")]
parse_line = parse_line.strip()
if not parse_line:
return None
# Use regex to extract package name
# This matches package names followed by version operators
match = re.match(r"^([a-zA-Z0-9_-]+)(==|>=|<=|>|<|!=|~=)(.+)$", parse_line)
if match:
return (match.group(1), original_line) # Return package name and original line
return None
def get_clang_tidy_version_from_requirements() -> str:
"""Get clang-tidy version from requirements_dev.txt"""
requirements_path = Path(__file__).parent.parent / "requirements_dev.txt"
lines = read_file_lines(requirements_path)
for line in lines:
parsed = parse_requirement_line(line)
if parsed and parsed[0] == "clang-tidy":
# Return the original line (preserves comments)
return parsed[1]
return "clang-tidy version not found"
def extract_platformio_flags() -> str:
"""Extract clang-tidy related flags from platformio.ini"""
flags: list[str] = []
in_clangtidy_section = False
platformio_path = Path(__file__).parent.parent / "platformio.ini"
lines = read_file_lines(platformio_path)
for line in lines:
line = line.strip()
if line.startswith("[flags:clangtidy]"):
in_clangtidy_section = True
continue
elif line.startswith("[") and in_clangtidy_section:
break
elif in_clangtidy_section and line and not line.startswith("#"):
flags.append(line)
return "\n".join(sorted(flags))
def read_file_bytes(path: Path) -> bytes:
"""Read bytes from a file."""
with open(path, "rb") as f:
return f.read()
def calculate_clang_tidy_hash() -> str:
"""Calculate hash of clang-tidy configuration and version"""
hasher = hashlib.sha256()
# Hash .clang-tidy file
clang_tidy_path = Path(__file__).parent.parent / ".clang-tidy"
content = read_file_bytes(clang_tidy_path)
hasher.update(content)
# Hash clang-tidy version from requirements_dev.txt
version = get_clang_tidy_version_from_requirements()
hasher.update(version.encode())
# Hash relevant platformio.ini sections
pio_flags = extract_platformio_flags()
hasher.update(pio_flags.encode())
return hasher.hexdigest()
def read_stored_hash() -> str | None:
"""Read the stored hash from file"""
hash_file = Path(__file__).parent.parent / ".clang-tidy.hash"
if hash_file.exists():
lines = read_file_lines(hash_file)
return lines[0].strip() if lines else None
return None
def write_file_content(path: Path, content: str) -> None:
"""Write content to a file."""
with open(path, "w") as f:
f.write(content)
def write_hash(hash_value: str) -> None:
"""Write hash to file"""
hash_file = Path(__file__).parent.parent / ".clang-tidy.hash"
write_file_content(hash_file, hash_value)
def main() -> None:
parser = argparse.ArgumentParser(description="Manage clang-tidy configuration hash")
parser.add_argument(
"--check",
action="store_true",
help="Check if full scan needed (exit 0 if needed)",
)
parser.add_argument("--update", action="store_true", help="Update the hash file")
parser.add_argument(
"--update-if-changed",
action="store_true",
help="Update hash only if configuration changed (for pre-commit)",
)
parser.add_argument(
"--verify", action="store_true", help="Verify hash matches (for CI)"
)
args = parser.parse_args()
current_hash = calculate_clang_tidy_hash()
stored_hash = read_stored_hash()
if args.check:
# Exit 0 if full scan needed (hash changed or no hash file)
sys.exit(0 if current_hash != stored_hash else 1)
elif args.update:
write_hash(current_hash)
print(f"Hash updated: {current_hash}")
elif args.update_if_changed:
if current_hash != stored_hash:
write_hash(current_hash)
print(f"Clang-tidy hash updated: {current_hash}")
# Exit 0 so pre-commit can stage the file
sys.exit(0)
else:
print("Clang-tidy hash unchanged")
sys.exit(0)
elif args.verify:
if current_hash != stored_hash:
print("ERROR: Clang-tidy configuration has changed but hash not updated!")
print(f"Expected: {current_hash}")
print(f"Found: {stored_hash}")
print("\nPlease run: script/clang_tidy_hash.py --update")
sys.exit(1)
print("Hash verification passed")
else:
print(f"Current hash: {current_hash}")
print(f"Stored hash: {stored_hash}")
print(f"Match: {current_hash == stored_hash}")
if __name__ == "__main__":
main()

View File

@@ -1,8 +1,12 @@
from __future__ import annotations
import json
import os
import os.path
from pathlib import Path
import re
import subprocess
import time
import colorama
@@ -12,13 +16,13 @@ temp_folder = os.path.join(root_path, ".temp")
temp_header_file = os.path.join(temp_folder, "all-include.cpp")
def styled(color, msg, reset=True):
def styled(color: str | tuple[str, ...], msg: str, reset: bool = True) -> str:
prefix = "".join(color) if isinstance(color, tuple) else color
suffix = colorama.Style.RESET_ALL if reset else ""
return prefix + msg + suffix
def print_error_for_file(file, body):
def print_error_for_file(file: str, body: str | None) -> None:
print(
styled(colorama.Fore.GREEN, "### File ")
+ styled((colorama.Fore.GREEN, colorama.Style.BRIGHT), file)
@@ -29,17 +33,22 @@ def print_error_for_file(file, body):
print()
def build_all_include():
def build_all_include() -> None:
# Build a cpp file that includes all header files in this repo.
# Otherwise header-only integrations would not be tested by clang-tidy
headers = []
for path in walk_files(basepath):
filetypes = (".h",)
ext = os.path.splitext(path)[1]
if ext in filetypes:
path = os.path.relpath(path, root_path)
include_p = path.replace(os.path.sep, "/")
headers.append(f'#include "{include_p}"')
# Use git ls-files to find all .h files in the esphome directory
# This is much faster than walking the filesystem
cmd = ["git", "ls-files", "esphome/**/*.h"]
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
# Process git output - git already returns paths relative to repo root
headers = [
f'#include "{include_p}"'
for line in proc.stdout.strip().split("\n")
if (include_p := line.replace(os.path.sep, "/"))
]
headers.sort()
headers.append("")
content = "\n".join(headers)
@@ -48,29 +57,86 @@ def build_all_include():
p.write_text(content, encoding="utf-8")
def walk_files(path):
for root, _, files in os.walk(path):
for name in files:
yield os.path.join(root, name)
def get_output(*args):
def get_output(*args: str) -> str:
with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
output, _ = proc.communicate()
return output.decode("utf-8")
def get_err(*args):
def get_err(*args: str) -> str:
with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
_, err = proc.communicate()
return err.decode("utf-8")
def splitlines_no_ends(string):
def splitlines_no_ends(string: str) -> list[str]:
return [s.strip() for s in string.splitlines()]
def changed_files(branch="dev"):
def _get_pr_number_from_github_env() -> str | None:
"""Extract PR number from GitHub environment variables.
Returns:
PR number as string, or None if not found
"""
# First try parsing GITHUB_REF (fastest)
github_ref = os.environ.get("GITHUB_REF", "")
if "/pull/" in github_ref:
return github_ref.split("/pull/")[1].split("/")[0]
# Fallback to GitHub event file
github_event_path = os.environ.get("GITHUB_EVENT_PATH")
if github_event_path and os.path.exists(github_event_path):
with open(github_event_path) as f:
event_data = json.load(f)
pr_data = event_data.get("pull_request", {})
if pr_number := pr_data.get("number"):
return str(pr_number)
return None
def _get_changed_files_github_actions() -> list[str] | None:
"""Get changed files in GitHub Actions environment.
Returns:
List of changed files, or None if should fall back to git method
"""
event_name = os.environ.get("GITHUB_EVENT_NAME")
# For pull requests
if event_name == "pull_request":
pr_number = _get_pr_number_from_github_env()
if pr_number:
# Use GitHub CLI to get changed files directly
cmd = ["gh", "pr", "diff", pr_number, "--name-only"]
return _get_changed_files_from_command(cmd)
# For pushes (including squash-and-merge)
elif event_name == "push":
# For push events, we want to check what changed in this commit
try:
# Get the changed files in the last commit
return _get_changed_files_from_command(
["git", "diff", "HEAD~1..HEAD", "--name-only"]
)
except: # noqa: E722
# Fall back to the original method if this fails
pass
return None
def changed_files(branch: str | None = None) -> list[str]:
# In GitHub Actions, we can use the API to get changed files more efficiently
if os.environ.get("GITHUB_ACTIONS") == "true":
github_files = _get_changed_files_github_actions()
if github_files is not None:
return github_files
# Original implementation for local development
if branch is None:
branch = "dev"
check_remotes = ["upstream", "origin"]
check_remotes.extend(splitlines_no_ends(get_output("git", "remote")))
for remote in check_remotes:
@@ -83,25 +149,159 @@ def changed_files(branch="dev"):
pass
else:
raise ValueError("Git not configured")
command = ["git", "diff", merge_base, "--name-only"]
changed = splitlines_no_ends(get_output(*command))
changed = [os.path.relpath(f, os.getcwd()) for f in changed]
changed.sort()
return changed
return _get_changed_files_from_command(["git", "diff", merge_base, "--name-only"])
def filter_changed(files):
def _get_changed_files_from_command(command: list[str]) -> list[str]:
"""Run a git command to get changed files and return them as a list."""
proc = subprocess.run(command, capture_output=True, text=True, check=False)
if proc.returncode != 0:
raise Exception(f"Command failed: {' '.join(command)}\nstderr: {proc.stderr}")
changed_files = splitlines_no_ends(proc.stdout)
changed_files = [os.path.relpath(f, os.getcwd()) for f in changed_files if f]
changed_files.sort()
return changed_files
def get_changed_components() -> list[str] | None:
"""Get list of changed components using list-components.py script.
This function:
1. First checks if any core files (esphome/core/*) changed - if so, returns None
2. Otherwise delegates to ./script/list-components.py --changed which:
- Analyzes all changed files
- Determines which components are affected (including dependencies)
- Returns a list of component names that need to be checked
Returns:
- None: Core files changed, need full scan
- Empty list: No components changed (only non-component files changed)
- List of strings: Names of components that need checking (e.g., ["wifi", "mqtt"])
"""
# Check if any core files changed first
changed = changed_files()
files = [f for f in files if f in changed]
print("Changed files:")
if not files:
print(" No changed files!")
for c in files:
print(f" {c}")
if any(f.startswith("esphome/core/") for f in changed):
print("Core files changed - will run full clang-tidy scan")
return None
# Use list-components.py to get changed components
script_path = os.path.join(root_path, "script", "list-components.py")
cmd = [script_path, "--changed"]
try:
result = subprocess.run(
cmd, capture_output=True, text=True, check=True, close_fds=False
)
components = [c.strip() for c in result.stdout.strip().split("\n") if c.strip()]
return components
except subprocess.CalledProcessError:
# If the script fails, fall back to full scan
print("Could not determine changed components - will run full clang-tidy scan")
return None
def _filter_changed_ci(files: list[str]) -> list[str]:
"""Filter files based on changed components in CI environment.
This function implements intelligent filtering to reduce CI runtime by only
checking files that could be affected by the changes. It handles three scenarios:
1. Core files changed (returns None from get_changed_components):
- Triggered when any file in esphome/core/ is modified
- Action: Check ALL files (full scan)
- Reason: Core files are used throughout the codebase
2. No components changed (returns empty list from get_changed_components):
- Triggered when only non-component files changed (e.g., scripts, configs)
- Action: Check only the specific non-component files that changed
- Example: If only script/clang-tidy changed, only check that file
3. Specific components changed (returns list of component names):
- Component detection done by: ./script/list-components.py --changed
- That script analyzes which components are affected by the changed files
INCLUDING their dependencies
- Action: Check ALL files in each component that list-components.py identifies
- Example: If wifi.cpp changed, list-components.py might return ["wifi", "network"]
if network depends on wifi. We then check ALL files in both
esphome/components/wifi/ and esphome/components/network/
- Reason: Component files often have interdependencies (headers, base classes)
Args:
files: List of all files that clang-tidy would normally check
Returns:
Filtered list of files to check
"""
components = get_changed_components()
if components is None:
# Scenario 1: Core files changed or couldn't determine components
# Action: Return all files for full scan
return files
if not components:
# Scenario 2: No components changed - only non-component files changed
# Action: Check only the specific non-component files that changed
changed = changed_files()
files = [
f for f in files if f in changed and not f.startswith("esphome/components/")
]
if not files:
print("No files changed")
return files
# Scenario 3: Specific components changed
# Action: Check ALL files in each changed component
# Convert component list to set for O(1) lookups
component_set = set(components)
print(f"Changed components: {', '.join(sorted(components))}")
# The 'files' parameter contains ALL files in the codebase that clang-tidy would check.
# We filter this down to only files in the changed components.
# We check ALL files in each changed component (not just the changed files)
# because changes in one file can affect other files in the same component.
filtered_files = []
for f in files:
if f.startswith("esphome/components/"):
# Check if file belongs to any of the changed components
parts = f.split("/")
if len(parts) >= 3 and parts[2] in component_set:
filtered_files.append(f)
return filtered_files
def _filter_changed_local(files: list[str]) -> list[str]:
"""Filter files based on git changes for local development.
Args:
files: List of all files to filter
Returns:
Filtered list of files to check
"""
# For local development, just check changed files directly
changed = changed_files()
return [f for f in files if f in changed]
def filter_changed(files: list[str]) -> list[str]:
"""Filter files to only those that changed or are in changed components.
Args:
files: List of files to filter
"""
# When running from CI, use component-based filtering
if os.environ.get("GITHUB_ACTIONS") == "true":
files = _filter_changed_ci(files)
else:
files = _filter_changed_local(files)
print_file_list(files, "Files to check after filtering:")
return files
def filter_grep(files, value):
def filter_grep(files: list[str], value: str) -> list[str]:
matched = []
for file in files:
with open(file, encoding="utf-8") as handle:
@@ -111,7 +311,7 @@ def filter_grep(files, value):
return matched
def git_ls_files(patterns=None):
def git_ls_files(patterns: list[str] | None = None) -> dict[str, int]:
command = ["git", "ls-files", "-s"]
if patterns is not None:
command.extend(patterns)
@@ -122,6 +322,9 @@ def git_ls_files(patterns=None):
def load_idedata(environment):
start_time = time.time()
print(f"Loading IDE data for environment '{environment}'...")
platformio_ini = Path(root_path) / "platformio.ini"
temp_idedata = Path(temp_folder) / f"idedata-{environment}.json"
changed = False
@@ -142,7 +345,10 @@ def load_idedata(environment):
changed = True
if not changed:
return json.loads(temp_idedata.read_text())
data = json.loads(temp_idedata.read_text())
elapsed = time.time() - start_time
print(f"IDE data loaded from cache in {elapsed:.2f} seconds")
return data
# ensure temp directory exists before running pio, as it writes sdkconfig to it
Path(temp_folder).mkdir(exist_ok=True)
@@ -158,6 +364,9 @@ def load_idedata(environment):
match = re.search(r'{\s*".*}', stdout.decode("utf-8"))
data = json.loads(match.group())
temp_idedata.write_text(json.dumps(data, indent=2) + "\n")
elapsed = time.time() - start_time
print(f"IDE data generated and cached in {elapsed:.2f} seconds")
return data
@@ -196,6 +405,29 @@ def get_binary(name: str, version: str) -> str:
raise
def print_file_list(
files: list[str], title: str = "Files:", max_files: int = 20
) -> None:
"""Print a list of files with optional truncation for large lists.
Args:
files: List of file paths to print
title: Title to print before the list
max_files: Maximum number of files to show before truncating (default: 20)
"""
print(title)
if not files:
print(" No files to check!")
elif len(files) <= max_files:
for f in sorted(files):
print(f" {f}")
else:
sorted_files = sorted(files)
for f in sorted_files[:10]:
print(f" {f}")
print(f" ... and {len(files) - 10} more files")
def get_usable_cpu_count() -> int:
"""Return the number of CPUs that can be used for processes.