From 862bbb7fe158ea44d519c8c705b444f928d62ed4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Oct 2025 13:09:09 -1000 Subject: [PATCH] [ci] Fix memory impact analysis failing on fork PRs (#11380) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> --- .../workflows/ci-memory-impact-comment.yml | 108 +++++++++ .github/workflows/ci.yml | 65 ++---- script/ci_add_metadata_to_json.py | 88 ++++++++ script/ci_memory_impact_comment.py | 207 ++++++++++++------ 4 files changed, 358 insertions(+), 110 deletions(-) create mode 100644 .github/workflows/ci-memory-impact-comment.yml create mode 100755 script/ci_add_metadata_to_json.py diff --git a/.github/workflows/ci-memory-impact-comment.yml b/.github/workflows/ci-memory-impact-comment.yml new file mode 100644 index 0000000000..4ce7abfb85 --- /dev/null +++ b/.github/workflows/ci-memory-impact-comment.yml @@ -0,0 +1,108 @@ +--- +name: Memory Impact Comment (Forks) + +on: + workflow_run: + workflows: ["CI"] + types: [completed] + +permissions: + contents: read + pull-requests: write + actions: read + +jobs: + memory-impact-comment: + name: Post memory impact comment (fork PRs only) + runs-on: ubuntu-24.04 + # Only run for PRs from forks that had successful CI runs + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' && + github.event.workflow_run.head_repository.full_name != github.repository + env: + GH_TOKEN: ${{ github.token }} + steps: + - name: Get PR details + id: pr + run: | + # Get PR details by searching for PR with matching head SHA + # The workflow_run.pull_requests field is often empty for forks + head_sha="${{ github.event.workflow_run.head_sha }}" + pr_data=$(gh api "/repos/${{ github.repository }}/commits/$head_sha/pulls" \ + --jq '.[0] | {number: .number, base_ref: .base.ref}') + if [ -z "$pr_data" ] || [ "$pr_data" == "null" ]; then + echo "No PR found for SHA $head_sha, skipping" + echo "skip=true" >> $GITHUB_OUTPUT + exit 0 + fi + + pr_number=$(echo "$pr_data" | jq -r '.number') + base_ref=$(echo "$pr_data" | jq -r '.base_ref') + + echo "pr_number=$pr_number" >> $GITHUB_OUTPUT + echo "base_ref=$base_ref" >> $GITHUB_OUTPUT + echo "Found PR #$pr_number targeting base branch: $base_ref" + + - name: Check out code from base repository + if: steps.pr.outputs.skip != 'true' + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + # Always check out from the base repository (esphome/esphome), never from forks + # Use the PR's target branch to ensure we run trusted code from the main repo + repository: ${{ github.repository }} + ref: ${{ steps.pr.outputs.base_ref }} + + - name: Restore Python + if: steps.pr.outputs.skip != 'true' + uses: ./.github/actions/restore-python + with: + python-version: "3.11" + cache-key: ${{ hashFiles('.cache-key') }} + + - name: Download memory analysis artifacts + if: steps.pr.outputs.skip != 'true' + run: | + run_id="${{ github.event.workflow_run.id }}" + echo "Downloading artifacts from workflow run $run_id" + + mkdir -p memory-analysis + + # Download target analysis artifact + if gh run download --name "memory-analysis-target" --dir memory-analysis --repo "${{ github.repository }}" "$run_id"; then + echo "Downloaded memory-analysis-target artifact." + else + echo "No memory-analysis-target artifact found." + fi + + # Download PR analysis artifact + if gh run download --name "memory-analysis-pr" --dir memory-analysis --repo "${{ github.repository }}" "$run_id"; then + echo "Downloaded memory-analysis-pr artifact." + else + echo "No memory-analysis-pr artifact found." + fi + + - name: Check if artifacts exist + id: check + if: steps.pr.outputs.skip != 'true' + run: | + if [ -f ./memory-analysis/memory-analysis-target.json ] && [ -f ./memory-analysis/memory-analysis-pr.json ]; then + echo "found=true" >> $GITHUB_OUTPUT + else + echo "found=false" >> $GITHUB_OUTPUT + echo "Memory analysis artifacts not found, skipping comment" + fi + + - name: Post or update PR comment + if: steps.pr.outputs.skip != 'true' && steps.check.outputs.found == 'true' + env: + PR_NUMBER: ${{ steps.pr.outputs.pr_number }} + run: | + . venv/bin/activate + # Pass PR number and JSON file paths directly to Python script + # Let Python parse the JSON to avoid shell injection risks + # The script will validate and sanitize all inputs + python script/ci_memory_impact_comment.py \ + --pr-number "$PR_NUMBER" \ + --target-json ./memory-analysis/memory-analysis-target.json \ + --pr-json ./memory-analysis/memory-analysis-pr.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42f934de9d..6f96f2ac14 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -641,6 +641,12 @@ jobs: --output-env \ --output-json memory-analysis-target.json + # Add metadata to JSON before caching + python script/ci_add_metadata_to_json.py \ + --json-file memory-analysis-target.json \ + --components "$components" \ + --platform "$platform" + - name: Save memory analysis to cache if: steps.check-script.outputs.skip != 'true' && steps.cache-memory-analysis.outputs.cache-hit != 'true' && steps.build.outcome == 'success' uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 @@ -720,6 +726,13 @@ jobs: python script/ci_memory_impact_extract.py \ --output-env \ --output-json memory-analysis-pr.json + + # Add metadata to JSON (components and platform are in shell variables above) + python script/ci_add_metadata_to_json.py \ + --json-file memory-analysis-pr.json \ + --components "$components" \ + --platform "$platform" + - name: Upload memory analysis JSON uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: @@ -736,10 +749,12 @@ jobs: - determine-jobs - memory-impact-target-branch - memory-impact-pr-branch - if: github.event_name == 'pull_request' && fromJSON(needs.determine-jobs.outputs.memory_impact).should_run == 'true' && needs.memory-impact-target-branch.outputs.skip != 'true' + if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository && fromJSON(needs.determine-jobs.outputs.memory_impact).should_run == 'true' && needs.memory-impact-target-branch.outputs.skip != 'true' permissions: contents: read pull-requests: write + env: + GH_TOKEN: ${{ github.token }} steps: - name: Check out code uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 @@ -762,52 +777,16 @@ jobs: continue-on-error: true - name: Post or update PR comment env: - GH_TOKEN: ${{ github.token }} - COMPONENTS: ${{ toJSON(fromJSON(needs.determine-jobs.outputs.memory_impact).components) }} - PLATFORM: ${{ fromJSON(needs.determine-jobs.outputs.memory_impact).platform }} - TARGET_RAM: ${{ needs.memory-impact-target-branch.outputs.ram_usage }} - TARGET_FLASH: ${{ needs.memory-impact-target-branch.outputs.flash_usage }} - PR_RAM: ${{ needs.memory-impact-pr-branch.outputs.ram_usage }} - PR_FLASH: ${{ needs.memory-impact-pr-branch.outputs.flash_usage }} - TARGET_CACHE_HIT: ${{ needs.memory-impact-target-branch.outputs.cache_hit }} + PR_NUMBER: ${{ github.event.pull_request.number }} run: | . venv/bin/activate - # Check if analysis JSON files exist - target_json_arg="" - pr_json_arg="" - - if [ -f ./memory-analysis/memory-analysis-target.json ]; then - echo "Found target analysis JSON" - target_json_arg="--target-json ./memory-analysis/memory-analysis-target.json" - else - echo "No target analysis JSON found" - fi - - if [ -f ./memory-analysis/memory-analysis-pr.json ]; then - echo "Found PR analysis JSON" - pr_json_arg="--pr-json ./memory-analysis/memory-analysis-pr.json" - else - echo "No PR analysis JSON found" - fi - - # Add cache flag if target was cached - cache_flag="" - if [ "$TARGET_CACHE_HIT" == "true" ]; then - cache_flag="--target-cache-hit" - fi - + # Pass JSON file paths directly to Python script + # All data is extracted from JSON files for security python script/ci_memory_impact_comment.py \ - --pr-number "${{ github.event.pull_request.number }}" \ - --components "$COMPONENTS" \ - --platform "$PLATFORM" \ - --target-ram "$TARGET_RAM" \ - --target-flash "$TARGET_FLASH" \ - --pr-ram "$PR_RAM" \ - --pr-flash "$PR_FLASH" \ - $target_json_arg \ - $pr_json_arg \ - $cache_flag + --pr-number "$PR_NUMBER" \ + --target-json ./memory-analysis/memory-analysis-target.json \ + --pr-json ./memory-analysis/memory-analysis-pr.json ci-status: name: CI Status diff --git a/script/ci_add_metadata_to_json.py b/script/ci_add_metadata_to_json.py new file mode 100755 index 0000000000..687b5131c0 --- /dev/null +++ b/script/ci_add_metadata_to_json.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +"""Add metadata to memory analysis JSON file. + +This script adds components and platform metadata to an existing +memory analysis JSON file. Used by CI to ensure all required fields are present +for the comment script. +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path +import sys + + +def main() -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Add metadata to memory analysis JSON file" + ) + parser.add_argument( + "--json-file", + required=True, + help="Path to JSON file to update", + ) + parser.add_argument( + "--components", + required=True, + help='JSON array of component names (e.g., \'["api", "wifi"]\')', + ) + parser.add_argument( + "--platform", + required=True, + help="Platform name", + ) + + args = parser.parse_args() + + # Load existing JSON + json_path = Path(args.json_file) + if not json_path.exists(): + print(f"Error: JSON file not found: {args.json_file}", file=sys.stderr) + return 1 + + try: + with open(json_path, encoding="utf-8") as f: + data = json.load(f) + except (json.JSONDecodeError, OSError) as e: + print(f"Error loading JSON: {e}", file=sys.stderr) + return 1 + + # Parse components + try: + components = json.loads(args.components) + if not isinstance(components, list): + print("Error: --components must be a JSON array", file=sys.stderr) + return 1 + # Element-level validation: ensure each component is a non-empty string + for idx, comp in enumerate(components): + if not isinstance(comp, str) or not comp.strip(): + print( + f"Error: component at index {idx} is not a non-empty string: {comp!r}", + file=sys.stderr, + ) + return 1 + except json.JSONDecodeError as e: + print(f"Error parsing components: {e}", file=sys.stderr) + return 1 + + # Add metadata + data["components"] = components + data["platform"] = args.platform + + # Write back + try: + with open(json_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + print(f"Added metadata to {args.json_file}", file=sys.stderr) + except OSError as e: + print(f"Error writing JSON: {e}", file=sys.stderr) + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/script/ci_memory_impact_comment.py b/script/ci_memory_impact_comment.py index 4e3fbb9086..1331a44d03 100755 --- a/script/ci_memory_impact_comment.py +++ b/script/ci_memory_impact_comment.py @@ -24,6 +24,37 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) # Comment marker to identify our memory impact comments COMMENT_MARKER = "" + +def run_gh_command(args: list[str], operation: str) -> subprocess.CompletedProcess: + """Run a gh CLI command with error handling. + + Args: + args: Command arguments (including 'gh') + operation: Description of the operation for error messages + + Returns: + CompletedProcess result + + Raises: + subprocess.CalledProcessError: If command fails (with detailed error output) + """ + try: + return subprocess.run( + args, + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + print( + f"ERROR: {operation} failed with exit code {e.returncode}", file=sys.stderr + ) + print(f"ERROR: Command: {' '.join(args)}", file=sys.stderr) + print(f"ERROR: stdout: {e.stdout}", file=sys.stderr) + print(f"ERROR: stderr: {e.stderr}", file=sys.stderr) + raise + + # Thresholds for emoji significance indicators (percentage) OVERALL_CHANGE_THRESHOLD = 1.0 # Overall RAM/Flash changes COMPONENT_CHANGE_THRESHOLD = 3.0 # Component breakdown changes @@ -238,7 +269,6 @@ def create_comment_body( pr_analysis: dict | None = None, target_symbols: dict | None = None, pr_symbols: dict | None = None, - target_cache_hit: bool = False, ) -> str: """Create the comment body with memory impact analysis using Jinja2 templates. @@ -253,7 +283,6 @@ def create_comment_body( pr_analysis: Optional component breakdown for PR branch target_symbols: Optional symbol map for target branch pr_symbols: Optional symbol map for PR branch - target_cache_hit: Whether target branch analysis was loaded from cache Returns: Formatted comment body @@ -283,7 +312,6 @@ def create_comment_body( "flash_change": format_change( target_flash, pr_flash, threshold=OVERALL_CHANGE_THRESHOLD ), - "target_cache_hit": target_cache_hit, "component_change_threshold": COMPONENT_CHANGE_THRESHOLD, } @@ -356,7 +384,7 @@ def find_existing_comment(pr_number: str) -> str | None: print(f"DEBUG: Looking for existing comment on PR #{pr_number}", file=sys.stderr) # Use gh api to get comments directly - this returns the numeric id field - result = subprocess.run( + result = run_gh_command( [ "gh", "api", @@ -364,9 +392,7 @@ def find_existing_comment(pr_number: str) -> str | None: "--jq", ".[] | {id, body}", ], - capture_output=True, - text=True, - check=True, + operation="Get PR comments", ) print( @@ -420,7 +446,8 @@ def update_existing_comment(comment_id: str, comment_body: str) -> None: subprocess.CalledProcessError: If gh command fails """ print(f"DEBUG: Updating existing comment {comment_id}", file=sys.stderr) - result = subprocess.run( + print(f"DEBUG: Comment body length: {len(comment_body)} bytes", file=sys.stderr) + result = run_gh_command( [ "gh", "api", @@ -430,9 +457,7 @@ def update_existing_comment(comment_id: str, comment_body: str) -> None: "-f", f"body={comment_body}", ], - check=True, - capture_output=True, - text=True, + operation="Update PR comment", ) print(f"DEBUG: Update response: {result.stdout}", file=sys.stderr) @@ -448,11 +473,10 @@ def create_new_comment(pr_number: str, comment_body: str) -> None: subprocess.CalledProcessError: If gh command fails """ print(f"DEBUG: Posting new comment on PR #{pr_number}", file=sys.stderr) - result = subprocess.run( + print(f"DEBUG: Comment body length: {len(comment_body)} bytes", file=sys.stderr) + result = run_gh_command( ["gh", "pr", "comment", pr_number, "--body", comment_body], - check=True, - capture_output=True, - text=True, + operation="Create PR comment", ) print(f"DEBUG: Post response: {result.stdout}", file=sys.stderr) @@ -484,80 +508,129 @@ def main() -> int: description="Post or update PR comment with memory impact analysis" ) parser.add_argument("--pr-number", required=True, help="PR number") - parser.add_argument( - "--components", - required=True, - help='JSON array of component names (e.g., \'["api", "wifi"]\')', - ) - parser.add_argument("--platform", required=True, help="Platform name") - parser.add_argument( - "--target-ram", type=int, required=True, help="Target branch RAM usage" - ) - parser.add_argument( - "--target-flash", type=int, required=True, help="Target branch flash usage" - ) - parser.add_argument("--pr-ram", type=int, required=True, help="PR branch RAM usage") - parser.add_argument( - "--pr-flash", type=int, required=True, help="PR branch flash usage" - ) parser.add_argument( "--target-json", - help="Optional path to target branch analysis JSON (for detailed analysis)", + required=True, + help="Path to target branch analysis JSON file", ) parser.add_argument( "--pr-json", - help="Optional path to PR branch analysis JSON (for detailed analysis)", - ) - parser.add_argument( - "--target-cache-hit", - action="store_true", - help="Indicates that target branch analysis was loaded from cache", + required=True, + help="Path to PR branch analysis JSON file", ) args = parser.parse_args() - # Parse components from JSON - try: - components = json.loads(args.components) - if not isinstance(components, list): - print("Error: --components must be a JSON array", file=sys.stderr) - sys.exit(1) - except json.JSONDecodeError as e: - print(f"Error parsing --components JSON: {e}", file=sys.stderr) + # Load analysis JSON files (all data comes from JSON for security) + target_data: dict | None = load_analysis_json(args.target_json) + if not target_data: + print("Error: Failed to load target analysis JSON", file=sys.stderr) sys.exit(1) - # Load analysis JSON files - target_analysis = None - pr_analysis = None - target_symbols = None - pr_symbols = None + pr_data: dict | None = load_analysis_json(args.pr_json) + if not pr_data: + print("Error: Failed to load PR analysis JSON", file=sys.stderr) + sys.exit(1) - if args.target_json: - target_data = load_analysis_json(args.target_json) - if target_data and target_data.get("detailed_analysis"): - target_analysis = target_data["detailed_analysis"].get("components") - target_symbols = target_data["detailed_analysis"].get("symbols") + # Extract detailed analysis if available + target_analysis: dict | None = None + pr_analysis: dict | None = None + target_symbols: dict | None = None + pr_symbols: dict | None = None - if args.pr_json: - pr_data = load_analysis_json(args.pr_json) - if pr_data and pr_data.get("detailed_analysis"): - pr_analysis = pr_data["detailed_analysis"].get("components") - pr_symbols = pr_data["detailed_analysis"].get("symbols") + if target_data.get("detailed_analysis"): + target_analysis = target_data["detailed_analysis"].get("components") + target_symbols = target_data["detailed_analysis"].get("symbols") + + if pr_data.get("detailed_analysis"): + pr_analysis = pr_data["detailed_analysis"].get("components") + pr_symbols = pr_data["detailed_analysis"].get("symbols") + + # Extract all values from JSON files (prevents shell injection from PR code) + components = target_data.get("components") + platform = target_data.get("platform") + target_ram = target_data.get("ram_bytes") + target_flash = target_data.get("flash_bytes") + pr_ram = pr_data.get("ram_bytes") + pr_flash = pr_data.get("flash_bytes") + + # Validate required fields and types + missing_fields: list[str] = [] + type_errors: list[str] = [] + + if components is None: + missing_fields.append("components") + elif not isinstance(components, list): + type_errors.append( + f"components must be a list, got {type(components).__name__}" + ) + else: + for idx, comp in enumerate(components): + if not isinstance(comp, str): + type_errors.append( + f"components[{idx}] must be a string, got {type(comp).__name__}" + ) + if platform is None: + missing_fields.append("platform") + elif not isinstance(platform, str): + type_errors.append(f"platform must be a string, got {type(platform).__name__}") + + if target_ram is None: + missing_fields.append("target.ram_bytes") + elif not isinstance(target_ram, int): + type_errors.append( + f"target.ram_bytes must be an integer, got {type(target_ram).__name__}" + ) + + if target_flash is None: + missing_fields.append("target.flash_bytes") + elif not isinstance(target_flash, int): + type_errors.append( + f"target.flash_bytes must be an integer, got {type(target_flash).__name__}" + ) + + if pr_ram is None: + missing_fields.append("pr.ram_bytes") + elif not isinstance(pr_ram, int): + type_errors.append( + f"pr.ram_bytes must be an integer, got {type(pr_ram).__name__}" + ) + + if pr_flash is None: + missing_fields.append("pr.flash_bytes") + elif not isinstance(pr_flash, int): + type_errors.append( + f"pr.flash_bytes must be an integer, got {type(pr_flash).__name__}" + ) + + if missing_fields or type_errors: + if missing_fields: + print( + f"Error: JSON files missing required fields: {', '.join(missing_fields)}", + file=sys.stderr, + ) + if type_errors: + print( + f"Error: Type validation failed: {'; '.join(type_errors)}", + file=sys.stderr, + ) + print(f"Target JSON keys: {list(target_data.keys())}", file=sys.stderr) + print(f"PR JSON keys: {list(pr_data.keys())}", file=sys.stderr) + sys.exit(1) # Create comment body # Note: Memory totals (RAM/Flash) are summed across all builds if multiple were run. comment_body = create_comment_body( components=components, - platform=args.platform, - target_ram=args.target_ram, - target_flash=args.target_flash, - pr_ram=args.pr_ram, - pr_flash=args.pr_flash, + platform=platform, + target_ram=target_ram, + target_flash=target_flash, + pr_ram=pr_ram, + pr_flash=pr_flash, target_analysis=target_analysis, pr_analysis=pr_analysis, target_symbols=target_symbols, pr_symbols=pr_symbols, - target_cache_hit=args.target_cache_hit, ) # Post or update comment