[analyze-memory] Trace CSWTCH switch table symbols to source components (#13798)

Co-authored-by: Jonathan Swoboda <154711427+swoboda1337@users.noreply.github.com>
2026-02-08 00:31:58 +00:00 · 2026-02-06 18:08:30 +01:00
parent c7c9ffe7e1
commit 2917057da8
3 changed files with 267 additions and 13 deletions
--- a/esphome/analyze_memory/init.py
+++ b/esphome/analyze_memory/init.py
@@ -12,7 +12,6 @@ from .const import (
    CORE_SUBCATEGORY_PATTERNS,
    DEMANGLED_PATTERNS,
    ESPHOME_COMPONENT_PATTERN,
-    SECTION_TO_ATTR,
    SYMBOL_PATTERNS,
 )
 from .demangle import batch_demangle
@@ -91,6 +90,17 @@ class ComponentMemory:
    bss_size: int = 0  # Uninitialized data (ram only)
    symbol_count: int = 0

+    def add_section_size(self, section_name: str, size: int) -> None:
+        """Add size to the appropriate attribute for a section."""
+        if section_name == ".text":
+            self.text_size += size
+        elif section_name == ".rodata":
+            self.rodata_size += size
+        elif section_name == ".data":
+            self.data_size += size
+        elif section_name == ".bss":
+            self.bss_size += size
+
    @property
    def flash_total(self) -> int:
        """Total flash usage (text + rodata + data)."""
@@ -167,12 +177,15 @@ class MemoryAnalyzer:
        self._elf_symbol_names: set[str] = set()
        # SDK symbols not in ELF (static/local symbols from closed-source libs)
        self._sdk_symbols: list[SDKSymbol] = []
+        # CSWTCH symbols: list of (name, size, source_file, component)
+        self._cswtch_symbols: list[tuple[str, int, str, str]] = []

    def analyze(self) -> dict[str, ComponentMemory]:
        """Analyze the ELF file and return component memory usage."""
        self._parse_sections()
        self._parse_symbols()
        self._categorize_symbols()
+        self._analyze_cswtch_symbols()
        self._analyze_sdk_libraries()
        return dict(self.components)

@@ -255,8 +268,7 @@ class MemoryAnalyzer:
                comp_mem.symbol_count += 1

                # Update the appropriate size attribute based on section
-                if attr_name := SECTION_TO_ATTR.get(section_name):
-                    setattr(comp_mem, attr_name, getattr(comp_mem, attr_name) + size)
+                comp_mem.add_section_size(section_name, size)

                # Track uncategorized symbols
                if component == "other" and size > 0:
@@ -372,6 +384,205 @@ class MemoryAnalyzer:

        return "Other Core"

+    def _find_object_files_dir(self) -> Path | None:
+        """Find the directory containing object files for this build.
+
+        Returns:
+            Path to the directory containing .o files, or None if not found.
+        """
+        # The ELF is typically at .pioenvs/<env>/firmware.elf
+        # Object files are in .pioenvs/<env>/src/ and .pioenvs/<env>/lib*/
+        pioenvs_dir = self.elf_path.parent
+        if pioenvs_dir.exists() and any(pioenvs_dir.glob("src/*.o")):
+            return pioenvs_dir
+        return None
+
+    def _scan_cswtch_in_objects(
+        self, obj_dir: Path
+    ) -> dict[str, list[tuple[str, int]]]:
+        """Scan object files for CSWTCH symbols using a single nm invocation.
+
+        Uses ``nm --print-file-name -S`` on all ``.o`` files at once.
+        Output format: ``/path/to/file.o:address size type name``
+
+        Args:
+            obj_dir: Directory containing object files (.pioenvs/<env>/)
+
+        Returns:
+            Dict mapping "CSWTCH$NNN:size" to list of (source_file, size) tuples.
+        """
+        cswtch_map: dict[str, list[tuple[str, int]]] = defaultdict(list)
+
+        if not self.nm_path:
+            return cswtch_map
+
+        # Find all .o files recursively, sorted for deterministic output
+        obj_files = sorted(obj_dir.rglob("*.o"))
+        if not obj_files:
+            return cswtch_map
+
+        _LOGGER.debug("Scanning %d object files for CSWTCH symbols", len(obj_files))
+
+        # Single nm call with --print-file-name for all object files
+        result = run_tool(
+            [self.nm_path, "--print-file-name", "-S"] + [str(f) for f in obj_files],
+            timeout=30,
+        )
+        if result is None or result.returncode != 0:
+            return cswtch_map
+
+        for line in result.stdout.splitlines():
+            if "CSWTCH$" not in line:
+                continue
+
+            # Split on last ":" that precedes a hex address.
+            # nm --print-file-name format: filepath:hex_addr hex_size type name
+            # We split from the right: find the last colon followed by hex digits.
+            parts_after_colon = line.rsplit(":", 1)
+            if len(parts_after_colon) != 2:
+                continue
+
+            file_path = parts_after_colon[0]
+            fields = parts_after_colon[1].split()
+            # fields: [address, size, type, name]
+            if len(fields) < 4:
+                continue
+
+            sym_name = fields[3]
+            if not sym_name.startswith("CSWTCH$"):
+                continue
+
+            try:
+                size = int(fields[1], 16)
+            except ValueError:
+                continue
+
+            # Get relative path from obj_dir for readability
+            try:
+                rel_path = str(Path(file_path).relative_to(obj_dir))
+            except ValueError:
+                rel_path = file_path
+
+            key = f"{sym_name}:{size}"
+            cswtch_map[key].append((rel_path, size))
+
+        return cswtch_map
+
+    def _source_file_to_component(self, source_file: str) -> str:
+        """Map a source object file path to its component name.
+
+        Args:
+            source_file: Relative path like 'src/esphome/components/wifi/wifi_component.cpp.o'
+
+        Returns:
+            Component name like '[esphome]wifi' or the source file if unknown.
+        """
+        parts = Path(source_file).parts
+
+        # ESPHome component: src/esphome/components/<name>/...
+        if "components" in parts:
+            idx = parts.index("components")
+            if idx + 1 < len(parts):
+                component_name = parts[idx + 1]
+                if component_name in get_esphome_components():
+                    return f"{_COMPONENT_PREFIX_ESPHOME}{component_name}"
+                if component_name in self.external_components:
+                    return f"{_COMPONENT_PREFIX_EXTERNAL}{component_name}"
+
+        # ESPHome core: src/esphome/core/... or src/esphome/...
+        if "core" in parts and "esphome" in parts:
+            return _COMPONENT_CORE
+        if "esphome" in parts and "components" not in parts:
+            return _COMPONENT_CORE
+
+        # Framework/library files - return the first path component
+        # e.g., lib65b/ESPAsyncTCP/... -> lib65b
+        #        FrameworkArduino/... -> FrameworkArduino
+        return parts[0] if parts else source_file
+
+    def _analyze_cswtch_symbols(self) -> None:
+        """Analyze CSWTCH (GCC switch table) symbols by tracing to source objects.
+
+        CSWTCH symbols are compiler-generated lookup tables for switch statements.
+        They are local symbols, so the same name can appear in different object files.
+        This method scans .o files to attribute them to their source components.
+        """
+        obj_dir = self._find_object_files_dir()
+        if obj_dir is None:
+            _LOGGER.debug("No object files directory found, skipping CSWTCH analysis")
+            return
+
+        # Scan object files for CSWTCH symbols
+        cswtch_map = self._scan_cswtch_in_objects(obj_dir)
+        if not cswtch_map:
+            _LOGGER.debug("No CSWTCH symbols found in object files")
+            return
+
+        # Collect CSWTCH symbols from the ELF (already parsed in sections)
+        # Include section_name for re-attribution of component totals
+        elf_cswtch = [
+            (symbol_name, size, section_name)
+            for section_name, section in self.sections.items()
+            for symbol_name, size, _ in section.symbols
+            if symbol_name.startswith("CSWTCH$")
+        ]
+
+        _LOGGER.debug(
+            "Found %d CSWTCH symbols in ELF, %d unique in object files",
+            len(elf_cswtch),
+            len(cswtch_map),
+        )
+
+        # Match ELF CSWTCH symbols to source files and re-attribute component totals.
+        # _categorize_symbols() already ran and put these into "other" since CSWTCH$
+        # names don't match any component pattern. We move the bytes to the correct
+        # component based on the object file mapping.
+        other_mem = self.components.get("other")
+
+        for sym_name, size, section_name in elf_cswtch:
+            key = f"{sym_name}:{size}"
+            sources = cswtch_map.get(key, [])
+
+            if len(sources) == 1:
+                source_file = sources[0][0]
+                component = self._source_file_to_component(source_file)
+            elif len(sources) > 1:
+                # Ambiguous - multiple object files have same CSWTCH name+size
+                source_file = "ambiguous"
+                component = "ambiguous"
+                _LOGGER.debug(
+                    "Ambiguous CSWTCH %s (%d B) found in %d files: %s",
+                    sym_name,
+                    size,
+                    len(sources),
+                    ", ".join(src for src, _ in sources),
+                )
+            else:
+                source_file = "unknown"
+                component = "unknown"
+
+            self._cswtch_symbols.append((sym_name, size, source_file, component))
+
+            # Re-attribute from "other" to the correct component
+            if (
+                component not in ("other", "unknown", "ambiguous")
+                and other_mem is not None
+            ):
+                other_mem.add_section_size(section_name, -size)
+                if component not in self.components:
+                    self.components[component] = ComponentMemory(component)
+                self.components[component].add_section_size(section_name, size)
+
+        # Sort by size descending
+        self._cswtch_symbols.sort(key=lambda x: x[1], reverse=True)
+
+        total_size = sum(size for _, size, _, _ in self._cswtch_symbols)
+        _LOGGER.debug(
+            "CSWTCH analysis: %d symbols, %d bytes total",
+            len(self._cswtch_symbols),
+            total_size,
+        )
+
    def get_unattributed_ram(self) -> tuple[int, int, int]:
        """Get unattributed RAM sizes (SDK/framework overhead).

--- a/esphome/analyze_memory/cli.py
+++ b/esphome/analyze_memory/cli.py
@@ -184,6 +184,52 @@ class MemoryAnalyzerCLI(MemoryAnalyzer):
                f"{i + 1:>2}. {size:>7,} B {section_label:<8} {demangled_display:<{self.COL_TOP_SYMBOL_NAME}} {component}"
            )

+    def _add_cswtch_analysis(self, lines: list[str]) -> None:
+        """Add CSWTCH (GCC switch table lookup) analysis section."""
+        self._add_section_header(lines, "CSWTCH Analysis (GCC Switch Table Lookups)")
+
+        total_size = sum(size for _, size, _, _ in self._cswtch_symbols)
+        lines.append(
+            f"Total: {len(self._cswtch_symbols)} switch table(s), {total_size:,} B"
+        )
+        lines.append("")
+
+        # Group by component
+        by_component: dict[str, list[tuple[str, int, str]]] = defaultdict(list)
+        for sym_name, size, source_file, component in self._cswtch_symbols:
+            by_component[component].append((sym_name, size, source_file))
+
+        # Sort components by total size descending
+        sorted_components = sorted(
+            by_component.items(),
+            key=lambda x: sum(s[1] for s in x[1]),
+            reverse=True,
+        )
+
+        for component, symbols in sorted_components:
+            comp_total = sum(s[1] for s in symbols)
+            lines.append(f"{component} ({comp_total:,} B, {len(symbols)} tables):")
+
+            # Group by source file within component
+            by_file: dict[str, list[tuple[str, int]]] = defaultdict(list)
+            for sym_name, size, source_file in symbols:
+                by_file[source_file].append((sym_name, size))
+
+            for source_file, file_symbols in sorted(
+                by_file.items(),
+                key=lambda x: sum(s[1] for s in x[1]),
+                reverse=True,
+            ):
+                file_total = sum(s[1] for s in file_symbols)
+                lines.append(
+                    f"  {source_file} ({file_total:,} B, {len(file_symbols)} tables)"
+                )
+                for sym_name, size in sorted(
+                    file_symbols, key=lambda x: x[1], reverse=True
+                ):
+                    lines.append(f"    {size:>6,} B  {sym_name}")
+            lines.append("")
+
    def generate_report(self, detailed: bool = False) -> str:
        """Generate a formatted memory report."""
        components = sorted(
@@ -471,6 +517,10 @@ class MemoryAnalyzerCLI(MemoryAnalyzer):
                    lines.append(f"    ... and {len(large_ram_syms) - 10} more")
            lines.append("")

+        # CSWTCH (GCC switch table) analysis
+        if self._cswtch_symbols:
+            self._add_cswtch_analysis(lines)
+
        lines.append(
            "Note: This analysis covers symbols in the ELF file. Some runtime allocations may not be included."
        )
--- a/esphome/analyze_memory/const.py
+++ b/esphome/analyze_memory/const.py
@@ -66,15 +66,6 @@ SECTION_MAPPING = {
    ),
 }

-# Section to ComponentMemory attribute mapping
-# Maps section names to the attribute name in ComponentMemory dataclass
-SECTION_TO_ATTR = {
-    ".text": "text_size",
-    ".rodata": "rodata_size",
-    ".data": "data_size",
-    ".bss": "bss_size",
-}
-
 # Component identification rules
 # Symbol patterns: patterns found in raw symbol names
 SYMBOL_PATTERNS = {
@@ -513,7 +504,9 @@ SYMBOL_PATTERNS = {
        "__FUNCTION__$",
        "DAYS_IN_MONTH",
        "_DAYS_BEFORE_MONTH",
-        "CSWTCH$",
+        # Note: CSWTCH$ symbols are GCC switch table lookup tables.
+        # They are attributed to their source object files via _analyze_cswtch_symbols()
+        # rather than being lumped into libc.
        "dst$",
        "sulp",
        "_strtol_l",  # String to long with locale