1
0
mirror of https://github.com/esphome/esphome.git synced 2025-10-22 03:33:52 +01:00
Files
esphome/esphome/analyze_memory/__init__.py
J. Nick Koston f9807db08a preen
2025-10-17 18:37:24 -10:00

498 lines
18 KiB
Python

"""Memory usage analyzer for ESPHome compiled binaries."""
from collections import defaultdict
from dataclasses import dataclass, field
import logging
from pathlib import Path
import re
import subprocess
from typing import TYPE_CHECKING
from .const import (
CORE_SUBCATEGORY_PATTERNS,
DEMANGLED_PATTERNS,
ESPHOME_COMPONENT_PATTERN,
SECTION_TO_ATTR,
SYMBOL_PATTERNS,
)
from .helpers import (
get_component_class_patterns,
get_esphome_components,
map_section_name,
parse_symbol_line,
)
if TYPE_CHECKING:
from esphome.platformio_api import IDEData
_LOGGER = logging.getLogger(__name__)
# GCC global constructor/destructor prefix annotations
_GCC_PREFIX_ANNOTATIONS = {
"_GLOBAL__sub_I_": "global constructor for",
"_GLOBAL__sub_D_": "global destructor for",
}
# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2)
_GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
# C++ runtime patterns for categorization
_CPP_RUNTIME_PATTERNS = frozenset(["vtable", "typeinfo", "thunk"])
# libc printf/scanf family base names (used to detect variants like _printf_r, vfprintf, etc.)
_LIBC_PRINTF_SCANF_FAMILY = frozenset(["printf", "fprintf", "sprintf", "scanf"])
# Regex pattern for parsing readelf section headers
# Format: [ #] name type addr off size
_READELF_SECTION_PATTERN = re.compile(
r"\s*\[\s*\d+\]\s+([\.\w]+)\s+\w+\s+[\da-fA-F]+\s+[\da-fA-F]+\s+([\da-fA-F]+)"
)
# Component category prefixes
_COMPONENT_PREFIX_ESPHOME = "[esphome]"
_COMPONENT_PREFIX_EXTERNAL = "[external]"
_COMPONENT_CORE = f"{_COMPONENT_PREFIX_ESPHOME}core"
_COMPONENT_API = f"{_COMPONENT_PREFIX_ESPHOME}api"
@dataclass
class MemorySection:
"""Represents a memory section with its symbols."""
name: str
symbols: list[tuple[str, int, str]] = field(
default_factory=list
) # (symbol_name, size, component)
total_size: int = 0
@dataclass
class ComponentMemory:
"""Tracks memory usage for a component."""
name: str
text_size: int = 0 # Code in flash
rodata_size: int = 0 # Read-only data in flash
data_size: int = 0 # Initialized data (flash + ram)
bss_size: int = 0 # Uninitialized data (ram only)
symbol_count: int = 0
@property
def flash_total(self) -> int:
"""Total flash usage (text + rodata + data)."""
return self.text_size + self.rodata_size + self.data_size
@property
def ram_total(self) -> int:
"""Total RAM usage (data + bss)."""
return self.data_size + self.bss_size
class MemoryAnalyzer:
"""Analyzes memory usage from ELF files."""
def __init__(
self,
elf_path: str,
objdump_path: str | None = None,
readelf_path: str | None = None,
external_components: set[str] | None = None,
idedata: "IDEData | None" = None,
) -> None:
"""Initialize memory analyzer.
Args:
elf_path: Path to ELF file to analyze
objdump_path: Path to objdump binary (auto-detected from idedata if not provided)
readelf_path: Path to readelf binary (auto-detected from idedata if not provided)
external_components: Set of external component names
idedata: Optional PlatformIO IDEData object to auto-detect toolchain paths
"""
self.elf_path = Path(elf_path)
if not self.elf_path.exists():
raise FileNotFoundError(f"ELF file not found: {elf_path}")
# Auto-detect toolchain paths from idedata if not provided
if idedata is not None and (objdump_path is None or readelf_path is None):
objdump_path = objdump_path or idedata.objdump_path
readelf_path = readelf_path or idedata.readelf_path
_LOGGER.debug("Using toolchain paths from PlatformIO idedata")
self.objdump_path = objdump_path or "objdump"
self.readelf_path = readelf_path or "readelf"
self.external_components = external_components or set()
self.sections: dict[str, MemorySection] = {}
self.components: dict[str, ComponentMemory] = defaultdict(
lambda: ComponentMemory("")
)
self._demangle_cache: dict[str, str] = {}
self._uncategorized_symbols: list[tuple[str, str, int]] = []
self._esphome_core_symbols: list[
tuple[str, str, int]
] = [] # Track core symbols
self._component_symbols: dict[str, list[tuple[str, str, int]]] = defaultdict(
list
) # Track symbols for all components
def analyze(self) -> dict[str, ComponentMemory]:
"""Analyze the ELF file and return component memory usage."""
self._parse_sections()
self._parse_symbols()
self._categorize_symbols()
return dict(self.components)
def _parse_sections(self) -> None:
"""Parse section headers from ELF file."""
result = subprocess.run(
[self.readelf_path, "-S", str(self.elf_path)],
capture_output=True,
text=True,
check=True,
)
# Parse section headers
for line in result.stdout.splitlines():
# Look for section entries
if not (match := _READELF_SECTION_PATTERN.match(line)):
continue
section_name = match.group(1)
size_hex = match.group(2)
size = int(size_hex, 16)
# Map to standard section name
mapped_section = map_section_name(section_name)
if not mapped_section:
continue
if mapped_section not in self.sections:
self.sections[mapped_section] = MemorySection(mapped_section)
self.sections[mapped_section].total_size += size
def _parse_symbols(self) -> None:
"""Parse symbols from ELF file."""
result = subprocess.run(
[self.objdump_path, "-t", str(self.elf_path)],
capture_output=True,
text=True,
check=True,
)
# Track seen addresses to avoid duplicates
seen_addresses: set[str] = set()
for line in result.stdout.splitlines():
if not (symbol_info := parse_symbol_line(line)):
continue
section, name, size, address = symbol_info
# Skip duplicate symbols at the same address (e.g., C1/C2 constructors)
if address in seen_addresses or section not in self.sections:
continue
self.sections[section].symbols.append((name, size, ""))
seen_addresses.add(address)
def _categorize_symbols(self) -> None:
"""Categorize symbols by component."""
# First, collect all unique symbol names for batch demangling
all_symbols = {
symbol_name
for section in self.sections.values()
for symbol_name, _, _ in section.symbols
}
# Batch demangle all symbols at once
self._batch_demangle_symbols(list(all_symbols))
# Now categorize with cached demangled names
for section_name, section in self.sections.items():
for symbol_name, size, _ in section.symbols:
component = self._identify_component(symbol_name)
if component not in self.components:
self.components[component] = ComponentMemory(component)
comp_mem = self.components[component]
comp_mem.symbol_count += 1
# Update the appropriate size attribute based on section
if attr_name := SECTION_TO_ATTR.get(section_name):
setattr(comp_mem, attr_name, getattr(comp_mem, attr_name) + size)
# Track uncategorized symbols
if component == "other" and size > 0:
demangled = self._demangle_symbol(symbol_name)
self._uncategorized_symbols.append((symbol_name, demangled, size))
# Track ESPHome core symbols for detailed analysis
if component == _COMPONENT_CORE and size > 0:
demangled = self._demangle_symbol(symbol_name)
self._esphome_core_symbols.append((symbol_name, demangled, size))
# Track all component symbols for detailed analysis
if size > 0:
demangled = self._demangle_symbol(symbol_name)
self._component_symbols[component].append(
(symbol_name, demangled, size)
)
def _identify_component(self, symbol_name: str) -> str:
"""Identify which component a symbol belongs to."""
# Demangle C++ names if needed
demangled = self._demangle_symbol(symbol_name)
# Check for special component classes first (before namespace pattern)
# This handles cases like esphome::ESPHomeOTAComponent which should map to ota
if "esphome::" in demangled:
# Check for special component classes that include component name in the class
# For example: esphome::ESPHomeOTAComponent -> ota component
for component_name in get_esphome_components():
patterns = get_component_class_patterns(component_name)
if any(pattern in demangled for pattern in patterns):
return f"{_COMPONENT_PREFIX_ESPHOME}{component_name}"
# Check for ESPHome component namespaces
match = ESPHOME_COMPONENT_PATTERN.search(demangled)
if match:
component_name = match.group(1)
# Strip trailing underscore if present (e.g., switch_ -> switch)
component_name = component_name.rstrip("_")
# Check if this is an actual component in the components directory
if component_name in get_esphome_components():
return f"{_COMPONENT_PREFIX_ESPHOME}{component_name}"
# Check if this is a known external component from the config
if component_name in self.external_components:
return f"{_COMPONENT_PREFIX_EXTERNAL}{component_name}"
# Everything else in esphome:: namespace is core
return _COMPONENT_CORE
# Check for esphome core namespace (no component namespace)
if "esphome::" in demangled:
# If no component match found, it's core
return _COMPONENT_CORE
# Check against symbol patterns
for component, patterns in SYMBOL_PATTERNS.items():
if any(pattern in symbol_name for pattern in patterns):
return component
# Check against demangled patterns
for component, patterns in DEMANGLED_PATTERNS.items():
if any(pattern in demangled for pattern in patterns):
return component
# Special cases that need more complex logic
# Check if spi_flash vs spi_driver
if "spi_" in symbol_name or "SPI" in symbol_name:
return "spi_flash" if "spi_flash" in symbol_name else "spi_driver"
# libc special printf variants
if (
symbol_name.startswith("_")
and symbol_name[1:].replace("_r", "").replace("v", "").replace("s", "")
in _LIBC_PRINTF_SCANF_FAMILY
):
return "libc"
# Track uncategorized symbols for analysis
return "other"
def _batch_demangle_symbols(self, symbols: list[str]) -> None:
"""Batch demangle C++ symbol names for efficiency."""
if not symbols:
return
# Try to find the appropriate c++filt for the platform
cppfilt_cmd = "c++filt"
_LOGGER.info("Demangling %d symbols", len(symbols))
_LOGGER.debug("objdump_path = %s", self.objdump_path)
# Check if we have a toolchain-specific c++filt
if self.objdump_path and self.objdump_path != "objdump":
# Replace objdump with c++filt in the path
potential_cppfilt = self.objdump_path.replace("objdump", "c++filt")
_LOGGER.info("Checking for toolchain c++filt at: %s", potential_cppfilt)
if Path(potential_cppfilt).exists():
cppfilt_cmd = potential_cppfilt
_LOGGER.info("✓ Using toolchain c++filt: %s", cppfilt_cmd)
else:
_LOGGER.info(
"✗ Toolchain c++filt not found at %s, using system c++filt",
potential_cppfilt,
)
else:
_LOGGER.info("✗ Using system c++filt (objdump_path=%s)", self.objdump_path)
# Strip GCC optimization suffixes and prefixes before demangling
# Suffixes like $isra$0, $part$0, $constprop$0 confuse c++filt
# Prefixes like _GLOBAL__sub_I_ need to be removed and tracked
symbols_stripped: list[str] = []
symbols_prefixes: list[str] = [] # Track removed prefixes
for symbol in symbols:
# Remove GCC optimization markers
stripped = _GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)
# Handle GCC global constructor/initializer prefixes
# _GLOBAL__sub_I_<mangled> -> extract <mangled> for demangling
prefix = ""
for gcc_prefix in _GCC_PREFIX_ANNOTATIONS:
if stripped.startswith(gcc_prefix):
prefix = gcc_prefix
stripped = stripped[len(prefix) :]
break
symbols_stripped.append(stripped)
symbols_prefixes.append(prefix)
try:
# Send all symbols to c++filt at once
result = subprocess.run(
[cppfilt_cmd],
input="\n".join(symbols_stripped),
capture_output=True,
text=True,
check=False,
)
except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
# On error, cache originals
_LOGGER.warning("Failed to batch demangle symbols: %s", e)
for symbol in symbols:
self._demangle_cache[symbol] = symbol
return
if result.returncode != 0:
_LOGGER.warning(
"c++filt exited with code %d: %s",
result.returncode,
result.stderr[:200] if result.stderr else "(no error output)",
)
# Cache originals on failure
for symbol in symbols:
self._demangle_cache[symbol] = symbol
return
# Process demangled output
self._process_demangled_output(
symbols, symbols_stripped, symbols_prefixes, result.stdout, cppfilt_cmd
)
def _process_demangled_output(
self,
symbols: list[str],
symbols_stripped: list[str],
symbols_prefixes: list[str],
demangled_output: str,
cppfilt_cmd: str,
) -> None:
"""Process demangled symbol output and populate cache.
Args:
symbols: Original symbol names
symbols_stripped: Stripped symbol names sent to c++filt
symbols_prefixes: Removed prefixes to restore
demangled_output: Output from c++filt
cppfilt_cmd: Path to c++filt command (for logging)
"""
demangled_lines = demangled_output.strip().split("\n")
failed_count = 0
for original, stripped, prefix, demangled in zip(
symbols, symbols_stripped, symbols_prefixes, demangled_lines
):
# Add back any prefix that was removed
demangled = self._restore_symbol_prefix(prefix, stripped, demangled)
# If we stripped a suffix, add it back to the demangled name for clarity
if original != stripped and not prefix:
demangled = self._restore_symbol_suffix(original, demangled)
self._demangle_cache[original] = demangled
# Log symbols that failed to demangle (stayed the same as stripped version)
if stripped == demangled and stripped.startswith("_Z"):
failed_count += 1
if failed_count <= 5: # Only log first 5 failures
_LOGGER.warning("Failed to demangle: %s", original)
if failed_count == 0:
_LOGGER.info("Successfully demangled all %d symbols", len(symbols))
return
_LOGGER.warning(
"Failed to demangle %d/%d symbols using %s",
failed_count,
len(symbols),
cppfilt_cmd,
)
@staticmethod
def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
"""Restore prefix that was removed before demangling.
Args:
prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
stripped: Stripped symbol name
demangled: Demangled symbol name
Returns:
Demangled name with prefix restored/annotated
"""
if not prefix:
return demangled
# Successfully demangled - add descriptive prefix
if demangled != stripped and (
annotation := _GCC_PREFIX_ANNOTATIONS.get(prefix)
):
return f"[{annotation}: {demangled}]"
# Failed to demangle - restore original prefix
return prefix + demangled
@staticmethod
def _restore_symbol_suffix(original: str, demangled: str) -> str:
"""Restore GCC optimization suffix that was removed before demangling.
Args:
original: Original symbol name with suffix
demangled: Demangled symbol name without suffix
Returns:
Demangled name with suffix annotation
"""
if suffix_match := _GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original):
return f"{demangled} [{suffix_match.group(1)}]"
return demangled
def _demangle_symbol(self, symbol: str) -> str:
"""Get demangled C++ symbol name from cache."""
return self._demangle_cache.get(symbol, symbol)
def _categorize_esphome_core_symbol(self, demangled: str) -> str:
"""Categorize ESPHome core symbols into subcategories."""
# Special patterns that need to be checked separately
if any(pattern in demangled for pattern in _CPP_RUNTIME_PATTERNS):
return "C++ Runtime (vtables/RTTI)"
if demangled.startswith("std::"):
return "C++ STL"
# Check against patterns from const.py
for category, patterns in CORE_SUBCATEGORY_PATTERNS.items():
if any(pattern in demangled for pattern in patterns):
return category
return "Other Core"
if __name__ == "__main__":
from .cli import main
main()