mirror of
https://github.com/esphome/esphome.git
synced 2025-10-22 03:33:52 +01:00
452 lines
17 KiB
Python
452 lines
17 KiB
Python
"""Memory usage analyzer for ESPHome compiled binaries."""
|
|
|
|
from collections import defaultdict
|
|
from dataclasses import dataclass, field
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
import re
|
|
import subprocess
|
|
from typing import TYPE_CHECKING
|
|
|
|
from .const import (
|
|
CORE_SUBCATEGORY_PATTERNS,
|
|
DEMANGLED_PATTERNS,
|
|
ESPHOME_COMPONENT_PATTERN,
|
|
SECTION_TO_ATTR,
|
|
SYMBOL_PATTERNS,
|
|
)
|
|
from .helpers import (
|
|
get_component_class_patterns,
|
|
get_esphome_components,
|
|
map_section_name,
|
|
parse_symbol_line,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from esphome.platformio_api import IDEData
|
|
|
|
# Module-level logger, named after this module's import path.
_LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class MemorySection:
    """A named memory section together with the symbols recorded for it."""

    # Name identifying the section.
    name: str
    # One (symbol_name, size, component) tuple per recorded symbol.
    symbols: list[tuple[str, int, str]] = field(default_factory=list)
    # Accumulated size of the section; starts at zero.
    total_size: int = 0
|
|
|
|
|
|
@dataclass
class ComponentMemory:
    """Per-component tally of memory usage, split by section kind."""

    name: str
    # Code in flash
    text_size: int = 0
    # Read-only data in flash
    rodata_size: int = 0
    # Initialized data (flash + ram)
    data_size: int = 0
    # Uninitialized data (ram only)
    bss_size: int = 0
    # Number of symbols attributed to this component
    symbol_count: int = 0

    @property
    def flash_total(self) -> int:
        """Flash footprint: text, rodata and data sizes added together."""
        return sum((self.text_size, self.rodata_size, self.data_size))

    @property
    def ram_total(self) -> int:
        """RAM footprint: data and bss sizes added together."""
        return sum((self.data_size, self.bss_size))
|
|
|
|
|
|
class MemoryAnalyzer:
|
|
"""Analyzes memory usage from ELF files."""
|
|
|
|
def __init__(
|
|
self,
|
|
elf_path: str,
|
|
objdump_path: str | None = None,
|
|
readelf_path: str | None = None,
|
|
external_components: set[str] | None = None,
|
|
idedata: "IDEData | None" = None,
|
|
):
|
|
"""Initialize memory analyzer.
|
|
|
|
Args:
|
|
elf_path: Path to ELF file to analyze
|
|
objdump_path: Path to objdump binary (auto-detected from idedata if not provided)
|
|
readelf_path: Path to readelf binary (auto-detected from idedata if not provided)
|
|
external_components: Set of external component names
|
|
idedata: Optional PlatformIO IDEData object to auto-detect toolchain paths
|
|
"""
|
|
self.elf_path = Path(elf_path)
|
|
if not self.elf_path.exists():
|
|
raise FileNotFoundError(f"ELF file not found: {elf_path}")
|
|
|
|
# Auto-detect toolchain paths from idedata if not provided
|
|
if idedata is not None and (objdump_path is None or readelf_path is None):
|
|
objdump_path = objdump_path or idedata.objdump_path
|
|
readelf_path = readelf_path or idedata.readelf_path
|
|
_LOGGER.debug("Using toolchain paths from PlatformIO idedata")
|
|
|
|
self.objdump_path = objdump_path or "objdump"
|
|
self.readelf_path = readelf_path or "readelf"
|
|
self.external_components = external_components or set()
|
|
|
|
self.sections: dict[str, MemorySection] = {}
|
|
self.components: dict[str, ComponentMemory] = defaultdict(
|
|
lambda: ComponentMemory("")
|
|
)
|
|
self._demangle_cache: dict[str, str] = {}
|
|
self._uncategorized_symbols: list[tuple[str, str, int]] = []
|
|
self._esphome_core_symbols: list[
|
|
tuple[str, str, int]
|
|
] = [] # Track core symbols
|
|
self._component_symbols: dict[str, list[tuple[str, str, int]]] = defaultdict(
|
|
list
|
|
) # Track symbols for all components
|
|
|
|
def analyze(self) -> dict[str, ComponentMemory]:
|
|
"""Analyze the ELF file and return component memory usage."""
|
|
self._parse_sections()
|
|
self._parse_symbols()
|
|
self._categorize_symbols()
|
|
return dict(self.components)
|
|
|
|
def _parse_sections(self) -> None:
|
|
"""Parse section headers from ELF file."""
|
|
result = subprocess.run(
|
|
[self.readelf_path, "-S", str(self.elf_path)],
|
|
capture_output=True,
|
|
text=True,
|
|
check=True,
|
|
)
|
|
|
|
# Parse section headers
|
|
for line in result.stdout.splitlines():
|
|
# Look for section entries
|
|
if not (
|
|
match := re.match(
|
|
r"\s*\[\s*\d+\]\s+([\.\w]+)\s+\w+\s+[\da-fA-F]+\s+[\da-fA-F]+\s+([\da-fA-F]+)",
|
|
line,
|
|
)
|
|
):
|
|
continue
|
|
|
|
section_name = match.group(1)
|
|
size_hex = match.group(2)
|
|
size = int(size_hex, 16)
|
|
|
|
# Map to standard section name
|
|
mapped_section = map_section_name(section_name)
|
|
if not mapped_section:
|
|
continue
|
|
|
|
if mapped_section not in self.sections:
|
|
self.sections[mapped_section] = MemorySection(mapped_section)
|
|
self.sections[mapped_section].total_size += size
|
|
|
|
def _parse_symbols(self) -> None:
|
|
"""Parse symbols from ELF file."""
|
|
result = subprocess.run(
|
|
[self.objdump_path, "-t", str(self.elf_path)],
|
|
capture_output=True,
|
|
text=True,
|
|
check=True,
|
|
)
|
|
|
|
# Track seen addresses to avoid duplicates
|
|
seen_addresses: set[str] = set()
|
|
|
|
for line in result.stdout.splitlines():
|
|
if not (symbol_info := parse_symbol_line(line)):
|
|
continue
|
|
|
|
section, name, size, address = symbol_info
|
|
|
|
# Skip duplicate symbols at the same address (e.g., C1/C2 constructors)
|
|
if address in seen_addresses or section not in self.sections:
|
|
continue
|
|
|
|
self.sections[section].symbols.append((name, size, ""))
|
|
seen_addresses.add(address)
|
|
|
|
def _categorize_symbols(self) -> None:
|
|
"""Categorize symbols by component."""
|
|
# First, collect all unique symbol names for batch demangling
|
|
all_symbols = {
|
|
symbol_name
|
|
for section in self.sections.values()
|
|
for symbol_name, _, _ in section.symbols
|
|
}
|
|
|
|
# Batch demangle all symbols at once
|
|
self._batch_demangle_symbols(list(all_symbols))
|
|
|
|
# Now categorize with cached demangled names
|
|
for section_name, section in self.sections.items():
|
|
for symbol_name, size, _ in section.symbols:
|
|
component = self._identify_component(symbol_name)
|
|
|
|
if component not in self.components:
|
|
self.components[component] = ComponentMemory(component)
|
|
|
|
comp_mem = self.components[component]
|
|
comp_mem.symbol_count += 1
|
|
|
|
# Update the appropriate size attribute based on section
|
|
if attr_name := SECTION_TO_ATTR.get(section_name):
|
|
setattr(comp_mem, attr_name, getattr(comp_mem, attr_name) + size)
|
|
|
|
# Track uncategorized symbols
|
|
if component == "other" and size > 0:
|
|
demangled = self._demangle_symbol(symbol_name)
|
|
self._uncategorized_symbols.append((symbol_name, demangled, size))
|
|
|
|
# Track ESPHome core symbols for detailed analysis
|
|
if component == "[esphome]core" and size > 0:
|
|
demangled = self._demangle_symbol(symbol_name)
|
|
self._esphome_core_symbols.append((symbol_name, demangled, size))
|
|
|
|
# Track all component symbols for detailed analysis
|
|
if size > 0:
|
|
demangled = self._demangle_symbol(symbol_name)
|
|
self._component_symbols[component].append(
|
|
(symbol_name, demangled, size)
|
|
)
|
|
|
|
def _identify_component(self, symbol_name: str) -> str:
|
|
"""Identify which component a symbol belongs to."""
|
|
# Demangle C++ names if needed
|
|
demangled = self._demangle_symbol(symbol_name)
|
|
|
|
# Check for special component classes first (before namespace pattern)
|
|
# This handles cases like esphome::ESPHomeOTAComponent which should map to ota
|
|
if "esphome::" in demangled:
|
|
# Check for special component classes that include component name in the class
|
|
# For example: esphome::ESPHomeOTAComponent -> ota component
|
|
for component_name in get_esphome_components():
|
|
patterns = get_component_class_patterns(component_name)
|
|
if any(pattern in demangled for pattern in patterns):
|
|
return f"[esphome]{component_name}"
|
|
|
|
# Check for ESPHome component namespaces
|
|
match = ESPHOME_COMPONENT_PATTERN.search(demangled)
|
|
if match:
|
|
component_name = match.group(1)
|
|
# Strip trailing underscore if present (e.g., switch_ -> switch)
|
|
component_name = component_name.rstrip("_")
|
|
|
|
# Check if this is an actual component in the components directory
|
|
if component_name in get_esphome_components():
|
|
return f"[esphome]{component_name}"
|
|
# Check if this is a known external component from the config
|
|
if component_name in self.external_components:
|
|
return f"[external]{component_name}"
|
|
# Everything else in esphome:: namespace is core
|
|
return "[esphome]core"
|
|
|
|
# Check for esphome core namespace (no component namespace)
|
|
if "esphome::" in demangled:
|
|
# If no component match found, it's core
|
|
return "[esphome]core"
|
|
|
|
# Check against symbol patterns
|
|
for component, patterns in SYMBOL_PATTERNS.items():
|
|
if any(pattern in symbol_name for pattern in patterns):
|
|
return component
|
|
|
|
# Check against demangled patterns
|
|
for component, patterns in DEMANGLED_PATTERNS.items():
|
|
if any(pattern in demangled for pattern in patterns):
|
|
return component
|
|
|
|
# Special cases that need more complex logic
|
|
|
|
# Check if spi_flash vs spi_driver
|
|
if "spi_" in symbol_name or "SPI" in symbol_name:
|
|
if "spi_flash" in symbol_name:
|
|
return "spi_flash"
|
|
return "spi_driver"
|
|
|
|
# libc special printf variants
|
|
if symbol_name.startswith("_") and symbol_name[1:].replace("_r", "").replace(
|
|
"v", ""
|
|
).replace("s", "") in ["printf", "fprintf", "sprintf", "scanf"]:
|
|
return "libc"
|
|
|
|
# Track uncategorized symbols for analysis
|
|
return "other"
|
|
|
|
def _batch_demangle_symbols(self, symbols: list[str]) -> None:
|
|
"""Batch demangle C++ symbol names for efficiency."""
|
|
if not symbols:
|
|
return
|
|
|
|
# Try to find the appropriate c++filt for the platform
|
|
cppfilt_cmd = "c++filt"
|
|
|
|
_LOGGER.warning("Demangling %d symbols", len(symbols))
|
|
_LOGGER.warning("objdump_path = %s", self.objdump_path)
|
|
|
|
# Check if we have a toolchain-specific c++filt
|
|
if self.objdump_path and self.objdump_path != "objdump":
|
|
# Replace objdump with c++filt in the path
|
|
potential_cppfilt = self.objdump_path.replace("objdump", "c++filt")
|
|
_LOGGER.warning("Checking for toolchain c++filt at: %s", potential_cppfilt)
|
|
if Path(potential_cppfilt).exists():
|
|
cppfilt_cmd = potential_cppfilt
|
|
_LOGGER.warning("✓ Using toolchain c++filt: %s", cppfilt_cmd)
|
|
else:
|
|
_LOGGER.warning(
|
|
"✗ Toolchain c++filt not found at %s, using system c++filt",
|
|
potential_cppfilt,
|
|
)
|
|
else:
|
|
_LOGGER.warning(
|
|
"✗ Using system c++filt (objdump_path=%s)", self.objdump_path
|
|
)
|
|
|
|
# Strip GCC optimization suffixes and prefixes before demangling
|
|
# Suffixes like $isra$0, $part$0, $constprop$0 confuse c++filt
|
|
# Prefixes like _GLOBAL__sub_I_ need to be removed and tracked
|
|
symbols_stripped = []
|
|
symbols_prefixes = [] # Track removed prefixes
|
|
for symbol in symbols:
|
|
# Remove GCC optimization markers
|
|
stripped = re.sub(r"\$(?:isra|part|constprop)\$\d+", "", symbol)
|
|
|
|
# Handle GCC global constructor/initializer prefixes
|
|
# _GLOBAL__sub_I_<mangled> -> extract <mangled> for demangling
|
|
prefix = ""
|
|
if stripped.startswith("_GLOBAL__sub_I_"):
|
|
prefix = "_GLOBAL__sub_I_"
|
|
stripped = stripped[len(prefix) :]
|
|
elif stripped.startswith("_GLOBAL__sub_D_"):
|
|
prefix = "_GLOBAL__sub_D_"
|
|
stripped = stripped[len(prefix) :]
|
|
|
|
symbols_stripped.append(stripped)
|
|
symbols_prefixes.append(prefix)
|
|
|
|
try:
|
|
# Send all symbols to c++filt at once
|
|
result = subprocess.run(
|
|
[cppfilt_cmd],
|
|
input="\n".join(symbols_stripped),
|
|
capture_output=True,
|
|
text=True,
|
|
check=False,
|
|
)
|
|
if result.returncode == 0:
|
|
demangled_lines = result.stdout.strip().split("\n")
|
|
# Map original to demangled names
|
|
failed_count = 0
|
|
for original, stripped, prefix, demangled in zip(
|
|
symbols, symbols_stripped, symbols_prefixes, demangled_lines
|
|
):
|
|
# Add back any prefix that was removed
|
|
if prefix:
|
|
if demangled != stripped:
|
|
# Successfully demangled - add descriptive prefix
|
|
if prefix == "_GLOBAL__sub_I_":
|
|
demangled = f"[global constructor for: {demangled}]"
|
|
elif prefix == "_GLOBAL__sub_D_":
|
|
demangled = f"[global destructor for: {demangled}]"
|
|
else:
|
|
# Failed to demangle - restore original prefix
|
|
demangled = prefix + demangled
|
|
|
|
# If we stripped a suffix, add it back to the demangled name for clarity
|
|
if original != stripped and not prefix:
|
|
# Find what was stripped
|
|
suffix_match = re.search(
|
|
r"(\$(?:isra|part|constprop)\$\d+)", original
|
|
)
|
|
if suffix_match:
|
|
demangled = f"{demangled} [{suffix_match.group(1)}]"
|
|
|
|
self._demangle_cache[original] = demangled
|
|
|
|
# Log symbols that failed to demangle (stayed the same as stripped version)
|
|
if stripped == demangled and stripped.startswith("_Z"):
|
|
failed_count += 1
|
|
if failed_count <= 5: # Only log first 5 failures
|
|
_LOGGER.warning("Failed to demangle: %s", original[:100])
|
|
|
|
if failed_count > 0:
|
|
_LOGGER.warning(
|
|
"Failed to demangle %d/%d symbols using %s",
|
|
failed_count,
|
|
len(symbols),
|
|
cppfilt_cmd,
|
|
)
|
|
else:
|
|
_LOGGER.warning(
|
|
"Successfully demangled all %d symbols", len(symbols)
|
|
)
|
|
return
|
|
_LOGGER.warning(
|
|
"c++filt exited with code %d: %s",
|
|
result.returncode,
|
|
result.stderr[:200] if result.stderr else "(no error output)",
|
|
)
|
|
except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
|
|
# On error, cache originals
|
|
_LOGGER.warning("Failed to batch demangle symbols: %s", e)
|
|
|
|
# If demangling failed, cache originals
|
|
for symbol in symbols:
|
|
self._demangle_cache[symbol] = symbol
|
|
|
|
def _demangle_symbol(self, symbol: str) -> str:
|
|
"""Get demangled C++ symbol name from cache."""
|
|
return self._demangle_cache.get(symbol, symbol)
|
|
|
|
def _categorize_esphome_core_symbol(self, demangled: str) -> str:
|
|
"""Categorize ESPHome core symbols into subcategories."""
|
|
# Special patterns that need to be checked separately
|
|
if any(pattern in demangled for pattern in ["vtable", "typeinfo", "thunk"]):
|
|
return "C++ Runtime (vtables/RTTI)"
|
|
|
|
if demangled.startswith("std::"):
|
|
return "C++ STL"
|
|
|
|
# Check against patterns from const.py
|
|
for category, patterns in CORE_SUBCATEGORY_PATTERNS.items():
|
|
if any(pattern in demangled for pattern in patterns):
|
|
return category
|
|
|
|
return "Other Core"
|
|
|
|
def to_json(self) -> str:
|
|
"""Export analysis results as JSON."""
|
|
data = {
|
|
"components": {
|
|
name: {
|
|
"text": mem.text_size,
|
|
"rodata": mem.rodata_size,
|
|
"data": mem.data_size,
|
|
"bss": mem.bss_size,
|
|
"flash_total": mem.flash_total,
|
|
"ram_total": mem.ram_total,
|
|
"symbol_count": mem.symbol_count,
|
|
}
|
|
for name, mem in self.components.items()
|
|
},
|
|
"totals": {
|
|
"flash": sum(c.flash_total for c in self.components.values()),
|
|
"ram": sum(c.ram_total for c in self.components.values()),
|
|
},
|
|
}
|
|
return json.dumps(data, indent=2)
|
|
|
|
|
|
# Script entry point: import the CLI's main() and run it.
# NOTE(review): the relative import below only resolves when this module is
# executed as part of its package (e.g. via ``python -m``) — confirm the
# intended invocation.
if __name__ == "__main__":
    from .cli import main

    main()
|