|
|
|
|
@@ -43,6 +43,7 @@ _READELF_SECTION_PATTERN = re.compile(
|
|
|
|
|
# Component category prefixes
|
|
|
|
|
_COMPONENT_PREFIX_ESPHOME = "[esphome]"
|
|
|
|
|
_COMPONENT_PREFIX_EXTERNAL = "[external]"
|
|
|
|
|
_COMPONENT_PREFIX_LIB = "[lib]"
|
|
|
|
|
_COMPONENT_CORE = f"{_COMPONENT_PREFIX_ESPHOME}core"
|
|
|
|
|
_COMPONENT_API = f"{_COMPONENT_PREFIX_ESPHOME}api"
|
|
|
|
|
|
|
|
|
|
@@ -56,6 +57,16 @@ SymbolInfoType = tuple[str, int, str]
|
|
|
|
|
# RAM sections - symbols in these sections consume RAM
|
|
|
|
|
RAM_SECTIONS = frozenset([".data", ".bss"])
|
|
|
|
|
|
|
|
|
|
# nm symbol types for global/weak defined symbols (used for library symbol mapping)
|
|
|
|
|
# Only global (uppercase) and weak symbols are safe to use - local symbols (lowercase)
|
|
|
|
|
# can have name collisions across compilation units
|
|
|
|
|
_NM_DEFINED_GLOBAL_TYPES = frozenset({"T", "D", "B", "R", "W", "V"})
|
|
|
|
|
|
|
|
|
|
# Pattern matching compiler-generated local names that can collide across compilation
|
|
|
|
|
# units (e.g., packet$19, buf$20, flag$5261). These are unsafe for name-based lookup.
|
|
|
|
|
# Does NOT match mangled C++ names with optimization suffixes (e.g., func$isra$0).
|
|
|
|
|
_COMPILER_LOCAL_PATTERN = re.compile(r"^[a-zA-Z_]\w*\$\d+$")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
|
class MemorySection:
|
|
|
|
|
@@ -179,11 +190,19 @@ class MemoryAnalyzer:
|
|
|
|
|
self._sdk_symbols: list[SDKSymbol] = []
|
|
|
|
|
# CSWTCH symbols: list of (name, size, source_file, component)
|
|
|
|
|
self._cswtch_symbols: list[tuple[str, int, str, str]] = []
|
|
|
|
|
# Library symbol mapping: symbol_name -> library_name
|
|
|
|
|
self._lib_symbol_map: dict[str, str] = {}
|
|
|
|
|
# Library dir to name mapping: "lib641" -> "espsoftwareserial",
|
|
|
|
|
# "espressif__mdns" -> "mdns"
|
|
|
|
|
self._lib_hash_to_name: dict[str, str] = {}
|
|
|
|
|
# Heuristic category to library redirect: "mdns_lib" -> "[lib]mdns"
|
|
|
|
|
self._heuristic_to_lib: dict[str, str] = {}
|
|
|
|
|
|
|
|
|
|
def analyze(self) -> dict[str, ComponentMemory]:
|
|
|
|
|
"""Analyze the ELF file and return component memory usage."""
|
|
|
|
|
self._parse_sections()
|
|
|
|
|
self._parse_symbols()
|
|
|
|
|
self._scan_libraries()
|
|
|
|
|
self._categorize_symbols()
|
|
|
|
|
self._analyze_cswtch_symbols()
|
|
|
|
|
self._analyze_sdk_libraries()
|
|
|
|
|
@@ -328,15 +347,19 @@ class MemoryAnalyzer:
|
|
|
|
|
# If no component match found, it's core
|
|
|
|
|
return _COMPONENT_CORE
|
|
|
|
|
|
|
|
|
|
# Check library symbol map (more accurate than heuristic patterns)
|
|
|
|
|
if lib_name := self._lib_symbol_map.get(symbol_name):
|
|
|
|
|
return f"{_COMPONENT_PREFIX_LIB}{lib_name}"
|
|
|
|
|
|
|
|
|
|
# Check against symbol patterns
|
|
|
|
|
for component, patterns in SYMBOL_PATTERNS.items():
|
|
|
|
|
if any(pattern in symbol_name for pattern in patterns):
|
|
|
|
|
return component
|
|
|
|
|
return self._heuristic_to_lib.get(component, component)
|
|
|
|
|
|
|
|
|
|
# Check against demangled patterns
|
|
|
|
|
for component, patterns in DEMANGLED_PATTERNS.items():
|
|
|
|
|
if any(pattern in demangled for pattern in patterns):
|
|
|
|
|
return component
|
|
|
|
|
return self._heuristic_to_lib.get(component, component)
|
|
|
|
|
|
|
|
|
|
# Special cases that need more complex logic
|
|
|
|
|
|
|
|
|
|
@@ -384,6 +407,327 @@ class MemoryAnalyzer:
|
|
|
|
|
|
|
|
|
|
return "Other Core"
|
|
|
|
|
|
|
|
|
|
def _discover_pio_libraries(
|
|
|
|
|
self,
|
|
|
|
|
libraries: dict[str, list[Path]],
|
|
|
|
|
hash_to_name: dict[str, str],
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Discover PlatformIO third-party libraries from the build directory.
|
|
|
|
|
|
|
|
|
|
Scans ``lib<hex>/`` directories under ``.pioenvs/<env>/`` to find
|
|
|
|
|
library names and their ``.a`` archive or ``.o`` file paths.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
libraries: Dict to populate with library name -> file path list mappings.
|
|
|
|
|
Prefers ``.a`` archives when available, falls back to ``.o`` files
|
|
|
|
|
(e.g., pioarduino ESP32 Arduino builds only produce ``.o`` files).
|
|
|
|
|
hash_to_name: Dict to populate with dir name -> library name mappings
|
|
|
|
|
for CSWTCH attribution (e.g., ``lib641`` -> ``espsoftwareserial``).
|
|
|
|
|
"""
|
|
|
|
|
build_dir = self.elf_path.parent
|
|
|
|
|
|
|
|
|
|
for entry in build_dir.iterdir():
|
|
|
|
|
if not entry.is_dir() or not entry.name.startswith("lib"):
|
|
|
|
|
continue
|
|
|
|
|
# Validate that the suffix after "lib" is a hex hash
|
|
|
|
|
hex_part = entry.name[3:]
|
|
|
|
|
if not hex_part:
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
int(hex_part, 16)
|
|
|
|
|
except ValueError:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Each lib<hex>/ directory contains a subdirectory named after the library
|
|
|
|
|
for lib_subdir in entry.iterdir():
|
|
|
|
|
if not lib_subdir.is_dir():
|
|
|
|
|
continue
|
|
|
|
|
lib_name = lib_subdir.name.lower()
|
|
|
|
|
|
|
|
|
|
# Prefer .a archive (lib<LibraryName>.a), fall back to .o files
|
|
|
|
|
# e.g., lib72a/ESPAsyncTCP/... has lib72a/libESPAsyncTCP.a
|
|
|
|
|
archive = entry / f"lib{lib_subdir.name}.a"
|
|
|
|
|
if archive.exists():
|
|
|
|
|
file_paths = [archive]
|
|
|
|
|
elif archives := list(entry.glob("*.a")):
|
|
|
|
|
# Case-insensitive fallback
|
|
|
|
|
file_paths = [archives[0]]
|
|
|
|
|
else:
|
|
|
|
|
# No .a archive (e.g., pioarduino CMake builds) - use .o files
|
|
|
|
|
file_paths = sorted(lib_subdir.rglob("*.o"))
|
|
|
|
|
|
|
|
|
|
if file_paths:
|
|
|
|
|
libraries[lib_name] = file_paths
|
|
|
|
|
hash_to_name[entry.name] = lib_name
|
|
|
|
|
_LOGGER.debug(
|
|
|
|
|
"Discovered PlatformIO library: %s -> %s",
|
|
|
|
|
lib_subdir.name,
|
|
|
|
|
file_paths[0],
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _discover_idf_managed_components(
|
|
|
|
|
self,
|
|
|
|
|
libraries: dict[str, list[Path]],
|
|
|
|
|
hash_to_name: dict[str, str],
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Discover ESP-IDF managed component libraries from the build directory.
|
|
|
|
|
|
|
|
|
|
ESP-IDF managed components (from the IDF component registry) use a
|
|
|
|
|
``<vendor>__<name>`` naming convention. Source files live under
|
|
|
|
|
``managed_components/<vendor>__<name>/`` and the compiled archives are at
|
|
|
|
|
``esp-idf/<vendor>__<name>/lib<vendor>__<name>.a``.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
libraries: Dict to populate with library name -> file path list mappings.
|
|
|
|
|
hash_to_name: Dict to populate with dir name -> library name mappings
|
|
|
|
|
for CSWTCH attribution (e.g., ``espressif__mdns`` -> ``mdns``).
|
|
|
|
|
"""
|
|
|
|
|
build_dir = self.elf_path.parent
|
|
|
|
|
|
|
|
|
|
managed_dir = build_dir / "managed_components"
|
|
|
|
|
if not managed_dir.is_dir():
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
espidf_dir = build_dir / "esp-idf"
|
|
|
|
|
|
|
|
|
|
for entry in managed_dir.iterdir():
|
|
|
|
|
if not entry.is_dir() or "__" not in entry.name:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Extract the short name: espressif__mdns -> mdns
|
|
|
|
|
full_name = entry.name # e.g., espressif__mdns
|
|
|
|
|
short_name = full_name.split("__", 1)[1].lower()
|
|
|
|
|
|
|
|
|
|
# Find the .a archive under esp-idf/<vendor>__<name>/
|
|
|
|
|
archive = espidf_dir / full_name / f"lib{full_name}.a"
|
|
|
|
|
if archive.exists():
|
|
|
|
|
libraries[short_name] = [archive]
|
|
|
|
|
hash_to_name[full_name] = short_name
|
|
|
|
|
_LOGGER.debug(
|
|
|
|
|
"Discovered IDF managed component: %s -> %s",
|
|
|
|
|
short_name,
|
|
|
|
|
archive,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _build_library_symbol_map(
|
|
|
|
|
self, libraries: dict[str, list[Path]]
|
|
|
|
|
) -> dict[str, str]:
|
|
|
|
|
"""Build a symbol-to-library mapping from library archives or object files.
|
|
|
|
|
|
|
|
|
|
Runs ``nm --defined-only`` on each ``.a`` or ``.o`` file to collect
|
|
|
|
|
global and weak defined symbols.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
libraries: Dictionary mapping library name to list of file paths
|
|
|
|
|
(``.a`` archives or ``.o`` object files).
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary mapping symbol name to library name.
|
|
|
|
|
"""
|
|
|
|
|
symbol_map: dict[str, str] = {}
|
|
|
|
|
|
|
|
|
|
if not self.nm_path:
|
|
|
|
|
return symbol_map
|
|
|
|
|
|
|
|
|
|
for lib_name, file_paths in libraries.items():
|
|
|
|
|
result = run_tool(
|
|
|
|
|
[self.nm_path, "--defined-only", *(str(p) for p in file_paths)],
|
|
|
|
|
timeout=10,
|
|
|
|
|
)
|
|
|
|
|
if result is None or result.returncode != 0:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
for line in result.stdout.splitlines():
|
|
|
|
|
parts = line.split()
|
|
|
|
|
if len(parts) < 3:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
sym_type = parts[-2]
|
|
|
|
|
sym_name = parts[-1]
|
|
|
|
|
|
|
|
|
|
# Include global defined symbols (uppercase) and weak symbols (W/V)
|
|
|
|
|
if sym_type in _NM_DEFINED_GLOBAL_TYPES:
|
|
|
|
|
symbol_map[sym_name] = lib_name
|
|
|
|
|
|
|
|
|
|
return symbol_map
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _build_heuristic_to_lib_mapping(
|
|
|
|
|
library_names: set[str],
|
|
|
|
|
) -> dict[str, str]:
|
|
|
|
|
"""Build mapping from heuristic pattern categories to discovered libraries.
|
|
|
|
|
|
|
|
|
|
Heuristic categories like ``mdns_lib``, ``web_server_lib``, ``async_tcp``
|
|
|
|
|
exist as approximations for library attribution. When we discover the
|
|
|
|
|
actual library, symbols matching those heuristics should be redirected
|
|
|
|
|
to the ``[lib]`` category instead.
|
|
|
|
|
|
|
|
|
|
The mapping is built by checking if the normalized category name
|
|
|
|
|
(stripped of ``_lib`` suffix and underscores) appears as a substring
|
|
|
|
|
of any discovered library name.
|
|
|
|
|
|
|
|
|
|
Examples::
|
|
|
|
|
|
|
|
|
|
mdns_lib -> mdns -> in "mdns" or "esp8266mdns" -> [lib]mdns
|
|
|
|
|
web_server_lib -> webserver -> in "espasyncwebserver" -> [lib]espasyncwebserver
|
|
|
|
|
async_tcp -> asynctcp -> in "espasynctcp" -> [lib]espasynctcp
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
library_names: Set of discovered library names (lowercase).
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary mapping heuristic category to ``[lib]<name>`` string.
|
|
|
|
|
"""
|
|
|
|
|
mapping: dict[str, str] = {}
|
|
|
|
|
all_categories = set(SYMBOL_PATTERNS) | set(DEMANGLED_PATTERNS)
|
|
|
|
|
|
|
|
|
|
for category in all_categories:
|
|
|
|
|
base = category.removesuffix("_lib").replace("_", "")
|
|
|
|
|
# Collect all libraries whose name contains the base string
|
|
|
|
|
candidates = [lib_name for lib_name in library_names if base in lib_name]
|
|
|
|
|
if not candidates:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Choose a deterministic "best" match:
|
|
|
|
|
# 1. Prefer exact name matches over substring matches.
|
|
|
|
|
# 2. Among non-exact matches, prefer the shortest library name.
|
|
|
|
|
# 3. Break remaining ties lexicographically.
|
|
|
|
|
best_lib = min(
|
|
|
|
|
candidates,
|
|
|
|
|
key=lambda lib_name, _base=base: (
|
|
|
|
|
lib_name != _base,
|
|
|
|
|
len(lib_name),
|
|
|
|
|
lib_name,
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
mapping[category] = f"{_COMPONENT_PREFIX_LIB}{best_lib}"
|
|
|
|
|
|
|
|
|
|
if mapping:
|
|
|
|
|
_LOGGER.debug(
|
|
|
|
|
"Heuristic-to-library redirects: %s",
|
|
|
|
|
", ".join(f"{k} -> {v}" for k, v in sorted(mapping.items())),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return mapping
|
|
|
|
|
|
|
|
|
|
def _parse_map_file(self) -> dict[str, str] | None:
|
|
|
|
|
"""Parse linker map file to build authoritative symbol-to-library mapping.
|
|
|
|
|
|
|
|
|
|
The linker map file contains the definitive source attribution for every
|
|
|
|
|
symbol, including local/static ones that ``nm`` cannot safely export.
|
|
|
|
|
|
|
|
|
|
Map file format (GNU ld)::
|
|
|
|
|
|
|
|
|
|
.text._mdns_service_task
|
|
|
|
|
0x400e9fdc 0x65c .pioenvs/env/esp-idf/espressif__mdns/libespressif__mdns.a(mdns.c.o)
|
|
|
|
|
|
|
|
|
|
Each section entry has a ``.section.symbol_name`` line followed by an
|
|
|
|
|
indented line with address, size, and source path.
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Symbol-to-library dict, or ``None`` if no usable map file exists.
|
|
|
|
|
"""
|
|
|
|
|
map_path = self.elf_path.with_suffix(".map")
|
|
|
|
|
if not map_path.exists() or map_path.stat().st_size < 10000:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
_LOGGER.info("Parsing linker map file: %s", map_path.name)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
map_text = map_path.read_text(encoding="utf-8", errors="replace")
|
|
|
|
|
except OSError as err:
|
|
|
|
|
_LOGGER.warning("Failed to read map file: %s", err)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
symbol_map: dict[str, str] = {}
|
|
|
|
|
current_symbol: str | None = None
|
|
|
|
|
section_prefixes = (".text.", ".rodata.", ".data.", ".bss.", ".literal.")
|
|
|
|
|
|
|
|
|
|
for line in map_text.splitlines():
|
|
|
|
|
# Match section.symbol line: " .text.symbol_name"
|
|
|
|
|
# Single space indent, starts with dot
|
|
|
|
|
if len(line) > 2 and line[0] == " " and line[1] == ".":
|
|
|
|
|
stripped = line.strip()
|
|
|
|
|
for prefix in section_prefixes:
|
|
|
|
|
if stripped.startswith(prefix):
|
|
|
|
|
current_symbol = stripped[len(prefix) :]
|
|
|
|
|
break
|
|
|
|
|
else:
|
|
|
|
|
current_symbol = None
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Match source attribution line: " 0xADDR 0xSIZE source_path"
|
|
|
|
|
if current_symbol is None:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
fields = line.split()
|
|
|
|
|
# Skip compiler-generated local names (e.g., packet$19, buf$20)
|
|
|
|
|
# that can collide across compilation units
|
|
|
|
|
if (
|
|
|
|
|
len(fields) >= 3
|
|
|
|
|
and fields[0].startswith("0x")
|
|
|
|
|
and fields[1].startswith("0x")
|
|
|
|
|
and not _COMPILER_LOCAL_PATTERN.match(current_symbol)
|
|
|
|
|
):
|
|
|
|
|
source_path = fields[2]
|
|
|
|
|
# Check if source path contains a known library directory
|
|
|
|
|
for dir_key, lib_name in self._lib_hash_to_name.items():
|
|
|
|
|
if dir_key in source_path:
|
|
|
|
|
symbol_map[current_symbol] = lib_name
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
current_symbol = None
|
|
|
|
|
|
|
|
|
|
return symbol_map or None
|
|
|
|
|
|
|
|
|
|
def _scan_libraries(self) -> None:
|
|
|
|
|
"""Discover third-party libraries and build symbol mapping.
|
|
|
|
|
|
|
|
|
|
Scans both PlatformIO ``lib<hex>/`` directories (Arduino builds) and
|
|
|
|
|
ESP-IDF ``managed_components/`` (IDF builds) to find library archives.
|
|
|
|
|
|
|
|
|
|
Uses the linker map file for authoritative symbol attribution when
|
|
|
|
|
available, falling back to ``nm`` scanning with heuristic redirects.
|
|
|
|
|
"""
|
|
|
|
|
libraries: dict[str, list[Path]] = {}
|
|
|
|
|
self._discover_pio_libraries(libraries, self._lib_hash_to_name)
|
|
|
|
|
self._discover_idf_managed_components(libraries, self._lib_hash_to_name)
|
|
|
|
|
|
|
|
|
|
if not libraries:
|
|
|
|
|
_LOGGER.debug("No third-party libraries found")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
_LOGGER.info(
|
|
|
|
|
"Scanning %d libraries: %s",
|
|
|
|
|
len(libraries),
|
|
|
|
|
", ".join(sorted(libraries)),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Heuristic redirect catches local symbols (e.g., mdns_task_buffer$14)
|
|
|
|
|
# that can't be safely added to the symbol map due to name collisions
|
|
|
|
|
self._heuristic_to_lib = self._build_heuristic_to_lib_mapping(
|
|
|
|
|
set(libraries.keys())
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Try linker map file first (authoritative, includes local symbols)
|
|
|
|
|
map_symbols = self._parse_map_file()
|
|
|
|
|
if map_symbols is not None:
|
|
|
|
|
self._lib_symbol_map = map_symbols
|
|
|
|
|
_LOGGER.info(
|
|
|
|
|
"Built library symbol map from linker map: %d symbols",
|
|
|
|
|
len(self._lib_symbol_map),
|
|
|
|
|
)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Fall back to nm scanning (global symbols only)
|
|
|
|
|
self._lib_symbol_map = self._build_library_symbol_map(libraries)
|
|
|
|
|
|
|
|
|
|
_LOGGER.info(
|
|
|
|
|
"Built library symbol map from nm: %d symbols from %d libraries",
|
|
|
|
|
len(self._lib_symbol_map),
|
|
|
|
|
len(libraries),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _find_object_files_dir(self) -> Path | None:
|
|
|
|
|
"""Find the directory containing object files for this build.
|
|
|
|
|
|
|
|
|
|
@@ -397,47 +741,38 @@ class MemoryAnalyzer:
|
|
|
|
|
return pioenvs_dir
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def _scan_cswtch_in_objects(
|
|
|
|
|
self, obj_dir: Path
|
|
|
|
|
) -> dict[str, list[tuple[str, int]]]:
|
|
|
|
|
"""Scan object files for CSWTCH symbols using a single nm invocation.
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _parse_nm_cswtch_output(
|
|
|
|
|
output: str,
|
|
|
|
|
base_dir: Path | None,
|
|
|
|
|
cswtch_map: dict[str, list[tuple[str, int]]],
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Parse nm output for CSWTCH symbols and add to cswtch_map.
|
|
|
|
|
|
|
|
|
|
Uses ``nm --print-file-name -S`` on all ``.o`` files at once.
|
|
|
|
|
Output format: ``/path/to/file.o:address size type name``
|
|
|
|
|
Handles both ``.o`` files and ``.a`` archives.
|
|
|
|
|
|
|
|
|
|
nm output formats::
|
|
|
|
|
|
|
|
|
|
.o files: /path/file.o:hex_addr hex_size type name
|
|
|
|
|
.a files: /path/lib.a:member.o:hex_addr hex_size type name
|
|
|
|
|
|
|
|
|
|
For ``.o`` files, paths are made relative to *base_dir* when possible.
|
|
|
|
|
For ``.a`` archives (detected by ``:`` in the file portion), paths are
|
|
|
|
|
formatted as ``archive_stem/member.o`` (e.g. ``liblwip2-536-feat/lwip-esp.o``).
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
obj_dir: Directory containing object files (.pioenvs/<env>/)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dict mapping "CSWTCH$NNN:size" to list of (source_file, size) tuples.
|
|
|
|
|
output: Raw stdout from ``nm --print-file-name -S``.
|
|
|
|
|
base_dir: Base directory for computing relative paths of ``.o`` files.
|
|
|
|
|
Pass ``None`` when scanning archives outside the build tree.
|
|
|
|
|
cswtch_map: Dict to populate, mapping ``"CSWTCH$N:size"`` to source list.
|
|
|
|
|
"""
|
|
|
|
|
cswtch_map: dict[str, list[tuple[str, int]]] = defaultdict(list)
|
|
|
|
|
|
|
|
|
|
if not self.nm_path:
|
|
|
|
|
return cswtch_map
|
|
|
|
|
|
|
|
|
|
# Find all .o files recursively, sorted for deterministic output
|
|
|
|
|
obj_files = sorted(obj_dir.rglob("*.o"))
|
|
|
|
|
if not obj_files:
|
|
|
|
|
return cswtch_map
|
|
|
|
|
|
|
|
|
|
_LOGGER.debug("Scanning %d object files for CSWTCH symbols", len(obj_files))
|
|
|
|
|
|
|
|
|
|
# Single nm call with --print-file-name for all object files
|
|
|
|
|
result = run_tool(
|
|
|
|
|
[self.nm_path, "--print-file-name", "-S"] + [str(f) for f in obj_files],
|
|
|
|
|
timeout=30,
|
|
|
|
|
)
|
|
|
|
|
if result is None or result.returncode != 0:
|
|
|
|
|
return cswtch_map
|
|
|
|
|
|
|
|
|
|
for line in result.stdout.splitlines():
|
|
|
|
|
for line in output.splitlines():
|
|
|
|
|
if "CSWTCH$" not in line:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Split on last ":" that precedes a hex address.
|
|
|
|
|
# nm --print-file-name format: filepath:hex_addr hex_size type name
|
|
|
|
|
# We split from the right: find the last colon followed by hex digits.
|
|
|
|
|
# For .o: "filepath.o" : "hex_addr hex_size type name"
|
|
|
|
|
# For .a: "filepath.a:member.o" : "hex_addr hex_size type name"
|
|
|
|
|
parts_after_colon = line.rsplit(":", 1)
|
|
|
|
|
if len(parts_after_colon) != 2:
|
|
|
|
|
continue
|
|
|
|
|
@@ -457,16 +792,89 @@ class MemoryAnalyzer:
|
|
|
|
|
except ValueError:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Get relative path from obj_dir for readability
|
|
|
|
|
try:
|
|
|
|
|
rel_path = str(Path(file_path).relative_to(obj_dir))
|
|
|
|
|
except ValueError:
|
|
|
|
|
# Determine readable source path
|
|
|
|
|
# Use ".a:" to detect archive format (not bare ":" which matches
|
|
|
|
|
# Windows drive letters like "C:\...\file.o").
|
|
|
|
|
if ".a:" in file_path:
|
|
|
|
|
# Archive format: "archive.a:member.o" → "archive_stem/member.o"
|
|
|
|
|
archive_part, member = file_path.rsplit(":", 1)
|
|
|
|
|
archive_name = Path(archive_part).stem
|
|
|
|
|
rel_path = f"{archive_name}/{member}"
|
|
|
|
|
elif base_dir is not None:
|
|
|
|
|
try:
|
|
|
|
|
rel_path = str(Path(file_path).relative_to(base_dir))
|
|
|
|
|
except ValueError:
|
|
|
|
|
rel_path = file_path
|
|
|
|
|
else:
|
|
|
|
|
rel_path = file_path
|
|
|
|
|
|
|
|
|
|
key = f"{sym_name}:{size}"
|
|
|
|
|
cswtch_map[key].append((rel_path, size))
|
|
|
|
|
|
|
|
|
|
return cswtch_map
|
|
|
|
|
def _run_nm_cswtch_scan(
|
|
|
|
|
self,
|
|
|
|
|
files: list[Path],
|
|
|
|
|
base_dir: Path | None,
|
|
|
|
|
cswtch_map: dict[str, list[tuple[str, int]]],
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Run nm on *files* and add any CSWTCH symbols to *cswtch_map*.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
files: Object (``.o``) or archive (``.a``) files to scan.
|
|
|
|
|
base_dir: Base directory for relative path computation (see
|
|
|
|
|
:meth:`_parse_nm_cswtch_output`).
|
|
|
|
|
cswtch_map: Dict to populate with results.
|
|
|
|
|
"""
|
|
|
|
|
if not self.nm_path or not files:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
_LOGGER.debug("Scanning %d files for CSWTCH symbols", len(files))
|
|
|
|
|
|
|
|
|
|
result = run_tool(
|
|
|
|
|
[self.nm_path, "--print-file-name", "-S"] + [str(f) for f in files],
|
|
|
|
|
timeout=30,
|
|
|
|
|
)
|
|
|
|
|
if result is None or result.returncode != 0:
|
|
|
|
|
_LOGGER.debug(
|
|
|
|
|
"nm failed or timed out scanning %d files for CSWTCH symbols",
|
|
|
|
|
len(files),
|
|
|
|
|
)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
self._parse_nm_cswtch_output(result.stdout, base_dir, cswtch_map)
|
|
|
|
|
|
|
|
|
|
def _scan_cswtch_in_sdk_archives(
|
|
|
|
|
self, cswtch_map: dict[str, list[tuple[str, int]]]
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Scan SDK library archives (.a) for CSWTCH symbols.
|
|
|
|
|
|
|
|
|
|
Prebuilt SDK libraries (e.g. lwip, bearssl) are not compiled from source,
|
|
|
|
|
so their CSWTCH symbols only exist inside ``.a`` archives. Results are
|
|
|
|
|
merged into *cswtch_map* for keys not already found in ``.o`` files.
|
|
|
|
|
|
|
|
|
|
The same source file (e.g. ``lwip-esp.o``) often appears in multiple
|
|
|
|
|
library variants (``liblwip2-536.a``, ``liblwip2-1460-feat.a``, etc.),
|
|
|
|
|
so results are deduplicated by member name.
|
|
|
|
|
"""
|
|
|
|
|
sdk_dirs = self._find_sdk_library_dirs()
|
|
|
|
|
if not sdk_dirs:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
sdk_archives = sorted(a for sdk_dir in sdk_dirs for a in sdk_dir.glob("*.a"))
|
|
|
|
|
|
|
|
|
|
sdk_map: dict[str, list[tuple[str, int]]] = defaultdict(list)
|
|
|
|
|
self._run_nm_cswtch_scan(sdk_archives, None, sdk_map)
|
|
|
|
|
|
|
|
|
|
# Merge SDK results, deduplicating by member name.
|
|
|
|
|
for key, sources in sdk_map.items():
|
|
|
|
|
if key in cswtch_map:
|
|
|
|
|
continue
|
|
|
|
|
seen: dict[str, tuple[str, int]] = {}
|
|
|
|
|
for path, sz in sources:
|
|
|
|
|
member = Path(path).name
|
|
|
|
|
if member not in seen:
|
|
|
|
|
seen[member] = (path, sz)
|
|
|
|
|
cswtch_map[key] = list(seen.values())
|
|
|
|
|
|
|
|
|
|
def _source_file_to_component(self, source_file: str) -> str:
|
|
|
|
|
"""Map a source object file path to its component name.
|
|
|
|
|
@@ -495,9 +903,21 @@ class MemoryAnalyzer:
|
|
|
|
|
if "esphome" in parts and "components" not in parts:
|
|
|
|
|
return _COMPONENT_CORE
|
|
|
|
|
|
|
|
|
|
# Framework/library files - return the first path component
|
|
|
|
|
# e.g., lib65b/ESPAsyncTCP/... -> lib65b
|
|
|
|
|
# FrameworkArduino/... -> FrameworkArduino
|
|
|
|
|
# Framework/library files - check for PlatformIO library hash dirs
|
|
|
|
|
# e.g., lib65b/ESPAsyncTCP/... -> [lib]espasynctcp
|
|
|
|
|
if parts and parts[0] in self._lib_hash_to_name:
|
|
|
|
|
return f"{_COMPONENT_PREFIX_LIB}{self._lib_hash_to_name[parts[0]]}"
|
|
|
|
|
|
|
|
|
|
# ESP-IDF managed components: managed_components/espressif__mdns/... -> [lib]mdns
|
|
|
|
|
if (
|
|
|
|
|
len(parts) >= 2
|
|
|
|
|
and parts[0] == "managed_components"
|
|
|
|
|
and parts[1] in self._lib_hash_to_name
|
|
|
|
|
):
|
|
|
|
|
return f"{_COMPONENT_PREFIX_LIB}{self._lib_hash_to_name[parts[1]]}"
|
|
|
|
|
|
|
|
|
|
# Other framework/library files - return the first path component
|
|
|
|
|
# e.g., FrameworkArduino/... -> FrameworkArduino
|
|
|
|
|
return parts[0] if parts else source_file
|
|
|
|
|
|
|
|
|
|
def _analyze_cswtch_symbols(self) -> None:
|
|
|
|
|
@@ -505,17 +925,25 @@ class MemoryAnalyzer:
|
|
|
|
|
|
|
|
|
|
CSWTCH symbols are compiler-generated lookup tables for switch statements.
|
|
|
|
|
They are local symbols, so the same name can appear in different object files.
|
|
|
|
|
This method scans .o files to attribute them to their source components.
|
|
|
|
|
This method scans .o files and SDK archives to attribute them to their
|
|
|
|
|
source components.
|
|
|
|
|
"""
|
|
|
|
|
obj_dir = self._find_object_files_dir()
|
|
|
|
|
if obj_dir is None:
|
|
|
|
|
_LOGGER.debug("No object files directory found, skipping CSWTCH analysis")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Scan object files for CSWTCH symbols
|
|
|
|
|
cswtch_map = self._scan_cswtch_in_objects(obj_dir)
|
|
|
|
|
# Scan build-dir object files for CSWTCH symbols
|
|
|
|
|
cswtch_map: dict[str, list[tuple[str, int]]] = defaultdict(list)
|
|
|
|
|
self._run_nm_cswtch_scan(sorted(obj_dir.rglob("*.o")), obj_dir, cswtch_map)
|
|
|
|
|
|
|
|
|
|
# Also scan SDK library archives (.a) for CSWTCH symbols.
|
|
|
|
|
# Prebuilt SDK libraries (e.g. lwip, bearssl) are not compiled from source
|
|
|
|
|
# so their symbols only exist inside .a archives, not as loose .o files.
|
|
|
|
|
self._scan_cswtch_in_sdk_archives(cswtch_map)
|
|
|
|
|
|
|
|
|
|
if not cswtch_map:
|
|
|
|
|
_LOGGER.debug("No CSWTCH symbols found in object files")
|
|
|
|
|
_LOGGER.debug("No CSWTCH symbols found in object files or SDK archives")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Collect CSWTCH symbols from the ELF (already parsed in sections)
|
|
|
|
|
|