Open yashagarwal-314 opened 2 days ago
Something is off in your logic. I'm assuming mc1.controlScheme.estimHybrid.countLimit
refers to a struct member; those don't have a DW_AT_location
, the struct variable itself has a location, and the struct members need to be tracked down via DW_AT_member_location
, which encodes an offset (in one of the several ways). Also, when a location is a block (your code doesn't check the form, but it looks as if it is), it's a DWARF expression, and you'd want to parse it as one, using DWARFExprParser
.
Similar discussion here: #572
import logging
from elftools.elf.elffile import ELFFile from elftools.elf.sections import SymbolTableSection
from pyx2cscope.parser.elf_parser import ElfParser, VariableInfo
class Elf32Parser(ElfParser): """Class for parsing ELF files compatible with 32-bit architectures."""
def __init__(self, elf_path):
"""Initialize the Elf32Parser with the given ELF file path."""
self.elf_path = elf_path
self.variable_map = {}
self.symbol_table = {} # Ensure this initialization is included
self.address = None
self.var_name = None
self.die_variable = None
self.elf_file = None
self.dwarf_info = None
self._load_elf_file()
self._load_symbol_table() # Load symbol table entries into a dictionary
def _load_elf_file(self):
try:
self.stream = open(self.elf_path, "rb")
self.elf_file = ELFFile(self.stream)
self.dwarf_info = self.elf_file.get_dwarf_info()
except IOError:
raise Exception(f"Error loading ELF file: {self.elf_path}")
def close_elf_file(self):
"""Closes the ELF file stream."""
if self.stream:
self.stream.close()
def _map_variables(self) -> dict[str, VariableInfo]:
self.variable_map.clear()
for compilation_unit in self.dwarf_info.iter_CUs():
root_die = compilation_unit.iter_DIEs()
tag_variables = filter(lambda die: die.tag == "DW_TAG_variable", root_die)
for die_variable in tag_variables:
self._process_variable_die(die_variable)
vars_to_remove = [var_name for var_name, var_info in self.variable_map.items() if var_info.address is None]
# Remove the variables with no address from the map
for var_name in vars_to_remove:
self.variable_map.pop(var_name)
return self.variable_map
def _process_variable_die(self, die_variable):
"""Process an individual variable DIE."""
if "DW_AT_specification" in die_variable.attributes:
spec_ref_addr = (
die_variable.attributes["DW_AT_specification"].value
+ die_variable.cu.cu_offset
)
spec_die = self.dwarf_info.get_DIE_from_refaddr(spec_ref_addr)
if spec_die.tag == "DW_TAG_variable":
self.die_variable = spec_die
self.var_name = self.die_variable.attributes.get(
"DW_AT_name"
).value.decode("utf-8")
self._extract_address(die_variable)
else:
return
elif (
die_variable.attributes.get("DW_AT_location")
and die_variable.attributes.get("DW_AT_name") is not None
):
self.var_name = die_variable.attributes.get("DW_AT_name").value.decode(
"utf-8"
)
self.die_variable = die_variable
self._extract_address(die_variable)
elif (
die_variable.attributes.get("DW_AT_external")
and die_variable.attributes.get("DW_AT_name") is not None
):
self.var_name = die_variable.attributes.get("DW_AT_name").value.decode(
"utf-8"
)
self.die_variable = die_variable
self._extract_address(die_variable)
else:
return
type_attr = self.die_variable.attributes.get("DW_AT_type")
if type_attr is None:
return
ref_addr = type_attr.value + self.die_variable.cu.cu_offset
type_die = self.dwarf_info.get_DIE_from_refaddr(ref_addr)
if type_die.tag != "DW_TAG_volatile_type":
end_die = self._get_end_die(type_die)
if end_die is None:
logging.warning(
f"Skipping variable {self.var_name} due to missing end DIE"
)
return
self._processing_end_die(end_die)
elif type_die.tag == "DW_TAG_volatile_type":
end_die = self._get_end_die(type_die)
if end_die is None:
logging.warning(
f"Skipping volatile type variable {self.var_name} due to missing end DIE"
)
return
self._processing_end_die(end_die)
def _get_end_die(self, current_die):
"""Find the end DIE of a type."""
valid_words = {
"DW_TAG_base_type",
"DW_TAG_pointer_type",
"DW_TAG_structure_type",
"DW_TAG_array_type",
}
while current_die.tag not in valid_words:
if "DW_AT_type" not in current_die.attributes:
logging.warning(
f"Skipping DIE at offset {current_die.offset} with no 'DW_AT_type' attribute"
)
return None
ref_addr = (
current_die.attributes["DW_AT_type"].value + current_die.cu.cu_offset
)
current_die = self.dwarf_info.get_DIE_from_refaddr(ref_addr)
return current_die
def _processing_end_die(self, end_die):
"""Processes the end DIE of a tag to extract variable information."""
self._extract_address(self.die_variable)
if self.address is None and not self.die_variable.attributes.get(
"DW_AT_external"
):
return
if end_die.tag == "DW_TAG_pointer_type":
self._process_pointer_type(end_die)
elif end_die.tag == "DW_TAG_structure_type":
self._process_structure_type(end_die)
elif end_die.tag == "DW_TAG_array_type":
self._process_array_type(end_die)
else:
self._process_base_type(end_die)
def _extract_address(self, die_variable):
"""Extracts the address of the current variable or fetches it from the symbol table if not found."""
try:
if "DW_AT_location" in die_variable.attributes:
data = list(die_variable.attributes["DW_AT_location"].value)[1:]
self.address = int.from_bytes(bytes(data), byteorder="little") & 0XFFFF
print(self.address)
else:
self.address = self._fetch_address_from_symtab(
die_variable.attributes.get("DW_AT_name").value.decode("utf-8")
)
print(die_variable)
except Exception as e:
logging.error(e)
self.address = None
def _load_symbol_table(self):
"""Loads symbol table entries into a dictionary for fast access."""
for section in self.elf_file.iter_sections():
if isinstance(section, SymbolTableSection):
for symbol in section.iter_symbols():
if symbol["st_info"].type == "STT_OBJECT":
self.symbol_table[symbol.name] = symbol["st_value"]
def _fetch_address_from_symtab(self, variable_name):
"""Fetches the address of a variable from the preloaded symbol table."""
return self.symbol_table.get(variable_name, None)
def _find_actual_declaration(self, die_variable):
"""Find the actual declaration of an extern variable."""
while "DW_AT_specification" in die_variable.attributes:
spec_ref_addr = (
die_variable.attributes["DW_AT_specification"].value
+ die_variable.cu.cu_offset
)
die_variable = self.dwarf_info.get_DIE_from_refaddr(spec_ref_addr)
return die_variable
def _process_pointer_type(self, end_die):
"""Process a pointer type variable."""
type_name = "pointer"
self.variable_map[self.var_name] = VariableInfo(
name=self.var_name,
byte_size=end_die.attributes["DW_AT_byte_size"].value,
type=type_name,
address=self.address,
)
def _process_structure_type(self, end_die):
"""Process a structure type variable."""
members = self._get_structure_members(end_die, self.var_name)
for member_name, member_data in members.items():
self.variable_map[member_name] = VariableInfo(
name=member_name,
byte_size=member_data["byte_size"],
type=member_data["type"],
address=(
self.address + member_data["address_offset"]
if self.address
else None
),
array_size=member_data["array_size"],
)
def _process_array_type(self, end_die):
"""Process an array type variable."""
array_size = self._get_array_length(end_die)
base_type_attr = end_die.attributes.get("DW_AT_type")
if base_type_attr:
base_type_offset = base_type_attr.value + end_die.cu.cu_offset
base_type_die = self.dwarf_info.get_DIE_from_refaddr(base_type_offset)
if base_type_die:
base_type_die = self._get_end_die(base_type_die)
type_name = base_type_die.attributes.get("DW_AT_name")
type_name = type_name.value.decode("utf-8") if type_name else "unknown"
byte_size_attr = base_type_die.attributes.get("DW_AT_byte_size")
byte_size = byte_size_attr.value if byte_size_attr else 0
self.variable_map[self.var_name] = VariableInfo(
name=self.var_name,
byte_size=byte_size,
type=type_name,
address=self.address,
array_size=array_size,
)
def _process_base_type(self, end_die):
"""Process a base type variable."""
type_name_attr = end_die.attributes.get("DW_AT_name")
type_name = (
type_name_attr.value.decode("utf-8") if type_name_attr else "unknown"
)
self.variable_map[self.var_name] = VariableInfo(
name=self.var_name,
byte_size=end_die.attributes["DW_AT_byte_size"].value,
type=type_name,
address=self.address,
)
def _get_structure_members_recursive(
self, die, parent_name: str, prev_address_offset=0
):
"""Recursively gets structure members from a DWARF DIE."""
members = {}
for child_die in die.iter_children():
if child_die.tag in {
"DW_TAG_member",
"DW_TAG_pointer_type",
"DW_TAG_array_type",
}:
member = {}
member_name = parent_name
name_attr = child_die.attributes.get("DW_AT_name")
if name_attr:
member_name += "." + name_attr.value.decode("utf-8")
type_attr = child_die.attributes.get("DW_AT_type")
if type_attr:
type_offset = type_attr.value + child_die.cu.cu_offset
try:
member_type = self._get_member_type(type_offset)
offset_value = child_die.attributes.get(
"DW_AT_data_member_location"
)
offset_value = int(offset_value.value[1]) if offset_value else 0
nested_die = self._get_end_die(child_die)
if nested_die.tag == "DW_TAG_structure_type":
nested_members, _ = self._get_structure_members_recursive(
nested_die,
member_name,
prev_address_offset + offset_value,
)
if nested_members:
members.update(nested_members)
elif nested_die.tag == "DW_TAG_array_type":
array_size = self._get_array_length(nested_die)
base_type_attr = nested_die.attributes.get("DW_AT_type")
if base_type_attr:
base_type_offset = (
base_type_attr.value + nested_die.cu.cu_offset
)
base_type_die = self.dwarf_info.get_DIE_from_refaddr(
base_type_offset
)
base_type_die = self._get_end_die(base_type_die)
if base_type_die:
type_name = base_type_die.attributes.get(
"DW_AT_name"
)
type_name = (
type_name.value.decode("utf-8")
if type_name
else "unknown"
)
byte_size_attr = base_type_die.attributes.get(
"DW_AT_byte_size"
)
byte_size = (
byte_size_attr.value if byte_size_attr else 0
)
member["type"] = type_name
member["byte_size"] = byte_size
member["address_offset"] = (
prev_address_offset + offset_value
)
member["array_size"] = array_size
members[member_name] = member
else:
member["type"] = member_type["name"]
member["byte_size"] = member_type["byte_size"]
member["address_offset"] = (
prev_address_offset + offset_value
)
member["array_size"] = self._get_array_length(child_die)
members[member_name] = member
except Exception as e:
logging.error("exception", exc_info=e)
continue
return members, prev_address_offset
def _get_structure_members(self, structure_die, var_name):
"""Retrieves structure members from a DWARF DIE."""
return self._get_structure_members_recursive(structure_die, var_name)[0]
def _get_array_length(self, type_die):
"""Gets the length of an array type."""
for child in type_die.iter_children():
if child.tag == "DW_TAG_subrange_type":
array_length_attr = child.attributes.get("DW_AT_upper_bound")
if array_length_attr:
array_length = array_length_attr.value + 1
return array_length
return 0
def _get_member_type(self, type_offset):
"""Retrieve the type information from DWARF given a type offset."""
type_die = self.dwarf_info.get_DIE_from_refaddr(type_offset)
if type_die:
type_die = self._get_end_die(type_die)
if type_die.tag == "DW_TAG_base_type":
type_name = type_die.attributes["DW_AT_name"].value.decode("utf-8")
byte_size_attr = type_die.attributes.get("DW_AT_byte_size")
byte_size = byte_size_attr.value if byte_size_attr else None
return {
"name": type_name,
"byte_size": byte_size,
}
elif type_die.tag != "DW_TAG_base_type":
base_type_attr = type_die.attributes.get("DW_AT_type")
if base_type_attr:
base_type_offset = base_type_attr.value
return self._get_member_type(base_type_offset)
def _get_dwarf_die_by_offset(self, offset):
"""Retrieve a DWARF DIE given its offset."""
for compilation_unit in self.dwarf_info.iter_CUs():
root_die = compilation_unit.iter_DIEs()
for die in root_die:
if die.offset == offset:
return die
return None
yes i am using a recursive method for the structures.
I have attached the whole code, It works exactly as expected when i am using a 32-bit elf file.
This line is wrong:
offset_value = int(offset_value.value[1]) if offset_value else 0
The way the member offset is stored in DW_AT_data_member_location
is more tricky than that. Read the linked discussion please.
Hey Everyone,
First of all thank you for your effort for making pyelftools xc16 compatible.
While i was using it, i noticed with big structure, there is some offset issue.
as an example.
mc1.controlScheme.estimHybrid.countLimit: in dict1: VariableInfo(name='mc1.controlScheme.estimHybrid.countLimit', type='int', byte_size='2', address=10294, array_size=0) in dict2: VariableInfo(name='mc1.controlScheme.estimHybrid.countLimit', type='int', byte_size=2, address=9782, array_size=0) Address difference: 512 mc1.controlScheme.estimZSMT.hpfBeta.output: in dict1: VariableInfo(name='mc1.controlScheme.estimZSMT.hpfBeta.output', type='int', byte_size='2', address=10080, array_size=0) in dict2: VariableInfo(name='mc1.controlScheme.estimZSMT.hpfBeta.output', type='int', byte_size=2, address=9952, array_size=0) Address difference: 128 mc1.load.ipd.startVoltageVector: in dict1: VariableInfo(name='mc1.load.ipd.startVoltageVector', type='int', byte_size='2', address=10442, array_size=0) in dict2: VariableInfo(name='mc1.load.ipd.startVoltageVector', type='int', byte_size=2, address=9674, array_size=0) Address difference: 768
Here the correct addresses value are in dict 1
the code i am using to extract address :
"DW_AT_location" in die_variable.attributes: data = list(die_variable.attributes["DW_AT_location"].value)[1:] self.address = int.from_bytes(bytes(data), byteorder="little") & 0XFFFF
I assume the wrong address or the offset is somehow related to this.
I am attaching the elf file for your support.
ELF16Compiled.elf.zip