|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- """
- ELF file parser.
-
- This provides a class ``ELFFile`` that parses an ELF executable through an
- interface similar to ``ZipFile``. Only the read interface is implemented.
-
- Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
- ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
- """
-
- import enum
- import os
- import struct
- from typing import IO, Optional, Tuple
-
-
class ELFInvalid(ValueError):
    """Raised when the data being read cannot be parsed as an ELF file."""
-
-
class EIClass(enum.IntEnum):
    """File class (word size) codes used in the ELF identification bytes."""

    C32 = 1  # 32-bit objects.
    C64 = 2  # 64-bit objects.
-
-
class EIData(enum.IntEnum):
    """Data encoding (byte order) codes used in the ELF identification bytes."""

    Lsb = 1  # Least significant byte first (little-endian).
    Msb = 2  # Most significant byte first (big-endian).
-
-
class EMachine(enum.IntEnum):
    """Architecture codes as stored in the ELF header's ``e_machine`` field."""

    I386 = 3  # EM_386
    S390 = 22  # EM_S390
    Arm = 40  # EM_ARM
    X8664 = 62  # EM_X86_64
    # Historical, misspelled name. It stays the canonical member so existing
    # callers and ``EMachine(183)`` keep resolving to it.
    AArc64 = 183  # EM_AARCH64
    # Correctly spelled alias: same value, so it is the very same member and
    # does not appear as an extra entry when iterating over the enum.
    AArch64 = 183
-
-
class ELFFile:
    """
    Representation of an ELF executable.

    The identification bytes and the file header are parsed eagerly by the
    constructor. Program headers are only walked when ``interpreter`` is read.

    Public attributes set by the constructor:

    * ``capacity`` -- class byte of the identification (1: 32-bit, 2: 64-bit).
    * ``encoding`` -- data-encoding byte of the identification (1: LSB, 2: MSB).
    * ``machine`` -- architecture type.
    * ``flags`` -- processor-specific flags.
    """

    def __init__(self, f: IO[bytes]) -> None:
        """
        Parse the ELF identification and file header from *f*.

        Reading starts at the current position of *f*. The file object is
        kept on the instance and is read (and repositioned) again by
        ``interpreter``, so it must stay open and seekable for that to work.

        :raises ELFInvalid: if the identification or header is too short to
            unpack, the magic number is wrong, or the capacity/encoding pair
            is not one of the four supported combinations.
        """
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # File class (bitness): 1 = 32-bit, 2 = 64-bit.
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format of the file header fields that follow the
            #        16 identification bytes.
            # p_fmt: Format of one program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz in an
            #        unpacked program header entry.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of the program header table.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> Tuple[int, ...]:
        """
        Read exactly as many bytes as *fmt* describes and unpack them.

        :raises struct.error: if the file yields fewer bytes than *fmt* needs.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> Optional[str]:
        """
        The path recorded in the ``PT_INTERP`` program header.

        Entries that cannot be unpacked are skipped. ``None`` is returned when
        no entry of type ``PT_INTERP`` is found. Accessing this property moves
        the position of the underlying file object.
        """
        type_idx, offset_idx, filesz_idx = self._p_idx
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                data = self._read(self._p_fmt)
            except struct.error:
                # Truncated entry: keep scanning rather than failing outright.
                continue
            if data[type_idx] != 3:  # Not PT_INTERP.
                continue
            self._f.seek(data[offset_idx])
            # The stored path is NUL-terminated; drop the padding bytes.
            return os.fsdecode(self._f.read(data[filesz_idx])).strip("\0")
        return None
|