diff --git a/src/ElfReader.cc b/src/ElfReader.cc index 3a90c5b3ef4..a13a4e90013 100644 --- a/src/ElfReader.cc +++ b/src/ElfReader.cc @@ -28,7 +28,7 @@ class ElfReaderImplBase { virtual Debugaltlink read_debugaltlink() = 0; virtual string read_buildid() = 0; virtual string read_interp() = 0; - virtual bool addr_to_offset(uintptr_t addr, uintptr_t& offset) = 0; + virtual bool addr_to_phdr(uintptr_t addr, PhdrInfo& phdr) = 0; virtual SectionOffsets find_section_file_offsets(const char* name) = 0; virtual const vector* decompress_section(SectionOffsets offsets) = 0; bool ok() { return ok_; } @@ -49,7 +49,7 @@ template class ElfReaderImpl : public ElfReaderImplBase { virtual Debugaltlink read_debugaltlink() override; virtual string read_buildid() override; virtual string read_interp() override; - virtual bool addr_to_offset(uintptr_t addr, uintptr_t& offset) override; + virtual bool addr_to_phdr(uintptr_t addr, PhdrInfo& phdr) override; virtual SectionOffsets find_section_file_offsets(const char* name) override; virtual const vector* decompress_section(SectionOffsets offsets) override; @@ -518,20 +518,19 @@ string ElfReaderImpl::read_interp() { } template -bool ElfReaderImpl::addr_to_offset(uintptr_t addr, uintptr_t& offset) { - for (size_t i = 0; i < sections_size; ++i) { - const auto& section = sections[i]; - // Skip the section if it either "occupies no space in the file" or - // doesn't have a valid address because it does not "occupy memory - // during process execution". - if (section.sh_type == SHT_NOBITS || !(section.sh_flags & SHF_ALLOC)) { - continue; - } - if (addr >= section.sh_addr && addr - section.sh_addr < section.sh_size) { - offset = addr - section.sh_addr + section.sh_offset; +bool ElfReaderImpl::addr_to_phdr(uintptr_t addr, PhdrInfo& phdr) { + for (size_t i = 0; i < programheader_size; ++i) { + auto& p = programheader[i]; + if (p.p_type == PT_LOAD && addr >= p.p_vaddr && + addr - p.p_vaddr < p.p_memsz) { + phdr.vaddr = p.p_vaddr; + phdr.offset = p.p_offset; + phdr.filesz = p.p_filesz; + phdr.flags = p.p_flags; return true; } } + return false; } @@ -573,8 +572,8 @@ DwarfSpan ElfReader::dwarf_section(const char* name, bool known_to_be_compressed string ElfReader::read_buildid() { return impl().read_buildid(); } string ElfReader::read_interp() { return impl().read_interp(); } -bool ElfReader::addr_to_offset(uintptr_t addr, uintptr_t& offset) { - return impl().addr_to_offset(addr, offset); +bool ElfReader::addr_to_phdr(uintptr_t addr, PhdrInfo& phdr) { + return impl().addr_to_phdr(addr, phdr); } bool ElfReader::ok() { return impl().ok(); } diff --git a/src/ElfReader.h b/src/ElfReader.h index 9e28816f777..ebd682b1463 100644 --- a/src/ElfReader.h +++ b/src/ElfReader.h @@ -74,6 +74,13 @@ struct SectionOffsets { bool compressed; }; +struct PhdrInfo { + uint64_t vaddr; + uint64_t offset; + uint64_t filesz; + uint64_t flags; +}; + class ElfReader { public: ElfReader(SupportedArch arch); @@ -102,11 +109,11 @@ class ElfReader { Debugaltlink read_debugaltlink(); std::string read_buildid(); std::string read_interp(); - // Returns true and sets file |offset| if ELF address |addr| is mapped from - // a section in the ELF file. Returns false if no section maps to - // |addr|. |addr| is an address indicated by the ELF file, not its - // relocated address in memory. - bool addr_to_offset(uintptr_t addr, uintptr_t& offset); + // Returns true if ELF address |addr| is mapped from a program header in the + // ELF file, and sets the fields of |phdr| to the program header fields. + // Returns false if no program header maps to |addr|. |addr| is an address + // indicated by the ELF file, not its relocated address in memory. + bool addr_to_phdr(uintptr_t addr, PhdrInfo& phdr); SectionOffsets find_section_file_offsets(const char* name); DwarfSpan dwarf_section(const char* name, bool known_to_be_compressed = false); SupportedArch arch() const { return arch_; } diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc index 21a6709eb1e..9bd920de425 100644 --- a/src/Monkeypatcher.cc +++ b/src/Monkeypatcher.cc @@ -2,6 +2,7 @@ #include "Monkeypatcher.h" +#include #include #include @@ -1423,9 +1424,8 @@ void patch_after_exec_arch(RecordTask* t, Monkeypatcher& patcher) { for (const auto& m : t->vm()->maps()) { auto& km = m.map; - patcher.patch_after_mmap(t, km.start(), km.size(), - km.file_offset_bytes(), -1, - Monkeypatcher::MMAP_EXEC); + patcher.patch_after_mmap(t, km.start(), km.size(), km.file_offset_bytes(), + km.prot(), -1, Monkeypatcher::MMAP_EXEC); } if (!t->vm()->has_vdso()) { @@ -1443,9 +1443,8 @@ void patch_after_exec_arch(RecordTask* t, Monkeypatcher& patcher) { for (const auto& m : t->vm()->maps()) { auto& km = m.map; - patcher.patch_after_mmap(t, km.start(), km.size(), - km.file_offset_bytes(), -1, - Monkeypatcher::MMAP_EXEC); + patcher.patch_after_mmap(t, km.start(), km.size(), km.file_offset_bytes(), + km.prot(), -1, Monkeypatcher::MMAP_EXEC); } if (!t->vm()->has_vdso()) { @@ -1498,27 +1497,46 @@ void Monkeypatcher::patch_at_preload_init(RecordTask* t) { static remote_ptr resolve_address(ElfReader& reader, uintptr_t elf_addr, remote_ptr map_start, - size_t map_size, - uintptr_t map_offset) { - uintptr_t file_offset; - if (!reader.addr_to_offset(elf_addr, file_offset)) { + size_t map_size, uintptr_t map_offset, + uint64_t prot) { + PhdrInfo phdr; + if (!reader.addr_to_phdr(elf_addr, phdr)) { LOG(warn) << "ELF address " << HEX(elf_addr) << " not in file"; } - if (file_offset < map_offset || file_offset + 32 > map_offset + map_size) { - // The value(s) to be set are outside the mapped range. This happens - // because code and data can be mapped in separate, partial mmaps in which - // case some symbols will be outside the mapped range. + if (phdr.offset < map_offset || + phdr.offset + phdr.filesz > map_offset + map_size || + bool(phdr.flags & PF_R) != bool(prot & PROT_READ) || + bool(phdr.flags & PF_W) != bool(prot & PROT_WRITE) || + bool(phdr.flags & PF_X) != bool(prot & PROT_EXEC)) { + // The value(s) to be set are outside the mapped range or belong to another + // mapping as indicated by the flags. This happens because code and data can + // be mapped in separate, partial mmaps in which case some symbols will be + // outside the mapped range. return nullptr; } - return map_start + uintptr_t(file_offset - map_offset); + + // This is the distance from the file offset of the mapping to p_offset, which + // is the same as the distance from map_start to p_vaddr + load bias. + auto page_start_offset = phdr.offset - map_offset; + + // Adjust map_start so that it points to p_vaddr + load bias. + map_start += page_start_offset; + + // Now subtract p_vaddr to make it point to virtual address 0 (the load bias). + map_start -= phdr.vaddr; + + // Finally, add the address from the symbol table to adjust it by the load + // bias. + return map_start + elf_addr; } static void set_and_record_bytes(RecordTask* t, ElfReader& reader, uintptr_t elf_addr, const void* bytes, size_t size, remote_ptr map_start, - size_t map_size, size_t map_offset) { + size_t map_size, size_t map_offset, + uint64_t prot) { remote_ptr addr = - resolve_address(reader, elf_addr, map_start, map_size, map_offset); + resolve_address(reader, elf_addr, map_start, map_size, map_offset, prot); if (!addr) { return; } @@ -1539,13 +1557,13 @@ static void set_and_record_bytes(RecordTask* t, ElfReader& reader, void Monkeypatcher::patch_dl_runtime_resolve(RecordTask* t, ElfReader& reader, uintptr_t elf_addr, remote_ptr map_start, - size_t map_size, - size_t map_offset) { + size_t map_size, size_t map_offset, + uint64_t prot) { if (t->arch() != x86_64) { return; } remote_ptr addr = - resolve_address(reader, elf_addr, map_start, map_size, map_offset); + resolve_address(reader, elf_addr, map_start, map_size, map_offset, prot); if (!addr) { return; } @@ -1641,14 +1659,13 @@ static bool is_aarch64_ldrb(uint32_t instruction, uint32_t* offset) { * Patch the __aarch64_have_lse_atomics variable to ensure that LSE atomics are * always used even if init_lse_atomics */ -void Monkeypatcher::patch_aarch64_have_lse_atomics(RecordTask* t, ElfReader& reader, - uintptr_t ldadd4_addr, - remote_ptr map_start, - size_t map_size, - size_t map_offset) { +void Monkeypatcher::patch_aarch64_have_lse_atomics( + RecordTask* t, ElfReader& reader, uintptr_t ldadd4_addr, + remote_ptr map_start, size_t map_size, size_t map_offset, + uint64_t prot) { ASSERT(t, t->arch() == aarch64); remote_ptr addr = - resolve_address(reader, ldadd4_addr, map_start, map_size, map_offset); + resolve_address(reader, ldadd4_addr, map_start, map_size, map_offset, prot); if (!addr) { return; } @@ -1694,7 +1711,7 @@ static bool file_may_need_instrumentation(const AddressSpace::Mapping& map) { } void Monkeypatcher::patch_after_mmap(RecordTask* t, remote_ptr start, - size_t size, size_t offset_bytes, + size_t size, size_t offset_bytes, uint64_t prot, int child_fd, MmapMode mode) { const auto& map = t->vm()->mapping_of(start); if (!file_may_need_instrumentation(map)) { @@ -1747,7 +1764,7 @@ void Monkeypatcher::patch_after_mmap(RecordTask* t, remote_ptr start, // pthread rwlocks don't try to use elision at all. See ELIDE_LOCK // in glibc's elide.h. set_and_record_bytes(t, reader, syms.addr(i) + 8, &zero, sizeof(zero), - start, size, offset_bytes); + start, size, offset_bytes, prot); } if (syms.is_name(i, "elision_init")) { // Make elision_init return without doing anything. This means @@ -1756,7 +1773,7 @@ void Monkeypatcher::patch_after_mmap(RecordTask* t, remote_ptr start, // elision-conf.c. static const uint8_t ret = 0xC3; set_and_record_bytes(t, reader, syms.addr(i), &ret, sizeof(ret), start, - size, offset_bytes); + size, offset_bytes, prot); } // The following operations can only be applied once because after the // patch is applied the code no longer matches the expected template. @@ -1768,7 +1785,7 @@ void Monkeypatcher::patch_after_mmap(RecordTask* t, remote_ptr start, syms.is_name(i, "_dl_runtime_resolve_xsave") || syms.is_name(i, "_dl_runtime_resolve_xsavec"))) { patch_dl_runtime_resolve(t, reader, syms.addr(i), start, size, - offset_bytes); + offset_bytes, prot); } } break; @@ -1776,7 +1793,7 @@ void Monkeypatcher::patch_after_mmap(RecordTask* t, remote_ptr start, for (size_t i = 0; i < syms.size(); ++i) { if (syms.is_name(i, "__aarch64_ldadd4_relax")) { patch_aarch64_have_lse_atomics(t, reader, syms.addr(i), start, size, - offset_bytes); + offset_bytes, prot); } } break; diff --git a/src/Monkeypatcher.h b/src/Monkeypatcher.h index 1a98a3ceea2..803b2209110 100644 --- a/src/Monkeypatcher.h +++ b/src/Monkeypatcher.h @@ -118,7 +118,8 @@ class Monkeypatcher { * patch libpthread.so. */ void patch_after_mmap(RecordTask* t, remote_ptr start, size_t size, - size_t offset_bytes, int child_fd, MmapMode mode); + size_t offset_bytes, uint64_t prot, int child_fd, + MmapMode mode); /** * The list of pages we've allocated to hold our extended jumps. @@ -153,15 +154,14 @@ class Monkeypatcher { private: void patch_dl_runtime_resolve(RecordTask* t, ElfReader& reader, - uintptr_t elf_addr, - remote_ptr map_start, - size_t map_size, - size_t map_offset); + uintptr_t elf_addr, remote_ptr map_start, + size_t map_size, size_t map_offset, + uint64_t prot); void patch_aarch64_have_lse_atomics(RecordTask* t, ElfReader& reader, uintptr_t elf_addr, remote_ptr map_start, - size_t map_size, - size_t map_offset); + size_t map_size, size_t map_offset, + uint64_t prot); /** * `ip` is the address of the instruction that triggered the syscall or trap diff --git a/src/record_syscall.cc b/src/record_syscall.cc index bfa4b6f67a8..3000b53a358 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -6394,7 +6394,7 @@ static void process_mmap(RecordTask* t, size_t length, int prot, int flags, // at an assertion, in the worst case, we'd end up modifying the underlying // file. if (!(flags & MAP_SHARED)) { - t->vm()->monkeypatcher().patch_after_mmap(t, addr, size, offset, fd, + t->vm()->monkeypatcher().patch_after_mmap(t, addr, size, offset, prot, fd, Monkeypatcher::MMAP_SYSCALL); }