8 #include <boost/regex.hpp>
12 #include "includes/elf_file.h"
13 #include "includes/log.h"
14 #include "includes/util.h"
15 #include "includes/subprocess.h"
17 using namespace linuxdeploy::core::log;
19 namespace bf = boost::filesystem;
21 namespace linuxdeploy {
24 class ElfFile::PrivateData {
27 uint8_t elfClass = ELFCLASSNONE;
29 bool isDebugSymbolsFile = false;
30 bool isDynamicallyLinked = false;
33 explicit PrivateData(bf::path path) : path(std::move(path)) {}
36 static std::string getPatchelfPath() {
37 // by default, try to use a patchelf next to the makeappimage binary
38 // if that isn't available, fall back to searching for patchelf in the PATH
39 std::string patchelfPath;
41 const auto envPatchelf = getenv("PATCHELF");
43 // allows users to use a custom patchelf instead of the bundled one
44 if (envPatchelf != nullptr) {
45 ldLog() << LD_DEBUG << "Using patchelf specified in $PATCHELF:" << envPatchelf << std::endl;
46 patchelfPath = envPatchelf;
48 auto binDirPath = bf::path(util::getOwnExecutablePath());
49 auto localPatchelfPath = binDirPath.parent_path() / "patchelf";
51 if (bf::exists(localPatchelfPath)) {
52 patchelfPath = localPatchelfPath.string();
54 for (const bf::path directory : util::split(getenv("PATH"), ':')) {
55 if (!bf::is_directory(directory))
58 auto path = directory / "patchelf";
60 if (bf::is_regular_file(path)) {
61 patchelfPath = path.string();
68 if (!bf::is_regular_file(patchelfPath)) {
69 ldLog() << LD_ERROR << "Could not find patchelf: no such file:" << patchelfPath << std::endl;
70 throw std::runtime_error("Could not find patchelf");
73 ldLog() << LD_DEBUG << "Using patchelf:" << patchelfPath << std::endl;
78 template<typename Ehdr_T, typename Shdr_T, typename Phdr_T>
79 void parseElfHeader(std::shared_ptr<uint8_t> data) {
80 // TODO: the following code will _only_ work if the native byte order equals the program's
81 // this should not be a big problem as we don't offer ARM builds yet, and require the user to
82 // use a matching binary for the target binaries
84 auto* ehdr = reinterpret_cast<Ehdr_T*>(data.get());
86 elfABI = ehdr->e_ident[EI_OSABI];
88 std::vector<Shdr_T> sections;
90 // parse section header table
91 // first, we collect all entries in a vector so we can conveniently iterate over it
92 for (uint64_t i = 0; i < ehdr->e_shnum; ++i) {
93 auto* nextShdr = reinterpret_cast<Shdr_T*>(data.get() + ehdr->e_shoff + i * sizeof(Shdr_T));
94 sections.emplace_back(*nextShdr);
97 auto getString = [data, §ions, ehdr](uint64_t offset) {
98 assert(ehdr->e_shstrndx != SHN_UNDEF);
99 const auto& stringTableSection = sections[ehdr->e_shstrndx];
100 return std::string{reinterpret_cast<char*>(data.get() + stringTableSection.sh_offset + offset)};
103 // now that we can look up texts, we can create a map to easily access the sections by name
104 std::unordered_map<std::string, Shdr_T> sectionsMap;
105 std::for_each(sections.begin(), sections.end(), [§ionsMap, &getString](const Shdr_T& shdr) {
106 const auto headerName = getString(shdr.sh_name);
107 sectionsMap.insert(std::make_pair(headerName, shdr));
110 // this function is based on observations of the behavior of:
111 // - strip --only-keep-debug
112 // - objcopy --only-keep-debug
113 isDebugSymbolsFile = (sectionsMap[".text"].sh_type == SHT_NOBITS);
115 // https://stackoverflow.com/a/7298931
116 for (uint64_t i = 0; i < ehdr->e_phnum && !isDynamicallyLinked; ++i) {
117 auto* nextPhdr = reinterpret_cast<Phdr_T*>(data.get() + ehdr->e_phoff + i * sizeof(Phdr_T));
118 switch (nextPhdr->p_type) {
121 isDynamicallyLinked = true;
128 void readDataUsingElfAPI() {
129 int fd = open(path.c_str(), O_RDONLY);
130 auto map_size = static_cast<size_t>(lseek(fd, 0, SEEK_END));
132 std::shared_ptr<uint8_t> data(
133 static_cast<uint8_t*>(mmap(nullptr, map_size, PROT_READ, MAP_SHARED, fd, 0)),
134 [map_size](uint8_t* p) {
135 if (munmap(static_cast<void*>(p), map_size) != 0) {
137 throw ElfFileParseError(std::string("Failed to call munmap(): ") + strerror(error));
144 // check which ELF "class" (32-bit or 64-bit) to use
145 // the "class" is available at a specific constant offset in the section e_ident, which
146 // happens to be the first section, so just reading one byte at EI_CLASS yields the data we're
148 elfClass = data.get()[EI_CLASS];
152 parseElfHeader<Elf32_Ehdr, Elf32_Shdr, Elf32_Phdr>(data);
155 parseElfHeader<Elf64_Ehdr, Elf64_Shdr, Elf64_Phdr>(data);
158 throw ElfFileParseError("Unknown ELF class: " + std::to_string(elfClass));
163 ElfFile::ElfFile(const boost::filesystem::path& path) {
164 // check if file exists
165 if (!bf::exists(path))
166 throw ElfFileParseError("No such file or directory: " + path.string());
169 std::ifstream ifs(path.string());
171 throw ElfFileParseError("Could not open file: " + path.string());
173 std::vector<char> magicBytes(4);
174 ifs.read(magicBytes.data(), 4);
176 if (strncmp(magicBytes.data(), "\177ELF", 4) != 0)
177 throw ElfFileParseError("Invalid magic bytes in file header");
179 d = new PrivateData(path);
180 d->readDataUsingElfAPI();
183 ElfFile::~ElfFile() {
187 std::vector<bf::path> ElfFile::traceDynamicDependencies() {
188 // this method's purpose is to abstract this process
189 // the caller doesn't care _how_ it's done, after all
191 // for now, we use the same ldd based method linuxdeployqt uses
193 std::vector<bf::path> paths;
195 subprocess::subprocess_env_map_t env;
198 // workaround for https://sourceware.org/bugzilla/show_bug.cgi?id=25263
199 // when you pass an absolute path to ldd, it can find libraries referenced in the rpath properly
200 // this bug was first found when trying to find a library next to the binary which contained $ORIGIN
201 // note that this is just a bug in ldd, the linker has always worked as intended
202 const auto resolvedPath = bf::canonical(d->path);
204 subprocess::subprocess lddProc({"ldd", resolvedPath.string()}, env);
206 const auto result = lddProc.run();
208 if (result.exit_code() != 0) {
209 if (result.stdout_string().find("not a dynamic executable") != std::string::npos || result.stderr_string().find("not a dynamic executable") != std::string::npos) {
210 ldLog() << LD_WARNING << this->d->path << "is not linked dynamically" << std::endl;
214 throw std::runtime_error{"Failed to run ldd: exited with code " + std::to_string(result.exit_code())};
217 const boost::regex expr(R"(\s*(.+)\s+\=>\s+(.+)\s+\((.+)\)\s*)");
220 for (const auto& line : util::splitLines(result.stdout_string())) {
221 if (boost::regex_search(line, what, expr)) {
222 auto libraryPath = what[2].str();
223 util::trim(libraryPath);
224 paths.push_back(bf::absolute(libraryPath));
226 if (util::stringContains(line, "=> not found")) {
227 auto missingLib = line;
228 static const std::string pattern = "=> not found";
229 missingLib.erase(missingLib.find(pattern), pattern.size());
230 util::trim(missingLib);
231 util::trim(missingLib, '\t');
232 throw DependencyNotFoundError("Could not find dependency: " + missingLib);
234 ldLog() << LD_DEBUG << "Invalid ldd output: " << line << std::endl;
242 std::string ElfFile::getRPath() {
243 // don't try to fetch patchelf path in a catchall to make sure the process exists when the tool cannot be found
244 const auto patchelfPath = PrivateData::getPatchelfPath();
247 subprocess::subprocess patchelfProc({patchelfPath, "--print-rpath", d->path.string()});
249 const auto result = patchelfProc.run();
251 if (result.exit_code() != 0) {
252 // if file is not an ELF executable, there is no need for a detailed error message
253 if (result.exit_code() == 1 && result.stderr_string().find("not an ELF executable") != std::string::npos) {
256 ldLog() << LD_ERROR << "Call to patchelf failed:" << std::endl << result.stderr_string();
261 auto stdoutContents = result.stdout_string();
263 util::trim(stdoutContents, '\n');
264 util::trim(stdoutContents);
266 return stdoutContents;
267 } catch (const std::exception&) {
272 bool ElfFile::setRPath(const std::string& value) {
273 // don't try to fetch patchelf path in a catchall to make sure the process exists when the tool cannot be found
274 const auto patchelfPath = PrivateData::getPatchelfPath();
277 subprocess::subprocess patchelfProc({patchelfPath.c_str(), "--set-rpath", value.c_str(), d->path.c_str()});
279 const auto result = patchelfProc.run();
281 if (result.exit_code() != 0) {
282 ldLog() << LD_ERROR << "Call to patchelf failed:" << std::endl << result.stderr_string() << std::endl;
285 } catch (const std::exception&) {
292 uint8_t ElfFile::getSystemElfABI() {
293 // the only way to get the system's ELF ABI is to read the own executable using the ELF header,
294 // and get the ELFOSABI flag
295 auto self = std::shared_ptr<char>(realpath("/proc/self/exe", nullptr), [](char* p) { free(p); });
298 throw ElfFileParseError("Could not read /proc/self/exe");
300 std::ifstream ifs(self.get());
303 throw ElfFileParseError("Could not open file: " + std::string(self.get()));
305 // the "class" is available at a specific constant offset in the section e_ident, which
306 // happens to be the first section, so just reading one byte at EI_CLASS yields the data we're
313 return static_cast<uint8_t>(buf);
316 uint8_t ElfFile::getSystemElfClass() {
317 #if __SIZEOF_POINTER__ == 4
319 #elif __SIZEOF_POINTER__ == 8
322 #error "Invalid address size"
326 uint8_t ElfFile::getSystemElfEndianness() {
327 #if __BYTE_ORDER == __LITTLE_ENDIAN
329 #elif __BYTE_ORDER == __BIG_ENDIAN
332 #error "Unknown machine endianness"
336 uint8_t ElfFile::getElfClass() {
340 uint8_t ElfFile::getElfABI() {
344 bool ElfFile::isDebugSymbolsFile() {
345 return d->isDebugSymbolsFile;
348 bool ElfFile::isDynamicallyLinked() {
349 return d->isDynamicallyLinked;