additional improvements for freebsd mostly
[goodguy/cinelerra.git] / cinelerra-5.1 / tools / makeappimagetool / elf_file.cpp
// system includes
#include <cerrno>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

// library includes
#include <boost/regex.hpp>

// local includes
#include "includes/elf_file.h"
#include "includes/log.h"
#include "includes/util.h"
#include "includes/subprocess.h"
16
17 using namespace linuxdeploy::core::log;
18
19 namespace bf = boost::filesystem;
20
21 namespace linuxdeploy {
22     namespace core {
23         namespace elf_file {
24             class ElfFile::PrivateData {
25                 public:
26                     const bf::path path;
27                     uint8_t elfClass = ELFCLASSNONE;
28                     uint8_t elfABI = 0;
29                     bool isDebugSymbolsFile = false;
30                     bool isDynamicallyLinked = false;
31
32                 public:
33                     explicit PrivateData(bf::path path) : path(std::move(path)) {}
34
35                 public:
36                     static std::string getPatchelfPath() {
37                         // by default, try to use a patchelf next to the makeappimage binary
38                         // if that isn't available, fall back to searching for patchelf in the PATH
39                         std::string patchelfPath;
40
41                         const auto envPatchelf = getenv("PATCHELF");
42
43                         // allows users to use a custom patchelf instead of the bundled one
44                         if (envPatchelf != nullptr) {
45                             ldLog() << LD_DEBUG << "Using patchelf specified in $PATCHELF:" << envPatchelf << std::endl;
46                             patchelfPath = envPatchelf;
47                         } else {
48                             auto binDirPath = bf::path(util::getOwnExecutablePath());
49                             auto localPatchelfPath = binDirPath.parent_path() / "patchelf";
50
51                             if (bf::exists(localPatchelfPath)) {
52                                 patchelfPath = localPatchelfPath.string();
53                             } else {
54                                 for (const bf::path directory : util::split(getenv("PATH"), ':')) {
55                                     if (!bf::is_directory(directory))
56                                         continue;
57
58                                     auto path = directory / "patchelf";
59
60                                     if (bf::is_regular_file(path)) {
61                                         patchelfPath = path.string();
62                                         break;
63                                     }
64                                 }
65                             }
66                         }
67
68                         if (!bf::is_regular_file(patchelfPath)) {
69                             ldLog() << LD_ERROR << "Could not find patchelf: no such file:" << patchelfPath << std::endl;
70                             throw std::runtime_error("Could not find patchelf");
71                         }
72
73                         ldLog() << LD_DEBUG << "Using patchelf:" << patchelfPath << std::endl;
74                         return patchelfPath;
75                     }
76
77                 private:
78                     template<typename Ehdr_T, typename Shdr_T, typename Phdr_T>
79                     void parseElfHeader(std::shared_ptr<uint8_t> data) {
80                         // TODO: the following code will _only_ work if the native byte order equals the program's
81                         // this should not be a big problem as we don't offer ARM builds yet, and require the user to
82                         // use a matching binary for the target binaries
83
84                         auto* ehdr = reinterpret_cast<Ehdr_T*>(data.get());
85
86                         elfABI = ehdr->e_ident[EI_OSABI];
87
88                         std::vector<Shdr_T> sections;
89
90                         // parse section header table
91                         // first, we collect all entries in a vector so we can conveniently iterate over it
92                         for (uint64_t i = 0; i < ehdr->e_shnum; ++i) {
93                             auto* nextShdr = reinterpret_cast<Shdr_T*>(data.get() + ehdr->e_shoff + i * sizeof(Shdr_T));
94                             sections.emplace_back(*nextShdr);
95                         }
96
97                         auto getString = [data, &sections, ehdr](uint64_t offset) {
98                             assert(ehdr->e_shstrndx != SHN_UNDEF);
99                             const auto& stringTableSection = sections[ehdr->e_shstrndx];
100                             return std::string{reinterpret_cast<char*>(data.get() + stringTableSection.sh_offset + offset)};
101                         };
102
103                         // now that we can look up texts, we can create a map to easily access the sections by name
104                         std::unordered_map<std::string, Shdr_T> sectionsMap;
105                         std::for_each(sections.begin(), sections.end(), [&sectionsMap, &getString](const Shdr_T& shdr) {
106                             const auto headerName = getString(shdr.sh_name);
107                             sectionsMap.insert(std::make_pair(headerName, shdr));
108                         });
109
110                         // this function is based on observations of the behavior of:
111                         // - strip --only-keep-debug
112                         // - objcopy --only-keep-debug
113                         isDebugSymbolsFile = (sectionsMap[".text"].sh_type == SHT_NOBITS);
114
115                         // https://stackoverflow.com/a/7298931
116                         for (uint64_t i = 0; i < ehdr->e_phnum && !isDynamicallyLinked; ++i) {
117                             auto* nextPhdr = reinterpret_cast<Phdr_T*>(data.get() + ehdr->e_phoff + i * sizeof(Phdr_T));
118                             switch (nextPhdr->p_type) {
119                                 case PT_DYNAMIC:
120                                 case PT_INTERP:
121                                     isDynamicallyLinked = true;
122                                     break;
123                             }
124                         }
125                     }
126
127                 public:
128                     void readDataUsingElfAPI() {
129                         int fd = open(path.c_str(), O_RDONLY);
130                         auto map_size = static_cast<size_t>(lseek(fd, 0, SEEK_END));
131
132                         std::shared_ptr<uint8_t> data(
133                             static_cast<uint8_t*>(mmap(nullptr, map_size, PROT_READ, MAP_SHARED, fd, 0)),
134                             [map_size](uint8_t* p) {
135                                 if (munmap(static_cast<void*>(p), map_size) != 0) {
136                                     int error = errno;
137                                     throw ElfFileParseError(std::string("Failed to call munmap(): ") + strerror(error));
138                                 }
139                                 p = nullptr;
140                             }
141                         );
142                         close(fd);
143
144                         // check which ELF "class" (32-bit or 64-bit) to use
145                         // the "class" is available at a specific constant offset in the section e_ident, which
146                         // happens to be the first section, so just reading one byte at EI_CLASS yields the data we're
147                         // looking for
148                         elfClass = data.get()[EI_CLASS];
149
150                         switch (elfClass) {
151                             case ELFCLASS32:
152                                 parseElfHeader<Elf32_Ehdr, Elf32_Shdr, Elf32_Phdr>(data);
153                                 break;
154                             case ELFCLASS64:
155                                 parseElfHeader<Elf64_Ehdr, Elf64_Shdr, Elf64_Phdr>(data);
156                                 break;
157                             default:
158                                 throw ElfFileParseError("Unknown ELF class: " + std::to_string(elfClass));
159                         }
160                     }
161             };
162
163             ElfFile::ElfFile(const boost::filesystem::path& path) {
164                 // check if file exists
165                 if (!bf::exists(path))
166                     throw ElfFileParseError("No such file or directory: " + path.string());
167
168                 // check magic bytes
169                 std::ifstream ifs(path.string());
170                 if (!ifs)
171                     throw ElfFileParseError("Could not open file: " + path.string());
172
173                 std::vector<char> magicBytes(4);
174                 ifs.read(magicBytes.data(), 4);
175
176                 if (strncmp(magicBytes.data(), "\177ELF", 4) != 0)
177                     throw ElfFileParseError("Invalid magic bytes in file header");
178
179                 d = new PrivateData(path);
180                 d->readDataUsingElfAPI();
181             }
182
183             ElfFile::~ElfFile() {
184                 delete d;
185             }
186
187             std::vector<bf::path> ElfFile::traceDynamicDependencies() {
188                 // this method's purpose is to abstract this process
189                 // the caller doesn't care _how_ it's done, after all
190
191                 // for now, we use the same ldd based method linuxdeployqt uses
192
193                 std::vector<bf::path> paths;
194
195                 subprocess::subprocess_env_map_t env;
196                 env["LC_ALL"] = "C";
197
198                 // workaround for https://sourceware.org/bugzilla/show_bug.cgi?id=25263
199                 // when you pass an absolute path to ldd, it can find libraries referenced in the rpath properly
200                 // this bug was first found when trying to find a library next to the binary which contained $ORIGIN
201                 // note that this is just a bug in ldd, the linker has always worked as intended
202                 const auto resolvedPath = bf::canonical(d->path);
203
204                 subprocess::subprocess lddProc({"ldd", resolvedPath.string()}, env);
205
206                 const auto result = lddProc.run();
207
208                 if (result.exit_code() != 0) {
209                     if (result.stdout_string().find("not a dynamic executable") != std::string::npos || result.stderr_string().find("not a dynamic executable") != std::string::npos) {
210                         ldLog() << LD_WARNING << this->d->path << "is not linked dynamically" << std::endl;
211                         return {};
212                     }
213
214                     throw std::runtime_error{"Failed to run ldd: exited with code " + std::to_string(result.exit_code())};
215                 }
216
217                 const boost::regex expr(R"(\s*(.+)\s+\=>\s+(.+)\s+\((.+)\)\s*)");
218                 boost::smatch what;
219
220                 for (const auto& line : util::splitLines(result.stdout_string())) {
221                     if (boost::regex_search(line, what, expr)) {
222                         auto libraryPath = what[2].str();
223                         util::trim(libraryPath);
224                         paths.push_back(bf::absolute(libraryPath));
225                     } else {
226                         if (util::stringContains(line, "=> not found")) {
227                             auto missingLib = line;
228                             static const std::string pattern = "=> not found";
229                             missingLib.erase(missingLib.find(pattern), pattern.size());
230                             util::trim(missingLib);
231                             util::trim(missingLib, '\t');
232                             throw DependencyNotFoundError("Could not find dependency: " + missingLib);
233                         } else {
234                             ldLog() << LD_DEBUG << "Invalid ldd output: " << line << std::endl;
235                         }
236                     }
237                 }
238
239                 return paths;
240             }
241
242             std::string ElfFile::getRPath() {
243                 // don't try to fetch patchelf path in a catchall to make sure the process exists when the tool cannot be found
244                 const auto patchelfPath = PrivateData::getPatchelfPath();
245
246                 try {
247                     subprocess::subprocess patchelfProc({patchelfPath, "--print-rpath", d->path.string()});
248
249                     const auto result = patchelfProc.run();
250
251                     if (result.exit_code() != 0) {
252                         // if file is not an ELF executable, there is no need for a detailed error message
253                         if (result.exit_code() == 1 && result.stderr_string().find("not an ELF executable") != std::string::npos) {
254                             return "";
255                         } else {
256                             ldLog() << LD_ERROR << "Call to patchelf failed:" << std::endl << result.stderr_string();
257                             return "";
258                         }
259                     }
260
261                     auto stdoutContents = result.stdout_string();
262
263                     util::trim(stdoutContents, '\n');
264                     util::trim(stdoutContents);
265
266                     return stdoutContents;
267                 } catch (const std::exception&) {
268                     return "";
269                 }
270             }
271
272             bool ElfFile::setRPath(const std::string& value) {
273                 // don't try to fetch patchelf path in a catchall to make sure the process exists when the tool cannot be found
274                 const auto patchelfPath = PrivateData::getPatchelfPath();
275
276                 try {
277                     subprocess::subprocess patchelfProc({patchelfPath.c_str(), "--set-rpath", value.c_str(), d->path.c_str()});
278
279                     const auto result = patchelfProc.run();
280
281                     if (result.exit_code() != 0) {
282                         ldLog() << LD_ERROR << "Call to patchelf failed:" << std::endl << result.stderr_string() << std::endl;
283                         return false;
284                     }
285                 } catch (const std::exception&) {
286                     return false;
287                 }
288
289                 return true;
290             }
291
292             uint8_t ElfFile::getSystemElfABI() {
293                 // the only way to get the system's ELF ABI is to read the own executable using the ELF header,
294                 // and get the ELFOSABI flag
295                 auto self = std::shared_ptr<char>(realpath("/proc/self/exe", nullptr), [](char* p) { free(p); });
296
297                 if (self == nullptr)
298                     throw ElfFileParseError("Could not read /proc/self/exe");
299
300                 std::ifstream ifs(self.get());
301
302                 if (!ifs)
303                     throw ElfFileParseError("Could not open file: " + std::string(self.get()));
304
305                 // the "class" is available at a specific constant offset in the section e_ident, which
306                 // happens to be the first section, so just reading one byte at EI_CLASS yields the data we're
307                 // looking for
308                 ifs.seekg(EI_OSABI);
309
310                 char buf;
311                 ifs.read(&buf, 1);
312
313                 return static_cast<uint8_t>(buf);
314             }
315
316             uint8_t ElfFile::getSystemElfClass() {
317                 #if __SIZEOF_POINTER__ == 4
318                 return ELFCLASS32;
319                 #elif __SIZEOF_POINTER__ == 8
320                 return ELFCLASS64;
321                 #else
322                 #error "Invalid address size"
323                 #endif
324             }
325
326             uint8_t ElfFile::getSystemElfEndianness() {
327                 #if __BYTE_ORDER == __LITTLE_ENDIAN
328                 return ELFDATA2LSB;
329                 #elif __BYTE_ORDER == __BIG_ENDIAN
330                 return ELFDATA2MSB;
331                 #else
332                 #error "Unknown machine endianness"
333                 #endif
334             }
335
336             uint8_t ElfFile::getElfClass()  {
337                 return d->elfClass;
338             }
339
340             uint8_t ElfFile::getElfABI() {
341                 return d->elfABI;
342             }
343
344             bool ElfFile::isDebugSymbolsFile() {
345                 return d->isDebugSymbolsFile;
346             }
347
348             bool ElfFile::isDynamicallyLinked() {
349                 return d->isDynamicallyLinked;
350             }
351         }
352     }
353 }