// elf_info.hpp (13 KB)
  1. // Copyright 2014 Renato Tegon Forti, Antony Polukhin.
  2. // Copyright Antony Polukhin, 2015-2024.
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt
  6. // or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_DLL_DETAIL_POSIX_ELF_INFO_HPP
  8. #define BOOST_DLL_DETAIL_POSIX_ELF_INFO_HPP
  9. #include <boost/dll/config.hpp>
  10. #ifdef BOOST_HAS_PRAGMA_ONCE
  11. # pragma once
  12. #endif
  13. #include <cstring>
  14. #include <fstream>
  15. #include <limits>
  16. #include <vector>
  17. #include <boost/cstdint.hpp>
  18. #include <boost/throw_exception.hpp>
  19. namespace boost { namespace dll { namespace detail {
  20. template <class AddressOffsetT>
  21. struct Elf_Ehdr_template {
  22. unsigned char e_ident[16]; /* Magic number and other info */
  23. boost::uint16_t e_type; /* Object file type */
  24. boost::uint16_t e_machine; /* Architecture */
  25. boost::uint32_t e_version; /* Object file version */
  26. AddressOffsetT e_entry; /* Entry point virtual address */
  27. AddressOffsetT e_phoff; /* Program header table file offset */
  28. AddressOffsetT e_shoff; /* Section header table file offset */
  29. boost::uint32_t e_flags; /* Processor-specific flags */
  30. boost::uint16_t e_ehsize; /* ELF header size in bytes */
  31. boost::uint16_t e_phentsize; /* Program header table entry size */
  32. boost::uint16_t e_phnum; /* Program header table entry count */
  33. boost::uint16_t e_shentsize; /* Section header table entry size */
  34. boost::uint16_t e_shnum; /* Section header table entry count */
  35. boost::uint16_t e_shstrndx; /* Section header string table index */
  36. };
  37. typedef Elf_Ehdr_template<boost::uint32_t> Elf32_Ehdr_;
  38. typedef Elf_Ehdr_template<boost::uint64_t> Elf64_Ehdr_;
  39. template <class AddressOffsetT>
  40. struct Elf_Shdr_template {
  41. boost::uint32_t sh_name; /* Section name (string tbl index) */
  42. boost::uint32_t sh_type; /* Section type */
  43. AddressOffsetT sh_flags; /* Section flags */
  44. AddressOffsetT sh_addr; /* Section virtual addr at execution */
  45. AddressOffsetT sh_offset; /* Section file offset */
  46. AddressOffsetT sh_size; /* Section size in bytes */
  47. boost::uint32_t sh_link; /* Link to another section */
  48. boost::uint32_t sh_info; /* Additional section information */
  49. AddressOffsetT sh_addralign; /* Section alignment */
  50. AddressOffsetT sh_entsize; /* Entry size if section holds table */
  51. };
  52. typedef Elf_Shdr_template<boost::uint32_t> Elf32_Shdr_;
  53. typedef Elf_Shdr_template<boost::uint64_t> Elf64_Shdr_;
  54. template <class AddressOffsetT>
  55. struct Elf_Sym_template;
  56. template <>
  57. struct Elf_Sym_template<boost::uint32_t> {
  58. typedef boost::uint32_t AddressOffsetT;
  59. boost::uint32_t st_name; /* Symbol name (string tbl index) */
  60. AddressOffsetT st_value; /* Symbol value */
  61. AddressOffsetT st_size; /* Symbol size */
  62. unsigned char st_info; /* Symbol type and binding */
  63. unsigned char st_other; /* Symbol visibility */
  64. boost::uint16_t st_shndx; /* Section index */
  65. };
  66. template <>
  67. struct Elf_Sym_template<boost::uint64_t> {
  68. typedef boost::uint64_t AddressOffsetT;
  69. boost::uint32_t st_name; /* Symbol name (string tbl index) */
  70. unsigned char st_info; /* Symbol type and binding */
  71. unsigned char st_other; /* Symbol visibility */
  72. boost::uint16_t st_shndx; /* Section index */
  73. AddressOffsetT st_value; /* Symbol value */
  74. AddressOffsetT st_size; /* Symbol size */
  75. };
  76. typedef Elf_Sym_template<boost::uint32_t> Elf32_Sym_;
  77. typedef Elf_Sym_template<boost::uint64_t> Elf64_Sym_;
  78. template <class AddressOffsetT>
  79. class elf_info {
  80. typedef boost::dll::detail::Elf_Ehdr_template<AddressOffsetT> header_t;
  81. typedef boost::dll::detail::Elf_Shdr_template<AddressOffsetT> section_t;
  82. typedef boost::dll::detail::Elf_Sym_template<AddressOffsetT> symbol_t;
  83. BOOST_STATIC_CONSTANT(boost::uint32_t, SHT_SYMTAB_ = 2);
  84. BOOST_STATIC_CONSTANT(boost::uint32_t, SHT_STRTAB_ = 3);
  85. BOOST_STATIC_CONSTANT(boost::uint32_t, SHT_DYNSYM_ = 11);
  86. BOOST_STATIC_CONSTANT(unsigned char, STB_LOCAL_ = 0); /* Local symbol */
  87. BOOST_STATIC_CONSTANT(unsigned char, STB_GLOBAL_ = 1); /* Global symbol */
  88. BOOST_STATIC_CONSTANT(unsigned char, STB_WEAK_ = 2); /* Weak symbol */
  89. /* Symbol visibility specification encoded in the st_other field. */
  90. BOOST_STATIC_CONSTANT(unsigned char, STV_DEFAULT_ = 0); /* Default symbol visibility rules */
  91. BOOST_STATIC_CONSTANT(unsigned char, STV_INTERNAL_ = 1); /* Processor specific hidden class */
  92. BOOST_STATIC_CONSTANT(unsigned char, STV_HIDDEN_ = 2); /* Sym unavailable in other modules */
  93. BOOST_STATIC_CONSTANT(unsigned char, STV_PROTECTED_ = 3); /* Not preemptible, not exported */
  94. public:
  95. static bool parsing_supported(std::ifstream& fs) {
  96. const unsigned char magic_bytes[5] = {
  97. 0x7f, 'E', 'L', 'F', sizeof(boost::uint32_t) == sizeof(AddressOffsetT) ? 1 : 2
  98. };
  99. unsigned char ch;
  100. fs.seekg(0);
  101. for (std::size_t i = 0; i < sizeof(magic_bytes); ++i) {
  102. fs >> ch;
  103. if (ch != magic_bytes[i]) {
  104. return false;
  105. }
  106. }
  107. return true;
  108. }
  109. static std::vector<std::string> sections(std::ifstream& fs) {
  110. std::vector<std::string> ret;
  111. std::vector<char> names;
  112. sections_names_raw(fs, names);
  113. const char* name_begin = &names[0];
  114. const char* const name_end = name_begin + names.size();
  115. ret.reserve(header(fs).e_shnum);
  116. do {
  117. if (*name_begin) {
  118. ret.push_back(name_begin);
  119. name_begin += ret.back().size() + 1;
  120. } else {
  121. ++name_begin;
  122. }
  123. } while (name_begin != name_end);
  124. return ret;
  125. }
  126. private:
  127. template <class Integer>
  128. static void checked_seekg(std::ifstream& fs, Integer pos) {
  129. /* TODO: use cmp_less, cmp_greater
  130. if ((std::numeric_limits<std::streamoff>::max)() < pos) {
  131. boost::throw_exception(std::runtime_error("Integral overflow while getting info from ELF file"));
  132. }
  133. if ((std::numeric_limits<std::streamoff>::min)() > pos){
  134. boost::throw_exception(std::runtime_error("Integral underflow while getting info from ELF file"));
  135. }
  136. */
  137. fs.seekg(static_cast<std::streamoff>(pos));
  138. }
  139. template <class T>
  140. static void read_raw(std::ifstream& fs, T& value, std::size_t size = sizeof(T)) {
  141. fs.read(reinterpret_cast<char*>(&value), size);
  142. }
  143. static header_t header(std::ifstream& fs) {
  144. header_t elf;
  145. fs.seekg(0);
  146. read_raw(fs, elf);
  147. return elf;
  148. }
  149. static void sections_names_raw(std::ifstream& fs, std::vector<char>& sections) {
  150. const header_t elf = header(fs);
  151. section_t section_names_section;
  152. checked_seekg(fs, elf.e_shoff + elf.e_shstrndx * sizeof(section_t));
  153. read_raw(fs, section_names_section);
  154. sections.resize(static_cast<std::size_t>(section_names_section.sh_size) + 1, '\0');
  155. checked_seekg(fs, section_names_section.sh_offset);
  156. read_raw(fs, sections[0], static_cast<std::size_t>(section_names_section.sh_size));
  157. }
  158. static void symbols_text(std::ifstream& fs, std::vector<symbol_t>& symbols, std::vector<char>& text) {
  159. std::vector<char> names;
  160. sections_names_raw(fs, names);
  161. symbols_text(fs, symbols, text, names);
  162. }
  163. static void symbols_text(std::ifstream& fs, std::vector<symbol_t>& symbols, std::vector<char>& text, const std::vector<char>& names) {
  164. const header_t elf = header(fs);
  165. checked_seekg(fs, elf.e_shoff);
  166. // ".dynsym" section may not have info on symbols that could be used while self loading an executable,
  167. // so we prefer ".symtab" section.
  168. AddressOffsetT symtab_size = 0;
  169. AddressOffsetT symtab_offset = 0;
  170. AddressOffsetT strtab_size = 0;
  171. AddressOffsetT strtab_offset = 0;
  172. AddressOffsetT dynsym_size = 0;
  173. AddressOffsetT dynsym_offset = 0;
  174. AddressOffsetT dynstr_size = 0;
  175. AddressOffsetT dynstr_offset = 0;
  176. for (std::size_t i = 0; i < elf.e_shnum; ++i) {
  177. section_t section;
  178. read_raw(fs, section);
  179. if (section.sh_name >= names.size()) {
  180. continue;
  181. }
  182. const char* name = &names[section.sh_name];
  183. if (section.sh_type == SHT_SYMTAB_ && !std::strcmp(name, ".symtab")) {
  184. symtab_size = section.sh_size;
  185. symtab_offset = section.sh_offset;
  186. } else if (section.sh_type == SHT_STRTAB_) {
  187. if (!std::strcmp(name, ".dynstr")) {
  188. dynstr_size = section.sh_size;
  189. dynstr_offset = section.sh_offset;
  190. } else if (!std::strcmp(name, ".strtab")) {
  191. strtab_size = section.sh_size;
  192. strtab_offset = section.sh_offset;
  193. }
  194. } else if (section.sh_type == SHT_DYNSYM_ && !std::strcmp(name, ".dynsym")) {
  195. dynsym_size = section.sh_size;
  196. dynsym_offset = section.sh_offset;
  197. }
  198. }
  199. if (!symtab_size || !strtab_size) {
  200. // ".symtab" stripped from the binary and we have to fallback to ".dynsym"
  201. symtab_size = dynsym_size;
  202. symtab_offset = dynsym_offset;
  203. strtab_size = dynstr_size;
  204. strtab_offset = dynstr_offset;
  205. }
  206. if (!symtab_size || !strtab_size) {
  207. return;
  208. }
  209. text.resize(static_cast<std::size_t>(strtab_size) + 1, '\0');
  210. checked_seekg(fs, strtab_offset);
  211. read_raw(fs, text[0], static_cast<std::size_t>(strtab_size));
  212. symbols.resize(static_cast<std::size_t>(symtab_size / sizeof(symbol_t)));
  213. checked_seekg(fs, symtab_offset);
  214. read_raw(fs, symbols[0], static_cast<std::size_t>(symtab_size - (symtab_size % sizeof(symbol_t))) );
  215. }
  216. static bool is_visible(const symbol_t& sym) BOOST_NOEXCEPT {
  217. const unsigned char visibility = (sym.st_other & 0x03);
  218. // `(sym.st_info >> 4) != STB_LOCAL_ && !!sym.st_size` check also workarounds the
  219. // GCC's issue https://sourceware.org/bugzilla/show_bug.cgi?id=13621
  220. return (visibility == STV_DEFAULT_ || visibility == STV_PROTECTED_)
  221. && (sym.st_info >> 4) != STB_LOCAL_ && !!sym.st_size;
  222. }
  223. public:
  224. static std::vector<std::string> symbols(std::ifstream& fs) {
  225. std::vector<std::string> ret;
  226. std::vector<symbol_t> symbols;
  227. std::vector<char> text;
  228. symbols_text(fs, symbols, text);
  229. ret.reserve(symbols.size());
  230. for (std::size_t i = 0; i < symbols.size(); ++i) {
  231. if (is_visible(symbols[i]) && symbols[i].st_name < text.size()) {
  232. ret.push_back(&text[symbols[i].st_name]);
  233. if (ret.back().empty()) {
  234. ret.pop_back(); // Do not show empty names
  235. }
  236. }
  237. }
  238. return ret;
  239. }
  240. static std::vector<std::string> symbols(std::ifstream& fs, const char* section_name) {
  241. std::vector<std::string> ret;
  242. std::size_t index = 0;
  243. std::size_t ptrs_in_section_count = 0;
  244. std::vector<char> names;
  245. sections_names_raw(fs, names);
  246. const header_t elf = header(fs);
  247. for (; index < elf.e_shnum; ++index) {
  248. section_t section;
  249. checked_seekg(fs, elf.e_shoff + index * sizeof(section_t));
  250. read_raw(fs, section);
  251. if (!std::strcmp(&names.at(section.sh_name), section_name)) {
  252. if (!section.sh_entsize) {
  253. section.sh_entsize = 1;
  254. }
  255. ptrs_in_section_count = static_cast<std::size_t>(section.sh_size / section.sh_entsize);
  256. break;
  257. }
  258. }
  259. std::vector<symbol_t> symbols;
  260. std::vector<char> text;
  261. symbols_text(fs, symbols, text, names);
  262. if (ptrs_in_section_count < symbols.size()) {
  263. ret.reserve(ptrs_in_section_count);
  264. } else {
  265. ret.reserve(symbols.size());
  266. }
  267. for (std::size_t i = 0; i < symbols.size(); ++i) {
  268. if (symbols[i].st_shndx == index && is_visible(symbols[i]) && symbols[i].st_name < text.size()) {
  269. ret.push_back(&text[symbols[i].st_name]);
  270. if (ret.back().empty()) {
  271. ret.pop_back(); // Do not show empty names
  272. }
  273. }
  274. }
  275. return ret;
  276. }
  277. };
  278. typedef elf_info<boost::uint32_t> elf_info32;
  279. typedef elf_info<boost::uint64_t> elf_info64;
  280. }}} // namespace boost::dll::detail
  281. #endif // BOOST_DLL_DETAIL_POSIX_ELF_INFO_HPP