From 202337a1155c7b511387cff3c9df47d00cc14924 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Thu, 1 May 2025 06:34:42 -0700 Subject: [PATCH 1/6] Write out file names to GCOV file This patch modifies the profile readers and writers to support writing out the file names for functions into the GCOV file to enable using file names for more accurate profile annotation in GCC's auto-profile pass. This patch goes along with the RFC posted to the GCC mailing lists at https://gcc.gnu.org/pipermail/gcc-patches/2025-June/686835.html. This also adds support for GCOV version 3, to support the breaking change in the file format. --- gcov.cc | 6 +++-- llvm_profile_writer.cc | 3 ++- llvm_profile_writer_test.cc | 6 +++-- profile_reader.cc | 20 +++++++++++--- profile_reader.h | 3 ++- profile_writer.cc | 14 ++++++++-- profile_writer.h | 52 ++++++++++++++++++++++++++++++++++--- symbol_map.cc | 17 ++++++------ 8 files changed, 98 insertions(+), 23 deletions(-) diff --git a/gcov.cc b/gcov.cc index 811cbf0..c111bc8 100644 --- a/gcov.cc +++ b/gcov.cc @@ -149,7 +149,8 @@ void gcov_write_string(const char *string) { if (string) { length = strlen(string); - if (absl::GetFlag(FLAGS_gcov_version) == 2) { + if (absl::GetFlag(FLAGS_gcov_version) == 2 || + absl::GetFlag(FLAGS_gcov_version) == 3) { // Length includes the terminating 0 and is saved in bytes. alloc = length + 1; char *byte_buffer = gcov_write_bytes(4 + alloc); @@ -229,7 +230,8 @@ const char * gcov_read_string(void) { return 0; } - if (absl::GetFlag(FLAGS_gcov_version) == 2) { + if (absl::GetFlag(FLAGS_gcov_version) == 2 || + absl::GetFlag(FLAGS_gcov_version) == 3) { return gcov_read_bytes (length); } else { diff --git a/llvm_profile_writer.cc b/llvm_profile_writer.cc index 473cc2c..d3b9357 100644 --- a/llvm_profile_writer.cc +++ b/llvm_profile_writer.cc @@ -186,7 +186,8 @@ bool LLVMProfileWriter::WriteToFile(const std::string &output_filename) { // Populate the symbol table. This table contains all the symbols // for functions found in the binary. StringIndexMap name_table; - StringTableUpdater::Update(*symbol_map_, &name_table); + FileIndexMap file_table; + StringTableUpdater::Update(*symbol_map_, &name_table, &file_table); // If the underlying llvm profile writer has not been created yet, // create it here. diff --git a/llvm_profile_writer_test.cc b/llvm_profile_writer_test.cc index 590de25..804f192 100644 --- a/llvm_profile_writer_test.cc +++ b/llvm_profile_writer_test.cc @@ -33,7 +33,8 @@ TEST(LlvmProfileWriterTest, ReadProfile) { ASSERT_TRUE(creator.ComputeProfile(&symbol_map, true)); StringIndexMap name_table; - StringTableUpdater::Update(symbol_map, &name_table); + FileIndexMap file_table; + StringTableUpdater::Update(symbol_map, &name_table, &file_table); LLVMProfileBuilder builder(name_table); const auto &profiles = builder.ConvertProfiles(symbol_map); @@ -98,7 +99,8 @@ TEST(LlvmProfileWriterTest, ConvertProfile) { symbol_map.AddSourceCount("foo", src2, 200, 1); StringIndexMap name_table; - StringTableUpdater::Update(symbol_map, &name_table); + FileIndexMap file_table; + StringTableUpdater::Update(symbol_map, &name_table, &file_table); LLVMProfileBuilder builder(name_table); const auto &profiles = builder.ConvertProfiles(symbol_map); // LLVM_BEFORE_SAMPLEFDO_SPLIT_CONTEXT is defined when llvm version is before diff --git a/profile_reader.cc b/profile_reader.cc index 8097a50..0ec5393 100644 --- a/profile_reader.cc +++ b/profile_reader.cc @@ -38,11 +38,17 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, } else { head_count = 0; } - const char *name = names_.at(gcov_read_unsigned()).c_str(); + uint32_t name_index = gcov_read_unsigned(); + const char *name = names_.at(name_index).first.c_str(); + uint32_t file_index = names_.at(name_index).second; + const std::string &file_name = + file_index < file_names_.size() ? file_names_.at(file_index) : ""; uint32_t num_pos_counts = gcov_read_unsigned(); uint32_t num_callsites = gcov_read_unsigned(); if (stack.size() == 0) { symbol_map_->AddSymbol(name); + const_cast(symbol_map_->GetSymbolByName(name))->info.file_name = + file_name; if (!force_update_ && symbol_map_->GetSymbolByName(name)->total_count > 0) { update = false; } @@ -55,6 +61,7 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, uint32_t num_targets = gcov_read_unsigned(); uint64_t count = gcov_read_counter(); SourceInfo info(name, "", "", 0, offset >> 16, offset & 0xffff); + info.file_name = file_name; SourceStack new_stack; new_stack.push_back(info); new_stack.insert(new_stack.end(), stack.begin(), stack.end()); @@ -65,7 +72,7 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, for (int j = 0; j < num_targets; j++) { // Only indirect call target histogram is supported now. CHECK_EQ(gcov_read_unsigned(), HIST_TYPE_INDIR_CALL_TOPN); - const std::string &target_name = names_.at(gcov_read_counter()); + const std::string &target_name = names_.at(gcov_read_counter()).first; uint64_t target_count = gcov_read_counter(); if (force_update_ || update) { symbol_map_->AddIndirectCallTarget( @@ -80,6 +87,7 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, // lower 16 bits: discriminator. uint32_t offset = gcov_read_unsigned(); SourceInfo info(name, "", "", 0, offset >> 16, offset & 0xffff); + info.file_name = file_name; SourceStack new_stack; new_stack.push_back(info); new_stack.insert(new_stack.end(), stack.begin(), stack.end()); @@ -90,9 +98,15 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, void AutoFDOProfileReader::ReadNameTable() { CHECK_EQ(gcov_read_unsigned(), GCOV_TAG_AFDO_FILE_NAMES); gcov_read_unsigned(); + uint32_t file_name_vector_size = gcov_read_unsigned(); + for (uint32_t i = 0; i < file_name_vector_size; i++) { + file_names_.push_back(gcov_read_string()); + } uint32_t name_vector_size = gcov_read_unsigned(); for (uint32_t i = 0; i < name_vector_size; i++) { - names_.push_back(gcov_read_string()); + const char *name = gcov_read_string(); + uint32_t file_index = gcov_read_unsigned(); + names_.emplace_back(name, file_index); } } diff --git a/profile_reader.h b/profile_reader.h index 02a09f0..3812cf6 100644 --- a/profile_reader.h +++ b/profile_reader.h @@ -58,7 +58,8 @@ class AutoFDOProfileReader : public ProfileReader { SymbolMap *symbol_map_; bool force_update_; - std::vector names_; + std::vector> names_; + std::vector file_names_; }; } // namespace devtools_crosstool_autofdo diff --git a/profile_writer.cc b/profile_writer.cc index 875ed06..bdf4bf3 100644 --- a/profile_writer.cc +++ b/profile_writer.cc @@ -153,7 +153,8 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { int length_4bytes = 0, current_name_index = 0; string_index_map[std::string()] = 0; - StringTableUpdater::Update(*symbol_map_, &string_index_map); + FileIndexMap file_map; + StringTableUpdater::Update(*symbol_map_, &string_index_map, &file_map); for (auto &name_index : string_index_map) { name_index.second = current_name_index++; @@ -166,6 +167,9 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { // Writes the GCOV_TAG_AFDO_FILE_NAMES section. gcov_write_unsigned(GCOV_TAG_AFDO_FILE_NAMES); gcov_write_unsigned(length_4bytes); + gcov_write_unsigned (file_map.Size()); + for (const auto &file_name : file_map.GetFileNames()) + gcov_write_string (file_name.c_str()); gcov_write_unsigned(string_index_map.size()); for (const auto &[name, index] : string_index_map) { char *c = strdup(name.c_str()); @@ -182,6 +186,11 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { c[len - 10] = '2'; } gcov_write_string(c); + if (int lookup = file_map.GetFileIndex(name); lookup != -1) + gcov_write_unsigned (lookup); + else + gcov_write_unsigned (-1); + free(c); } @@ -352,7 +361,8 @@ class ProfileDumper : public SymbolTraverser { // Emit a dump of the input profile on stdout. void ProfileWriter::Dump() { StringIndexMap string_index_map; - StringTableUpdater::Update(*symbol_map_, &string_index_map); + FileIndexMap file_map; + StringTableUpdater::Update(*symbol_map_, &string_index_map, &file_map); SourceProfileLengther length(*symbol_map_); printf("Length of symbol map: %d\n", length.length() + 1); printf("Number of functions: %d\n", length.num_functions()); diff --git a/profile_writer.h b/profile_writer.h index 2557b08..82d3107 100644 --- a/profile_writer.h +++ b/profile_writer.h @@ -149,19 +149,61 @@ class SymbolTraverser { typedef std::map StringIndexMap; +class FileIndexMap { + std::vector file_names_; + std::unordered_map name_map_; + std::unordered_map file_map_; + + public: + int GetFileIndex(const std::string &symbol_name) { + if (auto it = name_map_.find(symbol_name); it != name_map_.end()) + return it->second; + else + return -1; + } + + std::string_view GetFileName(const std::string &symbol_name) { + if (auto it = name_map_.find(symbol_name); it != name_map_.end()) + return file_names_[it->second]; + else + return ""; + } + + void AddFileName(const std::string &symbol_name, + const std::string &file_name) { + if (auto it = file_map_.find(file_name); it != file_map_.end()) + name_map_[symbol_name] = it->second; + else { + file_names_.emplace_back(file_name); + file_map_[file_names_.back()] = file_names_.size() - 1; + name_map_[symbol_name] = file_names_.size() - 1; + } + } + + size_t Size() const { + return file_names_.size(); + } + + const std::vector &GetFileNames() const { + return file_names_; + } +}; + class StringTableUpdater: public SymbolTraverser { public: // This type is neither copyable nor movable. StringTableUpdater(const StringTableUpdater &) = delete; StringTableUpdater &operator=(const StringTableUpdater &) = delete; - static void Update(const SymbolMap &symbol_map, StringIndexMap *map) { - StringTableUpdater updater(map); + static void Update(const SymbolMap &symbol_map, StringIndexMap *map, + FileIndexMap *file_map) { + StringTableUpdater updater(map, file_map); updater.Start(symbol_map); } protected: void Visit(const Symbol *node) override { + file_map_->AddFileName(node->info.func_name, node->info.file_name); for (const auto &pos_count : node->pos_counts) { for (const auto &name_count : pos_count.second.target_map) { (*map_)[name_count.first] = 0; @@ -178,8 +220,10 @@ class StringTableUpdater: public SymbolTraverser { } private: - explicit StringTableUpdater(StringIndexMap *map) : map_(map) {} - StringIndexMap *map_; + explicit StringTableUpdater(StringIndexMap *map, FileIndexMap *file_map) + : map_(map), file_map_(file_map) {} + StringIndexMap *map_; + FileIndexMap *file_map_; }; } // namespace devtools_crosstool_autofdo diff --git a/symbol_map.cc b/symbol_map.cc index 2483835..6a87c38 100644 --- a/symbol_map.cc +++ b/symbol_map.cc @@ -634,7 +634,8 @@ void Symbol::DumpBody(int indent, bool for_analysis) const { GetSortedTargetCountPairs(ret->second.target_map, &target_count_pairs); for (const auto &target_count : target_count_pairs) { const std::string printed_name = getPrintName(target_count.first.data()); - absl::PrintF(" %s:%u", printed_name, target_count.second); + absl::PrintF(" %s:%s:%u", printed_name, info.file_name, + target_count.second); } printf("\n"); } @@ -655,10 +656,10 @@ void Symbol::DumpBody(int indent, bool for_analysis) const { void Symbol::Dump(int indent) const { const std::string printed_name = getPrintName(info.func_name); if (indent == 0) { - absl::PrintF("%s total:%u head:%u\n", printed_name, total_count, - head_count); + absl::PrintF("%s:%s total:%u head:%u\n", printed_name, info.file_name, + total_count, head_count); } else { - absl::PrintF("%s total:%u\n", printed_name, total_count); + absl::PrintF("%s:%s total:%u\n", printed_name, info.file_name, total_count); } DumpBody(indent, false); } @@ -667,12 +668,12 @@ void Symbol::DumpForAnalysis(int indent) const { const std::string printed_name = getPrintName(info.func_name); if (indent == 0) { absl::PrintF( - "%s total:%u head:%u total_incl:%u total_incl_per_iter:%.2f\n", - printed_name, total_count, head_count, total_count_incl, + "%s:%s total:%u head:%u total_incl:%u total_incl_per_iter:%.2f\n", + printed_name, info.file_name, total_count, head_count, total_count_incl, head_count ? static_cast(total_count_incl) / head_count : 0); } else { - absl::PrintF("%s total:%u total_incl:%u\n", printed_name, total_count, - total_count_incl); + absl::PrintF("%s:%s total:%u total_incl:%u\n", printed_name, info.file_name, + total_count, total_count_incl); } DumpBody(indent, true); } From 2c4f834420c5f1e0e37caec6a5d5455911abe984 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Wed, 2 Jul 2025 22:00:45 -0700 Subject: [PATCH 2/6] Read and write file name information only for GCOV version 3 --- profile_reader.cc | 11 +++++++---- profile_writer.cc | 19 ++++++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/profile_reader.cc b/profile_reader.cc index 0ec5393..099fe77 100644 --- a/profile_reader.cc +++ b/profile_reader.cc @@ -98,14 +98,17 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, void AutoFDOProfileReader::ReadNameTable() { CHECK_EQ(gcov_read_unsigned(), GCOV_TAG_AFDO_FILE_NAMES); gcov_read_unsigned(); - uint32_t file_name_vector_size = gcov_read_unsigned(); - for (uint32_t i = 0; i < file_name_vector_size; i++) { - file_names_.push_back(gcov_read_string()); + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + uint32_t file_name_vector_size = gcov_read_unsigned(); + for (uint32_t i = 0; i < file_name_vector_size; i++) { + file_names_.push_back(gcov_read_string()); + } } uint32_t name_vector_size = gcov_read_unsigned(); for (uint32_t i = 0; i < name_vector_size; i++) { const char *name = gcov_read_string(); - uint32_t file_index = gcov_read_unsigned(); + uint32_t file_index = + absl::GetFlag(FLAGS_gcov_version) >= 3 ? gcov_read_unsigned() : -1; names_.emplace_back(name, file_index); } } diff --git a/profile_writer.cc b/profile_writer.cc index bdf4bf3..00bf257 100644 --- a/profile_writer.cc +++ b/profile_writer.cc @@ -167,9 +167,12 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { // Writes the GCOV_TAG_AFDO_FILE_NAMES section. gcov_write_unsigned(GCOV_TAG_AFDO_FILE_NAMES); gcov_write_unsigned(length_4bytes); - gcov_write_unsigned (file_map.Size()); - for (const auto &file_name : file_map.GetFileNames()) - gcov_write_string (file_name.c_str()); + // File names in the profile are a feature of GCOV version 3. + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + gcov_write_unsigned(file_map.Size()); + for (const auto &file_name : file_map.GetFileNames()) + gcov_write_string(file_name.c_str()); + } gcov_write_unsigned(string_index_map.size()); for (const auto &[name, index] : string_index_map) { char *c = strdup(name.c_str()); @@ -186,10 +189,12 @@ void AutoFDOProfileWriter::WriteFunctionProfile() { c[len - 10] = '2'; } gcov_write_string(c); - if (int lookup = file_map.GetFileIndex(name); lookup != -1) - gcov_write_unsigned (lookup); - else - gcov_write_unsigned (-1); + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + if (int lookup = file_map.GetFileIndex(name); lookup != -1) + gcov_write_unsigned(lookup); + else + gcov_write_unsigned(-1); + } free(c); } From 8545e1fa7224f1e947b5f12ee4b2ed5351007867 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Thu, 31 Jul 2025 03:22:17 -0700 Subject: [PATCH 3/6] Fix crash when function name is NULL --- profile_writer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/profile_writer.h b/profile_writer.h index 82d3107..2b26ab0 100644 --- a/profile_writer.h +++ b/profile_writer.h @@ -203,7 +203,8 @@ class StringTableUpdater: public SymbolTraverser { protected: void Visit(const Symbol *node) override { - file_map_->AddFileName(node->info.func_name, node->info.file_name); + if (node->info.func_name != nullptr) + file_map_->AddFileName(node->info.func_name, node->info.file_name); for (const auto &pos_count : node->pos_counts) { for (const auto &name_count : pos_count.second.target_map) { (*map_)[name_count.first] = 0; From 503752028fc4a6979b24790a86ce94f0b56fd83f Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Thu, 16 Oct 2025 01:09:42 -0700 Subject: [PATCH 4/6] Write full file path to GCOV if present --- profile_writer.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/profile_writer.h b/profile_writer.h index 2b26ab0..e6d211a 100644 --- a/profile_writer.h +++ b/profile_writer.h @@ -204,7 +204,11 @@ class StringTableUpdater: public SymbolTraverser { protected: void Visit(const Symbol *node) override { if (node->info.func_name != nullptr) - file_map_->AddFileName(node->info.func_name, node->info.file_name); + file_map_->AddFileName(node->info.func_name, + node->info.dir_name != "" + ? node->info.dir_name + "/" + + node->info.file_name + : node->info.file_name); for (const auto &pos_count : node->pos_counts) { for (const auto &name_count : pos_count.second.target_map) { (*map_)[name_count.first] = 0; From f80a8e32f759f60b028726802eae0efb59fb28ab Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Mon, 27 Oct 2025 00:19:50 -0700 Subject: [PATCH 5/6] Simplify version comparison --- gcov.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gcov.cc b/gcov.cc index c111bc8..0351048 100644 --- a/gcov.cc +++ b/gcov.cc @@ -149,8 +149,7 @@ void gcov_write_string(const char *string) { if (string) { length = strlen(string); - if (absl::GetFlag(FLAGS_gcov_version) == 2 || - absl::GetFlag(FLAGS_gcov_version) == 3) { + if (absl::GetFlag(FLAGS_gcov_version) >= 2) { // Length includes the terminating 0 and is saved in bytes. alloc = length + 1; char *byte_buffer = gcov_write_bytes(4 + alloc); @@ -230,8 +229,7 @@ const char * gcov_read_string(void) { return 0; } - if (absl::GetFlag(FLAGS_gcov_version) == 2 || - absl::GetFlag(FLAGS_gcov_version) == 3) { + if (absl::GetFlag(FLAGS_gcov_version) >= 2) { return gcov_read_bytes (length); } else { From 81c98c320e5bdb68cba28021bc826899605fa236 Mon Sep 17 00:00:00 2001 From: Dhruv Chawla Date: Tue, 9 Dec 2025 19:54:02 -0800 Subject: [PATCH 6/6] Fix incomplete file paths being written out to GCOV This was being caused due to DW_AT_comp_dir not being prepended before DW_AT_decl_file. --- legacy_addr2line.cc | 20 ++++++++++++++++++-- profile_writer.h | 14 ++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/legacy_addr2line.cc b/legacy_addr2line.cc index f19c1b1..ae4b96c 100644 --- a/legacy_addr2line.cc +++ b/legacy_addr2line.cc @@ -204,6 +204,7 @@ void Google3Addr2line::GetInlineStack(uint64_t address, const SubprogramInfo *subprog = inline_stack_handler_->GetSubprogramForAddress(address); + const char *comp_dir = NULL; const char *function_name = NULL; uint32_t start_line = 0; if (subprog != NULL) { @@ -214,10 +215,17 @@ void Google3Addr2line::GetInlineStack(uint64_t address, inline_stack_handler_->GetAbstractOrigin(subprog)->callsite_line(); if (start_line == 0) start_line = declaration->callsite_line(); + comp_dir = subprog->comp_directory(); + if (comp_dir == 0) + comp_dir = inline_stack_handler_->GetAbstractOrigin(subprog)->comp_directory(); } + std::filesystem::path dir_name = LI.file.first; + if (!dir_name.is_absolute() && comp_dir != nullptr) + dir_name = comp_dir / dir_name; + stack->push_back(SourceInfo(function_name, - LI.file.first, + dir_name, LI.file.second, start_line, LI.line, @@ -234,9 +242,17 @@ void Google3Addr2line::GetInlineStack(uint64_t address, if (start_line == 0) start_line = subprog->callsite_line(); + dir_name = subprog->callsite_directory(); + if (!dir_name.is_absolute()) { + if (subprog->comp_directory()) + dir_name = subprog->comp_directory() / dir_name; + else if (const char *abstract_comp = inline_stack_handler_->GetAbstractOrigin(subprog)->comp_directory()) + dir_name = abstract_comp / dir_name; + } + stack->push_back(SourceInfo( canonical_parent->name().c_str(), - subprog->callsite_directory(), + dir_name, subprog->callsite_filename(), start_line, subprog->callsite_line(), diff --git a/profile_writer.h b/profile_writer.h index e6d211a..210b3b8 100644 --- a/profile_writer.h +++ b/profile_writer.h @@ -203,12 +203,14 @@ class StringTableUpdater: public SymbolTraverser { protected: void Visit(const Symbol *node) override { - if (node->info.func_name != nullptr) - file_map_->AddFileName(node->info.func_name, - node->info.dir_name != "" - ? node->info.dir_name + "/" + - node->info.file_name - : node->info.file_name); + if (node->info.func_name != nullptr) { + if (node->info.dir_name != "") + file_map_->AddFileName(node->info.func_name, + std::filesystem::path(node->info.dir_name) / + node->info.file_name); + else + file_map_->AddFileName(node->info.func_name, node->info.file_name); + } for (const auto &pos_count : node->pos_counts) { for (const auto &name_count : pos_count.second.target_map) { (*map_)[name_count.first] = 0;