template <typename E> void ObjectFile<E>::mark_addrsig(Context<E> &ctx) { // Parse a .llvm_addrsig section. if (llvm_addrsig) { u8 *cur = (u8 *)llvm_addrsig->contents.data(); u8 *end = cur + llvm_addrsig->contents.size();
while (cur != end) { Symbol<E> &sym = *this->symbols[read_uleb(cur)]; if (sym.file == this) if (InputSection<E> *isec = sym.get_input_section()) isec->address_significant = true; } }
// We treat a symbol's address as significant if // // 1. we have no address significance information for the symbol, or // 2. the symbol can be referenced from the outside in an address- // significant manner. for (Symbol<E> *sym : this->symbols) if (sym->file == this) if (InputSection<E> *isec = sym->get_input_section()) if (!llvm_addrsig || sym->is_exported) isec->address_significant = true; }
gc_sections主要是对section像GC一样进行mark and sweep,清理掉未被使用的段,关于gc_sections的选项
–gc-sections Remove unreferenced sections
mark_nonalloc_fragments
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Non-alloc section fragments are not subject of garbage collection. // This function marks such fragments. template <typename E> staticvoidmark_nonalloc_fragments(Context<E> &ctx){ Timer t(ctx, "mark_nonalloc_fragments");
tbb::parallel_for_each(ctx.objs, [](ObjectFile<E> *file) { for (std::unique_ptr<MergeableSection<E>> &m : file->mergeable_sections) if (m) for (SectionFragment<E> *frag : m->fragments) if (!(frag->output_section.shdr.sh_flags & SHF_ALLOC)) frag->is_alive.store(true, std::memory_order_relaxed); }); }
auto enqueue_section = [&](InputSection<E> *isec) { if (mark_section(isec)) rootset.push_back(isec); };
auto enqueue_symbol = [&](Symbol<E> *sym) { if (sym) { if (SectionFragment<E> *frag = sym->get_frag()) frag->is_alive.store(true, std::memory_order_relaxed); else enqueue_section(sym->get_input_section()); } };
// Add sections that are not subject to garbage collection. tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) { for (std::unique_ptr<InputSection<E>> &isec : file->sections) { if (!isec || !isec->is_alive) continue;
// -gc-sections discards only SHF_ALLOC sections. If you want to // reduce the amount of non-memory-mapped segments, you should // use `strip` command, compile without debug info or use // -strip-all linker option. u32 flags = isec->shdr().sh_flags; if (!(flags & SHF_ALLOC)) isec->is_visited = true;
if (should_keep(*isec)) enqueue_section(isec.get()); } });
// Add sections referenced by root symbols. enqueue_symbol(get_symbol(ctx, ctx.arg.entry));
for (std::string_view name : ctx.arg.undefined) enqueue_symbol(get_symbol(ctx, name));
for (std::string_view name : ctx.arg.require_defined) enqueue_symbol(get_symbol(ctx, name));
// .eh_frame consists of variable-length records called CIE and FDE // records, and they are a unit of inclusion or exclusion. // We just keep all CIEs and everything that are referenced by them. tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) { for (CieRecord<E> &cie : file->cies) for (const ElfRel<E> &rel : cie.get_rels()) enqueue_symbol(file->symbols[rel.r_sym]); }); }
// If this is a text section, .eh_frame may contain records // describing how to handle exceptions for that function. // We want to keep associated .eh_frame records. for (FdeRecord<E> &fde : isec->get_fdes()) for (const ElfRel<E> &rel : fde.get_rels(isec->file).subspan(1)) if (Symbol<E> *sym = isec->file.symbols[rel.r_sym]) if (mark_section(sym->get_input_section())) feeder.add(sym->get_input_section());
// Symbol can refer either a section fragment or an input section. // Mark a fragment as alive. if (SectionFragment<E> *frag = sym.get_frag()) { frag->is_alive.store(true, std::memory_order_relaxed); continue; }
if (!mark_section(sym.get_input_section())) continue;
// Mark a section alive. For better performacne, we don't call // `feeder.add` too often. if (depth < 3) visit(ctx, sym.get_input_section(), feeder, depth + 1); else feeder.add(sym.get_input_section()); } }
// Early merge of leaf nodes, which can be processed without constructing the // entire graph. This reduces the vertex count and improves memory efficiency. template <typename E> staticvoidmerge_leaf_nodes(Context<E> &ctx){ Timer t(ctx, "merge_leaf_nodes");
// For ICF // // `leader` is the section that this section has been merged with. // Three kind of values are possible: // - `leader == nullptr`: This section was not eligible for ICF. // - `leader == this`: This section was retained. // - `leader != this`: This section was merged with another identical section. InputSection<E> *leader = nullptr; u32 icf_idx = -1; bool icf_eligible = false; bool icf_leaf = false;
typedef std::array<uint8_t, HASH_SIZE> Digest; ... // We allocate 3 arrays to store hashes for each vertex. // // Index 0 and 1 are used for tree hashes from the previous // iteration and the current iteration. They switch roles every // iteration. See `slot` below. // // Index 2 stores the initial, single-vertex hash. This is combined // with hashes from the connected vertices to form the tree hash // described above. std::vector<std::vector<Digest>> digests(3); digests[0] = compute_digests<E>(ctx, sections); digests[1].resize(digests[0].size()); digests[2] = digests[0];
// Execute the propagation rounds until convergence is obtained. { Timer t(ctx, "propagate"); tbb::affinity_partitioner ap;
// A cheap test that the graph hasn't converged yet. // The loop after this one uses a strict condition, but it's expensive // as it requires sorting the entire hash collection. // // For nodes that have a cycle in downstream (i.e. recursive // functions and functions that calls recursive functions) will always // change with the iterations. Nodes that doesn't (i.e. non-recursive // functions) will stop changing as soon as the propagation depth reaches // the call tree depth. // Here, we test whether we have reached sufficient depth for the latter, // which is a necessary (but not sufficient) condition for convergence. i64 num_changed = -1; for (;;) { i64 n = propagate<E>(digests, edges, edge_indices, slot, converged, ap); if (n == num_changed) break; num_changed = n; } // Run the pass until the unique number of hashes stop increasing, at which // point we have achieved convergence (proof omitted for brevity). i64 num_classes = -1; for (;;) { // count_num_classes requires sorting which is O(n log n), so do a little // more work beforehand to amortize that log factor. for (i64 i = 0; i < 10; i++) propagate<E>(digests, edges, edge_indices, slot, converged, ap);
i64 n = count_num_classes<E>(digests[slot], ap); if (n == num_classes) break; num_classes = n; } }