pixiv:101015341 p7  
上期的内容主要是section size相关的优化,这期内容是创建输出段前的最后一些处理
Compute Merged Section Size 1 2 compute_merged_section_sizes (ctx);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 template  <typename  E>void  compute_merged_section_sizes (Context<E> &ctx)    Timer t (ctx, "compute_merged_section_sizes" )  ;      if  (!ctx.arg.gc_sections) {     tbb::parallel_for_each (ctx.objs, [](ObjectFile<E> *file) {       for  (std::unique_ptr<MergeableSection<E>> &m : file->mergeable_sections)         if  (m)           for  (SectionFragment<E> *frag : m->fragments)             frag->is_alive.store (true , std::memory_order_relaxed);     });   }      if  (!ctx.arg.oformat_binary)     add_comment_string (ctx, mold_version);      if  (char  *env = getenv ("MOLD_DEBUG" ); env && env[0 ])     add_comment_string (ctx, "mold command line: "  + get_cmdline_args (ctx));   Timer t2 (ctx, "MergedSection assign_offsets" )  ;   tbb::parallel_for_each (ctx.merged_sections,                          [&](std::unique_ptr<MergedSection<E>> &sec) {     sec->assign_offsets (ctx);   }); } 
这个过程做了三件事情
在未开启gc_sections的情况下,将所有mergeable section中的fragment都标记为alive;如果开启了这个选项,fragment已经在之前的gc过程中标记过,这里不需要再处理 
之后是添加comment string。 
最后是针对每一个merged_section调用assign_offsets 
 
关于上面的oformat_binary,其对应的命令行选项描述如下
-oformat=binary Omit ELF, section and program headers
 
add_comment_string的实现
1 2 3 4 5 6 7 8 9 10 11 template  <typename  E>void  add_comment_string (Context<E> &ctx, std::string str)    MergedSection<E> *sec =     MergedSection<E>::get_instance (ctx, ".comment" , SHT_PROGBITS,                                    SHF_MERGE | SHF_STRINGS);   std::string_view buf = save_string (ctx, str);   std::string_view data (buf.data(), buf.size() + 1 )  ;   SectionFragment<E> *frag = sec->insert (data, hash_string (data), 0 );   frag->is_alive = true ; } 
这个过程获取.comment对应的MergedSection的instance,之后将comment字符串(包含结尾的'\0')作为一个新的fragment插入其中,并将该fragment标记为alive。
接下来看一下assign_offsets的实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 template  <typename  E>void  MergedSection<E>::assign_offsets (Context<E> &ctx) {  std::vector<i64> sizes (map.NUM_SHARDS)  ;   std::vector<i64> max_p2aligns (map.NUM_SHARDS)  ;   shard_offsets.resize (map.NUM_SHARDS + 1 );   i64 shard_size = map.nbuckets / map.NUM_SHARDS;   tbb::parallel_for ((i64)0 , map.NUM_SHARDS, [&](i64 i) {     struct KeyVal {       std::string_view key;       SectionFragment<E> *val;     };     std::vector<KeyVal> fragments;     fragments.reserve (shard_size);     for  (i64 j = shard_size * i; j < shard_size * (i + 1 ); j++)       if  (SectionFragment<E> &frag = map.values[j]; frag.is_alive)         fragments.push_back ({{map.keys[j], map.key_sizes[j]}, &frag});          tbb::parallel_sort (fragments.begin (), fragments.end (),                        [](const  KeyVal &a, const  KeyVal &b) {       return  std::tuple{(u32)a.val->p2align, a.key.size (), a.key} <              std::tuple{(u32)b.val->p2align, b.key.size (), b.key};     });          i64 offset = 0 ;     i64 p2align = 0 ;     for  (KeyVal &kv : fragments) {       SectionFragment<E> &frag = *kv.val;       offset = align_to (offset, 1  << frag.p2align);       frag.offset = offset;       offset += kv.key.size ();       p2align = std::max<i64>(p2align, frag.p2align);     }     sizes[i] = offset;     max_p2aligns[i] = p2align;     static  Counter merged_strings ("merged_strings" );     merged_strings += fragments.size ();   });   i64 p2align = 0 ;   for  (i64 x : max_p2aligns)     p2align = std::max (p2align, x);   for  (i64 i = 1 ; i < map.NUM_SHARDS + 1 ; i++)     shard_offsets[i] =       align_to (shard_offsets[i - 1 ] + sizes[i - 1 ], 1  << p2align);   tbb::parallel_for ((i64)1 , map.NUM_SHARDS, [&](i64 i) {     for  (i64 j = shard_size * i; j < shard_size * (i + 1 ); j++)       if  (SectionFragment<E> 
&frag = map.values[j]; frag.is_alive)         frag.offset += shard_offsets[i];   });   this ->shdr.sh_size = shard_offsets[map.NUM_SHARDS];   this ->shdr.sh_addralign = 1  << p2align; } 
assign_offsets的主要目的是为每个alive的fragment分配其在MergedSection中的offset,并据此设置对应MergedSection的section header中的sh_size和sh_addralign
这里的实现首先为了并行计算,将数据划分为了map.NUM_SHARDS个shard块。在每个并行的body中,先收集该shard内所有alive的fragment构建对应的KeyVal,之后为了输出的确定性按(p2align, 长度, 内容)进行排序,最后依次将offset按每个fragment的p2align对齐后赋给该fragment,并累加其长度,同时记录该shard的总大小(sizes[i])和最大的p2align(max_p2aligns[i])
在这之后算出所有shard中最大的p2align用于设置MergedSection的section header的sh_addralign,并按该对齐依次累加各shard的大小,得到每一个shard的起始位置shard_offsets,再把它加到对应shard内每个alive的fragment的offset上(这一步从下标1的shard开始),最后将shard_offsets的最后一个元素(下标为NUM_SHARDS的元素,类似于vector的end的位置)作为整个MergedSection的size
Create Synthetic Sections
这里主要创建一些特殊的段(synthetic section)
1 2   create_synthetic_sections (ctx); 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 template  <typename  E>void  create_synthetic_sections (Context<E> &ctx)    auto  push = [&]<typename  T>(T *x) {     ctx.chunks.push_back (x);     ctx.chunk_pool.emplace_back (x);     return  x;   };   if  (!ctx.arg.oformat_binary) {     auto  find = [&](std::string_view name) {       for  (SectionOrder &ord : ctx.arg.section_order)         if  (ord.type == SectionOrder::SECTION && ord.name == name)           return  true ;       return  false ;     };     if  (ctx.arg.section_order.empty () || find ("EHDR" ))       ctx.ehdr = push (new  OutputEhdr<E>(SHF_ALLOC));     else        ctx.ehdr = push (new  OutputEhdr<E>(0 ));     if  (ctx.arg.section_order.empty () || find ("PHDR" ))       ctx.phdr = push (new  OutputPhdr<E>(SHF_ALLOC));     else        ctx.phdr = push (new  OutputPhdr<E>(0 ));     ctx.shdr = push (new  OutputShdr<E>);   }   ctx.got = push (new  GotSection<E>);   if  constexpr  (!is_sparc<E>)      ctx.gotplt  = push (new  GotPltSection<E>);  ctx.reldyn = push (new  RelDynSection<E>);   ctx.relplt = push (new  RelPltSection<E>);   if  (ctx.arg.pack_dyn_relocs_relr)     ctx.relrdyn = push (new  RelrDynSection<E>);   ctx.strtab = push (new  StrtabSection<E>);   ctx.plt = push (new  PltSection<E>);   ctx.pltgot = push (new  PltGotSection<E>);   ctx.symtab = push (new  SymtabSection<E>);   ctx.dynsym = push (new  DynsymSection<E>);   ctx.dynstr = push (new  DynstrSection<E>);   ctx.eh_frame = push (new  EhFrameSection<E>);   ctx.copyrel = push (new  CopyrelSection<E>(false ));   ctx.copyrel_relro = push (new  CopyrelSection<E>(true ));   if  (!ctx.arg.oformat_binary)     ctx.shstrtab = push (new  ShstrtabSection<E>);   if  (!ctx.arg.dynamic_linker.empty ())     
ctx.interp = push (new  InterpSection<E>);   if  (ctx.arg.build_id.kind != BuildId::NONE)     ctx.buildid = push (new  BuildIdSection<E>);   if  (ctx.arg.eh_frame_hdr)     ctx.eh_frame_hdr = push (new  EhFrameHdrSection<E>);   if  (ctx.arg.gdb_index)     ctx.gdb_index = push (new  GdbIndexSection<E>);   if  (ctx.arg.z_relro && ctx.arg.section_order.empty () &&       ctx.arg.z_separate_code != SEPARATE_LOADABLE_SEGMENTS)     ctx.relro_padding = push (new  RelroPaddingSection<E>);   if  (ctx.arg.hash_style_sysv)     ctx.hash = push (new  HashSection<E>);   if  (ctx.arg.hash_style_gnu)     ctx.gnu_hash = push (new  GnuHashSection<E>);   if  (!ctx.arg.version_definitions.empty ())     ctx.verdef = push (new  VerdefSection<E>);   if  (ctx.arg.emit_relocs)     ctx.eh_frame_reloc = push (new  EhFrameRelocSection<E>);   if  (ctx.arg.shared || !ctx.dsos.empty () || ctx.arg.pie)     ctx.dynamic = push (new  DynamicSection<E>);   ctx.versym = push (new  VersymSection<E>);   ctx.verneed = push (new  VerneedSection<E>);   ctx.note_package = push (new  NotePackageSection<E>);   ctx.note_property = push (new  NotePropertySection<E>);   if  (ctx.arg.is_static) {     if  constexpr  (is_s390x<E>)        ctx.s390x_tls_get_offset  = push (new  S390XTlsGetOffsetSection);    if  constexpr  (is_sparc<E>)        ctx.sparc_tls_get_addr  = push (new  SparcTlsGetAddrSection);  }   if  constexpr  (std::is_same_v<E, PPC64V1>)      ctx.ppc64_opd  = push (new  PPC64OpdSection);        if  (ctx.dynamic) {     ctx.dynstr->keep ();     ctx.dynsym->keep ();   }   ctx.tls_get_addr = get_symbol (ctx, "__tls_get_addr" );   ctx.tls_get_offset = get_symbol (ctx, "__tls_get_offset" ); } 
在这里其实已经开始创建输出的内容了,因为创建出的对象是直接push到ctx.chunks中的。在mold中chunk表示用于输出的一片区域,关于Chunk类,源码中有这样的注释
Chunk represents a contiguous region in an output file.
 
首先是oformat_binary选项控制的EHDR和PHDR。
EHDR和PHDR分别是ELF Header和Program Header
如果没有指定section_order,或者指定的section_order中存在对应的section(EHDR/PHDR),则EHDR和PHDR作为带SHF_ALLOC标志的chunk加入到chunks中;否则以flags为0(不带SHF_ALLOC)的chunk加入。
之后是添加了一些常见的段,以及各种参数控制的段,不再一一赘述。
最后提一下如果dynamic section存在的话,那么保留dynstr和dynsym段,也就是设置其size为1
1 void  keep ()  this ->shdr.sh_size = 1 ; }
Check Duplicate Symbol 1 2 3 if  (!ctx.arg.allow_multiple_definition)  check_duplicate_symbols (ctx); 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 template  <typename  E>void  check_duplicate_symbols (Context<E> &ctx)    Timer t (ctx, "check_duplicate_symbols" )  ;   tbb::parallel_for_each (ctx.objs, [&](ObjectFile<E> *file) {     for  (i64 i = file->first_global; i < file->elf_syms.size (); i++) {       const  ElfSym<E> &esym = file->elf_syms[i];       Symbol<E> &sym = *file->symbols[i];              if  (sym.file == file || sym.file == ctx.internal_obj ||           esym.is_undef () || esym.is_common () || (esym.st_bind == STB_WEAK))         continue ;                     if  (!esym.is_abs ()) {         InputSection<E> *isec = file->get_section (esym);         if  (!isec || !isec->is_alive)           continue ;       }       Error (ctx) << "duplicate symbol: "  << *file << ": "  << *sym.file                  << ": "  << sym;     }   });   ctx.checkpoint (); } 
针对所有的obj进行检查,遍历所有的global symbol。
首先通过sym.file == file检查符号的owner是否为当前文件,如果是则直接跳过。file的声明如下
1 2 3 4 InputFile<E> *file = nullptr ; 
以及如果是internal_obj中的符号,也进行跳过。剩下的就是可能有冲突的情况,但undef、weak、common的符号冲突不会造成影响,只有重复定义会导致冲突,因此这些情况也进行跳过。
最后,对于非abs的符号,如果其所在的section不存在或者已经是dead section则跳过;剩下没有被跳过的符号就是重复定义的符号,会报duplicate symbol的错误
Check Symbol Types 1 2   check_symbol_types (ctx); 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 template  <typename  E>void  check_symbol_types (Context<E> &ctx)    Timer t (ctx, "check_symbol_types" )  ;   auto  normalize_type = [](u32 type) {     if  (type == STT_GNU_IFUNC)       return  STT_FUNC;     return  type;   };   auto  check = [&](InputFile<E> *file) {     for  (i64 i = file->first_global; i < file->elf_syms.size (); i++) {       const  ElfSym<E> &esym = file->elf_syms[i];       Symbol<E> &sym = *file->symbols[i];       if  (!sym.file)         continue ;       u32 their_type = normalize_type (sym.esym ().st_type);       u32 our_type = normalize_type (esym.st_type);       if  (their_type != STT_NOTYPE && our_type != STT_NOTYPE &&           their_type != our_type)         Warn (ctx) << "symbol type mismatch: "  << sym << '\n'                    << ">>> defined in "  << *sym.file << " as "                    << stt_to_string (sym.esym ().st_type) << '\n'                    << ">>> defined in "  << *file << " as "                    << stt_to_string (esym.st_type);     }   };   tbb::parallel_for_each (ctx.objs, check);   tbb::parallel_for_each (ctx.dsos, check); } 
这里针对的是所有的obj和dso里的所有global symbol进行检查。检查Symbol当前引用的esym(即sym.esym())和当前文件ElfSym中的type是否一致,但这里只是warning,而不像之前重复符号的检查一样直接报错。检查的方式是首先对两者的type进行normalize的操作(将STT_GNU_IFUNC视为STT_FUNC),之后在两者都不是STT_NOTYPE的情况下判断是否相等。我觉得这里更像是一种针对resolve的结果检查,因为一个esym是不会被修改的,只有Symbol引用的esym对象会发生改变。