top | item 40117242

(no title)

nickwanninger | 1 year ago

At the level that LLVM's LTO operates, no information about classes or objects is left, so LLVM itself can't really devirtualize C++ methods in most cases.

discuss

order

nwallin|1 year ago

You appear to be correct. Clang does not devirtualize in LTO, but GCC does. Personally I consider this very strange.

     $ cat animal.h cat.cpp main.cpp
    // animal.h
    
    #pragma once
    
    // Abstract base class for the example: a virtual destructor plus a single
    // pure virtual method, so it cannot be instantiated on its own.
    class animal {
     public:
      virtual ~animal() {}
      // Pure virtual; every concrete animal must provide its own speak().
      virtual void speak() = 0;
    };
    
    // Declared here, defined in cat.cpp. Code that includes only this header
    // sees the static type animal& and not the concrete class behind it.
    animal& get_mystery_animal();
    // cat.cpp
    
    #include "animal.h"
    #include <cstdio>
    
    // The only concrete animal in this example. It is marked final, so no
    // class can derive from cat and override speak() again.
    class cat final : public animal {
    public:
      ~cat() override{}
      // Writes "meow" to stdout; puts() appends the trailing newline.
      void speak() override{
        puts("meow");
      }
    };
    
    // File-local (static) instance: other translation units cannot name it
    // directly and only receive it through get_mystery_animal().
    static cat garfield{};
    
    // Returns garfield typed as its base class, so the caller does not see the
    // concrete type cat in the signature.
    animal& get_mystery_animal() {
      return garfield;
    }
    // main.cpp
    
    #include "animal.h"
    
    // Obtains an animal whose concrete type is not visible in this file and
    // calls speak() on it through the base-class reference.
    int main() {
      animal& a = get_mystery_animal();
      // This is the virtual call whose generated code is compared in the
      // disassembly of main.
      a.speak();
    }
     $ make clean && CXX=clang++ make -j && objdump --disassemble=main -C lto_test
    rm -f *.o lto_test
    clang++ -c -flto -O3 -g cat.cpp -o cat.o
    clang++ -c -flto -O3 -g main.cpp -o main.o
    clang++ -flto -O3 -g cat.o main.o -o lto_test
    
    lto_test:     file format elf64-x86-64
    
    
    Disassembly of section .init:
    
    Disassembly of section .plt:
    
    Disassembly of section .plt.got:
    
    Disassembly of section .text:
    
    00000000000011b0 <main>:
        11b0: 50                    push   %rax
        11b1: 48 8b 05 58 2e 00 00  mov    0x2e58(%rip),%rax        # 4010 <garfield>
        11b8: 48 8d 3d 51 2e 00 00  lea    0x2e51(%rip),%rdi        # 4010 <garfield>
        11bf: ff 50 10              call   *0x10(%rax)
        11c2: 31 c0                 xor    %eax,%eax
        11c4: 59                    pop    %rcx
        11c5: c3                    ret
    
    Disassembly of section .fini:
     $ make clean && CXX=g++ make -j && objdump --disassemble=main -C lto_test|sed -e 's,^,    ,'
    rm -f *.o lto_test
    g++ -c -flto -O3 -g cat.cpp -o cat.o
    g++ -c -flto -O3 -g main.cpp -o main.o
    g++ -flto -O3 -g cat.o main.o -o lto_test
    
    lto_test:     file format elf64-x86-64
    
    
    Disassembly of section .init:
    
    Disassembly of section .plt:
    
    Disassembly of section .plt.got:
    
    Disassembly of section .text:
    
    0000000000001090 <main>:
        1090: 48 83 ec 08           sub    $0x8,%rsp
        1094: 48 8d 3d 75 2f 00 00  lea    0x2f75(%rip),%rdi        # 4010 <garfield>
        109b: e8 50 01 00 00        call   11f0 <cat::speak()>
        10a0: 31 c0                 xor    %eax,%eax
        10a2: 48 83 c4 08           add    $0x8,%rsp
        10a6: c3                    ret
    
    Disassembly of section .fini:

JonChesterfield|1 year ago

I think this is a bug. There's dedicated metadata that's supposed to end up on the indirect call to list the possible targets, and when that list of possible targets is this short, it should be turning into a switch over concrete targets. I don't have time to dig into the IR now, but it might be worth posting to the LLVM issue tracker on GitHub.

ranger_danger|1 year ago

What if you add -fwhole-program-vtables on clang?