0%

c++ inline测试

总结

  • 测试CPO(customization point object)与tag_invoke时, 通过反汇编发现有些理应被内联的函数并没有真正被内联, 于是打算测试一下inline的机制
  • gcc文档: https://gcc.gnu.org/onlinedocs/gcc/Inline.html#Inline
  • 总的来说, inline只是给编译器的一个建议(hint): 条件允许时编译器会进行内联, 但具体是否内联, 仍然由编译器决定
  • 我们就没有办法自己定了吗?
  • 一般编译时, 会通过-O0 -O1 -O3 之类的编译选项来控制
  • 通过gcc -Q --help=optimizers -O3命令 我们可以看到不同的编译选项下的开关
  • 当然, 也可以在编译时单独指定具体的优化开关(例如-finline-functions或-fno-inline)
  • 也可以用inline void foo (const char) __attribute__((always_inline));格式来强制内联

编译选项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# gcc -Q --help=optimizers -O3|grep -i inline
-ffold-simple-inlines [available in C++, ObjC++]
-finline [enabled]
-finline-atomics [enabled]
-finline-functions [enabled]
-finline-functions-called-once [enabled]
-finline-small-functions [enabled]
-flive-patching -flive-patching=inline-clone
-flive-patching=[inline-only-static|inline-clone] [default]

# gcc -Q --help=optimizers -O0|grep -i inline
-ffold-simple-inlines [available in C++, ObjC++]
-finline [disabled]
-finline-atomics [enabled]
-finline-functions [disabled]
-finline-functions-called-once [disabled]
-finline-small-functions [disabled]
-flive-patching -flive-patching=inline-clone
-flive-patching=[inline-only-static|inline-clone] [default]

测试代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <cstdio>

// Compile-time switch for the printf inside BODY(). It defaults to enabled,
// which matches the original hard-coded `#if true`; build with
// -DENABLE_PRINT=0 to compile the print out without editing the source.
#ifndef ENABLE_PRINT
#define ENABLE_PRINT 1
#endif

// print_i expands to an expression that prints the local variable `i`, or to
// nothing when printing is disabled. It intentionally carries no trailing
// semicolon: the statement terminator is supplied where the macro is used.
#if ENABLE_PRINT
#define print_i printf("%d", i)
#else
#define print_i
#endif

// BODY() is the function body shared by every test function: it declares a
// local `i`, adds 10 to it and, when printing is enabled, prints it. With
// printing disabled the trailing `print_i;` collapses to an empty statement
// and `i` is never observed.
#define BODY() \
    int i = 0; \
    i += 10;   \
    print_i;

// Free function at namespace scope without the `inline` specifier.
// Its body is the shared BODY() macro.
void no_inline_func()
{
    BODY()
}
// Free function at namespace scope explicitly marked `inline`.
// Its body is the shared BODY() macro.
inline void inline_func()
{
    BODY()
}
// Holds the member-function variants of the experiment. Every member is
// defined inside the class body (which already makes it implicitly inline in
// C++), once with and once without an explicit `inline`, both as a plain
// member and as a member template. Each one simply runs BODY().
class Holder {
public:
    // Plain member, no explicit `inline`.
    void no_inline_class_func()
    {
        BODY()
    }

    // Plain member, explicit `inline`.
    inline void inline_class_func()
    {
        BODY()
    }

    // Member template, no explicit `inline`. T is not used by the body.
    template <typename T>
    void template_no_inline_class_func()
    {
        BODY()
    }

    // Member template, explicit `inline`. T is not used by the body.
    template <typename T>
    inline void template_inline_class_func()
    {
        BODY()
    }
};
int main(int argc, char **argv) {
no_inline_func();
inline_func();
Holder h{};
h.no_inline_class_func();
h.inline_class_func();
h.template_no_inline_class_func<int>();
h.template_inline_class_func<int>();

return 0;
}

反汇编

-O3选项下, 基本开启了全部内联选项, 并会进行代码裁剪(消除无用代码)

  • 关闭print, BODY()的内容都可以裁掉了: i没有被用到, 属于无用代码(dead code)
  • 基本被裁剪干净了, main中只剩下对no_inline_func的一次调用, 而该函数体也只剩一条ret
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
0000000000001040 <main>:
1040: f3 0f 1e fa endbr64
1044: 48 83 ec 08 sub $0x8,%rsp
1048: e8 13 01 00 00 call 1160 <_Z14no_inline_funcv>
104d: 31 c0 xor %eax,%eax
104f: 48 83 c4 08 add $0x8,%rsp
1053: c3 ret
1054: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1)
105b: 00 00 00
105e: 66 90 xchg %ax,%ax

0000000000001160 <_Z14no_inline_funcv>:
1160: f3 0f 1e fa endbr64
1164: c3 ret

  • 开启print 也基本都内联了
  • 除了no_inline_func外, 全部原地展开了
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
00000000000011e0 <_Z14no_inline_funcv>:
11e0: f3 0f 1e fa endbr64
11e4: ba 0a 00 00 00 mov $0xa,%edx
11e9: 48 8d 35 14 0e 00 00 lea 0xe14(%rip),%rsi # 2004 <_IO_stdin_used+0x4>
11f0: bf 01 00 00 00 mov $0x1,%edi
11f5: 31 c0 xor %eax,%eax
11f7: e9 54 fe ff ff jmp 1050 <__printf_chk@plt>
0000000000001060 <main>:
1060: f3 0f 1e fa endbr64
1064: 53 push %rbx
1065: 48 8d 1d 98 0f 00 00 lea 0xf98(%rip),%rbx # 2004 <_IO_stdin_used+0x4>
106c: e8 6f 01 00 00 call 11e0 <_Z14no_inline_funcv>
1071: 48 89 de mov %rbx,%rsi
1074: ba 0a 00 00 00 mov $0xa,%edx
1079: 31 c0 xor %eax,%eax
107b: bf 01 00 00 00 mov $0x1,%edi
1080: e8 cb ff ff ff call 1050 <__printf_chk@plt>
1085: 48 89 de mov %rbx,%rsi
1088: ba 0a 00 00 00 mov $0xa,%edx
108d: 31 c0 xor %eax,%eax
108f: bf 01 00 00 00 mov $0x1,%edi
1094: e8 b7 ff ff ff call 1050 <__printf_chk@plt>
1099: 48 89 de mov %rbx,%rsi
109c: ba 0a 00 00 00 mov $0xa,%edx
10a1: 31 c0 xor %eax,%eax
10a3: bf 01 00 00 00 mov $0x1,%edi
10a8: e8 a3 ff ff ff call 1050 <__printf_chk@plt>
10ad: 48 89 de mov %rbx,%rsi
10b0: ba 0a 00 00 00 mov $0xa,%edx
10b5: 31 c0 xor %eax,%eax
10b7: bf 01 00 00 00 mov $0x1,%edi
10bc: e8 8f ff ff ff call 1050 <__printf_chk@plt>
10c1: 48 89 de mov %rbx,%rsi
10c4: ba 0a 00 00 00 mov $0xa,%edx
10c9: 31 c0 xor %eax,%eax
10cb: bf 01 00 00 00 mov $0x1,%edi
10d0: e8 7b ff ff ff call 1050 <__printf_chk@plt>
10d5: 31 c0 xor %eax,%eax
10d7: 5b pop %rbx
10d8: c3 ret
10d9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)

-O0选项下

  • 基本关掉了所有内联选项
  • 无论代码是否真的被用到, 全部原样编译到目标文件中了
  • 对于没有被内联的函数, 由于反汇编结果都相似, 这里只保留了其中一个, 其余省略
  • 开启打印
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
000000000000128a <_ZN6Holder17inline_class_funcEv>:
128a: f3 0f 1e fa endbr64
128e: 55 push %rbp
128f: 48 89 e5 mov %rsp,%rbp
1292: 48 83 ec 20 sub $0x20,%rsp
1296: 48 89 7d e8 mov %rdi,-0x18(%rbp)
129a: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp)
12a1: 83 45 fc 0a addl $0xa,-0x4(%rbp)
12a5: 8b 45 fc mov -0x4(%rbp),%eax
12a8: 89 c6 mov %eax,%esi
12aa: 48 8d 05 53 0d 00 00 lea 0xd53(%rip),%rax # 2004 <_IO_stdin_used+0x4>
12b1: 48 89 c7 mov %rax,%rdi
12b4: b8 00 00 00 00 mov $0x0,%eax
12b9: e8 b2 fd ff ff call 1070 <printf@plt>
12be: 90 nop
12bf: c9 leave
12c0: c3 ret
12c1: 90 nop

00000000000011a7 <main>:
11a7: f3 0f 1e fa endbr64
11ab: 55 push %rbp
11ac: 48 89 e5 mov %rsp,%rbp
11af: 48 83 ec 20 sub $0x20,%rsp
11b3: 89 7d ec mov %edi,-0x14(%rbp)
11b6: 48 89 75 e0 mov %rsi,-0x20(%rbp)
11ba: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
11c1: 00 00
11c3: 48 89 45 f8 mov %rax,-0x8(%rbp)
11c7: 31 c0 xor %eax,%eax
11c9: e8 a6 ff ff ff call 1174 <_Z14no_inline_funcv>
11ce: e8 4b 00 00 00 call 121e <_Z11inline_funcv>
11d3: 48 8d 45 f7 lea -0x9(%rbp),%rax
11d7: 48 89 c7 mov %rax,%rdi
11da: e8 73 00 00 00 call 1252 <_ZN6Holder20no_inline_class_funcEv>
11df: 48 8d 45 f7 lea -0x9(%rbp),%rax
11e3: 48 89 c7 mov %rax,%rdi
11e6: e8 9f 00 00 00 call 128a <_ZN6Holder17inline_class_funcEv>
11eb: 48 8d 45 f7 lea -0x9(%rbp),%rax
11ef: 48 89 c7 mov %rax,%rdi
11f2: e8 cb 00 00 00 call 12c2 <_ZN6Holder29template_no_inline_class_funcIiEEvv>
11f7: 48 8d 45 f7 lea -0x9(%rbp),%rax
11fb: 48 89 c7 mov %rax,%rdi
11fe: e8 f7 00 00 00 call 12fa <_ZN6Holder26template_inline_class_funcIiEEvv>
1203: b8 00 00 00 00 mov $0x0,%eax
1208: 48 8b 55 f8 mov -0x8(%rbp),%rdx
120c: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx
1213: 00 00
1215: 74 05 je 121c <main+0x75>
1217: e8 44 fe ff ff call 1060 <__stack_chk_fail@plt>
121c: c9 leave
121d: c3 ret
  • 关闭打印
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
0000000000001206 <_ZN6Holder17inline_class_funcEv>:
1206: f3 0f 1e fa endbr64
120a: 55 push %rbp
120b: 48 89 e5 mov %rsp,%rbp
120e: 48 89 7d e8 mov %rdi,-0x18(%rbp)
1212: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%rbp)
1219: 83 45 fc 0a addl $0xa,-0x4(%rbp)
121d: 90 nop
121e: 5d pop %rbp
121f: c3 ret

000000000000115f <main>:
115f: f3 0f 1e fa endbr64
1163: 55 push %rbp
1164: 48 89 e5 mov %rsp,%rbp
1167: 48 83 ec 20 sub $0x20,%rsp
116b: 89 7d ec mov %edi,-0x14(%rbp)
116e: 48 89 75 e0 mov %rsi,-0x20(%rbp)
1172: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
1179: 00 00
117b: 48 89 45 f8 mov %rax,-0x8(%rbp)
117f: 31 c0 xor %eax,%eax
1181: e8 c3 ff ff ff call 1149 <_Z14no_inline_funcv>
1186: e8 4b 00 00 00 call 11d6 <_Z11inline_funcv>
118b: 48 8d 45 f7 lea -0x9(%rbp),%rax
118f: 48 89 c7 mov %rax,%rdi
1192: e8 55 00 00 00 call 11ec <_ZN6Holder20no_inline_class_funcEv>
1197: 48 8d 45 f7 lea -0x9(%rbp),%rax
119b: 48 89 c7 mov %rax,%rdi
119e: e8 63 00 00 00 call 1206 <_ZN6Holder17inline_class_funcEv>
11a3: 48 8d 45 f7 lea -0x9(%rbp),%rax
11a7: 48 89 c7 mov %rax,%rdi
11aa: e8 71 00 00 00 call 1220 <_ZN6Holder29template_no_inline_class_funcIiEEvv>
11af: 48 8d 45 f7 lea -0x9(%rbp),%rax
11b3: 48 89 c7 mov %rax,%rdi
11b6: e8 7f 00 00 00 call 123a <_ZN6Holder26template_inline_class_funcIiEEvv>
11bb: b8 00 00 00 00 mov $0x0,%eax
11c0: 48 8b 55 f8 mov -0x8(%rbp),%rdx
11c4: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx
11cb: 00 00
11cd: 74 05 je 11d4 <main+0x75>
11cf: e8 7c fe ff ff call 1050 <__stack_chk_fail@plt>
11d4: c9 leave
11d5: c3 ret