-
即使声明
volatile,GCC编译器仍然可能跨过asm volatile对其他指令做优化。具体而言,看下面这个例子:
#include <iostream> int main() { int a = 0x1234; //声明变量a //连续的 asm volatile 语句 asm volatile ("nop\n\t"); asm volatile ("nop\n\t"); asm volatile ("nop\n\t"); int b = 0x2345; //声明变量b int c = a+b; //使用a+b声明变量c printf("%d \n", c); //输出c return 0; }在O0情况下仅编译和汇编得到.o文件 ,然后反汇编
[ws@eos test]$ gcc -O0 nop.cpp -c [ws@eos test]$ objdump -d -C nop.o nop.o: 文件格式 elf64-x86-64 Disassembly of section .text:0000000000000000 <main>: 0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 48 83 ec 10 sub $0x10,%rsp 8: c7 45 f4 34 12 00 00 movl $0x1234,-0xc(%rbp) # 声明变量 a = 0x1234 f: 90 nop 10: 90 nop 11: 90 nop 12: c7 45 f8 45 23 00 00 movl $0x2345,-0x8(%rbp) # 声明变量 b = 0x2345 19: 8b 55 f4 mov -0xc(%rbp),%edx 1c: 8b 45 f8 mov -0x8(%rbp),%eax 1f: 01 d0 add %edx,%eax # 计算 a+b = 0x3579 21: 89 45 fc mov %eax,-0x4(%rbp) # c = a+b 24: 8b 45 fc mov -0x4(%rbp),%eax 27: 89 c6 mov %eax,%esi # 将c放到%esi寄存器 29: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 30 <main+0x30> 30: 48 89 c7 mov %rax,%rdi 33: b8 00 00 00 00 mov $0x0,%eax 38: e8 00 00 00 00 call 3d <main+0x3d> 3d: b8 00 00 00 00 mov $0x0,%eax 42: c9 leave 43: c3 ret但如果是在O1及以上情况下编译和汇编得到.o文件 ,然后反汇编
[ws@eos test]$ gcc -O1 nop.cpp -c [ws@eos test]$ objdump -d -C nop.o nop.o: 文件格式 elf64-x86-64 Disassembly of section .text:0000000000000000 <main>: 0: 48 83 ec 08 sub $0x8,%rsp 4: 90 nop 5: 90 nop 6: 90 nop 7: be 79 35 00 00 mov $0x3579,%esi # 直接将0x3579放到%esi寄存器 c: 48 8d 3d 00 00 00 00 lea 0x0(%rip),%rdi # 13 <main+0x13> 13: b8 00 00 00 00 mov $0x0,%eax 18: e8 00 00 00 00 call 1d <main+0x1d> 1d: b8 00 00 00 00 mov $0x0,%eax 22: 48 83 c4 08 add $0x8,%rsp 26: c3 ret仅就该例子而言,在O0情况下,汇编代码是先声明变量a,接着是内联汇编volatile序列,然后才是声明变量b, 计算c=a+b等等 但在O1及以上情况下,汇编代码会直接优化到:先是内联汇编volatile序列,然后直接将c的结果放到目标寄存器里
我不确定这是否算“跨过”内联汇编volatile序列。如果强迫编译器将序列前后的两部分分开来看,编译器是做不到这种优化的;但从结果上来说,编译器只是通过常量折叠删除(delete)了序列前声明变量a的代码(a、b都是编译期常量,而这些asm语句没有任何操作数,与a、b不存在依赖关系),并没有跨序列插入或者重排(reorder)指令。
-
GCC文档中有关编译器可能相对于其他代码移动
volatile asm指令的节选,同样来自文档:
Extended Asm (Using the GNU Compiler Collection (GCC))
Note that the compiler can move even
volatile asm instructions relative to other code, including across jump instructions. For example, on many targets there is a system register that controls the rounding mode of floating-point operations. Setting it with a volatile asm statement, as in the following PowerPC example, does not work reliably. 请注意,编译器甚至可以相对于其他代码移动volatile asm指令,包括跨过跳转指令。例如,在许多目标上,有一个系统寄存器来控制浮点运算的舍入模式。使用volatile asm语句设置它(如以下 PowerPC 示例中所示)无法可靠地工作。asm volatile("mtfsf 255, %0" : : "f" (fpenv)); sum = x + y; The compiler may move the addition back before the
volatile asm statement. To make it work as expected, add an artificial dependency to the asm by referencing a variable in the subsequent code, for example: 编译器可以将加法操作移到volatile asm语句之前。要使其按预期工作,请通过在后续代码中引用变量来向asm添加人工依赖项,例如:asm volatile ("mtfsf 255,%1" : "=X" (sum) : "f" (fpenv)); sum = x + y; 但仅就文档中提到的这个在PowerPC架构上实现的例子而言,我没有条件进行复现。
而且这个例子的描述其实还停留在C语言层面,比较抽象(所谓的将加法操作移到
volatile asm语句之前具体是个什么表现),也有待商榷:如果最终的汇编不是直接移动汇编指令add,而是将相当于add的步骤合并到了另一个在volatile asm语句之前的汇编指令中,符合这一表述吗?在接下来的实验例子中,出现了类似的现象,只不过是将
volatile asm之前赋值操作对应的汇编指令,与volatile asm之后加法操作对应的汇编指令,合并到了volatile asm之后的一个lea汇编指令中。 -
其他指令跨过
asm volatile做优化的另一个例子:#include <iostream> int main() { int a; scanf("%d",&a); int b = 0x2345; asm volatile ("nop\n\t"); asm volatile ("nop\n\t"); asm volatile ("nop\n\t"); int c = a + b; printf("%d \n", c); return 0; }在该例子中,严格来说
int b = 0x2345在asm volatile前声明并赋值,O0条件下编译也符合这一描述。但在O1及以上优化级别编译得到的汇编代码中,可以看到立即数
0x2345作为lea汇编指令的操作数出现在asm volatile之后。O0
0000000000000000 <main>: 0: 55 push %rbp 1: 48 89 e5 mov %rsp,%rbp 4: 48 83 ec 20 sub $0x20,%rsp 8: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax f: 00 00 11: 48 89 45 f8 mov %rax,-0x8(%rbp) 15: 31 c0 xor %eax,%eax 17: 48 8d 45 ec lea -0x14(%rbp),%rax 1b: 48 89 c6 mov %rax,%rsi 1e: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 25 <main+0x25> 25: 48 89 c7 mov %rax,%rdi 28: b8 00 00 00 00 mov $0x0,%eax 2d: e8 00 00 00 00 call 32 <main+0x32> 32: c7 45 f0 45 23 00 00 movl $0x2345,-0x10(%rbp) 39: 90 nop 3a: 90 nop 3b: 90 nop 3c: 8b 55 ec mov -0x14(%rbp),%edx 3f: 8b 45 f0 mov -0x10(%rbp),%eax 42: 01 d0 add %edx,%eax 44: 89 45 f4 mov %eax,-0xc(%rbp) 47: 8b 45 f4 mov -0xc(%rbp),%eax 4a: 89 c6 mov %eax,%esi 4c: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 53 <main+0x53> 53: 48 89 c7 mov %rax,%rdi 56: b8 00 00 00 00 mov $0x0,%eax 5b: e8 00 00 00 00 call 60 <main+0x60> 60: b8 00 00 00 00 mov $0x0,%eax 65: 48 8b 55 f8 mov -0x8(%rbp),%rdx 69: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx 70: 00 00 72: 74 05 je 79 <main+0x79> 74: e8 00 00 00 00 call 79 <main+0x79> 79: c9 leave 7a: c3 ret具体而言,O0条件下可以清楚地看到变量b在
asm volatile前声明并赋值给-0x10(%rbp),在asm volatile后有add %edx, %eax对应c = a + b,最后才是将c值存储到%esi寄存器中,作为函数参数调用输出函数输出c值O1
0000000000000000 <main>: 0: 48 83 ec 18 sub $0x18,%rsp 4: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax b: 00 00 d: 48 89 44 24 08 mov %rax,0x8(%rsp) 12: 31 c0 xor %eax,%eax 14: 48 8d 74 24 04 lea 0x4(%rsp),%rsi 19: 48 8d 3d 00 00 00 00 lea 0x0(%rip),%rdi # 20 <main+0x20> 20: e8 00 00 00 00 call 25 <main+0x25> 25: 90 nop 26: 90 nop 27: 90 nop 28: 8b 44 24 04 mov 0x4(%rsp),%eax 2c: 8d b0 45 23 00 00 lea 0x2345(%rax),%esi 32: 48 8d 3d 00 00 00 00 lea 0x0(%rip),%rdi # 39 <main+0x39> 39: b8 00 00 00 00 mov $0x0,%eax 3e: e8 00 00 00 00 call 43 <main+0x43> 43: 48 8b 44 24 08 mov 0x8(%rsp),%rax 48: 64 48 2b 04 25 28 00 sub %fs:0x28,%rax 4f: 00 00 51: 75 0a jne 5d <main+0x5d> 53: b8 00 00 00 00 mov $0x0,%eax 58: 48 83 c4 18 add $0x18,%rsp 5c: c3 ret 5d: e8 00 00 00 00 call 62 <.LC1+0x5f>然而在O1条件下,直接优化到了在
asm volatile后有lea 0x2345(%rax),%esi对应c = a + b,然后%esi和%rdi作为存储函数参数的寄存器,调用函数输出变量c的值