C语言中自加与自减效率的思考

原帖地址：http://out.bitunion.org/thread-10461486-1-2.html

在正常的C语言中，使用

cpp 代码 [复制到剪贴板]

for (val = 0; val < num; val++)
for (val = num; val > 0; val--)

从表面上看，这两种写法是一样的，通常我们的目的都是使循环进行num次。然而在经典的C语言效率讨论中，往往会有人说“第一种的效率比第二种低”。针对这一问题，我昨天试图在C99手册中搜索相关说明，但是没有找到。因此我推测这种效率差异是在机器执行过程中产生的，于是决定通过反汇编的方式分析其中的差异。

下面我们进行测试。
首先编写一段测试代码，内容较为随意，只要应用到for循环就好了。我使用的是累加...

正常编译、查看结果等过程就不啰嗦了。下面贴出自加与自减两段代码的汇编部分：

代码:

[复制到剪贴板]

00000000 <test_addl>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 10 sub $0x10,%esp
6: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%ebp)
d: c7 45 f8 00 00 00 00 movl $0x0,-0x8(%ebp)
14: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%ebp)
1b: eb 0a jmp 27 <test_addl+0x27>
1d: 8b 45 fc mov -0x4(%ebp),%eax
20: 01 45 f8 add %eax,-0x8(%ebp)
23: 83 45 fc 01 addl $0x1,-0x4(%ebp)
27: 8b 45 fc mov -0x4(%ebp),%eax
2a: 3b 45 08 cmp 0x8(%ebp),%eax
2d: 7c ee jl 1d <test_addl+0x1d>
2f: 8b 45 f8 mov -0x8(%ebp),%eax
32: c9 leave
33: c3 ret
00000034 <test_subl>:
34: 55 push %ebp
35: 89 e5 mov %esp,%ebp
37: 83 ec 10 sub $0x10,%esp
3a: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%ebp)
41: c7 45 f8 00 00 00 00 movl $0x0,-0x8(%ebp)
48: 8b 45 08 mov 0x8(%ebp),%eax
4b: 89 45 fc mov %eax,-0x4(%ebp)
4e: eb 0a jmp 5a <test_subl+0x26>
50: 8b 45 fc mov -0x4(%ebp),%eax
53: 01 45 f8 add %eax,-0x8(%ebp)
56: 83 6d fc 01 subl $0x1,-0x4(%ebp)
5a: 83 7d fc 00 cmpl $0x0,-0x4(%ebp)
5e: 7f f0 jg 50 <test_subl+0x1c>
60: 8b 45 f8 mov -0x8(%ebp),%eax
63: c9 leave
64: c3 ret

代码:

[复制到剪贴板]

00000000 <test_addl>:
0: 55 push %ebp
1: 31 c0 xor %eax,%eax
3: 89 e5 mov %esp,%ebp
5: 31 d2 xor %edx,%edx
7: 8b 4d 08 mov 0x8(%ebp),%ecx
a: 85 c9 test %ecx,%ecx
c: 7e 0b jle 19 <test_addl+0x19>
e: 66 90 xchg %ax,%ax
10: 01 d0 add %edx,%eax
12: 83 c2 01 add $0x1,%edx
15: 39 ca cmp %ecx,%edx
17: 75 f7 jne 10 <test_addl+0x10>
19: 5d pop %ebp
1a: c3 ret
1b: 90 nop
1c: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
00000020 <test_subl>:
20: 55 push %ebp
21: 31 c0 xor %eax,%eax
23: 89 e5 mov %esp,%ebp
25: 8b 55 08 mov 0x8(%ebp),%edx
28: 85 d2 test %edx,%edx
2a: 7e 0b jle 37 <test_subl+0x17>
2c: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
30: 01 d0 add %edx,%eax
32: 83 ea 01 sub $0x1,%edx
35: 75 f9 jne 30 <test_subl+0x10>
37: 5d pop %ebp
38: c3 ret
39: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi

上述两段汇编的编译选项不同：第一段是默认编译（未开启优化）的结果，第二段是使用-O2优化后的结果。

对比一下，就能看出差异了：在-O2的结果中，自减循环在sub之后直接用jne跳转，不需要额外的比较指令；而自加循环在add之后还要多执行一条cmp才能跳转。因此自减的循环体更短，更节约代码。
产生原因很纠结...

查阅《ARM体系结构与编程》，书中间接指出了原因：
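在执行sub时，运算结果会直接更新程序状态寄存器，通过Z标志位即可判断结果是否为0（C标志位则反映是否产生进位/借位），从而判断循环是否结束，不需要额外的比较指令。而自加循环的结束条件是与num比较，而不是与0比较：add同样会更新标志位，但Z标志位只能说明结果是否为0，C标志位只能说明是否有进位，都无法直接说明计数是否已达到num。所以自加过程中需要多一条cmp指令（未优化时还要先用mov把变量读入寄存器再比较）。上面的反汇编虽然是x86平台的结果，但原理与书中所述的ARM相同。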

posted on 2012-07-22 20:03 百万军中阅读(656) 评论(0) 收藏举报