+ -
当前位置:首页 → 问答吧 → strcpy优化问题

strcpy优化问题

时间:2011-09-25

来源:互联网

我捣鼓了半天汇编,优化的还没c优化的版本快,郁闷。。。。感觉lodsl 比 movl + add 还慢。。。哎。。。
看反汇编后的字节码,我用汇编优化的版本明明更加精简,可就是慢,不知道为什么。。。


/*
 * Copy the NUL-terminated string s2 into the buffer s1, terminator included,
 * and return the original value of s1.
 *
 * Bytes are moved one at a time; the loop is unrolled by hand so that each
 * iteration handles a group of four bytes. If s1 and s2 are the same pointer
 * nothing is copied. No bounds checking is performed on s1.
 */
char* strcpy(char* s1, char const* s2)
{
    char* const head = s1;

    if (s1 != s2)
    {
        /* Step both cursors forward one 4-byte group per iteration. */
        for (;; s1 += 4, s2 += 4)
        {
            /* Each assignment yields the byte just stored; 0 means the
             * terminator has been written and the copy is complete. */
            if ((s1[0] = s2[0]) == '\0') break;
            if ((s1[1] = s2[1]) == '\0') break;
            if ((s1[2] = s2[2]) == '\0') break;
            if ((s1[3] = s2[3]) == '\0') break;
        }
    }

    return head;
}


/*
 * Hand-written variant of strcpy using GCC-style extended inline assembly.
 * It addresses memory through %esi/%edi, so it targets 32-bit x86 only.
 *
 * Copies the NUL-terminated string at s2 (terminator included) to s1 and
 * returns s1. The source cursor lives in %esi and the destination cursor in
 * %edi; %eax and %edx are scratch registers.
 *
 * The main loop reads the source four bytes at a time. A dword load from an
 * unaligned address can straddle a page boundary and touch the page after
 * the one that holds the terminator, which may not be mapped. To rule that
 * out, bytes are copied one at a time until %esi is 4-byte aligned; an
 * aligned dword load never crosses a page boundary.
 *
 * NOTE(review): stosl/stosb only advance %edi upward while the direction
 * flag is clear; this code assumes the calling convention guarantees that.
 */
char* strcpy(char* s1, char const* s2)
{
    tb_size_t edi, esi;
    __tb_asm__ __tb_volatile__
    (
        /* Prologue: byte-wise copy until the source address is 4-byte aligned. */
        "0:\n"
        " testl $3, %%esi\n"
        " je 1f\n"
        " movb (%%esi), %%al\n"
        " add $1, %%esi\n"
        " stosb\n"
        " testb %%al, %%al\n"
        " jne 0b\n"
        " jmp 3f\n"             /* terminator copied during alignment: done */

        /* Main loop: load one aligned dword and look for a zero byte in it. */
        "1:\n"
        " movl (%%esi), %%eax\n" /* movl + add was measured faster than lodsl */
        " add $4, %%esi\n"
        " movl %%eax, %%edx\n"  /* test a copy so %eax stays intact for the store */
        " testb %%dl, %%dl\n"
        " je 2f\n"
        " shr $8, %%edx\n"
        " testb %%dl, %%dl\n"
        " je 2f\n"
        " shr $8, %%edx\n"
        " testb %%dl, %%dl\n"
        " je 2f\n"
        " shr $8, %%edx\n"
        " testb %%dl, %%dl\n"
        " je 2f\n"
        " stosl\n"              /* no zero byte: store all four bytes */
        " jmp 1b\n"

        /* Tail: the dword holds the terminator; emit bytes up to and including it. */
        "2:\n"
        " stosb\n"
        " testb %%al, %%al\n"
        " je 3f\n"
        " shr $8, %%eax\n"
        " jmp 2b\n"
        "3:\n"

        : "=&S" (esi), "=&D" (edi)
        : "0" (s2), "1" (s1)
        : "memory", "cc", "eax", "edx"
    );
    return s1;
}

////////////////////////////////////////////////////////////
两者的反汇编和字节码:
C:
00000000 <tb_strcpy>:
  0: 53 push %ebx
  1: 8b 4c 24 0c mov 0xc(%esp),%ecx
  5: 8b 44 24 08 mov 0x8(%esp),%eax
  9: 85 c9 test %ecx,%ecx
  b: 74 7b je 88 <tb_strcpy+0x88>
  d: 85 c0 test %eax,%eax
  f: 74 77 je 88 <tb_strcpy+0x88>
  11: 39 c8 cmp %ecx,%eax
  13: 74 6a je 7f <tb_strcpy+0x7f>
  15: 0f b6 11 movzbl (%ecx),%edx
  18: 88 10 mov %dl,(%eax)
  1a: 84 d2 test %dl,%dl
  1c: 74 61 je 7f <tb_strcpy+0x7f>
  1e: 0f b6 51 01 movzbl 0x1(%ecx),%edx
  22: 88 50 01 mov %dl,0x1(%eax)
  25: 84 d2 test %dl,%dl
  27: 74 56 je 7f <tb_strcpy+0x7f>
  29: 0f b6 51 02 movzbl 0x2(%ecx),%edx
  2d: 88 50 02 mov %dl,0x2(%eax)
  30: 84 d2 test %dl,%dl
  32: 74 4b je 7f <tb_strcpy+0x7f>
  34: 0f b6 59 03 movzbl 0x3(%ecx),%ebx
  38: 31 d2 xor %edx,%edx
  3a: 88 58 03 mov %bl,0x3(%eax)
  3d: 84 db test %bl,%bl
  3f: 75 31 jne 72 <tb_strcpy+0x72>
  41: eb 3c jmp 7f <tb_strcpy+0x7f>
  43: 90 nop
  44: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
  48: 0f b6 5c 11 05 movzbl 0x5(%ecx,%edx,1),%ebx
  4d: 88 5c 10 05 mov %bl,0x5(%eax,%edx,1)
  51: 84 db test %bl,%bl
  53: 74 2a je 7f <tb_strcpy+0x7f>
  55: 0f b6 5c 11 06 movzbl 0x6(%ecx,%edx,1),%ebx
  5a: 88 5c 10 06 mov %bl,0x6(%eax,%edx,1)
  5e: 84 db test %bl,%bl
  60: 74 1d je 7f <tb_strcpy+0x7f>
  62: 0f b6 5c 11 07 movzbl 0x7(%ecx,%edx,1),%ebx
  67: 88 5c 10 07 mov %bl,0x7(%eax,%edx,1)
  6b: 83 c2 04 add $0x4,%edx
  6e: 84 db test %bl,%bl
  70: 74 0d je 7f <tb_strcpy+0x7f>
  72: 0f b6 5c 11 04 movzbl 0x4(%ecx,%edx,1),%ebx
  77: 88 5c 10 04 mov %bl,0x4(%eax,%edx,1)
  7b: 84 db test %bl,%bl
  7d: 75 c9 jne 48 <tb_strcpy+0x48>
  7f: 5b pop %ebx
  80: c3 ret  
  81: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
  88: 31 c0 xor %eax,%eax
  8a: 5b pop %ebx
  8b: c3 ret  


asm:
00000000 <tb_strcpy>:
  0: 83 ec 08 sub $0x8,%esp
  3: 89 34 24 mov %esi,(%esp)
  6: 89 7c 24 04 mov %edi,0x4(%esp)
  a: 8b 74 24 10 mov 0x10(%esp),%esi
  e: 8b 4c 24 0c mov 0xc(%esp),%ecx
  12: 85 f6 test %esi,%esi
  14: 74 4a je 60 <tb_strcpy+0x60>
  16: 85 c9 test %ecx,%ecx
  18: 74 46 je 60 <tb_strcpy+0x60>
  1a: 89 cf mov %ecx,%edi
  1c: 8b 06 mov (%esi),%eax
  1e: 83 c6 04 add $0x4,%esi
  21: 89 c2 mov %eax,%edx
  23: 84 d2 test %dl,%dl
  25: 74 18 je 3f <tb_strcpy+0x3f>
  27: c1 ea 08 shr $0x8,%edx
  2a: 84 d2 test %dl,%dl
  2c: 74 11 je 3f <tb_strcpy+0x3f>
  2e: c1 ea 08 shr $0x8,%edx
  31: 84 d2 test %dl,%dl
  33: 74 0a je 3f <tb_strcpy+0x3f>
  35: c1 ea 08 shr $0x8,%edx
  38: 84 d2 test %dl,%dl
  3a: 74 03 je 3f <tb_strcpy+0x3f>
  3c: ab stos %eax,%es:(%edi)
  3d: eb dd jmp 1c <tb_strcpy+0x1c>
  3f: aa stos %al,%es:(%edi)
  40: 84 c0 test %al,%al
  42: 74 05 je 49 <tb_strcpy+0x49>
  44: c1 e8 08 shr $0x8,%eax
  47: eb f6 jmp 3f <tb_strcpy+0x3f>
  49: 89 c8 mov %ecx,%eax
  4b: 8b 34 24 mov (%esp),%esi
  4e: 8b 7c 24 04 mov 0x4(%esp),%edi
  52: 83 c4 08 add $0x8,%esp
  55: c3 ret  
  56: 8d 76 00 lea 0x0(%esi),%esi
  59: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi
  60: 31 c9 xor %ecx,%ecx
  62: 8b 34 24 mov (%esp),%esi
  65: 89 c8 mov %ecx,%eax
  67: 8b 7c 24 04 mov 0x4(%esp),%edi
  6b: 83 c4 08 add $0x8,%esp
  6e: c3 ret  

作者: waruqi   发布时间: 2011-09-25

现在的编译器的优化已经不错了

作者: MSOKD   发布时间: 2011-09-25