当前位置：首页 → 问答吧 → IA64,SUSE10,生成汇编语言的奇怪问题

IA64,SUSE10,生成汇编语言的奇怪问题

时间：2010-06-22

来源：互联网

本帖最后由 yulihua49 于 2010-06-22 16:00 编辑

sdbc@jgbticket:~/rsa> cat mula.c
void mula(long a, long b, long c[2]) /* stores the long-sized product of a and b in c[0]; c[1] is never written */
{
    long d;         /* named local kept on purpose so that its stack slot is visible in the generated assembly */
    d = a * b;      /* only the long-sized part of the product is kept (imull / imulq in the listings) */
    c[0] = d;
}
sdbc@jgbticket:~/rsa> cc -m32 -S mula.c
sdbc@jgbticket:~/rsa> cat mula.s
.file "mula.c"
.text
.globl mula                     # make the symbol visible to the linker
.type mula, @function
mula:                           # 32-bit build: a, b and c are read from the stack at 8, 12 and 16(%ebp)
pushl %ebp                      # save the caller's frame pointer
movl %esp, %ebp                 # ebp = esp: establish this function's frame
subl $16, %esp                  # reserve 16 bytes below ebp for locals (d is at -4(%ebp))
movl 8(%ebp), %eax              # eax = a
imull 12(%ebp), %eax            # eax = a * b (low 32 bits)
movl %eax, -4(%ebp)             # d = eax
movl 16(%ebp), %edx             # edx = c
movl -4(%ebp), %eax             # eax = d
movl %eax, (%edx)               # c[0] = d
leave                           # esp = ebp, then pop ebp
ret
.size mula, .-mula
.ident "GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
.section .note.GNU-stack,"",@progbits
sdbc@jgbticket:~/rsa> cc -S mula.c
sdbc@jgbticket:~/rsa> cat mula.s
.file "mula.c"
.text
.globl mula                     # make the symbol visible to the linker
.type mula, @function
mula:                           # 64-bit build: a, b and c arrive in %rdi, %rsi and %rdx
.LFB2:                          # function start, referenced by the unwind FDE
pushq %rbp                      # save the caller's frame pointer
.LCFI0:                         # unwind marker: after the push
movq %rsp, %rbp                 # rbp = rsp; no subq follows, so rsp stays at this value
.LCFI1:                         # unwind marker: after rbp is set up
movq %rdi, -24(%rbp)            # spill a below rsp; System V AMD64 lets a leaf function use the 128-byte red zone there
movq %rsi, -32(%rbp)            # spill b
movq %rdx, -40(%rbp)            # spill c
movq -24(%rbp), %rax            # rax = a
imulq -32(%rbp), %rax           # rax = a * b (low 64 bits)
movq %rax, -8(%rbp)             # d = rax
movq -40(%rbp), %rdx            # rdx = c
movq -8(%rbp), %rax             # rax = d
movq %rax, (%rdx)               # c[0] = d
leave                           # rsp = rbp, then pop rbp
ret
.LFE2:                          # function end, referenced by the unwind FDE
.size mula, .-mula
.section .eh_frame,"a",@progbits        # unwind information: one CIE followed by the FDE for mula
.Lframe1:
.long .LECIE1-.LSCIE1                   # CIE length
.LSCIE1:
.long 0x0                               # CIE id (0 marks a CIE in .eh_frame)
.byte 0x1                               # CIE version
.string "zR"                            # augmentation: length-prefixed data, FDE pointer encoding present
.uleb128 0x1                            # code alignment factor
.sleb128 -8                             # data alignment factor
.byte 0x10                              # return-address column (DWARF register 16)
.uleb128 0x1                            # augmentation data length
.byte 0x3                               # FDE pointer encoding: DW_EH_PE_udata4
.byte 0xc                               # DW_CFA_def_cfa
.uleb128 0x7                            #   register 7 (rsp)
.uleb128 0x8                            #   offset 8: CFA = rsp + 8 at function entry
.byte 0x90                              # DW_CFA_offset, register 16 (return address)
.uleb128 0x1                            #   factored offset 1 * -8: saved at CFA - 8
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1                  # FDE length
.LASFDE1:
.long .LASFDE1-.Lframe1                 # offset back to the CIE
.long .LFB2                             # start address of the covered code (mula)
.long .LFE2-.LFB2                       # size of the covered code
.uleb128 0x0                            # augmentation data length
.byte 0x4                               # DW_CFA_advance_loc4
.long .LCFI0-.LFB2                      #   advance to just after pushq %rbp
.byte 0xe                               # DW_CFA_def_cfa_offset
.uleb128 0x10                           #   CFA = rsp + 16
.byte 0x86                              # DW_CFA_offset, register 6 (rbp)
.uleb128 0x2                            #   factored offset 2 * -8: saved at CFA - 16
.byte 0x4                               # DW_CFA_advance_loc4
.long .LCFI1-.LCFI0                     #   advance to just after movq %rsp, %rbp
.byte 0xd                               # DW_CFA_def_cfa_register
.uleb128 0x6                            #   CFA is now computed from register 6 (rbp)
.align 8
.LEFDE1:
.ident "GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
.section .note.GNU-stack,"",@progbits

复制代码

2-7行是一个简单的C函数。

第8行，按32位生成汇编。
10-28行是生成的32位汇编语言，完全没有问题。
29行，按照64位进行汇编。
31-93行是64位汇编。
对比32位汇编，发现17行，32位汇编在栈里预留了局部变量$16，虽然大了点但没问题。
在64位代码里，没有发现预留局部变量的指令。
18行，movl 8(%ebp), %eax；从堆栈帧取自变量a。
而41行，64位代码，从寄存器取得自变量，暂存到局部变量区，而局部变量区并未分配。（%rsp没有动作），它为什么能正确工作呢？
我故意定义了一个局部变量，可以看到分别在22，48行使用了这个局部变量。
准备为大数乘法写个64位的高效算法：64bit × 64bit = 128bit，弄不懂这个问题，写不下去了。
请高人解答，普罗大众可以热烈讨论。

作者: yulihua49 发布时间: 2010-06-22

好像只有青蛙做过IA64吧。

作者: prolj 发布时间: 2010-06-22

不懂IA64位汇编

作者: hellioncu 发布时间: 2010-06-22

/* Store the long-sized product of a and b in c[0]; c[1] is not written. */
void mula(long a, long b, long c[2])
{
    long d = a * b; /* named local: this is the "d" slot shown in the stack diagram */

    *c = d;
}
/* Driver: calls mula() with two operands whose upper and lower 32-bit halves are both non-zero. */
int main(int argc, char *argv[])
{
    long c[2];
    long a = 0x000000010000000a;
    long b = 0x000000020000000b;

    mula(a, b, c);
}
stack:
rbp - 40: [c array address]
rbp - 32: [b value ]
rbp - 24: [a value ]
rbp - 16: [unused ]
rbp - 8: [d value ]
rbp : [rbp value ]
rbp + 8: [return address ]
.file "mula.c"
.text
/*
 * void mula(long a, long b, long c[2])     (unoptimized compiler output, annotated)
 * ABI:   System V AMD64
 * In:    rdi = a, rsi = b, rdx = c
 * Out:   c[0] = a * b (low 64 bits only)
 * Stack: rsp is never lowered; the spill slots at -8 .. -40(%rbp) sit below rsp,
 *        inside the 128-byte red zone that a leaf function may use.
 *
 * Annotations are C-style comments: GNU as for x86 treats ';' as a statement
 * separator, so text after ';' would be assembled as an instruction.
 */
        .globl  mula
        .type   mula, @function
mula:
.LFB2:
        pushq   %rbp                    /* save caller's rbp (8 bytes) */
.LCFI0:
        movq    %rsp, %rbp              /* rbp = rsp */
.LCFI1:
        movq    %rdi, -24(%rbp)         /* spill a */
        movq    %rsi, -32(%rbp)         /* spill b */
        movq    %rdx, -40(%rbp)         /* spill c */
        movq    -24(%rbp), %rax         /* rax = a */
        imulq   -32(%rbp), %rax         /* rax = rax * b */
        movq    %rax, -8(%rbp)          /* d = rax */
        movq    -40(%rbp), %rdx         /* rdx = c */
        movq    -8(%rbp), %rax          /* rax = d */
        movq    %rax, (%rdx)            /* c[0] = d */
        leave                           /* rsp = rbp, then pop rbp */
        ret
.LFE2:
        .size   mula, .-mula
/*
 * int main(int argc, char *argv[])         (unoptimized compiler output, annotated)
 * ABI:   System V AMD64
 * Frame: a at -16(%rbp), b at -8(%rbp), c[] at -32(%rbp);
 *        argc saved at -36(%rbp), argv at -48(%rbp).
 * Stack: unlike mula, this function makes a call and lowers rsp (subq $64).
 *
 * Annotations are C-style comments: GNU as for x86 treats ';' as a statement
 * separator, so text after ';' would be assembled as an instruction.
 */
        .globl  main
        .type   main, @function
main:
.LFB3:
        pushq   %rbp                    /* save caller's rbp */
.LCFI2:
        movq    %rsp, %rbp              /* rbp = rsp */
.LCFI3:
        subq    $64, %rsp               /* reserve 64 bytes for locals */
.LCFI4:
        movl    %edi, -36(%rbp)         /* save argc */
        movq    %rsi, -48(%rbp)         /* save argv */
        movl    $10, -16(%rbp)          /* low half of a  = 0x0000000a */
        movl    $1, -12(%rbp)           /* high half of a = 0x00000001 */
        movl    $11, -8(%rbp)           /* low half of b  = 0x0000000b */
        movl    $2, -4(%rbp)            /* high half of b = 0x00000002 */
        leaq    -32(%rbp), %rdx         /* rdx = c (third argument) */
        movq    -8(%rbp), %rsi          /* rsi = b (second argument) */
        movq    -16(%rbp), %rdi         /* rdi = a (first argument) */
        call    mula                    /* pushes the 8-byte return address */
        leave                           /* rsp = rbp, then pop rbp */
        ret
.LFE3:
        .size   main, .-main
.section .eh_frame,"a",@progbits        /* unwind information: one CIE, then one FDE each for mula and main */
.Lframe1:
.long .LECIE1-.LSCIE1                   /* CIE length */
.LSCIE1:
.long 0x0                               /* CIE id (0 marks a CIE in .eh_frame) */
.byte 0x1                               /* CIE version */
.string "zR"                            /* augmentation: length-prefixed data, FDE pointer encoding present */
.uleb128 0x1                            /* code alignment factor */
.sleb128 -8                             /* data alignment factor */
.byte 0x10                              /* return-address column (DWARF register 16) */
.uleb128 0x1                            /* augmentation data length */
.byte 0x3                               /* FDE pointer encoding: DW_EH_PE_udata4 */
.byte 0xc                               /* DW_CFA_def_cfa */
.uleb128 0x7                            /*   register 7 (rsp) */
.uleb128 0x8                            /*   offset 8: CFA = rsp + 8 at function entry */
.byte 0x90                              /* DW_CFA_offset, register 16 (return address) */
.uleb128 0x1                            /*   factored offset 1 * -8: saved at CFA - 8 */
.align 8
.LECIE1:
.LSFDE1:                                /* ---- FDE for mula (.LFB2 .. .LFE2) ---- */
.long .LEFDE1-.LASFDE1                  /* FDE length */
.LASFDE1:
.long .LASFDE1-.Lframe1                 /* offset back to the CIE */
.long .LFB2                             /* start address of the covered code */
.long .LFE2-.LFB2                       /* size of the covered code */
.uleb128 0x0                            /* augmentation data length */
.byte 0x4                               /* DW_CFA_advance_loc4 */
.long .LCFI0-.LFB2                      /*   advance to just after pushq %rbp */
.byte 0xe                               /* DW_CFA_def_cfa_offset */
.uleb128 0x10                           /*   CFA = rsp + 16 */
.byte 0x86                              /* DW_CFA_offset, register 6 (rbp) */
.uleb128 0x2                            /*   factored offset 2 * -8: saved at CFA - 16 */
.byte 0x4                               /* DW_CFA_advance_loc4 */
.long .LCFI1-.LCFI0                     /*   advance to just after movq %rsp, %rbp */
.byte 0xd                               /* DW_CFA_def_cfa_register */
.uleb128 0x6                            /*   CFA is now computed from register 6 (rbp) */
.align 8
.LEFDE1:
.LSFDE3:                                /* ---- FDE for main (.LFB3 .. .LFE3) ---- */
.long .LEFDE3-.LASFDE3                  /* FDE length */
.LASFDE3:
.long .LASFDE3-.Lframe1                 /* offset back to the CIE */
.long .LFB3                             /* start address of the covered code */
.long .LFE3-.LFB3                       /* size of the covered code */
.uleb128 0x0                            /* augmentation data length */
.byte 0x4                               /* DW_CFA_advance_loc4 */
.long .LCFI2-.LFB3                      /*   advance to just after pushq %rbp */
.byte 0xe                               /* DW_CFA_def_cfa_offset */
.uleb128 0x10                           /*   CFA = rsp + 16 */
.byte 0x86                              /* DW_CFA_offset, register 6 (rbp) */
.uleb128 0x2                            /*   factored offset 2 * -8: saved at CFA - 16 */
.byte 0x4                               /* DW_CFA_advance_loc4 */
.long .LCFI3-.LCFI2                     /*   advance to just after movq %rsp, %rbp */
.byte 0xd                               /* DW_CFA_def_cfa_register */
.uleb128 0x6                            /*   CFA is now computed from register 6 (rbp) */
.align 8
.LEFDE3:
.ident "GCC: (Debian 4.3.2-1.1) 4.3.2"
.section .note.GNU-stack,"",@progbits

复制代码

作者: guoruimin 发布时间: 2010-06-22

这个是x86_64或者叫amd64，不是ia64。要安腾处理器的汇编码才是IA64。

作者: 没本 发布时间: 2010-06-22

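rsp没有加是因为你的函数mula()内没有对别的函数的调用，编译器知道后，就没必要调整rsp了，节省一条指令，退出时用了leave指令，栈框不会出问题。之所以可以这样做，是因为 System V AMD64 ABI 规定 rsp 以下的 128 字节为“红区”(red zone)，信号和中断处理程序不会改动这块区域，叶子函数可以不调整 rsp 而直接把它当作局部变量区使用。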
在x86_64下的rsp调整规范是，由调用者(caller)来处理栈指针而不是由被调用者(callee)来处理，因此编译器拥有这方面的完备信息，可以按情况处理。你加一个嵌套调用就能看到编译器调整rsp了，gcc会用(subq $40, %rsp)。

作者: 没本 发布时间: 2010-06-22

本帖最后由 yulihua49 于 2010-06-22 20:39 编辑

QUOTE:

rsp没有加是因为你的函数mula()内没有对别的函数的调用，编译器知道后，就没必要调整rsp了，节省一条指令， ...
没本发表于 2010-06-22 19:57

高人。看到了。看来系统栈与应用栈是绝对分离的，其中不会因中断冲毁应用数据。

明白了。IA_64与X86_64还是有区别的。

作者: yulihua49 发布时间: 2010-06-22

从x86和x86_64的Linux和Windows WRK源码来看，系统栈和应用栈是分离的。

作者: 没本 发布时间: 2010-06-22

本帖最后由 yulihua49 于 2010-06-22 20:43 编辑

QUOTE:

不懂IA64位汇编
hellioncu 发表于 2010-06-22 16:26

简单啊，eax变成rax就是64位的了。
就是函数自变量改寄存器传送了，可能是编译器的区别吧，但这样，不同编译器产生的模块不能互相连接了。是否能用
__stdc__ 声明标准栈调用呢？

作者: yulihua49 发布时间: 2010-06-22

本帖最后由 没本 于 2010-06-22 21:09 编辑

64位下的调用约定规范是AMD/Intel制定的，参数个数依次是：RCX, RDX, R8, R9, 栈。。。。。。, 调整RSP时，要把RCX..R9寄存器的栈空间也留出来，也就是几个参数就要几个参数的栈空间，不管是不是用的寄存器。不过GCC会优化，如果调用的函数没有下一层调用时，RSP不会变。

有兴趣可以看看段教学视频（英语字幕,Windows x64的）：http://www.woodmann.com/TiGa/videos/TiGa-vid5.htm

作者: 没本 发布时间: 2010-06-22

QUOTE:

64位下的调用约定规范是Intel制定的，参数个数依次是：RCX, RDX, R8, R9, 栈。。。。。。, 调整RSP时，要把 ...
没本发表于 2010-06-22 20:55

学习了！

作者: yulihua49 发布时间: 2010-06-22

本帖最后由 guoruimin 于 2010-06-22 22:11 编辑

/*---------- main.c ----------*/
#include <stdio.h>
void mula(long a, long b, long c[2]);
/*
 * Test driver: multiplies a and b with mula() and prints the result as
 * 32 hex digits, high word (c[1]) first, low word (c[0]) last.
 */
int main(int argc, char *argv[])
{
    long a, b;
    /* Zero-initialized: the plain-C mula() writes only c[0], so c[1] would otherwise be printed uninitialized. */
    long c[2] = { 0, 0 };

    a = 0x12345678a;
    b = 0x12345678b;
    mula(a, b, c);
    /* %lx expects unsigned long, so convert explicitly. */
    printf("%016lx%016lx\n", (unsigned long)c[1], (unsigned long)c[0]);
    return 0;
}
/*---------- mula.c ----------*/
/* Write the long-sized product a * b to c[0]; c[1] is not touched. */
void mula(long a, long b, long c[2])
{
    long *low_word = &c[0];

    *low_word = a * b;
}
/*---------- mula.S ----------*/
.file "mula.c"
.text
.p2align 4,,15
.globl mula
.type mula, @function
/*
 * void mula(long a, long b, long c[2])
 * ABI:   System V AMD64
 * In:    rdi = a, rsi = b, rdx = c
 * Out:   c[0] = low 64 bits, c[1] = high 64 bits of the signed product a * b
 * Clobb: rax, rdx, rdi, flags
 * Stack: none (no frame is set up, rsp is untouched)
 *
 * The compiler-generated body was the 64-bit-only form
 *     imulq %rdi, %rsi
 *     movq  %rsi, (%rdx)
 * and is replaced below by the one-operand imulq, which yields all 128 bits.
 */
mula:
.LFB2:
        movq    %rdi, %rax              /* rax = a (one-operand imulq multiplies by rax) */
        movq    %rdx, %rdi              /* rdi = c (imulq overwrites rdx) */
        imulq   %rsi                    /* rdx:rax = rax * b */
        movq    %rax, (%rdi)            /* c[0] = rax, the low half */
        movq    %rdx, 8(%rdi)           /* c[1] = rdx, the high half */
        ret
.LFE2:
        .size   mula, .-mula
.section .eh_frame,"a",@progbits        /* unwind information: one CIE followed by the FDE for mula */
.Lframe1:
.long .LECIE1-.LSCIE1                   /* CIE length */
.LSCIE1:
.long 0x0                               /* CIE id (0 marks a CIE in .eh_frame) */
.byte 0x1                               /* CIE version */
.string "zR"                            /* augmentation: length-prefixed data, FDE pointer encoding present */
.uleb128 0x1                            /* code alignment factor */
.sleb128 -8                             /* data alignment factor */
.byte 0x10                              /* return-address column (DWARF register 16) */
.uleb128 0x1                            /* augmentation data length */
.byte 0x3                               /* FDE pointer encoding: DW_EH_PE_udata4 */
.byte 0xc                               /* DW_CFA_def_cfa */
.uleb128 0x7                            /*   register 7 (rsp) */
.uleb128 0x8                            /*   offset 8: CFA = rsp + 8 at function entry */
.byte 0x90                              /* DW_CFA_offset, register 16 (return address) */
.uleb128 0x1                            /*   factored offset 1 * -8: saved at CFA - 8 */
.align 8
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1                  /* FDE length */
.LASFDE1:
.long .LASFDE1-.Lframe1                 /* offset back to the CIE */
.long .LFB2                             /* start address of the covered code (mula) */
.long .LFE2-.LFB2                       /* size of the covered code */
.uleb128 0x0                            /* augmentation data length; no CFA instructions follow because mula sets up no frame */
.align 8
.LEFDE1:
.ident "GCC: (Debian 4.3.2-1.1) 4.3.2"
.section .note.GNU-stack,"",@progbits

复制代码

gcc -O2 -S mula.c -o mula.S
优化过的汇编代码，根本就没有用 rsp, rbp。
修改一下 mula.S，如上。
gcc main.c mula.S -o mula
./mula
00000000000000014b66dc35d989bdee

作者: guoruimin 发布时间: 2010-06-22

回复没本

那是 windows 下的 Calling convention，不是 amd 所定义的。windows 是没有遵循 amd 的规定
amd 所定义的 AMD64 ABI 在：http://www.x86-64.org/documentation/abi-0.99.pdf

AMD64 ABI 所定义的是：
1、6个通用寄存器（rdi,rsi,rdx,rcx,r8,r9）依次用于传递整型/指针参数；rax 不传递普通参数，它用于返回值，调用可变参数函数时 al 还用来传递所用向量寄存器的个数。
2、rsp 及 rbp 用于管理堆栈
3、r10 及 r11 用于临时寄存器
4、5个通用寄存器（r12,r13,r14,r15 及 rbx）由被调用方保存

作者: mik 发布时间: 2010-06-22

是的，我搞错了，那是Windows x64的stdcall调用约定。

作者: 没本 发布时间: 2010-06-22

windows 的 Calling convention 在一篇文章介绍：

http://msdn.microsoft.com/en-us/library/ms235286.aspx

作者: mik 发布时间: 2010-06-22