Mark Smotherman. Last updated September 2002.
(under construction)
IA-64 uses overlapping register windows of variable size, along with a unique
RSE (register stack engine) that automatically spills/fills registers (along
with their NaT bits) to/from memory.
dedicated registers
cfm - current frame marker, contains these fields
rrb.pr - register rename base for predicate registers
rrb.fr - register rename base for floating-point registers
rrb.gr - register rename base for general registers
sor - size of rotating portion of stack frame
sol - size of locals portion of stack frame
sof - size of stack frame
pfs - previous function state (contains pfm - previous frame marker)
rsc - register stack configuration, contains fields that control
aggressiveness of RSE
bsp - backing store pointer
bspstore - backing store pointer for memory stores
rnat - RSE NaT collection
register conventions
r12 - stack pointer
subroutine instructions
(bp) br.call b1 = subr - call branch with branch predicate (bp)
the current value of the cfm, along with other values, is saved into
the pfs register; the new stack frame consists of only the caller's out
registers; the return address is placed in branch register b1
(bp) br.ret b2 - return branch with branch predicate (bp)
the cfm, along with other values, is restored from the pfs register;
the caller's stack frame is restored
special instructions for register windows
alloc r1 = ar.pfs,i,l,o,r - allocate stack frame
the previous function state register is copied to r1;
the four parameters are
i - size of inputs
l - size of locals
o - size of outputs, and
r - size of rotating portion of registers
new sof = i+l+o (can be at max = 96)
new sol = i+l
flushrs - flush register stack to backing store
generated assembly code for Itanium from gcc 2.96 (optimization turned on)
void main() { main:
void swap(); .prologue 12, 33
int a,b; .save ar.pfs, r34
a = 5; b = 44; alloc r34 = ar.pfs, 0, 3, 2, 0
swap(&a,&b); .fframe 16
} adds r12 = -16, r12
;;
adds r14 = 16, r12
addl r15 = 44, r0
.save rp, r33
mov r33 = b0
;;
.body
mov r36 = r14
addl r14 = 5, r0
;;
st4 [r36] = r14, 4
adds r14 = 16, r12
;;
st4 [r36] = r15
mov r35 = r14
br.call.sptk.many b0 = swap#
;;
mov ar.pfs = r34
mov b0 = r33
.restore sp
adds r12 = 16, r12
br.ret.sptk.many b0
void swap(x,y) swap:
int *x,*y; .prologue
{ .body
int temp; ld4 r15 = [r32]
temp = *x; ld4 r14 = [r33]
*x = *y; ;;
*y = temp; st4 [r32] = r14
return; st4 [r33] = r15
} br.ret.sptk.many b0
(assembly code for swap() is 34 instructions long with no optimization)
(to do: RSE)
[History of subroutines page] [Mark's homepage] [CPSC homepage] [Clemson Univ. homepage]
mark@cs.clemson.edu