



Study with the several resources on Docsity
Earn points by helping other students or get them with a premium plan
Prepare for your exams
Study with the several resources on Docsity
Earn points to download
Earn points by helping other students or get them with a premium plan
An overview of the knowledge required to write in assembly language for x86 architecture, including application binary interface (abi), instruction set architecture (isa), registers/flags, assembler usage, calling sequence, stack frame, addressing modes, integer movement/init operations, common arithmetic operations, bit-level operations, and comparison instructions. It also covers x87 floating point registers and computations, and x86 assembly programming tricks.
Typology: Study notes
1 / 5
This page cannot be seen from the preview
Don't miss anything!




(^) Required Knowledge to Write in Assembly
(a) Argument passing (b) Stack handling (c) Register conventions
but most assemblers use suggested mnemonics
(^) These are the instructions out of which you must build programs
(a) Assembler
directives
(pre-
fixed by
(b) Operand order (
src, dest
(c) Const identifier (
(d) Register identifiers (
(e) x86/gas
(^) suffixes
(^) commands
i. with precision qualifier: b : 1 byte int
ii. (^) w (^) : 2 byte int
iii. (^) l (^) : 4 byte int
iv. (^) q (^) : 8 byte int
v. (^) l (^) : 8 byte float
vi. (^) s (^) : 4 byte float
(^) Further Resources
80x86 Assembly Language and Computer Architecture
(^) by (^) Richard
(^) x86 Calling Sequence and Stack Frame
Stack grows downward in memory
Caller
(^) puts
(^) callees’
(^) args
(^) in (^) its
frame
Frame 4-byte (32 bit) aligned
If callee needs no scratch space, can leave SP unmodified
Otherwise,
(^) subtract frame size
from SP, keeping 4-byte aligned
... last arg Caller’s frame
%esp+
1st arg
%esp
return address
Stack frame passed to callee
(^) x86 Integer Registers
general purpose
All but
(^) eax , (^) edx , (^) ecx (^) callee-saved
8 regs of every size
esp (^) is stack pointer
ebp (^) can be frame ptr
eax (^) is integer return val reg
int64:
edx ← hi32,
eax ← lo
index
Also have:EDI
EIP/IP – instruction ptr
(^) control
status bits
(^) x86 Assembly Overview
:
∗^ Dest is input/output! add src, dst
(^) dst = dst + src;
Most ops take
(^) (mem)/reg/immed, rdest
(^) call this
(^) mris, rd
constants begin with $, regs with %, mem is address
Ops with memory operands need size suffix (b,w,l,q,[l,s])
Usually only one operand can be from memory
CISC format: diff inst of different sizes/efficiencies
Generally, arithmetic affects status word, movement does not
(^) x86 Addressing Modes
CONST(breg, nreg, mul)
@ = (^) val(breg) + val(nreg)*mul
CONST is a signed integer constant:
[-128, 127] : 8 bit offset
else : 32 bit offset
breg : reg holding base address
nreg : index reg
mul : amount to mul index value
examples
→120(%esp) (^) Load item 120B above SP
→(%eax,%edi,4) (^) X[i]
(^) assuming
(^) edi=i
eax=X
(single)
→16(%eax,%ecx,8) (^) A[2+1*lda]
assuming
ecx =lda,
(^) eax =A (double)
(^) x86 Integer Movement/Init Operations
Mnemonic
Operands
Action
mov
rs, rd
rd = rs (
register copy
mov
rs, (mem)
*(mem) = rs (
store
)
mov
(mem), rd
rd = *(mem) (
load )
mov
const8/16/32, rd
rd = const; (
reg init from inst
xchg
r0, r
Exchanges reg contents (
register swap
xchg
r0, eax
Exchanges reg contents; eax faster
xchg
r0, (mem)
Exchanges reg and mem contents
(^) Common Integer Arithmetic Operations
Mnemonic
Operands
Action
add
mris, rd
rd += mris
sub
mris, rd
rd -= mris
inc
mrd
mrd++ (unsigned) (does not set CF)
dec
mrd
mrd −− (^) (unsigned) (does not set CF)
neg
mrd
mrd = -mrd
mul
mrs
(unsigned) eax = lo(eax*mrs); edx = hi(eax*mrs)
imul
mrs
(signed) eax = lo(eax*mrs); edx = hi(eax*mrs)
imul
mris, rd
(signed) rd = lo(rd*mrs); hi(rd*mrs) == 0
imul
imm, mrs, rd
(signed) rd = lo(imm*mrs); hi(imm*mrs) == 0
idiv/div
mrs
eax ← (hi32(edx):lo32(eax))/(mrs), edx ← rem
lea
(mem), rd
rd = &(mem) (extremely flexible!)
LEA (Load Effective Address) can be used for multiply, shift, and/or add, without needing scratch regs:
takes any valid indexing mode, puts target @ in rdest
ecx = 8 + 4*eax + ebx
(^) lea 8(ebx,eax,4), ecx
(^) x87 Floating Point Registers
ST(0) aka ST (stack top)
For most fp ops,
(^) one operand
operand
register
or
memory
As new values are loaded, values pushed down in stack
Two regs swapped via fxch
Values added by push ops (implicit/explicit)
Values removed by pop ops (implicit/explicit)
(^) Common x87 Stack Manipulation Instructions
Mnemonic
Operands
Action
finit
none
init FPU & clear stack
fld
(mem@)
*(mem@) pushed onto stack
fld
st(x)
ST(x) pushed onto stack
fld1
none
1.0 pushed onto stack
fldz
none
0.0 pushed onto stack
fst
st(x)
ST(x) = ST
fstp
st(x)
ST(x) = ST; ST popped
fst
(mem@)
*(mem@) = ST
fstp
(mem@)
*(mem@) = ST; ST popped
fxch
none
swap ST and ST(1)
fxch
st(x)
swap ST and ST(x)
(^) Common x87 Floating Point Computation Instructions
Mnemonic
Operands
Action
fadd(fmul)
none
replace ST & ST(1) with their sum (product)
fadd(fmul)
(mem)
replace ST with its sum (product) with *(mem)
fadd(fmul)
st,st(x)
replace ST(x) with sum (product)
fadd(fmul)
st(x),st
replace ST with sum (product)
faddp(fmulp)
st,st(x)
replace ST(x) with sum (product), pop ST
fsub
none
replace ST & ST(1) with ST(1)-ST
fsub
(mem)
replace ST with ST - *(mem)
fsub
st,st(x)
ST(x) -= ST
fsub
st(x),st
ST -= ST(x)
fsubp
st,st(x)
ST(x) -= ST; pop ST
fsubr
none
replace ST & ST(1) with ST-ST(1)
fsubr
(mem)
replace ST with *mem - ST
fsubr
st,st(x)
ST(x) = ST - ST(x)
fsubr
st(x),st
ST = ST(x) - ST
fsubrp
st,st(x)
ST(x) = ST - ST(x); pop ST
fabs
none
fchs
none
(^) x87 Comparison Instructions
Mnem
Ops
cmp ST against?
fcom
none
fcom
st(x)
ST(x)
fcom
mem
*(mem)
ftst
none
fcomp
none
ST(1), pop ST
fcomp
st(x)
ST(x), pop ST
fcomp
mem
*(mem), pop ST
fcompp
none
ST(1), pop ST(0) & ST(1)
fstsw
ax
ax = fp status word
From PPRO on, have
(^) fcomi
(^) for all all-
register
(^) fcom
(^) variants that directly sets
int flags as shown in table
use fstsw to store fp status word to ax, then use
(^) test
or (^) bt (^) to trigger branches:
Result
of fcom
fcomi
: ZF
PF
CF
fp status bits set
(^) Three Sections for Assembly Routines
(^) prologue
Figures local frame size
Moves stack pointer
Saves all used callee-saved registers
Loads arguments from previous frame
(^) body
:
Function body
(^) epilogue
Restores saved registers (including SP)
Sets any return values
returns to caller
(^) If this separation is enforced, func body is unchanged as ABI changes
(^) Simple DAXPY in x86 Assembly
#define N
%eax
#define X
%edx
#define Y
%ecx
#define II
%edi
#define FSIZE 4
/*
 *                       4              8           16
 * void ATL_UAXPY(const int N, double alpha, TYPE *X,
 *                    20        24        28
 *                int incX, TYPE *Y, int incY)
 */
Prologue
.text
.global ATL_UAXPY
ATL_UAXPY:
finit
subl
$FSIZE, %esp # get frame space
movl
%edi, (%esp) # save reg
Load paras
movl
FSIZE+4(%esp), N
fldl
FSIZE+8(%esp)
movl
FSIZE+16(%esp), X
movl
FSIZE+24(%esp), Y
for (i=0; i < N; i++)
Y[i] = alpha * X[i] + Y[i]
cmp
$0, N
je
DONE
xorl
II, II
LOOP1: fldl
(X,II,8)
fmul
%st(1), %st # ST = {alpha*X[i], alpha}
faddl
(Y,II,8)
fstpl
(Y,II,8)
Increment II, and jmp back to top of loop
addl
$1, II
cmp
II, N
jne
LOOP1
Epilogue: restore regs and return
DONE: fstp
%st(0)
movl
(%esp), %edi
addl
$FSIZE, %esp
ret
(^) Avoiding Loop Comparison
On x86,
all int
ops
do implicit
comparison to 0
Can often save a comparison instruction by running loop backwards
for (i=N; i; i--)
Y[N-i] = alpha * X[N-i] + Y[N-i]
cmp
$0, N
je
DONE
movl
N, II
neg
II
lea
(X, N, 8), X
lea
(Y, N, 8), Y
LOOP1: fldl
(X,II,8)
fmul
%st(1), %st
faddl
(Y,II,8)
fstpl
(Y,II,8)
Increment II, and jmp back to top of loop
incl
II
jnz
LOOP1
(^) Misc x86 Assembly Tricks
(a) Misc Tips
Use
-save-temps -g
to debug
(ddd)
(^) assembly
x86 insts are more compact with signed-byte immediates, so use
subl $-128, %eax
not
addl $128, %eax
Before loop, add 128 to ptrs, and start from
(^) -128(%ptr)
(^) to max compact inst unrolling range
Try using
(^) .align [4,8,16]
, part
for loop label and br
Make sure unrolled blocks fit in inst window using
(^) xsize
(b) Finding the size of insn(s):
instsize.c:
main() { int findSize(void); printf("size of inst(s) = %d bytes\n",
findSize()); } findSize.S:
findSize:.global findSize.text ######################## mov
%esp, %eax
L1: addl
$SIZE, %eax
L2: mov
$L2-L1, %eax
ret
######################## gcc -DSIZE=XXX -o xsize instsize.c findSize.S
:
XXX=
30 : 5 bytes^