Complete, working programs you can assemble and run.
No dependencies on libc - direct system calls.
section .data
msg:        db      "Hello, World!", 10
msg_len:    equ     $ - msg                 ; byte count of msg, newline included

; Linux x86-64 system call numbers and file descriptors used below.
SYS_WRITE:  equ     1
SYS_EXIT:   equ     60
STDOUT:     equ     1

section .text
global _start

;-----------------------------------------------------------------------
; _start - program entry point (no libc, raw Linux system calls)
; Prints msg on standard output, then terminates with exit status 0.
; Never returns.
;-----------------------------------------------------------------------
_start:
        ; write(STDOUT, msg, msg_len)
        mov     eax, SYS_WRITE              ; 32-bit write zero-extends into rax
        mov     edi, STDOUT
        lea     rsi, [rel msg]              ; RIP-relative address of the text
        mov     edx, msg_len
        syscall

        ; exit(0)
        mov     eax, SYS_EXIT
        xor     edi, edi                    ; status = 0
        syscall

Compile and run:
nasm -felf64 hello.asm -o hello.o
ld hello.o -o hello
./hello
section .text
global main

;-----------------------------------------------------------------------
; int main(void)
; ABI:   System V AMD64 (entered from the C runtime when linked with gcc)
; Out:   eax = 30 (10 + 20); the C runtime uses it as the exit status
; Clobb: rax, rdx, flags
;-----------------------------------------------------------------------
main:
        mov     eax, 10
        mov     edx, 20                 ; rdx is volatile; rbx is callee-saved
                                        ; and must not be overwritten unsaved
        add     eax, edx                ; eax = 30
        ret

With libc:
nasm -felf64 add.asm -o add.o
gcc add.o -o add
section .data
numbers:    dq      10, 20, 30, 40, 50
count:      equ     ($ - numbers) / 8       ; element count (8 bytes per qword)

section .text
global sum_array

;-----------------------------------------------------------------------
; long sum_array(void)
; ABI:   System V AMD64
; Out:   rax = sum of the `count` qwords stored at `numbers`
; Clobb: rcx, rdi, flags
;-----------------------------------------------------------------------
sum_array:
        lea     rdi, [rel numbers]      ; rdi = ADDRESS of the array
                                        ; (mov would load numbers[0] instead)
        mov     ecx, count              ; rcx = elements remaining
        xor     eax, eax                ; rax = running sum = 0
.next:                                  ; local label: `loop` is a mnemonic
        test    rcx, rcx
        jz      .done
        add     rax, [rdi]              ; sum += *p
        add     rdi, 8                  ; p++
        dec     rcx
        jmp     .next
.done:
        ret                             ; rax = sum

Test in C:
#include <stdio.h>

/* Implemented in assembly; takes no arguments and returns the sum. */
extern long sum_array(void);

int main(void)
{
    long total = sum_array();

    printf("Sum: %ld\n", total);
    return 0;
}

Compile:
nasm -felf64 sum.asm -o sum.o
gcc -c test.c -o test.o && gcc sum.o test.o -o test
./test
section .text
global factorial

;-----------------------------------------------------------------------
; long factorial(long n)
; ABI:   System V AMD64
; In:    rdi = n
; Out:   rax = n!  (1 when n <= 1; the product wraps once it no longer
;        fits in 64 bits, i.e. for n > 20)
; Clobb: rdi, flags
; Stack: one 8-byte slot per recursion level, plus the return address
;-----------------------------------------------------------------------
factorial:
        cmp     rdi, 1                  ; if (n <= 1) return 1
        jle     .base_case

        push    rdi                     ; save n; this single push also makes
                                        ; rsp 16-byte aligned at the call
        dec     rdi
        call    factorial               ; rax = (n-1)!
        pop     rdi                     ; rdi = n again
        imul    rax, rdi                ; rax = n * (n-1)!
        ret

.base_case:
        mov     eax, 1
        ret

Test:
#include <stdio.h>

/* Implemented in assembly: returns n! for the given n. */
extern long factorial(long n);

int main(void)
{
    long five = factorial(5);   /* expected: 120 */
    long ten  = factorial(10);  /* expected: 3628800 */

    printf("5! = %ld\n", five);
    printf("10! = %ld\n", ten);
    return 0;
}

section .text
global find_max

;-----------------------------------------------------------------------
; long find_max(const long *array, long count)
; ABI:   System V AMD64
; In:    rdi = array pointer (signed 64-bit elements)
;        rsi = count
; Out:   rax = maximum value, or -1 when count == 0
;        (-1 is indistinguishable from a genuine maximum of -1)
; Clobb: rcx, rdx, flags
;-----------------------------------------------------------------------
find_max:
        test    rsi, rsi                ; empty array?
        jz      .empty

        mov     rax, [rdi]              ; max = array[0]
        mov     ecx, 1                  ; i = 1
.scan:                                  ; local labels: `loop` is a mnemonic,
                                        ; and global `done`/`skip` would clash
        cmp     rcx, rsi
        jge     .done
        mov     rdx, [rdi + rcx*8]      ; candidate = array[i]
        cmp     rax, rdx
        cmovl   rax, rdx                ; if (max < candidate) max = candidate
        inc     rcx
        jmp     .scan
.done:
        ret

.empty:
        mov     rax, -1
        ret

global string_length
;-----------------------------------------------------------------------
; long string_length(const char *s)
; ABI:   System V AMD64
; In:    rdi = pointer to a NUL-terminated string
; Out:   rax = length in bytes, excluding the terminator
; Clobb: flags
;-----------------------------------------------------------------------
string_length:
        xor     eax, eax                ; length = 0
.scan:                                  ; local label: `loop` is a mnemonic
        cmp     byte [rdi + rax], 0     ; reached the terminator?
        je      .done
        inc     rax
        jmp     .scan
.done:
        ret

Test:
#include <stdio.h>

/* Implemented in assembly: counts bytes up to the terminating NUL. */
extern long string_length(const char *s);

int main(void)
{
    long len = string_length("Hello");  /* expected: 5 */

    printf("Length of 'Hello': %ld\n", len);
    return 0;
}

extern printf
section .data
fizzbuzz:   db      "FizzBuzz", 10, 0
fizz:       db      "Fizz", 10, 0
buzz:       db      "Buzz", 10, 0
fmt_num:    db      "%d", 10, 0

section .text
global fizzbuzz_main

;-----------------------------------------------------------------------
; void fizzbuzz_main(void)
; ABI:   System V AMD64; calls the variadic libc function printf
; Prints one line for each i in 1..100: "FizzBuzz" when i is divisible
; by 15, "Fizz" when divisible by 3, "Buzz" when divisible by 5,
; otherwise i itself.
; Regs:  rbx = i. It is callee-saved, so it survives every printf call
;        (rcx and the other volatile registers may be overwritten).
; Stack: push rbp + push rbx + 8 bytes of padding keep rsp 16-byte
;        aligned at each call.
;-----------------------------------------------------------------------
fizzbuzz_main:
        push    rbp
        mov     rbp, rsp
        push    rbx
        sub     rsp, 8                  ; alignment padding

        mov     ebx, 1                  ; i = 1
.next_number:                           ; local label: `loop` is a mnemonic
        cmp     ebx, 100                ; while (i <= 100)
        jg      .finished

        mov     eax, ebx
        xor     edx, edx                ; edx:eax = i
        mov     ecx, 15
        div     ecx                     ; edx = i % 15
        test    edx, edx
        jnz     .try_fizz
        lea     rdi, [rel fizzbuzz]     ; pass the ADDRESS of the string
        jmp     .print

.try_fizz:
        mov     eax, ebx
        xor     edx, edx
        mov     ecx, 3
        div     ecx                     ; edx = i % 3
        test    edx, edx
        jnz     .try_buzz
        lea     rdi, [rel fizz]
        jmp     .print

.try_buzz:
        mov     eax, ebx
        xor     edx, edx
        mov     ecx, 5
        div     ecx                     ; edx = i % 5
        test    edx, edx
        jnz     .plain_number
        lea     rdi, [rel buzz]
        jmp     .print

.plain_number:
        lea     rdi, [rel fmt_num]
        mov     esi, ebx                ; %d argument

.print:
        xor     eax, eax                ; al = 0: no vector-register arguments
        call    printf wrt ..plt        ; PLT call also links as a PIE
        inc     ebx
        jmp     .next_number

.finished:
        add     rsp, 8
        pop     rbx
        pop     rbp
        ret

global quicksort
;-----------------------------------------------------------------------
; void quicksort(long *array, long left, long right)
; ABI:   System V AMD64
; In:    rdi = array of signed 64-bit integers
;        rsi = left  (index of the first element to sort)
;        rdx = right (index of the last element to sort, inclusive)
; Out:   array[left..right] sorted in ascending order, in place
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, r10, flags
; Notes: Lomuto partition with pivot = array[right], followed by two
;        recursive calls. Recursion depth grows to about n on input
;        that is already sorted.
; Regs:  rbx = array, r12 = left (pivot index after partitioning),
;        r13 = right. All three are callee-saved, so they survive the
;        recursive calls.
; Stack: three pushes on top of the return address keep rsp 16-byte
;        aligned at each call.
;-----------------------------------------------------------------------
quicksort:
        cmp     rsi, rdx                ; fewer than two elements?
        jge     .return

        push    rbx
        push    r12
        push    r13

        mov     rbx, rdi
        mov     r12, rsi
        mov     r13, rdx

        mov     r8, [rbx + r13*8]       ; r8  = pivot value
        mov     rcx, r12                ; rcx = store: next slot for a value < pivot
        mov     rax, r12                ; rax = scan index
.partition_loop:
        cmp     rax, r13
        jge     .partition_done
        mov     r9, [rbx + rax*8]       ; r9 = array[scan]
        cmp     r9, r8
        jge     .keep                   ; values >= pivot stay on the right
        mov     r10, [rbx + rcx*8]      ; swap array[scan] <-> array[store]
        mov     [rbx + rcx*8], r9
        mov     [rbx + rax*8], r10
        inc     rcx
.keep:
        inc     rax
        jmp     .partition_loop

.partition_done:
        mov     r9, [rbx + rcx*8]       ; move the pivot into its final slot:
        mov     [rbx + rcx*8], r8       ;   array[store] = pivot
        mov     [rbx + r13*8], r9       ;   array[right] = old array[store]

        ; quicksort(array, left, pivot_index - 1)
        mov     rdi, rbx
        mov     rsi, r12
        lea     rdx, [rcx - 1]
        mov     r12, rcx                ; left is no longer needed: keep pivot index
        call    quicksort

        ; quicksort(array, pivot_index + 1, right)
        mov     rdi, rbx
        lea     rsi, [r12 + 1]
        mov     rdx, r13
        call    quicksort

        pop     r13
        pop     r12
        pop     rbx
.return:
        ret

global bitwise_demo
section .text

;-----------------------------------------------------------------------
; long bitwise_demo(void)
; ABI:   System V AMD64
; Out:   rax = 0xFF (the value left after the rotate pair)
; Clobb: rcx, flags
; Each logical operation starts again from 0b1100, so the result noted
; beside it is the value rax actually holds at that point.
;-----------------------------------------------------------------------
bitwise_demo:
        mov     ecx, 0b1010             ; second operand; rcx is volatile
                                        ; (rbx is callee-saved)

        mov     eax, 0b1100
        and     rax, rcx                ; 0b1100 AND 0b1010 = 0b1000 = 8

        mov     eax, 0b1100
        or      rax, rcx                ; 0b1100 OR  0b1010 = 0b1110 = 14

        mov     eax, 0b1100
        xor     rax, rcx                ; 0b1100 XOR 0b1010 = 0b0110 = 6

        mov     eax, 0xFF
        ror     rax, 4                  ; low nibble wraps to the top:
                                        ; rax = 0xF00000000000000F
        rol     rax, 4                  ; rotate back: rax = 0xFF
        ret

Basic steps:
# Assemble
nasm -felf64 program.asm -o program.o
# Link with GCC (if using libc)
gcc program.o -o program
# Or link with ld (no libc; the program must define _start)
ld program.o -o program
# Run
./program
# With GDB
gdb ./program
(gdb) break main
(gdb) run
(gdb) step
(gdb) info registers
(gdb) x/20i $rip
- Minimize register usage - More registers = more register pressure
- Avoid memory access in loops - Load once, use multiple times
- Use SIMD for bulk operations - Arrays/matrices benefit
- Minimize branches - Modern CPUs suffer from branch misprediction
- Align data properly - Faster memory access
You've now learned assembly from fundamentals to practical programming!
Recommended next topics:
- Study existing code (Linux kernel has good assembly)
- Learn your target architecture deeply (Intel/AMD manuals)
- Practice optimization (profile real code)
- Dive into specific domains (cryptography, graphics, etc.)
Resources:
- Intel 64 and IA-32 Architectures Software Developer Manual
- AMD64 Architecture Programmer's Manual
- x64 Cheat Sheet (quickly reference instructions)
- Online assembly playgrounds and IDEs
Congratulations on learning Assembly Language!