Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
433 changes: 213 additions & 220 deletions src/builtins_extra.asm

Large diffs are not rendered by default.

24 changes: 16 additions & 8 deletions src/eval.asm
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ extern op_import_from
extern op_binary_op_add_int
extern op_binary_op_sub_int
extern op_compare_op_int
extern op_compare_op_int_jump_false
extern op_compare_op_int_jump_true
extern op_binary_op_add_float
extern op_binary_op_sub_float
extern op_binary_op_mul_float
extern op_binary_op_truediv_float
extern op_binary_op_mul_int
extern op_binary_op_floordiv_int
extern op_for_iter_list
extern op_for_iter_range

Expand Down Expand Up @@ -1414,14 +1422,14 @@ opcode_table:
dq op_binary_op_sub_int ; 212 = BINARY_OP_SUBTRACT_INT (specialized)
dq op_for_iter_list ; 213 = FOR_ITER_LIST (specialized)
dq op_for_iter_range ; 214 = FOR_ITER_RANGE (specialized)
dq op_unimplemented ; 215
dq op_unimplemented ; 216
dq op_unimplemented ; 217
dq op_unimplemented ; 218
dq op_unimplemented ; 219
dq op_unimplemented ; 220
dq op_unimplemented ; 221
dq op_unimplemented ; 222
dq op_compare_op_int_jump_false ; 215 = COMPARE_OP_INT_JUMP_FALSE (superinstruction)
dq op_compare_op_int_jump_true ; 216 = COMPARE_OP_INT_JUMP_TRUE (superinstruction)
dq op_binary_op_add_float ; 217 = BINARY_OP_ADD_FLOAT (specialized)
dq op_binary_op_sub_float ; 218 = BINARY_OP_SUB_FLOAT (specialized)
dq op_binary_op_mul_float ; 219 = BINARY_OP_MUL_FLOAT (specialized)
dq op_binary_op_truediv_float ; 220 = BINARY_OP_TRUEDIV_FLOAT (specialized)
dq op_binary_op_mul_int ; 221 = BINARY_OP_MULTIPLY_INT (specialized)
dq op_binary_op_floordiv_int ; 222 = BINARY_OP_FLOORDIV_INT (specialized)
dq op_unimplemented ; 223
dq op_unimplemented ; 224
dq op_unimplemented ; 225
Expand Down
26 changes: 23 additions & 3 deletions src/frame.asm
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,32 @@ DEF_FUNC frame_new
test ecx, ecx
jz .done
push rax ; save frame pointer
mov rdi, rax ; rdi = frame (for localsplus base calc)
lea rdi, [rdi + PyFrame.localsplus]
lea rdi, [rax + PyFrame.localsplus]
cmp ecx, 4
ja .zero_large
; Small: unrolled stores for 1-4 slots (16 bytes each)
xor eax, eax
mov [rdi], rax
mov [rdi + 8], rax
cmp ecx, 1
je .zero_done
mov [rdi + 16], rax
mov [rdi + 24], rax
cmp ecx, 2
je .zero_done
mov [rdi + 32], rax
mov [rdi + 40], rax
cmp ecx, 3
je .zero_done
mov [rdi + 48], rax
mov [rdi + 56], rax
jmp .zero_done
.zero_large:
xor eax, eax
mov ecx, ecx ; zero-extend ecx (already done but be explicit)
mov ecx, ecx ; zero-extend ecx
shl ecx, 1 ; 2 qwords per 16-byte slot
rep stosq ; store ecx qwords of 0 at [rdi]
.zero_done:
pop rax ; restore frame pointer

.done:
Expand Down
12 changes: 4 additions & 8 deletions src/itertools.asm
Original file line number Diff line number Diff line change
Expand Up @@ -472,14 +472,10 @@ DEF_FUNC_LOCAL enumerate_iternext
mov r12, rax ; r12 = value payload from iternext
push rdx ; save value tag from iternext

; Create SmallInt for current count
mov rdi, [rbx + IT_FIELD2] ; it_count (raw i64)
call int_from_i64
mov r13, rax ; r13 = count payload
push rdx ; save count tag from int_from_i64

; Increment it_count
inc qword [rbx + IT_FIELD2]
; Inline SmallInt for current count (int_from_i64 always returns SmallInt)
mov r13, [rbx + IT_FIELD2] ; r13 = count (raw i64 = SmallInt payload)
inc qword [rbx + IT_FIELD2] ; increment for next time
push qword TAG_SMALLINT ; count tag (always SmallInt)

; Create 2-tuple
mov rdi, 2
Expand Down
29 changes: 29 additions & 0 deletions src/lib/memops.asm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,35 @@ DEF_FUNC_BARE ap_memset
ret
END_FUNC ap_memset

; ap_memmove(void *dst, const void *src, size_t n) -> void *dst
; Overlap-safe copy of floor(n / 8) qwords from src to dst. Callers are
; expected to pass n as a multiple of 8; any trailing n % 8 bytes are NOT copied.
; In:    rdi = dst, rsi = src, rdx = n (bytes)
; Out:   rax = dst
; Clobb: rcx, rsi, rdi, r8, flags
; Forward  (dst < src): rep movsq (fast). Assumes DF = 0 on entry, which is
;                       the usual System V AMD64 guarantee.
; Backward (dst > src): manual qword loop from the top down (avoids std penalty).
DEF_FUNC_BARE ap_memmove
    mov rax, rdi                    ; return value = original dst
    mov rcx, rdx
    shr rcx, 3                      ; rcx = qword count = n / 8
    jz .memmove_done                ; fewer than 8 bytes: nothing to copy
    cmp rdi, rsi
    je .memmove_done                ; dst == src: data is already in place
    jb .memmove_fwd                 ; dst < src (unsigned): ascending copy is safe
.memmove_bk:
    ; dst > src: copy from the highest qword down so that overlapping source
    ; qwords are read before they are overwritten.
    ; The end pointers are derived from the qword count rather than the raw
    ; byte count, so this path covers exactly the same bytes [0, 8*rcx) as the
    ; forward path even when n is not a multiple of 8.
    lea rsi, [rsi + rcx*8 - 8]      ; rsi = address of last source qword
    lea rdi, [rdi + rcx*8 - 8]      ; rdi = address of last destination qword
.memmove_bk_loop:
    mov r8, [rsi]
    mov [rdi], r8
    sub rsi, 8
    sub rdi, 8
    dec rcx                         ; rcx = qwords remaining, > 0 on loop entry
    jnz .memmove_bk_loop
    ret
.memmove_fwd:
    rep movsq                       ; copy rcx qwords, ascending addresses
.memmove_done:
    ret
END_FUNC ap_memmove

; ap_memcmp(const void *s1, const void *s2, size_t n) -> int
; Returns 0 if equal, <0 if s1<s2, >0 if s1>s2
DEF_FUNC_BARE ap_memcmp
Expand Down
75 changes: 32 additions & 43 deletions src/methods.asm
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ extern ap_free
extern ap_realloc
extern ap_memcpy
extern ap_memset
extern ap_memmove
extern ap_strcmp
extern ap_strlen
extern ap_strstr
Expand Down Expand Up @@ -4129,22 +4130,18 @@ DEF_FUNC list_method_pop
push qword [rax + rcx + 8] ; save item tag on stack
; Don't DECREF since we're transferring ownership to caller

; Shift items down: for i = index .. size-2, items[i] = items[i+1]
mov rcx, r13 ; i = index
mov rdx, [rbx + PyListObject.ob_size]
dec rdx ; size - 1
.pop_shift:
cmp rcx, rdx
jge .pop_shrink
; Shift items down: memmove(&items[idx], &items[idx+1], (size-1-idx)*16)
mov rax, [rbx + PyListObject.ob_item]
mov r8, rcx
shl r8, 4 ; i * 16
mov r9, [rax + r8 + 16] ; items[i+1] payload
mov r10, [rax + r8 + 24] ; items[i+1] tag
mov [rax + r8], r9 ; items[i] payload
mov [rax + r8 + 8], r10 ; items[i] tag
inc rcx
jmp .pop_shift
mov rcx, r13
shl rcx, 4 ; idx * 16
lea rdi, [rax + rcx] ; dst = &items[idx]
lea rsi, [rdi + 16] ; src = &items[idx+1]
mov rdx, [rbx + PyListObject.ob_size]
sub rdx, r13
dec rdx ; count = size - idx - 1
shl rdx, 4 ; bytes = count * 16
jz .pop_shrink ; nothing to shift if popping last
call ap_memmove

.pop_shrink:
dec qword [rbx + PyListObject.ob_size]
Expand Down Expand Up @@ -4222,21 +4219,17 @@ DEF_FUNC list_method_insert
mov [rbx + PyListObject.ob_item], rax
.ins_no_grow:

; Shift items up: for i = size-1 down to index, items[i+1] = items[i]
mov rcx, [rbx + PyListObject.ob_size]
dec rcx ; i = size - 1
.ins_shift:
cmp rcx, r12
jl .ins_place
; Shift items up: memmove(&items[idx+1], &items[idx], (size-idx)*16)
mov rax, [rbx + PyListObject.ob_item]
mov r8, rcx
shl r8, 4 ; i * 16
mov r9, [rax + r8] ; payload
mov r10, [rax + r8 + 8] ; tag
mov [rax + r8 + 16], r9 ; items[i+1] payload
mov [rax + r8 + 24], r10 ; items[i+1] tag
dec rcx
jmp .ins_shift
mov rcx, r12
shl rcx, 4 ; idx * 16
lea rsi, [rax + rcx] ; src = &items[idx]
lea rdi, [rsi + 16] ; dst = &items[idx+1]
mov rdx, [rbx + PyListObject.ob_size]
sub rdx, r12 ; count = size - idx
shl rdx, 4 ; bytes = count * 16
jz .ins_place ; nothing to shift if inserting at end
call ap_memmove

.ins_place:
; Place item at index (16-byte fat slot)
Expand Down Expand Up @@ -6417,22 +6410,18 @@ DEF_FUNC list_method_remove
mov r12, [rax + rcx] ; item payload (save for DECREF)
mov r13, [rax + rcx + 8] ; item tag (save for DECREF)

; Shift remaining items left (16-byte fat elements)
; Shift remaining items left: memmove(&items[idx], &items[idx+1], (size-1-idx)*16)
mov rax, [rbx + PyListObject.ob_item]
mov rcx, r14
shl rcx, 4 ; idx * 16
lea rdi, [rax + rcx] ; dst = &items[idx]
lea rsi, [rdi + 16] ; src = &items[idx+1]
mov rdx, [rbx + PyListObject.ob_size]
dec rdx ; size - 1
.lremove_shift:
cmp rcx, rdx
jge .lremove_shrink
mov rax, [rbx + PyListObject.ob_item]
mov r8, rcx
shl r8, 4 ; i * 16
mov r9, [rax + r8 + 16] ; items[i+1] payload
mov r10, [rax + r8 + 24] ; items[i+1] tag
mov [rax + r8], r9 ; items[i] payload
mov [rax + r8 + 8], r10 ; items[i] tag
inc rcx
jmp .lremove_shift
sub rdx, r14
dec rdx ; count = size - idx - 1
shl rdx, 4 ; bytes = count * 16
jz .lremove_shrink ; nothing to shift if removing last
call ap_memmove

.lremove_shrink:
dec qword [rbx + PyListObject.ob_size]
Expand Down
Loading