From 07f0eef60a9273dd6a362d803aa961ac8ce5d7da Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Thu, 20 Nov 2025 22:01:25 -0800
Subject: [PATCH 01/13] save (WIP): remove TTable from Board; pass per-thread SearchVars/BoardState into search and eval

---
 engine/bitboard.hpp   |   5 +-
 engine/boardstate.hpp |   2 -
 engine/eval.cpp       |  59 +++----
 engine/eval.hpp       |   4 +-
 engine/includes.hpp   |   1 +
 engine/search.cpp     | 393 ++++++++++++------------------------------
 engine/search.hpp     |  14 ++
 engine/ttable.cpp     |   2 +
 engine/ttable.hpp     |   2 +
 9 files changed, 161 insertions(+), 321 deletions(-)
diff --git a/engine/bitboard.hpp b/engine/bitboard.hpp
index d4f5acc..0a1419a 100644
--- a/engine/bitboard.hpp
+++ b/engine/bitboard.hpp
@@ -70,7 +70,6 @@ struct Board {
 	uint64_t pawn_hash = 0;
 	uint64_t nonpawn_hashval[2] = {0, 0}; // [side]
 	uint64_t major_hash = 0, minor_hash = 0;
-	TTable ttable;
 	pzstd::largevector<uint64_t> hash_hist;
 
 	// Mailbox representation of the board for faster queries of certain data
@@ -81,12 +80,12 @@ struct Board {
 	std::stack<HistoryEntry> move_hist;
 	std::stack<uint8_t> halfmove_hist;
 
-	Board(int ttsize=DEFAULT_TT_SIZE) : ttable(ttsize) {
+	Board() {
 		reset_board();
 		recompute_hash();
 	}
 
-	Board(std::string fen, int ttsize=DEFAULT_TT_SIZE) : ttable(ttsize) {
+	Board(std::string fen) {
 		load_fen(fen);
 		recompute_hash();
 	};
diff --git a/engine/boardstate.hpp b/engine/boardstate.hpp
index 9add549..c319079 100644
--- a/engine/boardstate.hpp
+++ b/engine/boardstate.hpp
@@ -8,5 +8,3 @@ struct BoardState {
 	Accumulator w_acc, b_acc;
 	Piece mailbox[64] = {};
 };
-
-extern BoardState bs[NINPUTS * 2][NINPUTS * 2];
\ No newline at end of file
diff --git a/engine/eval.cpp b/engine/eval.cpp
index bae237c..4651faf 100644
--- a/engine/eval.cpp
+++ b/engine/eval.cpp
@@ -60,15 +60,6 @@ float multi(int x) {
 __attribute__((constructor)) void init_network() {
 #ifndef HCE
 	nnue_network.load();
-	for (int j = 0; j < NINPUTS * 2; j++) {
-		for (int k = 0; k < NINPUTS * 2; k++) {
-			for (int i = 0; i < HL_SIZE; i++) {
-				bs[j][k].w_acc.val[i] = nnue_network.accumulator_biases[i];
-				bs[j][k].b_acc.val[i] = nnue_network.accumulator_biases[i];
-			}
-			for (int i = 0; i < 64; i++) bs[j][k].mailbox[i] = NO_PIECE;
-		}
-	}
 #endif
 }
 
@@ -239,7 +230,7 @@ std::array<Value, 8> debug_eval(Board &board) {
 	return {eval(board), 0, 0, 0, 0, 0, 0, 0};
 }
 #else
-Value eval(Board &board) {
+Value eval(Board &board, BoardState *bs) {
 	if (!(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])) {
 		// If black has no king, this is mate for white
 		return VALUE_MATE;
@@ -257,9 +248,12 @@ Value eval(Board &board) {
 	int winbucket = IBUCKET_LAYOUT[wkingsq];
 	int binbucket = IBUCKET_LAYOUT[bkingsq ^ 56];
 
+	// Convert bs to usable format
+	BoardState &state = *(bs + winbucket * NINPUTS * 2 + binbucket);
+
 	for (uint16_t i = 0; i < 64; i++) {
 		Piece piece = board.mailbox[i];
-		Piece prevpiece = bs[winbucket][binbucket].mailbox[i];
+		Piece prevpiece = state.mailbox[i];
 		if (piece == prevpiece) continue;
 		bool side = piece >> 3; // 1 = black, 0 = white
 		bool prevside = prevpiece >> 3; // 1 = black, 0 = white
@@ -269,28 +263,28 @@ Value eval(Board &board) {
 		if (piece != NO_PIECE) {
 			// Add to accumulator
 			uint16_t w_index = calculate_index((Square)i, pt, side, 0, winbucket);
-			accumulator_add(nnue_network, bs[winbucket][binbucket].w_acc, w_index);
+			accumulator_add(nnue_network, state.w_acc, w_index);
 			uint16_t b_index = calculate_index((Square)i, pt, side, 1, binbucket);
-			accumulator_add(nnue_network, bs[winbucket][binbucket].b_acc, b_index);
+			accumulator_add(nnue_network, state.b_acc, b_index);
 		}
 
 		if (prevpiece != NO_PIECE) {
 			// Subtract from accumulator
 			uint16_t w_index = calculate_index((Square)i, prevpt, prevside, 0, winbucket);
-			accumulator_sub(nnue_network, bs[winbucket][binbucket].w_acc, w_index);
+			accumulator_sub(nnue_network, state.w_acc, w_index);
 			uint16_t b_index = calculate_index((Square)i, prevpt, prevside, 1, binbucket);
-			accumulator_sub(nnue_network, bs[winbucket][binbucket].b_acc, b_index);
+			accumulator_sub(nnue_network, state.b_acc, b_index);
 		}
 	}
 
-	memcpy(bs[winbucket][binbucket].mailbox, board.mailbox, sizeof(bs[winbucket][binbucket].mailbox));
+	memcpy(state.mailbox, board.mailbox, sizeof(state.mailbox));
 
 	int nbucket = (npieces - 2) / 4;
 
 	if (board.side == WHITE) {
-		score = nnue_eval(nnue_network, bs[winbucket][binbucket].w_acc, bs[winbucket][binbucket].b_acc, nbucket);
+		score = nnue_eval(nnue_network, state.w_acc, state.b_acc, nbucket);
 	} else {
-		score = -nnue_eval(nnue_network, bs[winbucket][binbucket].b_acc, bs[winbucket][binbucket].w_acc, nbucket);
+		score = -nnue_eval(nnue_network, state.b_acc, state.w_acc, nbucket);
 	}
 	return score;
 }
@@ -313,46 +307,37 @@ std::array<Value, 8> debug_eval(Board &board) {
 	int winbucket = IBUCKET_LAYOUT[wkingsq];
 	int binbucket = IBUCKET_LAYOUT[bkingsq ^ 56];
 
+	Accumulator w_acc, b_acc;
+	for (int i = 0; i < HL_SIZE; i++) {
+		w_acc.val[i] = nnue_network.accumulator_biases[i];
+		b_acc.val[i] = nnue_network.accumulator_biases[i];
+	}
+
 	// Query the NNUE network
 	for (uint16_t i = 0; i < 64; i++) {
 		Piece piece = board.mailbox[i];
-		Piece prevpiece = bs[winbucket][binbucket].mailbox[i];
-		if (piece == prevpiece)
-			continue;
 		bool side = piece >> 3; // 1 = black, 0 = white
-		bool prevside = prevpiece >> 3; // 1 = black, 0 = white
 		PieceType pt = PieceType(piece & 7);
-		PieceType prevpt = PieceType(prevpiece & 7);
 
 		if (piece != NO_PIECE) {
 			// Add to accumulator
 			uint16_t w_index = calculate_index((Square)i, pt, side, 0, winbucket);
-			accumulator_add(nnue_network, bs[winbucket][binbucket].w_acc, w_index);
+			accumulator_add(nnue_network, w_acc, w_index);
 			uint16_t b_index = calculate_index((Square)i, pt, side, 1, binbucket);
-			accumulator_add(nnue_network, bs[winbucket][binbucket].b_acc, b_index);
-		}
-		
-		if (prevpiece != NO_PIECE) {
-			// Subtract from accumulator
-			uint16_t w_index = calculate_index((Square)i, prevpt, prevside, 0, winbucket);
-			accumulator_sub(nnue_network, bs[winbucket][binbucket].w_acc, w_index);
-			uint16_t b_index = calculate_index((Square)i, prevpt, prevside, 1, binbucket);
-			accumulator_sub(nnue_network, bs[winbucket][binbucket].b_acc, b_index);
+			accumulator_add(nnue_network, b_acc, b_index);
 		}
 	}
 
-	memcpy(bs[winbucket][binbucket].mailbox, board.mailbox, sizeof(bs[winbucket][binbucket].mailbox));
-
 	int npieces = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]);
 
 	std::array<Value, 8> score = {};
 	if (board.side == WHITE) {
 		for (int i = 0; i < 8; i++) {
-			score[i] = nnue_eval(nnue_network, bs[winbucket][binbucket].w_acc, bs[winbucket][binbucket].b_acc, i);
+			score[i] = nnue_eval(nnue_network, w_acc, b_acc, i);
 		}
 	} else {
 		for (int i = 0; i < 8; i++) {
-			score[i] = -nnue_eval(nnue_network, bs[winbucket][binbucket].b_acc, bs[winbucket][binbucket].w_acc, i);
+			score[i] = -nnue_eval(nnue_network, b_acc, w_acc, i);
 		}
 	}
 
diff --git a/engine/eval.hpp b/engine/eval.hpp
index c4bde42..0c0b425 100644
--- a/engine/eval.hpp
+++ b/engine/eval.hpp
@@ -5,9 +5,11 @@
 #include "includes.hpp"
 #include "boardstate.hpp"
 
+extern Network nnue_network;
+
 Value simple_eval(Board &board);
 
-Value eval(Board &board);
+Value eval(Board &board, BoardState *sv);
 
 std::array<Value, 8> debug_eval(Board &board);
 
diff --git a/engine/includes.hpp b/engine/includes.hpp
index 4bc9163..4dc14f0 100644
--- a/engine/includes.hpp
+++ b/engine/includes.hpp
@@ -24,6 +24,7 @@ constexpr bool WHITE = false;
 constexpr bool BLACK = true;
 
 constexpr int MAX_PLY = 300;
+constexpr int MAX_THREADS = 64;
 
 typedef int16_t Value;
 constexpr Value VALUE_ZERO = 0;
diff --git a/engine/search.cpp b/engine/search.cpp
index 8dc31a1..6f9ae15 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -3,13 +3,16 @@
 
 #define MOVENUM(x) ((((#x)[1] - '1') << 12) | (((#x)[0] - 'a') << 8) | (((#x)[3] - '1') << 4) | ((#x)[2] - 'a'))
 
-uint64_t nodes = 0; // Node count
-int seldepth = 0; // Maximum searched depth, including quiescence search
 uint64_t mx_nodes = 1e18; // Maximum nodes to search
 uint64_t mxtime = 1000; // Maximum time to search in milliseconds
 bool early_exit = false, exit_allowed = false; // Whether or not to exit the search, and if we are allowed to exit (so we don't exit on the depth 1)
 clock_t start = 0;
 
+Move pvtable[MAX_PLY][MAX_PLY];
+int pvlen[MAX_PLY];
+
+uint16_t num_threads = 1;
+
 uint64_t perft(Board &board, int depth) {
 	// If white's turn is beginning and black is in check
 	if (board.side == WHITE && board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[7]), WHITE))
@@ -66,15 +69,6 @@ __attribute__((constructor)) void init_mvvlva() {
 	}
 }
 
-History main_hist;
-
-SSEntry line[MAX_PLY]; // Currently searched line
-
-Move pvtable[MAX_PLY][MAX_PLY];
-int pvlen[MAX_PLY];
-
-uint64_t nodecnt[64][64];
-
 Move next_move(pzstd::vector<std::pair<Move, int>> &scores, int &end) {
 	if (end == 0) return NullMove; // Ran out
 	Move best_move = NullMove;
@@ -112,7 +106,7 @@ Value tt_to_score(Value score, int ply) {
 }
 
 double get_ttable_sz(Board &board) {
-	TTable &ttable = board.ttable;
+	TTable &ttable = ::ttable; // Must be qualified: an unqualified 'ttable' in this initializer names the reference being declared (self-binding, undefined behavior)
 	int cnt = 0;
 	for (int i = 0; i < 1024; i++) {
 		if (i >= ttable.TT_SIZE) break;
@@ -133,25 +127,25 @@ double get_ttable_sz(Board &board) {
  * - Search for checks and check evasions (every time I've tried this it has lost tons of elo)
  * - Late move reduction (instead of reducing depth, we reduce the search window) (not a known technique, maybe worth trying?)
  */
-Value quiesce(Board &board, Value alpha, Value beta, int side, int depth, bool pv=false) {
-	nodes++;
+Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, int depth, bool pv=false) {
+	sv.nodes++;
 
 	if (early_exit) return 0;
 
-	if (!(nodes & 4095)) {
+	if (!(sv.nodes & 4095)) {
 		// Check for early exit
 		// We check every 4096 nodes to avoid slowing down the search too much
 		uint64_t time = (clock() - start) / CLOCKS_PER_MS;
-		if ((time > mxtime || nodes > mx_nodes) && exit_allowed) {
+		if ((time > mxtime || sv.nodes > mx_nodes) && exit_allowed) {
 			early_exit = true;
 			return 0;
 		}
 	}
 
 	if (depth >= MAX_PLY)
-		return eval(board) * side; // Just in case
+		return eval(board, (BoardState *)sv.bs) * side; // Just in case
 
-	TTable::TTEntry *tentry = board.ttable.probe(board.zobrist);
+	TTable::TTEntry *tentry = ttable.probe(board.zobrist);
 	Value tteval = 0;
 	if (tentry && tentry->valid()) tteval = tt_to_score(tentry->eval, depth);
 	if (!pv && tentry && tentry->valid()) {
@@ -164,16 +158,15 @@ Value quiesce(Board &board, Value alpha, Value beta, int side, int depth, bool p
 		}
 	}
 
-	seldepth = std::max(depth, seldepth);
 	Value stand_pat = 0;
 	Value raw_eval = 0;
-	stand_pat = tentry ? tentry->s_eval : eval(board) * side;
+	stand_pat = tentry ? tentry->s_eval : eval(board, (BoardState *)sv.bs) * side;
 	raw_eval = stand_pat;
-	main_hist.apply_correction(board, stand_pat);
+	sv.history.apply_correction(board, stand_pat);
 	if (tentry && tentry->valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > stand_pat ? UPPER_BOUND : LOWER_BOUND))
 		stand_pat = tteval;
 
-	if (!tentry) board.ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
+	if (!tentry) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
 
 	// If it's a mate, stop here since there's no point in searching further
 	// Theoretically shouldn't ever happen because of stand pat
@@ -223,14 +216,14 @@ Value quiesce(Board &board, Value alpha, Value beta, int side, int depth, bool p
 			}
 		}
 
-		line[depth].move = move;
+		sv.line[depth].move = move;
 
 		board.make_move(move);
-		_mm_prefetch(&board.ttable.TT[board.zobrist % board.ttable.TT_SIZE], _MM_HINT_T0);
-		Value score = -quiesce(board, -beta, -alpha, -side, depth + 1, pv);
+		_mm_prefetch(&ttable.TT[board.zobrist % ttable.TT_SIZE], _MM_HINT_T0);
+		Value score = -quiesce(board, sv, -beta, -alpha, -side, depth + 1, pv);
 		board.unmake_move();
 
-		line[depth].move = NullMove;
+		sv.line[depth].move = NullMove;
 
 		if (score > best) {
 			if (score > alpha) {
@@ -241,23 +234,23 @@ Value quiesce(Board &board, Value alpha, Value beta, int side, int depth, bool p
 			best_move = move;
 		}
 		if (score >= beta) {
-			board.ttable.store(board.zobrist, score_to_tt(score, depth), raw_eval, 0, LOWER_BOUND, pv, move, depth);
+			ttable.store(board.zobrist, score_to_tt(score, depth), raw_eval, 0, LOWER_BOUND, pv, move, depth);
 			return best;
 		}
 	}
 
-	board.ttable.store(board.zobrist, score_to_tt(best, depth), raw_eval, 0, alpha_raise ? EXACT : UPPER_BOUND, pv, best_move, depth);
+	ttable.store(board.zobrist, score_to_tt(best, depth), raw_eval, 0, alpha_raise ? EXACT : UPPER_BOUND, pv, best_move, depth);
 
 	return best;
 }
 
-Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value beta = VALUE_INFINITE, int side = 1, bool pv = false, bool cutnode = false, int ply = 0, bool root = false) {
-	if (pv) pvlen[ply] = 0;
+Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_INFINITE, Value beta = VALUE_INFINITE, int side = 1, bool pv = false, bool cutnode = false, int ply = 0, bool root = false) {
+	if (pv) sv.pvlen[ply] = 0;
 
 	if (ply >= MAX_PLY)
-		return eval(board) * side;
+		return eval(board, (BoardState *)sv.bs) * side;
 
-	nodes++;
+	sv.nodes++;
 
 	if (early_exit) return 0;
 
@@ -314,16 +307,16 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 
 	if (depth <= 0) {
 		// Reached the maximum depth, perform quiescence search
-		return quiesce(board, alpha, beta, side, ply, pv);
+		return quiesce(board, sv, alpha, beta, side, ply, pv);
 	}
 
 	bool ttpv = pv;
 
 	// Check for TTable cutoff
-	TTable::TTEntry *tentry = board.ttable.probe(board.zobrist);
+	TTable::TTEntry *tentry = ttable.probe(board.zobrist);
 	Value tteval = 0;
 	if (tentry && tentry->valid()) tteval = tt_to_score(tentry->eval, ply);
-	if (!pv && tentry && tentry->depth >= depth && line[ply].excl == NullMove) {
+	if (!pv && tentry && tentry->depth >= depth && sv.line[ply].excl == NullMove) {
 		// Check for cutoffs
 		if (tentry->bound() == EXACT) {
 			return tteval;
@@ -342,19 +335,19 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 	uint64_t pawn_hash = 0;
 	if (!in_check) {
 		pawn_hash = board.pawn_struct_hash();
-		cur_eval = tentry ? tentry->s_eval : eval(board) * side;
+		cur_eval = tentry ? tentry->s_eval : eval(board, (BoardState *)sv.bs) * side;
 		raw_eval = cur_eval;
-		main_hist.apply_correction(board, cur_eval);
+		sv.history.apply_correction(board, cur_eval);
 		tt_corr_eval = cur_eval;
 		if (tentry && tentry->valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > cur_eval ? UPPER_BOUND : LOWER_BOUND))
 			tt_corr_eval = tteval;
-		else if (!tentry) board.ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
+		else if (!tentry) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
 	}
 
-	line[ply].eval = in_check ? VALUE_NONE : cur_eval; // If in check, we don't have a valid eval yet
+	sv.line[ply].eval = in_check ? VALUE_NONE : cur_eval; // If in check, we don't have a valid eval yet
 
 	bool improving = false;
-	if (!in_check && ply >= 2 && line[ply-2].eval != VALUE_NONE && cur_eval > line[ply-2].eval) improving = true;
+	if (!in_check && ply >= 2 && sv.line[ply-2].eval != VALUE_NONE && cur_eval > sv.line[ply-2].eval) improving = true;
 
 	// Reverse futility pruning
 	if (!in_check && !ttpv && depth <= 8) {
@@ -372,7 +365,7 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 	// Null-move pruning
 	int npieces = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]);
 	int npawns_and_kings = _mm_popcnt_u64(board.piece_boards[PAWN] | board.piece_boards[KING]);
-	if (!in_check && npieces != npawns_and_kings && tt_corr_eval >= beta && depth >= 2 && line[ply].excl == NullMove) { // Avoid NMP in pawn endgames
+	if (!in_check && npieces != npawns_and_kings && tt_corr_eval >= beta && depth >= 2 && sv.line[ply].excl == NullMove) { // Avoid NMP in pawn endgames
 		/**
 		 * This works off the *null-move observation*.
 		 * 
@@ -387,7 +380,7 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 		board.make_move(NullMove);
 		// Perform a reduced-depth search
 		Value r = NMP_R_VALUE + depth / 4 + std::min(3, (tt_corr_eval - beta) / 400) + improving;
-		Value null_score = -__recurse(board, depth - r, -beta, -beta + 1, -side, 0, !cutnode, ply+1);
+		Value null_score = -__recurse(board, sv, depth - r, -beta, -beta + 1, -side, 0, !cutnode, ply+1);
 		board.unmake_move();
 		if (null_score >= beta)
 			return null_score >= VALUE_MATE_MAX_PLY ? beta : null_score;
@@ -399,14 +392,14 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 		 * If we are losing by a lot, check w/ qsearch to see if we could possibly improve.
 		 * If not, we can prune the search.
 		 */
-		Value razor_score = quiesce(board, alpha, beta, side, ply, 0);
+		Value razor_score = quiesce(board, sv, alpha, beta, side, ply, 0);
 		if (razor_score <= alpha)
 			return razor_score;
 	}
 
 	Value best = -VALUE_INFINITE;
 
-	MovePicker mp(board, &line[ply], ply, &main_hist, tentry);
+	MovePicker mp(board, &sv.line[ply], ply, &sv.history, tentry);
 
 	if ((pv || cutnode) && depth > 4 && !(tentry && tentry->best_move != NullMove)) {
 		depth -= 2; // Internal iterative reductions
@@ -422,10 +415,10 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 	Move move = NullMove;
 	int i = 0;
 
-	uint64_t prev_nodes = nodes;
+	uint64_t prev_nodes = sv.nodes;
 
 	while ((move = mp.next()) != NullMove) {
-		if (move == line[ply].excl)
+		if (move == sv.line[ply].excl)
 			continue;
 		
 		bool capt = (board.piece_boards[OPPOCC(board.side)] & square_bits(move.dst()));
@@ -433,12 +426,12 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 		
 		int extension = 0;
 
-		if (line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry && move == tentry->best_move && tentry->depth >= depth - 3 && tentry->bound() != UPPER_BOUND) {
+		if (sv.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry && move == tentry->best_move && tentry->depth >= depth - 3 && tentry->bound() != UPPER_BOUND) {
 			// Singular extension
-			line[ply].excl = move;
+			sv.line[ply].excl = move;
 			Value singular_beta = tteval - 4 * depth;
-			Value singular_score = __recurse(board, (depth-1) / 2, singular_beta - 1, singular_beta, side, 0, cutnode, ply);
-			line[ply].excl = NullMove; // Reset exclusion move
+			Value singular_score = __recurse(board, sv, (depth-1) / 2, singular_beta - 1, singular_beta, side, 0, cutnode, ply);
+			sv.line[ply].excl = NullMove; // Reset exclusion move
 
 			if (singular_score < singular_beta) {
 				extension++;
@@ -453,9 +446,9 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 			}
 		}
 
-		line[ply].move = move;
+		sv.line[ply].move = move;
 
-		Value hist = capt ? main_hist.get_capthist(board, move) : main_hist.get_history(board, move, ply, &line[ply]);
+		Value hist = capt ? sv.history.get_capthist(board, move) : sv.history.get_history(board, move, ply, &sv.line[ply]);
 		if (best > -VALUE_MATE_MAX_PLY) {
 			if (i >= (5 + depth * depth) / (2 - improving)) {
 				/**
@@ -489,11 +482,11 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 			}
 		}
 
-		line[ply].cont_hist = &main_hist.cont_hist[board.side][board.mailbox[move.src()] & 7][move.dst()];
+		sv.line[ply].cont_hist = &sv.history.cont_hist[board.side][board.mailbox[move.src()] & 7][move.dst()];
 
 		board.make_move(move);
 
-		_mm_prefetch(&board.ttable.TT[board.zobrist % board.ttable.TT_SIZE], _MM_HINT_T0);
+		_mm_prefetch(&ttable.TT[board.zobrist % ttable.TT_SIZE], _MM_HINT_T0);
 
 		Value newdepth = depth - 1 + extension;
 
@@ -515,33 +508,33 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 
 			r -= 1024 * pv;
 			r += 1024 * (!pv && cutnode);
-			if (move == line[ply].killer[0] || move == line[ply].killer[1])
+			if (move == sv.line[ply].killer[0] || move == sv.line[ply].killer[1])
 				r -= 1024;
 			r -= 1024 * ttpv;
 			r -= hist / 16 * !capt;
 
 			Value searched_depth = depth - r / 1024;
 
-			score = -__recurse(board, searched_depth, -alpha - 1, -alpha, -side, 0, true, ply+1);
+			score = -__recurse(board, sv, searched_depth, -alpha - 1, -alpha, -side, 0, true, ply+1);
 			if (score > alpha && searched_depth < newdepth) {
-				score = -__recurse(board, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
+				score = -__recurse(board, sv, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
 			}
 		} else if (!pv || i > 0) {
-			score = -__recurse(board, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
+			score = -__recurse(board, sv, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
 		}
 		if (pv && (i == 0 || score > alpha)) {
 			if (tentry && move == tentry->best_move && tentry->depth > 1)
 				newdepth = std::max((int)newdepth, 1); // Make sure we don't enter QS if we have an available TT move
-			score = -__recurse(board, newdepth, -beta, -alpha, -side, 1, false, ply+1);
+			score = -__recurse(board, sv, newdepth, -beta, -alpha, -side, 1, false, ply+1);
 		}
 
 		board.unmake_move();
 
-		line[ply].cont_hist = nullptr;
+		sv.line[ply].cont_hist = nullptr;
 
 		if (root) {
-			nodecnt[move.src()][move.dst()] += nodes - prev_nodes;
-			prev_nodes = nodes;
+			sv.nodecnt[move.src()][move.dst()] += sv.nodes - prev_nodes;
+			prev_nodes = sv.nodes;
 		}
 
 		if (score > best) {
@@ -551,10 +544,10 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 				alpha_raise++;
 				flag = EXACT;
 				if (score < beta) {
-					pvtable[ply][0] = move;
-					pvlen[ply] = pvlen[ply+1]+1;
-					for (int i = 0; i < pvlen[ply+1]; i++) {
-						pvtable[ply][i+1] = pvtable[ply+1][i];
+					sv.pvtable[ply][0] = move;
+					sv.pvlen[ply] = sv.pvlen[ply+1]+1;
+					for (int i = 0; i < sv.pvlen[ply+1]; i++) {
+						sv.pvtable[ply][i+1] = sv.pvtable[ply+1][i];
 					}
 				}
 			}
@@ -567,21 +560,21 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 				// note that best and score are functionally equivalent here; best is just what's returned + stored to TT
 				best = (score * depth + beta) / (depth + 1); // wtf?????
 			}
-			if (line[ply].killer[0] != move) {
-				line[ply].killer[1] = line[ply].killer[0];
-				line[ply].killer[0] = move; // Update killer moves
+			if (sv.line[ply].killer[0] != move) {
+				sv.line[ply].killer[1] = sv.line[ply].killer[0];
+				sv.line[ply].killer[0] = move; // Update killer moves
 			}
 			const Value bonus = std::min(1896, 4 * depth * depth + 120 * depth - 120); // saturate updates at depth 12
 			if (!capt) { // Not a capture
-				main_hist.update_history(board, move, ply, &line[ply], bonus);
+				sv.history.update_history(board, move, ply, &sv.line[ply], bonus);
 				for (auto &qmove : quiets) {
-					main_hist.update_history(board, qmove, ply, &line[ply], -bonus); // Penalize quiet moves
+					sv.history.update_history(board, qmove, ply, &sv.line[ply], -bonus); // Penalize quiet moves
 				}
 			} else { // Capture
-				main_hist.update_capthist(PieceType(board.mailbox[move.src()] & 7), PieceType(board.mailbox[move.dst()] & 7), move.dst(), bonus);
+				sv.history.update_capthist(PieceType(board.mailbox[move.src()] & 7), PieceType(board.mailbox[move.dst()] & 7), move.dst(), bonus);
 			}
 			for (auto &cmove : captures) {
-				main_hist.update_capthist(PieceType(board.mailbox[cmove.src()] & 7), PieceType(board.mailbox[cmove.dst()] & 7), cmove.dst(), -bonus);
+				sv.history.update_capthist(PieceType(board.mailbox[cmove.src()] & 7), PieceType(board.mailbox[cmove.dst()] & 7), cmove.dst(), -bonus);
 			}
 			break;
 		}
@@ -597,7 +590,7 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 	// Stalemate detection
 	if (best == -VALUE_MATE) {
 		// If our engine thinks we are mated but we are not in check, we are stalemated
-		if (line[ply].excl != NullMove) return alpha;
+		if (sv.line[ply].excl != NullMove) return alpha;
 		else if (in_check) return -VALUE_MATE + ply;
 		else return 0;
 	}
@@ -608,12 +601,12 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 		&& !(best < alpha && best >= raw_eval) && !(best >= beta && best <= raw_eval)) {
 		// Best move is a quiet move, update CorrHist
 		int bonus = (best - raw_eval) * depth / 8;
-		main_hist.update_corrhist(board, bonus);
+		sv.history.update_corrhist(board, bonus);
 	}
 
-	if (line[ply].excl == NullMove) {
+	if (sv.line[ply].excl == NullMove) {
 		Move tt_move = best_move != NullMove ? best_move : tentry ? tentry->best_move : NullMove;
-		board.ttable.store(board.zobrist, score_to_tt(best, ply), raw_eval, depth, flag, ttpv, tt_move, board.halfmove);
+		ttable.store(board.zobrist, score_to_tt(best, ply), raw_eval, depth, flag, ttpv, tt_move, board.halfmove);
 	}
 
 	return best;
@@ -621,103 +614,6 @@ Value __recurse(Board &board, int depth, Value alpha = -VALUE_INFINITE, Value be
 
 int g_quiet;
 
-pzstd::vector<std::pair<Move, Value>> __search_multipv(Board &board, int multipv, int depth, Value alpha = -VALUE_INFINITE, Value beta = VALUE_INFINITE, int side = 1) {
-	Move best_move[256];
-	Value best_score[256];
-
-	std::fill(best_move, best_move+256, NullMove);
-	std::fill(best_score, best_score+256, -VALUE_INFINITE);
-
-	auto min_score = [multipv](Value *best_score) {
-		Value min = best_score[0];
-		int idx = 0;
-		for (int i = 1; i < multipv; i++) {
-			if (best_score[i] < min) {
-				min = best_score[i];
-				idx = i;
-			}
-		}
-		return std::make_pair(min, idx);
-	};
-
-	TTable::TTEntry *tentry = board.ttable.probe(board.zobrist);
-
-	MovePicker mp(board, &line[0], 0, &main_hist, tentry);
-
-	Move move = NullMove;
-	int i = 0;
-
-	bool printing_currmove = false;
-	int alpha_raise = 0;
-
-	while ((move = mp.next()) != NullMove) {
-		if (depth >= 20 && nodes >= 10'000'000) {
-			if (!g_quiet) std::cout << "info depth " << depth << " currmove " << move.to_string() << " currmovenumber " << i+1 << std::endl;
-		}
-
-		auto res = min_score(best_score);
-
-		line[0].move = move;
-		board.make_move(move);
-		Value score;
-		Value used_alpha = res.first;
-		if (i > 0 && used_alpha != -VALUE_INFINITE) {
-			score = -__recurse(board, depth - reduction[i][depth] / 1024, -used_alpha - 1, -used_alpha, -side, 0);
-			if (score > used_alpha) {
-				score = -__recurse(board, depth - 1, -beta, -used_alpha, -side, 0);
-			}
-		} else {
-			score = -__recurse(board, depth - 1, -beta, -alpha, -side, 1);
-		}
-
-		board.unmake_move();
-
-		if (score > res.first) {
-			pvtable[0][0] = move;
-			pvlen[0] = pvlen[1]+1;
-			for (int i = 0; i < pvlen[1]; i++) {
-				pvtable[0][i+1] = pvtable[1][i];
-			}
-			if (score > alpha) {
-				alpha = score;
-				alpha_raise++;
-			}
-			best_score[res.second] = score;
-			best_move[res.second] = move;
-		}
-
-		if (score >= beta) {
-			if (line[0].killer[0] != move) {
-				line[0].killer[1] = line[0].killer[0];
-				line[0].killer[0] = move;
-			}
-			pzstd::vector<std::pair<Move, Value>> multipv_res;
-			multipv_res.push_back({move, score});
-			return multipv_res;
-		}
-
-		if (early_exit)
-			break;
-
-		i++;
-	}
-
-	Move final_best_move = NullMove;
-	Value final_best_score = -VALUE_INFINITE;
-	pzstd::vector<std::pair<Move, Value>> multipv_res;
-
-	for (int i = 0; i < multipv; i++) {
-		if (best_move[i] == NullMove) best_score[i] = -VALUE_INFINITE;
-		if (best_score[i] > final_best_score) {
-			final_best_score = best_score[i];
-			final_best_move = best_move[i];
-		}
-		multipv_res.push_back({best_move[i], best_score[i]});
-	}
-
-	return multipv_res;
-}
-
 void __print_pv(bool omit_last = 0) { // Need to omit last to prevent illegal moves during mates
 	const int ROOT_PLY = 0;
 	for (int i = 0; i < pvlen[ROOT_PLY] - omit_last; i++) {
@@ -735,13 +631,16 @@ void __print_pv_clipped(bool omit_last = 0) {
 	}
 }
 
+Value iterativedeepening(Board &board, SearchVars &sv, int quiet) { // Stub: no search logic implemented here yet; parameters are currently unused
+	return VALUE_ZERO; // Placeholder result so control never flows off the end of a non-void function (undefined behavior)
+}
+
 std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t maxnodes, int quiet) {
 	g_quiet = quiet;
 
 	uint64_t soft_nodes = 1e18;
 
 	std::cout << std::fixed << std::setprecision(0);
-	nodes = seldepth = 0;
 	early_exit = exit_allowed = false;
 	start = clock();
 	mxtime = time;
@@ -749,22 +648,22 @@ std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t max
 		mx_nodes = 1000000;
 		soft_nodes = maxnodes;
 	}
-	
-	// Clear killer moves and history heuristic
-	for (int i = 0; i < MAX_PLY; i++) {
-		line[i].killer[0] = line[i].killer[1] = NullMove;
-		pvlen[i] = 0;
-	}
 
-	for (int i = 0; i < 64; i++) {
-		for (int j = 0; j < 64; j++) {
-			nodecnt[i][j] = 0;
-			main_hist.history[0][i][j] /= 2;
-			main_hist.history[1][i][j] /= 2;
+	SearchVars searchvars[MAX_THREADS] = {};
+	for (int i = 0; i < num_threads; i++) {
+		for (int j = 0; j < NINPUTS * 2; j++) {
+			for (int k = 0; k < NINPUTS * 2; k++) {
+				for (int l = 0; l < HL_SIZE; l++) {
+					searchvars[i].bs[j][k].w_acc.val[l] = nnue_network.accumulator_biases[l];
+					searchvars[i].bs[j][k].b_acc.val[l] = nnue_network.accumulator_biases[l];
+				}
+				for (int l = 0; l < 64; l++) searchvars[i].bs[j][k].mailbox[l] = NO_PIECE; // Empty cached board: eval() subtracts features for every cached piece != NO_PIECE, so the cache must start as NO_PIECE, not raw zero bytes
+			}
 		}
 	}
+	searchvars[0].is_main = true;
 
-	Value static_eval = eval(board) * (board.side ? -1 : 1);
+	Value static_eval = eval(board, &searchvars[0].bs[0][0]) * (board.side ? -1 : 1);
 
 	Move best_move = NullMove;
 	Value eval = -VALUE_INFINITE;
@@ -784,26 +683,33 @@ std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t max
 			alpha = eval - window_size;
 			beta = eval + window_size;
 		}
+
+		pzstd::vector<Value> results;
 		
-		auto result = __recurse(board, d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
-		
-		// Gradually expand the window if we fail high or low
-		while ((result >= beta || result <= alpha) && window_size < VALUE_INFINITE / 4) {
-			if (result >= beta) {
-				// Fail high - expand upper bound
-				beta = eval + window_size * 2;
-				if (beta >= VALUE_INFINITE / 4) beta = VALUE_INFINITE;
-			}
-			if (result <= alpha) {
-				// Fail low - expand lower bound  
-				alpha = eval - window_size * 2;
-				if (alpha <= -VALUE_INFINITE / 4) alpha = -VALUE_INFINITE;
+		for (int t = 0; t < num_threads; t++) {
+			auto result = __recurse(board, searchvars[t], d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
+			
+			// Gradually expand the window if we fail high or low
+			while ((result >= beta || result <= alpha) && window_size < VALUE_INFINITE / 4) {
+				if (result >= beta) {
+					// Fail high - expand upper bound
+					beta = eval + window_size * 2;
+					if (beta >= VALUE_INFINITE / 4) beta = VALUE_INFINITE;
+				}
+				if (result <= alpha) {
+					// Fail low - expand lower bound  
+					alpha = eval - window_size * 2;
+					if (alpha <= -VALUE_INFINITE / 4) alpha = -VALUE_INFINITE;
+				}
+				window_size *= 2;
+				result = __recurse(board, searchvars[t], d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
+				if (early_exit) break;
 			}
-			window_size *= 2;
-			result = __recurse(board, d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
 			if (early_exit) break;
+
+			results.push_back(result);
 		}
-		if (early_exit) break;
+
 		eval = result;
 		best_move = pvtable[0][0];
 
@@ -816,8 +722,6 @@ std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t max
 			in_check = board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)]), WHITE) > 0;
 		}
 
-		seldepth = std::max(seldepth, d);
-
 		#ifndef NOUCI
 		if (!quiet) {
 			if (abs(eval) >= VALUE_MATE_MAX_PLY) {
@@ -843,7 +747,7 @@ std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t max
 
 			uint64_t time_ms = (clock() - start) / CLOCKS_PER_MS;
 			uint64_t nps = time_ms > 0 ? (nodes * 1000 / time_ms) : 0;
-			uint32_t hashfull = get_ttable_sz(board) * 1000 / board.ttable.mxsize();
+			uint32_t hashfull = get_ttable_sz(board) * 1000 / ttable.mxsize();
 
 			std::string score_color;
 			std::string score_text;
@@ -917,80 +821,13 @@ std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t max
 	return {best_move, eval};
 }
 
-pzstd::vector<std::pair<Move, Value>> search_multipv(Board &board, int multipv, int64_t time, int depth, int64_t maxnodes, int quiet) {
-	pzstd::vector<std::pair<Move, Value>> results;
-
-	g_quiet = quiet;
-
-	std::cout << std::fixed << std::setprecision(0);
-	nodes = seldepth = 0;
-	early_exit = exit_allowed = false;
-	start = clock();
-	mxtime = time;
-	mx_nodes = maxnodes;
-	
-	// Clear killer moves and history heuristic
-	for (int i = 0; i < MAX_PLY; i++) {
-		line[i].killer[0] = line[i].killer[1] = NullMove;
-		pvlen[i] = 0;
-	}
-
-	for (int i = 0; i < 64; i++) {
-		for (int j = 0; j < 64; j++) {
-			main_hist.history[0][i][j] = main_hist.history[1][i][j] = 0;
-		}
-	}
-
-	pzstd::vector<std::pair<Move, Value>> multipv_res;
-
-	for (int d = 1; d <= depth; d++) {
-		auto result = __search_multipv(board, multipv, d, -VALUE_INFINITE, VALUE_INFINITE, board.side ? -1 : 1);
-
-		if (early_exit)
-			break;
-
-		multipv_res = result;
-		
-		std::stable_sort(multipv_res.begin(), multipv_res.end(), [](const auto &a, const auto &b) {
-			return a.second > b.second;
-		});
-
-		if (!quiet) {
-			for (int i = 0; i < multipv; i++) {
-				Value eval = multipv_res[i].second;
-				if (eval == -VALUE_INFINITE || multipv_res[i].first == NullMove) break;
-
-				if (abs(eval) >= VALUE_MATE_MAX_PLY) {
-					std::cout << "info depth " << d << " seldepth " << seldepth << " multipv " << i+1 << " score mate " << (VALUE_MATE - abs(eval)) / 2 * (eval > 0 ? 1 : -1) << " nodes "
-					<< nodes << " nps " << (nodes / ((double)(clock() - start) / CLOCKS_PER_SEC)) << " pv " << multipv_res[i].first.to_string()
-					<< " hashfull " << (get_ttable_sz(board) * 1000 / board.ttable.mxsize()) << " time " << (clock() - start) / CLOCKS_PER_MS << std::endl;
-				} else {
-					std::cout << "info depth " << d << " seldepth " << seldepth << " multipv " << i+1 << " score cp " << eval << " nodes " << nodes << " nps "
-					<< (nodes / ((double)(clock() - start) / CLOCKS_PER_SEC)) << " pv " << multipv_res[i].first.to_string()
-					<< " hashfull " << (get_ttable_sz(board) * 1000 / board.ttable.mxsize()) << " time " << (clock() - start) / CLOCKS_PER_MS << std::endl;
-				}
-			}
-		}
-
-		exit_allowed = true;
-	}
-
-	return multipv_res;
-}
-
-void clear_search_vars() {
-	nodes = seldepth = 0;
+void clear_search_vars(SearchVars &sv) {
+	sv.nodes = sv.seldepth = 0;
 	early_exit = exit_allowed = false;
 	for (int i = 0; i < MAX_PLY; i++) {
-		pvlen[i] = 0;
-		line[i] = SSEntry();
+		sv.pvlen[i] = 0;
+		sv.line[i] = SSEntry();
 	}
 
-	memset(main_hist.history, 0, sizeof(main_hist.history));
-	memset(main_hist.corrhist_prev, 0, sizeof(main_hist.corrhist_prev));
-	memset(main_hist.capthist, 0, sizeof(main_hist.capthist));
-	memset(main_hist.corrhist_ps, 0, sizeof(main_hist.corrhist_ps));
-	memset(main_hist.corrhist_mat, 0, sizeof(main_hist.corrhist_mat));
-	memset(main_hist.corrhist_np, 0, sizeof(main_hist.corrhist_np));
-	memset(main_hist.cont_hist, 0, sizeof(main_hist.cont_hist));
+	memset(&sv.history, 0, sizeof(sv.history));
 }
diff --git a/engine/search.hpp b/engine/search.hpp
index 2e1d31b..29608ff 100644
--- a/engine/search.hpp
+++ b/engine/search.hpp
@@ -49,6 +49,20 @@
 #define HISTORY_MARGIN 2000
 
 extern uint64_t nodes;
+extern uint16_t num_threads;
+
+struct SearchVars {
+	Board board;
+	uint64_t nodes = 0;
+	uint64_t nodecnt[64][64] = {{}};
+	int seldepth = 0;
+	bool is_main = false;
+	History history;
+	SSEntry line[MAX_PLY] = {};
+	Move pvtable[MAX_PLY][MAX_PLY];
+	int pvlen[MAX_PLY];
+	BoardState bs[NINPUTS * 2][NINPUTS * 2];
+};
 
 std::pair<Move, Value> search(Board &board, int64_t time = 1e9, int depth = MAX_PLY, int64_t nodes = 1e18, int quiet = 0);
 
diff --git a/engine/ttable.cpp b/engine/ttable.cpp
index 789e4b3..2a53662 100644
--- a/engine/ttable.cpp
+++ b/engine/ttable.cpp
@@ -1,5 +1,7 @@
 #include "ttable.hpp"
 
+TTable ttable(DEFAULT_TT_SIZE);
+
 void TTable::store(uint64_t key, Value eval, Value s_eval, uint8_t depth, uint8_t bound, bool ttpv, Move best_move, uint8_t age) {
 	TTBucket *bucket = TT + (key % TT_SIZE);
 
diff --git a/engine/ttable.hpp b/engine/ttable.hpp
index 8f93e24..a0d004d 100644
--- a/engine/ttable.hpp
+++ b/engine/ttable.hpp
@@ -5,6 +5,8 @@
 
 #define DEFAULT_TT_SIZE (16 * 1024 * 1024 / sizeof(TTable::TTBucket)) // 16 MB
 
+extern TTable ttable;
+
 enum TTFlag {
 	EXACT = 0,
 	LOWER_BOUND = 1, // eval might be higher than stored value

From 0f55c1d5566f5a07a3e1f80818a2f0c7b9311980 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 00:12:07 -0800
Subject: [PATCH 02/13] Implement lazy SMP

Bench: 9610168
---
 engine/eval.cpp         | 222 +------------------
 engine/eval.hpp         | 100 +--------
 engine/includes.hpp     |   2 +
 engine/main.cpp         |  68 +++---
 engine/nnue/network.hpp |   4 +-
 engine/search.cpp       | 480 ++++++++++++++++++----------------------
 engine/search.hpp       |  32 ++-
 engine/ttable.hpp       |  10 +-
 8 files changed, 287 insertions(+), 631 deletions(-)

diff --git a/engine/eval.cpp b/engine/eval.cpp
index 4651faf..819b9ca 100644
--- a/engine/eval.cpp
+++ b/engine/eval.cpp
@@ -3,64 +3,8 @@
 // Accumulator w_acc, b_acc;
 Network nnue_network;
 
-#ifdef HCE
-extern Bitboard king_movetable[64];
-
-static constexpr Value king_safety_lookup[9] = {-10, 20, 40, 50, 50, 50, 50, 50, 50};
-static constexpr Value multipawn_lookup[7] = {0, 0, 20, 40, 80, 160, 320};
-
-/**
- * @brief Boards to denote "good" squares for each piece type
- * @details The 8 boards map out an 8-bit signed binary number that represents how good or bad a square is for a piece type.
- * @details 127 is the best square for a piece, -128 is the worst.
- */
-Bitboard PAWN_SQUARES[8];
-Bitboard KNIGHT_SQUARES[8];
-Bitboard BISHOP_SQUARES[8];
-Bitboard ROOK_SQUARES[8];
-Bitboard QUEEN_SQUARES[8];
-Bitboard KING_SQUARES[8];
-Bitboard KING_ENDGAME_SQUARES[8];
-Bitboard PAWN_ENDGAME_SQUARES[8];
-
-Bitboard PASSED_PAWN_MASKS[2][64];
-
-__attribute__((constructor)) constexpr void gen_lookups() {
-	// Convert heatmaps
-	for (int i = 0; i < 8; i++) {
-		for (int j = 0; j < 64; j++) {
-			PAWN_SQUARES[7 - i] |= (((Bitboard)pawn_heatmap[j] >> i) & 1) << j;
-			KNIGHT_SQUARES[7 - i] |= (((Bitboard)knight_heatmap[j] >> i) & 1) << j;
-			BISHOP_SQUARES[7 - i] |= (((Bitboard)bishop_heatmap[j] >> i) & 1) << j;
-			ROOK_SQUARES[7 - i] |= (((Bitboard)rook_heatmap[j] >> i) & 1) << j;
-			QUEEN_SQUARES[7 - i] |= (((Bitboard)queen_heatmap[j] >> i) & 1) << j;
-			KING_SQUARES[7 - i] |= (((Bitboard)king_heatmap[j] >> i) & 1) << j;
-			KING_ENDGAME_SQUARES[7 - i] |= (((Bitboard)endgame_heatmap[j] >> i) & 1) << j;
-			PAWN_ENDGAME_SQUARES[7 - i] |= (((Bitboard)pawn_endgame[j] >> i) & 1) << j;
-		}
-	}
-
-	for (int i = 8; i < 56; i++) {
-		Bitboard white_mask = 0x0101010101010101ULL << (i + 8);
-		Bitboard black_mask = 0x8080808080808080ULL >> (71 - i);
-		PASSED_PAWN_MASKS[WHITE][i] = white_mask | ((white_mask << 1) & 0x0101010101010101) | ((white_mask >> 1) & 0x8080808080808080);
-		PASSED_PAWN_MASKS[BLACK][i] = black_mask | ((black_mask << 1) & 0x0101010101010101) | ((black_mask >> 1) & 0x8080808080808080);
-	}
-}
-
-float multi(int x) {
-	// If there are fewer pieces on the board, we should raise the magnitude of the eval
-	// This allows for the engine to prioritize trading pieces when ahead, especially in the endgame
-	// The main caveat is that this may cause the engine to draw by insufficient material
-	int diff = std::min(32 - x, 20); // Number of pieces taken off the board
-	return 1.0 + 0.02 * diff;
-}
-#endif
-
 __attribute__((constructor)) void init_network() {
-#ifndef HCE
 	nnue_network.load();
-#endif
 }
 
 Value simple_eval(Board &board) {
@@ -72,164 +16,7 @@ Value simple_eval(Board &board) {
 	return score;
 }
 
-#ifdef HCE
-Value eval(Board &board) {
-	if (!(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])) {
-		// If black has no king, this is mate for white
-		return VALUE_MATE;
-	}
-	if (!(board.piece_boards[KING] & board.piece_boards[OCC(WHITE)])) {
-		// Likewise, if white has no king, this is mate for black
-		return -VALUE_MATE;
-	}
-
-	Value material = 0;
-	Value piecesquare = 0;
-	Value castling = 0;
-	Value bishop_pair = 0;
-	Value king_safety = 0;
-	Value tempo_bonus = 0;
-	Value pawn_structure = 0;
-
-	material += PawnValue * _mm_popcnt_u64(board.piece_boards[PAWN] & board.piece_boards[OCC(WHITE)]);
-	material += KnightValue * _mm_popcnt_u64(board.piece_boards[KNIGHT] & board.piece_boards[OCC(WHITE)]);
-	material += BishopValue * _mm_popcnt_u64(board.piece_boards[BISHOP] & board.piece_boards[OCC(WHITE)]);
-	material += RookValue * _mm_popcnt_u64(board.piece_boards[ROOK] & board.piece_boards[OCC(WHITE)]);
-	material += QueenValue * _mm_popcnt_u64(board.piece_boards[QUEEN] & board.piece_boards[OCC(WHITE)]);
-	material -= PawnValue * _mm_popcnt_u64(board.piece_boards[PAWN] & board.piece_boards[OCC(BLACK)]);
-	material -= KnightValue * _mm_popcnt_u64(board.piece_boards[KNIGHT] & board.piece_boards[OCC(BLACK)]);
-	material -= BishopValue * _mm_popcnt_u64(board.piece_boards[BISHOP] & board.piece_boards[OCC(BLACK)]);
-	material -= RookValue * _mm_popcnt_u64(board.piece_boards[ROOK] & board.piece_boards[OCC(BLACK)]);
-	material -= QueenValue * _mm_popcnt_u64(board.piece_boards[QUEEN] & board.piece_boards[OCC(BLACK)]);
-
-	// Decide between normal vs endgame king map
-	const Bitboard *funny = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]) >= 10 ? KING_SQUARES : KING_ENDGAME_SQUARES;
-	const Bitboard *pawn = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]) >= 10 ? PAWN_SQUARES : PAWN_ENDGAME_SQUARES;
-	// Initialize accumulators
-	int8_t pawn_acc, knight_acc, bishop_acc, rook_acc, queen_acc, king_acc;
-	int8_t pawn_acc_black, knight_acc_black, bishop_acc_black, rook_acc_black, queen_acc_black, king_acc_black;
-	pawn_acc = knight_acc = bishop_acc = rook_acc = queen_acc = king_acc = 0;
-	pawn_acc_black = knight_acc_black = bishop_acc_black = rook_acc_black = queen_acc_black = king_acc_black = 0;
-
-	// Precompute piece boards (flipping for black)
-	Bitboard boards[12];
-	for (int i = 0; i < 6; i++) {
-		boards[i] = board.piece_boards[i] & board.piece_boards[OCC(WHITE)];
-#ifndef WINDOWS
-		boards[i + 6] = __bswap_64(board.piece_boards[i] & board.piece_boards[OCC(BLACK)]);
-#else
-		boards[i + 6] = _byteswap_ulong(board.piece_boards[i] & board.piece_boards[OCC(BLACK)]);
-#endif
-	}
-
-	for (int i = 0; i < 8; i++) {
-		pawn_acc = pawn_acc * 2 + _mm_popcnt_u64(boards[0] & pawn[i]);
-		knight_acc = knight_acc * 2 + _mm_popcnt_u64(boards[1] & KNIGHT_SQUARES[i]);
-		bishop_acc = bishop_acc * 2 + _mm_popcnt_u64(boards[2] & BISHOP_SQUARES[i]);
-		rook_acc = rook_acc * 2 + _mm_popcnt_u64(boards[3] & ROOK_SQUARES[i]);
-		queen_acc = queen_acc * 2 + _mm_popcnt_u64(boards[4] & QUEEN_SQUARES[i]);
-		king_acc = king_acc * 2 + _mm_popcnt_u64(boards[5] & funny[i]);
-
-		pawn_acc_black = pawn_acc_black * 2 + _mm_popcnt_u64(boards[6] & pawn[i]);
-		knight_acc_black = knight_acc_black * 2 + _mm_popcnt_u64(boards[7] & KNIGHT_SQUARES[i]);
-		bishop_acc_black = bishop_acc_black * 2 + _mm_popcnt_u64(boards[8] & BISHOP_SQUARES[i]);
-		rook_acc_black = rook_acc_black * 2 + _mm_popcnt_u64(boards[9] & ROOK_SQUARES[i]);
-		queen_acc_black = queen_acc_black * 2 + _mm_popcnt_u64(boards[10] & QUEEN_SQUARES[i]);
-		king_acc_black = king_acc_black * 2 + _mm_popcnt_u64(boards[11] & funny[i]);
-	}
-	piecesquare += pawn_acc + knight_acc + bishop_acc + rook_acc + queen_acc + king_acc;
-	piecesquare -= pawn_acc_black + knight_acc_black + bishop_acc_black + rook_acc_black + queen_acc_black + king_acc_black;
-
-	castling += (board.castling & WHITE_OO) ? 5 : 0;
-	castling += (board.castling & WHITE_OOO) ? 5 : 0;
-	castling -= (board.castling & BLACK_OO) ? 5 : 0;
-	castling -= (board.castling & BLACK_OOO) ? 5 : 0;
-
-	bishop_pair += _mm_popcnt_u64(board.piece_boards[BISHOP] & board.piece_boards[OCC(WHITE)]) >= 2 ? 30 : 0;
-	bishop_pair -= _mm_popcnt_u64(board.piece_boards[BISHOP] & board.piece_boards[OCC(BLACK)]) >= 2 ? 30 : 0;
-
-	// For king safety, check for opponent control on squares around the king
-	// As well as counting our own pieces in front of the king
-
-	king_safety += king_safety_lookup[_mm_popcnt_u64(
-		king_movetable[__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(WHITE)])] & board.piece_boards[OCC(WHITE)]
-	)];
-	king_safety -= king_safety_lookup[_mm_popcnt_u64(
-		king_movetable[__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])] & board.piece_boards[OCC(BLACK)]
-	)];
-	Bitboard white_king = (board.piece_boards[KING] & board.piece_boards[OCC(WHITE)]);
-	Bitboard black_king = (board.piece_boards[KING] & board.piece_boards[OCC(BLACK)]);
-	if (white_king & (square_bits(SQ_G1) | square_bits(SQ_H1))) {
-		std::pair<int, int> control = board.control(SQ_F2);
-		king_safety -= std::max(0, control.second - control.first) * 10;
-		control = board.control(SQ_G2);
-		king_safety -= std::max(0, control.second - control.first) * 15;
-		control = board.control(SQ_H2);
-		king_safety -= std::max(0, control.second - control.first) * 15;
-	} else if (white_king & (square_bits(SQ_B1) | square_bits(SQ_C1))) {
-		std::pair<int, int> control = board.control(SQ_A2);
-		king_safety -= std::max(0, control.second - control.first) * 12;
-		control = board.control(SQ_B2);
-		king_safety -= std::max(0, control.second - control.first) * 15;
-		control = board.control(SQ_C2);
-		king_safety -= std::max(0, control.second - control.first) * 15;
-	}
-
-	if (black_king & (square_bits(SQ_G8) | square_bits(SQ_H8))) {
-		std::pair<int, int> control = board.control(SQ_F7);
-		king_safety += std::max(0, control.first - control.second) * 10;
-		control = board.control(SQ_G7);
-		king_safety += std::max(0, control.first - control.second) * 15;
-		control = board.control(SQ_H7);
-		king_safety += std::max(0, control.first - control.second) * 15;
-	} else if (black_king & (square_bits(SQ_B8) | square_bits(SQ_C8))) {
-		std::pair<int, int> control = board.control(SQ_A7);
-		king_safety += std::max(0, control.first - control.second) * 12;
-		control = board.control(SQ_B7);
-		king_safety += std::max(0, control.first - control.second) * 15;
-		control = board.control(SQ_C7);
-		king_safety += std::max(0, control.first - control.second) * 15;
-	}
-
-	tempo_bonus += board.side == WHITE ? 10 : -10;
-
-	for (Bitboard mask = 0x0101010101010101; mask & 0xff; mask <<= 1) {
-		// Doubled pawns
-		pawn_structure -= multipawn_lookup[_mm_popcnt_u64(board.piece_boards[PAWN] & board.piece_boards[OCC(WHITE)] & mask)];
-		pawn_structure += multipawn_lookup[_mm_popcnt_u64(board.piece_boards[PAWN] & board.piece_boards[OCC(BLACK)] & mask)];
-
-		// Isolated pawns
-		if (mask & 0b01111110) {
-			if ((board.piece_boards[PAWN] & board.piece_boards[OCC(WHITE)] & ((mask << 1) | (mask >> 1))) == 0)
-				pawn_structure -= _mm_popcnt_u64(board.piece_boards[PAWN] & board.piece_boards[OCC(WHITE)] & mask) ? 60 : 0;
-			if ((board.piece_boards[PAWN] & board.piece_boards[OCC(BLACK)] & ((mask << 1) | (mask >> 1))) == 0)
-				pawn_structure += _mm_popcnt_u64(board.piece_boards[PAWN] & board.piece_boards[OCC(BLACK)] & mask) ? 60 : 0;
-		}
-	}
-
-	Bitboard pawns = board.piece_boards[PAWN] & board.piece_boards[OCC(WHITE)];
-	while (pawns) {
-		int sq = _tzcnt_u64(pawns);
-		pawn_structure += (board.piece_boards[PAWN] & PASSED_PAWN_MASKS[WHITE][sq]) == 0 ? 80 : 0;
-		pawns = _blsr_u64(pawns);
-	}
-	pawns = board.piece_boards[PAWN] & board.piece_boards[OCC(BLACK)];
-	while (pawns) {
-		int sq = _tzcnt_u64(pawns);
-		pawn_structure -= (board.piece_boards[PAWN] & PASSED_PAWN_MASKS[BLACK][sq]) == 0 ? 80 : 0;
-		pawns = _blsr_u64(pawns);
-	}
-
-	int npieces = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]);
-
-	return ((int)material * 3 + (int)piecesquare + (int)castling + (int)bishop_pair + (int)king_safety * 2 + (int)tempo_bonus + (int)pawn_structure) *
-		   multi(npieces);
-}
-
-std::array<Value, 8> debug_eval(Board &board) {
-	return {eval(board), 0, 0, 0, 0, 0, 0, 0};
-}
-#else
+#ifndef NO_UE
 Value eval(Board &board, BoardState *bs) {
 	if (!(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])) {
 		// If black has no king, this is mate for white
@@ -288,6 +75,12 @@ Value eval(Board &board, BoardState *bs) {
 	}
 	return score;
 }
+#else
+Value eval(Board &board, BoardState *bs) {
+	auto res = debug_eval(board);
+	return res[( _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]) - 2) / 4];
+}
+#endif
 
 std::array<Value, 8> debug_eval(Board &board) {
 	if (!(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])) {
@@ -343,4 +136,3 @@ std::array<Value, 8> debug_eval(Board &board) {
 
 	return score;
 }
-#endif
diff --git a/engine/eval.hpp b/engine/eval.hpp
index 0c0b425..6e44b28 100644
--- a/engine/eval.hpp
+++ b/engine/eval.hpp
@@ -9,107 +9,10 @@ extern Network nnue_network;
 
 Value simple_eval(Board &board);
 
-Value eval(Board &board, BoardState *sv);
+Value eval(Board &board, BoardState *bs);
 
 std::array<Value, 8> debug_eval(Board &board);
 
-#ifdef HCE
-constexpr int pawn_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	0,	0,	0,	 0,	  0,   0,	0,	0, // 1
-	5,	10, 10,	 -40, -40, 10,	10, 5, // 2
-	5,	-5, -10, 0,	  0,   -10, -5, 5, // 3
-	0,	0,	0,	 30,  30,  0,	0,	0, // 4
-	5,	5,	10,	 40,  40,  10,	5,	5, // 5
-	10, 10, 50,	 60,  60,  50,	10, 10, // 6
-	80, 80, 80,	 80,  80,  80,	80, 80, // 7
-	0,	0,	0,	 0,	  0,   0,	0,	0, // 8
-};
-
-constexpr int knight_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	-50, -40, -30, -30, -30, -30, -40, -50, // 1
-	-40, -20, 0,   5,	5,	 0,	  -20, -40, // 2
-	-30, 5,	  10,  15,	15,	 10,  5,   -30, // 3
-	-30, 0,	  15,  20,	20,	 15,  0,   -30, // 4
-	-30, 5,	  15,  20,	20,	 15,  5,   -30, // 5
-	-30, 0,	  10,  15,	15,	 10,  0,   -30, // 6
-	-40, -20, 0,   0,	0,	 0,	  -20, -40, // 7
-	-50, -40, -30, -30, -30, -30, -40, -50, // 8
-};
-
-constexpr int bishop_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	-20, -10, -10, -10, -10, -10, -10, -20, // 1
-	-10, 5,	  0,   0,	0,	 0,	  5,   -10, // 2
-	-10, 10,  10,  10,	10,	 10,  10,  -10, // 3
-	-10, 0,	  10,  10,	10,	 10,  0,   -10, // 4
-	-10, 5,	  5,   10,	10,	 5,	  5,   -10, // 5
-	-10, 0,	  5,   10,	10,	 5,	  0,   -10, // 6
-	-30, 0,	  0,   0,	0,	 0,	  0,   -30, // 7
-	-20, -10, -10, -10, -10, -10, -10, -20, // 8
-};
-
-constexpr int rook_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	-10, 0, 0, 10, 10, 5, 0, -10, // 1
-	-5,	 0, 0, 0,  0,  0, 0, -5, // 2
-	-5,	 0, 0, 0,  0,  0, 0, -5, // 3
-	-5,	 0, 0, 0,  0,  0, 0, -5, // 4
-	-5,	 0, 0, 0,  0,  0, 0, -5, // 5
-	-5,	 0, 0, 0,  0,  0, 0, -5, // 6
-	-10, 0, 0, 0,  0,  0, 0, -10, // 7
-	0,	 0, 0, 0,  0,  0, 0, 0, // 8
-};
-
-constexpr int queen_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	-20, -10, -10, -5, -5, -10, -10, -20, // 1
-	-10, 0,	  5,   0,  0,  0,	0,	 -10, // 2
-	-10, 5,	  5,   5,  5,  5,	0,	 -10, // 3
-	-5,	 0,	  5,   5,  5,  5,	0,	 -5, // 4
-	0,	 0,	  5,   5,  5,  5,	0,	 -5, // 5
-	-10, 0,	  5,   5,  5,  5,	0,	 -10, // 6
-	-10, 0,	  0,   0,  0,  0,	0,	 -10, // 7
-	-20, -10, -10, -5, -5, -10, -10, -20, // 8
-};
-
-constexpr int king_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	30,	 50,  40,  0,	0,	 10,  50,  30, // 1
-	20,	 20,  -5,  -5,	-5,	 -5,  20,  20, // 2
-	-10, -20, -20, -20, -20, -20, -20, -10, // 3
-	-20, -30, -30, -40, -40, -30, -30, -20, // 4
-	-30, -40, -40, -50, -50, -40, -40, -30, // 5
-	-30, -40, -40, -50, -50, -40, -40, -30, // 6
-	-30, -40, -40, -50, -50, -40, -40, -30, // 7
-	-30, -40, -40, -50, -50, -40, -40, -30, // 8
-};
-
-constexpr int endgame_heatmap[64] = {
-	//  a  b  c  d  e  f  g  h
-	1, 2,  4,  8,  8,  4,  2,  1, // 1
-	2, 4,  8,  16, 16, 8,  4,  2, // 2
-	4, 8,  16, 32, 32, 16, 8,  4, // 3
-	8, 16, 32, 64, 64, 32, 16, 8, // 4
-	8, 16, 32, 64, 64, 32, 16, 8, // 5
-	4, 8,  16, 32, 32, 16, 8,  4, // 6
-	2, 4,  8,  16, 16, 8,  4,  2, // 7
-	1, 2,  4,  8,  8,  4,  2,  1, // 8
-};
-
-constexpr int pawn_endgame[64] = {
-	//  a  b  c  d  e  f  g  h
-	0,	 0,	  0,   0,	0,	 0,	  0,   0, // 1
-	-10, -10, -10, -10, -10, -10, -10, -10, // 2
-	5,	 5,	  5,   5,	5,	 5,	  5,   5, // 3
-	10,	 10,  10,  10,	10,	 10,  10,  10, // 4
-	20,	 20,  20,  20,	20,	 20,  20,  20, // 5
-	60,	 60,  60,  60,	60,	 60,  60,  60, // 6
-	100, 100, 100, 100, 100, 100, 100, 100, // 7
-	0,	 0,	  0,   0,	0,	 0,	  0,   0, // 8
-};
-#else
 constexpr int IBUCKET_LAYOUT[] = {
 	0, 0, 2, 2, 3, 3, 1, 1,
 	0, 0, 2, 2, 3, 3, 1, 1,
@@ -120,4 +23,3 @@ constexpr int IBUCKET_LAYOUT[] = {
 	6, 6, 6, 6, 7, 7, 7, 7,
 	6, 6, 6, 6, 7, 7, 7, 7,
 };
-#endif
\ No newline at end of file
diff --git a/engine/includes.hpp b/engine/includes.hpp
index 4dc14f0..bbb6e6f 100644
--- a/engine/includes.hpp
+++ b/engine/includes.hpp
@@ -2,6 +2,7 @@
 
 #include <algorithm>
 #include <array>
+#include <atomic>
 #include <cmath>
 #include <cstdint>
 #include <cstring>
@@ -11,6 +12,7 @@
 #include <iostream>
 #include <stack>
 #include <string>
+#include <thread>
 #include <utility>
 
 #include "pzstl/vector.hpp"
diff --git a/engine/main.cpp b/engine/main.cpp
index 93cfea4..0ac9a9f 100644
--- a/engine/main.cpp
+++ b/engine/main.cpp
@@ -9,24 +9,25 @@
 #include "movegen.hpp"
 #include "movetimings.hpp"
 #include "search.hpp"
+#include "ttable.hpp"
 
 BoardState bs[NINPUTS * 2][NINPUTS * 2];
 
 // Options
 int TT_SIZE = DEFAULT_TT_SIZE;
 bool quiet = false, online = false;
-int multipv = 1;
+
+ThreadInfo tis[MAX_THREADS];
 
 void run_uci() {
 	std::string command;
-	Board board = Board(TT_SIZE);
+	Board board = Board();
 	while (getline(std::cin, command)) {
 		if (command == "uci") {
 			std::cout << "id name PZChessBot " << VERSION << std::endl;
 			std::cout << "id author kevlu8 and wdotmathree" << std::endl;
 			std::cout << "option name Hash type spin default 16 min 1 max 1024" << std::endl;
-			std::cout << "option name Threads type spin default 1 min 1 max 1" << std::endl; // Not implemented yet
-			std::cout << "option name MultiPV type spin default 1 min 1 max 256" << std::endl;
+			std::cout << "option name Threads type spin default 1 min 1 max 64" << std::endl;
 			std::cout << "option name Quiet type check default false" << std::endl;
 			std::cout << "uciok" << std::endl;
 		} else if (command == "icu") {
@@ -53,12 +54,15 @@ void run_uci() {
 				TT_SIZE = optionint * 1024 * 1024 / sizeof(TTable::TTBucket);
 			} else if (optionname == "Quiet") {
 				quiet = optionvalue == "true";
-			} else if (optionname == "MultiPV") {
-				multipv = std::stoi(optionvalue);
+			} else if (optionname == "Threads") {
+				num_threads = std::stoi(optionvalue);
 			}
 		} else if (command == "ucinewgame") {
-			board = Board(TT_SIZE);
-			clear_search_vars();
+			board = Board();
+			ttable.resize(TT_SIZE);
+			for (int i = 0; i < num_threads; i++) {
+				clear_search_vars(tis[i]);
+			}
 		} else if (command.substr(0, 8) == "position") {
 			// either `position startpos` or `position fen ...`
 			if (command.find("startpos") != std::string::npos) {
@@ -133,25 +137,18 @@ void run_uci() {
 			int timeleft = board.side ? btime : wtime;
 			int inc = board.side ? binc : winc;
 			std::pair<Move, Value> res;
-			if (multipv != 1) {
-				if (inf) res = search_multipv(board, multipv, 1e9, MAX_PLY, 1e18, quiet)[0];
-				else if (depth != -1) res = search_multipv(board, multipv, 1e9, depth, 1e18, quiet)[0];
-				else if (nodes != -1) res = search_multipv(board, multipv, 1e9, MAX_PLY, nodes, quiet)[0];
-				else if (movetime != -1) res = search_multipv(board, multipv, movetime, MAX_PLY, 1e18, quiet)[0];
-				else res = search_multipv(board, multipv, 1e9, MAX_PLY, 1e18, quiet)[0];
-			} else {
-				if (inf) res = search(board, 1e9, MAX_PLY, 1e18, quiet);
-				else if (depth != -1) res = search(board, 1e9, depth, 1e18, quiet);
-				else if (nodes != -1) res = search(board, 1e9, MAX_PLY, nodes, quiet);
-				else if (movetime != -1) res = search(board, movetime, MAX_PLY, 1e18, quiet);
-				else res = search(board, timemgmt(timeleft, inc, online), MAX_PLY, 1e18, quiet);
-			}
+			if (inf) res = search(board, tis, 1e9, MAX_PLY, 1e18, quiet);
+			else if (depth != -1) res = search(board, tis, 1e9, depth, 1e18, quiet);
+			else if (nodes != -1) res = search(board, tis, 1e9, MAX_PLY, nodes, quiet);
+			else if (movetime != -1) res = search(board, tis, movetime, MAX_PLY, 1e18, quiet);
+			else res = search(board, tis, timemgmt(timeleft, inc, online), MAX_PLY, 1e18, quiet);
 			std::cout << "bestmove " << res.first.to_string() << std::endl;
 		}
 	}
 }
 
 __attribute__((weak)) int main(int argc, char *argv[]) {
+	for (int i = 0; i < MAX_THREADS; i++) tis[i].set_bs();
 	if (argc == 2 && std::string(argv[1]) == "bench") {
 		const std::string bench_positions[] = {
 			"r3k2r/2pb1ppp/2pp1q2/p7/1nP1B3/1P2P3/P2N1PPP/R2QK2R w KQkq - 0 14",
@@ -205,17 +202,17 @@ __attribute__((weak)) int main(int argc, char *argv[]) {
             "3br1k1/p1pn3p/1p3n2/5pNq/2P1p3/1PN3PP/P2Q1PB1/4R1K1 w - - 0 23",
             "2r2b2/5p2/5k2/p1r1pP2/P2pB3/1P3P2/K1P3R1/7R w - - 23 93",
 		};
-		Board board = Board(TT_SIZE);
+		Board board = Board();
 		uint64_t tot_nodes = 0;
 		uint64_t start = clock();
 		for (const auto &fen : bench_positions) {
 			board.reset(fen);
-			clear_search_vars();
-			search(board, 1e9, 12, 1e18, 0);
-			tot_nodes += nodes;
+			clear_search_vars(tis[0]);
+			search(board, tis, 1e9, 12, 1e18, 0);
+			tot_nodes += nodes[0];
 		}
 		uint64_t end = clock();
-		std::cout << tot_nodes << " nodes " << (tot_nodes / ((double)(end - start) / CLOCKS_PER_SEC)) << " nps" << std::endl;
+		std::cout << tot_nodes << " nodes " << int(tot_nodes / ((double)(end - start) / CLOCKS_PER_SEC)) << " nps" << std::endl;
 		return 0;
 	}
 	if (argc == 3 && std::string(argv[2]) == "quit") {
@@ -241,7 +238,7 @@ __attribute__((weak)) int main(int argc, char *argv[]) {
 				ss >> nmoves;
 			}
 		}
-		Board board = Board(TT_SIZE);
+		Board board = Board();
 		std::mt19937_64 rng(s);
 		std::ifstream bookfile(book == "None" ? "" : book);
 		std::vector<std::string> fens;
@@ -281,10 +278,11 @@ __attribute__((weak)) int main(int argc, char *argv[]) {
 			if (!restart) {
 				if (_mm_popcnt_u64(board.piece_boards[KING]) != 2) restart = true;
 				else if (filter_weird) {
-					auto s_eval = eval(board);
+					int npieces = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]);
+					auto s_eval = debug_eval(board)[(npieces - 2) / 4] * (board.side == WHITE ? 1 : -1);
 					if (abs(s_eval) >= 600) restart = true; // do a fast static eval to quickly filter out crazy positions
 					else {
-						auto res = search(board, 1e9, MAX_PLY, 10000, 1);
+						auto res = search(board, tis, 1e9, MAX_PLY, 10000, 1);
 						if (abs(res.second) >= 400) restart = true;
 					}
 				}
@@ -300,15 +298,14 @@ __attribute__((weak)) int main(int argc, char *argv[]) {
 	online = argc >= 2 && std::string(argv[1]) == "--online=1";
 	std::cout << "PZChessBot " << VERSION << " developed by kevlu8 and wdotmathree" << std::endl;
 	std::string command;
-	Board board = Board(TT_SIZE);
+	Board board = Board();
 	std::thread searchthread;
 	while (getline(std::cin, command)) {
 		if (command == "uci") {
 			std::cout << "id name PZChessBot " << VERSION << std::endl;
 			std::cout << "id author kevlu8 and wdotmathree" << std::endl;
 			std::cout << "option name Hash type spin default 16 min 1 max 1024" << std::endl;
-			std::cout << "option name Threads type spin default 1 min 1 max 1" << std::endl; // Not implemented yet
-			std::cout << "option name MultiPV type spin default 1 min 1 max 256" << std::endl;
+			std::cout << "option name Threads type spin default 1 min 1 max 64" << std::endl;
 			std::cout << "option name Quiet type check default false" << std::endl;
 			std::cout << "uciok" << std::endl;
 			run_uci();
@@ -390,7 +387,7 @@ __attribute__((weak)) int main(int argc, char *argv[]) {
 				}
 			} else if (command.substr(0, 2) == "go") {
 				int ms = std::stoi(command.substr(3));
-				auto res = search(board, ms, MAX_PLY, 1e18, 2); // Use quiet level 2 for pretty output
+				auto res = search(board, tis, ms, MAX_PLY, 1e18, 2); // Use quiet level 2 for pretty output
 				std::cout << CYAN "Best move: " RESET BOLD << res.first.to_string() << RESET
 						  << CYAN " with score: " RESET << (res.second * (board.side == BLACK ? -1 : 1) > 0 ? GREEN : RED)
 						  << std::showpos << res.second * (board.side == BLACK ? -1 : 1) << " cp" << RESET << std::endl << std::noshowpos;
@@ -398,10 +395,11 @@ __attribute__((weak)) int main(int argc, char *argv[]) {
 				board.unmake_move();
 				board.print_board_pretty();
 			} else if (command == "reset") {
-				board = Board(TT_SIZE);
+				board = Board();
+				ttable.resize(TT_SIZE);
 				std::cout << "Done" << std::endl;
 			} else if (command.substr(0, 3) == "fen") {
-				board = Board(TT_SIZE);
+				board = Board();
 				std::string fen = command.substr(4);
 				board.reset(fen);
 				std::cout << "Done" << std::endl;
diff --git a/engine/nnue/network.hpp b/engine/nnue/network.hpp
index 3127188..b57cdbc 100644
--- a/engine/nnue/network.hpp
+++ b/engine/nnue/network.hpp
@@ -29,4 +29,6 @@ void accumulator_add(const Network &net, Accumulator &acc, uint16_t index);
 
 void accumulator_sub(const Network &net, Accumulator &acc, uint16_t index);
 
-int32_t nnue_eval(const Network &net, const Accumulator &stm, const Accumulator &ntm, uint8_t nbucket);
\ No newline at end of file
+int32_t nnue_eval(const Network &net, const Accumulator &stm, const Accumulator &ntm, uint8_t nbucket);
+
+extern Network nnue_network;
diff --git a/engine/search.cpp b/engine/search.cpp
index 6f9ae15..d5853cd 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -4,15 +4,15 @@
 #define MOVENUM(x) ((((#x)[1] - '1') << 12) | (((#x)[0] - 'a') << 8) | (((#x)[3] - '1') << 4) | ((#x)[2] - 'a'))
 
 uint64_t mx_nodes = 1e18; // Maximum nodes to search
-uint64_t mxtime = 1000; // Maximum time to search in milliseconds
-bool early_exit = false, exit_allowed = false; // Whether or not to exit the search, and if we are allowed to exit (so we don't exit on the depth 1)
-clock_t start = 0;
-
-Move pvtable[MAX_PLY][MAX_PLY];
-int pvlen[MAX_PLY];
+bool stop_search = false;
+std::chrono::steady_clock::time_point start;
+uint64_t mxtime = 1e18; // Maximum time to search in milliseconds
 
 uint16_t num_threads = 1;
 
+std::atomic<int> nodecnt[64][64] = {{}};
+uint64_t nodes[MAX_THREADS] = {};
+
 uint64_t perft(Board &board, int depth) {
 	// If white's turn is beginning and black is in check
 	if (board.side == WHITE && board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[7]), WHITE))
@@ -105,8 +105,7 @@ Value tt_to_score(Value score, int ply) {
 	}
 }
 
-double get_ttable_sz(Board &board) {
-	TTable &ttable = ttable;
+double get_ttable_sz() {
 	int cnt = 0;
 	for (int i = 0; i < 1024; i++) {
 		if (i >= ttable.TT_SIZE) break;
@@ -127,25 +126,23 @@ double get_ttable_sz(Board &board) {
  * - Search for checks and check evasions (every time I've tried this it has lost tons of elo)
  * - Late move reduction (instead of reducing depth, we reduce the search window) (not a known technique, maybe worth trying?)
  */
-Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, int depth, bool pv=false) {
-	sv.nodes++;
+Value quiesce(ThreadInfo &ti, Value alpha, Value beta, int side, int depth, bool pv=false) {
+	nodes[ti.id]++;
 
-	if (early_exit) return 0;
+	if (stop_search) return 0;
 
-	if (!(sv.nodes & 4095)) {
-		// Check for early exit
-		// We check every 4096 nodes to avoid slowing down the search too much
-		uint64_t time = (clock() - start) / CLOCKS_PER_MS;
-		if ((time > mxtime || sv.nodes > mx_nodes) && exit_allowed) {
-			early_exit = true;
+	if (ti.is_main && !(nodes[ti.id] & 4095)) {
+		auto time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
+		if (time > mxtime || nodes[ti.id] > mx_nodes) { // currently, the nodes will be broken but time will be accurate
+			stop_search = true;
 			return 0;
 		}
 	}
 
 	if (depth >= MAX_PLY)
-		return eval(board, (BoardState *)sv.bs) * side; // Just in case
+		return eval(ti.board, (BoardState *)ti.bs) * side; // Just in case
 
-	TTable::TTEntry *tentry = ttable.probe(board.zobrist);
+	TTable::TTEntry *tentry = ttable.probe(ti.board.zobrist);
 	Value tteval = 0;
 	if (tentry && tentry->valid()) tteval = tt_to_score(tentry->eval, depth);
 	if (!pv && tentry && tentry->valid()) {
@@ -160,13 +157,13 @@ Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, i
 
 	Value stand_pat = 0;
 	Value raw_eval = 0;
-	stand_pat = tentry ? tentry->s_eval : eval(board, (BoardState *)sv.bs) * side;
+	stand_pat = tentry ? tentry->s_eval : eval(ti.board, (BoardState *)ti.bs) * side;
 	raw_eval = stand_pat;
-	sv.history.apply_correction(board, stand_pat);
+	ti.thread_hist.apply_correction(ti.board, stand_pat);
 	if (tentry && tentry->valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > stand_pat ? UPPER_BOUND : LOWER_BOUND))
 		stand_pat = tteval;
 
-	if (!tentry) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
+	if (!tentry) ttable.store(ti.board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
 
 	// If it's a mate, stop here since there's no point in searching further
 	// Theoretically shouldn't ever happen because of stand pat
@@ -180,16 +177,16 @@ Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, i
 		alpha = stand_pat;
 
 	pzstd::vector<Move> moves;
-	board.captures(moves);
+	ti.board.captures(moves);
 	if (moves.empty())
 		return stand_pat;
 
 	// Sort captures and promotions
 	pzstd::vector<std::pair<Move, int>> scores;
 	for (Move &move : moves) {
-		if (board.piece_boards[OPPOCC(board.side)] & square_bits(move.dst())) {
+		if (ti.board.piece_boards[OPPOCC(ti.board.side)] & square_bits(move.dst())) {
 			int score = 0;
-			score = MVV_LVA[board.mailbox[move.dst()] & 7][board.mailbox[move.src()] & 7];
+			score = MVV_LVA[ti.board.mailbox[move.dst()] & 7][ti.board.mailbox[move.src()] & 7];
 			scores.push_back({move, score});
 		} else if (move.type() == PROMOTION) {
 			scores.push_back({move, PieceValue[move.promotion() + KNIGHT] - PawnValue});
@@ -206,7 +203,7 @@ Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, i
 
 	while ((move = next_move(scores, end)) != NullMove) {
 		if (move.type() != PROMOTION) {
-			Value see = board.see_capture(move);
+			Value see = ti.board.see_capture(move);
 			if (see < 0) {
 				continue; // Don't search moves that lose material
 			} else {
@@ -216,14 +213,14 @@ Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, i
 			}
 		}
 
-		sv.line[depth].move = move;
+		ti.line[depth].move = move;
 
-		board.make_move(move);
-		_mm_prefetch(&ttable.TT[board.zobrist % ttable.TT_SIZE], _MM_HINT_T0);
-		Value score = -quiesce(board, sv, -beta, -alpha, -side, depth + 1, pv);
-		board.unmake_move();
+		ti.board.make_move(move);
+		_mm_prefetch(&ttable.TT[ti.board.zobrist % ttable.TT_SIZE], _MM_HINT_T0);
+		Value score = -quiesce(ti, -beta, -alpha, -side, depth + 1, pv);
+		ti.board.unmake_move();
 
-		sv.line[depth].move = NullMove;
+		ti.line[depth].move = NullMove;
 
 		if (score > best) {
 			if (score > alpha) {
@@ -234,32 +231,32 @@ Value quiesce(Board &board, SearchVars &sv, Value alpha, Value beta, int side, i
 			best_move = move;
 		}
 		if (score >= beta) {
-			ttable.store(board.zobrist, score_to_tt(score, depth), raw_eval, 0, LOWER_BOUND, pv, move, depth);
+			ttable.store(ti.board.zobrist, score_to_tt(score, depth), raw_eval, 0, LOWER_BOUND, pv, move, depth);
 			return best;
 		}
 	}
 
-	ttable.store(board.zobrist, score_to_tt(best, depth), raw_eval, 0, alpha_raise ? EXACT : UPPER_BOUND, pv, best_move, depth);
+	ttable.store(ti.board.zobrist, score_to_tt(best, depth), raw_eval, 0, alpha_raise ? EXACT : UPPER_BOUND, pv, best_move, depth);
 
 	return best;
 }
 
-Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_INFINITE, Value beta = VALUE_INFINITE, int side = 1, bool pv = false, bool cutnode = false, int ply = 0, bool root = false) {
-	if (pv) sv.pvlen[ply] = 0;
+Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value beta = VALUE_INFINITE, int side = 1, bool pv = false, bool cutnode = false, int ply = 0, bool root = false) {
+	if (pv) ti.pvlen[ply] = 0;
+
+	Board &board = ti.board;
 
 	if (ply >= MAX_PLY)
-		return eval(board, (BoardState *)sv.bs) * side;
+		return eval(board, (BoardState *)ti.bs) * side;
 
-	sv.nodes++;
+	nodes[ti.id]++;
 
-	if (early_exit) return 0;
+	if (stop_search) return 0;
 
-	if (!(nodes & 4095)) {
-		// Check for early exit
-		// We check every 4096 nodes to avoid slowing down the search too much
-		uint64_t time = (clock() - start) / CLOCKS_PER_MS;
-		if ((time > mxtime || nodes > mx_nodes) && exit_allowed) {
-			early_exit = true;
+	if (ti.is_main && !(nodes[ti.id] & 4095)) {
+		auto time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
+		if (time > mxtime || nodes[ti.id] > mx_nodes) { // currently, the nodes will be broken but time will be accurate
+			stop_search = true;
 			return 0;
 		}
 	}
@@ -307,7 +304,7 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 
 	if (depth <= 0) {
 		// Reached the maximum depth, perform quiescence search
-		return quiesce(board, sv, alpha, beta, side, ply, pv);
+		return quiesce(ti, alpha, beta, side, ply, pv);
 	}
 
 	bool ttpv = pv;
@@ -316,7 +313,7 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 	TTable::TTEntry *tentry = ttable.probe(board.zobrist);
 	Value tteval = 0;
 	if (tentry && tentry->valid()) tteval = tt_to_score(tentry->eval, ply);
-	if (!pv && tentry && tentry->depth >= depth && sv.line[ply].excl == NullMove) {
+	if (!pv && tentry && tentry->depth >= depth && ti.line[ply].excl == NullMove) {
 		// Check for cutoffs
 		if (tentry->bound() == EXACT) {
 			return tteval;
@@ -335,19 +332,19 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 	uint64_t pawn_hash = 0;
 	if (!in_check) {
 		pawn_hash = board.pawn_struct_hash();
-		cur_eval = tentry ? tentry->s_eval : eval(board, (BoardState *)sv.bs) * side;
+		cur_eval = tentry ? tentry->s_eval : eval(board, (BoardState *)ti.bs) * side;
 		raw_eval = cur_eval;
-		sv.history.apply_correction(board, cur_eval);
+		ti.thread_hist.apply_correction(board, cur_eval);
 		tt_corr_eval = cur_eval;
 		if (tentry && tentry->valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > cur_eval ? UPPER_BOUND : LOWER_BOUND))
 			tt_corr_eval = tteval;
 		else if (!tentry) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
 	}
 
-	sv.line[ply].eval = in_check ? VALUE_NONE : cur_eval; // If in check, we don't have a valid eval yet
+	ti.line[ply].eval = in_check ? VALUE_NONE : cur_eval; // If in check, we don't have a valid eval yet
 
 	bool improving = false;
-	if (!in_check && ply >= 2 && sv.line[ply-2].eval != VALUE_NONE && cur_eval > sv.line[ply-2].eval) improving = true;
+	if (!in_check && ply >= 2 && ti.line[ply-2].eval != VALUE_NONE && cur_eval > ti.line[ply-2].eval) improving = true;
 
 	// Reverse futility pruning
 	if (!in_check && !ttpv && depth <= 8) {
@@ -365,7 +362,7 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 	// Null-move pruning
 	int npieces = _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]);
 	int npawns_and_kings = _mm_popcnt_u64(board.piece_boards[PAWN] | board.piece_boards[KING]);
-	if (!in_check && npieces != npawns_and_kings && tt_corr_eval >= beta && depth >= 2 && sv.line[ply].excl == NullMove) { // Avoid NMP in pawn endgames
+	if (!in_check && npieces != npawns_and_kings && tt_corr_eval >= beta && depth >= 2 && ti.line[ply].excl == NullMove) { // Avoid NMP in pawn endgames
 		/**
 		 * This works off the *null-move observation*.
 		 * 
@@ -380,7 +377,7 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 		board.make_move(NullMove);
 		// Perform a reduced-depth search
 		Value r = NMP_R_VALUE + depth / 4 + std::min(3, (tt_corr_eval - beta) / 400) + improving;
-		Value null_score = -__recurse(board, sv, depth - r, -beta, -beta + 1, -side, 0, !cutnode, ply+1);
+		Value null_score = -__recurse(ti, depth - r, -beta, -beta + 1, -side, 0, !cutnode, ply+1);
 		board.unmake_move();
 		if (null_score >= beta)
 			return null_score >= VALUE_MATE_MAX_PLY ? beta : null_score;
@@ -392,14 +389,14 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 		 * If we are losing by a lot, check w/ qsearch to see if we could possibly improve.
 		 * If not, we can prune the search.
 		 */
-		Value razor_score = quiesce(board, sv, alpha, beta, side, ply, 0);
+		Value razor_score = quiesce(ti, alpha, beta, side, ply, 0);
 		if (razor_score <= alpha)
 			return razor_score;
 	}
 
 	Value best = -VALUE_INFINITE;
 
-	MovePicker mp(board, &sv.line[ply], ply, &sv.history, tentry);
+	MovePicker mp(board, &ti.line[ply], ply, &ti.thread_hist, tentry);
 
 	if ((pv || cutnode) && depth > 4 && !(tentry && tentry->best_move != NullMove)) {
 		depth -= 2; // Internal iterative reductions
@@ -415,10 +412,10 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 	Move move = NullMove;
 	int i = 0;
 
-	uint64_t prev_nodes = sv.nodes;
+	uint64_t prev_nodes = nodes[ti.id];
 
 	while ((move = mp.next()) != NullMove) {
-		if (move == sv.line[ply].excl)
+		if (move == ti.line[ply].excl)
 			continue;
 		
 		bool capt = (board.piece_boards[OPPOCC(board.side)] & square_bits(move.dst()));
@@ -426,12 +423,12 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 		
 		int extension = 0;
 
-		if (sv.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry && move == tentry->best_move && tentry->depth >= depth - 3 && tentry->bound() != UPPER_BOUND) {
+		if (ti.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry && move == tentry->best_move && tentry->depth >= depth - 3 && tentry->bound() != UPPER_BOUND) {
 			// Singular extension
-			sv.line[ply].excl = move;
+			ti.line[ply].excl = move;
 			Value singular_beta = tteval - 4 * depth;
-			Value singular_score = __recurse(board, sv, (depth-1) / 2, singular_beta - 1, singular_beta, side, 0, cutnode, ply);
-			sv.line[ply].excl = NullMove; // Reset exclusion move
+			Value singular_score = __recurse(ti, (depth-1) / 2, singular_beta - 1, singular_beta, side, 0, cutnode, ply);
+			ti.line[ply].excl = NullMove; // Reset exclusion move
 
 			if (singular_score < singular_beta) {
 				extension++;
@@ -446,9 +443,9 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 			}
 		}
 
-		sv.line[ply].move = move;
+		ti.line[ply].move = move;
 
-		Value hist = capt ? sv.history.get_capthist(board, move) : sv.history.get_history(board, move, ply, &sv.line[ply]);
+		Value hist = capt ? ti.thread_hist.get_capthist(board, move) : ti.thread_hist.get_history(board, move, ply, &ti.line[ply]);
 		if (best > -VALUE_MATE_MAX_PLY) {
 			if (i >= (5 + depth * depth) / (2 - improving)) {
 				/**
@@ -482,7 +479,7 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 			}
 		}
 
-		sv.line[ply].cont_hist = &sv.history.cont_hist[board.side][board.mailbox[move.src()] & 7][move.dst()];
+		ti.line[ply].cont_hist = &ti.thread_hist.cont_hist[board.side][board.mailbox[move.src()] & 7][move.dst()];
 
 		board.make_move(move);
 
@@ -508,35 +505,38 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 
 			r -= 1024 * pv;
 			r += 1024 * (!pv && cutnode);
-			if (move == sv.line[ply].killer[0] || move == sv.line[ply].killer[1])
+			if (move == ti.line[ply].killer[0] || move == ti.line[ply].killer[1])
 				r -= 1024;
 			r -= 1024 * ttpv;
 			r -= hist / 16 * !capt;
 
 			Value searched_depth = depth - r / 1024;
 
-			score = -__recurse(board, sv, searched_depth, -alpha - 1, -alpha, -side, 0, true, ply+1);
+			score = -__recurse(ti, searched_depth, -alpha - 1, -alpha, -side, 0, true, ply+1);
 			if (score > alpha && searched_depth < newdepth) {
-				score = -__recurse(board, sv, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
+				score = -__recurse(ti, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
 			}
 		} else if (!pv || i > 0) {
-			score = -__recurse(board, sv, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
+			score = -__recurse(ti, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
 		}
 		if (pv && (i == 0 || score > alpha)) {
 			if (tentry && move == tentry->best_move && tentry->depth > 1)
 				newdepth = std::max((int)newdepth, 1); // Make sure we don't enter QS if we have an available TT move
-			score = -__recurse(board, sv, newdepth, -beta, -alpha, -side, 1, false, ply+1);
+			score = -__recurse(ti, newdepth, -beta, -alpha, -side, 1, false, ply+1);
 		}
 
 		board.unmake_move();
 
-		sv.line[ply].cont_hist = nullptr;
+		ti.line[ply].cont_hist = nullptr;
 
 		if (root) {
-			sv.nodecnt[move.src()][move.dst()] += sv.nodes - prev_nodes;
-			prev_nodes = sv.nodes;
+			nodecnt[move.src()][move.dst()] += nodes[ti.id] - prev_nodes;
+			prev_nodes = nodes[ti.id];
 		}
 
+		if (stop_search)
+			break;
+
 		if (score > best) {
 			if (score > alpha) {
 				best_move = move;
@@ -544,10 +544,10 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 				alpha_raise++;
 				flag = EXACT;
 				if (score < beta) {
-					sv.pvtable[ply][0] = move;
-					sv.pvlen[ply] = sv.pvlen[ply+1]+1;
-					for (int i = 0; i < sv.pvlen[ply+1]; i++) {
-						sv.pvtable[ply][i+1] = sv.pvtable[ply+1][i];
+					ti.pvtable[ply][0] = move;
+					ti.pvlen[ply] = ti.pvlen[ply+1]+1;
+					for (int j = 0; j < ti.pvlen[ply+1]; j++) {
+						ti.pvtable[ply][j+1] = ti.pvtable[ply+1][j];
 					}
 				}
 			}
@@ -560,28 +560,25 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 				// note that best and score are functionally equivalent here; best is just what's returned + stored to TT
 				best = (score * depth + beta) / (depth + 1); // wtf?????
 			}
-			if (sv.line[ply].killer[0] != move) {
-				sv.line[ply].killer[1] = sv.line[ply].killer[0];
-				sv.line[ply].killer[0] = move; // Update killer moves
+			if (ti.line[ply].killer[0] != move) {
+				ti.line[ply].killer[1] = ti.line[ply].killer[0];
+				ti.line[ply].killer[0] = move; // Update killer moves
 			}
 			const Value bonus = std::min(1896, 4 * depth * depth + 120 * depth - 120); // saturate updates at depth 12
 			if (!capt) { // Not a capture
-				sv.history.update_history(board, move, ply, &sv.line[ply], bonus);
+				ti.thread_hist.update_history(board, move, ply, &ti.line[ply], bonus);
 				for (auto &qmove : quiets) {
-					sv.history.update_history(board, qmove, ply, &sv.line[ply], -bonus); // Penalize quiet moves
+					ti.thread_hist.update_history(board, qmove, ply, &ti.line[ply], -bonus); // Penalize quiet moves
 				}
 			} else { // Capture
-				sv.history.update_capthist(PieceType(board.mailbox[move.src()] & 7), PieceType(board.mailbox[move.dst()] & 7), move.dst(), bonus);
+				ti.thread_hist.update_capthist(PieceType(board.mailbox[move.src()] & 7), PieceType(board.mailbox[move.dst()] & 7), move.dst(), bonus);
 			}
 			for (auto &cmove : captures) {
-				sv.history.update_capthist(PieceType(board.mailbox[cmove.src()] & 7), PieceType(board.mailbox[cmove.dst()] & 7), cmove.dst(), -bonus);
+				ti.thread_hist.update_capthist(PieceType(board.mailbox[cmove.src()] & 7), PieceType(board.mailbox[cmove.dst()] & 7), cmove.dst(), -bonus);
 			}
 			break;
 		}
 
-		if (early_exit)
-			break;
-
 		if (!capt && !promo) quiets.push_back(move);
 		else if (capt) captures.push_back(move);
 		i++;
@@ -590,7 +587,7 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 	// Stalemate detection
 	if (best == -VALUE_MATE) {
 		// If our engine thinks we are mated but we are not in check, we are stalemated
-		if (sv.line[ply].excl != NullMove) return alpha;
+		if (ti.line[ply].excl != NullMove) return alpha;
 		else if (in_check) return -VALUE_MATE + ply;
 		else return 0;
 	}
@@ -601,10 +598,10 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 		&& !(best < alpha && best >= raw_eval) && !(best >= beta && best <= raw_eval)) {
 		// Best move is a quiet move, update CorrHist
 		int bonus = (best - raw_eval) * depth / 8;
-		sv.history.update_corrhist(board, bonus);
+		ti.thread_hist.update_corrhist(board, bonus);
 	}
 
-	if (sv.line[ply].excl == NullMove) {
+	if (ti.line[ply].excl == NullMove) {
 		Move tt_move = best_move != NullMove ? best_move : tentry ? tentry->best_move : NullMove;
 		ttable.store(board.zobrist, score_to_tt(best, ply), raw_eval, depth, flag, ttpv, tt_move, board.halfmove);
 	}
@@ -612,67 +609,25 @@ Value __recurse(Board &board, SearchVars &sv, int depth, Value alpha = -VALUE_IN
 	return best;
 }
 
-int g_quiet;
-
-void __print_pv(bool omit_last = 0) { // Need to omit last to prevent illegal moves during mates
-	const int ROOT_PLY = 0;
-	for (int i = 0; i < pvlen[ROOT_PLY] - omit_last; i++) {
-		if (pvtable[ROOT_PLY][i] == NullMove) break;
-		std::cout << pvtable[ROOT_PLY][i].to_string() << ' ';
-	}
-}
-
-void __print_pv_clipped(bool omit_last = 0) {
-	const int MAX_PLY = 10;
-	int len = std::min(pvlen[0] - omit_last, MAX_PLY);
-	for (int i = 0; i < len; i++) {
-		if (pvtable[0][i] == NullMove) break;
-		std::cout << pvtable[0][i].to_string() << ' ';
-	}
-}
-
-Value iterativedeepening(Board &board, SearchVars &sv, int quiet) {
-
-}
-
-std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t maxnodes, int quiet) {
-	g_quiet = quiet;
-
-	uint64_t soft_nodes = 1e18;
-
-	std::cout << std::fixed << std::setprecision(0);
-	early_exit = exit_allowed = false;
-	start = clock();
-	mxtime = time;
-	if (maxnodes != 1e18) {
-		mx_nodes = 1000000;
-		soft_nodes = maxnodes;
-	}
-
-	SearchVars searchvars[MAX_THREADS] = {};
-	for (int i = 0; i < num_threads; i++) {
-		for (int j = 0; j < NINPUTS * 2; j++) {
-			for (int k = 0; k < NINPUTS * 2; k++) {
-				for (int l = 0; l < HL_SIZE; l++) {
-					searchvars[i].bs[j][k].w_acc.val[l] = nnue_network.accumulator_biases[l];
-					searchvars[i].bs[j][k].b_acc.val[l] = nnue_network.accumulator_biases[l];
-				}
-				memset(searchvars[i].bs[j][k].mailbox, 0, sizeof(searchvars[i].bs[j][k].mailbox));
-			}
+void iterativedeepening(ThreadInfo &ti, int depth) {
+	for (int i = 0; i < 64; i++) {
+		for (int j = 0; j < 64; j++) {
+			ti.thread_hist.history[0][i][j] /= 2;
+			ti.thread_hist.history[1][i][j] /= 2;
 		}
 	}
-	searchvars[0].is_main = true;
 
-	Value static_eval = eval(board, &searchvars[0].bs[0][0]) * (board.side ? -1 : 1);
+	Board &board = ti.board;
+
+	Value static_eval = eval(board, (BoardState *)ti.bs) * (board.side ? -1 : 1);
 
 	Move best_move = NullMove;
 	Value eval = -VALUE_INFINITE;
-	bool aspiration_enabled = true;
 	for (int d = 1; d <= depth; d++) {
 		Value alpha = -VALUE_INFINITE, beta = VALUE_INFINITE;
-		Value window_size = ASPIRATION_WINDOW;
-		
-		if (eval != -VALUE_INFINITE && aspiration_enabled) {
+		Value window_sz = ASPIRATION_WINDOW;
+
+		if (eval != -VALUE_INFINITE) {
 			/**
 			 * Aspiration windows work by searching a small window around the expected value
 			 * of the position. By having a smaller window, our search runs faster. 
@@ -680,154 +635,141 @@ std::pair<Move, Value> search(Board &board, int64_t time, int depth, int64_t max
 			 * If we fail either high or low out of this window, we gradually expand the
 			 * window size, eventually getting to a full-width search.
 			 */
-			alpha = eval - window_size;
-			beta = eval + window_size;
+			alpha = eval - window_sz;
+			beta = eval + window_sz;
 		}
 
-		pzstd::vector<Value> results;
-		
-		for (int t = 0; t < num_threads; t++) {
-			auto result = __recurse(board, searchvars[t], d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
-			
-			// Gradually expand the window if we fail high or low
-			while ((result >= beta || result <= alpha) && window_size < VALUE_INFINITE / 4) {
-				if (result >= beta) {
-					// Fail high - expand upper bound
-					beta = eval + window_size * 2;
-					if (beta >= VALUE_INFINITE / 4) beta = VALUE_INFINITE;
-				}
-				if (result <= alpha) {
-					// Fail low - expand lower bound  
-					alpha = eval - window_size * 2;
-					if (alpha <= -VALUE_INFINITE / 4) alpha = -VALUE_INFINITE;
-				}
-				window_size *= 2;
-				result = __recurse(board, searchvars[t], d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
-				if (early_exit) break;
-			}
-			if (early_exit) break;
+		auto result = __recurse(ti, d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
 
-			results.push_back(result);
+		// Gradually expand the window if we fail high or low
+		while ((result >= beta || result <= alpha) && window_sz < VALUE_INFINITE / 4) {
+			if (result >= beta) {
+				// Fail high - expand upper bound
+				beta = eval + window_sz * 2;
+				if (beta >= VALUE_INFINITE / 4) beta = VALUE_INFINITE;
+			}
+			if (result <= alpha) {
+				// Fail low - expand lower bound  
+				alpha = eval - window_sz * 2;
+				if (alpha <= -VALUE_INFINITE / 4) alpha = -VALUE_INFINITE;
+			}
+			window_sz *= 2;
+			result = __recurse(ti, d, alpha, beta, board.side ? -1 : 1, 1, false, 0, true);
+			if (stop_search) break;
 		}
-
+		if (stop_search) break;
 		eval = result;
-		best_move = pvtable[0][0];
-
-		bool best_iscapt = (board.piece_boards[OPPOCC(board.side)] & square_bits(best_move.dst()));
-		bool best_ispromo = (best_move.type() == PROMOTION);
-		bool in_check = false;
-		if (board.side == WHITE) {
-			in_check = board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(WHITE)]), BLACK) > 0;
-		} else {
-			in_check = board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)]), WHITE) > 0;
-		}
-
-		#ifndef NOUCI
-		if (!quiet) {
-			if (abs(eval) >= VALUE_MATE_MAX_PLY) {
-				std::cout << "info depth " << d << " seldepth " << seldepth << " score mate " << (VALUE_MATE - abs(eval) + 1) / 2 * (eval > 0 ? 1 : -1) << " nodes "
-				<< nodes << " nps " << (nodes / ((double)(clock() - start) / CLOCKS_PER_SEC)) << " pv ";
-				__print_pv(1);
-				std::cout << "hashfull " << (get_ttable_sz(board) * 1000) << " time " << (clock() - start) / CLOCKS_PER_MS << std::endl;
-			} else {
-				std::cout << "info depth " << d << " seldepth " << seldepth << " score cp " << eval << " nodes " << nodes << " nps "
-				<< (nodes / ((double)(clock() - start) / CLOCKS_PER_SEC)) << " pv ";
-				__print_pv();
-				std::cout << "hashfull " << (get_ttable_sz(board) * 1000) << " time " << (clock() - start) / CLOCKS_PER_MS << std::endl;
+		best_move = ti.pvtable[0][0];
+		
+		if (ti.is_main) {
+			uint64_t tot_nodes = 0;
+			for (int t = 0; t < num_threads; t++) {
+				tot_nodes += nodes[t]; // ig this is dangerous but whatever
 			}
-		} else if (quiet == 2) { // quiet level: formatted output
-			auto format_number = [](uint64_t num) -> std::string {
-				std::string str = std::to_string(num);
-				int len = str.length();
-				for (int i = len - 3; i > 0; i -= 3) {
-					str.insert(i, ",");
-				}
-				return str;
-			}; // actually cooked
-
-			uint64_t time_ms = (clock() - start) / CLOCKS_PER_MS;
-			uint64_t nps = time_ms > 0 ? (nodes * 1000 / time_ms) : 0;
-			uint32_t hashfull = get_ttable_sz(board) * 1000 / ttable.mxsize();
 
-			std::string score_color;
-			std::string score_text;
-
-			if (abs(eval) >= VALUE_MATE_MAX_PLY) {
-				int mate_moves = (VALUE_MATE - abs(eval) + 1) / 2 * (eval > 0 ? 1 : -1);
-				score_color = (mate_moves > 0) ? GREEN : RED;
-				score_text = "mate " + std::to_string(mate_moves);
+			// UCI output from main thread only
+			auto time_elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
+			std::cout << "info depth " << d << " score cp " << eval << " time " << time_elapsed << " nodes " << tot_nodes << " nps "
+					  << (time_elapsed ? (tot_nodes * 1000 / time_elapsed) : tot_nodes) << " hashfull " << (int)(get_ttable_sz() * 100) << " pv";
+			for (int ply = 0; ply < ti.pvlen[0]; ply++) {
+				std::cout << " " << ti.pvtable[0][ply].to_string();
+			}
+			std::cout << std::endl;
+
+			// only do time management on main thread
+			bool best_iscapt = board.is_capture(best_move);
+			bool best_ispromo = (best_move.type() == PROMOTION);
+			bool in_check = false;
+			if (board.side == WHITE) {
+				in_check = board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(WHITE)]), BLACK) > 0;
 			} else {
-				int cp_score = eval;
-				if (cp_score > 200) score_color = GREEN;
-				else if (cp_score > 0) score_color = YELLOW;
-				else if (cp_score > -200) score_color = MAGENTA;
-				else score_color = RED;
-				score_text = std::to_string(cp_score * (board.side ? -1 : 1)) + " cp";
+				in_check = board.control(__tzcnt_u64(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)]), WHITE) > 0;
 			}
 
-			if (d > 1)
-				std::cout << "\033[21A\033[J"; // Move cursor up 9 lines and clear
+			double soft = 0.5;
+			if (depth >= 6 && !best_iscapt && !best_ispromo && !in_check) {
+				// adjust soft limit based on complexity
+				Value complexity = abs(eval - static_eval);
+				double factor = std::clamp(complexity / 200.0, 0.0, 1.0);
+				// higher complexity = spend more time, lower complexity = spend less time
+				soft = 0.3 + 0.4 * factor;
+			}
 
-			int moves = 0;
-			for (int i = 0; i < pvlen[0]; i++) {
-				if (pvtable[0][i] == NullMove) break;
-				board.make_move(pvtable[0][i]);
-				moves++;
+			uint64_t bm_nodes = nodecnt[best_move.src()][best_move.dst()];
+			double node_adjustment = 1.5 - (bm_nodes / (double)tot_nodes);
+			soft *= node_adjustment;
+			if (time_elapsed > mxtime * soft) {
+				// We probably won't be able to complete the next ID loop
+				stop_search = true;
+				break;
 			}
-			board.print_board_pretty();
-			while (moves--) board.unmake_move();
-
-			std::cout << CYAN "┌─────────── " BOLD "Depth " << d << RESET CYAN " ───────────┐" RESET << std::endl;
-			std::cout << CYAN "│ " YELLOW "Depth:    " RESET BOLD << d << RESET CYAN " (" << seldepth << " sel)" RESET << std::endl;
-			std::cout << CYAN "│ " YELLOW "Score:    " RESET << score_color << BOLD << score_text << RESET << std::endl;
-			std::cout << CYAN "│ " YELLOW "Nodes:    " RESET << BOLD << format_number(nodes) << RESET << std::endl;
-			std::cout << CYAN "│ " YELLOW "Speed:    " RESET << BOLD << format_number(nps) << RESET " nps" << std::endl;
-			std::cout << CYAN "│ " YELLOW "Time:     " RESET << BOLD << time_ms << RESET " ms" << std::endl;
-			std::cout << CYAN "│ " YELLOW "Hash:     " RESET << BOLD << hashfull / 10.0 << RESET "%" << std::endl;
-			std::cout << CYAN "│ " YELLOW "Short PV: " RESET << BLUE;
-			__print_pv_clipped(abs(eval) >= VALUE_MATE_MAX_PLY);
-			std::cout << RESET << std::endl;
-			std::cout << CYAN "└────────────────────────────────┘" RESET << std::endl;
 		}
-		#endif
-		
-		exit_allowed = true;
+	}
 
-		if (nodes >= soft_nodes) break; // soft node limit
-		
-		// if (abs(eval) >= VALUE_MATE_MAX_PLY) {
-		// 	return {best_move, eval};
-		// 	// We don't need to search further, we found mate
-		// }
-
-		int time_elapsed = (clock() - start) / CLOCKS_PER_MS;
-		double soft = 0.5;
-		if (depth >= 6 && !best_iscapt && !best_ispromo && !in_check) {
-			// adjust soft limit based on complexity
-			Value complexity = abs(eval - static_eval);
-			double factor = std::clamp(complexity / 200.0, 0.0, 1.0);
-			// higher complexity = spend more time, lower complexity = spend less time
-			soft = 0.3 + 0.4 * factor;
+	ti.eval = eval;
+	stop_search = true;
+}
+
+std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, int depth, int64_t maxnodes, int quiet) {
+	memset(nodecnt, 0, sizeof(nodecnt));
+
+	mxtime = time;
+	mx_nodes = maxnodes;
+	start = std::chrono::steady_clock::now();
+	stop_search = false;
+
+	Move best_move = NullMove;
+	Value eval = 0;
+
+	std::vector<std::thread> thread_handles;
+
+	for (int t = 0; t < num_threads; t++) {
+		ThreadInfo &ti = threads[t];
+		std::copy(&board, &board + 1, &ti.board);
+		nodes[t] = 0;
+		ti.id = t;
+		ti.is_main = (t == 0);
+		// don't clear search vars here; keep history
+		thread_handles.emplace_back(iterativedeepening, std::ref(ti), depth);
+	}
+
+	while (!stop_search) {
+		std::this_thread::sleep_for(std::chrono::milliseconds(5));
+	}
+
+	for (int t = 0; t < num_threads; t++) {
+		thread_handles[t].join();
+	}
+
+	// obtain best move through thread voting
+	int votes[64][64] = {};
+	for (int t = 0; t < num_threads; t++) {
+		ThreadInfo &ti = threads[t];
+		Move tbest = ti.pvtable[0][0];
+		if (tbest != NullMove) {
+			votes[tbest.src()][tbest.dst()]++;
 		}
-		uint64_t bm_nodes = nodecnt[best_move.src()][best_move.dst()];
-		double node_adjustment = 1.5 - (bm_nodes / (double)nodes);
-		soft *= node_adjustment;
-		if (time_elapsed > mxtime * soft) {
-			// We probably won't be able to complete the next ID loop
-			break;
+	}
+
+	int max_votes = 0;
+	for (int i = 0; i < 64; i++) {
+		for (int j = 0; j < 64; j++) {
+			if (votes[i][j] > max_votes) {
+				max_votes = votes[i][j];
+				best_move = Move(i, j);
+			}
 		}
 	}
 
+	eval = threads[0].eval;
+
 	return {best_move, eval};
 }
 
-void clear_search_vars(SearchVars &sv) {
-	sv.nodes = sv.seldepth = 0;
-	early_exit = exit_allowed = false;
+void clear_search_vars(ThreadInfo &ti) {
+	ti.board.reset_startpos();
+	memset(&ti.thread_hist, 0, sizeof(History));
 	for (int i = 0; i < MAX_PLY; i++) {
-		sv.pvlen[i] = 0;
-		sv.line[i] = SSEntry();
+		ti.line[i] = SSEntry();
 	}
-
-	memset(&sv.history, 0, sizeof(sv.history));
 }
diff --git a/engine/search.hpp b/engine/search.hpp
index 29608ff..7461e33 100644
--- a/engine/search.hpp
+++ b/engine/search.hpp
@@ -48,26 +48,38 @@
 // This is the margin for history pruning (in centipawns)
 #define HISTORY_MARGIN 2000
 
-extern uint64_t nodes;
+extern uint64_t nodes[MAX_THREADS];
 extern uint16_t num_threads;
 
-struct SearchVars {
+struct ThreadInfo {
 	Board board;
-	uint64_t nodes = 0;
-	uint64_t nodecnt[64][64] = {{}};
 	int seldepth = 0;
+    Value eval = 0;
+    int id = 0;
 	bool is_main = false;
-	History history;
+	History thread_hist;
 	SSEntry line[MAX_PLY] = {};
 	Move pvtable[MAX_PLY][MAX_PLY];
-	int pvlen[MAX_PLY];
+	int pvlen[MAX_PLY] = {};
 	BoardState bs[NINPUTS * 2][NINPUTS * 2];
-};
 
-std::pair<Move, Value> search(Board &board, int64_t time = 1e9, int depth = MAX_PLY, int64_t nodes = 1e18, int quiet = 0);
+    void set_bs() {
+        for (int i = 0; i < NINPUTS * 2; i++) {
+            for (int j = 0; j < NINPUTS * 2; j++) {
+                for (int k = 0; k < HL_SIZE; k++) {
+                    bs[i][j].w_acc.val[k] = nnue_network.accumulator_biases[k];
+                    bs[i][j].b_acc.val[k] = nnue_network.accumulator_biases[k];
+                }
+                for (int k = 0; k < 64; k++) {
+                    bs[i][j].mailbox[k] = NO_PIECE;
+                }
+            }
+        }
+    }
+};
 
-pzstd::vector<std::pair<Move, Value>> search_multipv(Board &board, int multipv, int64_t time = 1e9, int depth = MAX_PLY, int64_t maxnodes = 1e18, int quiet = 0);
+std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time = 1e9, int depth = MAX_PLY, int64_t nodes = 1e18, int quiet = 0);
 
 uint64_t perft(Board &board, int depth);
 
-void clear_search_vars();
+void clear_search_vars(ThreadInfo &ti);
diff --git a/engine/ttable.hpp b/engine/ttable.hpp
index a0d004d..14d82ec 100644
--- a/engine/ttable.hpp
+++ b/engine/ttable.hpp
@@ -5,8 +5,6 @@
 
 #define DEFAULT_TT_SIZE (16 * 1024 * 1024 / sizeof(TTable::TTBucket)) // 16 MB
 
-extern TTable ttable;
-
 enum TTFlag {
 	EXACT = 0,
 	LOWER_BOUND = 1, // eval might be higher than stored value
@@ -60,5 +58,13 @@ struct TTable {
 
 	TTEntry *probe(uint64_t key);
 
+	void resize(int size) {
+		delete[] TT;
+		TT_SIZE = size;
+		TT = new TTBucket[size];
+	}
+
 	constexpr uint64_t mxsize() const { return TT_SIZE * 2; }
 };
+
+extern TTable ttable;

From c8b60e6afcf0d7968fd008d7272c40e109fc641e Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 00:23:05 -0800
Subject: [PATCH 03/13] Pick best thread by depth, then |eval|, instead of voting

Bench: 9610168
---
 engine/search.cpp | 31 ++++++++++---------------------
 engine/search.hpp |  2 +-
 2 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index d5853cd..905f131 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -704,6 +704,8 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 				break;
 			}
 		}
+
+		ti.maxdepth = d;
 	}
 
 	ti.eval = eval;
@@ -740,30 +742,17 @@ std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, i
 	for (int t = 0; t < num_threads; t++) {
 		thread_handles[t].join();
 	}
-
-	// obtain best move through thread voting
-	int votes[64][64] = {};
-	for (int t = 0; t < num_threads; t++) {
-		ThreadInfo &ti = threads[t];
-		Move tbest = ti.pvtable[0][0];
-		if (tbest != NullMove) {
-			votes[tbest.src()][tbest.dst()]++;
+	
+	// find best 
+	ThreadInfo &best_thread = threads[0];
+	for (int t = 1; t < num_threads; t++) {
+		if (threads[t].maxdepth > best_thread.maxdepth ||
+			(threads[t].maxdepth == best_thread.maxdepth && abs(threads[t].eval) > abs(best_thread.eval))) {
+			best_thread = threads[t];
 		}
 	}
 
-	int max_votes = 0;
-	for (int i = 0; i < 64; i++) {
-		for (int j = 0; j < 64; j++) {
-			if (votes[i][j] > max_votes) {
-				max_votes = votes[i][j];
-				best_move = Move(i, j);
-			}
-		}
-	}
-
-	eval = threads[0].eval;
-
-	return {best_move, eval};
+	return {best_thread.pvtable[0][0], best_thread.eval};
 }
 
 void clear_search_vars(ThreadInfo &ti) {
diff --git a/engine/search.hpp b/engine/search.hpp
index 7461e33..712ea83 100644
--- a/engine/search.hpp
+++ b/engine/search.hpp
@@ -53,7 +53,7 @@ extern uint16_t num_threads;
 
 struct ThreadInfo {
 	Board board;
-	int seldepth = 0;
+	int maxdepth = 0;
     Value eval = 0;
     int id = 0;
 	bool is_main = false;

From ca049bbe211b066fca5cd5b693432c7147a9811e Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 00:42:22 -0800
Subject: [PATCH 04/13] Remove thread selection; always use main thread's result

Bench: 9610168
---
 engine/search.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index 905f131..ceae21b 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -742,15 +742,8 @@ std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, i
 	for (int t = 0; t < num_threads; t++) {
 		thread_handles[t].join();
 	}
-	
-	// find best 
+
 	ThreadInfo &best_thread = threads[0];
-	for (int t = 1; t < num_threads; t++) {
-		if (threads[t].maxdepth > best_thread.maxdepth ||
-			(threads[t].maxdepth == best_thread.maxdepth && abs(threads[t].eval) > abs(best_thread.eval))) {
-			best_thread = threads[t];
-		}
-	}
 
 	return {best_thread.pvtable[0][0], best_thread.eval};
 }

From 8a97cef488d2b031cbf64de4503fbe4b2a2851eb Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 11:35:04 -0800
Subject: [PATCH 05/13] Copy ttentry to avoid it changing during search

Bench: 11014934
---
 engine/search.cpp | 48 +++++++++++++++++++++++------------------------
 engine/ttable.cpp |  2 +-
 engine/ttable.hpp |  2 ++
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index ceae21b..c18b37a 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -142,28 +142,28 @@ Value quiesce(ThreadInfo &ti, Value alpha, Value beta, int side, int depth, bool
 	if (depth >= MAX_PLY)
 		return eval(ti.board, (BoardState *)ti.bs) * side; // Just in case
 
-	TTable::TTEntry *tentry = ttable.probe(ti.board.zobrist);
+	TTable::TTEntry tentry = *ttable.probe(ti.board.zobrist);
 	Value tteval = 0;
-	if (tentry && tentry->valid()) tteval = tt_to_score(tentry->eval, depth);
-	if (!pv && tentry && tentry->valid()) {
-		if (tentry->bound() == EXACT) return tteval;
-		if (tentry->bound() == LOWER_BOUND) {
+	if (tentry.valid()) tteval = tt_to_score(tentry.eval, depth);
+	if (!pv && tentry.valid()) {
+		if (tentry.bound() == EXACT) return tteval;
+		if (tentry.bound() == LOWER_BOUND) {
 			if (tteval >= beta) return tteval;
 		}
-		if (tentry->bound() == UPPER_BOUND) {
+		if (tentry.bound() == UPPER_BOUND) {
 			if (tteval <= alpha) return tteval;
 		}
 	}
 
 	Value stand_pat = 0;
 	Value raw_eval = 0;
-	stand_pat = tentry ? tentry->s_eval : eval(ti.board, (BoardState *)ti.bs) * side;
+	stand_pat = tentry.valid() ? tentry.s_eval : eval(ti.board, (BoardState *)ti.bs) * side;
 	raw_eval = stand_pat;
 	ti.thread_hist.apply_correction(ti.board, stand_pat);
-	if (tentry && tentry->valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > stand_pat ? UPPER_BOUND : LOWER_BOUND))
+	if (tentry.valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry.bound() != (tteval > stand_pat ? UPPER_BOUND : LOWER_BOUND))
 		stand_pat = tteval;
 
-	if (!tentry) ttable.store(ti.board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
+	if (!tentry.valid()) ttable.store(ti.board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
 
 	// If it's a mate, stop here since there's no point in searching further
 	// Theoretically shouldn't ever happen because of stand pat
@@ -310,21 +310,21 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 	bool ttpv = pv;
 
 	// Check for TTable cutoff
-	TTable::TTEntry *tentry = ttable.probe(board.zobrist);
+	TTable::TTEntry tentry = *ttable.probe(board.zobrist);
 	Value tteval = 0;
-	if (tentry && tentry->valid()) tteval = tt_to_score(tentry->eval, ply);
-	if (!pv && tentry && tentry->depth >= depth && ti.line[ply].excl == NullMove) {
+	if (tentry.valid()) tteval = tt_to_score(tentry.eval, ply);
+	if (!pv && tentry.valid() && tentry.depth >= depth && ti.line[ply].excl == NullMove) {
 		// Check for cutoffs
-		if (tentry->bound() == EXACT) {
+		if (tentry.bound() == EXACT) {
 			return tteval;
-		} else if (tentry->bound() == LOWER_BOUND && tteval >= beta) {
+		} else if (tentry.bound() == LOWER_BOUND && tteval >= beta) {
 			return tteval;
-		} else if (tentry->bound() == UPPER_BOUND && tteval <= alpha) {
+		} else if (tentry.bound() == UPPER_BOUND && tteval <= alpha) {
 			return tteval;
 		}
 	}
 
-	if (tentry) ttpv |= tentry->ttpv();
+	if (tentry.valid()) ttpv |= tentry.ttpv();
 
 	Value cur_eval = 0;
 	Value raw_eval = 0; // For CorrHist
@@ -332,13 +332,13 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 	uint64_t pawn_hash = 0;
 	if (!in_check) {
 		pawn_hash = board.pawn_struct_hash();
-		cur_eval = tentry ? tentry->s_eval : eval(board, (BoardState *)ti.bs) * side;
+		cur_eval = tentry.valid() ? tentry.s_eval : eval(board, (BoardState *)ti.bs) * side;
 		raw_eval = cur_eval;
 		ti.thread_hist.apply_correction(board, cur_eval);
 		tt_corr_eval = cur_eval;
-		if (tentry && tentry->valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > cur_eval ? UPPER_BOUND : LOWER_BOUND))
+		if (tentry.valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry.bound() != (tteval > cur_eval ? UPPER_BOUND : LOWER_BOUND))
 			tt_corr_eval = tteval;
-		else if (!tentry) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
+		else if (!tentry.valid()) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
 	}
 
 	ti.line[ply].eval = in_check ? VALUE_NONE : cur_eval; // If in check, we don't have a valid eval yet
@@ -396,9 +396,9 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 
 	Value best = -VALUE_INFINITE;
 
-	MovePicker mp(board, &ti.line[ply], ply, &ti.thread_hist, tentry);
+	MovePicker mp(board, &ti.line[ply], ply, &ti.thread_hist, &tentry);
 
-	if ((pv || cutnode) && depth > 4 && !(tentry && tentry->best_move != NullMove)) {
+	if ((pv || cutnode) && depth > 4 && !(tentry.valid() && tentry.best_move != NullMove)) {
 		depth -= 2; // Internal iterative reductions
 	}
 
@@ -423,7 +423,7 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 		
 		int extension = 0;
 
-		if (ti.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry && move == tentry->best_move && tentry->depth >= depth - 3 && tentry->bound() != UPPER_BOUND) {
+		if (ti.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry.valid() && move == tentry.best_move && tentry.depth >= depth - 3 && tentry.bound() != UPPER_BOUND) {
 			// Singular extension
 			ti.line[ply].excl = move;
 			Value singular_beta = tteval - 4 * depth;
@@ -520,7 +520,7 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 			score = -__recurse(ti, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
 		}
 		if (pv && (i == 0 || score > alpha)) {
-			if (tentry && move == tentry->best_move && tentry->depth > 1)
+			if (tentry.valid() && move == tentry.best_move && tentry.depth > 1)
 				newdepth = std::max((int)newdepth, 1); // Make sure we don't enter QS if we have an available TT move
 			score = -__recurse(ti, newdepth, -beta, -alpha, -side, 1, false, ply+1);
 		}
@@ -602,7 +602,7 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 	}
 
 	if (ti.line[ply].excl == NullMove) {
-		Move tt_move = best_move != NullMove ? best_move : tentry ? tentry->best_move : NullMove;
+		Move tt_move = best_move != NullMove ? best_move : tentry.valid() ? tentry.best_move : NullMove;
 		ttable.store(board.zobrist, score_to_tt(best, ply), raw_eval, depth, flag, ttpv, tt_move, board.halfmove);
 	}
 
diff --git a/engine/ttable.cpp b/engine/ttable.cpp
index 2a53662..2fbb785 100644
--- a/engine/ttable.cpp
+++ b/engine/ttable.cpp
@@ -60,5 +60,5 @@ TTable::TTEntry *TTable::probe(uint64_t key) {
 			continue;
 		return entry;
 	}
-	return nullptr;
+	return &NO_ENTRY;
 }
diff --git a/engine/ttable.hpp b/engine/ttable.hpp
index 14d82ec..1165258 100644
--- a/engine/ttable.hpp
+++ b/engine/ttable.hpp
@@ -28,6 +28,8 @@ struct TTable {
 		const bool ttpv() const { return flags >> 2; }
 	};
 
+	TTEntry NO_ENTRY = TTEntry();
+
 	struct TTBucket {
 		TTEntry entries[2];
 	};

From 76a315482df24e5cf4eb9ebc97760f2d3d1229a0 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 18:30:03 -0800
Subject: [PATCH 06/13] Raise untimed search time cap from 1e9 to 1e18 ms

---
 engine/main.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/engine/main.cpp b/engine/main.cpp
index 0ac9a9f..642b957 100644
--- a/engine/main.cpp
+++ b/engine/main.cpp
@@ -137,9 +137,9 @@ void run_uci() {
 			int timeleft = board.side ? btime : wtime;
 			int inc = board.side ? binc : winc;
 			std::pair<Move, Value> res;
-			if (inf) res = search(board, tis, 1e9, MAX_PLY, 1e18, quiet);
-			else if (depth != -1) res = search(board, tis, 1e9, depth, 1e18, quiet);
-			else if (nodes != -1) res = search(board, tis, 1e9, MAX_PLY, nodes, quiet);
+			if (inf) res = search(board, tis, 1e18, MAX_PLY, 1e18, quiet);
+			else if (depth != -1) res = search(board, tis, 1e18, depth, 1e18, quiet);
+			else if (nodes != -1) res = search(board, tis, 1e18, MAX_PLY, nodes, quiet);
 			else if (movetime != -1) res = search(board, tis, movetime, MAX_PLY, 1e18, quiet);
 			else res = search(board, tis, timemgmt(timeleft, inc, online), MAX_PLY, 1e18, quiet);
 			std::cout << "bestmove " << res.first.to_string() << std::endl;

From cc506c75c28641be36aecfad1ede975aa9d916e2 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 21:40:53 -0800
Subject: [PATCH 07/13] Slight cleanups and make initialization faster

Bench: 11014934
---
 engine/eval.cpp   |  7 -------
 engine/main.cpp   | 15 +++++++++++----
 engine/ttable.cpp |  2 +-
 engine/ttable.hpp |  4 ++--
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/engine/eval.cpp b/engine/eval.cpp
index 819b9ca..80361e8 100644
--- a/engine/eval.cpp
+++ b/engine/eval.cpp
@@ -16,7 +16,6 @@ Value simple_eval(Board &board) {
 	return score;
 }
 
-#ifndef NO_UE
 Value eval(Board &board, BoardState *bs) {
 	if (!(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])) {
 		// If black has no king, this is mate for white
@@ -75,12 +74,6 @@ Value eval(Board &board, BoardState *bs) {
 	}
 	return score;
 }
-#else
-Value eval(Board &board, BoardState *bs) {
-	auto res = debug_eval(board);
-	return res[( _mm_popcnt_u64(board.piece_boards[OCC(WHITE)] | board.piece_boards[OCC(BLACK)]) - 2) / 4];
-}
-#endif
 
 std::array<Value, 8> debug_eval(Board &board) {
 	if (!(board.piece_boards[KING] & board.piece_boards[OCC(BLACK)])) {
diff --git a/engine/main.cpp b/engine/main.cpp
index 642b957..a52ab30 100644
--- a/engine/main.cpp
+++ b/engine/main.cpp
@@ -11,13 +11,11 @@
 #include "search.hpp"
 #include "ttable.hpp"
 
-BoardState bs[NINPUTS * 2][NINPUTS * 2];
-
 // Options
 int TT_SIZE = DEFAULT_TT_SIZE;
 bool quiet = false, online = false;
 
-ThreadInfo tis[MAX_THREADS];
+ThreadInfo *tis;
 
 void run_uci() {
 	std::string command;
@@ -56,6 +54,14 @@ void run_uci() {
 				quiet = optionvalue == "true";
 			} else if (optionname == "Threads") {
 				num_threads = std::stoi(optionvalue);
+				if (num_threads < 1 || num_threads > 64) {
+					std::cerr << "Invalid number of threads: " << num_threads << std::endl;
+					num_threads = 1;
+				}
+				delete[] tis;
+				tis = new ThreadInfo[num_threads];
+				for (int i = 0; i < num_threads; i++) tis[i].set_bs();
+				std::cout << "info string Using " << num_threads << " threads" << std::endl;
 			}
 		} else if (command == "ucinewgame") {
 			board = Board();
@@ -148,7 +154,8 @@ void run_uci() {
 }
 
 __attribute__((weak)) int main(int argc, char *argv[]) {
-	for (int i = 0; i < MAX_THREADS; i++) tis[i].set_bs();
+	tis = new ThreadInfo[1]; // single thread for now
+	tis[0].set_bs();
 	if (argc == 2 && std::string(argv[1]) == "bench") {
 		const std::string bench_positions[] = {
 			"r3k2r/2pb1ppp/2pp1q2/p7/1nP1B3/1P2P3/P2N1PPP/R2QK2R w KQkq - 0 14",
diff --git a/engine/ttable.cpp b/engine/ttable.cpp
index 2fbb785..1c7f4e2 100644
--- a/engine/ttable.cpp
+++ b/engine/ttable.cpp
@@ -51,7 +51,7 @@ void TTable::store(uint64_t key, Value eval, Value s_eval, uint8_t depth, uint8_
 	always_entry->age = age;
 }
 
-TTable::TTEntry *TTable::probe(uint64_t key) {
+const TTable::TTEntry *TTable::probe(uint64_t key) {
 	TTBucket *bucket = TT + (key % TT_SIZE);
 	key >>= 32;
 	for (int i = 0; i < 2; i++) {
diff --git a/engine/ttable.hpp b/engine/ttable.hpp
index 1165258..8757ca3 100644
--- a/engine/ttable.hpp
+++ b/engine/ttable.hpp
@@ -28,7 +28,7 @@ struct TTable {
 		const bool ttpv() const { return flags >> 2; }
 	};
 
-	TTEntry NO_ENTRY = TTEntry();
+	const TTEntry NO_ENTRY = TTEntry();
 
 	struct TTBucket {
 		TTEntry entries[2];
@@ -58,7 +58,7 @@ struct TTable {
 
 	void store(uint64_t key, Value eval, Value s_eval, uint8_t depth, uint8_t bound, bool ttpv, Move best_move, uint8_t age);
 
-	TTEntry *probe(uint64_t key);
+	const TTEntry *probe(uint64_t key);
 
 	void resize(int size) {
 		delete[] TT;

From 72b35f2ea94e0599febefb129232a1d56ba3f4fc Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 22:27:26 -0800
Subject: [PATCH 08/13] Add stop command

Bench: 11014934
---
 engine/main.cpp   | 27 ++++++++++++++++-----------
 engine/search.cpp | 10 ++++++----
 engine/search.hpp |  2 ++
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/engine/main.cpp b/engine/main.cpp
index a52ab30..b4d8ba5 100644
--- a/engine/main.cpp
+++ b/engine/main.cpp
@@ -18,6 +18,7 @@ bool quiet = false, online = false;
 ThreadInfo *tis;
 
 void run_uci() {
+	std::thread searchthread;
 	std::string command;
 	Board board = Board();
 	while (getline(std::cin, command)) {
@@ -89,12 +90,11 @@ void run_uci() {
 				}
 			}
 		} else if (command == "quit") {
+			if (searchthread.joinable()) searchthread.join();
 			exit(0);
 		} else if (command == "stop") {
-			// stop the search thread
-			// if (searchthread.joinable()) {
-			// 	searchthread.join();
-			// }
+			stop_search = true;
+			if (searchthread.joinable()) searchthread.join();
 		} else if (command == "eval") {
 			std::array<Value, 8> score = debug_eval(board);
 			board.print_board();
@@ -108,6 +108,8 @@ void run_uci() {
 				std::cout << std::endl;
 			}
 		} else if (command.substr(0, 2) == "go") {
+			if (!stop_search) continue; // ignore
+			if (searchthread.joinable()) searchthread.join();
 #ifndef HCE
 			std::cout << "info string Using " << NNUE_PATH << " for evaluation" << std::endl;
 #endif
@@ -142,13 +144,16 @@ void run_uci() {
 			}
 			int timeleft = board.side ? btime : wtime;
 			int inc = board.side ? binc : winc;
-			std::pair<Move, Value> res;
-			if (inf) res = search(board, tis, 1e18, MAX_PLY, 1e18, quiet);
-			else if (depth != -1) res = search(board, tis, 1e18, depth, 1e18, quiet);
-			else if (nodes != -1) res = search(board, tis, 1e18, MAX_PLY, nodes, quiet);
-			else if (movetime != -1) res = search(board, tis, movetime, MAX_PLY, 1e18, quiet);
-			else res = search(board, tis, timemgmt(timeleft, inc, online), MAX_PLY, 1e18, quiet);
-			std::cout << "bestmove " << res.first.to_string() << std::endl;
+			searchthread = std::thread(
+				[&]() {
+					std::cout << "info string Starting search..." << std::endl;
+					if (inf) search(board, tis, 1e18, MAX_PLY, 1e18, 0);
+					else if (depth != -1) search(board, tis, 1e18, depth, 1e18, 0);
+					else if (nodes != -1) search(board, tis, 1e18, MAX_PLY, nodes, 0);
+					else if (movetime != -1) search(board, tis, movetime, MAX_PLY, 1e18, 0);
+					else search(board, tis, timemgmt(timeleft, inc, online), MAX_PLY, 1e18, 0);
+				}
+			);
 		}
 	}
 }
diff --git a/engine/search.cpp b/engine/search.cpp
index c18b37a..58a3615 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -4,13 +4,13 @@
 #define MOVENUM(x) ((((#x)[1] - '1') << 12) | (((#x)[0] - 'a') << 8) | (((#x)[3] - '1') << 4) | ((#x)[2] - 'a'))
 
 uint64_t mx_nodes = 1e18; // Maximum nodes to search
-bool stop_search = false;
+bool stop_search = true;
 std::chrono::steady_clock::time_point start;
 uint64_t mxtime = 1e18; // Maximum time to search in milliseconds
 
 uint16_t num_threads = 1;
 
-std::atomic<int> nodecnt[64][64] = {{}};
+std::atomic<uint64_t> nodecnt[64][64] = {{}};
 uint64_t nodes[MAX_THREADS] = {};
 
 uint64_t perft(Board &board, int depth) {
@@ -670,7 +670,7 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 			// UCI output from main thread only
 			auto time_elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
 			std::cout << "info depth " << d << " score cp " << eval << " time " << time_elapsed << " nodes " << tot_nodes << " nps "
-					  << (time_elapsed ? (tot_nodes * 1000 / time_elapsed) : tot_nodes) << " hashfull " << (int)(get_ttable_sz() * 100) << " pv";
+					  << (time_elapsed ? (tot_nodes * 1000 / time_elapsed) : tot_nodes) << " hashfull " << (int)(get_ttable_sz() * 1000) << " pv";
 			for (int ply = 0; ply < ti.pvlen[0]; ply++) {
 				std::cout << " " << ti.pvtable[0][ply].to_string();
 			}
@@ -710,10 +710,12 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 
 	ti.eval = eval;
 	stop_search = true;
+
+	std::cout << "bestmove " << best_move.to_string() << std::endl;
 }
 
 std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, int depth, int64_t maxnodes, int quiet) {
-	memset(nodecnt, 0, sizeof(nodecnt));
+	for (int i = 0; i < 64; i++) for (int j = 0; j < 64; j++) nodecnt[i][j] = 0;
 
 	mxtime = time;
 	mx_nodes = maxnodes;
diff --git a/engine/search.hpp b/engine/search.hpp
index 712ea83..4400049 100644
--- a/engine/search.hpp
+++ b/engine/search.hpp
@@ -48,6 +48,8 @@
 // This is the margin for history pruning (in centipawns)
 #define HISTORY_MARGIN 2000
 
+extern bool stop_search;
+
 extern uint64_t nodes[MAX_THREADS];
 extern uint16_t num_threads;
 

From aa49c364d62c3a0100a734e11d150e5adb7000bd Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 22:43:34 -0800
Subject: [PATCH 09/13] Fix node-count race; print bestmove only on main thread

Bench: 11014934
---
 engine/search.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index 58a3615..8f1644c 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -662,6 +662,10 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 		best_move = ti.pvtable[0][0];
 		
 		if (ti.is_main) {
+			// We must calculate best move nodes and total nodes at around the same time
+			// so that node counts don't change in between due to race conditions
+			// This is a really crude way of doing it (todo change later)
+			uint64_t bm_nodes = nodecnt[best_move.src()][best_move.dst()];
 			uint64_t tot_nodes = 0;
 			for (int t = 0; t < num_threads; t++) {
 				tot_nodes += nodes[t]; // ig this is dangerous but whatever
@@ -695,7 +699,6 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 				soft = 0.3 + 0.4 * factor;
 			}
 
-			uint64_t bm_nodes = nodecnt[best_move.src()][best_move.dst()];
 			double node_adjustment = 1.5 - (bm_nodes / (double)tot_nodes);
 			soft *= node_adjustment;
 			if (time_elapsed > mxtime * soft) {
@@ -711,7 +714,9 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 	ti.eval = eval;
 	stop_search = true;
 
-	std::cout << "bestmove " << best_move.to_string() << std::endl;
+	if (ti.is_main) {
+		std::cout << "bestmove " << best_move.to_string() << std::endl;
+	}
 }
 
 std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, int depth, int64_t maxnodes, int quiet) {
@@ -729,7 +734,7 @@ std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, i
 
 	for (int t = 0; t < num_threads; t++) {
 		ThreadInfo &ti = threads[t];
-		std::copy(&board, &board + 1, &ti.board);
+		ti.board = board;
 		nodes[t] = 0;
 		ti.id = t;
 		ti.is_main = (t == 0);

From 16fcf34ac2f6999660651c336f37d2453ca5ab94 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 22:48:39 -0800
Subject: [PATCH 10/13] Print mate scores as UCI "score mate N"

Bench: 11014934
---
 engine/search.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index 8f1644c..a080778 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -115,6 +115,16 @@ double get_ttable_sz() {
 	return cnt / 2048.0;
 }
 
+std::string score_to_uci(Value score) {
+	if (score >= VALUE_MATE_MAX_PLY) {
+		return "mate " + std::to_string((VALUE_MATE - score + 1) / 2);
+	} else if (score <= -VALUE_MATE_MAX_PLY) {
+		return "mate " + std::to_string((-VALUE_MATE - score) / 2);
+	} else {
+		return "cp " + std::to_string(score);
+	}
+}
+
 /**
  * Perform the quiescence search
  * 
@@ -673,7 +683,7 @@ void iterativedeepening(ThreadInfo &ti, int depth) {
 
 			// UCI output from main thread only
 			auto time_elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
-			std::cout << "info depth " << d << " score cp " << eval << " time " << time_elapsed << " nodes " << tot_nodes << " nps "
+			std::cout << "info depth " << d << " score " << score_to_uci(eval) << " time " << time_elapsed << " nodes " << tot_nodes << " nps "
 					  << (time_elapsed ? (tot_nodes * 1000 / time_elapsed) : tot_nodes) << " hashfull " << (int)(get_ttable_sz() * 1000) << " pv";
 			for (int ply = 0; ply < ti.pvlen[0]; ply++) {
 				std::cout << " " << ti.pvtable[0][ply].to_string();

From 905ca0861f7a1e17244333b7325c2a87d5eb4050 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 25 Nov 2025 18:34:16 -0800
Subject: [PATCH 11/13] change smp tt scheme to always replace

Bench: 10637494
---
 engine/main.cpp   |  2 +-
 engine/search.cpp |  5 ++--
 engine/ttable.cpp | 59 ++++++++---------------------------------------
 engine/ttable.hpp | 14 ++++-------
 4 files changed, 18 insertions(+), 62 deletions(-)

diff --git a/engine/main.cpp b/engine/main.cpp
index b4d8ba5..7fe2850 100644
--- a/engine/main.cpp
+++ b/engine/main.cpp
@@ -50,7 +50,7 @@ void run_uci() {
 					std::cerr << "Invalid hash size: " << optionint << std::endl;
 					continue;
 				}
-				TT_SIZE = optionint * 1024 * 1024 / sizeof(TTable::TTBucket);
+				TT_SIZE = optionint * 1024 * 1024 / sizeof(TTable::TTEntry);
 			} else if (optionname == "Quiet") {
 				quiet = optionvalue == "true";
 			} else if (optionname == "Threads") {
diff --git a/engine/search.cpp b/engine/search.cpp
index a080778..42b7918 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -109,10 +109,9 @@ double get_ttable_sz() {
 	int cnt = 0;
 	for (int i = 0; i < 1024; i++) {
 		if (i >= ttable.TT_SIZE) break;
-		if (ttable.TT[i].entries[0].valid()) cnt++;
-		if (ttable.TT[i].entries[1].valid()) cnt++;
+		if (ttable.TT[i].valid()) cnt++;
 	}
-	return cnt / 2048.0;
+	return cnt / 1024.0;
 }
 
 std::string score_to_uci(Value score) {
diff --git a/engine/ttable.cpp b/engine/ttable.cpp
index 1c7f4e2..e1ec9e9 100644
--- a/engine/ttable.cpp
+++ b/engine/ttable.cpp
@@ -3,62 +3,23 @@
 TTable ttable(DEFAULT_TT_SIZE);
 
 void TTable::store(uint64_t key, Value eval, Value s_eval, uint8_t depth, uint8_t bound, bool ttpv, Move best_move, uint8_t age) {
-	TTBucket *bucket = TT + (key % TT_SIZE);
+	TTEntry *entry = TT + (key % TT_SIZE);
 
 	key >>= 32; // Use upper 32 bits for the key (since we already verified bottom n bits)
-	
-	TTEntry *depth_entry = &bucket->entries[0];
-	TTEntry *always_entry = &bucket->entries[1];
-	if (depth_entry->key == key || always_entry->key == key) {
-		// Update an existing entry
-		if (depth_entry->key == key) {
-			depth_entry->eval = eval;
-			depth_entry->s_eval = s_eval;
-			depth_entry->depth = depth;
-			depth_entry->flags = bound | (ttpv ? TTPV : 0);
-			depth_entry->best_move = best_move;
-			depth_entry->age = age;
-		} else if (always_entry->key == key) {
-			always_entry->eval = eval;
-			always_entry->s_eval = s_eval;
-			always_entry->depth = depth;
-			always_entry->flags = bound | (ttpv ? TTPV : 0);
-			always_entry->best_move = best_move;
-			always_entry->age = age;
-		}
-		return;
-	}
 
-	// 1. Check if we can replace the depth entry
-	if (depth_entry->depth < depth || (depth_entry->depth == depth && depth_entry->age < age)) {
-		depth_entry->key = key;
-		depth_entry->eval = eval;
-		depth_entry->s_eval = s_eval;
-		depth_entry->depth = depth;
-		depth_entry->flags = bound | (ttpv ? TTPV : 0);
-		depth_entry->best_move = best_move;
-		depth_entry->age = age;
-		return;
-	}
-
-	// 2. Always replace the second entry
-	always_entry->key = key;
-	always_entry->eval = eval;
-	always_entry->s_eval = s_eval;
-	always_entry->depth = depth;
-	always_entry->flags = bound | (ttpv ? TTPV : 0);
-	always_entry->best_move = best_move;
-	always_entry->age = age;
+	entry->key = key;
+	entry->eval = eval;
+	entry->s_eval = s_eval;
+	entry->depth = depth;
+	entry->flags = bound | (ttpv ? TTPV : 0);
+	entry->best_move = best_move;
+	entry->age = age;
 }
 
 const TTable::TTEntry *TTable::probe(uint64_t key) {
-	TTBucket *bucket = TT + (key % TT_SIZE);
+	TTEntry *entry = TT + (key % TT_SIZE);
 	key >>= 32;
-	for (int i = 0; i < 2; i++) {
-		TTEntry *entry = &bucket->entries[i];
-		if (entry->key != key)
-			continue;
+	if (entry->key == key)
 		return entry;
-	}
 	return &NO_ENTRY;
 }
diff --git a/engine/ttable.hpp b/engine/ttable.hpp
index 8757ca3..4f45679 100644
--- a/engine/ttable.hpp
+++ b/engine/ttable.hpp
@@ -3,7 +3,7 @@
 #include "includes.hpp"
 #include "move.hpp"
 
-#define DEFAULT_TT_SIZE (16 * 1024 * 1024 / sizeof(TTable::TTBucket)) // 16 MB
+#define DEFAULT_TT_SIZE (16 * 1024 * 1024 / sizeof(TTable::TTEntry)) // 16 MB
 
 enum TTFlag {
 	EXACT = 0,
@@ -30,14 +30,10 @@ struct TTable {
 
 	const TTEntry NO_ENTRY = TTEntry();
 
-	struct TTBucket {
-		TTEntry entries[2];
-	};
-
-	TTBucket *TT;
+	TTEntry *TT;
 	int TT_SIZE;
 
-	TTable(int size) : TT_SIZE(size) { TT = new TTBucket[size]; }
+	TTable(int size) : TT_SIZE(size) { TT = new TTEntry[size]; }
 
 	~TTable() { delete[] TT; }
 
@@ -51,7 +47,7 @@ struct TTable {
 		if (this != &o) {
 			delete[] TT;
 			TT_SIZE = o.TT_SIZE;
-			TT = new TTBucket[TT_SIZE];
+			TT = new TTEntry[TT_SIZE];
 		}
 		return *this;
 	}
@@ -63,7 +59,7 @@ struct TTable {
 	void resize(int size) {
 		delete[] TT;
 		TT_SIZE = size;
-		TT = new TTBucket[size];
+		TT = new TTEntry[size];
 	}
 
 	constexpr uint64_t mxsize() const { return TT_SIZE * 2; }

From 5f290f2a9b114c69c52fd5d4e4af03736318e120 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Fri, 28 Nov 2025 00:42:18 -0800
Subject: [PATCH 12/13] Revert TT patches for SMP

Bench: 10410988
---
 engine/search.cpp | 53 ++++++++++++++++++++-------------------
 engine/ttable.cpp | 63 ++++++++++++++++++++++++++++++++++++++---------
 engine/ttable.hpp | 18 ++++++++------
 3 files changed, 89 insertions(+), 45 deletions(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index 42b7918..8169eec 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -109,9 +109,10 @@ double get_ttable_sz() {
 	int cnt = 0;
 	for (int i = 0; i < 1024; i++) {
 		if (i >= ttable.TT_SIZE) break;
-		if (ttable.TT[i].valid()) cnt++;
+		if (ttable.TT[i].entries[0].valid()) cnt++;
+		if (ttable.TT[i].entries[1].valid()) cnt++;
 	}
-	return cnt / 1024.0;
+	return cnt / 2048.0;
 }
 
 std::string score_to_uci(Value score) {
@@ -151,28 +152,28 @@ Value quiesce(ThreadInfo &ti, Value alpha, Value beta, int side, int depth, bool
 	if (depth >= MAX_PLY)
 		return eval(ti.board, (BoardState *)ti.bs) * side; // Just in case
 
-	TTable::TTEntry tentry = *ttable.probe(ti.board.zobrist);
+	TTable::TTEntry *tentry = ttable.probe(ti.board.zobrist);
 	Value tteval = 0;
-	if (tentry.valid()) tteval = tt_to_score(tentry.eval, depth);
-	if (!pv && tentry.valid()) {
-		if (tentry.bound() == EXACT) return tteval;
-		if (tentry.bound() == LOWER_BOUND) {
+	if (tentry) tteval = tt_to_score(tentry->eval, depth);
+	if (!pv && tentry) {
+		if (tentry->bound() == EXACT) return tteval;
+		if (tentry->bound() == LOWER_BOUND) {
 			if (tteval >= beta) return tteval;
 		}
-		if (tentry.bound() == UPPER_BOUND) {
+		if (tentry->bound() == UPPER_BOUND) {
 			if (tteval <= alpha) return tteval;
 		}
 	}
 
 	Value stand_pat = 0;
 	Value raw_eval = 0;
-	stand_pat = tentry.valid() ? tentry.s_eval : eval(ti.board, (BoardState *)ti.bs) * side;
+	stand_pat = tentry ? tentry->s_eval : eval(ti.board, (BoardState *)ti.bs) * side;
 	raw_eval = stand_pat;
 	ti.thread_hist.apply_correction(ti.board, stand_pat);
-	if (tentry.valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry.bound() != (tteval > stand_pat ? UPPER_BOUND : LOWER_BOUND))
+	if (tentry && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > stand_pat ? UPPER_BOUND : LOWER_BOUND))
 		stand_pat = tteval;
 
-	if (!tentry.valid()) ttable.store(ti.board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
+	if (!tentry) ttable.store(ti.board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, depth);
 
 	// If it's a mate, stop here since there's no point in searching further
 	// Theoretically shouldn't ever happen because of stand pat
@@ -319,21 +320,21 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 	bool ttpv = pv;
 
 	// Check for TTable cutoff
-	TTable::TTEntry tentry = *ttable.probe(board.zobrist);
+	TTable::TTEntry *tentry = ttable.probe(board.zobrist);
 	Value tteval = 0;
-	if (tentry.valid()) tteval = tt_to_score(tentry.eval, ply);
-	if (!pv && tentry.valid() && tentry.depth >= depth && ti.line[ply].excl == NullMove) {
+	if (tentry) tteval = tt_to_score(tentry->eval, ply);
+	if (!pv && tentry && tentry->depth >= depth && ti.line[ply].excl == NullMove) {
 		// Check for cutoffs
-		if (tentry.bound() == EXACT) {
+		if (tentry->bound() == EXACT) {
 			return tteval;
-		} else if (tentry.bound() == LOWER_BOUND && tteval >= beta) {
+		} else if (tentry->bound() == LOWER_BOUND && tteval >= beta) {
 			return tteval;
-		} else if (tentry.bound() == UPPER_BOUND && tteval <= alpha) {
+		} else if (tentry->bound() == UPPER_BOUND && tteval <= alpha) {
 			return tteval;
 		}
 	}
 
-	if (tentry.valid()) ttpv |= tentry.ttpv();
+	if (tentry) ttpv |= tentry->ttpv();
 
 	Value cur_eval = 0;
 	Value raw_eval = 0; // For CorrHist
@@ -341,13 +342,13 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 	uint64_t pawn_hash = 0;
 	if (!in_check) {
 		pawn_hash = board.pawn_struct_hash();
-		cur_eval = tentry.valid() ? tentry.s_eval : eval(board, (BoardState *)ti.bs) * side;
+		cur_eval = tentry ? tentry->s_eval : eval(board, (BoardState *)ti.bs) * side;
 		raw_eval = cur_eval;
 		ti.thread_hist.apply_correction(board, cur_eval);
 		tt_corr_eval = cur_eval;
-		if (tentry.valid() && abs(tteval) < VALUE_MATE_MAX_PLY && tentry.bound() != (tteval > cur_eval ? UPPER_BOUND : LOWER_BOUND))
+		if (tentry && abs(tteval) < VALUE_MATE_MAX_PLY && tentry->bound() != (tteval > cur_eval ? UPPER_BOUND : LOWER_BOUND))
 			tt_corr_eval = tteval;
-		else if (!tentry.valid()) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
+		else if (!tentry) ttable.store(board.zobrist, -VALUE_INFINITE, raw_eval, 0, NONE, false, NullMove, board.halfmove);
 	}
 
 	ti.line[ply].eval = in_check ? VALUE_NONE : cur_eval; // If in check, we don't have a valid eval yet
@@ -405,9 +406,9 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 
 	Value best = -VALUE_INFINITE;
 
-	MovePicker mp(board, &ti.line[ply], ply, &ti.thread_hist, &tentry);
+	MovePicker mp(board, &ti.line[ply], ply, &ti.thread_hist, tentry);
 
-	if ((pv || cutnode) && depth > 4 && !(tentry.valid() && tentry.best_move != NullMove)) {
+	if ((pv || cutnode) && depth > 4 && !(tentry && tentry->best_move != NullMove)) {
 		depth -= 2; // Internal iterative reductions
 	}
 
@@ -432,7 +433,7 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 		
 		int extension = 0;
 
-		if (ti.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry.valid() && move == tentry.best_move && tentry.depth >= depth - 3 && tentry.bound() != UPPER_BOUND) {
+		if (ti.line[ply].excl == NullMove && depth >= 8 && i == 0 && tentry && move == tentry->best_move && tentry->depth >= depth - 3 && tentry->bound() != UPPER_BOUND) {
 			// Singular extension
 			ti.line[ply].excl = move;
 			Value singular_beta = tteval - 4 * depth;
@@ -529,7 +530,7 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 			score = -__recurse(ti, newdepth, -alpha - 1, -alpha, -side, 0, !cutnode, ply+1);
 		}
 		if (pv && (i == 0 || score > alpha)) {
-			if (tentry.valid() && move == tentry.best_move && tentry.depth > 1)
+			if (tentry && move == tentry->best_move && tentry->depth > 1)
 				newdepth = std::max((int)newdepth, 1); // Make sure we don't enter QS if we have an available TT move
 			score = -__recurse(ti, newdepth, -beta, -alpha, -side, 1, false, ply+1);
 		}
@@ -611,7 +612,7 @@ Value __recurse(ThreadInfo &ti, int depth, Value alpha = -VALUE_INFINITE, Value
 	}
 
 	if (ti.line[ply].excl == NullMove) {
-		Move tt_move = best_move != NullMove ? best_move : tentry.valid() ? tentry.best_move : NullMove;
+		Move tt_move = best_move != NullMove ? best_move : tentry ? tentry->best_move : NullMove;
 		ttable.store(board.zobrist, score_to_tt(best, ply), raw_eval, depth, flag, ttpv, tt_move, board.halfmove);
 	}
 
diff --git a/engine/ttable.cpp b/engine/ttable.cpp
index e1ec9e9..2a53662 100644
--- a/engine/ttable.cpp
+++ b/engine/ttable.cpp
@@ -3,23 +3,62 @@
 TTable ttable(DEFAULT_TT_SIZE);
 
 void TTable::store(uint64_t key, Value eval, Value s_eval, uint8_t depth, uint8_t bound, bool ttpv, Move best_move, uint8_t age) {
-	TTEntry *entry = TT + (key % TT_SIZE);
+	TTBucket *bucket = TT + (key % TT_SIZE);
 
 	key >>= 32; // Use upper 32 bits for the key (since we already verified bottom n bits)
+	
+	TTEntry *depth_entry = &bucket->entries[0];
+	TTEntry *always_entry = &bucket->entries[1];
+	if (depth_entry->key == key || always_entry->key == key) {
+		// Key already present in this bucket: overwrite that entry in place, regardless of depth
+		if (depth_entry->key == key) {
+			depth_entry->eval = eval;
+			depth_entry->s_eval = s_eval;
+			depth_entry->depth = depth;
+			depth_entry->flags = bound | (ttpv ? TTPV : 0);
+			depth_entry->best_move = best_move;
+			depth_entry->age = age;
+		} else if (always_entry->key == key) {
+			always_entry->eval = eval;
+			always_entry->s_eval = s_eval;
+			always_entry->depth = depth;
+			always_entry->flags = bound | (ttpv ? TTPV : 0);
+			always_entry->best_move = best_move;
+			always_entry->age = age;
+		}
+		return;
+	}
 
-	entry->key = key;
-	entry->eval = eval;
-	entry->s_eval = s_eval;
-	entry->depth = depth;
-	entry->flags = bound | (ttpv ? TTPV : 0);
-	entry->best_move = best_move;
-	entry->age = age;
+	// 1. Replace the first (depth-preferred) entry if the new entry is deeper, or equally deep with a larger age value
+	if (depth_entry->depth < depth || (depth_entry->depth == depth && depth_entry->age < age)) {
+		depth_entry->key = key;
+		depth_entry->eval = eval;
+		depth_entry->s_eval = s_eval;
+		depth_entry->depth = depth;
+		depth_entry->flags = bound | (ttpv ? TTPV : 0);
+		depth_entry->best_move = best_move;
+		depth_entry->age = age;
+		return;
+	}
+
+	// 2. Otherwise, unconditionally overwrite the second (always-replace) entry
+	always_entry->key = key;
+	always_entry->eval = eval;
+	always_entry->s_eval = s_eval;
+	always_entry->depth = depth;
+	always_entry->flags = bound | (ttpv ? TTPV : 0);
+	always_entry->best_move = best_move;
+	always_entry->age = age;
 }
 
-const TTable::TTEntry *TTable::probe(uint64_t key) {
-	TTEntry *entry = TT + (key % TT_SIZE);
+TTable::TTEntry *TTable::probe(uint64_t key) {
+	TTBucket *bucket = TT + (key % TT_SIZE);
 	key >>= 32;
-	if (entry->key == key)
+	for (int i = 0; i < 2; i++) {
+		TTEntry *entry = &bucket->entries[i];
+		if (entry->key != key)
+			continue;
 		return entry;
-	return &NO_ENTRY;
+	}
+	return nullptr;
 }
diff --git a/engine/ttable.hpp b/engine/ttable.hpp
index 4f45679..3208ee1 100644
--- a/engine/ttable.hpp
+++ b/engine/ttable.hpp
@@ -3,7 +3,7 @@
 #include "includes.hpp"
 #include "move.hpp"
 
-#define DEFAULT_TT_SIZE (16 * 1024 * 1024 / sizeof(TTable::TTEntry)) // 16 MB
+#define DEFAULT_TT_SIZE (16 * 1024 * 1024 / sizeof(TTable::TTBucket)) // 16 MB
 
 enum TTFlag {
 	EXACT = 0,
@@ -28,12 +28,16 @@ struct TTable {
 		const bool ttpv() const { return flags >> 2; }
 	};
 
-	const TTEntry NO_ENTRY = TTEntry();
+	struct TTBucket {
+		TTEntry entries[2];
+	};
+
+	TTEntry NO_ENTRY = TTEntry();
 
-	TTEntry *TT;
+	TTBucket *TT;
 	int TT_SIZE;
 
-	TTable(int size) : TT_SIZE(size) { TT = new TTEntry[size]; }
+	TTable(int size) : TT_SIZE(size) { TT = new TTBucket[size]; }
 
 	~TTable() { delete[] TT; }
 
@@ -47,19 +51,19 @@ struct TTable {
 		if (this != &o) {
 			delete[] TT;
 			TT_SIZE = o.TT_SIZE;
-			TT = new TTEntry[TT_SIZE];
+			TT = new TTBucket[TT_SIZE];
 		}
 		return *this;
 	}
 
 	void store(uint64_t key, Value eval, Value s_eval, uint8_t depth, uint8_t bound, bool ttpv, Move best_move, uint8_t age);
 
-	const TTEntry *probe(uint64_t key);
+	TTEntry *probe(uint64_t key);
 
 	void resize(int size) {
 		delete[] TT;
 		TT_SIZE = size;
-		TT = new TTEntry[size];
+		TT = new TTBucket[size];
 	}
 
 	constexpr uint64_t mxsize() const { return TT_SIZE * 2; }

From 941bf21918724250c2195973fad7d8d87c1ec765 Mon Sep 17 00:00:00 2001
From: Kevin Lu <kevie.lu8@gmail.com>
Date: Tue, 2 Dec 2025 22:55:48 -0800
Subject: [PATCH 13/13] Remove spin-waiting

Bench: 10811608
---
 engine/search.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/engine/search.cpp b/engine/search.cpp
index 8169eec..e8990c0 100644
--- a/engine/search.cpp
+++ b/engine/search.cpp
@@ -752,10 +752,6 @@ std::pair<Move, Value> search(Board &board, ThreadInfo *threads, int64_t time, i
 		thread_handles.emplace_back(iterativedeepening, std::ref(ti), depth);
 	}
 
-	while (!stop_search) {
-		std::this_thread::sleep_for(std::chrono::milliseconds(5));
-	}
-
 	for (int t = 0; t < num_threads; t++) {
 		thread_handles[t].join();
 	}