From 9aad004497318dd25951c0aa94568d57175ec357 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:44:09 -0400 Subject: [PATCH 01/58] Implement addc --- XenonRecomp/recompiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index f8608179..500ee5aa 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -531,6 +531,13 @@ bool Recompiler::Recompile( println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_ADDC: + println("\t{}.ca = ({}.u32 + {}.u32 < {}.u32);", xer(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1])); + println("\t{}.u64 = {}.u64 + {}.u64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_ADDE: println("\t{}.u8 = ({}.u32 + {}.u32 < {}.u32) | ({}.u32 + {}.u32 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[2]), xer(), xer()); println("\t{}.u64 = {}.u64 + {}.u64 + {}.ca;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), xer()); From 4041671557c511eea88308ebff815546fefcdf01 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:44:33 -0400 Subject: [PATCH 02/58] Implement addme --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 500ee5aa..8a8913d1 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -567,6 +567,16 @@ bool Recompiler::Recompile( println("{};", static_cast(insn.operands[2] << 16)); break; + case PPC_INST_ADDME: + println("\t{}.u64 = {}.u64 + {}.ca - 1;", temp(), r(insn.operands[1]), xer()); + 
println("\t{}.ca = ({}.u64 > {}.u64) || ({}.u64 == {}.u64 && {}.ca);", xer(), + r(insn.operands[1]), temp(), r(insn.operands[1]), temp(), xer()); + println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp()); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_ADDZE: println("\t{}.s64 = {}.s64 + {}.ca;", temp(), r(insn.operands[1]), xer()); println("\t{}.ca = {}.u32 < {}.u32;", xer(), temp(), r(insn.operands[1])); From 1c06e8434955a7654c9c16c658a1403c9cbb8449 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:44:53 -0400 Subject: [PATCH 03/58] Implement eqv --- XenonRecomp/recompiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 8a8913d1..9bb6d34e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -869,6 +869,13 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_EQV: + // rA = ~(rS XOR rB) + println("\t{}.u64 = ~({}.u64 ^ {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_EXTSB: println("\t{}.s64 = {}.s8;", r(insn.operands[0]), r(insn.operands[1])); if (strchr(insn.opcode->name, '.')) From 18049b417a1e5e5834dfc1e16109cbd06930669c Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:46:51 -0400 Subject: [PATCH 04/58] Implement lvebx/lvehx into the lvx instruction implementation. 
Memory wise they perform the same operation --- XenonRecomp/recompiler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 9bb6d34e..aa6e4286 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1157,6 +1157,8 @@ bool Recompiler::Recompile( println("\t{}.s64 = {};", r(insn.operands[0]), int32_t(insn.operands[1] << 16)); break; + case PPC_INST_LVEBX: + case PPC_INST_LVEHX: case PPC_INST_LVEWX: case PPC_INST_LVEWX128: case PPC_INST_LVX: From 3fbe687fe16a033f8820873e96fe495fd0ff7544 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:48:12 -0400 Subject: [PATCH 05/58] Implement rlwnm --- XenonRecomp/recompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index aa6e4286..04f0bb6e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1421,6 +1421,14 @@ bool Recompiler::Recompile( println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_RLWNM: + println("\t{}.u64 = __builtin_rotateleft64({}.u32 | ({}.u64 << 32), {}.u8 & 0x1F) & 0x{:X};", + r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[1]), + r(insn.operands[2]), ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32)); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_ROTLDI: println("\t{}.u64 = __builtin_rotateleft64({}.u64, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]); break; From ae465823e1e4ea034050d3360ad27a60622afdfc Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:48:30 -0400 Subject: [PATCH 06/58] Implement subfme --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp 
b/XenonRecomp/recompiler.cpp index 04f0bb6e..f93bf0e7 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1744,6 +1744,16 @@ bool Recompiler::Recompile( println("\t{}.s64 = {} - {}.s64;", r(insn.operands[0]), int32_t(insn.operands[2]), r(insn.operands[1])); break; + case PPC_INST_SUBFME: + println("\t{}.u64 = ~{}.u64 + {}.ca - 1;", temp(), r(insn.operands[1]), xer()); + println("\t{}.ca = ({}.u64 < ~{}.u64) || ({}.u64 == ~{}.u64 && {}.ca);", xer(), + temp(), r(insn.operands[1]), temp(), r(insn.operands[1]), xer()); + println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp()); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_SYNC: // no op break; From 822dfadd7205e0c18bba59e2883f75ce0dfeb265 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:49:06 -0400 Subject: [PATCH 07/58] Add missing case for vandc --- XenonRecomp/recompiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index f93bf0e7..56ff8546 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1813,6 +1813,7 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VANDC: case PPC_INST_VANDC128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; From 5a3848ed4b5220b370add794264b8920507456f1 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:51:45 -0400 Subject: [PATCH 08/58] Implement vcmpbfp/vcmpbfp128 - need to implement vscr saturation bit 
control --- XenonRecomp/recompiler.cpp | 6 +++++- XenonUtils/ppc_context.h | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 56ff8546..99bd86db 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1876,7 +1876,11 @@ bool Recompiler::Recompile( case PPC_INST_VCMPBFP: case PPC_INST_VCMPBFP128: - println("\t__builtin_debugtrap();"); + printSetFlushMode(true); + println("\t_mm_store_ps({}.f32, _mm_vcmpbfp(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); /* NOTE(review): setFromMask is defined outside this patch; confirm it also reflects the 0x40000000 (below -b) bits when deriving CR6 */ break; case PPC_INST_VCMPEQFP: diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index c1091d17..2a6e27a9 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -651,4 +651,11 @@ inline __m128i _mm_vsr(__m128i a, __m128i b) return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10)); } +inline __m128 _mm_vcmpbfp(__m128 a, __m128 b) +{ /* vcmpbfp: per-lane bounds check of a against [-b, b]; a lane that is in bounds yields 0 */ + __m128 xmm0 = _mm_and_ps(_mm_cmpnle_ps(a, b), _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); /* bit 31: !(a <= b) -- above the bound, or unordered (NaN); an ordered "greater than" would miss NaN */ + __m128 xmm1 = _mm_and_ps(_mm_cmpnge_ps(a, _mm_sub_ps(_mm_setzero_ps(), b)), _mm_castsi128_ps(_mm_set1_epi32(0x40000000))); /* bit 30: !(a >= -b) -- below the bound, or unordered (NaN) */ + return _mm_or_ps(xmm0, xmm1); +} + #endif From bc9494b003c6255bc80a8dc78efc76f65a0f35e0 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:56:26 -0400 Subject: [PATCH 09/58] Implement mulhd and mulhdu --- XenonRecomp/recompiler.cpp | 16 ++++++++++++++++ XenonUtils/ppc_context.h | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 99bd86db..78c25117 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@
-1351,6 +1351,22 @@ bool Recompiler::Recompile( println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_MULHD: + println("\t{}.s64 = __mulh({}.s64, {}.s64);", + r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + + case PPC_INST_MULHDU: + println("\t{}.u64 = __mulhu({}.u64, {}.u64);", + r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_NAND: println("\t{}.u64 = ~({}.u64 & {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); break; diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index 2a6e27a9..a8e495b0 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -658,4 +658,22 @@ inline __m128 _mm_vcmpbfp(__m128 a, __m128 b) return _mm_or_ps(xmm0, xmm1); } +inline uint64_t __mulhu(uint64_t a, uint64_t b) { + // Returns the high 64 bits of the unsigned 128-bit product a * b. Start by splitting each operand into its low and high 32-bit halves. + uint32_t a_lo = (uint32_t)a; + uint32_t a_hi = (uint32_t)(a >> 32); + uint32_t b_lo = (uint32_t)b; + uint32_t b_hi = (uint32_t)(b >> 32); + + // Four 32x32 -> 64-bit partial products; each operand is widened first, so none of them can overflow uint64_t. + uint64_t lo_lo = (uint64_t)a_lo * b_lo; + uint64_t hi_lo = (uint64_t)a_hi * b_lo; + uint64_t lo_hi = (uint64_t)a_lo * b_hi; + uint64_t hi_hi = (uint64_t)a_hi * b_hi; + + // Sum everything that lands in bits 32..63 of the full product; the upper half of that sum is the carry into the high 64 bits. + uint64_t cross = (lo_lo >> 32) + (uint32_t)hi_lo + (uint32_t)lo_hi; + return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32); +} + #endif From daa4c009dc437b46169d5ba1d2f076716f18de9e Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 15:13:22 -0400 Subject: [PATCH 10/58] Implement vavugh --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp
b/XenonRecomp/recompiler.cpp index 78c25117..e3618ab1 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1846,6 +1846,11 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VAVGUH: + println("\t_mm_store_si128((__m128i*){}.u16, _mm_avg_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VCTSXS: case PPC_INST_VCFPSXWS128: printSetFlushMode(true); From 3cb492f94bbe0d9971666b9ecc0abda952ebe2dd Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:34:07 -0400 Subject: [PATCH 11/58] Implement vctuxs/vcfpuxws128 --- XenonRecomp/recompiler.cpp | 10 ++++++++++ XenonUtils/ppc_context.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index e3618ab1..cd636362 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1861,6 +1861,16 @@ bool Recompiler::Recompile( println("_mm_load_ps({}.f32)));", v(insn.operands[1])); break; + case PPC_INST_VCTUXS: + case PPC_INST_VCFPUXWS128: + printSetFlushMode(true); + print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0])); + if (insn.operands[2] != 0) + println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]); + else + println("_mm_load_ps({}.f32)));", v(insn.operands[1])); + break; + case PPC_INST_VCFSX: case PPC_INST_VCSXWFP128: { diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index a8e495b0..22257481 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -676,4 +676,36 @@ inline uint64_t __mulhu(uint64_t a, uint64_t b) { return 
hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32); } +inline __m128i _mm_vctuxs(__m128 src1) +{ + // Converts four floats to unsigned 32-bit integers with saturation (vctuxs). Clamp negative lanes to 0 first; _mm_max_ps returns its second operand when a lane is NaN, so NaN lanes become 0 here too. + __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps()); + + // For values in [2^31, 2^32), subtract 2^31, convert, add 2^31 back + __m128i big_result = _mm_add_epi32( + _mm_cvttps_epi32( + _mm_sub_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000))) + ), + _mm_set1_epi32(0x80000000) + ); + + // Select based on range + __m128i result = _mm_blendv_epi8( + _mm_cvttps_epi32(clamped), + big_result, + _mm_castps_si128( + _mm_cmpge_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000))) + ) + ); + + // Saturate overflow (>= 2^32) to UINT_MAX. NaN lanes are deliberately + // left out of this mask: they were already clamped to 0 above, and 0 is + // the result vctuxs defines for NaN, so forcing them to UINT_MAX would + // be wrong. + __m128 saturate_mask = _mm_cmpge_ps( + clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F800000)) + ); + return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask)); +} + #endif From e38d06e3d5c6f4f952159fea64129e2f7818cdb1 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:41:00 -0400 Subject: [PATCH 12/58] Implement vmaxsh --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index cd636362..aa61e086 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1988,6 +1988,11 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VMAXSH: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_max_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VMAXSW: println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]),
v(insn.operands[1]), v(insn.operands[2])); break; From c68a087441497d014dae03a0d403388b8db01a21 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:43:36 -0400 Subject: [PATCH 13/58] implement vmaxuh --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index aa61e086..aba434fe 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1997,6 +1997,11 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VMAXUH: + println("\t_mm_store_si128((__m128i*){}.u16, _mm_max_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VMINFP: case PPC_INST_VMINFP128: printSetFlushMode(true); From bb20abed18e52bd9f5f872bef0bd28a1969f9b84 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:47:46 -0400 Subject: [PATCH 14/58] Implement vminsh and vminuh --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index aba434fe..12b512a4 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2008,6 +2008,16 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_min_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VMINSH: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_min_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + + case 
PPC_INST_VMINUH: + println("\t_mm_store_si128((__m128i*){}.u16, _mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VMRGHB: println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; From 68f4ff4062a66584acd80fe32587dc05acbe12df Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:51:39 -0400 Subject: [PATCH 15/58] Implement vsubshs --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 12b512a4..94278092 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2269,6 +2269,11 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VSUBSHS: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_subs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VSUBSWS: // TODO: vectorize for (size_t i = 0; i < 4; i++) From ea988fa87e7401da1375ce5025930cacf351b073 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 18:12:17 -0400 Subject: [PATCH 16/58] Implement subfze --- XenonRecomp/recompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 94278092..85903a0c 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1770,6 +1770,14 @@ bool Recompiler::Recompile( cr(0), r(insn.operands[0]), xer()); break; + case 
PPC_INST_SUBFZE: + println("\t{}.u64 = ~{}.u64 + {}.ca;", temp(), r(insn.operands[1]), xer()); + println("\t{}.ca = {}.u64 < {}.ca;", xer(), temp(), xer()); + println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp()); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_SYNC: // no op break; From 541631f055d43e1aba5e373e7f001d90739853e3 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 18:18:29 -0400 Subject: [PATCH 17/58] Implement all trap cards --- XenonRecomp/recompiler.cpp | 199 ++++++++++++++++++++++++++++++++++++- 1 file changed, 194 insertions(+), 5 deletions(-) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 85903a0c..19cb6aa6 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1782,24 +1782,213 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_TDEQ: + println("\tif ({}.u64 == {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDEQI: + println("\tif ({}.u64 == {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDGE: + println("\tif ({}.s64 >= {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDGEI: + println("\tif ({}.s64 >= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TDGT: + println("\tif ({}.s64 > {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDGTI: + println("\tif ({}.s64 > {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TDLE: + println("\tif ({}.s64 <= {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLEI: + println("\tif ({}.s64 <= {}) __builtin_debugtrap();", 
r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TDLGE: + println("\tif ({}.u64 >= {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + case PPC_INST_TDLGEI: - // no op + println("\tif ({}.u64 >= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLGT: + println("\tif ({}.u64 > {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLGTI: + println("\tif ({}.u64 > {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLLE: + println("\tif ({}.u64 <= {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); break; case PPC_INST_TDLLEI: - // no op + println("\tif ({}.u64 <= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLLT: + println("\tif ({}.u64 < {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLLTI: + println("\tif ({}.u64 < {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLT: + println("\tif ({}.s64 < {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLTI: + println("\tif ({}.s64 < {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); break; + case PPC_INST_TDNE: + println("\tif ({}.u64 != {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDNEI: + println("\tif ({}.u64 != {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; case PPC_INST_TWI: - // no op + { + // TO field specifies trap conditions: + // Bit 0 (16): Less than (signed) + // Bit 1 (8): Greater than (signed) + // Bit 2 (4): Equal + // Bit 3 (2): Less than (unsigned) + // Bit 4 (1): Greater than (unsigned) + + bool first = true; + print("\tif ("); + + if (insn.operands[0] & 16) { + 
print("{}.s32 < {}", r(insn.operands[1]), int32_t(insn.operands[2])); + first = false; + } + + if (insn.operands[0] & 8) { + if (!first) print(" || "); + print("{}.s32 > {}", r(insn.operands[1]), int32_t(insn.operands[2])); + first = false; + } + + if (insn.operands[0] & 4) { + if (!first) print(" || "); + print("{}.u32 == {}", r(insn.operands[1]), insn.operands[2]); + first = false; + } + + if (insn.operands[0] & 2) { + if (!first) print(" || "); + print("{}.u32 < {}", r(insn.operands[1]), insn.operands[2]); + first = false; + } + + if (insn.operands[0] & 1) { + if (!first) print(" || "); + print("{}.u32 > {}", r(insn.operands[1]), insn.operands[2]); + first = false; + } + + if (first) { + // TO = 0 means never trap + println("false) __builtin_debugtrap();"); + } else { + println(") __builtin_debugtrap();"); + } + } + break; + + case PPC_INST_TWEQ: + println("\tif ({}.u32 == {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWEQI: + println("\tif ({}.u32 == {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWGE: + println("\tif ({}.s32 >= {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWGEI: + println("\tif ({}.s32 >= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWGT: + println("\tif ({}.s32 > {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWGTI: + println("\tif ({}.s32 > {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWLE: + println("\tif ({}.s32 <= {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLEI: + println("\tif ({}.s32 <= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWLGE: + println("\tif ({}.u32 >= {}.u32) 
__builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); break; case PPC_INST_TWLGEI: - // no op + println("\tif ({}.u32 >= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLGT: + println("\tif ({}.u32 > {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLGTI: + println("\tif ({}.u32 > {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLLE: + println("\tif ({}.u32 <= {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); break; case PPC_INST_TWLLEI: - // no op + println("\tif ({}.u32 <= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLLT: + println("\tif ({}.u32 < {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLLTI: + println("\tif ({}.u32 < {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLT: + println("\tif ({}.s32 < {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLTI: + println("\tif ({}.s32 < {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWNE: + println("\tif ({}.u32 != {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWNEI: + println("\tif ({}.u32 != {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); break; case PPC_INST_VADDFP: From 0f42cc4d68db1ae43a7fccb46f0cfcd73ef63820 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 18:36:18 -0400 Subject: [PATCH 18/58] Implement vpkshss and vpkswus --- XenonRecomp/recompiler.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 19cb6aa6..89026f73 100644 --- 
a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2339,11 +2339,31 @@ bool Recompiler::Recompile( } break; + case PPC_INST_VPKSHSS: + case PPC_INST_VPKSHSS128: + println("\t_mm_store_si128((__m128i*){}.s8, _mm_packs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKSHUS: case PPC_INST_VPKSHUS128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKSWUS: + case PPC_INST_VPKSWUS128: + println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[2])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.s32[{}] < 0 ? 0 : ({}.s32[{}] > 0xFFFF ? 0xFFFF : {}.s32[{}]);", + v(insn.operands[0]), i, vTemp(), i, vTemp(), i, vTemp(), i); + } + println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[1])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.s32[{}] < 0 ? 0 : ({}.s32[{}] > 0xFFFF ? 
0xFFFF : {}.s32[{}]);", + v(insn.operands[0]), i + 4, vTemp(), i, vTemp(), i, vTemp(), i); + } + break; + case PPC_INST_VREFP: case PPC_INST_VREFP128: // TODO: see if we can use rcp safely From 9db01820e47d99570de2b1ca622194b0d70e0d50 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:09:25 -0400 Subject: [PATCH 19/58] Implement vpkuwum/vpkuwum128 and vpkuwus/vpkuwum128 --- XenonRecomp/recompiler.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 89026f73..3b5a5999 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2364,6 +2364,34 @@ bool Recompiler::Recompile( } break; + case PPC_INST_VPKUWUM: + case PPC_INST_VPKUWUM128: + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u16[{}];", + v(insn.operands[0]), i, vTemp(), i*2); + } + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[1])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u16[{}];", + v(insn.operands[0]), i + 4, vTemp(), i*2); + } + break; + + case PPC_INST_VPKUWUS: + case PPC_INST_VPKUWUS128: + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u32[{}] > 0xFFFF ? 0xFFFF : {}.u32[{}];", + v(insn.operands[0]), i, vTemp(), i, vTemp(), i); + } + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[1])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u32[{}] > 0xFFFF ? 
0xFFFF : {}.u32[{}]);", + v(insn.operands[0]), i + 4, vTemp(), i, vTemp(), i); + } + break; + case PPC_INST_VREFP: case PPC_INST_VREFP128: // TODO: see if we can use rcp safely From de697dc1b0d1848c51e70c23dc3a4eb6380919dc Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:19:45 -0400 Subject: [PATCH 20/58] Implement vpkswss/vpkswss128 --- XenonRecomp/recompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 3b5a5999..b2729b5e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2350,6 +2350,12 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKSWSS: + case PPC_INST_VPKSWSS128: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKSWUS: case PPC_INST_VPKSWUS128: println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[2])); From 2ae394ba59ab7ed661a1761cc16c4be7f8afe62c Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:23:17 -0400 Subject: [PATCH 21/58] Implement vpkuhum --- XenonRecomp/recompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index b2729b5e..d50a1f28 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2370,6 +2370,14 @@ bool Recompiler::Recompile( } break; + case PPC_INST_VPKUHUM: + // Pack without saturation - mask each halfword to its low byte so _mm_packus_epi16 never saturates + println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(" +
"_mm_and_si128(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF)), " + "_mm_and_si128(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF))));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKUWUM: case PPC_INST_VPKUWUM128: println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); From 0b4c87071b33d30dfc392dae54bff2856dc0d780 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:26:02 -0400 Subject: [PATCH 22/58] Implement vpkuhus/vpkuhus128 --- XenonRecomp/recompiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index d50a1f28..27f1a3ee 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2378,6 +2378,13 @@ bool Recompiler::Recompile( v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKUHUS: + case PPC_INST_VPKUHUS128: + // Pack unsigned halfwords to unsigned bytes with saturation. _mm_packus_epi16 reads its inputs as signed, so clamp to 0xFF with an unsigned min first; otherwise halfwords >= 0x8000 would pack to 0 instead of 0xFF. + println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF)), _mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF))));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKUWUM: case PPC_INST_VPKUWUM128: println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); From 5c01ec54ade4564fd111b718a1dd27743b255724 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:42:17 -0400 Subject: [PATCH 23/58] Implement vsl --- XenonRecomp/recompiler.cpp | 5 +++++ XenonUtils/ppc_context.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 27f1a3ee..6268df97 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2457,6 +2457,11 @@ bool
Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); break; + case PPC_INST_VSL: + println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsl(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VSLB: // TODO: vectorize for (size_t i = 0; i < 16; i++) diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index 22257481..7b30689a 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -708,4 +708,20 @@ inline __m128i _mm_vctuxs(__m128 src1) return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask)); } +inline __m128i _mm_vsl(__m128i a, __m128i b) +{ + // Extract shift count from last byte of b (accounting for endianness) + uint32_t shift = _mm_extract_epi8(b, 15) & 0x7; + + if (shift == 0) return a; + + // Shift left by bits + __m128i shifted = _mm_or_si128( + _mm_slli_epi64(a, shift), + _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - shift) + ); + + return shifted; +} + #endif From 691593db37563c82e6d76be1d68fccc042d0cef9 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:44:09 -0400 Subject: [PATCH 24/58] Implement vslh, vsrah, vsrh, vrlh --- XenonRecomp/recompiler.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 6268df97..d785f98d 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2468,6 +2468,36 @@ bool Recompiler::Recompile( println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, 
v(insn.operands[2]), i); break; + case PPC_INST_VSLH: + // Vector shift left halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.u16[{}] = {}.u16[{}] << ({}.u16[{}] & 0xF);", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + + case PPC_INST_VSRAH: + // Vector shift right algebraic halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.s16[{}] = {}.s16[{}] >> ({}.u16[{}] & 0xF);", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + + case PPC_INST_VSRH: + // Vector shift right halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.u16[{}] = {}.u16[{}] >> ({}.u16[{}] & 0xF);", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + + case PPC_INST_VRLH: + // Vector rotate left halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.u16[{}] = ({}.u16[{}] << ({}.u16[{}] & 0xF)) | " + "({}.u16[{}] >> (16 - ({}.u16[{}] & 0xF)));", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i, + v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + case PPC_INST_VSLDOI: case PPC_INST_VSLDOI128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]); @@ -2501,6 +2531,11 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]); break; + case PPC_INST_VSPLTISH: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_set1_epi16(short({})));", + v(insn.operands[0]), int16_t(insn.operands[1])); + break; + case PPC_INST_VSPLTISW: case PPC_INST_VSPLTISW128: println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]); From 1b73c209b75f6c6bbb908b3874ade4c417cbbdd5 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> 
Date: Mon, 26 May 2025 19:52:11 -0400 Subject: [PATCH 25/58] Add vsel128 to existing vsel --- XenonRecomp/recompiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index d785f98d..4cb83bba 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2454,6 +2454,7 @@ bool Recompiler::Recompile( break; case PPC_INST_VSEL: + case PPC_INST_VSEL128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); break; From 5f40c803bb1b7d66a44634cd39f97433dac39916 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:44:09 -0400 Subject: [PATCH 26/58] Implement addc --- XenonRecomp/recompiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index f8608179..500ee5aa 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -531,6 +531,13 @@ bool Recompiler::Recompile( println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_ADDC: + println("\t{}.ca = ({}.u32 + {}.u32 < {}.u32);", xer(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1])); + println("\t{}.u64 = {}.u64 + {}.u64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_ADDE: println("\t{}.u8 = ({}.u32 + {}.u32 < {}.u32) | ({}.u32 + {}.u32 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[2]), xer(), xer()); println("\t{}.u64 = {}.u64 + {}.u64 + 
{}.ca;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), xer()); From 5fce07aff4d8128283d3330f815baf8bb7f2b6b1 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:44:33 -0400 Subject: [PATCH 27/58] Implement addme --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 500ee5aa..8a8913d1 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -567,6 +567,16 @@ bool Recompiler::Recompile( println("{};", static_cast(insn.operands[2] << 16)); break; + case PPC_INST_ADDME: + println("\t{}.u64 = {}.u64 + {}.ca - 1;", temp(), r(insn.operands[1]), xer()); + println("\t{}.ca = ({}.u64 > {}.u64) || ({}.u64 == {}.u64 && {}.ca);", xer(), + r(insn.operands[1]), temp(), r(insn.operands[1]), temp(), xer()); + println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp()); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_ADDZE: println("\t{}.s64 = {}.s64 + {}.ca;", temp(), r(insn.operands[1]), xer()); println("\t{}.ca = {}.u32 < {}.u32;", xer(), temp(), r(insn.operands[1])); From 133e95a3997874ca0675340031c7dbbc20a57e77 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:44:53 -0400 Subject: [PATCH 28/58] Implement eqv --- XenonRecomp/recompiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 8a8913d1..9bb6d34e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -869,6 +869,13 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_EQV: + // rA = ~(rS XOR rB) + println("\t{}.u64 = ~({}.u64 ^ {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + 
println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_EXTSB: println("\t{}.s64 = {}.s8;", r(insn.operands[0]), r(insn.operands[1])); if (strchr(insn.opcode->name, '.')) From 3937e5469c73bb1af60477de1b46e88d9959d4b8 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:46:51 -0400 Subject: [PATCH 29/58] Implement lvebx/lvehx into the lvx instruction implementation. Memory wise they perform the same operation --- XenonRecomp/recompiler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 9bb6d34e..aa6e4286 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1157,6 +1157,8 @@ bool Recompiler::Recompile( println("\t{}.s64 = {};", r(insn.operands[0]), int32_t(insn.operands[1] << 16)); break; + case PPC_INST_LVEBX: + case PPC_INST_LVEHX: case PPC_INST_LVEWX: case PPC_INST_LVEWX128: case PPC_INST_LVX: From 4e77be82ce292ee85a4f1702546df8411f998ea0 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:48:12 -0400 Subject: [PATCH 30/58] Implement rlwnm --- XenonRecomp/recompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index aa6e4286..04f0bb6e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1421,6 +1421,14 @@ bool Recompiler::Recompile( println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_RLWNM: + println("\t{}.u64 = __builtin_rotateleft64({}.u32 | ({}.u64 << 32), {}.u8 & 0x1F) & 0x{:X};", + r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[1]), + r(insn.operands[2]), ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32)); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case 
PPC_INST_ROTLDI: println("\t{}.u64 = __builtin_rotateleft64({}.u64, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]); break; From fd85a418a8f42e0ca7f93743f89b2c8b9dade089 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:48:30 -0400 Subject: [PATCH 31/58] Implement subfme --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 04f0bb6e..f93bf0e7 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1744,6 +1744,16 @@ bool Recompiler::Recompile( println("\t{}.s64 = {} - {}.s64;", r(insn.operands[0]), int32_t(insn.operands[2]), r(insn.operands[1])); break; + case PPC_INST_SUBFME: + println("\t{}.u64 = ~{}.u64 + {}.ca - 1;", temp(), r(insn.operands[1]), xer()); + println("\t{}.ca = ({}.u64 < ~{}.u64) || ({}.u64 == ~{}.u64 && {}.ca);", xer(), + temp(), r(insn.operands[1]), temp(), r(insn.operands[1]), xer()); + println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp()); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_SYNC: // no op break; From 485ca80383bc05c580396cf985ecc87168fd0970 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:49:06 -0400 Subject: [PATCH 32/58] Add missing case for vandc --- XenonRecomp/recompiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index f93bf0e7..56ff8546 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1813,6 +1813,7 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VANDC: case 
PPC_INST_VANDC128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; From e5ca0a26846e74a501e36b075b914619b0eb20f1 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:51:45 -0400 Subject: [PATCH 33/58] Implement vcmpbfp/vcmpbfp128 - need to implement vscr saturation bit control --- XenonRecomp/recompiler.cpp | 6 +++++- XenonUtils/ppc_context.h | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 56ff8546..99bd86db 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1876,7 +1876,11 @@ bool Recompiler::Recompile( case PPC_INST_VCMPBFP: case PPC_INST_VCMPBFP128: - println("\t__builtin_debugtrap();"); + printSetFlushMode(true); + println("\t_mm_store_ps({}.f32, _mm_vcmpbfp(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0])); break; case PPC_INST_VCMPEQFP: diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index c1091d17..2a6e27a9 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -651,4 +651,11 @@ inline __m128i _mm_vsr(__m128i a, __m128i b) return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10)); } +inline __m128 _mm_vcmpbfp(__m128 a, __m128 b) +{ + __m128 xmm0 = _mm_and_ps(_mm_cmpgt_ps(a, b), _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); + __m128 xmm1 = _mm_and_ps(_mm_cmplt_ps(a, _mm_sub_ps(_mm_setzero_ps(), b)), _mm_castsi128_ps(_mm_set1_epi32(0x40000000))); + return _mm_or_ps(xmm0, xmm1); +} + #endif From 6299ca7ee74254e06bfec45b90532b6f9d59c011 
Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 14:56:26 -0400 Subject: [PATCH 34/58] Implement mulhd and mulhdu --- XenonRecomp/recompiler.cpp | 16 ++++++++++++++++ XenonUtils/ppc_context.h | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 99bd86db..78c25117 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1351,6 +1351,22 @@ bool Recompiler::Recompile( println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_MULHD: + println("\t{}.s64 = __mulh({}.s64, {}.s64);", + r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + + case PPC_INST_MULHDU: + println("\t{}.u64 = __mulhu({}.u64, {}.u64);", + r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", + cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_NAND: println("\t{}.u64 = ~({}.u64 & {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2])); break; diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index 2a6e27a9..a8e495b0 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -658,4 +658,22 @@ inline __m128 _mm_vcmpbfp(__m128 a, __m128 b) return _mm_or_ps(xmm0, xmm1); } +inline uint64_t __mulhu(uint64_t a, uint64_t b) { + // Get high/low 32-bit parts + uint32_t a_lo = (uint32_t)a; + uint32_t a_hi = (uint32_t)(a >> 32); + uint32_t b_lo = (uint32_t)b; + uint32_t b_hi = (uint32_t)(b >> 32); + + // Compute partial products + uint64_t lo_lo = (uint64_t)a_lo * b_lo; + uint64_t hi_lo = (uint64_t)a_hi * b_lo; + uint64_t lo_hi = (uint64_t)a_lo * b_hi; + uint64_t hi_hi = (uint64_t)a_hi * b_hi; + + // Compute high 64 bits of result 
+ uint64_t cross = (lo_lo >> 32) + (uint32_t)hi_lo + (uint32_t)lo_hi; + return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32); +} + #endif From fb627549d8818595c006d01f13c9c177b0d770b4 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 15:13:22 -0400 Subject: [PATCH 35/58] Implement vavugh --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 78c25117..e3618ab1 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1846,6 +1846,11 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VAVGUH: + println("\t_mm_store_si128((__m128i*){}.u16, _mm_avg_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VCTSXS: case PPC_INST_VCFPSXWS128: printSetFlushMode(true); From 7c13094ffda089b01f932fc4963b56b368b1c09d Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:34:07 -0400 Subject: [PATCH 36/58] Implement vctuxs/vcfpuxws128 --- XenonRecomp/recompiler.cpp | 10 ++++++++++ XenonUtils/ppc_context.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index e3618ab1..cd636362 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1861,6 +1861,16 @@ bool Recompiler::Recompile( println("_mm_load_ps({}.f32)));", v(insn.operands[1])); break; + case PPC_INST_VCTUXS: + case PPC_INST_VCFPUXWS128: + printSetFlushMode(true); + print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0])); + if (insn.operands[2] != 0) + 
println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]); + else + println("_mm_load_ps({}.f32)));", v(insn.operands[1])); + break; + case PPC_INST_VCFSX: case PPC_INST_VCSXWFP128: { diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index a8e495b0..22257481 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -676,4 +676,36 @@ inline uint64_t __mulhu(uint64_t a, uint64_t b) { return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32); } +inline __m128i _mm_vctuxs(__m128 src1) +{ + // Clamp negative to 0 + __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps()); + + // For values in [2^31, 2^32), subtract 2^31, convert, add 2^31 back + __m128i big_result = _mm_add_epi32( + _mm_cvttps_epi32( + _mm_sub_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000))) + ), + _mm_set1_epi32(0x80000000) + ); + + // Select based on range + __m128i result = _mm_blendv_epi8( + _mm_cvttps_epi32(clamped), + big_result, + _mm_castps_si128( + _mm_cmpge_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000))) + ) + ); + + // Saturate overflow and NaN to UINT_MAX + __m128 saturate_mask = _mm_or_ps( + _mm_cmpge_ps( + clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F800000)) + ), + _mm_cmpunord_ps(src1, src1) + ); + return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask)); +} + #endif From 7d9e5fb21444e4b7a40f3273b93df00d52f9c60a Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:41:00 -0400 Subject: [PATCH 37/58] Implement vmaxsh --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index cd636362..aa61e086 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1988,6 +1988,11 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", 
v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VMAXSH: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_max_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VMAXSW: println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; From ca05cd7a9da23f8ce733199f2c972fb8653afa26 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:43:36 -0400 Subject: [PATCH 38/58] implement vmaxuh --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index aa61e086..aba434fe 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1997,6 +1997,11 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VMAXUH: + println("\t_mm_store_si128((__m128i*){}.u16, _mm_max_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VMINFP: case PPC_INST_VMINFP128: printSetFlushMode(true); From 13b904a991caa9828f1c36d5487372668964d0f5 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:47:46 -0400 Subject: [PATCH 39/58] Implement vminsh and vminuh --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index aba434fe..12b512a4 100644 --- a/XenonRecomp/recompiler.cpp +++ 
b/XenonRecomp/recompiler.cpp @@ -2008,6 +2008,16 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_min_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VMINSH: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_min_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + + case PPC_INST_VMINUH: + println("\t_mm_store_si128((__m128i*){}.u16, _mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VMRGHB: println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; From 63c0bab3cc4ed53487ca7f5fe2c8f1fe49954ef0 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 17:51:39 -0400 Subject: [PATCH 40/58] Implement vsubshs --- XenonRecomp/recompiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 12b512a4..94278092 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2269,6 +2269,11 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VSUBSHS: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_subs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VSUBSWS: // TODO: vectorize for (size_t i = 0; i < 4; i++) From 30fd01f8443d3ec0309325620e5792c2707ecf54 Mon Sep 17 00:00:00 2001 From: CRACKbomber 
<1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 18:12:17 -0400 Subject: [PATCH 41/58] Implement subfze --- XenonRecomp/recompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 94278092..85903a0c 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1770,6 +1770,14 @@ bool Recompiler::Recompile( cr(0), r(insn.operands[0]), xer()); break; + case PPC_INST_SUBFZE: + println("\t{}.u64 = ~{}.u64 + {}.ca;", temp(), r(insn.operands[1]), xer()); + println("\t{}.ca = {}.u64 < {}.ca;", xer(), temp(), xer()); + println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp()); + if (strchr(insn.opcode->name, '.')) + println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer()); + break; + case PPC_INST_SYNC: // no op break; From 045f27aad5f1565d98531d00b3ae8f7ff753566c Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 18:18:29 -0400 Subject: [PATCH 42/58] Implement all trap cards --- XenonRecomp/recompiler.cpp | 199 ++++++++++++++++++++++++++++++++++++- 1 file changed, 194 insertions(+), 5 deletions(-) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 85903a0c..19cb6aa6 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1782,24 +1782,213 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_TDEQ: + println("\tif ({}.u64 == {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDEQI: + println("\tif ({}.u64 == {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDGE: + println("\tif ({}.s64 >= {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDGEI: + println("\tif ({}.s64 >= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TDGT: + 
println("\tif ({}.s64 > {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDGTI: + println("\tif ({}.s64 > {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TDLE: + println("\tif ({}.s64 <= {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLEI: + println("\tif ({}.s64 <= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TDLGE: + println("\tif ({}.u64 >= {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + case PPC_INST_TDLGEI: - // no op + println("\tif ({}.u64 >= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLGT: + println("\tif ({}.u64 > {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLGTI: + println("\tif ({}.u64 > {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLLE: + println("\tif ({}.u64 <= {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); break; case PPC_INST_TDLLEI: - // no op + println("\tif ({}.u64 <= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLLT: + println("\tif ({}.u64 < {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLLTI: + println("\tif ({}.u64 < {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TDLT: + println("\tif ({}.s64 < {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TDLTI: + println("\tif ({}.s64 < {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); break; + case PPC_INST_TDNE: + println("\tif ({}.u64 != {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); 
+ break; + + case PPC_INST_TDNEI: + println("\tif ({}.u64 != {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; case PPC_INST_TWI: - // no op + { + // TO field specifies trap conditions: + // Bit 0 (16): Less than (signed) + // Bit 1 (8): Greater than (signed) + // Bit 2 (4): Equal + // Bit 3 (2): Less than (unsigned) + // Bit 4 (1): Greater than (unsigned) + + bool first = true; + print("\tif ("); + + if (insn.operands[0] & 16) { + print("{}.s32 < {}", r(insn.operands[1]), int32_t(insn.operands[2])); + first = false; + } + + if (insn.operands[0] & 8) { + if (!first) print(" || "); + print("{}.s32 > {}", r(insn.operands[1]), int32_t(insn.operands[2])); + first = false; + } + + if (insn.operands[0] & 4) { + if (!first) print(" || "); + print("{}.u32 == {}", r(insn.operands[1]), insn.operands[2]); + first = false; + } + + if (insn.operands[0] & 2) { + if (!first) print(" || "); + print("{}.u32 < {}", r(insn.operands[1]), insn.operands[2]); + first = false; + } + + if (insn.operands[0] & 1) { + if (!first) print(" || "); + print("{}.u32 > {}", r(insn.operands[1]), insn.operands[2]); + first = false; + } + + if (first) { + // TO = 0 means never trap + println("false) __builtin_debugtrap();"); + } else { + println(") __builtin_debugtrap();"); + } + } + break; + + case PPC_INST_TWEQ: + println("\tif ({}.u32 == {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWEQI: + println("\tif ({}.u32 == {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWGE: + println("\tif ({}.s32 >= {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWGEI: + println("\tif ({}.s32 >= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWGT: + println("\tif ({}.s32 > {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case 
PPC_INST_TWGTI: + println("\tif ({}.s32 > {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWLE: + println("\tif ({}.s32 <= {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLEI: + println("\tif ({}.s32 <= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWLGE: + println("\tif ({}.u32 >= {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); break; case PPC_INST_TWLGEI: - // no op + println("\tif ({}.u32 >= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLGT: + println("\tif ({}.u32 > {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLGTI: + println("\tif ({}.u32 > {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLLE: + println("\tif ({}.u32 <= {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); break; case PPC_INST_TWLLEI: - // no op + println("\tif ({}.u32 <= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLLT: + println("\tif ({}.u32 < {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLLTI: + println("\tif ({}.u32 < {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]); + break; + + case PPC_INST_TWLT: + println("\tif ({}.s32 < {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWLTI: + println("\tif ({}.s32 < {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1])); + break; + + case PPC_INST_TWNE: + println("\tif ({}.u32 != {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1])); + break; + + case PPC_INST_TWNEI: + println("\tif ({}.u32 != {}) __builtin_debugtrap();", r(insn.operands[0]), 
insn.operands[1]); break; case PPC_INST_VADDFP: From 9222ef0cd6e76aaebd1a1770b2de9d525c89e7b1 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 18:36:18 -0400 Subject: [PATCH 43/58] Implement vpkshss and vpkswus --- XenonRecomp/recompiler.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 19cb6aa6..89026f73 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2339,11 +2339,31 @@ bool Recompiler::Recompile( } break; + case PPC_INST_VPKSHSS: + case PPC_INST_VPKSHSS128: + println("\t_mm_store_si128((__m128i*){}.s8, _mm_packs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKSHUS: case PPC_INST_VPKSHUS128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKSWUS: + case PPC_INST_VPKSWUS128: + println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[2])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.s32[{}] < 0 ? 0 : ({}.s32[{}] > 0xFFFF ? 0xFFFF : {}.s32[{}]);", + v(insn.operands[0]), i, vTemp(), i, vTemp(), i, vTemp(), i); + } + println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[1])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.s32[{}] < 0 ? 0 : ({}.s32[{}] > 0xFFFF ? 
0xFFFF : {}.s32[{}]);", + v(insn.operands[0]), i + 4, vTemp(), i, vTemp(), i, vTemp(), i); + } + break; + case PPC_INST_VREFP: case PPC_INST_VREFP128: // TODO: see if we can use rcp safely From 81e8dd677760895eb0be9d7474bdf2ac8c210fb3 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:09:25 -0400 Subject: [PATCH 44/58] Implement vpkuwum/vpkuwum128 and vpkuwus/vpkuwus128 --- XenonRecomp/recompiler.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 89026f73..3b5a5999 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2364,6 +2364,34 @@ bool Recompiler::Recompile( } break; + case PPC_INST_VPKUWUM: + case PPC_INST_VPKUWUM128: + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u16[{}];", + v(insn.operands[0]), i, vTemp(), i*2); + } + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[1])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u16[{}];", + v(insn.operands[0]), i + 4, vTemp(), i*2); + } + break; + + case PPC_INST_VPKUWUS: + case PPC_INST_VPKUWUS128: + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u32[{}] > 0xFFFF ? 0xFFFF : {}.u32[{}];", + v(insn.operands[0]), i, vTemp(), i, vTemp(), i); + } + println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[1])); + for (int i = 0; i < 4; i++) { + println("\t{}.u16[{}] = {}.u32[{}] > 0xFFFF ?
0xFFFF : {}.u32[{}];", + v(insn.operands[0]), i + 4, vTemp(), i, vTemp(), i); + } + break; + case PPC_INST_VREFP: case PPC_INST_VREFP128: // TODO: see if we can use rcp safely From cce95479c134f26a617d2e7dad70b9b43fea7477 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:19:45 -0400 Subject: [PATCH 45/58] Implement vpkswss/vpkswss128 --- XenonRecomp/recompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 3b5a5999..b2729b5e 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2350,6 +2350,12 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKSWSS: + case PPC_INST_VPKSWSS128: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKSWUS: case PPC_INST_VPKSWUS128: println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[2])); From 004a6dabebfbb895a56f5349062d09616be28d15 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:23:17 -0400 Subject: [PATCH 46/58] Implement vpkuhum --- XenonRecomp/recompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index b2729b5e..d50a1f28 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2370,6 +2370,14 @@ bool Recompiler::Recompile( } break; + case PPC_INST_VPKUHUM: + // Pack without saturation - use shuffle to select lower bytes + println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(" + 
"_mm_and_si128(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF)), " + "_mm_and_si128(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF))));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKUWUM: case PPC_INST_VPKUWUM128: println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); From 39003f075ed0bfdf1005560d2c015ef8ed98b64c Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:26:02 -0400 Subject: [PATCH 47/58] Implement vpkuhus/vpkuhus128 --- XenonRecomp/recompiler.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index d50a1f28..27f1a3ee 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2378,6 +2378,13 @@ bool Recompiler::Recompile( v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); break; + case PPC_INST_VPKUHUS: + case PPC_INST_VPKUHUS128: + // Pack unsigned halfwords to unsigned bytes with saturation + println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));", + v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1])); + break; + case PPC_INST_VPKUWUM: case PPC_INST_VPKUWUM128: println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2])); From 6b4c165a9b5c917d1a4f6313e38af9f38c2d8964 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:42:17 -0400 Subject: [PATCH 48/58] Implement vsl --- XenonRecomp/recompiler.cpp | 5 +++++ XenonUtils/ppc_context.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 27f1a3ee..6268df97 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2457,6 +2457,11 @@ bool 
Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); break; + case PPC_INST_VSL: + println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsl(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VSLB: // TODO: vectorize for (size_t i = 0; i < 16; i++) diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index 22257481..7b30689a 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -708,4 +708,20 @@ inline __m128i _mm_vctuxs(__m128 src1) return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask)); } +inline __m128i _mm_vsl(__m128i a, __m128i b) +{ + // Extract shift count from last byte of b (accounting for endianness) + uint32_t shift = _mm_extract_epi8(b, 15) & 0x7; + + if (shift == 0) return a; + + // Shift left by bits + __m128i shifted = _mm_or_si128( + _mm_slli_epi64(a, shift), + _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - shift) + ); + + return shifted; +} + #endif From fa0c77c2acccde9b237edfefef33373ba749dc0c Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 23 May 2025 19:44:09 -0400 Subject: [PATCH 49/58] Implement vslh, vsrah, vsrh, vrlh --- XenonRecomp/recompiler.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 6268df97..d785f98d 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2468,6 +2468,36 @@ bool Recompiler::Recompile( println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, 
v(insn.operands[2]), i); break; + case PPC_INST_VSLH: + // Vector shift left halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.u16[{}] = {}.u16[{}] << ({}.u16[{}] & 0xF);", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + + case PPC_INST_VSRAH: + // Vector shift right algebraic halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.s16[{}] = {}.s16[{}] >> ({}.u16[{}] & 0xF);", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + + case PPC_INST_VSRH: + // Vector shift right halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.u16[{}] = {}.u16[{}] >> ({}.u16[{}] & 0xF);", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + + case PPC_INST_VRLH: + // Vector rotate left halfword + for (size_t i = 0; i < 8; i++) + println("\t{}.u16[{}] = ({}.u16[{}] << ({}.u16[{}] & 0xF)) | " + "({}.u16[{}] >> (16 - ({}.u16[{}] & 0xF)));", + v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i, + v(insn.operands[1]), i, v(insn.operands[2]), i); + break; + case PPC_INST_VSLDOI: case PPC_INST_VSLDOI128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]); @@ -2501,6 +2531,11 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]); break; + case PPC_INST_VSPLTISH: + println("\t_mm_store_si128((__m128i*){}.s16, _mm_set1_epi16(short({})));", + v(insn.operands[0]), int16_t(insn.operands[1])); + break; + case PPC_INST_VSPLTISW: case PPC_INST_VSPLTISW128: println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]); From 5e945d81e99c319632c815b54837898fdb54dff0 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> 
Date: Mon, 26 May 2025 19:52:11 -0400 Subject: [PATCH 50/58] Add vsel128 to existing vsel --- XenonRecomp/recompiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index d785f98d..4cb83bba 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2454,6 +2454,7 @@ bool Recompiler::Recompile( break; case PPC_INST_VSEL: + case PPC_INST_VSEL128: println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2])); break; From 8782d4dbd74dc85e45cd8a32981555e50b44cd2d Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Sat, 31 May 2025 19:08:08 -0400 Subject: [PATCH 51/58] Added ability to alias function names to ease of debugging and reversing. 
--- XenonRecomp/recompiler.cpp | 22 ++++++++++++++++++++-- XenonRecomp/recompiler_config.cpp | 11 +++++++++++ XenonRecomp/recompiler_config.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 4cb83bba..e3f91339 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -171,7 +171,19 @@ void Recompiler::Analyse() for (auto& [address, size] : config.functions) { functions.emplace_back(address, size); - image.symbols.emplace(fmt::format("sub_{:X}", address), address, size, Symbol_Function); + // Check if there's a function alias for this address + auto aliasIt = config.functionAliases.find(address); + std::string symbolName; + if (aliasIt != config.functionAliases.end()) + { + symbolName = aliasIt->second; + } + else + { + symbolName = fmt::format("sub_{:X}", address); + } + + image.symbols.emplace(symbolName, address, size, Symbol_Function); } auto& pdata = *image.Find(".pdata"); @@ -2784,7 +2796,13 @@ bool Recompiler::Recompile(const Function& fn) auto symbol = image.symbols.find(fn.base); std::string name; - if (symbol != image.symbols.end()) + auto aliasIt = config.functionAliases.find(fn.base); + + if (aliasIt != config.functionAliases.end()) + { + name = aliasIt->second; + } + else if (symbol != image.symbols.end()) { name = symbol->name; } diff --git a/XenonRecomp/recompiler_config.cpp b/XenonRecomp/recompiler_config.cpp index 81330a47..a145071c 100644 --- a/XenonRecomp/recompiler_config.cpp +++ b/XenonRecomp/recompiler_config.cpp @@ -58,6 +58,17 @@ void RecompilerConfig::Load(const std::string_view& configFilePath) } } + if (auto functionAliasesArray = main["function_aliases"].as_array()) + { + for (auto& alias : *functionAliasesArray) + { + auto& aliasTable = *alias.as_table(); + uint32_t address = *aliasTable["address"].value(); + std::string name = fmt::format("_gfn_{}", *aliasTable["name"].value()); + functionAliases.emplace(address, std::move(name)); + } + } 
+ if (auto invalidArray = main["invalid_instructions"].as_array()) { for (auto& instr : *invalidArray) diff --git a/XenonRecomp/recompiler_config.h b/XenonRecomp/recompiler_config.h index 534e5032..2776a814 100644 --- a/XenonRecomp/recompiler_config.h +++ b/XenonRecomp/recompiler_config.h @@ -52,6 +52,7 @@ struct RecompilerConfig std::unordered_map functions; std::unordered_map invalidInstructions; std::unordered_map midAsmHooks; + std::unordered_map functionAliases; void Load(const std::string_view& configFilePath); }; From 0ddb5e5eb7b4a34d3df5eb4b988e55b2ce58b409 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:20:50 -0400 Subject: [PATCH 52/58] Double look-back to 64 instructions (256 bytes) to account for computed switch tables containing a nop before the branch instruction. --- XenonAnalyse/main.cpp | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/XenonAnalyse/main.cpp b/XenonAnalyse/main.cpp index d08371ef..108ba725 100644 --- a/XenonAnalyse/main.cpp +++ b/XenonAnalyse/main.cpp @@ -105,7 +105,7 @@ void ScanTable(const uint32_t* code, size_t base, SwitchTable& table) { ppc_insn insn; uint32_t cr{ (uint32_t)-1 }; - for (int i = 0; i < 32; i++) + for (int i = 0; i < 64; i++) { ppc::Disassemble(&code[-i], base - (4 * i), insn); if (insn.opcode == nullptr) @@ -113,7 +113,11 @@ void ScanTable(const uint32_t* code, size_t base, SwitchTable& table) continue; } - if (cr == -1 && (insn.opcode->id == PPC_INST_BGT || insn.opcode->id == PPC_INST_BGTLR || insn.opcode->id == PPC_INST_BLE || insn.opcode->id == PPC_INST_BLELR)) + // Handle conditional branches + if (cr == -1 && (insn.opcode->id == PPC_INST_BGT || + insn.opcode->id == PPC_INST_BGTLR || + insn.opcode->id == PPC_INST_BLE || + insn.opcode->id == PPC_INST_BLELR)) { cr = insn.operands[0]; if (insn.opcode->operands[1] != 0) @@ -121,16 +125,27 @@ void ScanTable(const uint32_t* code, size_t base, SwitchTable& table)
table.defaultLabel = insn.operands[1]; } } - else if (cr != -1) + // Handle CMPLWI even if branch not found yet + else if (insn.opcode->id == PPC_INST_CMPLWI) { - if (insn.opcode->id == PPC_INST_CMPLWI && insn.operands[0] == cr) + // Only process if we haven't found labels yet + if (table.labels.empty()) { table.r = insn.operands[1]; table.labels.resize(insn.operands[2] + 1); table.base = base; - break; } } + // Handle CMPLWI after branch detection + else if (cr != -1 && + insn.opcode->id == PPC_INST_CMPLWI && + insn.operands[0] == cr) + { + table.r = insn.operands[1]; + table.labels.resize(insn.operands[2] + 1); + table.base = base; + break; + } } } From bd25a879294b87d8ff017ab77ca66750d94c5767 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:22:15 -0400 Subject: [PATCH 53/58] Revert "Added ability to alias function names to ease of debugging and reversing." This reverts commit 8782d4dbd74dc85e45cd8a32981555e50b44cd2d. --- XenonRecomp/recompiler.cpp | 22 ++-------------------- XenonRecomp/recompiler_config.cpp | 11 ----------- XenonRecomp/recompiler_config.h | 1 - 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index e3f91339..4cb83bba 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -171,19 +171,7 @@ void Recompiler::Analyse() for (auto& [address, size] : config.functions) { functions.emplace_back(address, size); - // Check if there's a function alias for this address - auto aliasIt = config.functionAliases.find(address); - std::string symbolName; - if (aliasIt != config.functionAliases.end()) - { - symbolName = aliasIt->second; - } - else - { - symbolName = fmt::format("sub_{:X}", address); - } - - image.symbols.emplace(symbolName, address, size, Symbol_Function); + image.symbols.emplace(fmt::format("sub_{:X}", address), address, size, Symbol_Function); } auto& pdata = *image.Find(".pdata"); @@ 
-2796,13 +2784,7 @@ bool Recompiler::Recompile(const Function& fn) auto symbol = image.symbols.find(fn.base); std::string name; - auto aliasIt = config.functionAliases.find(fn.base); - - if (aliasIt != config.functionAliases.end()) - { - name = aliasIt->second; - } - else if (symbol != image.symbols.end()) + if (symbol != image.symbols.end()) { name = symbol->name; } diff --git a/XenonRecomp/recompiler_config.cpp b/XenonRecomp/recompiler_config.cpp index a145071c..81330a47 100644 --- a/XenonRecomp/recompiler_config.cpp +++ b/XenonRecomp/recompiler_config.cpp @@ -58,17 +58,6 @@ void RecompilerConfig::Load(const std::string_view& configFilePath) } } - if (auto functionAliasesArray = main["function_aliases"].as_array()) - { - for (auto& alias : *functionAliasesArray) - { - auto& aliasTable = *alias.as_table(); - uint32_t address = *aliasTable["address"].value(); - std::string name = fmt::format("_gfn_{}", *aliasTable["name"].value()); - functionAliases.emplace(address, std::move(name)); - } - } - if (auto invalidArray = main["invalid_instructions"].as_array()) { for (auto& instr : *invalidArray) diff --git a/XenonRecomp/recompiler_config.h b/XenonRecomp/recompiler_config.h index 2776a814..534e5032 100644 --- a/XenonRecomp/recompiler_config.h +++ b/XenonRecomp/recompiler_config.h @@ -52,7 +52,6 @@ struct RecompilerConfig std::unordered_map functions; std::unordered_map invalidInstructions; std::unordered_map midAsmHooks; - std::unordered_map functionAliases; void Load(const std::string_view& configFilePath); }; From c02c37b2316dbcf5785bb93a7b813319dfdf1877 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:32:15 -0400 Subject: [PATCH 54/58] Implement dcbst - no op, performance related cache hint.
--- XenonRecomp/recompiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 4cb83bba..680bb199 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -825,6 +825,7 @@ bool Recompiler::Recompile( // no op break; + case PPC_INST_DCBST: case PPC_INST_DCBTST: // no op break; From c81d7f82a85d63bb9c6a24e00baa008edc791054 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:34:20 -0400 Subject: [PATCH 55/58] Implement vnor/vnor128 --- XenonRecomp/recompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 680bb199..0bc5e52f 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2264,6 +2264,12 @@ bool Recompiler::Recompile( println("\t_mm_store_ps({}.f32, _mm_xor_ps(_mm_sub_ps(_mm_mul_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)), _mm_load_ps({}.f32)), _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3])); break; + case PPC_INST_VNOR: + case PPC_INST_VNOR128: + println("\t_mm_store_si128((__m128i*){}.u8, _mm_xor_si128(_mm_or_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_set1_epi32(-1)));", + v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); + break; + case PPC_INST_VOR: case PPC_INST_VOR128: print("\t_mm_store_si128((__m128i*){}.u8, ", v(insn.operands[0])); From 0bf4d365321ece6b31d4a28dabab2a548d5fb2b1 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 27 Jun 2025 18:06:22 -0400 Subject: [PATCH 56/58] Implement frsqrte - Needs to be optimized but it should work for now --- XenonRecomp/recompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 0bc5e52f..2902687e 100644 --- 
a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1020,6 +1020,12 @@ bool Recompiler::Recompile( println("\t{}.f64 = {}.f64 >= 0.0 ? {}.f64 : {}.f64;", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); break; + case PPC_INST_FRSQRTE: + // TODO(crack): I sure hope the c++ optimizer can optimize this. Fixme with some simd magic later + printSetFlushMode(false); + println("\t{}.f64 = double(1.0f / sqrtf(float({}.f64)));", f(insn.operands[0]), f(insn.operands[1])); + break; + case PPC_INST_FSQRT: printSetFlushMode(false); println("\t{}.f64 = sqrt({}.f64);", f(insn.operands[0]), f(insn.operands[1])); From 03a04bcf78a46df15b7efdf0bdf46c510161aa51 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 27 Jun 2025 19:47:48 -0400 Subject: [PATCH 57/58] Implement bdnzt --- XenonRecomp/recompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 2902687e..05b768f0 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -685,6 +685,12 @@ bool Recompiler::Recompile( println("\tif ({}.u32 != 0 && !{}.eq) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), insn.operands[1]); break; + case PPC_INST_BDNZT: + // NOTE(crack): Same note as BDNZF but true instead of false + println("\t--{}.u64;", ctr()); + println("\tif ({}.u32 != 0 && {}.eq) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), insn.operands[1]); + break; + case PPC_INST_BEQ: printConditionalBranch(false, "eq"); break; From edcb89a7a5e2858f460eecd088913d359bc08d51 Mon Sep 17 00:00:00 2001 From: CRACKbomber <1568512+CRACKbomber@users.noreply.github.com> Date: Fri, 27 Jun 2025 19:48:33 -0400 Subject: [PATCH 58/58] Implement vaddsws --- XenonRecomp/recompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index 05b768f0..bc1923e9 100644 ---
a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -2014,6 +2014,16 @@ bool Recompiler::Recompile( println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break; + case PPC_INST_VADDSWS: + // TODO(crack): vectorize - SSE doesn't have _mm_adds_epi32 + for (size_t i = 0; i < 4; i++) + { + println("\t{}.s64 = int64_t({}.s32[{}]) + int64_t({}.s32[{}]);", temp(), v(insn.operands[1]), i, v(insn.operands[2]), i); + println("\t{}.s32[{}] = {}.s64 > INT_MAX ? INT_MAX : {}.s64 < INT_MIN ? INT_MIN : {}.s64;", + v(insn.operands[0]), i, temp(), temp(), temp()); + } + break; + case PPC_INST_VADDUBM: println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2])); break;