diff --git a/XenonAnalyse/main.cpp b/XenonAnalyse/main.cpp
index d08371ef..108ba725 100644
--- a/XenonAnalyse/main.cpp
+++ b/XenonAnalyse/main.cpp
@@ -105,7 +105,7 @@ void ScanTable(const uint32_t* code, size_t base, SwitchTable& table)
 {
     ppc_insn insn;
     uint32_t cr{ (uint32_t)-1 };
-    for (int i = 0; i < 32; i++)
+    for (int i = 0; i < 64; i++)
     {
         ppc::Disassemble(&code[-i], base - (4 * i), insn);
         if (insn.opcode == nullptr)
@@ -113,7 +113,11 @@ void ScanTable(const uint32_t* code, size_t base, SwitchTable& table)
             continue;
         }
 
-        if (cr == -1 && (insn.opcode->id == PPC_INST_BGT || insn.opcode->id == PPC_INST_BGTLR || insn.opcode->id == PPC_INST_BLE || insn.opcode->id == PPC_INST_BLELR))
+        // Handle conditional branches
+        if (cr == -1 && (insn.opcode->id == PPC_INST_BGT ||
+            insn.opcode->id == PPC_INST_BGTLR ||
+            insn.opcode->id == PPC_INST_BLE ||
+            insn.opcode->id == PPC_INST_BLELR))
         {
             cr = insn.operands[0];
             if (insn.opcode->operands[1] != 0)
@@ -121,16 +125,27 @@ void ScanTable(const uint32_t* code, size_t base, SwitchTable& table)
                 table.defaultLabel = insn.operands[1];
             }
         }
-        else if (cr != -1)
+        // Handle CMPLWI even if branch not found yet
+        else if (cr == -1 && insn.opcode->id == PPC_INST_CMPLWI)
         {
-            if (insn.opcode->id == PPC_INST_CMPLWI && insn.operands[0] == cr)
+            // Only process if we haven't found labels yet
+            if (table.labels.empty())
             {
                 table.r = insn.operands[1];
                 table.labels.resize(insn.operands[2] + 1);
                 table.base = base;
-                break;
             }
         }
+        // Handle CMPLWI after branch detection
+        else if (cr != -1 &&
+            insn.opcode->id == PPC_INST_CMPLWI &&
+            insn.operands[0] == cr)
+        {
+            table.r = insn.operands[1];
+            table.labels.resize(insn.operands[2] + 1);
+            table.base = base;
+            break;
+        }
     }
 }
 
diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp
index f8608179..bc1923e9 100644
--- a/XenonRecomp/recompiler.cpp
+++ b/XenonRecomp/recompiler.cpp
@@ -531,6 +531,13 @@ bool Recompiler::Recompile(
         println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
         break;
 
+    case PPC_INST_ADDC:
+        println("\t{}.ca = ({}.u32 + {}.u32 < {}.u32);", xer(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1]));
+        println("\t{}.u64 = {}.u64 + {}.u64;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
+        break;
+
     case PPC_INST_ADDE:
         println("\t{}.u8 = ({}.u32 + {}.u32 < {}.u32) | ({}.u32 + {}.u32 + {}.ca < {}.ca);", temp(), r(insn.operands[1]), r(insn.operands[2]), r(insn.operands[1]), r(insn.operands[1]), r(insn.operands[2]), xer(), xer());
         println("\t{}.u64 = {}.u64 + {}.u64 + {}.ca;", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]), xer());
@@ -560,6 +567,16 @@ bool Recompiler::Recompile(
         println("{};", static_cast<int32_t>(insn.operands[2] << 16));
         break;
 
+    case PPC_INST_ADDME:
+        println("\t{}.u64 = {}.u64 + {}.ca - 1;", temp(), r(insn.operands[1]), xer());
+        println("\t{}.ca = ({}.u64 > {}.u64) || ({}.u64 == {}.u64 && {}.ca);", xer(),
+            r(insn.operands[1]), temp(), r(insn.operands[1]), temp(), xer());
+        println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp());
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});",
+                cr(0), r(insn.operands[0]), xer());
+        break;
+
     case PPC_INST_ADDZE:
         println("\t{}.s64 = {}.s64 + {}.ca;", temp(), r(insn.operands[1]), xer());
         println("\t{}.ca = {}.u32 < {}.u32;", xer(), temp(), r(insn.operands[1]));
@@ -668,6 +685,12 @@ bool Recompiler::Recompile(
         println("\tif ({}.u32 != 0 && !{}.eq) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), insn.operands[1]);
         break;
 
+    case PPC_INST_BDNZT:
+        // NOTE(crack): Same note as BDNZF but true instead of false
+        println("\t--{}.u64;", ctr());
+        println("\tif ({}.u32 != 0 && {}.eq) goto loc_{:X};", ctr(), cr(insn.operands[0] / 4), insn.operands[1]);
+        break;
+
     case PPC_INST_BEQ:
         printConditionalBranch(false, "eq");
         break;
@@ -808,6 +831,7 @@ bool Recompiler::Recompile(
         // no op
         break;
 
+    case PPC_INST_DCBST:
     case PPC_INST_DCBTST:
         // no op
         break;
@@ -852,6 +876,13 @@ bool Recompiler::Recompile(
         // no op
         break;
 
+    case PPC_INST_EQV:
+        // rA = ~(rS XOR rB)
+        println("\t{}.u64 = ~({}.u64 ^ {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
+        break;
+
     case PPC_INST_EXTSB:
         println("\t{}.s64 = {}.s8;", r(insn.operands[0]), r(insn.operands[1]));
         if (strchr(insn.opcode->name, '.'))
@@ -995,6 +1026,12 @@ bool Recompiler::Recompile(
         println("\t{}.f64 = {}.f64 >= 0.0 ? {}.f64 : {}.f64;", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3]));
         break;
 
+    case PPC_INST_FRSQRTE:
+        // TODO(crack): I sure hope the c++ optimizer can optimize this. Fixme with some simd magic later
+        printSetFlushMode(false);
+        println("\t{}.f64 = double(1.0f / sqrtf(float({}.f64)));", f(insn.operands[0]), f(insn.operands[1]));
+        break;
+
     case PPC_INST_FSQRT:
         printSetFlushMode(false);
         println("\t{}.f64 = sqrt({}.f64);", f(insn.operands[0]), f(insn.operands[1]));
@@ -1133,6 +1170,8 @@ bool Recompiler::Recompile(
         println("\t{}.s64 = {};", r(insn.operands[0]), int32_t(insn.operands[1] << 16));
         break;
 
+    case PPC_INST_LVEBX:
+    case PPC_INST_LVEHX:
     case PPC_INST_LVEWX:
     case PPC_INST_LVEWX128:
     case PPC_INST_LVX:
@@ -1325,6 +1364,22 @@ bool Recompiler::Recompile(
         println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
         break;
 
+    case PPC_INST_MULHD:
+        println("\t{}.s64 = __mulh({}.s64, {}.s64);",
+            r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});",
+                cr(0), r(insn.operands[0]), xer());
+        break;
+
+    case PPC_INST_MULHDU:
+        println("\t{}.u64 = __mulhu({}.u64, {}.u64);",
+            r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});",
+                cr(0), r(insn.operands[0]), xer());
+        break;
+
     case PPC_INST_NAND:
         println("\t{}.u64 = ~({}.u64 & {}.u64);", r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[2]));
         break;
@@ -1395,6 +1450,14 @@ bool Recompiler::Recompile(
         println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
         break;
 
+    case PPC_INST_RLWNM:
+        println("\t{}.u64 = __builtin_rotateleft64({}.u32 | ({}.u64 << 32), {}.u8 & 0x1F) & 0x{:X};",
+            r(insn.operands[0]), r(insn.operands[1]), r(insn.operands[1]),
+            r(insn.operands[2]), ComputeMask(insn.operands[3] + 32, insn.operands[4] + 32));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
+        break;
+
     case PPC_INST_ROTLDI:
         println("\t{}.u64 = __builtin_rotateleft64({}.u64, {});", r(insn.operands[0]), r(insn.operands[1]), insn.operands[2]);
         break;
@@ -1710,28 +1773,235 @@ bool Recompiler::Recompile(
         println("\t{}.s64 = {} - {}.s64;", r(insn.operands[0]), int32_t(insn.operands[2]), r(insn.operands[1]));
         break;
 
+    case PPC_INST_SUBFME:
+        println("\t{}.u64 = ~{}.u64 + {}.ca - 1;", temp(), r(insn.operands[1]), xer());
+        println("\t{}.ca = ({}.u64 < ~{}.u64) || ({}.u64 == ~{}.u64 && {}.ca);", xer(),
+            temp(), r(insn.operands[1]), temp(), r(insn.operands[1]), xer());
+        println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp());
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});",
+                cr(0), r(insn.operands[0]), xer());
+        break;
+
+    case PPC_INST_SUBFZE:
+        println("\t{}.u64 = ~{}.u64 + {}.ca;", temp(), r(insn.operands[1]), xer());
+        println("\t{}.ca = {}.u64 < {}.ca;", xer(), temp(), xer());
+        println("\t{}.u64 = {}.u64;", r(insn.operands[0]), temp());
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.compare({}.s32, 0, {});", cr(0), r(insn.operands[0]), xer());
+        break;
+
     case PPC_INST_SYNC:
         // no op
         break;
 
+    case PPC_INST_TDEQ:
+        println("\tif ({}.u64 == {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDEQI:
+        println("\tif ({}.u64 == {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TDGE:
+        println("\tif ({}.s64 >= {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDGEI:
+        println("\tif ({}.s64 >= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDGT:
+        println("\tif ({}.s64 > {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDGTI:
+        println("\tif ({}.s64 > {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDLE:
+        println("\tif ({}.s64 <= {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDLEI:
+        println("\tif ({}.s64 <= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDLGE:
+        println("\tif ({}.u64 >= {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
     case PPC_INST_TDLGEI:
-        // no op
+        println("\tif ({}.u64 >= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TDLGT:
+        println("\tif ({}.u64 > {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDLGTI:
+        println("\tif ({}.u64 > {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TDLLE:
+        println("\tif ({}.u64 <= {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
         break;
 
     case PPC_INST_TDLLEI:
-        // no op
+        println("\tif ({}.u64 <= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TDLLT:
+        println("\tif ({}.u64 < {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDLLTI:
+        println("\tif ({}.u64 < {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TDLT:
+        println("\tif ({}.s64 < {}.s64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDLTI:
+        println("\tif ({}.s64 < {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TDNE:
+        println("\tif ({}.u64 != {}.u64) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
         break;
 
+    case PPC_INST_TDNEI:
+        println("\tif ({}.u64 != {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
     case PPC_INST_TWI:
-        // no op
+    {
+        // TO field specifies trap conditions:
+        // Bit 0 (16): Less than (signed)
+        // Bit 1 (8):  Greater than (signed)
+        // Bit 2 (4):  Equal
+        // Bit 3 (2):  Less than (unsigned)
+        // Bit 4 (1):  Greater than (unsigned)
+
+        bool first = true;
+        print("\tif (");
+
+        if (insn.operands[0] & 16) {
+            print("{}.s32 < {}", r(insn.operands[1]), int32_t(insn.operands[2]));
+            first = false;
+        }
+
+        if (insn.operands[0] & 8) {
+            if (!first) print(" || ");
+            print("{}.s32 > {}", r(insn.operands[1]), int32_t(insn.operands[2]));
+            first = false;
+        }
+
+        if (insn.operands[0] & 4) {
+            if (!first) print(" || ");
+            print("{}.u32 == {}", r(insn.operands[1]), insn.operands[2]);
+            first = false;
+        }
+
+        if (insn.operands[0] & 2) {
+            if (!first) print(" || ");
+            print("{}.u32 < {}", r(insn.operands[1]), insn.operands[2]);
+            first = false;
+        }
+
+        if (insn.operands[0] & 1) {
+            if (!first) print(" || ");
+            print("{}.u32 > {}", r(insn.operands[1]), insn.operands[2]);
+            first = false;
+        }
+
+        if (first) {
+            // TO = 0 means never trap
+            println("false) __builtin_debugtrap();");
+        } else {
+            println(") __builtin_debugtrap();");
+        }
+    }
+    break;
+
+    case PPC_INST_TWEQ:
+        println("\tif ({}.u32 == {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWEQI:
+        println("\tif ({}.u32 == {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TWGE:
+        println("\tif ({}.s32 >= {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWGEI:
+        println("\tif ({}.s32 >= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWGT:
+        println("\tif ({}.s32 > {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWGTI:
+        println("\tif ({}.s32 > {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWLE:
+        println("\tif ({}.s32 <= {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWLEI:
+        println("\tif ({}.s32 <= {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWLGE:
+        println("\tif ({}.u32 >= {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
         break;
 
     case PPC_INST_TWLGEI:
-        // no op
+        println("\tif ({}.u32 >= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TWLGT:
+        println("\tif ({}.u32 > {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWLGTI:
+        println("\tif ({}.u32 > {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TWLLE:
+        println("\tif ({}.u32 <= {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
         break;
 
     case PPC_INST_TWLLEI:
-        // no op
+        println("\tif ({}.u32 <= {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TWLLT:
+        println("\tif ({}.u32 < {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWLLTI:
+        println("\tif ({}.u32 < {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
+        break;
+
+    case PPC_INST_TWLT:
+        println("\tif ({}.s32 < {}.s32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWLTI:
+        println("\tif ({}.s32 < {}) __builtin_debugtrap();", r(insn.operands[0]), int32_t(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWNE:
+        println("\tif ({}.u32 != {}.u32) __builtin_debugtrap();", r(insn.operands[0]), r(insn.operands[1]));
+        break;
+
+    case PPC_INST_TWNEI:
+        println("\tif ({}.u32 != {}) __builtin_debugtrap();", r(insn.operands[0]), insn.operands[1]);
         break;
 
     case PPC_INST_VADDFP:
@@ -1744,6 +2014,16 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.s16, _mm_adds_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VADDSWS:
+        // TODO(crack): vectorize - SSE doesn't have _mm_adds_epi32
+        for (size_t i = 0; i < 4; i++)
+        {
+            println("\t{}.s64 = int64_t({}.s32[{}]) + int64_t({}.s32[{}]);", temp(), v(insn.operands[1]), i, v(insn.operands[2]), i);
+            println("\t{}.s32[{}] = {}.s64 > INT_MAX ? INT_MAX : {}.s64 < INT_MIN ? INT_MIN : {}.s64;",
+                v(insn.operands[0]), i, temp(), temp(), temp());
+        }
+        break;
+
     case PPC_INST_VADDUBM:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_add_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
@@ -1769,6 +2049,7 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VANDC:
     case PPC_INST_VANDC128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
         break;
@@ -1785,6 +2066,11 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_avg_epu8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VAVGUH:
+        println("\t_mm_store_si128((__m128i*){}.u16, _mm_avg_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VCTSXS:
     case PPC_INST_VCFPSXWS128:
         printSetFlushMode(true);
@@ -1795,6 +2081,16 @@ bool Recompiler::Recompile(
             println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
         break;
 
+    case PPC_INST_VCTUXS:
+    case PPC_INST_VCFPUXWS128:
+        printSetFlushMode(true);
+        print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0]));
+        if (insn.operands[2] != 0)
+            println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]);
+        else
+            println("_mm_load_ps({}.f32)));", v(insn.operands[1]));
+        break;
+
     case PPC_INST_VCFSX:
     case PPC_INST_VCSXWFP128:
     {
@@ -1831,7 +2127,11 @@ bool Recompiler::Recompile(
 
     case PPC_INST_VCMPBFP:
     case PPC_INST_VCMPBFP128:
-        println("\t__builtin_debugtrap();");
+        printSetFlushMode(true);
+        println("\t_mm_store_ps({}.f32, _mm_vcmpbfp(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        if (strchr(insn.opcode->name, '.'))
+            println("\t{}.setFromMask(_mm_load_ps({}.f32), 0xF);", cr(6), v(insn.operands[0]));
         break;
 
     case PPC_INST_VCMPEQFP:
@@ -1908,16 +2208,36 @@ bool Recompiler::Recompile(
         println("\t_mm_store_ps({}.f32, _mm_max_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VMAXSH:
+        println("\t_mm_store_si128((__m128i*){}.s16, _mm_max_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VMAXSW:
         println("\t_mm_store_si128((__m128i*){}.u32, _mm_max_epi32(_mm_load_si128((__m128i*){}.u32), _mm_load_si128((__m128i*){}.u32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VMAXUH:
+        println("\t_mm_store_si128((__m128i*){}.u16, _mm_max_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VMINFP:
     case PPC_INST_VMINFP128:
         printSetFlushMode(true);
         println("\t_mm_store_ps({}.f32, _mm_min_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VMINSH:
+        println("\t_mm_store_si128((__m128i*){}.s16, _mm_min_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
+    case PPC_INST_VMINUH:
+        println("\t_mm_store_si128((__m128i*){}.u16, _mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_load_si128((__m128i*){}.u16)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VMRGHB:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
         break;
@@ -1966,6 +2286,12 @@ bool Recompiler::Recompile(
         println("\t_mm_store_ps({}.f32, _mm_xor_ps(_mm_sub_ps(_mm_mul_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)), _mm_load_ps({}.f32)), _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)))));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), v(insn.operands[3]));
         break;
 
+    case PPC_INST_VNOR:
+    case PPC_INST_VNOR128:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_xor_si128(_mm_or_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_set1_epi32(-1)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VOR:
     case PPC_INST_VOR128:
         print("\t_mm_store_si128((__m128i*){}.u8, ", v(insn.operands[0]));
@@ -2042,11 +2368,80 @@ bool Recompiler::Recompile(
         }
         break;
 
+    case PPC_INST_VPKSHSS:
+    case PPC_INST_VPKSHSS128:
+        println("\t_mm_store_si128((__m128i*){}.s8, _mm_packs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
+            v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
     case PPC_INST_VPKSHUS:
     case PPC_INST_VPKSHUS128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));", v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
         break;
 
+    case PPC_INST_VPKSWSS:
+    case PPC_INST_VPKSWSS128:
+        println("\t_mm_store_si128((__m128i*){}.s16, _mm_packs_epi32(_mm_load_si128((__m128i*){}.s32), _mm_load_si128((__m128i*){}.s32)));",
+            v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
+    case PPC_INST_VPKSWUS:
+    case PPC_INST_VPKSWUS128:
+        println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[2]));
+        for (int i = 0; i < 4; i++) {
+            println("\t{}.u16[{}] = {}.s32[{}] < 0 ? 0 : ({}.s32[{}] > 0xFFFF ? 0xFFFF : {}.s32[{}]);",
+                v(insn.operands[0]), i, vTemp(), i, vTemp(), i, vTemp(), i);
+        }
+        println("\t_mm_store_si128((__m128i*){}.s32, _mm_load_si128((__m128i*){}.s32));", vTemp(), v(insn.operands[1]));
+        for (int i = 0; i < 4; i++) {
+            println("\t{}.u16[{}] = {}.s32[{}] < 0 ? 0 : ({}.s32[{}] > 0xFFFF ? 0xFFFF : {}.s32[{}]);",
+                v(insn.operands[0]), i + 4, vTemp(), i, vTemp(), i, vTemp(), i);
+        }
+        break;
+
+    case PPC_INST_VPKUHUM:
+        // Pack without saturation - mask each halfword to its low byte, then pack
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16("
+            "_mm_and_si128(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF)), "
+            "_mm_and_si128(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF))));",
+            v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
+    case PPC_INST_VPKUHUS:
+    case PPC_INST_VPKUHUS128:
+        // Pack unsigned halfwords to unsigned bytes with saturation (clamp to 0xFF first, since packus treats its input as signed)
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_packus_epi16(_mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF)), _mm_min_epu16(_mm_load_si128((__m128i*){}.u16), _mm_set1_epi16(0xFF))));",
+            v(insn.operands[0]), v(insn.operands[2]), v(insn.operands[1]));
+        break;
+
+    case PPC_INST_VPKUWUM:
+    case PPC_INST_VPKUWUM128:
+        println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2]));
+        for (int i = 0; i < 4; i++) {
+            println("\t{}.u16[{}] = {}.u16[{}];",
+                v(insn.operands[0]), i, vTemp(), i*2);
+        }
+        println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[1]));
+        for (int i = 0; i < 4; i++) {
+            println("\t{}.u16[{}] = {}.u16[{}];",
+                v(insn.operands[0]), i + 4, vTemp(), i*2);
+        }
+        break;
+
+    case PPC_INST_VPKUWUS:
+    case PPC_INST_VPKUWUS128:
+        println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[2]));
+        for (int i = 0; i < 4; i++) {
+            println("\t{}.u16[{}] = {}.u32[{}] > 0xFFFF ? 0xFFFF : {}.u32[{}];",
+                v(insn.operands[0]), i, vTemp(), i, vTemp(), i);
+        }
+        println("\t_mm_store_si128((__m128i*){}.u32, _mm_load_si128((__m128i*){}.u32));", vTemp(), v(insn.operands[1]));
+        for (int i = 0; i < 4; i++) {
+            println("\t{}.u16[{}] = {}.u32[{}] > 0xFFFF ? 0xFFFF : {}.u32[{}];",
+                v(insn.operands[0]), i + 4, vTemp(), i, vTemp(), i);
+        }
+        break;
+
     case PPC_INST_VREFP:
     case PPC_INST_VREFP128:
         // TODO: see if we can use rcp safely
@@ -2088,15 +2483,51 @@ bool Recompiler::Recompile(
         break;
 
     case PPC_INST_VSEL:
+    case PPC_INST_VSEL128:
        println("\t_mm_store_si128((__m128i*){}.u8, _mm_or_si128(_mm_andnot_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)), _mm_and_si128(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8))));", v(insn.operands[0]), v(insn.operands[3]), v(insn.operands[1]), v(insn.operands[3]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VSL:
+        println("\t_mm_store_si128((__m128i*){}.u8, _mm_vsl(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VSLB:
         // TODO: vectorize
         for (size_t i = 0; i < 16; i++)
             println("\t{}.u8[{}] = {}.u8[{}] << ({}.u8[{}] & 0x7);", v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
         break;
 
+    case PPC_INST_VSLH:
+        // Vector shift left halfword
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.u16[{}] = {}.u16[{}] << ({}.u16[{}] & 0xF);",
+                v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
+        break;
+
+    case PPC_INST_VSRAH:
+        // Vector shift right algebraic halfword
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.s16[{}] = {}.s16[{}] >> ({}.u16[{}] & 0xF);",
+                v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
+        break;
+
+    case PPC_INST_VSRH:
+        // Vector shift right halfword
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.u16[{}] = {}.u16[{}] >> ({}.u16[{}] & 0xF);",
+                v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i);
+        break;
+
+    case PPC_INST_VRLH:
+        // Vector rotate left halfword
+        for (size_t i = 0; i < 8; i++)
+            println("\t{}.u16[{}] = ({}.u16[{}] << ({}.u16[{}] & 0xF)) | "
+                "({}.u16[{}] >> (16 - ({}.u16[{}] & 0xF)));",
+                v(insn.operands[0]), i, v(insn.operands[1]), i, v(insn.operands[2]), i,
+                v(insn.operands[1]), i, v(insn.operands[2]), i);
+        break;
+
     case PPC_INST_VSLDOI:
     case PPC_INST_VSLDOI128:
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_alignr_epi8(_mm_load_si128((__m128i*){}.u8), _mm_load_si128((__m128i*){}.u8), {}));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]), 16 - insn.operands[3]);
@@ -2130,6 +2561,11 @@ bool Recompiler::Recompile(
         println("\t_mm_store_si128((__m128i*){}.u8, _mm_set1_epi8(char(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
         break;
 
+    case PPC_INST_VSPLTISH:
+        println("\t_mm_store_si128((__m128i*){}.s16, _mm_set1_epi16(short({})));",
+            v(insn.operands[0]), int16_t(insn.operands[1]));
+        break;
+
     case PPC_INST_VSPLTISW:
     case PPC_INST_VSPLTISW128:
         println("\t_mm_store_si128((__m128i*){}.u32, _mm_set1_epi32(int(0x{:X})));", v(insn.operands[0]), insn.operands[1]);
@@ -2169,6 +2605,11 @@ bool Recompiler::Recompile(
         println("\t_mm_store_ps({}.f32, _mm_sub_ps(_mm_load_ps({}.f32), _mm_load_ps({}.f32)));", v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
         break;
 
+    case PPC_INST_VSUBSHS:
+        println("\t_mm_store_si128((__m128i*){}.s16, _mm_subs_epi16(_mm_load_si128((__m128i*){}.s16), _mm_load_si128((__m128i*){}.s16)));",
+            v(insn.operands[0]), v(insn.operands[1]), v(insn.operands[2]));
+        break;
+
     case PPC_INST_VSUBSWS:
         // TODO: vectorize
         for (size_t i = 0; i < 4; i++)
diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h
index c1091d17..7b30689a 100644
--- a/XenonUtils/ppc_context.h
+++ b/XenonUtils/ppc_context.h
@@ -651,4 +651,77 @@ inline __m128i _mm_vsr(__m128i a, __m128i b)
     return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10));
 }
 
+inline __m128 _mm_vcmpbfp(__m128 a, __m128 b)
+{
+    __m128 xmm0 = _mm_and_ps(_mm_cmpgt_ps(a, b), _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
+    __m128 xmm1 = _mm_and_ps(_mm_cmplt_ps(a, _mm_sub_ps(_mm_setzero_ps(), b)), _mm_castsi128_ps(_mm_set1_epi32(0x40000000)));
+    return _mm_or_ps(xmm0, xmm1);
+}
+
+inline uint64_t __mulhu(uint64_t a, uint64_t b) {
+    // Get high/low 32-bit parts
+    uint32_t a_lo = (uint32_t)a;
+    uint32_t a_hi = (uint32_t)(a >> 32);
+    uint32_t b_lo = (uint32_t)b;
+    uint32_t b_hi = (uint32_t)(b >> 32);
+
+    // Compute partial products
+    uint64_t lo_lo = (uint64_t)a_lo * b_lo;
+    uint64_t hi_lo = (uint64_t)a_hi * b_lo;
+    uint64_t lo_hi = (uint64_t)a_lo * b_hi;
+    uint64_t hi_hi = (uint64_t)a_hi * b_hi;
+
+    // Compute high 64 bits of result
+    uint64_t cross = (lo_lo >> 32) + (uint32_t)hi_lo + (uint32_t)lo_hi;
+    return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (cross >> 32);
+}
+
+inline __m128i _mm_vctuxs(__m128 src1)
+{
+    // Clamp negative to 0
+    __m128 clamped = _mm_max_ps(src1, _mm_setzero_ps());
+
+    // For values in [2^31, 2^32), subtract 2^31, convert, add 2^31 back
+    __m128i big_result = _mm_add_epi32(
+        _mm_cvttps_epi32(
+            _mm_sub_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000)))
+        ),
+        _mm_set1_epi32(0x80000000)
+    );
+
+    // Select based on range
+    __m128i result = _mm_blendv_epi8(
+        _mm_cvttps_epi32(clamped),
+        big_result,
+        _mm_castps_si128(
+            _mm_cmpge_ps(clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F000000)))
+        )
+    );
+
+    // Saturate overflow and NaN to UINT_MAX
+    __m128 saturate_mask = _mm_or_ps(
+        _mm_cmpge_ps(
+            clamped, _mm_castsi128_ps(_mm_set1_epi32(0x4F800000))
+        ),
+        _mm_cmpunord_ps(src1, src1)
+    );
+    return _mm_blendv_epi8(result, _mm_set1_epi32(-1), _mm_castps_si128(saturate_mask));
+}
+
+inline __m128i _mm_vsl(__m128i a, __m128i b)
+{
+    // Extract shift count from last byte of b (accounting for endianness)
+    uint32_t shift = _mm_extract_epi8(b, 15) & 0x7;
+
+    if (shift == 0) return a;
+
+    // Shift left by bits
+    __m128i shifted = _mm_or_si128(
+        _mm_slli_epi64(a, shift),
+        _mm_srli_epi64(_mm_slli_si128(a, 8), 64 - shift)
+    );
+
+    return shifted;
+}
+
 #endif
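Note: PPC_INST_MULHD above emits a call to __mulh, which is an MSVC-specific intrinsic, while only the unsigned helper __mulhu is added to ppc_context.h. If a non-MSVC toolchain were targeted, the signed high doubleword could be derived from __mulhu with the usual sign-correction identity. The sketch below only illustrates that identity under those assumptions; the name __mulh_portable is hypothetical and is not part of this patch.

inline int64_t __mulh_portable(int64_t a, int64_t b)
{
    // mulhs(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0), using wrap-around arithmetic.
    uint64_t high = __mulhu((uint64_t)a, (uint64_t)b);
    if (a < 0) high -= (uint64_t)b;
    if (b < 0) high -= (uint64_t)a;
    return (int64_t)high;
}

For example, __mulh_portable(-1, 1) yields -1 (all bits set), which matches the high 64 bits mulhd would produce for that product.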