From f33f21d609a55fe9c5ef4b245eb48a142acf7619 Mon Sep 17 00:00:00 2001 From: blogdron Date: Tue, 26 Nov 2024 15:10:15 +0300 Subject: [PATCH] fix build on gcc-14, add pmaxud, add pminud --- sse3.c | 58 +++++++++--------- sse41.c | 164 +++++++++++++++++++++++++++++---------------------- sse42.c | 158 ++++++++++++++++++++++++------------------------- ssse3.c | 84 +++++++++++++------------- ssse3_priv.h | 5 +- 5 files changed, 247 insertions(+), 222 deletions(-) diff --git a/sse3.c b/sse3.c index add07fb..e11e0ef 100644 --- a/sse3.c +++ b/sse3.c @@ -28,7 +28,7 @@ int sse3_grab_operands(sse3_t *sse3_obj) int64_t disp = 0; uint8_t disp_size = sse3_obj->udo_src->offset; uint64_t address; - + if (sse3_obj->udo_src->scale) goto bad; // TODO if (retrieve_reg (sse3_obj->op_obj->state, @@ -45,7 +45,7 @@ int sse3_grab_operands(sse3_t *sse3_obj) if (sse3_obj->op_obj->ring0) sse3_obj->src.uint64[0] = * ((uint64_t*) (address)); - else copy_from_user((char*) &sse3_obj->src.uint64[0], address, 8); + else copy_from_user((char*) &sse3_obj->src.uint64[0], ( const void __user *)address, 8); } } else { _store_xmm (sse3_obj->udo_dst->base - UD_R_XMM0, &sse3_obj->dst.uint128); @@ -56,7 +56,7 @@ int sse3_grab_operands(sse3_t *sse3_obj) int64_t disp = 0; uint8_t disp_size = sse3_obj->udo_src->offset; uint64_t address; - + if (sse3_obj->udo_src->scale) goto bad; // TODO if (retrieve_reg (sse3_obj->op_obj->state, @@ -73,7 +73,7 @@ int sse3_grab_operands(sse3_t *sse3_obj) if (sse3_obj->op_obj->ring0) sse3_obj->src.uint128 = * ((__uint128_t*) (address)); - else copy_from_user((char*) &sse3_obj->src.uint128, address, 16); + else copy_from_user((char*) &sse3_obj->src.uint128,(const void __user *)address, 16); } } @@ -131,7 +131,7 @@ int op_sse3_run(const op_t *op_obj) case UD_Imwait: goto good; case UD_Imonitor: goto good; sse3_common: - + sse3_obj.udo_src = ud_insn_opr (op_obj->ud_obj, 1); sse3_obj.udo_dst = ud_insn_opr (op_obj->ud_obj, 0); sse3_obj.udo_imm = ud_insn_opr (op_obj->ud_obj, 2); @@ 
-147,7 +147,7 @@ int op_sse3_run(const op_t *op_obj) && (sse3_obj.udo_dst->base <= UD_R_MM7)) { sse3_obj.ismmx = 1; } else sse3_obj.ismmx = 0; - + if (sse3_grab_operands(&sse3_obj) != 0) goto bad; opf(&sse3_obj); @@ -178,7 +178,7 @@ void fisttp(sse3_t *this) uint8_t modrm = 0; uint64_t address = 0; uint64_t reg_sel[8]; - + if (islongmode) { reg_sel[0] = this->op_obj->state64->ax; @@ -199,96 +199,96 @@ void fisttp(sse3_t *this) reg_sel[6] = this->op_obj->state32->si; reg_sel[7] = this->op_obj->state32->di; } - + if (*bytep == 0x66) { bytep++; ins_size++; } - + switch (*bytep) { case 0xDB: bytep++; ins_size++; - + modrm = *bytep; base = modrm & 0x7; mod = (modrm & 0xC0) >> 6; - + if (mod == 0) { address = reg_sel[base]; } else if (mod == 1) { bytep++; ins_size++; - + add = *bytep; address = reg_sel[base] + add; } else { return; } - + fisttpl((double *)address); - + ins_size++; - + return; break; - + case 0xDD: bytep++; ins_size++; - + modrm = *bytep; base = modrm & 0x7; mod = (modrm & 0xC0) >> 6; - + if (mod == 0) { address = reg_sel[base]; } else if (mod == 1) { bytep++; ins_size++; - + add = *bytep; address = reg_sel[base] + add; } else { return; } - + fisttpq((long double *)address); - + ins_size++; - + return; break; - + case 0xDF: bytep++; ins_size++; - + modrm = *bytep; base = modrm & 0x7; mod = (modrm & 0xC0) >> 6; - + if (mod == 0) { address = reg_sel[base]; } else if (mod == 1) { bytep++; ins_size++; - + add = *bytep; address = reg_sel[base] + add; } else { return; } - + fisttps((float *)address); - + ins_size++; - + return; break; } diff --git a/sse41.c b/sse41.c index fa95bca..d398433 100644 --- a/sse41.c +++ b/sse41.c @@ -1,30 +1,52 @@ #include "ssse3_priv.h" #include +void pmaxud(ssse3_t *this) +{ + uint32_t * temp1 = this->src.uint32; + uint32_t * temp2 = this->dst.uint32; + + this->res.int32[0] = (temp1[0] > temp2[0]) ? temp1[0] : temp2[0]; + this->res.int32[1] = (temp1[1] > temp2[1]) ? 
temp1[1] : temp2[1]; + this->res.int32[2] = (temp1[2] > temp2[2]) ? temp1[2] : temp2[2]; + this->res.int32[3] = (temp1[3] > temp2[3]) ? temp1[3] : temp2[3]; +} + +void pminud(ssse3_t *this) +{ + uint32_t * temp1 = this->src.uint32; + uint32_t * temp2 = this->dst.uint32; + + this->res.int32[0] = (temp1[0] < temp2[0]) ? temp1[0] : temp2[0]; + this->res.int32[1] = (temp1[1] < temp2[1]) ? temp1[1] : temp2[1]; + this->res.int32[2] = (temp1[2] < temp2[2]) ? temp1[2] : temp2[2]; + this->res.int32[3] = (temp1[3] < temp2[3]) ? temp1[3] : temp2[3]; +} + void blendpd(ssse3_t *this) { uint8_t imm = this->udo_imm->lval.ubyte; - - uint64_t* temp1 = this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + + uint64_t* temp1 = (uint64_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + if (imm & 1) { temp2 = temp1; } if ((imm & 2) > 1) { temp2[1] = temp1[1]; } - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void blendps(ssse3_t *this) { uint8_t imm = this->udo_imm->lval.ubyte; - - uint32_t* temp1 = this->src.uint128; - uint32_t* temp2 = this->dst.uint128; - + + uint32_t* temp1 = (uint32_t*)&this->src.uint128; + uint32_t* temp2 = (uint32_t*)&this->dst.uint128; + if (imm & 1) { //1st bit imm != 0 temp2 = temp1; } @@ -37,17 +59,17 @@ void blendps(ssse3_t *this) if ((imm & 8) > 7) { //4th bit imm != 0 temp2[3] = temp1[3]; } - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pblendw(ssse3_t *this) { uint8_t imm = this->udo_imm->lval.ubyte; - - uint16_t* temp1 = this->src.uint128; - uint16_t* temp2 = this->dst.uint128; - + + uint16_t* temp1 = (uint16_t*)&this->src.uint128; + uint16_t* temp2 = (uint16_t*)&this->dst.uint128; + if (imm & 1) { //1st bit imm != 0 temp2 = temp1; } @@ -72,15 +94,15 @@ void pblendw(ssse3_t *this) if ((imm & 128) > 127) { temp2[7] = temp1[7]; } - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } 
void pmovsxbw(ssse3_t *this) { - int8_t* temp1 = this->src.uint128; - int16_t* temp2 = this->dst.uint128; - + int8_t* temp1 = (int8_t*)&this->src.uint128; + int16_t* temp2 = (int16_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 127 ? 0xFF00 | temp1[0] : temp1[0]; temp2[1] = temp1[1] > 127 ? 0xFF00 | temp1[1] : temp1[1]; @@ -90,75 +112,75 @@ void pmovsxbw(ssse3_t *this) temp2[5] = temp1[5] > 127 ? 0xFF00 | temp1[5] : temp1[5]; temp2[6] = temp1[6] > 127 ? 0xFF00 | temp1[6] : temp1[6]; temp2[7] = temp1[7] > 127 ? 0xFF00 | temp1[7] : temp1[7]; - - this->res.uint128 = ((__uint128_t*) temp2); + + this->res.uint128 = *((__uint128_t*) temp2); } void pmovsxbd(ssse3_t *this) { - uint8_t* temp1 = this->src.uint128; - uint32_t* temp2 = this->dst.uint128; - + uint8_t* temp1 = (uint8_t*)&this->src.uint128; + uint32_t* temp2 = (uint32_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 127 ? 0xFFFF00 | temp1[0] : (uint32_t) temp1[0]; temp2[1] = temp1[1] > 127 ? 0xFFFF00 | temp1[1] : (uint32_t) temp1[1]; temp2[2] = temp1[2] > 127 ? 0xFFFF00 | temp1[2] : (uint32_t) temp1[2]; temp2[3] = temp1[3] > 127 ? 0xFFFF00 | temp1[3] : (uint32_t) temp1[3]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovsxbq(ssse3_t *this) { - uint8_t* temp1 = this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + uint8_t* temp1 = (uint8_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 127 ? 0xFFFFFFFFFFFFFF00 | temp1[0] : (uint64_t) temp1[0]; temp2[1] = temp1[1] > 127 ? 0xFFFFFFFFFFFFFF00 | temp1[1] : (uint64_t) temp1[1]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovsxwd(ssse3_t *this) { - uint16_t* temp1 = this->src.uint128; - uint32_t* temp2 = this->dst.uint128; - + uint16_t* temp1 = (uint16_t*)&this->src.uint128; + uint32_t* temp2 = (uint32_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 32767 ? 
0xFFFF0000 | temp1[0] : (uint32_t) temp1[0]; temp2[1] = temp1[1] > 32767 ? 0xFFFF0000 | temp1[1] : (uint32_t) temp1[1]; temp2[2] = temp1[2] > 32767 ? 0xFFFF0000 | temp1[2] : (uint32_t) temp1[2]; temp2[3] = temp1[3] > 32767 ? 0xFFFF0000 | temp1[3] : (uint32_t) temp1[3]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovsxwq(ssse3_t *this) { - uint16_t* temp1 = this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + uint16_t* temp1 = (uint16_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 32767 ? 0xFFFFFFFFFFFF0000 | temp1[0] : (uint64_t) temp1[0]; temp2[1] = temp1[1] > 32767 ? 0xFFFFFFFFFFFF0000 | temp1[1] : (uint64_t) temp1[1]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovsxdq(ssse3_t *this) { - uint32_t* temp1 = this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + uint32_t* temp1 = (uint32_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 0x7FFFFFFF ? 0xFFFFFFFF00000000 | temp1[0] : (uint64_t) temp1[0]; temp2[1] = temp1[1] > 0x7FFFFFFF ? 0xFFFFFFFF00000000 | temp1[1] : (uint64_t) temp1[1]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovzxbw(ssse3_t *this) { - int8_t* temp1 = this->src.uint128; - int16_t* temp2 = this->dst.uint128; - + int8_t* temp1 = (int8_t*)&this->src.uint128; + int16_t* temp2 = (int16_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 127 ? 0xFF00 | temp1[0] : temp1[0]; temp2[1] = temp1[1] > 127 ? 0xFF00 | temp1[1] : temp1[1]; @@ -168,67 +190,67 @@ void pmovzxbw(ssse3_t *this) temp2[5] = temp1[5] > 127 ? 0xFF00 | temp1[5] : temp1[5]; temp2[6] = temp1[6] > 127 ? 0xFF00 | temp1[6] : temp1[6]; temp2[7] = temp1[7] > 127 ? 
0xFF00 | temp1[7] : temp1[7]; - - this->res.uint128 = ((__uint128_t*) temp2); + + this->res.uint128 = *((__uint128_t*) temp2); } void pmovzxbd(ssse3_t *this) { - uint8_t* temp1 = this->src.uint128; - uint32_t* temp2 = this->dst.uint128; - + uint8_t* temp1 = (uint8_t*)&this->src.uint128; + uint32_t* temp2 = (uint32_t*)&this->dst.uint128; + temp2[0] = temp1[0] > 127 ? 0xFFFF00 | temp1[0] : (uint32_t) temp1[0]; temp2[1] = temp1[1] > 127 ? 0xFFFF00 | temp1[1] : (uint32_t) temp1[1]; temp2[2] = temp1[2] > 127 ? 0xFFFF00 | temp1[2] : (uint32_t) temp1[2]; temp2[3] = temp1[3] > 127 ? 0xFFFF00 | temp1[3] : (uint32_t) temp1[3]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovzxbq(ssse3_t *this) { - uint8_t* temp1 = this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + uint8_t* temp1 = (uint8_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + temp2[0] = 0x00000000000000FF & temp1[0]; temp2[1] = 0x00000000000000FF & temp1[1]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovzxwd(ssse3_t *this) { - uint16_t* temp1 = this->src.uint128; - uint32_t* temp2 = this->dst.uint128; - + uint16_t* temp1 = (uint16_t*)&this->src.uint128; + uint32_t* temp2 = (uint32_t*)&this->dst.uint128; + temp2[0] = 0x0000FFFF & temp1[0]; temp2[1] = 0x0000FFFF & temp1[1]; temp2[2] = 0x0000FFFF & temp1[2]; temp2[3] = 0x0000FFFF & temp1[3]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovzxwq(ssse3_t *this) { - uint16_t* temp1 = this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + uint16_t* temp1 = (uint16_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + temp2[0] = 0x000000000000FFFF & temp1[0]; temp2[1] = 0x000000000000FFFF & temp1[1]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pmovzxdq(ssse3_t *this) { - uint32_t* temp1 
= this->src.uint128; - uint64_t* temp2 = this->dst.uint128; - + uint32_t* temp1 = (uint32_t*)&this->src.uint128; + uint64_t* temp2 = (uint64_t*)&this->dst.uint128; + temp2[0] = 0x00000000FFFFFFFF & temp1[0]; temp2[1] = 0x00000000FFFFFFFF & temp1[1]; - this->res.uint128 = ((__uint128_t*) temp2); + this->res.uint128 = *((__uint128_t*) temp2); } void pextrb(ssse3_t *this) @@ -236,7 +258,7 @@ void pextrb(ssse3_t *this) uint8_t sel = this->udo_imm->lval.ubyte & 0xF; uint8_t temp1 = this->src.uint8[sel]; uint8_t islongmode = is_saved_state64(this->op_obj->state); - + if (this->udo_dst->type == UD_OP_MEM) { this->res.uint8[0] = temp1; printk("pextrb this->udo_dst->type == UD_OP_MEM this->res.uint8[0]: %hhu", temp1); @@ -271,7 +293,7 @@ void pextrq(ssse3_t *this) uint8_t sel = this->udo_imm->lval.ubyte & 1; uint64_t temp1 = this->src.uint64[sel]; uint8_t islongmode = is_saved_state64(this->op_obj->state); - + if (this->udo_dst->type == UD_OP_MEM) { this->res.uint128 = temp1; printk("pextrq this->udo_dst->type == UD_OP_MEM this->res.uint128: %hhu", temp1); @@ -324,9 +346,9 @@ void ptest(ssse3_t *this) struct pt_regs *regs; uint64_t FLAGS; FLAGS = regs->flags; - + sse_reg_t AND1, AND2; - + AND1.uint128 = this->src.uint128 & this->dst.uint128; AND2.uint128 = this->src.uint128 & ~(this->dst.uint128); @@ -335,7 +357,7 @@ void ptest(ssse3_t *this) } else { FLAGS = (FLAGS | 0x00000040) ^ 0x00000040; //set ZF = 0 } - + if (AND2.uint128 == 0) { FLAGS |= 0x00000001; //set CF = 1 } else { @@ -346,7 +368,7 @@ void ptest(ssse3_t *this) FLAGS = (FLAGS | 0x00000010) ^ 0x00000010; //set AF = 0 FLAGS = (FLAGS | 0x00000080) ^ 0x00000080; //set SF = 0 FLAGS = (FLAGS | 0x00000004) ^ 0x00000004; //set PF = 0 - + regs->flags = FLAGS; } diff --git a/sse42.c b/sse42.c index 2994aa2..4bd8399 100644 --- a/sse42.c +++ b/sse42.c @@ -42,9 +42,9 @@ calc_str_len (__int128_t val, const int mode) } s; int i; int dim = (mode & 1) == 0 ? 
16 : 8; - + s.x = val; - + if ((mode & 1)) { for (i = 0; i < dim; i++) @@ -57,7 +57,7 @@ calc_str_len (__int128_t val, const int mode) if (s.c[i] == 0) break; } - + return i; } @@ -66,7 +66,7 @@ override_invalid (unsigned char res[16][16], int la, int lb, const int mode, int dim) { int i, j; - + for (j = 0; j < dim; j++) for (i = 0; i < dim; i++) if (i < la && j >= lb) @@ -99,10 +99,10 @@ calc_matrix (__int128_t a, int la, __int128_t b, int lb, const int mode, signed short ss[8]; unsigned short us[8]; } d, s; - + d.x = a; s.x = b; - + switch ((mode & 3)) { case 0x00: @@ -146,7 +146,7 @@ calc_matrix (__int128_t a, int la, __int128_t b, int lb, const int mode, } break; } - + override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8); } @@ -155,25 +155,25 @@ pcmpstr_calc_res (__int128_t a, int la, __int128_t b, int lb, const int mode) { unsigned char mtx[16][16]; int i, j, k, dim, res = 0; - + memset (mtx, 0, sizeof (mtx)); - + dim = (mode & 1) == 0 ? 16 : 8; - + if (la < 0) la = -la; - + if (lb < 0) lb = -lb; - + if (la > dim) la = dim; - + if (lb > dim) lb = dim; - + calc_matrix (a, la, b, lb, mode, mtx); - + switch ((mode & 0x0C)) { case 0: @@ -182,28 +182,28 @@ pcmpstr_calc_res (__int128_t a, int la, __int128_t b, int lb, const int mode) if (mtx[i][j]) res |= (1 << i); break; - + case 4: for (i = 0; i < dim; i += 2) for(j = 0; j < dim; j++) if (mtx[j][i] && mtx[j][i+1]) res |= (1 << j); break; - + case 8: for(i = 0; i < dim; i++) if (mtx[i][i]) res |= (1 << i); break; - + case 12: for(i = 0; i < dim; i++) { unsigned char val = 1; - + for (j = 0, k = i; j < dim - i && k < dim; j++, k++) val &= mtx[k][j]; - + if (val) res |= (1 << i); else @@ -211,17 +211,17 @@ pcmpstr_calc_res (__int128_t a, int la, __int128_t b, int lb, const int mode) } break; } - + switch ((mode & 0x30)) { case 0x00: case 0x20: break; - + case 0x10: res ^= -1; break; - + case 0x30: for (i = 0; i < lb; i++) if (res & (1 << i)) @@ -230,7 +230,7 @@ pcmpstr_calc_res (__int128_t a, int la, 
__int128_t b, int lb, const int mode) res |= (1 << i); break; } - + return res & ((dim == 8) ? 0xFF : 0xFFFF); } @@ -241,9 +241,9 @@ cmp_indexed (__int128_t a, int la, __int128_t b, int lb, int i, ndx; int dim = (mode & 1) == 0 ? 16 : 8; int r2; - + r2 = pcmpstr_calc_res (a, la, b, lb, mode); - + ndx = dim; if ((mode & 0x40)) { @@ -263,7 +263,7 @@ cmp_indexed (__int128_t a, int la, __int128_t b, int lb, break; } } - + *res2 = r2; return ndx; } @@ -281,21 +281,21 @@ cmp_flags (__int128_t a, int la, __int128_t b, int lb, unsigned char uc[16]; unsigned short us[8]; } d, s; - + d.x = a; s.x = b; - + /* CF: reset if (RES2 == 0), set otherwise. */ if (res2 != 0) flags |= CFLAG; - + if (is_implicit) { /* ZF: set if any byte/word of src xmm operand is null, reset otherwise. SF: set if any byte/word of dst xmm operand is null, reset otherwise. */ - + if (is_bytes_mode) { for (i = 0; i < 16; i++) @@ -322,22 +322,22 @@ cmp_flags (__int128_t a, int la, __int128_t b, int lb, /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise. SF: set if abs value of EAX/RAX < 16 (8), reset otherwise. */ int max_ind = is_bytes_mode ? 16 : 8; - + if (la < 0) la = -la; if (lb < 0) lb = -lb; - + if (lb < max_ind) flags |= ZFLAG; if (la < max_ind) flags |= SFLAG; } - + /* OF: equal to RES2[0]. */ if ((res2 & 0x1)) flags |= OFLAG; - + /* AF: Reset. PF: Reset. 
*/ return flags; @@ -349,10 +349,10 @@ cmp_ei (__int128_t *a, int la, __int128_t *b, int lb, { int res2; int index = cmp_indexed (*a, la, *b, lb, mode, &res2); - + if (flags != NULL) *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0); - + return index; } @@ -362,15 +362,15 @@ cmp_ii (__int128_t *a, __int128_t *b, const int mode, int *flags) int la, lb; int res2; int index; - + la = calc_str_len (*a, mode); lb = calc_str_len (*b, mode); - + index = cmp_indexed (*a, la, *b, lb, mode, &res2); - + if (flags != NULL) *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1); - + return index; } @@ -392,11 +392,11 @@ cmp_masked (__int128_t a, int la, __int128_t b, int lb, char c[4]; short s[2]; } r2; - + r2.i = pcmpstr_calc_res (a, la, b, lb, mode); - + ret.x = 0; - + if (mode & 0x40) { for (i = 0; i < dim; i++) @@ -412,9 +412,9 @@ cmp_masked (__int128_t a, int la, __int128_t b, int lb, else ret.c[0] = r2.c[0]; } - + *res2 = r2.i; - + return ret.x; } @@ -424,10 +424,10 @@ cmp_em (__int128_t *a, int la, __int128_t *b, int lb, { int res2; __int128_t mask = cmp_masked (*a, la, *b, lb, mode, &res2); - + if (flags != NULL) *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0); - + return mask; } @@ -437,14 +437,14 @@ cmp_im (__int128_t *a, __int128_t *b, const int mode, int *flags) int la, lb; int res2; __int128_t mask; - + la = calc_str_len (*a, mode); lb = calc_str_len (*b, mode); - + mask = cmp_masked (*a, la, *b, lb, mode, &res2); if (flags != NULL) *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1); - + return mask; } @@ -502,42 +502,42 @@ void pcmpestri (ssse3_t *this) const int imm = this->udo_imm->lval.ubyte; //const int issigned = imm & 0b10; uint8_t islongmode = is_saved_state64(this->op_obj->state); - + __int128_t *src = &(this->src.int128); __int128_t *dst = &(this->dst.int128); - + int res1 = 0, res2 = 0; - + /* thanks for excusing me the nesting */ // FIXME: How to get la + lb parameters? 
res1 = cmp_ei(src, sizeof(*src), dst, sizeof(*dst), imm, &res2); - + #if 0 printf("src: "); print128(this->src); printf("\n"); - + printf("dst: "); print128(this->dst); printf("\n"); - + printf("res: "); print128(this->res); printf("\n"); - + printf("and the int2 is %02x\n", res2); printf("and the index is %d\n", res1); #endif - + if (islongmode) { this->op_obj->state64->cx = res1; - + this->op_obj->state64->flags &= ~ 0b100011010101; this->op_obj->state64->flags |= res2; } else { this->op_obj->state32->cx = res1; - + this->op_obj->state32->flags &= ~ 0b100011010101; this->op_obj->state32->flags |= res2; // C } @@ -548,34 +548,34 @@ void pcmpestrm (ssse3_t *this) const int imm = this->udo_imm->lval.ubyte; //const int issigned = imm & 0b10; uint8_t islongmode = is_saved_state64(this->op_obj->state); - + __int128_t *src = &(this->src.int128); __int128_t *dst = &(this->dst.int128); __int128_t *res = &(this->res.int128); - + int res2 = 0; - + /* thanks for excusing me the nesting */ // FIXME: How to get la + lb parameters? 
*res = cmp_em(src, sizeof(*src), dst, sizeof(*dst), imm, &res2); - + #if 0 printf("src: "); print128(this->src); printf("\n"); - + printf("dst: "); print128(this->dst); printf("\n"); - + printf("res: "); print128(this->res); printf("\n"); - + printf("and the int2 is %02x\n", res2); printf("and the index is %d\n", res1); #endif - + if (islongmode) { this->op_obj->state64->flags &= ~ 0b100011010101; @@ -608,7 +608,7 @@ static void getmemoperand(ssse3_t *this, uint8_t *size, uint64_t *retval) if (this->op_obj->ring0) retval[0] = *((uint64_t*)(address)); else - copy_from_user ((char*) &retval[0], address, 8); + copy_from_user ((char*) &retval[0],(const void __user *)address, 8); } void pcmpistrm (ssse3_t *this) @@ -616,34 +616,34 @@ void pcmpistrm (ssse3_t *this) const int imm = this->udo_imm->lval.ubyte; //const int issigned = imm & 0b10; uint8_t islongmode = is_saved_state64(this->op_obj->state); - + __int128_t *src = &(this->src.int128); __int128_t *dst = &(this->dst.int128); __int128_t *res = &(this->res.int128); - + int res2 = 0; - + /* thanks for excusing me the nesting */ - + *res = cmp_im(src, dst, imm, &res2); - + #if 0 printf("src: "); print128(this->src); printf("\n"); - + printf("dst: "); print128(this->dst); printf("\n"); - + printf("res: "); print128(this->res); printf("\n"); - + printf("and the int2 is %02x\n", res2); printf("and the index is %d\n", res1); #endif - + if (islongmode) { this->op_obj->state64->flags &= ~ 0b100011010101; diff --git a/ssse3.c b/ssse3.c index 8791202..2e369ae 100644 --- a/ssse3.c +++ b/ssse3.c @@ -1,12 +1,12 @@ /* - .d8888b. .d8888b. .d8888b. 8888888888 .d8888b. - d88P Y88b d88P Y88b d88P Y88b 888 d88P Y88b - Y88b. Y88b. Y88b. 888 .d88P - "Y888b. "Y888b. "Y888b. 8888888 8888" - "Y88b. "Y88b. "Y88b. 888 "Y8b. - "888 "888 "888 888 888 888 - Y88b d88P Y88b d88P Y88b d88P 888 Y88b d88P - "Y8888P" "Y8888P" "Y8888P" 8888888888 "Y8888P" + .d8888b. .d8888b. .d8888b. 8888888888 .d8888b. 
+ d88P Y88b d88P Y88b d88P Y88b 888 d88P Y88b + Y88b. Y88b. Y88b. 888 .d88P + "Y888b. "Y888b. "Y888b. 8888888 8888" + "Y88b. "Y88b. "Y88b. 888 "Y8b. + "888 "888 "888 888 888 888 + Y88b d88P Y88b d88P Y88b d88P 888 Y88b d88P + "Y8888P" "Y8888P" "Y8888P" 8888888888 "Y8888P" */ #include "opemu.h" @@ -107,28 +107,28 @@ inline void _sstore_gpr32 (ud_type_t n, uint32_t *where) { struct pt_regs *regs; switch (n) { case UD_R_EAX: - *where = &regs->ax; + *where = (uint32_t)&regs->ax; break; case UD_R_ECX: - *where = &regs->cx; + *where = (uint32_t)&regs->cx; break; case UD_R_EDX: - *where = &regs->dx; + *where = (uint32_t)&regs->dx; break; case UD_R_EBX: - *where = &regs->bx; + *where = (uint32_t)&regs->bx; break; case UD_R_ESP: - *where = &regs->sp; + *where = (uint32_t)&regs->sp; break; case UD_R_EBP: - *where = &regs->bp; + *where = (uint32_t)&regs->bp; break; case UD_R_ESI: - *where = &regs->si; + *where = (uint32_t)&regs->si; break; case UD_R_EDI: - *where = &regs->di; + *where = (uint32_t)&regs->di; break; } } @@ -137,28 +137,28 @@ inline void _sstore_gpr64 (ud_type_t n, uint64_t *where) { struct pt_regs *regs; switch (n) { case UD_R_RAX: - *where = &regs->ax; + *where = (uint64_t)&regs->ax; break; case UD_R_RCX: - *where = &regs->cx; + *where = (uint64_t)&regs->cx; break; case UD_R_RDX: - *where = &regs->dx; + *where = (uint64_t)&regs->dx; break; case UD_R_RBX: - *where = &regs->bx; + *where = (uint64_t)&regs->bx; break; case UD_R_RSP: - *where = &regs->sp; + *where = (uint64_t)&regs->sp; break; case UD_R_RBP: - *where = &regs->bp; + *where = (uint64_t)&regs->bp; break; case UD_R_RSI: - *where = &regs->si; + *where = (uint64_t)&regs->si; break; case UD_R_RDI: - *where = &regs->di; + *where = (uint64_t)&regs->di; break; } } @@ -228,7 +228,7 @@ int ssse3_grab_operands(ssse3_t *ssse3_obj) } else { printk("mem"); } - + if (ssse3_obj->udo_src->type == UD_OP_REG) { if (ud_insn_mnemonic(ssse3_obj->op_obj->ud_obj) == UD_Iroundss) { _fstore_xmm (ssse3_obj->udo_src->base - UD_R_XMM0, &ssse3_obj->src.fa32[0]); @@ -257,7 +257,7 @@ int ssse3_grab_operands(ssse3_t 
*ssse3_obj) int64_t disp = 0; uint8_t disp_size = ssse3_obj->udo_src->offset; uint64_t address; - + if (ssse3_obj->udo_src->scale) goto bad; // TODO if (retrieve_reg (ssse3_obj->op_obj->state, @@ -274,14 +274,14 @@ int ssse3_grab_operands(ssse3_t *ssse3_obj) if (ssse3_obj->op_obj->ring0) ssse3_obj->src.uint64[0] = * ((uint64_t*) (address)); - else copy_from_user((char*) &ssse3_obj->src.uint64[0], address, 8); + else copy_from_user((char*) &ssse3_obj->src.uint64[0], (const void __user *)address, 8); } else if (ssse3_obj->udo_src->size == 128) { // m128 load int64_t disp = 0; uint8_t disp_size = ssse3_obj->udo_src->offset; uint64_t address; - + if (ssse3_obj->udo_src->scale) goto bad; // TODO if (retrieve_reg (ssse3_obj->op_obj->state, @@ -298,13 +298,13 @@ int ssse3_grab_operands(ssse3_t *ssse3_obj) if (ssse3_obj->op_obj->ring0) ssse3_obj->src.uint128 = * ((__uint128_t*) (address)); - else copy_from_user((char*) &ssse3_obj->src.uint128, address, 16); + else copy_from_user((char*) &ssse3_obj->src.uint128, (const void __user *)address, 16); } else { printk("src mem else"); } } - + return 0; // Only reached if bad bad: return -1; @@ -317,7 +317,7 @@ bad: return -1; int ssse3_commit_results(ssse3_t *ssse3_obj) { if (ud_insn_mnemonic(ssse3_obj->op_obj->ud_obj) == UD_Iroundss) { - + _fload_xmm (ssse3_obj->udo_dst->base - UD_R_XMM0, (void*) &ssse3_obj->res.fa32[0]); } else if (ssse3_obj->ismmx) { @@ -367,7 +367,9 @@ int op_sse3x_run(op_t *op_obj) case UD_Ipcmpgtq: opf = pcmpgtq; goto sse42_common; case UD_Ipopcnt: opf = popcnt; goto regop; case UD_Icrc32: opf = crc32_op; goto regop; - + // + case UD_Ipmaxud: opf = pmaxud; goto ssse3_common; + case UD_Ipminud: opf = pminud; goto ssse3_common; //SSE 4.1 //case UD_Iblendpd: opf = blendpd; goto ssse3_common; //case UD_Iblendps: opf = blendps; goto ssse3_common; @@ -393,7 +395,7 @@ int op_sse3x_run(op_t *op_obj) case UD_Ipinsrd: opf = pinsrd; goto ssse3_common; case UD_Ipinsrq: opf = pinsrq; goto ssse3_common; -sse42_common: 
+sse42_common: goto ssse3_common; @@ -422,7 +424,7 @@ int op_sse3x_run(op_t *op_obj) case UD_Iphaddsw: opf = phaddsw; goto ssse3_common; ssse3_common: - + ssse3_obj.udo_src = ud_insn_opr (op_obj->ud_obj, 1); ssse3_obj.udo_dst = ud_insn_opr (op_obj->ud_obj, 0); ssse3_obj.udo_imm = ud_insn_opr (op_obj->ud_obj, 2); @@ -434,7 +436,7 @@ int op_sse3x_run(op_t *op_obj) if ((ssse3_obj.udo_dst->base >= UD_R_MM0) && (ssse3_obj.udo_dst->base <= UD_R_MM7)) { ssse3_obj.ismmx = 1; } else ssse3_obj.ismmx = 0; - + ssse3_obj.dst64 = ssse3_obj.dst32 = 0; if (ssse3_grab_operands(&ssse3_obj) != 0) goto bad; @@ -454,7 +456,7 @@ int op_sse3x_run(op_t *op_obj) opf(&ssse3_obj); -good: +good: if (ssse3_obj.dst64) { //printk("OPEMUq: %s\n", ud_insn_asm(op_obj->ud_obj)); @@ -465,8 +467,8 @@ int op_sse3x_run(op_t *op_obj) op_obj->dst32 = (uint8_t) 1; op_obj->res32 = (uint32_t) ssse3_obj.res.uint32[0]; } - - + + //uint64_t ek; //asm __volatile__ ("movq %%rcx, %0" : "=m" (ek) :); //printk("good rcx: %u", ek); @@ -598,7 +600,7 @@ void pabsd (ssse3_t *this) /** * Concatenate and shift - */ + */ void palignr (ssse3_t *this) { uint8_t imm = this->udo_imm->lval.ubyte; @@ -619,7 +621,7 @@ void palignr (ssse3_t *this) shiftp = (uint8_t*) &temp1[0]; shiftp += imm; shiftpaddr = (uint64_t)shiftp; - this->res.uint128 = ((__uint128_t*) shiftpaddr); + this->res.uint128 = *((__uint128_t*) shiftpaddr); } } @@ -682,7 +684,7 @@ void pmaddubsw (ssse3_t *this) ++res; src += 2; dst += 2; - } + } } /** diff --git a/ssse3_priv.h b/ssse3_priv.h index a00d6d4..16cead2 100644 --- a/ssse3_priv.h +++ b/ssse3_priv.h @@ -115,7 +115,7 @@ typedef void (*ssse3_func)(ssse3_t*); asm __volatile__ ("movss %0, %%xmm" #n :: "m" (*(where))); \ kernel_fpu_end(); \ } while (0); - + #define storedqu_template(n, where) \ do { \ asm __volatile__ ("movdqu %%xmm" #n ", %0" : "=m" (*(where))); \ @@ -183,7 +183,8 @@ inline void ptest (ssse3_t*); inline void pinsrb (ssse3_t*); inline void pinsrd (ssse3_t*); inline void pinsrq (ssse3_t*); - 
+inline void pmaxud (ssse3_t*); +inline void pminud (ssse3_t*); /*** SSE4.2 TODO move this somewhere else ***/ inline void pcmpistri (ssse3_t*);