trunk/src/emu/cpu/rsp/rsp.c
| r241959 | r241960 | |
| 7 | 7 | #include "emu.h" |
| 8 | 8 | #include "debugger.h" |
| 9 | 9 | #include "rsp.h" |
| 10 | #include "rspdiv.h" |
| 10 | 11 | #include "rspfe.h" |
| 11 | | #include "rspcp2.h" |
| 12 | | #include "rspcp2d.h" |
| 13 | 12 | |
| 14 | 13 | |
| 15 | 14 | const device_type RSP = &device_creator<rsp_device>; |
| r241959 | r241960 | |
| 37 | 36 | #define UIMM16 ((UINT16)(op)) |
| 38 | 37 | #define UIMM26 (op & 0x03ffffff) |
| 39 | 38 | |
| 40 | | #define RSVAL (m_rsp_state->r[RSREG]) |
| 41 | | #define RTVAL (m_rsp_state->r[RTREG]) |
| 42 | | #define RDVAL (m_rsp_state->r[RDREG]) |
| 43 | | |
| 44 | 39 | #define JUMP_ABS(addr) { m_nextpc = 0x04001000 | (((addr) << 2) & 0xfff); } |
| 45 | 40 | #define JUMP_ABS_L(addr,l) { m_nextpc = 0x04001000 | (((addr) << 2) & 0xfff); m_rsp_state->r[l] = m_rsp_state->pc + 4; } |
| 46 | 41 | #define JUMP_REL(offset) { m_nextpc = 0x04001000 | ((m_rsp_state->pc + ((offset) << 2)) & 0xfff); } |
| r241959 | r241960 | |
| 49 | 44 | #define JUMP_PC_L(addr,l) { m_nextpc = 0x04001000 | ((addr) & 0xfff); m_rsp_state->r[l] = m_rsp_state->pc + 4; } |
| 50 | 45 | #define LINK(l) { m_rsp_state->r[l] = m_rsp_state->pc + 4; } |
| 51 | 46 | |
| 47 | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 48 | #define VREG_S(reg, offset) m_v[(reg)].s[(offset)] |
| 49 | #define VREG_L(reg, offset) m_v[(reg)].l[(offset)] |
| 50 | |
| 51 | #define R_VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 52 | #define R_VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] |
| 53 | #define R_VREG_L(reg, offset) m_v[(reg)].l[(offset)] |
| 54 | |
| 55 | #define W_VREG_B(reg, offset, val) (m_v[(reg)].b[(offset)^1] = val) |
| 56 | #define W_VREG_S(reg, offset, val) (m_v[(reg)].s[(offset)] = val) |
| 57 | #define W_VREG_L(reg, offset, val) (m_v[(reg)].l[(offset)] = val) |
| 58 | |
| 59 | #define VEC_EL_2(x,z) (vector_elements[(x)][(z)]) |
| 60 | |
| 61 | #define ACCUM(x) m_accum[((x))].q |
| 62 | #define ACCUM_H(x) m_accum[((x))].w[3] |
| 63 | #define ACCUM_M(x) m_accum[((x))].w[2] |
| 64 | #define ACCUM_L(x) m_accum[((x))].w[1] |
| 65 | #define ACCUM_LL(x) m_accum[((x))].w[0] |
| 66 | |
| 67 | #define CARRY 0 |
| 68 | #define COMPARE 1 |
| 69 | #define CLIP1 2 |
| 70 | #define ZERO 3 |
| 71 | #define CLIP2 4 |
| 72 | |
| 52 | 73 | #define CARRY_FLAG(x) (m_vflag[CARRY][x & 7] != 0 ? 0xffff : 0) |
| 53 | 74 | #define COMPARE_FLAG(x) (m_vflag[COMPARE][x & 7] != 0 ? 0xffff : 0) |
| 54 | 75 | #define CLIP1_FLAG(x) (m_vflag[CLIP1][x & 7] != 0 ? 0xffff : 0) |
| r241959 | r241960 | |
| 120 | 141 | , m_write32(NULL) |
| 121 | 142 | , m_rsp_state(NULL) |
| 122 | 143 | , m_exec_output(NULL) |
| 144 | #if SIMUL_SIMD |
| 145 | , m_old_reciprocal_res(0) |
| 146 | , m_old_reciprocal_high(0) |
| 147 | , m_old_dp_allowed(0) |
| 148 | , m_scalar_reciprocal_res(0) |
| 149 | , m_scalar_reciprocal_high(0) |
| 150 | , m_scalar_dp_allowed(0) |
| 151 | , m_simd_reciprocal_res(0) |
| 152 | , m_simd_reciprocal_high(0) |
| 153 | , m_simd_dp_allowed(0) |
| 154 | #endif |
| 123 | 155 | , m_sr(0) |
| 124 | 156 | , m_step_count(0) |
| 157 | #if USE_SIMD |
| 158 | , m_accum_h(0) |
| 159 | , m_accum_m(0) |
| 160 | , m_accum_l(0) |
| 161 | , m_accum_ll(0) |
| 162 | #endif |
| 163 | , m_reciprocal_res(0) |
| 164 | , m_reciprocal_high(0) |
| 165 | , m_dp_allowed(0) |
| 125 | 166 | , m_ppc(0) |
| 126 | 167 | , m_nextpc(0) |
| 127 | 168 | , m_dmem32(NULL) |
| r241959 | r241960 | |
| 138 | 179 | , m_sp_set_status_func(*this) |
| 139 | 180 | { |
| 140 | 181 | m_isdrc = mconfig.options().drc() ? true : false; |
| 182 | memset(m_vres, 0, sizeof(m_vres)); |
| 183 | memset(m_v, 0, sizeof(m_v)); |
| 184 | memset(m_vflag, 0, sizeof(m_vflag)); |
| 185 | #if SIMUL_SIMD |
| 186 | memset(m_old_r, 0, sizeof(m_old_r)); |
| 187 | memset(m_old_dmem, 0, sizeof(m_old_dmem)); |
| 188 | memset(m_scalar_r, 0, sizeof(m_scalar_r)); |
| 189 | memset(m_scalar_dmem, 0, sizeof(m_scalar_dmem)); |
| 190 | #endif |
| 191 | #if USE_SIMD |
| 192 | memset(m_xv, 0, sizeof(m_xv)); |
| 193 | memset(m_xvflag, 0, sizeof(m_xvflag)); |
| 194 | #endif |
| 195 | memset(m_accum, 0, sizeof(m_accum)); |
| 141 | 196 | } |
| 142 | 197 | |
| 143 | 198 | offs_t rsp_device::disasm_disassemble(char *buffer, offs_t pc, const UINT8 *oprom, const UINT8 *opram, UINT32 options) |
| r241959 | r241960 | |
| 146 | 201 | return CPU_DISASSEMBLE_NAME( rsp )(this, buffer, pc, oprom, opram, options); |
| 147 | 202 | } |
| 148 | 203 | |
| 149 | | UINT8 rsp_device::READ8(UINT32 address) |
| 204 | inline UINT8 rsp_device::READ8(UINT32 address) |
| 150 | 205 | { |
| 151 | 206 | UINT8 ret; |
| 152 | 207 | address &= 0xfff; |
| 153 | 208 | ret = m_program->read_byte(address); |
| 154 | | //printf("R8:%08x=%02x\n", address, ret); |
| 155 | 209 | return ret; |
| 156 | 210 | } |
| 157 | 211 | |
| 158 | | UINT16 rsp_device::READ16(UINT32 address) |
| 212 | inline UINT16 rsp_device::READ16(UINT32 address) |
| 159 | 213 | { |
| 160 | 214 | UINT16 ret; |
| 161 | 215 | address &= 0xfff; |
| 162 | 216 | |
| 163 | 217 | ret = (m_program->read_byte(address) << 8) | (m_program->read_byte(address + 1) & 0xff); |
| 164 | 218 | |
| 165 | | //printf("R16:%08x=%04x\n", address, ret); |
| 166 | 219 | return ret; |
| 167 | 220 | } |
| 168 | 221 | |
| 169 | | UINT32 rsp_device::READ32(UINT32 address) |
| 222 | inline UINT32 rsp_device::READ32(UINT32 address) |
| 170 | 223 | { |
| 171 | 224 | UINT32 ret; |
| 172 | 225 | address &= 0xfff; |
| r241959 | r241960 | |
| 176 | 229 | (m_program->read_byte(address + 2) << 8) | |
| 177 | 230 | (m_program->read_byte(address + 3) << 0); |
| 178 | 231 | |
| 179 | | //printf("R32:%08x=%08x\n", address, ret); |
| 180 | 232 | return ret; |
| 181 | 233 | } |
| 182 | 234 | |
| r241959 | r241960 | |
| 184 | 236 | { |
| 185 | 237 | address &= 0xfff; |
| 186 | 238 | m_program->write_byte(address, data); |
| 187 | | //printf("W8:%08x=%02x\n", address, data); |
| 188 | 239 | } |
| 189 | 240 | |
| 190 | 241 | void rsp_device::WRITE16(UINT32 address, UINT16 data) |
| r241959 | r241960 | |
| 193 | 244 | |
| 194 | 245 | m_program->write_byte(address, data >> 8); |
| 195 | 246 | m_program->write_byte(address + 1, data & 0xff); |
| 196 | | //printf("W16:%08x=%04x\n", address, data); |
| 197 | 247 | } |
| 198 | 248 | |
| 199 | 249 | void rsp_device::WRITE32(UINT32 address, UINT32 data) |
| r241959 | r241960 | |
| 204 | 254 | m_program->write_byte(address + 1, (data >> 16) & 0xff); |
| 205 | 255 | m_program->write_byte(address + 2, (data >> 8) & 0xff); |
| 206 | 256 | m_program->write_byte(address + 3, data & 0xff); |
| 207 | | //printf("W32:%08x=%08x\n", address, data); |
| 208 | 257 | } |
| 209 | 258 | |
| 210 | 259 | /*****************************************************************************/ |
| r241959 | r241960 | |
| 321 | 370 | m_direct = &m_program->direct(); |
| 322 | 371 | resolve_cb(); |
| 323 | 372 | |
| 324 | | if (m_isdrc) |
| 325 | | { |
| 326 | | m_cop2 = auto_alloc(machine(), rsp_cop2_drc(*this, machine())); |
| 327 | | } |
| 328 | | else |
| 329 | | { |
| 330 | | m_cop2 = auto_alloc(machine(), rsp_cop2(*this, machine())); |
| 331 | | } |
| 332 | | m_cop2->init(); |
| 333 | | m_cop2->start(); |
| 334 | | |
| 335 | 373 | // RSP registers should power on to a random state |
| 336 | 374 | for(int regIdx = 0; regIdx < 32; regIdx++ ) |
| 337 | 375 | { |
| 338 | 376 | m_rsp_state->r[regIdx] = 0; |
| 377 | m_v[regIdx].d[0] = 0; |
| 378 | m_v[regIdx].d[1] = 0; |
| 339 | 379 | } |
| 380 | CLEAR_CARRY_FLAGS(); |
| 381 | CLEAR_COMPARE_FLAGS(); |
| 382 | CLEAR_CLIP1_FLAGS(); |
| 383 | CLEAR_ZERO_FLAGS(); |
| 384 | CLEAR_CLIP2_FLAGS(); |
| 385 | m_reciprocal_res = 0; |
| 386 | m_reciprocal_high = 0; |
| 340 | 387 | |
| 388 | // Accumulators do not power on to a random state |
| 389 | for(int accumIdx = 0; accumIdx < 8; accumIdx++ ) |
| 390 | { |
| 391 | m_accum[accumIdx].q = 0; |
| 392 | } |
| 393 | |
| 341 | 394 | m_sr = RSP_STATUS_HALT; |
| 342 | 395 | m_step_count = 0; |
| 343 | 396 | |
| r241959 | r241960 | |
| 491 | 544 | |
| 492 | 545 | void rsp_device::state_string_export(const device_state_entry &entry, astring &string) |
| 493 | 546 | { |
| 494 | | const int index = entry.index(); |
| 495 | | if (index >= RSP_V0 && index <= RSP_V31) |
| 547 | switch (entry.index()) |
| 496 | 548 | { |
| 497 | | m_cop2->state_string_export(index, string); |
| 549 | case STATE_GENFLAGS: |
| 550 | string.printf("%s",""); |
| 551 | break; |
| 552 | |
| 553 | #if USE_SIMD |
| 554 | case RSP_V0: |
| 555 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 0], 7), (UINT16)_mm_extract_epi16(m_xv[ 0], 6), (UINT16)_mm_extract_epi16(m_xv[ 0], 5), (UINT16)_mm_extract_epi16(m_xv[ 0], 4), (UINT16)_mm_extract_epi16(m_xv[ 0], 3), (UINT16)_mm_extract_epi16(m_xv[ 0], 2), (UINT16)_mm_extract_epi16(m_xv[ 0], 1), (UINT16)_mm_extract_epi16(m_xv[ 0], 0)); |
| 556 | break; |
| 557 | case RSP_V1: |
| 558 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 1], 7), (UINT16)_mm_extract_epi16(m_xv[ 1], 6), (UINT16)_mm_extract_epi16(m_xv[ 1], 5), (UINT16)_mm_extract_epi16(m_xv[ 1], 4), (UINT16)_mm_extract_epi16(m_xv[ 1], 3), (UINT16)_mm_extract_epi16(m_xv[ 1], 2), (UINT16)_mm_extract_epi16(m_xv[ 1], 1), (UINT16)_mm_extract_epi16(m_xv[ 1], 0)); |
| 559 | break; |
| 560 | case RSP_V2: |
| 561 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 2], 7), (UINT16)_mm_extract_epi16(m_xv[ 2], 6), (UINT16)_mm_extract_epi16(m_xv[ 2], 5), (UINT16)_mm_extract_epi16(m_xv[ 2], 4), (UINT16)_mm_extract_epi16(m_xv[ 2], 3), (UINT16)_mm_extract_epi16(m_xv[ 2], 2), (UINT16)_mm_extract_epi16(m_xv[ 2], 1), (UINT16)_mm_extract_epi16(m_xv[ 2], 0)); |
| 562 | break; |
| 563 | case RSP_V3: |
| 564 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 3], 7), (UINT16)_mm_extract_epi16(m_xv[ 3], 6), (UINT16)_mm_extract_epi16(m_xv[ 3], 5), (UINT16)_mm_extract_epi16(m_xv[ 3], 4), (UINT16)_mm_extract_epi16(m_xv[ 3], 3), (UINT16)_mm_extract_epi16(m_xv[ 3], 2), (UINT16)_mm_extract_epi16(m_xv[ 3], 1), (UINT16)_mm_extract_epi16(m_xv[ 3], 0)); |
| 565 | break; |
| 566 | case RSP_V4: |
| 567 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 4], 7), (UINT16)_mm_extract_epi16(m_xv[ 4], 6), (UINT16)_mm_extract_epi16(m_xv[ 4], 5), (UINT16)_mm_extract_epi16(m_xv[ 4], 4), (UINT16)_mm_extract_epi16(m_xv[ 4], 3), (UINT16)_mm_extract_epi16(m_xv[ 4], 2), (UINT16)_mm_extract_epi16(m_xv[ 4], 1), (UINT16)_mm_extract_epi16(m_xv[ 4], 0)); |
| 568 | break; |
| 569 | case RSP_V5: |
| 570 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 5], 7), (UINT16)_mm_extract_epi16(m_xv[ 5], 6), (UINT16)_mm_extract_epi16(m_xv[ 5], 5), (UINT16)_mm_extract_epi16(m_xv[ 5], 4), (UINT16)_mm_extract_epi16(m_xv[ 5], 3), (UINT16)_mm_extract_epi16(m_xv[ 5], 2), (UINT16)_mm_extract_epi16(m_xv[ 5], 1), (UINT16)_mm_extract_epi16(m_xv[ 5], 0)); |
| 571 | break; |
| 572 | case RSP_V6: |
| 573 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 6], 7), (UINT16)_mm_extract_epi16(m_xv[ 6], 6), (UINT16)_mm_extract_epi16(m_xv[ 6], 5), (UINT16)_mm_extract_epi16(m_xv[ 6], 4), (UINT16)_mm_extract_epi16(m_xv[ 6], 3), (UINT16)_mm_extract_epi16(m_xv[ 6], 2), (UINT16)_mm_extract_epi16(m_xv[ 6], 1), (UINT16)_mm_extract_epi16(m_xv[ 6], 0)); |
| 574 | break; |
| 575 | case RSP_V7: |
| 576 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 7], 7), (UINT16)_mm_extract_epi16(m_xv[ 7], 6), (UINT16)_mm_extract_epi16(m_xv[ 7], 5), (UINT16)_mm_extract_epi16(m_xv[ 7], 4), (UINT16)_mm_extract_epi16(m_xv[ 7], 3), (UINT16)_mm_extract_epi16(m_xv[ 7], 2), (UINT16)_mm_extract_epi16(m_xv[ 7], 1), (UINT16)_mm_extract_epi16(m_xv[ 7], 0)); |
| 577 | break; |
| 578 | case RSP_V8: |
| 579 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 8], 7), (UINT16)_mm_extract_epi16(m_xv[ 8], 6), (UINT16)_mm_extract_epi16(m_xv[ 8], 5), (UINT16)_mm_extract_epi16(m_xv[ 8], 4), (UINT16)_mm_extract_epi16(m_xv[ 8], 3), (UINT16)_mm_extract_epi16(m_xv[ 8], 2), (UINT16)_mm_extract_epi16(m_xv[ 8], 1), (UINT16)_mm_extract_epi16(m_xv[ 8], 0)); |
| 580 | break; |
| 581 | case RSP_V9: |
| 582 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 9], 7), (UINT16)_mm_extract_epi16(m_xv[ 9], 6), (UINT16)_mm_extract_epi16(m_xv[ 9], 5), (UINT16)_mm_extract_epi16(m_xv[ 9], 4), (UINT16)_mm_extract_epi16(m_xv[ 9], 3), (UINT16)_mm_extract_epi16(m_xv[ 9], 2), (UINT16)_mm_extract_epi16(m_xv[ 9], 1), (UINT16)_mm_extract_epi16(m_xv[ 9], 0)); |
| 583 | break; |
| 584 | case RSP_V10: |
| 585 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[10], 7), (UINT16)_mm_extract_epi16(m_xv[10], 6), (UINT16)_mm_extract_epi16(m_xv[10], 5), (UINT16)_mm_extract_epi16(m_xv[10], 4), (UINT16)_mm_extract_epi16(m_xv[10], 3), (UINT16)_mm_extract_epi16(m_xv[10], 2), (UINT16)_mm_extract_epi16(m_xv[10], 1), (UINT16)_mm_extract_epi16(m_xv[10], 0)); |
| 586 | break; |
| 587 | case RSP_V11: |
| 588 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[11], 7), (UINT16)_mm_extract_epi16(m_xv[11], 6), (UINT16)_mm_extract_epi16(m_xv[11], 5), (UINT16)_mm_extract_epi16(m_xv[11], 4), (UINT16)_mm_extract_epi16(m_xv[11], 3), (UINT16)_mm_extract_epi16(m_xv[11], 2), (UINT16)_mm_extract_epi16(m_xv[11], 1), (UINT16)_mm_extract_epi16(m_xv[11], 0)); |
| 589 | break; |
| 590 | case RSP_V12: |
| 591 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[12], 7), (UINT16)_mm_extract_epi16(m_xv[12], 6), (UINT16)_mm_extract_epi16(m_xv[12], 5), (UINT16)_mm_extract_epi16(m_xv[12], 4), (UINT16)_mm_extract_epi16(m_xv[12], 3), (UINT16)_mm_extract_epi16(m_xv[12], 2), (UINT16)_mm_extract_epi16(m_xv[12], 1), (UINT16)_mm_extract_epi16(m_xv[12], 0)); |
| 592 | break; |
| 593 | case RSP_V13: |
| 594 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[13], 7), (UINT16)_mm_extract_epi16(m_xv[13], 6), (UINT16)_mm_extract_epi16(m_xv[13], 5), (UINT16)_mm_extract_epi16(m_xv[13], 4), (UINT16)_mm_extract_epi16(m_xv[13], 3), (UINT16)_mm_extract_epi16(m_xv[13], 2), (UINT16)_mm_extract_epi16(m_xv[13], 1), (UINT16)_mm_extract_epi16(m_xv[13], 0)); |
| 595 | break; |
| 596 | case RSP_V14: |
| 597 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[14], 7), (UINT16)_mm_extract_epi16(m_xv[14], 6), (UINT16)_mm_extract_epi16(m_xv[14], 5), (UINT16)_mm_extract_epi16(m_xv[14], 4), (UINT16)_mm_extract_epi16(m_xv[14], 3), (UINT16)_mm_extract_epi16(m_xv[14], 2), (UINT16)_mm_extract_epi16(m_xv[14], 1), (UINT16)_mm_extract_epi16(m_xv[14], 0)); |
| 598 | break; |
| 599 | case RSP_V15: |
| 600 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[15], 7), (UINT16)_mm_extract_epi16(m_xv[15], 6), (UINT16)_mm_extract_epi16(m_xv[15], 5), (UINT16)_mm_extract_epi16(m_xv[15], 4), (UINT16)_mm_extract_epi16(m_xv[15], 3), (UINT16)_mm_extract_epi16(m_xv[15], 2), (UINT16)_mm_extract_epi16(m_xv[15], 1), (UINT16)_mm_extract_epi16(m_xv[15], 0)); |
| 601 | break; |
| 602 | case RSP_V16: |
| 603 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[16], 7), (UINT16)_mm_extract_epi16(m_xv[16], 6), (UINT16)_mm_extract_epi16(m_xv[16], 5), (UINT16)_mm_extract_epi16(m_xv[16], 4), (UINT16)_mm_extract_epi16(m_xv[16], 3), (UINT16)_mm_extract_epi16(m_xv[16], 2), (UINT16)_mm_extract_epi16(m_xv[16], 1), (UINT16)_mm_extract_epi16(m_xv[16], 0)); |
| 604 | break; |
| 605 | case RSP_V17: |
| 606 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[17], 7), (UINT16)_mm_extract_epi16(m_xv[17], 6), (UINT16)_mm_extract_epi16(m_xv[17], 5), (UINT16)_mm_extract_epi16(m_xv[17], 4), (UINT16)_mm_extract_epi16(m_xv[17], 3), (UINT16)_mm_extract_epi16(m_xv[17], 2), (UINT16)_mm_extract_epi16(m_xv[17], 1), (UINT16)_mm_extract_epi16(m_xv[17], 0)); |
| 607 | break; |
| 608 | case RSP_V18: |
| 609 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[18], 7), (UINT16)_mm_extract_epi16(m_xv[18], 6), (UINT16)_mm_extract_epi16(m_xv[18], 5), (UINT16)_mm_extract_epi16(m_xv[18], 4), (UINT16)_mm_extract_epi16(m_xv[18], 3), (UINT16)_mm_extract_epi16(m_xv[18], 2), (UINT16)_mm_extract_epi16(m_xv[18], 1), (UINT16)_mm_extract_epi16(m_xv[18], 0)); |
| 610 | break; |
| 611 | case RSP_V19: |
| 612 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[19], 7), (UINT16)_mm_extract_epi16(m_xv[19], 6), (UINT16)_mm_extract_epi16(m_xv[19], 5), (UINT16)_mm_extract_epi16(m_xv[19], 4), (UINT16)_mm_extract_epi16(m_xv[19], 3), (UINT16)_mm_extract_epi16(m_xv[19], 2), (UINT16)_mm_extract_epi16(m_xv[19], 1), (UINT16)_mm_extract_epi16(m_xv[19], 0)); |
| 613 | break; |
| 614 | case RSP_V20: |
| 615 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[20], 7), (UINT16)_mm_extract_epi16(m_xv[20], 6), (UINT16)_mm_extract_epi16(m_xv[20], 5), (UINT16)_mm_extract_epi16(m_xv[20], 4), (UINT16)_mm_extract_epi16(m_xv[20], 3), (UINT16)_mm_extract_epi16(m_xv[20], 2), (UINT16)_mm_extract_epi16(m_xv[20], 1), (UINT16)_mm_extract_epi16(m_xv[20], 0)); |
| 616 | break; |
| 617 | case RSP_V21: |
| 618 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[21], 7), (UINT16)_mm_extract_epi16(m_xv[21], 6), (UINT16)_mm_extract_epi16(m_xv[21], 5), (UINT16)_mm_extract_epi16(m_xv[21], 4), (UINT16)_mm_extract_epi16(m_xv[21], 3), (UINT16)_mm_extract_epi16(m_xv[21], 2), (UINT16)_mm_extract_epi16(m_xv[21], 1), (UINT16)_mm_extract_epi16(m_xv[21], 0)); |
| 619 | break; |
| 620 | case RSP_V22: |
| 621 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[22], 7), (UINT16)_mm_extract_epi16(m_xv[22], 6), (UINT16)_mm_extract_epi16(m_xv[22], 5), (UINT16)_mm_extract_epi16(m_xv[22], 4), (UINT16)_mm_extract_epi16(m_xv[22], 3), (UINT16)_mm_extract_epi16(m_xv[22], 2), (UINT16)_mm_extract_epi16(m_xv[22], 1), (UINT16)_mm_extract_epi16(m_xv[22], 0)); |
| 622 | break; |
| 623 | case RSP_V23: |
| 624 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[23], 7), (UINT16)_mm_extract_epi16(m_xv[23], 6), (UINT16)_mm_extract_epi16(m_xv[23], 5), (UINT16)_mm_extract_epi16(m_xv[23], 4), (UINT16)_mm_extract_epi16(m_xv[23], 3), (UINT16)_mm_extract_epi16(m_xv[23], 2), (UINT16)_mm_extract_epi16(m_xv[23], 1), (UINT16)_mm_extract_epi16(m_xv[23], 0)); |
| 625 | break; |
| 626 | case RSP_V24: |
| 627 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[24], 7), (UINT16)_mm_extract_epi16(m_xv[24], 6), (UINT16)_mm_extract_epi16(m_xv[24], 5), (UINT16)_mm_extract_epi16(m_xv[24], 4), (UINT16)_mm_extract_epi16(m_xv[24], 3), (UINT16)_mm_extract_epi16(m_xv[24], 2), (UINT16)_mm_extract_epi16(m_xv[24], 1), (UINT16)_mm_extract_epi16(m_xv[24], 0)); |
| 628 | break; |
| 629 | case RSP_V25: |
| 630 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[25], 7), (UINT16)_mm_extract_epi16(m_xv[25], 6), (UINT16)_mm_extract_epi16(m_xv[25], 5), (UINT16)_mm_extract_epi16(m_xv[25], 4), (UINT16)_mm_extract_epi16(m_xv[25], 3), (UINT16)_mm_extract_epi16(m_xv[25], 2), (UINT16)_mm_extract_epi16(m_xv[25], 1), (UINT16)_mm_extract_epi16(m_xv[25], 0)); |
| 631 | break; |
| 632 | case RSP_V26: |
| 633 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[26], 7), (UINT16)_mm_extract_epi16(m_xv[26], 6), (UINT16)_mm_extract_epi16(m_xv[26], 5), (UINT16)_mm_extract_epi16(m_xv[26], 4), (UINT16)_mm_extract_epi16(m_xv[26], 3), (UINT16)_mm_extract_epi16(m_xv[26], 2), (UINT16)_mm_extract_epi16(m_xv[26], 1), (UINT16)_mm_extract_epi16(m_xv[26], 0)); |
| 634 | break; |
| 635 | case RSP_V27: |
| 636 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[27], 7), (UINT16)_mm_extract_epi16(m_xv[27], 6), (UINT16)_mm_extract_epi16(m_xv[27], 5), (UINT16)_mm_extract_epi16(m_xv[27], 4), (UINT16)_mm_extract_epi16(m_xv[27], 3), (UINT16)_mm_extract_epi16(m_xv[27], 2), (UINT16)_mm_extract_epi16(m_xv[27], 1), (UINT16)_mm_extract_epi16(m_xv[27], 0)); |
| 637 | break; |
| 638 | case RSP_V28: |
| 639 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[28], 7), (UINT16)_mm_extract_epi16(m_xv[28], 6), (UINT16)_mm_extract_epi16(m_xv[28], 5), (UINT16)_mm_extract_epi16(m_xv[28], 4), (UINT16)_mm_extract_epi16(m_xv[28], 3), (UINT16)_mm_extract_epi16(m_xv[28], 2), (UINT16)_mm_extract_epi16(m_xv[28], 1), (UINT16)_mm_extract_epi16(m_xv[28], 0)); |
| 640 | break; |
| 641 | case RSP_V29: |
| 642 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[29], 7), (UINT16)_mm_extract_epi16(m_xv[29], 6), (UINT16)_mm_extract_epi16(m_xv[29], 5), (UINT16)_mm_extract_epi16(m_xv[29], 4), (UINT16)_mm_extract_epi16(m_xv[29], 3), (UINT16)_mm_extract_epi16(m_xv[29], 2), (UINT16)_mm_extract_epi16(m_xv[29], 1), (UINT16)_mm_extract_epi16(m_xv[29], 0)); |
| 643 | break; |
| 644 | case RSP_V30: |
| 645 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[30], 7), (UINT16)_mm_extract_epi16(m_xv[30], 6), (UINT16)_mm_extract_epi16(m_xv[30], 5), (UINT16)_mm_extract_epi16(m_xv[30], 4), (UINT16)_mm_extract_epi16(m_xv[30], 3), (UINT16)_mm_extract_epi16(m_xv[30], 2), (UINT16)_mm_extract_epi16(m_xv[30], 1), (UINT16)_mm_extract_epi16(m_xv[30], 0)); |
| 646 | break; |
| 647 | case RSP_V31: |
| 648 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[31], 7), (UINT16)_mm_extract_epi16(m_xv[31], 6), (UINT16)_mm_extract_epi16(m_xv[31], 5), (UINT16)_mm_extract_epi16(m_xv[31], 4), (UINT16)_mm_extract_epi16(m_xv[31], 3), (UINT16)_mm_extract_epi16(m_xv[31], 2), (UINT16)_mm_extract_epi16(m_xv[31], 1), (UINT16)_mm_extract_epi16(m_xv[31], 0)); |
| 649 | break; |
| 650 | #else |
| 651 | case RSP_V0: |
| 652 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); |
| 653 | break; |
| 654 | case RSP_V1: |
| 655 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); |
| 656 | break; |
| 657 | case RSP_V2: |
| 658 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); |
| 659 | break; |
| 660 | case RSP_V3: |
| 661 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); |
| 662 | break; |
| 663 | case RSP_V4: |
| 664 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); |
| 665 | break; |
| 666 | case RSP_V5: |
| 667 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); |
| 668 | break; |
| 669 | case RSP_V6: |
| 670 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); |
| 671 | break; |
| 672 | case RSP_V7: |
| 673 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); |
| 674 | break; |
| 675 | case RSP_V8: |
| 676 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); |
| 677 | break; |
| 678 | case RSP_V9: |
| 679 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); |
| 680 | break; |
| 681 | case RSP_V10: |
| 682 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); |
| 683 | break; |
| 684 | case RSP_V11: |
| 685 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); |
| 686 | break; |
| 687 | case RSP_V12: |
| 688 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); |
| 689 | break; |
| 690 | case RSP_V13: |
| 691 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); |
| 692 | break; |
| 693 | case RSP_V14: |
| 694 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); |
| 695 | break; |
| 696 | case RSP_V15: |
| 697 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); |
| 698 | break; |
| 699 | case RSP_V16: |
| 700 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); |
| 701 | break; |
| 702 | case RSP_V17: |
| 703 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); |
| 704 | break; |
| 705 | case RSP_V18: |
| 706 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); |
| 707 | break; |
| 708 | case RSP_V19: |
| 709 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); |
| 710 | break; |
| 711 | case RSP_V20: |
| 712 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); |
| 713 | break; |
| 714 | case RSP_V21: |
| 715 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); |
| 716 | break; |
| 717 | case RSP_V22: |
| 718 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); |
| 719 | break; |
| 720 | case RSP_V23: |
| 721 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); |
| 722 | break; |
| 723 | case RSP_V24: |
| 724 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); |
| 725 | break; |
| 726 | case RSP_V25: |
| 727 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); |
| 728 | break; |
| 729 | case RSP_V26: |
| 730 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); |
| 731 | break; |
| 732 | case RSP_V27: |
| 733 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); |
| 734 | break; |
| 735 | case RSP_V28: |
| 736 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); |
| 737 | break; |
| 738 | case RSP_V29: |
| 739 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); |
| 740 | break; |
| 741 | case RSP_V30: |
| 742 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); |
| 743 | break; |
| 744 | case RSP_V31: |
| 745 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); |
| 746 | break; |
| 747 | #endif |
| 748 | |
| 498 | 749 | } |
| 499 | | else if (index == STATE_GENFLAGS) |
| 500 | | { |
| 501 | | string.printf("%s",""); |
| 502 | | } |
| 503 | 750 | } |
| 504 | 751 | |
| 505 | 752 | void rsp_device::device_stop() |
| r241959 | r241960 | |
| 548 | 795 | m_exec_output = NULL; |
| 549 | 796 | |
| 550 | 797 | /* clean up the DRC */ |
| 551 | | if (m_drcuml) |
| 798 | if ( m_drcuml ) |
| 552 | 799 | { |
| 553 | 800 | auto_free(machine(), m_drcuml); |
| 554 | 801 | } |
| 555 | | if (m_drcfe) |
| 802 | if (m_drcfe ) |
| 556 | 803 | { |
| 557 | 804 | auto_free(machine(), m_drcfe); |
| 558 | 805 | } |
| 806 | } |
| 559 | 807 | |
| 560 | | if (m_cop2) |
| 808 | void rsp_device::device_reset() |
| 809 | { |
| 810 | m_nextpc = ~0; |
| 811 | } |
| 812 | |
| 813 | void rsp_device::handle_lwc2(UINT32 op) |
| 814 | { |
| 815 | int i, end; |
| 816 | UINT32 ea; |
| 817 | int dest = (op >> 16) & 0x1f; |
| 818 | int base = (op >> 21) & 0x1f; |
| 819 | int index = (op >> 7) & 0xf; |
| 820 | int offset = (op & 0x7f); |
| 821 | if (offset & 0x40) |
| 822 | offset |= 0xffffffc0; |
| 823 | |
| 824 | switch ((op >> 11) & 0x1f) |
| 561 | 825 | { |
| 562 | | auto_free(machine(), m_cop2); |
| 826 | case 0x00: /* LBV */ |
| 827 | { |
| 828 | // 31 25 20 15 10 6 0 |
| 829 | // -------------------------------------------------- |
| 830 | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 831 | // -------------------------------------------------- |
| 832 | // |
| 833 | // Load 1 byte to vector byte index |
| 834 | |
| 835 | ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 836 | VREG_B(dest, index) = READ8(ea); |
| 837 | break; |
| 838 | } |
| 839 | case 0x01: /* LSV */ |
| 840 | { |
| 841 | // 31 25 20 15 10 6 0 |
| 842 | // -------------------------------------------------- |
| 843 | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 844 | // -------------------------------------------------- |
| 845 | // |
| 846 | // Loads 2 bytes starting from vector byte index |
| 847 | |
| 848 | ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 849 | |
| 850 | end = index + 2; |
| 851 | |
| 852 | for (i=index; i < end; i++) |
| 853 | { |
| 854 | VREG_B(dest, i) = READ8(ea); |
| 855 | ea++; |
| 856 | } |
| 857 | break; |
| 858 | } |
| 859 | case 0x02: /* LLV */ |
| 860 | { |
| 861 | // 31 25 20 15 10 6 0 |
| 862 | // -------------------------------------------------- |
| 863 | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 864 | // -------------------------------------------------- |
| 865 | // |
| 866 | // Loads 4 bytes starting from vector byte index |
| 867 | |
| 868 | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 869 | |
| 870 | end = index + 4; |
| 871 | |
| 872 | for (i=index; i < end; i++) |
| 873 | { |
| 874 | VREG_B(dest, i) = READ8(ea); |
| 875 | ea++; |
| 876 | } |
| 877 | break; |
| 878 | } |
| 879 | case 0x03: /* LDV */ |
| 880 | { |
| 881 | // 31 25 20 15 10 6 0 |
| 882 | // -------------------------------------------------- |
| 883 | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 884 | // -------------------------------------------------- |
| 885 | // |
| 886 | // Loads 8 bytes starting from vector byte index |
| 887 | |
| 888 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 889 | |
| 890 | end = index + 8; |
| 891 | |
| 892 | for (i=index; i < end; i++) |
| 893 | { |
| 894 | VREG_B(dest, i) = READ8(ea); |
| 895 | ea++; |
| 896 | } |
| 897 | break; |
| 898 | } |
| 899 | case 0x04: /* LQV */ |
| 900 | { |
| 901 | // 31 25 20 15 10 6 0 |
| 902 | // -------------------------------------------------- |
| 903 | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 904 | // -------------------------------------------------- |
| 905 | // |
| 906 | // Loads up to 16 bytes starting from vector byte index |
| 907 | |
| 908 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 909 | |
| 910 | end = index + (16 - (ea & 0xf)); |
| 911 | if (end > 16) end = 16; |
| 912 | |
| 913 | for (i=index; i < end; i++) |
| 914 | { |
| 915 | VREG_B(dest, i) = READ8(ea); |
| 916 | ea++; |
| 917 | } |
| 918 | break; |
| 919 | } |
| 920 | case 0x05: /* LRV */ |
| 921 | { |
| 922 | // 31 25 20 15 10 6 0 |
| 923 | // -------------------------------------------------- |
| 924 | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 925 | // -------------------------------------------------- |
| 926 | // |
| 927 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 928 | |
| 929 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 930 | |
| 931 | index = 16 - ((ea & 0xf) - index); |
| 932 | end = 16; |
| 933 | ea &= ~0xf; |
| 934 | |
| 935 | for (i=index; i < end; i++) |
| 936 | { |
| 937 | VREG_B(dest, i) = READ8(ea); |
| 938 | ea++; |
| 939 | } |
| 940 | break; |
| 941 | } |
| 942 | case 0x06: /* LPV */ |
| 943 | { |
| 944 | // 31 25 20 15 10 6 0 |
| 945 | // -------------------------------------------------- |
| 946 | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 947 | // -------------------------------------------------- |
| 948 | // |
| 949 | // Loads a byte as the upper 8 bits of each element |
| 950 | |
| 951 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 952 | |
| 953 | for (i=0; i < 8; i++) |
| 954 | { |
| 955 | VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 956 | } |
| 957 | break; |
| 958 | } |
| 959 | case 0x07: /* LUV */ |
| 960 | { |
| 961 | // 31 25 20 15 10 6 0 |
| 962 | // -------------------------------------------------- |
| 963 | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 964 | // -------------------------------------------------- |
| 965 | // |
| 966 | // Loads a byte as the bits 14-7 of each element |
| 967 | |
| 968 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 969 | |
| 970 | for (i=0; i < 8; i++) |
| 971 | { |
| 972 | VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 973 | } |
| 974 | break; |
| 975 | } |
| 976 | case 0x08: /* LHV */ |
| 977 | { |
| 978 | // 31 25 20 15 10 6 0 |
| 979 | // -------------------------------------------------- |
| 980 | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 981 | // -------------------------------------------------- |
| 982 | // |
| 983 | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 984 | |
| 985 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 986 | |
| 987 | for (i=0; i < 8; i++) |
| 988 | { |
| 989 | VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 990 | } |
| 991 | break; |
| 992 | } |
| 993 | case 0x09: /* LFV */ |
| 994 | { |
| 995 | // 31 25 20 15 10 6 0 |
| 996 | // -------------------------------------------------- |
| 997 | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 998 | // -------------------------------------------------- |
| 999 | // |
| 1000 | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 1001 | |
| 1002 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1003 | |
| 1004 | // not sure what happens if 16-byte boundary is crossed... |
| 1005 | |
| 1006 | end = (index >> 1) + 4; |
| 1007 | |
| 1008 | for (i=index >> 1; i < end; i++) |
| 1009 | { |
| 1010 | VREG_S(dest, i) = READ8(ea) << 7; |
| 1011 | ea += 4; |
| 1012 | } |
| 1013 | break; |
| 1014 | } |
| 1015 | case 0x0a: /* LWV */ |
| 1016 | { |
| 1017 | // 31 25 20 15 10 6 0 |
| 1018 | // -------------------------------------------------- |
| 1019 | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1020 | // -------------------------------------------------- |
| 1021 | // |
| 1022 | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1023 | // after byte index 15 |
| 1024 | |
| 1025 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1026 | |
| 1027 | // not sure what happens if 16-byte boundary is crossed... |
| 1028 | if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", m_ppc); |
| 1029 | |
| 1030 | end = (16 - index) + 16; |
| 1031 | |
| 1032 | for (i=(16 - index); i < end; i++) |
| 1033 | { |
| 1034 | VREG_B(dest, i & 0xf) = READ8(ea); |
| 1035 | ea += 4; |
| 1036 | } |
| 1037 | break; |
| 1038 | } |
| 1039 | case 0x0b: /* LTV */ |
| 1040 | { |
| 1041 | // 31 25 20 15 10 6 0 |
| 1042 | // -------------------------------------------------- |
| 1043 | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1044 | // -------------------------------------------------- |
| 1045 | // |
| 1046 | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 1047 | |
| 1048 | // FIXME: has a small problem with odd indices |
| 1049 | |
| 1050 | int element; |
| 1051 | int vs = dest; |
| 1052 | int ve = dest + 8; |
| 1053 | if (ve > 32) |
| 1054 | ve = 32; |
| 1055 | |
| 1056 | element = 7 - (index >> 1); |
| 1057 | |
| 1058 | if (index & 1) fatalerror("RSP: LTV: index = %d\n", index); |
| 1059 | |
| 1060 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1061 | |
| 1062 | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 1063 | for (i=vs; i < ve; i++) |
| 1064 | { |
| 1065 | element = ((8 - (index >> 1) + (i-vs)) << 1); |
| 1066 | VREG_B(i, (element & 0xf)) = READ8(ea); |
| 1067 | VREG_B(i, ((element + 1) & 0xf)) = READ8(ea + 1); |
| 1068 | |
| 1069 | ea += 2; |
| 1070 | } |
| 1071 | break; |
| 1072 | } |
| 1073 | |
| 1074 | default: |
| 1075 | { |
| 1076 | unimplemented_opcode(op); |
| 1077 | break; |
| 1078 | } |
| 563 | 1079 | } |
| 564 | 1080 | } |
| 565 | 1081 | |
| 566 | | void rsp_device::device_reset() |
| 1082 | void rsp_device::handle_swc2(UINT32 op) |
| 567 | 1083 | { |
| 568 | | m_nextpc = ~0; |
| 1084 | int i, end; |
| 1085 | int eaoffset; |
| 1086 | UINT32 ea; |
| 1087 | int dest = (op >> 16) & 0x1f; |
| 1088 | int base = (op >> 21) & 0x1f; |
| 1089 | int index = (op >> 7) & 0xf; |
| 1090 | int offset = (op & 0x7f); |
| 1091 | if (offset & 0x40) |
| 1092 | offset |= 0xffffffc0; |
| 1093 | |
| 1094 | switch ((op >> 11) & 0x1f) |
| 1095 | { |
| 1096 | case 0x00: /* SBV */ |
| 1097 | { |
| 1098 | // 31 25 20 15 10 6 0 |
| 1099 | // -------------------------------------------------- |
| 1100 | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 1101 | // -------------------------------------------------- |
| 1102 | // |
| 1103 | // Stores 1 byte from vector byte index |
| 1104 | |
| 1105 | ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 1106 | WRITE8(ea, VREG_B(dest, index)); |
| 1107 | break; |
| 1108 | } |
| 1109 | case 0x01: /* SSV */ |
| 1110 | { |
| 1111 | // 31 25 20 15 10 6 0 |
| 1112 | // -------------------------------------------------- |
| 1113 | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 1114 | // -------------------------------------------------- |
| 1115 | // |
| 1116 | // Stores 2 bytes starting from vector byte index |
| 1117 | |
| 1118 | ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 1119 | |
| 1120 | end = index + 2; |
| 1121 | |
| 1122 | for (i=index; i < end; i++) |
| 1123 | { |
| 1124 | WRITE8(ea, VREG_B(dest, i)); |
| 1125 | ea++; |
| 1126 | } |
| 1127 | break; |
| 1128 | } |
| 1129 | case 0x02: /* SLV */ |
| 1130 | { |
| 1131 | // 31 25 20 15 10 6 0 |
| 1132 | // -------------------------------------------------- |
| 1133 | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 1134 | // -------------------------------------------------- |
| 1135 | // |
| 1136 | // Stores 4 bytes starting from vector byte index |
| 1137 | |
| 1138 | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 1139 | |
| 1140 | end = index + 4; |
| 1141 | |
| 1142 | for (i=index; i < end; i++) |
| 1143 | { |
| 1144 | WRITE8(ea, VREG_B(dest, i)); |
| 1145 | ea++; |
| 1146 | } |
| 1147 | break; |
| 1148 | } |
| 1149 | case 0x03: /* SDV */ |
| 1150 | { |
| 1151 | // 31 25 20 15 10 6 0 |
| 1152 | // -------------------------------------------------- |
| 1153 | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 1154 | // -------------------------------------------------- |
| 1155 | // |
| 1156 | // Stores 8 bytes starting from vector byte index |
| 1157 | |
| 1158 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1159 | |
| 1160 | end = index + 8; |
| 1161 | |
| 1162 | for (i=index; i < end; i++) |
| 1163 | { |
| 1164 | WRITE8(ea, VREG_B(dest, i)); |
| 1165 | ea++; |
| 1166 | } |
| 1167 | break; |
| 1168 | } |
| 1169 | case 0x04: /* SQV */ |
| 1170 | { |
| 1171 | // 31 25 20 15 10 6 0 |
| 1172 | // -------------------------------------------------- |
| 1173 | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 1174 | // -------------------------------------------------- |
| 1175 | // |
| 1176 | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 1177 | |
| 1178 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1179 | |
| 1180 | end = index + (16 - (ea & 0xf)); |
| 1181 | |
| 1182 | for (i=index; i < end; i++) |
| 1183 | { |
| 1184 | WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 1185 | ea++; |
| 1186 | } |
| 1187 | break; |
| 1188 | } |
| 1189 | case 0x05: /* SRV */ |
| 1190 | { |
| 1191 | // 31 25 20 15 10 6 0 |
| 1192 | // -------------------------------------------------- |
| 1193 | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 1194 | // -------------------------------------------------- |
| 1195 | // |
| 1196 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 1197 | |
| 1198 | int o; |
| 1199 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1200 | |
| 1201 | end = index + (ea & 0xf); |
| 1202 | o = (16 - (ea & 0xf)) & 0xf; |
| 1203 | ea &= ~0xf; |
| 1204 | |
| 1205 | for (i=index; i < end; i++) |
| 1206 | { |
| 1207 | WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 1208 | ea++; |
| 1209 | } |
| 1210 | break; |
| 1211 | } |
| 1212 | case 0x06: /* SPV */ |
| 1213 | { |
| 1214 | // 31 25 20 15 10 6 0 |
| 1215 | // -------------------------------------------------- |
| 1216 | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1217 | // -------------------------------------------------- |
| 1218 | // |
| 1219 | // Stores upper 8 bits of each element |
| 1220 | |
| 1221 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1222 | end = index + 8; |
| 1223 | |
| 1224 | for (i=index; i < end; i++) |
| 1225 | { |
| 1226 | if ((i & 0xf) < 8) |
| 1227 | { |
| 1228 | WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1))); |
| 1229 | } |
| 1230 | else |
| 1231 | { |
| 1232 | WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1233 | } |
| 1234 | ea++; |
| 1235 | } |
| 1236 | break; |
| 1237 | } |
| 1238 | case 0x07: /* SUV */ |
| 1239 | { |
| 1240 | // 31 25 20 15 10 6 0 |
| 1241 | // -------------------------------------------------- |
| 1242 | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1243 | // -------------------------------------------------- |
| 1244 | // |
| 1245 | // Stores bits 14-7 of each element |
| 1246 | |
| 1247 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1248 | end = index + 8; |
| 1249 | |
| 1250 | for (i=index; i < end; i++) |
| 1251 | { |
| 1252 | if ((i & 0xf) < 8) |
| 1253 | { |
| 1254 | WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1255 | } |
| 1256 | else |
| 1257 | { |
| 1258 | WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 1259 | } |
| 1260 | ea++; |
| 1261 | } |
| 1262 | break; |
| 1263 | } |
| 1264 | case 0x08: /* SHV */ |
| 1265 | { |
| 1266 | // 31 25 20 15 10 6 0 |
| 1267 | // -------------------------------------------------- |
| 1268 | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1269 | // -------------------------------------------------- |
| 1270 | // |
| 1271 | // Stores bits 14-7 of each element, with 2-byte stride |
| 1272 | |
| 1273 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1274 | |
| 1275 | for (i=0; i < 8; i++) |
| 1276 | { |
| 1277 | UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) | |
| 1278 | ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7); |
| 1279 | |
| 1280 | WRITE8(ea, d); |
| 1281 | ea += 2; |
| 1282 | } |
| 1283 | break; |
| 1284 | } |
| 1285 | case 0x09: /* SFV */ |
| 1286 | { |
| 1287 | // 31 25 20 15 10 6 0 |
| 1288 | // -------------------------------------------------- |
| 1289 | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1290 | // -------------------------------------------------- |
| 1291 | // |
| 1292 | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 1293 | |
| 1294 | // FIXME: only works for index 0 and index 8 |
| 1295 | |
| 1296 | if (index & 0x7) osd_printf_debug("RSP: SFV: index = %d at %08X\n", index, m_ppc); |
| 1297 | |
| 1298 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1299 | |
| 1300 | eaoffset = ea & 0xf; |
| 1301 | ea &= ~0xf; |
| 1302 | |
| 1303 | end = (index >> 1) + 4; |
| 1304 | |
| 1305 | for (i=index >> 1; i < end; i++) |
| 1306 | { |
| 1307 | WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 1308 | eaoffset += 4; |
| 1309 | } |
| 1310 | break; |
| 1311 | } |
| 1312 | case 0x0a: /* SWV */ |
| 1313 | { |
| 1314 | // 31 25 20 15 10 6 0 |
| 1315 | // -------------------------------------------------- |
| 1316 | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1317 | // -------------------------------------------------- |
| 1318 | // |
| 1319 | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1320 | // after byte index 15 |
| 1321 | |
| 1322 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1323 | |
| 1324 | eaoffset = ea & 0xf; |
| 1325 | ea &= ~0xf; |
| 1326 | |
| 1327 | end = index + 16; |
| 1328 | |
| 1329 | for (i=index; i < end; i++) |
| 1330 | { |
| 1331 | WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); |
| 1332 | eaoffset++; |
| 1333 | } |
| 1334 | break; |
| 1335 | } |
| 1336 | case 0x0b: /* STV */ |
| 1337 | { |
| 1338 | // 31 25 20 15 10 6 0 |
| 1339 | // -------------------------------------------------- |
| 1340 | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1341 | // -------------------------------------------------- |
| 1342 | // |
| 1343 | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 1344 | |
| 1345 | int element; |
| 1346 | int vs = dest; |
| 1347 | int ve = dest + 8; |
| 1348 | if (ve > 32) |
| 1349 | ve = 32; |
| 1350 | |
| 1351 | element = 8 - (index >> 1); |
| 1352 | |
| 1353 | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1354 | |
| 1355 | eaoffset = (ea & 0xf) + (element * 2); |
| 1356 | ea &= ~0xf; |
| 1357 | |
| 1358 | for (i=vs; i < ve; i++) |
| 1359 | { |
| 1360 | WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); |
| 1361 | eaoffset += 2; |
| 1362 | element++; |
| 1363 | } |
| 1364 | break; |
| 1365 | } |
| 1366 | |
| 1367 | default: |
| 1368 | { |
| 1369 | unimplemented_opcode(op); |
| 1370 | break; |
| 1371 | } |
| 1372 | } |
| 569 | 1373 | } |
| 570 | 1374 | |
| 1375 | inline UINT16 rsp_device::SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 1376 | { |
| 1377 | if ((INT16)ACCUM_H(accum) < 0) |
| 1378 | { |
| 1379 | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 1380 | { |
| 1381 | return negative; |
| 1382 | } |
| 1383 | else |
| 1384 | { |
| 1385 | if ((INT16)ACCUM_M(accum) >= 0) |
| 1386 | { |
| 1387 | return negative; |
| 1388 | } |
| 1389 | else |
| 1390 | { |
| 1391 | if (slice == 0) |
| 1392 | { |
| 1393 | return ACCUM_L(accum); |
| 1394 | } |
| 1395 | else if (slice == 1) |
| 1396 | { |
| 1397 | return ACCUM_M(accum); |
| 1398 | } |
| 1399 | } |
| 1400 | } |
| 1401 | } |
| 1402 | else |
| 1403 | { |
| 1404 | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 1405 | { |
| 1406 | return positive; |
| 1407 | } |
| 1408 | else |
| 1409 | { |
| 1410 | if ((INT16)ACCUM_M(accum) < 0) |
| 1411 | { |
| 1412 | return positive; |
| 1413 | } |
| 1414 | else |
| 1415 | { |
| 1416 | if (slice == 0) |
| 1417 | { |
| 1418 | return ACCUM_L(accum); |
| 1419 | } |
| 1420 | else |
| 1421 | { |
| 1422 | return ACCUM_M(accum); |
| 1423 | } |
| 1424 | } |
| 1425 | } |
| 1426 | } |
| 1427 | |
| 1428 | return 0; |
| 1429 | } |
| 1430 | |
| 1431 | inline UINT16 rsp_device::SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive) |
| 1432 | { |
| 1433 | if ((INT16)ACCUM_H(accum) < 0) |
| 1434 | { |
| 1435 | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 1436 | { |
| 1437 | return negative; |
| 1438 | } |
| 1439 | else |
| 1440 | { |
| 1441 | if ((INT16)ACCUM_M(accum) >= 0) |
| 1442 | { |
| 1443 | return negative; |
| 1444 | } |
| 1445 | else |
| 1446 | { |
| 1447 | return ACCUM_M(accum); |
| 1448 | } |
| 1449 | } |
| 1450 | } |
| 1451 | else |
| 1452 | { |
| 1453 | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 1454 | { |
| 1455 | return positive; |
| 1456 | } |
| 1457 | else |
| 1458 | { |
| 1459 | if ((INT16)ACCUM_M(accum) < 0) |
| 1460 | { |
| 1461 | return positive; |
| 1462 | } |
| 1463 | else |
| 1464 | { |
| 1465 | return ACCUM_M(accum); |
| 1466 | } |
| 1467 | } |
| 1468 | } |
| 1469 | } |
| 1470 | |
// Copies the 16 bytes staged in the caller's local `vres` array into the
// vector register selected by the caller's local `VDREG`.
#define WRITEBACK_RESULT()  { memcpy(m_v[VDREG].s, vres, 16); }
| 1472 | |
| 1473 | void rsp_device::handle_vector_ops(UINT32 op) |
| 1474 | { |
| 1475 | int i; |
| 1476 | UINT32 VS1REG = (op >> 11) & 0x1f; |
| 1477 | UINT32 VS2REG = (op >> 16) & 0x1f; |
| 1478 | UINT32 VDREG = (op >> 6) & 0x1f; |
| 1479 | UINT32 EL = (op >> 21) & 0xf; |
| 1480 | INT16 vres[8]; |
| 1481 | |
| 1482 | // Opcode legend: |
| 1483 | // E = VS2 element type |
| 1484 | // S = VS1, Source vector 1 |
| 1485 | // T = VS2, Source vector 2 |
| 1486 | // D = Destination vector |
| 1487 | |
| 1488 | switch (op & 0x3f) |
| 1489 | { |
| 1490 | case 0x00: /* VMULF */ |
| 1491 | { |
| 1492 | // 31 25 24 20 15 10 5 0 |
| 1493 | // ------------------------------------------------------ |
| 1494 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 1495 | // ------------------------------------------------------ |
| 1496 | // |
| 1497 | // Multiplies signed integer by signed integer * 2 |
| 1498 | |
| 1499 | for (i=0; i < 8; i++) |
| 1500 | { |
| 1501 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1502 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1503 | |
| 1504 | if (s1 == -32768 && s2 == -32768) |
| 1505 | { |
| 1506 | // overflow |
| 1507 | ACCUM_H(i) = 0; |
| 1508 | ACCUM_M(i) = -32768; |
| 1509 | ACCUM_L(i) = -32768; |
| 1510 | vres[i] = 0x7fff; |
| 1511 | } |
| 1512 | else |
| 1513 | { |
| 1514 | INT64 r = s1 * s2 * 2; |
| 1515 | r += 0x8000; // rounding ? |
| 1516 | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1517 | ACCUM_M(i) = (INT16)(r >> 16); |
| 1518 | ACCUM_L(i) = (UINT16)(r); |
| 1519 | vres[i] = ACCUM_M(i); |
| 1520 | } |
| 1521 | } |
| 1522 | WRITEBACK_RESULT(); |
| 1523 | |
| 1524 | break; |
| 1525 | } |
| 1526 | |
| 1527 | case 0x01: /* VMULU */ |
| 1528 | { |
| 1529 | // 31 25 24 20 15 10 5 0 |
| 1530 | // ------------------------------------------------------ |
| 1531 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 1532 | // ------------------------------------------------------ |
| 1533 | // |
| 1534 | |
| 1535 | for (i=0; i < 8; i++) |
| 1536 | { |
| 1537 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1538 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1539 | |
| 1540 | INT64 r = s1 * s2 * 2; |
| 1541 | r += 0x8000; // rounding ? |
| 1542 | |
| 1543 | ACCUM_H(i) = (UINT16)(r >> 32); |
| 1544 | ACCUM_M(i) = (UINT16)(r >> 16); |
| 1545 | ACCUM_L(i) = (UINT16)(r); |
| 1546 | |
| 1547 | if (r < 0) |
| 1548 | { |
| 1549 | vres[i] = 0; |
| 1550 | } |
| 1551 | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 1552 | { |
| 1553 | vres[i] = -1; |
| 1554 | } |
| 1555 | else |
| 1556 | { |
| 1557 | vres[i] = ACCUM_M(i); |
| 1558 | } |
| 1559 | } |
| 1560 | WRITEBACK_RESULT(); |
| 1561 | break; |
| 1562 | } |
| 1563 | |
| 1564 | case 0x04: /* VMUDL */ |
| 1565 | { |
| 1566 | // 31 25 24 20 15 10 5 0 |
| 1567 | // ------------------------------------------------------ |
| 1568 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | |
| 1569 | // ------------------------------------------------------ |
| 1570 | // |
| 1571 | // Multiplies unsigned fraction by unsigned fraction |
| 1572 | // Stores the higher 16 bits of the 32-bit result to accumulator |
| 1573 | // The low slice of accumulator is stored into destination element |
| 1574 | |
| 1575 | for (i=0; i < 8; i++) |
| 1576 | { |
| 1577 | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1578 | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1579 | UINT32 r = s1 * s2; |
| 1580 | |
| 1581 | ACCUM_H(i) = 0; |
| 1582 | ACCUM_M(i) = 0; |
| 1583 | ACCUM_L(i) = (UINT16)(r >> 16); |
| 1584 | |
| 1585 | vres[i] = ACCUM_L(i); |
| 1586 | } |
| 1587 | WRITEBACK_RESULT(); |
| 1588 | break; |
| 1589 | } |
| 1590 | |
| 1591 | case 0x05: /* VMUDM */ |
| 1592 | { |
| 1593 | // 31 25 24 20 15 10 5 0 |
| 1594 | // ------------------------------------------------------ |
| 1595 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1596 | // ------------------------------------------------------ |
| 1597 | // |
| 1598 | // Multiplies signed integer by unsigned fraction |
| 1599 | // The result is stored into accumulator |
| 1600 | // The middle slice of accumulator is stored into destination element |
| 1601 | |
| 1602 | for (i=0; i < 8; i++) |
| 1603 | { |
| 1604 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1605 | INT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1606 | INT32 r = s1 * s2; |
| 1607 | |
| 1608 | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1609 | ACCUM_M(i) = (INT16)(r >> 16); |
| 1610 | ACCUM_L(i) = (UINT16)(r); |
| 1611 | |
| 1612 | vres[i] = ACCUM_M(i); |
| 1613 | } |
| 1614 | WRITEBACK_RESULT(); |
| 1615 | break; |
| 1616 | |
| 1617 | } |
| 1618 | |
| 1619 | case 0x06: /* VMUDN */ |
| 1620 | { |
| 1621 | // 31 25 24 20 15 10 5 0 |
| 1622 | // ------------------------------------------------------ |
| 1623 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1624 | // ------------------------------------------------------ |
| 1625 | // |
| 1626 | // Multiplies unsigned fraction by signed integer |
| 1627 | // The result is stored into accumulator |
| 1628 | // The low slice of accumulator is stored into destination element |
| 1629 | |
| 1630 | for (i=0; i < 8; i++) |
| 1631 | { |
| 1632 | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1633 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1634 | INT32 r = s1 * s2; |
| 1635 | |
| 1636 | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1637 | ACCUM_M(i) = (INT16)(r >> 16); |
| 1638 | ACCUM_L(i) = (UINT16)(r); |
| 1639 | |
| 1640 | vres[i] = ACCUM_L(i); |
| 1641 | } |
| 1642 | WRITEBACK_RESULT(); |
| 1643 | break; |
| 1644 | } |
| 1645 | |
| 1646 | case 0x07: /* VMUDH */ |
| 1647 | { |
| 1648 | // 31 25 24 20 15 10 5 0 |
| 1649 | // ------------------------------------------------------ |
| 1650 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1651 | // ------------------------------------------------------ |
| 1652 | // |
| 1653 | // Multiplies signed integer by signed integer |
| 1654 | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1655 | // The highest 32 bits of accumulator is saturated into destination element |
| 1656 | |
| 1657 | for (i=0; i < 8; i++) |
| 1658 | { |
| 1659 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1660 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1661 | INT32 r = s1 * s2; |
| 1662 | |
| 1663 | ACCUM_H(i) = (INT16)(r >> 16); |
| 1664 | ACCUM_M(i) = (UINT16)(r); |
| 1665 | ACCUM_L(i) = 0; |
| 1666 | |
| 1667 | if (r < -32768) r = -32768; |
| 1668 | if (r > 32767) r = 32767; |
| 1669 | vres[i] = (INT16)(r); |
| 1670 | } |
| 1671 | WRITEBACK_RESULT(); |
| 1672 | break; |
| 1673 | } |
| 1674 | |
| 1675 | case 0x08: /* VMACF */ |
| 1676 | { |
| 1677 | // 31 25 24 20 15 10 5 0 |
| 1678 | // ------------------------------------------------------ |
| 1679 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1680 | // ------------------------------------------------------ |
| 1681 | // |
| 1682 | // Multiplies signed integer by signed integer * 2 |
| 1683 | // The result is added to accumulator |
| 1684 | |
| 1685 | for (i=0; i < 8; i++) |
| 1686 | { |
| 1687 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1688 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1689 | INT32 r = s1 * s2; |
| 1690 | |
| 1691 | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1692 | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1693 | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1694 | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1695 | |
| 1696 | q += (INT64)(r) << 17; |
| 1697 | |
| 1698 | ACCUM_LL(i) = (UINT16)q; |
| 1699 | ACCUM_L(i) = (UINT16)(q >> 16); |
| 1700 | ACCUM_M(i) = (UINT16)(q >> 32); |
| 1701 | ACCUM_H(i) = (UINT16)(q >> 48); |
| 1702 | |
| 1703 | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1704 | } |
| 1705 | WRITEBACK_RESULT(); |
| 1706 | break; |
| 1707 | } |
| 1708 | |
| 1709 | case 0x09: /* VMACU */ |
| 1710 | { |
| 1711 | // 31 25 24 20 15 10 5 0 |
| 1712 | // ------------------------------------------------------ |
| 1713 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1714 | // ------------------------------------------------------ |
| 1715 | // |
| 1716 | |
| 1717 | for (i = 0; i < 8; i++) |
| 1718 | { |
| 1719 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1720 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1721 | INT32 r1 = s1 * s2; |
| 1722 | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1723 | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1724 | |
| 1725 | ACCUM_L(i) = (UINT16)(r2); |
| 1726 | ACCUM_M(i) = (UINT16)(r3); |
| 1727 | ACCUM_H(i) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31); |
| 1728 | |
| 1729 | if ((INT16)ACCUM_H(i) < 0) |
| 1730 | { |
| 1731 | vres[i] = 0; |
| 1732 | } |
| 1733 | else |
| 1734 | { |
| 1735 | if (ACCUM_H(i) != 0) |
| 1736 | { |
| 1737 | vres[i] = 0xffffu; |
| 1738 | } |
| 1739 | else |
| 1740 | { |
| 1741 | if ((INT16)ACCUM_M(i) < 0) |
| 1742 | { |
| 1743 | vres[i] = 0xffffu; |
| 1744 | } |
| 1745 | else |
| 1746 | { |
| 1747 | vres[i] = ACCUM_M(i); |
| 1748 | } |
| 1749 | } |
| 1750 | } |
| 1751 | } |
| 1752 | WRITEBACK_RESULT(); |
| 1753 | break; |
| 1754 | } |
| 1755 | |
| 1756 | case 0x0c: /* VMADL */ |
| 1757 | { |
| 1758 | // 31 25 24 20 15 10 5 0 |
| 1759 | // ------------------------------------------------------ |
| 1760 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1761 | // ------------------------------------------------------ |
| 1762 | // |
| 1763 | // Multiplies unsigned fraction by unsigned fraction |
| 1764 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1765 | // The low slice of accumulator is stored into destination element |
| 1766 | |
| 1767 | for (i = 0; i < 8; i++) |
| 1768 | { |
| 1769 | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1770 | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1771 | UINT32 r1 = s1 * s2; |
| 1772 | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1773 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1774 | |
| 1775 | ACCUM_L(i) = (UINT16)(r2); |
| 1776 | ACCUM_M(i) = (UINT16)(r3); |
| 1777 | ACCUM_H(i) += (INT16)(r3 >> 16); |
| 1778 | |
| 1779 | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1780 | } |
| 1781 | WRITEBACK_RESULT(); |
| 1782 | break; |
| 1783 | } |
| 1784 | |
| 1785 | case 0x0d: /* VMADM */ |
| 1786 | { |
| 1787 | // 31 25 24 20 15 10 5 0 |
| 1788 | // ------------------------------------------------------ |
| 1789 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1790 | // ------------------------------------------------------ |
| 1791 | // |
| 1792 | // Multiplies signed integer by unsigned fraction |
| 1793 | // The result is added into accumulator |
| 1794 | // The middle slice of accumulator is stored into destination element |
| 1795 | |
| 1796 | for (i=0; i < 8; i++) |
| 1797 | { |
| 1798 | UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1799 | UINT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1800 | UINT32 r1 = s1 * s2; |
| 1801 | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 1802 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 1803 | |
| 1804 | ACCUM_L(i) = (UINT16)(r2); |
| 1805 | ACCUM_M(i) = (UINT16)(r3); |
| 1806 | ACCUM_H(i) += (UINT16)(r3 >> 16); |
| 1807 | if ((INT32)(r1) < 0) |
| 1808 | ACCUM_H(i) -= 1; |
| 1809 | |
| 1810 | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1811 | } |
| 1812 | WRITEBACK_RESULT(); |
| 1813 | break; |
| 1814 | } |
| 1815 | |
| 1816 | case 0x0e: /* VMADN */ |
| 1817 | { |
| 1818 | // 31 25 24 20 15 10 5 0 |
| 1819 | // ------------------------------------------------------ |
| 1820 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | |
| 1821 | // ------------------------------------------------------ |
| 1822 | // |
| 1823 | // Multiplies unsigned fraction by signed integer |
| 1824 | // The result is added into accumulator |
| 1825 | // The low slice of accumulator is stored into destination element |
| 1826 | |
| 1827 | for (i=0; i < 8; i++) |
| 1828 | { |
| 1829 | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1830 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1831 | |
| 1832 | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1833 | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1834 | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1835 | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1836 | q += (INT64)(s1*s2) << 16; |
| 1837 | |
| 1838 | ACCUM_LL(i) = (UINT16)q; |
| 1839 | ACCUM_L(i) = (UINT16)(q >> 16); |
| 1840 | ACCUM_M(i) = (UINT16)(q >> 32); |
| 1841 | ACCUM_H(i) = (UINT16)(q >> 48); |
| 1842 | |
| 1843 | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1844 | } |
| 1845 | WRITEBACK_RESULT(); |
| 1846 | |
| 1847 | break; |
| 1848 | } |
| 1849 | |
| 1850 | case 0x0f: /* VMADH */ |
| 1851 | { |
| 1852 | // 31 25 24 20 15 10 5 0 |
| 1853 | // ------------------------------------------------------ |
| 1854 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 1855 | // ------------------------------------------------------ |
| 1856 | // |
| 1857 | // Multiplies signed integer by signed integer |
| 1858 | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 1859 | // The highest 32 bits of accumulator is saturated into destination element |
| 1860 | |
| 1861 | for (i = 0; i < 8; i++) |
| 1862 | { |
| 1863 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1864 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1865 | |
| 1866 | INT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1867 | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1868 | accum += s1 * s2; |
| 1869 | |
| 1870 | ACCUM_H(i) = (UINT16)(accum >> 16); |
| 1871 | ACCUM_M(i) = (UINT16)accum; |
| 1872 | |
| 1873 | vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 1874 | } |
| 1875 | WRITEBACK_RESULT(); |
| 1876 | |
| 1877 | break; |
| 1878 | } |
| 1879 | |
| 1880 | case 0x10: /* VADD */ |
| 1881 | { |
| 1882 | // 31 25 24 20 15 10 5 0 |
| 1883 | // ------------------------------------------------------ |
| 1884 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 1885 | // ------------------------------------------------------ |
| 1886 | // |
| 1887 | // Adds two vector registers and the carry flag; the result is saturated to the signed 16-bit range [-32768, 32767] |
| 1888 | |
| 1889 | // TODO: check VS2REG == VDREG |
| 1890 | |
| 1891 | for (i=0; i < 8; i++) |
| 1892 | { |
| 1893 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1894 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1895 | INT32 r = s1 + s2 + (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1896 | |
| 1897 | ACCUM_L(i) = (INT16)(r); |
| 1898 | |
| 1899 | if (r > 32767) r = 32767; |
| 1900 | if (r < -32768) r = -32768; |
| 1901 | vres[i] = (INT16)(r); |
| 1902 | } |
| 1903 | CLEAR_ZERO_FLAGS(); |
| 1904 | CLEAR_CARRY_FLAGS(); |
| 1905 | WRITEBACK_RESULT(); |
| 1906 | break; |
| 1907 | } |
| 1908 | |
| 1909 | case 0x11: /* VSUB */ |
| 1910 | { |
| 1911 | // 31 25 24 20 15 10 5 0 |
| 1912 | // ------------------------------------------------------ |
| 1913 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 1914 | // ------------------------------------------------------ |
| 1915 | // |
| 1916 | // Subtracts VS2 and the carry flag from VS1; the result is saturated to the signed 16-bit range [-32768, 32767] |
| 1917 | |
| 1918 | // TODO: check VS2REG == VDREG |
| 1919 | |
| 1920 | for (i = 0; i < 8; i++) |
| 1921 | { |
| 1922 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1923 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1924 | INT32 r = s1 - s2 - (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1925 | |
| 1926 | ACCUM_L(i) = (INT16)(r); |
| 1927 | |
| 1928 | if (r > 32767) r = 32767; |
| 1929 | if (r < -32768) r = -32768; |
| 1930 | |
| 1931 | vres[i] = (INT16)(r); |
| 1932 | } |
| 1933 | CLEAR_ZERO_FLAGS(); |
| 1934 | CLEAR_CARRY_FLAGS(); |
| 1935 | WRITEBACK_RESULT(); |
| 1936 | break; |
| 1937 | } |
| 1938 | |
| 1939 | case 0x13: /* VABS */ |
| 1940 | { |
| 1941 | // 31 25 24 20 15 10 5 0 |
| 1942 | // ------------------------------------------------------ |
| 1943 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 1944 | // ------------------------------------------------------ |
| 1945 | // |
| 1946 | // Negates each VS2 element where the VS1 element is negative (-32768 clamps to 32767), passes it through |
| 1947 | // unchanged where VS1 is positive, and yields 0 where VS1 is zero; stores the result to destination register |
| 1948 | |
| 1949 | for (i=0; i < 8; i++) |
| 1950 | { |
| 1951 | INT16 s1 = (INT16)VREG_S(VS1REG, i); |
| 1952 | INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1953 | |
| 1954 | if (s1 < 0) |
| 1955 | { |
| 1956 | if (s2 == -32768) |
| 1957 | { |
| 1958 | vres[i] = 32767; |
| 1959 | } |
| 1960 | else |
| 1961 | { |
| 1962 | vres[i] = -s2; |
| 1963 | } |
| 1964 | } |
| 1965 | else if (s1 > 0) |
| 1966 | { |
| 1967 | vres[i] = s2; |
| 1968 | } |
| 1969 | else |
| 1970 | { |
| 1971 | vres[i] = 0; |
| 1972 | } |
| 1973 | |
| 1974 | ACCUM_L(i) = vres[i]; |
| 1975 | } |
| 1976 | WRITEBACK_RESULT(); |
| 1977 | break; |
| 1978 | } |
| 1979 | |
| 1980 | case 0x14: /* VADDC */ |
| 1981 | { |
| 1982 | // 31 25 24 20 15 10 5 0 |
| 1983 | // ------------------------------------------------------ |
| 1984 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 1985 | // ------------------------------------------------------ |
| 1986 | // |
| 1987 | // Adds two vector registers, the carry out is stored into carry register |
| 1988 | |
| 1989 | // TODO: check VS2REG = VDREG |
| 1990 | |
| 1991 | CLEAR_ZERO_FLAGS(); |
| 1992 | CLEAR_CARRY_FLAGS(); |
| 1993 | |
| 1994 | for (i=0; i < 8; i++) |
| 1995 | { |
| 1996 | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1997 | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1998 | INT32 r = s1 + s2; |
| 1999 | |
| 2000 | vres[i] = (INT16)(r); |
| 2001 | ACCUM_L(i) = (INT16)(r); |
| 2002 | |
| 2003 | if (r & 0xffff0000) |
| 2004 | { |
| 2005 | SET_CARRY_FLAG(i); |
| 2006 | } |
| 2007 | } |
| 2008 | WRITEBACK_RESULT(); |
| 2009 | break; |
| 2010 | } |
| 2011 | |
| 2012 | case 0x15: /* VSUBC */ |
| 2013 | { |
| 2014 | // 31 25 24 20 15 10 5 0 |
| 2015 | // ------------------------------------------------------ |
| 2016 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 2017 | // ------------------------------------------------------ |
| 2018 | // |
| 2019 | // Subtracts two vector registers, the carry out is stored into carry register |
| 2020 | |
| 2021 | // TODO: check VS2REG = VDREG |
| 2022 | |
| 2023 | CLEAR_ZERO_FLAGS(); |
| 2024 | CLEAR_CARRY_FLAGS(); |
| 2025 | |
| 2026 | for (i=0; i < 8; i++) |
| 2027 | { |
| 2028 | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 2029 | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2030 | INT32 r = s1 - s2; |
| 2031 | |
| 2032 | vres[i] = (INT16)(r); |
| 2033 | ACCUM_L(i) = (UINT16)(r); |
| 2034 | |
| 2035 | if ((UINT16)(r) != 0) |
| 2036 | { |
| 2037 | SET_ZERO_FLAG(i); |
| 2038 | } |
| 2039 | if (r & 0xffff0000) |
| 2040 | { |
| 2041 | SET_CARRY_FLAG(i); |
| 2042 | } |
| 2043 | } |
| 2044 | WRITEBACK_RESULT(); |
| 2045 | break; |
| 2046 | } |
| 2047 | |
| 2048 | case 0x1d: /* VSAW */ |
| 2049 | { |
| 2050 | // 31 25 24 20 15 10 5 0 |
| 2051 | // ------------------------------------------------------ |
| 2052 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 2053 | // ------------------------------------------------------ |
| 2054 | // |
| 2055 | // Stores high, middle or low slice of accumulator to destination vector |
| 2056 | |
| 2057 | switch (EL) |
| 2058 | { |
| 2059 | case 0x08: // VSAWH |
| 2060 | { |
| 2061 | for (i=0; i < 8; i++) |
| 2062 | { |
| 2063 | VREG_S(VDREG, i) = ACCUM_H(i); |
| 2064 | } |
| 2065 | break; |
| 2066 | } |
| 2067 | case 0x09: // VSAWM |
| 2068 | { |
| 2069 | for (i=0; i < 8; i++) |
| 2070 | { |
| 2071 | VREG_S(VDREG, i) = ACCUM_M(i); |
| 2072 | } |
| 2073 | break; |
| 2074 | } |
| 2075 | case 0x0a: // VSAWL |
| 2076 | { |
| 2077 | for (i=0; i < 8; i++) |
| 2078 | { |
| 2079 | VREG_S(VDREG, i) = ACCUM_L(i); |
| 2080 | } |
| 2081 | break; |
| 2082 | } |
| 2083 | default: //fatalerror("RSP: VSAW: el = %d\n", EL);// EL is none of 0x08, 0x09, 0x0a |
| 2084 | printf("RSP: VSAW: el = %d\n", EL);// report the unexpected element selector |
| 2085 | exit(0); |
| 2086 | } |
| 2087 | break; |
| 2088 | } |
| 2089 | |
| 2090 | case 0x20: /* VLT */ |
| 2091 | { |
| 2092 | // 31 25 24 20 15 10 5 0 |
| 2093 | // ------------------------------------------------------ |
| 2094 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 2095 | // ------------------------------------------------------ |
| 2096 | // |
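| 2097 | // Sets the compare flag for each VS1 element that is less than VS2 (or equal to it when both zero and carry flags are set) |
| 2098 | // Moves the VS1 element to destination vector where the compare flag is set, otherwise the VS2 element |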
| 2099 | |
| 2100 | CLEAR_COMPARE_FLAGS(); |
| 2101 | CLEAR_CLIP2_FLAGS(); |
| 2102 | |
| 2103 | for (i=0; i < 8; i++) |
| 2104 | { |
| 2105 | INT16 s1, s2; |
| 2106 | s1 = VREG_S(VS1REG, i); |
| 2107 | s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2108 | if (s1 < s2) |
| 2109 | { |
| 2110 | SET_COMPARE_FLAG(i); |
| 2111 | } |
| 2112 | else if (s1 == s2) |
| 2113 | { |
| 2114 | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 2115 | { |
| 2116 | SET_COMPARE_FLAG(i); |
| 2117 | } |
| 2118 | } |
| 2119 | |
| 2120 | if (COMPARE_FLAG(i) != 0) |
| 2121 | { |
| 2122 | vres[i] = s1; |
| 2123 | } |
| 2124 | else |
| 2125 | { |
| 2126 | vres[i] = s2; |
| 2127 | } |
| 2128 | |
| 2129 | ACCUM_L(i) = vres[i]; |
| 2130 | } |
| 2131 | |
| 2132 | CLEAR_CARRY_FLAGS(); |
| 2133 | CLEAR_ZERO_FLAGS(); |
| 2134 | WRITEBACK_RESULT(); |
| 2135 | break; |
| 2136 | } |
| 2137 | |
| 2138 | case 0x21: /* VEQ */ |
| 2139 | { |
| 2140 | // 31 25 24 20 15 10 5 0 |
| 2141 | // ------------------------------------------------------ |
| 2142 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 2143 | // ------------------------------------------------------ |
| 2144 | // |
| 2145 | // Sets the compare flag for each VS1 element that is equal to VS2 while the zero flag is clear |
| 2146 | // Moves the VS1 element to destination vector where the compare flag is set, otherwise the VS2 element |
| 2147 | |
| 2148 | CLEAR_COMPARE_FLAGS(); |
| 2149 | CLEAR_CLIP2_FLAGS(); |
| 2150 | |
| 2151 | for (i = 0; i < 8; i++) |
| 2152 | { |
| 2153 | INT16 s1 = VREG_S(VS1REG, i); |
| 2154 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2155 | |
| 2156 | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 2157 | { |
| 2158 | SET_COMPARE_FLAG(i); |
| 2159 | vres[i] = s1; |
| 2160 | } |
| 2161 | else |
| 2162 | { |
| 2163 | vres[i] = s2; |
| 2164 | } |
| 2165 | ACCUM_L(i) = vres[i]; |
| 2166 | } |
| 2167 | |
| 2168 | CLEAR_ZERO_FLAGS(); |
| 2169 | CLEAR_CARRY_FLAGS(); |
| 2170 | WRITEBACK_RESULT(); |
| 2171 | break; |
| 2172 | } |
| 2173 | |
| 2174 | case 0x22: /* VNE */ |
| 2175 | { |
| 2176 | // 31 25 24 20 15 10 5 0 |
| 2177 | // ------------------------------------------------------ |
| 2178 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 2179 | // ------------------------------------------------------ |
| 2180 | // |
| 2181 | // Sets the compare flag for each VS1 element that is not equal to VS2, or whose zero flag is set |
| 2182 | // Moves the VS1 element to destination vector where the compare flag is set, otherwise the VS2 element |
| 2183 | |
| 2184 | CLEAR_COMPARE_FLAGS(); |
| 2185 | CLEAR_CLIP2_FLAGS(); |
| 2186 | |
| 2187 | for (i = 0; i < 8; i++) |
| 2188 | { |
| 2189 | INT16 s1 = VREG_S(VS1REG, i); |
| 2190 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2191 | |
| 2192 | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 2193 | { |
| 2194 | SET_COMPARE_FLAG(i); |
| 2195 | vres[i] = s1; |
| 2196 | } |
| 2197 | else |
| 2198 | { |
| 2199 | vres[i] = s2; |
| 2200 | } |
| 2201 | ACCUM_L(i) = vres[i]; |
| 2202 | } |
| 2203 | |
| 2204 | CLEAR_CARRY_FLAGS(); |
| 2205 | CLEAR_ZERO_FLAGS(); |
| 2206 | WRITEBACK_RESULT(); |
| 2207 | break; |
| 2208 | } |
| 2209 | |
| 2210 | case 0x23: /* VGE */ |
| 2211 | { |
| 2212 | // 31 25 24 20 15 10 5 0 |
| 2213 | // ------------------------------------------------------ |
| 2214 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 2215 | // ------------------------------------------------------ |
| 2216 | // |
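| 2217 | // Sets the compare flag for each VS1 element that is greater than VS2 (or equal to it unless both zero and carry flags are set) |
| 2218 | // Moves the VS1 element to destination vector where the compare flag is set, otherwise the VS2 element |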
| 2219 | |
| 2220 | CLEAR_COMPARE_FLAGS(); |
| 2221 | CLEAR_CLIP2_FLAGS(); |
| 2222 | |
| 2223 | for (i=0; i < 8; i++) |
| 2224 | { |
| 2225 | INT16 s1 = VREG_S(VS1REG, i); |
| 2226 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2227 | |
| 2228 | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 2229 | { |
| 2230 | SET_COMPARE_FLAG(i); |
| 2231 | vres[i] = s1; |
| 2232 | } |
| 2233 | else |
| 2234 | { |
| 2235 | vres[i] = s2; |
| 2236 | } |
| 2237 | |
| 2238 | ACCUM_L(i) = vres[i]; |
| 2239 | } |
| 2240 | |
| 2241 | CLEAR_CARRY_FLAGS(); |
| 2242 | CLEAR_ZERO_FLAGS(); |
| 2243 | WRITEBACK_RESULT(); |
| 2244 | break; |
| 2245 | } |
| 2246 | |
| 2247 | case 0x24: /* VCL */ |
| 2248 | { |
| 2249 | // 31 25 24 20 15 10 5 0 |
| 2250 | // ------------------------------------------------------ |
| 2251 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 2252 | // ------------------------------------------------------ |
| 2253 | // |
| 2254 | // Vector clip low |
| 2255 | |
| 2256 | for (i = 0; i < 8; i++) |
| 2257 | { |
| 2258 | INT16 s1 = VREG_S(VS1REG, i); |
| 2259 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2260 | |
| 2261 | if (CARRY_FLAG(i) != 0) |
| 2262 | { |
| 2263 | if (ZERO_FLAG(i) != 0) |
| 2264 | { |
| 2265 | if (COMPARE_FLAG(i) != 0) |
| 2266 | { |
| 2267 | ACCUM_L(i) = -(UINT16)s2; |
| 2268 | } |
| 2269 | else |
| 2270 | { |
| 2271 | ACCUM_L(i) = s1; |
| 2272 | } |
| 2273 | } |
| 2274 | else |
| 2275 | { |
| 2276 | if (CLIP1_FLAG(i) != 0) |
| 2277 | { |
| 2278 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 2279 | { |
| 2280 | |
| 2281 | ACCUM_L(i) = s1; |
| 2282 | CLEAR_COMPARE_FLAG(i); |
| 2283 | } |
| 2284 | else |
| 2285 | { |
| 2286 | ACCUM_L(i) = -((UINT16)s2); |
| 2287 | SET_COMPARE_FLAG(i); |
| 2288 | } |
| 2289 | } |
| 2290 | else |
| 2291 | { |
| 2292 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 2293 | { |
| 2294 | ACCUM_L(i) = s1; |
| 2295 | CLEAR_COMPARE_FLAG(i); |
| 2296 | } |
| 2297 | else |
| 2298 | { |
| 2299 | ACCUM_L(i) = -((UINT16)s2); |
| 2300 | SET_COMPARE_FLAG(i); |
| 2301 | } |
| 2302 | } |
| 2303 | } |
| 2304 | } |
| 2305 | else |
| 2306 | { |
| 2307 | if (ZERO_FLAG(i) != 0) |
| 2308 | { |
| 2309 | if (CLIP2_FLAG(i) != 0) |
| 2310 | { |
| 2311 | ACCUM_L(i) = s2; |
| 2312 | } |
| 2313 | else |
| 2314 | { |
| 2315 | ACCUM_L(i) = s1; |
| 2316 | } |
| 2317 | } |
| 2318 | else |
| 2319 | { |
| 2320 | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 2321 | { |
| 2322 | ACCUM_L(i) = s2; |
| 2323 | SET_CLIP2_FLAG(i); |
| 2324 | } |
| 2325 | else |
| 2326 | { |
| 2327 | ACCUM_L(i) = s1; |
| 2328 | CLEAR_CLIP2_FLAG(i); |
| 2329 | } |
| 2330 | } |
| 2331 | } |
| 2332 | |
| 2333 | vres[i] = ACCUM_L(i); |
| 2334 | } |
| 2335 | CLEAR_CARRY_FLAGS(); |
| 2336 | CLEAR_ZERO_FLAGS(); |
| 2337 | CLEAR_CLIP1_FLAGS(); |
| 2338 | WRITEBACK_RESULT(); |
| 2339 | break; |
| 2340 | } |
| 2341 | |
| 2342 | case 0x25: /* VCH */ |
| 2343 | { |
| 2344 | // 31 25 24 20 15 10 5 0 |
| 2345 | // ------------------------------------------------------ |
| 2346 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 2347 | // ------------------------------------------------------ |
| 2348 | // |
| 2349 | // Vector clip high |
| 2350 | |
| 2351 | CLEAR_CARRY_FLAGS(); |
| 2352 | CLEAR_COMPARE_FLAGS(); |
| 2353 | CLEAR_CLIP1_FLAGS(); |
| 2354 | CLEAR_ZERO_FLAGS(); |
| 2355 | CLEAR_CLIP2_FLAGS(); |
| 2356 | UINT32 vce = 0; |
| 2357 | |
| 2358 | for (i=0; i < 8; i++) |
| 2359 | { |
| 2360 | INT16 s1 = VREG_S(VS1REG, i); |
| 2361 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2362 | |
| 2363 | if ((s1 ^ s2) < 0) |
| 2364 | { |
| 2365 | vce = (s1 + s2 == -1); |
| 2366 | SET_CARRY_FLAG(i); |
| 2367 | if (s2 < 0) |
| 2368 | { |
| 2369 | SET_CLIP2_FLAG(i); |
| 2370 | } |
| 2371 | |
| 2372 | if (s1 + s2 <= 0) |
| 2373 | { |
| 2374 | SET_COMPARE_FLAG(i); |
| 2375 | vres[i] = -((UINT16)s2); |
| 2376 | } |
| 2377 | else |
| 2378 | { |
| 2379 | vres[i] = s1; |
| 2380 | } |
| 2381 | |
| 2382 | if (s1 + s2 != 0) |
| 2383 | { |
| 2384 | if (s1 != ~s2) |
| 2385 | { |
| 2386 | SET_ZERO_FLAG(i); |
| 2387 | } |
| 2388 | } |
| 2389 | } |
| 2390 | else |
| 2391 | { |
| 2392 | vce = 0; |
| 2393 | if (s2 < 0) |
| 2394 | { |
| 2395 | SET_COMPARE_FLAG(i); |
| 2396 | } |
| 2397 | if (s1 - s2 >= 0) |
| 2398 | { |
| 2399 | SET_CLIP2_FLAG(i); |
| 2400 | vres[i] = s2; |
| 2401 | } |
| 2402 | else |
| 2403 | { |
| 2404 | vres[i] = s1; |
| 2405 | } |
| 2406 | |
| 2407 | if ((s1 - s2) != 0) |
| 2408 | { |
| 2409 | if (s1 != ~s2) |
| 2410 | { |
| 2411 | SET_ZERO_FLAG(i); |
| 2412 | } |
| 2413 | } |
| 2414 | } |
| 2415 | if (vce != 0) |
| 2416 | { |
| 2417 | SET_CLIP1_FLAG(i); |
| 2418 | } |
| 2419 | ACCUM_L(i) = vres[i]; |
| 2420 | } |
| 2421 | WRITEBACK_RESULT(); |
| 2422 | break; |
| 2423 | } |
| 2424 | |
| 2425 | case 0x26: /* VCR */ |
| 2426 | { |
| 2427 | // 31 25 24 20 15 10 5 0 |
| 2428 | // ------------------------------------------------------ |
| 2429 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 2430 | // ------------------------------------------------------ |
| 2431 | // |
| 2432 | // Vector clip reverse |
| 2433 | |
| 2434 | CLEAR_CARRY_FLAGS(); |
| 2435 | CLEAR_COMPARE_FLAGS(); |
| 2436 | CLEAR_CLIP1_FLAGS(); |
| 2437 | CLEAR_ZERO_FLAGS(); |
| 2438 | CLEAR_CLIP2_FLAGS(); |
| 2439 | |
| 2440 | for (i=0; i < 8; i++) |
| 2441 | { |
| 2442 | INT16 s1 = VREG_S(VS1REG, i); |
| 2443 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2444 | |
| 2445 | if ((INT16)(s1 ^ s2) < 0) |
| 2446 | { |
| 2447 | if (s2 < 0) |
| 2448 | { |
| 2449 | SET_CLIP2_FLAG(i); |
| 2450 | } |
| 2451 | if ((s1 + s2) <= 0) |
| 2452 | { |
| 2453 | ACCUM_L(i) = ~((UINT16)s2); |
| 2454 | SET_COMPARE_FLAG(i); |
| 2455 | } |
| 2456 | else |
| 2457 | { |
| 2458 | ACCUM_L(i) = s1; |
| 2459 | } |
| 2460 | } |
| 2461 | else |
| 2462 | { |
| 2463 | if (s2 < 0) |
| 2464 | { |
| 2465 | SET_COMPARE_FLAG(i); |
| 2466 | } |
| 2467 | if ((s1 - s2) >= 0) |
| 2468 | { |
| 2469 | ACCUM_L(i) = s2; |
| 2470 | SET_CLIP2_FLAG(i); |
| 2471 | } |
| 2472 | else |
| 2473 | { |
| 2474 | ACCUM_L(i) = s1; |
| 2475 | } |
| 2476 | } |
| 2477 | |
| 2478 | vres[i] = ACCUM_L(i); |
| 2479 | } |
| 2480 | WRITEBACK_RESULT(); |
| 2481 | break; |
| 2482 | } |
| 2483 | |
| 2484 | case 0x27: /* VMRG */ |
| 2485 | { |
| 2486 | // 31 25 24 20 15 10 5 0 |
| 2487 | // ------------------------------------------------------ |
| 2488 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 2489 | // ------------------------------------------------------ |
| 2490 | // |
| 2491 | // Merges two vectors according to compare flags |
| 2492 | |
| 2493 | for (i = 0; i < 8; i++) |
| 2494 | { |
| 2495 | if (COMPARE_FLAG(i) != 0) |
| 2496 | { |
| 2497 | vres[i] = VREG_S(VS1REG, i); |
| 2498 | } |
| 2499 | else |
| 2500 | { |
| 2501 | vres[i] = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2502 | } |
| 2503 | |
| 2504 | ACCUM_L(i) = vres[i]; |
| 2505 | } |
| 2506 | WRITEBACK_RESULT(); |
| 2507 | break; |
| 2508 | } |
| 2509 | case 0x28: /* VAND */ |
| 2510 | { |
| 2511 | // 31 25 24 20 15 10 5 0 |
| 2512 | // ------------------------------------------------------ |
| 2513 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 2514 | // ------------------------------------------------------ |
| 2515 | // |
| 2516 | // Bitwise AND of two vector registers |
| 2517 | |
| 2518 | for (i = 0; i < 8; i++) |
| 2519 | { |
| 2520 | vres[i] = VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2521 | ACCUM_L(i) = vres[i]; |
| 2522 | } |
| 2523 | WRITEBACK_RESULT(); |
| 2524 | break; |
| 2525 | } |
| 2526 | case 0x29: /* VNAND */ |
| 2527 | { |
| 2528 | // 31 25 24 20 15 10 5 0 |
| 2529 | // ------------------------------------------------------ |
| 2530 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 2531 | // ------------------------------------------------------ |
| 2532 | // |
| 2533 | // Bitwise NOT AND of two vector registers |
| 2534 | |
| 2535 | for (i = 0; i < 8; i++) |
| 2536 | { |
| 2537 | vres[i] = ~((VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2538 | ACCUM_L(i) = vres[i]; |
| 2539 | } |
| 2540 | WRITEBACK_RESULT(); |
| 2541 | break; |
| 2542 | } |
| 2543 | case 0x2a: /* VOR */ |
| 2544 | { |
| 2545 | // 31 25 24 20 15 10 5 0 |
| 2546 | // ------------------------------------------------------ |
| 2547 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2548 | // ------------------------------------------------------ |
| 2549 | // |
| 2550 | // Bitwise OR of two vector registers |
| 2551 | |
| 2552 | for (i = 0; i < 8; i++) |
| 2553 | { |
| 2554 | vres[i] = VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2555 | ACCUM_L(i) = vres[i]; |
| 2556 | } |
| 2557 | WRITEBACK_RESULT(); |
| 2558 | break; |
| 2559 | } |
| 2560 | case 0x2b: /* VNOR */ |
| 2561 | { |
| 2562 | // 31 25 24 20 15 10 5 0 |
| 2563 | // ------------------------------------------------------ |
| 2564 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2565 | // ------------------------------------------------------ |
| 2566 | // |
| 2567 | // Bitwise NOT OR of two vector registers |
| 2568 | |
| 2569 | for (i=0; i < 8; i++) |
| 2570 | { |
| 2571 | vres[i] = ~((VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2572 | ACCUM_L(i) = vres[i]; |
| 2573 | } |
| 2574 | WRITEBACK_RESULT(); |
| 2575 | break; |
| 2576 | } |
| 2577 | case 0x2c: /* VXOR */ |
| 2578 | { |
| 2579 | // 31 25 24 20 15 10 5 0 |
| 2580 | // ------------------------------------------------------ |
| 2581 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2582 | // ------------------------------------------------------ |
| 2583 | // |
| 2584 | // Bitwise XOR of two vector registers |
| 2585 | |
| 2586 | for (i=0; i < 8; i++) |
| 2587 | { |
| 2588 | vres[i] = VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2589 | ACCUM_L(i) = vres[i]; |
| 2590 | } |
| 2591 | WRITEBACK_RESULT(); |
| 2592 | break; |
| 2593 | } |
| 2594 | case 0x2d: /* VNXOR */ |
| 2595 | { |
| 2596 | // 31 25 24 20 15 10 5 0 |
| 2597 | // ------------------------------------------------------ |
| 2598 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2599 | // ------------------------------------------------------ |
| 2600 | // |
| 2601 | // Bitwise NOT XOR of two vector registers |
| 2602 | |
| 2603 | for (i=0; i < 8; i++) |
| 2604 | { |
| 2605 | vres[i] = ~((VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2606 | ACCUM_L(i) = vres[i]; |
| 2607 | } |
| 2608 | WRITEBACK_RESULT(); |
| 2609 | break; |
| 2610 | } |
| 2611 | |
| 2612 | case 0x30: /* VRCP */ |
| 2613 | { |
| 2614 | // 31 25 24 20 15 10 5 0 |
| 2615 | // ------------------------------------------------------ |
| 2616 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2617 | // ------------------------------------------------------ |
| 2618 | // |
| 2619 | // Calculates reciprocal |
| 2620 | INT32 shifter = 0; |
| 2621 | |
| 2622 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2623 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2624 | if (datainput) |
| 2625 | { |
| 2626 | for (i = 0; i < 32; i++) |
| 2627 | { |
| 2628 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2629 | { |
| 2630 | shifter = i; |
| 2631 | break; |
| 2632 | } |
| 2633 | } |
| 2634 | } |
| 2635 | else |
| 2636 | { |
| 2637 | shifter = 0x10; |
| 2638 | } |
| 2639 | |
| 2640 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2641 | INT32 fetchval = rsp_divtable[address]; |
| 2642 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2643 | if (rec < 0) |
| 2644 | { |
| 2645 | temp = ~temp; |
| 2646 | } |
| 2647 | if (!rec) |
| 2648 | { |
| 2649 | temp = 0x7fffffff; |
| 2650 | } |
| 2651 | else if (rec == 0xffff8000) |
| 2652 | { |
| 2653 | temp = 0xffff0000; |
| 2654 | } |
| 2655 | rec = temp; |
| 2656 | |
| 2657 | m_reciprocal_res = rec; |
| 2658 | m_dp_allowed = 0; |
| 2659 | |
| 2660 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2661 | |
| 2662 | for (i = 0; i < 8; i++) |
| 2663 | { |
| 2664 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2665 | } |
| 2666 | |
| 2667 | |
| 2668 | break; |
| 2669 | } |
| 2670 | |
| 2671 | case 0x31: /* VRCPL */ |
| 2672 | { |
| 2673 | // 31 25 24 20 15 10 5 0 |
| 2674 | // ------------------------------------------------------ |
| 2675 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2676 | // ------------------------------------------------------ |
| 2677 | // |
| 2678 | // Calculates reciprocal low part |
| 2679 | |
| 2680 | INT32 shifter = 0; |
| 2681 | |
| 2682 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2683 | INT32 datainput = rec; |
| 2684 | |
| 2685 | if (m_dp_allowed) |
| 2686 | { |
| 2687 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2688 | datainput = rec; |
| 2689 | |
| 2690 | if (rec < 0) |
| 2691 | { |
| 2692 | if (rec < -32768) |
| 2693 | { |
| 2694 | datainput = ~datainput; |
| 2695 | } |
| 2696 | else |
| 2697 | { |
| 2698 | datainput = -datainput; |
| 2699 | } |
| 2700 | } |
| 2701 | } |
| 2702 | else if (datainput < 0) |
| 2703 | { |
| 2704 | datainput = -datainput; |
| 2705 | |
| 2706 | shifter = 0x10; |
| 2707 | } |
| 2708 | |
| 2709 | |
| 2710 | for (i = 0; i < 32; i++) |
| 2711 | { |
| 2712 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2713 | { |
| 2714 | shifter = i; |
| 2715 | break; |
| 2716 | } |
| 2717 | } |
| 2718 | |
| 2719 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2720 | INT32 fetchval = rsp_divtable[address]; |
| 2721 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2722 | temp ^= rec >> 31; |
| 2723 | |
| 2724 | if (!rec) |
| 2725 | { |
| 2726 | temp = 0x7fffffff; |
| 2727 | } |
| 2728 | else if (rec == 0xffff8000) |
| 2729 | { |
| 2730 | temp = 0xffff0000; |
| 2731 | } |
| 2732 | rec = temp; |
| 2733 | |
| 2734 | m_reciprocal_res = rec; |
| 2735 | m_dp_allowed = 0; |
| 2736 | |
| 2737 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2738 | |
| 2739 | for (i = 0; i < 8; i++) |
| 2740 | { |
| 2741 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2742 | } |
| 2743 | |
| 2744 | break; |
| 2745 | } |
| 2746 | |
| 2747 | case 0x32: /* VRCPH */ |
| 2748 | { |
| 2749 | // 31 25 24 20 15 10 5 0 |
| 2750 | // ------------------------------------------------------ |
| 2751 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 2752 | // ------------------------------------------------------ |
| 2753 | // |
| 2754 | // Calculates reciprocal high part |
| 2755 | |
| 2756 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2757 | m_dp_allowed = 1; |
| 2758 | |
| 2759 | for (i = 0; i < 8; i++) |
| 2760 | { |
| 2761 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2762 | } |
| 2763 | |
| 2764 | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 2765 | |
| 2766 | break; |
| 2767 | } |
| 2768 | |
| 2769 | case 0x33: /* VMOV */ |
| 2770 | { |
| 2771 | // 31 25 24 20 15 10 5 0 |
| 2772 | // ------------------------------------------------------ |
| 2773 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 2774 | // ------------------------------------------------------ |
| 2775 | // |
| 2776 | // Moves element from vector to destination vector |
| 2777 | |
| 2778 | VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 2779 | for (i = 0; i < 8; i++) |
| 2780 | { |
| 2781 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2782 | } |
| 2783 | break; |
| 2784 | } |
| 2785 | |
| 2786 | case 0x34: /* VRSQ */ |
| 2787 | { |
| 2788 | // 31 25 24 20 15 10 5 0 |
| 2789 | // ------------------------------------------------------ |
| 2790 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 2791 | // ------------------------------------------------------ |
| 2792 | // |
| 2793 | // Calculates reciprocal square-root |
| 2794 | |
| 2795 | INT32 shifter = 0; |
| 2796 | |
| 2797 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2798 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2799 | if (datainput) |
| 2800 | { |
| 2801 | for (i = 0; i < 32; i++) |
| 2802 | { |
| 2803 | if (datainput & (1 << ((~i) & 0x1f)))// (~i) & 0x1f is equivalent to 31 - i |
| 2804 | { |
| 2805 | shifter = i; |
| 2806 | break; |
| 2807 | } |
| 2808 | } |
| 2809 | } |
| 2810 | else |
| 2811 | { |
| 2812 | shifter = 0x10; |
| 2813 | } |
| 2814 | |
| 2815 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2816 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2817 | |
| 2818 | INT32 fetchval = rsp_divtable[address]; |
| 2819 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2820 | if (rec < 0) |
| 2821 | { |
| 2822 | temp = ~temp; |
| 2823 | } |
| 2824 | if (!rec) |
| 2825 | { |
| 2826 | temp = 0x7fffffff; |
| 2827 | } |
| 2828 | else if (rec == 0xffff8000) |
| 2829 | { |
| 2830 | temp = 0xffff0000; |
| 2831 | } |
| 2832 | rec = temp; |
| 2833 | |
| 2834 | m_reciprocal_res = rec; |
| 2835 | m_dp_allowed = 0; |
| 2836 | |
| 2837 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2838 | |
| 2839 | for (i = 0; i < 8; i++) |
| 2840 | { |
| 2841 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2842 | } |
| 2843 | |
| 2844 | break; |
| 2845 | } |
| 2846 | |
| 2847 | case 0x35: /* VRSQL */ |
| 2848 | { |
| 2849 | // 31 25 24 20 15 10 5 0 |
| 2850 | // ------------------------------------------------------ |
| 2851 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 2852 | // ------------------------------------------------------ |
| 2853 | // |
| 2854 | // Calculates reciprocal square-root low part |
| 2855 | |
| 2856 | INT32 shifter = 0; |
| 2857 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2858 | INT32 datainput = rec; |
| 2859 | |
| 2860 | if (m_dp_allowed) |
| 2861 | { |
| 2862 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2863 | datainput = rec; |
| 2864 | |
| 2865 | if (rec < 0) |
| 2866 | { |
| 2867 | if (rec < -32768) |
| 2868 | { |
| 2869 | datainput = ~datainput; |
| 2870 | } |
| 2871 | else |
| 2872 | { |
| 2873 | datainput = -datainput; |
| 2874 | } |
| 2875 | } |
| 2876 | } |
| 2877 | else if (datainput < 0) |
| 2878 | { |
| 2879 | datainput = -datainput; |
| 2880 | |
| 2881 | shifter = 0x10; |
| 2882 | } |
| 2883 | |
| 2884 | if (datainput) |
| 2885 | { |
| 2886 | for (i = 0; i < 32; i++) |
| 2887 | { |
| 2888 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2889 | { |
| 2890 | shifter = i; |
| 2891 | break; |
| 2892 | } |
| 2893 | } |
| 2894 | } |
| 2895 | |
| 2896 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2897 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2898 | |
| 2899 | INT32 fetchval = rsp_divtable[address]; |
| 2900 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2901 | temp ^= rec >> 31; |
| 2902 | |
| 2903 | if (!rec) |
| 2904 | { |
| 2905 | temp = 0x7fffffff; |
| 2906 | } |
| 2907 | else if (rec == 0xffff8000) |
| 2908 | { |
| 2909 | temp = 0xffff0000; |
| 2910 | } |
| 2911 | rec = temp; |
| 2912 | |
| 2913 | m_reciprocal_res = rec; |
| 2914 | m_dp_allowed = 0; |
| 2915 | |
| 2916 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2917 | |
| 2918 | for (i = 0; i < 8; i++) |
| 2919 | { |
| 2920 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2921 | } |
| 2922 | |
| 2923 | break; |
| 2924 | } |
| 2925 | |
| 2926 | case 0x36: /* VRSQH */ |
| 2927 | { |
| 2928 | // 31 25 24 20 15 10 5 0 |
| 2929 | // ------------------------------------------------------ |
| 2930 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 2931 | // ------------------------------------------------------ |
| 2932 | // |
| 2933 | // Calculates reciprocal square-root high part |
| 2934 | |
| 2935 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2936 | m_dp_allowed = 1; |
| 2937 | |
| 2938 | for (i=0; i < 8; i++) |
| 2939 | { |
| 2940 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2941 | } |
| 2942 | |
| 2943 | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 2944 | break; |
| 2945 | } |
| 2946 | |
| 2947 | case 0x37: /* VNOP */ |
| 2948 | { |
| 2949 | // 31 25 24 20 15 10 5 0 |
| 2950 | // ------------------------------------------------------ |
| 2951 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110111 | |
| 2952 | // ------------------------------------------------------ |
| 2953 | // |
| 2954 | // Vector null instruction |
| 2955 | |
| 2956 | break; |
| 2957 | } |
| 2958 | |
| 2959 | default: unimplemented_opcode(op); break; |
| 2960 | } |
| 2961 | } |
| 2962 | |
| 571 | 2963 | void rsp_device::execute_run() |
| 572 | 2964 | { |
| 573 | 2965 | if (m_isdrc) |
| r241959 | r241960 | |
| 617 | 3009 | { |
| 618 | 3010 | m_sp_set_status_func(0, 0x3, 0xffffffff); |
| 619 | 3011 | m_rsp_state->icount = MIN(m_rsp_state->icount, 1); |
| 3012 | |
| 3013 | if (LOG_INSTRUCTION_EXECUTION) fprintf(m_exec_output, "\n---------- break ----------\n\n"); |
| 3014 | |
| 620 | 3015 | break; |
| 621 | 3016 | } |
| 622 | 3017 | case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; |
| r241959 | r241960 | |
| 675 | 3070 | |
| 676 | 3071 | case 0x12: /* COP2 */ |
| 677 | 3072 | { |
| 678 | | m_cop2->handle_cop2(op); |
| 3073 | switch ((op >> 21) & 0x1f) |
| 3074 | { |
| 3075 | case 0x00: /* MFC2 */ |
| 3076 | { |
| 3077 | // 31 25 20 15 10 6 0 |
| 3078 | // --------------------------------------------------- |
| 3079 | // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | |
| 3080 | // --------------------------------------------------- |
| 3081 | // |
| 3082 | |
| 3083 | int el = (op >> 7) & 0xf; |
| 3084 | UINT16 b1 = VREG_B(RDREG, (el+0) & 0xf); |
| 3085 | UINT16 b2 = VREG_B(RDREG, (el+1) & 0xf); |
| 3086 | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); |
| 3087 | break; |
| 3088 | } |
| 3089 | case 0x02: /* CFC2 */ |
| 3090 | { |
| 3091 | // 31 25 20 15 10 0 |
| 3092 | // ------------------------------------------------ |
| 3093 | // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | |
| 3094 | // ------------------------------------------------ |
| 3095 | // |
| 3096 | |
| 3097 | if (RTREG) |
| 3098 | { |
| 3099 | switch(RDREG) |
| 3100 | { |
| 3101 | case 0: |
| 3102 | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3103 | ((CARRY_FLAG(1) & 1) << 1) | |
| 3104 | ((CARRY_FLAG(2) & 1) << 2) | |
| 3105 | ((CARRY_FLAG(3) & 1) << 3) | |
| 3106 | ((CARRY_FLAG(4) & 1) << 4) | |
| 3107 | ((CARRY_FLAG(5) & 1) << 5) | |
| 3108 | ((CARRY_FLAG(6) & 1) << 6) | |
| 3109 | ((CARRY_FLAG(7) & 1) << 7) | |
| 3110 | ((ZERO_FLAG(0) & 1) << 8) | |
| 3111 | ((ZERO_FLAG(1) & 1) << 9) | |
| 3112 | ((ZERO_FLAG(2) & 1) << 10) | |
| 3113 | ((ZERO_FLAG(3) & 1) << 11) | |
| 3114 | ((ZERO_FLAG(4) & 1) << 12) | |
| 3115 | ((ZERO_FLAG(5) & 1) << 13) | |
| 3116 | ((ZERO_FLAG(6) & 1) << 14) | |
| 3117 | ((ZERO_FLAG(7) & 1) << 15); |
| 3118 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3119 | break; |
| 3120 | case 1: |
| 3121 | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 3122 | ((COMPARE_FLAG(1) & 1) << 1) | |
| 3123 | ((COMPARE_FLAG(2) & 1) << 2) | |
| 3124 | ((COMPARE_FLAG(3) & 1) << 3) | |
| 3125 | ((COMPARE_FLAG(4) & 1) << 4) | |
| 3126 | ((COMPARE_FLAG(5) & 1) << 5) | |
| 3127 | ((COMPARE_FLAG(6) & 1) << 6) | |
| 3128 | ((COMPARE_FLAG(7) & 1) << 7) | |
| 3129 | ((CLIP2_FLAG(0) & 1) << 8) | |
| 3130 | ((CLIP2_FLAG(1) & 1) << 9) | |
| 3131 | ((CLIP2_FLAG(2) & 1) << 10) | |
| 3132 | ((CLIP2_FLAG(3) & 1) << 11) | |
| 3133 | ((CLIP2_FLAG(4) & 1) << 12) | |
| 3134 | ((CLIP2_FLAG(5) & 1) << 13) | |
| 3135 | ((CLIP2_FLAG(6) & 1) << 14) | |
| 3136 | ((CLIP2_FLAG(7) & 1) << 15); |
| 3137 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3138 | break; |
| 3139 | case 2: |
| 3140 | // Ancillary clipping flags |
| 3141 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3142 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3143 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3144 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3145 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3146 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3147 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3148 | ((CLIP1_FLAG(7) & 1) << 7); |
| 3149 | } |
| 3150 | } |
| 3151 | break; |
| 3152 | } |
| 3153 | case 0x04: /* MTC2 */ |
| 3154 | { |
| 3155 | // 31 25 20 15 10 6 0 |
| 3156 | // --------------------------------------------------- |
| 3157 | // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | |
| 3158 | // --------------------------------------------------- |
| 3159 | // |
| 3160 | |
| 3161 | int el = (op >> 7) & 0xf; |
| 3162 | W_VREG_B(RDREG, (el+0) & 0xf, (RTVAL >> 8) & 0xff); |
| 3163 | W_VREG_B(RDREG, (el+1) & 0xf, (RTVAL >> 0) & 0xff); |
| 3164 | break; |
| 3165 | } |
| 3166 | case 0x06: /* CTC2 */ |
| 3167 | { |
| 3168 | // 31 25 20 15 10 0 |
| 3169 | // ------------------------------------------------ |
| 3170 | // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | |
| 3171 | // ------------------------------------------------ |
| 3172 | // |
| 3173 | |
| 3174 | switch(RDREG) |
| 3175 | { |
| 3176 | case 0: |
| 3177 | CLEAR_CARRY_FLAGS(); |
| 3178 | CLEAR_ZERO_FLAGS(); |
| 3179 | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 3180 | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 3181 | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 3182 | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 3183 | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 3184 | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 3185 | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 3186 | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 3187 | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 3188 | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 3189 | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 3190 | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 3191 | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 3192 | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 3193 | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 3194 | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 3195 | break; |
| 3196 | case 1: |
| 3197 | CLEAR_COMPARE_FLAGS(); |
| 3198 | CLEAR_CLIP2_FLAGS(); |
| 3199 | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 3200 | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 3201 | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 3202 | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 3203 | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 3204 | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 3205 | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 3206 | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 3207 | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 3208 | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 3209 | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 3210 | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 3211 | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 3212 | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 3213 | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 3214 | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 3215 | break; |
| 3216 | case 2: |
| 3217 | CLEAR_CLIP1_FLAGS(); |
| 3218 | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 3219 | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 3220 | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 3221 | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 3222 | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 3223 | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 3224 | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 3225 | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 3226 | break; |
| 3227 | } |
| 3228 | break; |
| 3229 | } |
| 3230 | |
| 3231 | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 3232 | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 3233 | { |
| 3234 | handle_vector_ops(op); |
| 3235 | break; |
| 3236 | } |
| 3237 | |
| 3238 | default: unimplemented_opcode(op); break; |
| 3239 | } |
| 679 | 3240 | break; |
| 680 | 3241 | } |
| 681 | 3242 | |
| r241959 | r241960 | |
| 687 | 3248 | case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break; |
| 688 | 3249 | case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break; |
| 689 | 3250 | case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break; |
| 690 | | case 0x32: /* LWC2 */ m_cop2->handle_lwc2(op); break; |
| 691 | | case 0x3a: /* SWC2 */ m_cop2->handle_swc2(op); break; |
| 3251 | case 0x32: /* LWC2 */ handle_lwc2(op); break; |
| 3252 | case 0x3a: /* SWC2 */ handle_swc2(op); break; |
| 692 | 3253 | |
| 693 | 3254 | default: |
| 694 | 3255 | { |
| r241959 | r241960 | |
| 701 | 3262 | { |
| 702 | 3263 | int i, l; |
| 703 | 3264 | static UINT32 prev_regs[32]; |
| 3265 | static VECTOR_REG prev_vecs[32]; |
| 704 | 3266 | char string[200]; |
| 705 | 3267 | rsp_dasm_one(string, m_ppc, op); |
| 706 | 3268 | |
| r241959 | r241960 | |
| 726 | 3288 | prev_regs[i] = m_rsp_state->r[i]; |
| 727 | 3289 | } |
| 728 | 3290 | |
| 729 | | m_cop2->log_instruction_execution(); |
| 3291 | for (i=0; i < 32; i++) |
| 3292 | { |
| 3293 | if (m_v[i].d[0] != prev_vecs[i].d[0] || m_v[i].d[1] != prev_vecs[i].d[1]) |
| 3294 | { |
| 3295 | fprintf(m_exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i, |
| 3296 | (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7)); |
| 3297 | } |
| 3298 | prev_vecs[i].d[0] = m_v[i].d[0]; |
| 3299 | prev_vecs[i].d[1] = m_v[i].d[1]; |
| 3300 | } |
| 730 | 3301 | |
| 731 | 3302 | fprintf(m_exec_output, "\n"); |
| 732 | 3303 | |
trunk/src/emu/cpu/rsp/rspcp2.c
| r241959 | r241960 | |
| 1 | | /*************************************************************************** |
| 2 | | |
| 3 | | rspcp2.c |
| 4 | | |
| 5 | | Universal machine language-based Nintendo/SGI RSP COP2 emulator. |
| 6 | | Written by Harmony of the MESS team. |
| 7 | | |
| 8 | | Copyright the MESS team. |
| 9 | | Released for general non-commercial use under the MAME license |
| 10 | | Visit http://mamedev.org for licensing and usage restrictions. |
| 11 | | |
| 12 | | ***************************************************************************/ |
| 13 | | |
| 14 | | #include "emu.h" |
| 15 | | #include "rsp.h" |
| 16 | | #include "rspdiv.h" |
| 17 | | #include "rspcp2.h" |
| 18 | | #include "cpu/drcfe.h" |
| 19 | | #include "cpu/drcuml.h" |
| 20 | | #include "cpu/drcumlsh.h" |
| 21 | | |
| 22 | | using namespace uml; |
| 23 | | |
| 24 | | extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); |
| 25 | | |
| 26 | | /*************************************************************************** |
| 27 | | Helpful Defines |
| 28 | | ***************************************************************************/ |
| 29 | | |
| 30 | | #define VDREG ((op >> 6) & 0x1f) |
| 31 | | #define VS1REG ((op >> 11) & 0x1f) |
| 32 | | #define VS2REG ((op >> 16) & 0x1f) |
| 33 | | #define EL ((op >> 21) & 0xf) |
| 34 | | |
| 35 | | #define RSVAL (m_rsp.m_rsp_state->r[RSREG]) |
| 36 | | #define RTVAL (m_rsp.m_rsp_state->r[RTREG]) |
| 37 | | #define RDVAL (m_rsp.m_rsp_state->r[RDREG]) |
| 38 | | |
| 39 | | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 40 | | #define VREG_S(reg, offset) m_v[(reg)].s[(offset)] |
| 41 | | #define VREG_L(reg, offset) m_v[(reg)].l[(offset)] |
| 42 | | |
| 43 | | #define R_VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 44 | | #define R_VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] |
| 45 | | #define R_VREG_L(reg, offset) m_v[(reg)].l[(offset)] |
| 46 | | |
| 47 | | #define W_VREG_B(reg, offset, val) (m_v[(reg)].b[(offset)^1] = val) |
| 48 | | #define W_VREG_S(reg, offset, val) (m_v[(reg)].s[(offset)] = val) |
| 49 | | #define W_VREG_L(reg, offset, val) (m_v[(reg)].l[(offset)] = val) |
| 50 | | |
| 51 | | #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) |
| 52 | | |
| 53 | | #define CARRY 0 |
| 54 | | #define COMPARE 1 |
| 55 | | #define CLIP1 2 |
| 56 | | #define ZERO 3 |
| 57 | | #define CLIP2 4 |
| 58 | | |
| 59 | | #define ACCUM(x) m_accum[x].q |
| 60 | | #define ACCUM_H(x) (UINT16)m_accum[x].w[3] |
| 61 | | #define ACCUM_M(x) (UINT16)m_accum[x].w[2] |
| 62 | | #define ACCUM_L(x) (UINT16)m_accum[x].w[1] |
| 63 | | #define ACCUM_LL(x) (UINT16)m_accum[x].w[0] |
| 64 | | |
| 65 | | #define SET_ACCUM_H(v, x) m_accum[x].w[3] = v; |
| 66 | | #define SET_ACCUM_M(v, x) m_accum[x].w[2] = v; |
| 67 | | #define SET_ACCUM_L(v, x) m_accum[x].w[1] = v; |
| 68 | | #define SET_ACCUM_LL(v, x) m_accum[x].w[0] = v; |
| 69 | | |
| 70 | | #define CARRY_FLAG(x) (m_vflag[CARRY][x & 7] != 0 ? 0xffff : 0) |
| 71 | | #define COMPARE_FLAG(x) (m_vflag[COMPARE][x & 7] != 0 ? 0xffff : 0) |
| 72 | | #define CLIP1_FLAG(x) (m_vflag[CLIP1][x & 7] != 0 ? 0xffff : 0) |
| 73 | | #define ZERO_FLAG(x) (m_vflag[ZERO][x & 7] != 0 ? 0xffff : 0) |
| 74 | | #define CLIP2_FLAG(x) (m_vflag[CLIP2][x & 7] != 0 ? 0xffff : 0) |
| 75 | | |
| 76 | | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[CARRY], 0, 16); } |
| 77 | | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[COMPARE], 0, 16); } |
| 78 | | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[CLIP1], 0, 16); } |
| 79 | | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[ZERO], 0, 16); } |
| 80 | | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[CLIP2], 0, 16); } |
| 81 | | |
| 82 | | #define SET_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0xffff; } |
| 83 | | #define SET_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0xffff; } |
| 84 | | #define SET_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0xffff; } |
| 85 | | #define SET_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0xffff; } |
| 86 | | #define SET_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0xffff; } |
| 87 | | |
| 88 | | #define CLEAR_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0; } |
| 89 | | #define CLEAR_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0; } |
| 90 | | #define CLEAR_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0; } |
| 91 | | #define CLEAR_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0; } |
| 92 | | #define CLEAR_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0; } |
| 93 | | |
| 94 | | #define WRITEBACK_RESULT() { \ |
| 95 | | VREG_S(VDREG, 0) = m_vres[0]; \ |
| 96 | | VREG_S(VDREG, 1) = m_vres[1]; \ |
| 97 | | VREG_S(VDREG, 2) = m_vres[2]; \ |
| 98 | | VREG_S(VDREG, 3) = m_vres[3]; \ |
| 99 | | VREG_S(VDREG, 4) = m_vres[4]; \ |
| 100 | | VREG_S(VDREG, 5) = m_vres[5]; \ |
| 101 | | VREG_S(VDREG, 6) = m_vres[6]; \ |
| 102 | | VREG_S(VDREG, 7) = m_vres[7]; \ |
| 103 | | } |
| 104 | | |
| 105 | | static const int vector_elements_2[16][8] = |
| 106 | | { |
| 107 | | { 0, 1, 2, 3, 4, 5, 6, 7 }, // none |
| 108 | | { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? |
| 109 | | { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q |
| 110 | | { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q |
| 111 | | { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h |
| 112 | | { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h |
| 113 | | { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h |
| 114 | | { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h |
| 115 | | { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 |
| 116 | | { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 |
| 117 | | { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 |
| 118 | | { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 |
| 119 | | { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 |
| 120 | | { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 |
| 121 | | { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 |
| 122 | | { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 |
| 123 | | }; |
| 124 | | |
| 125 | | rsp_cop2::rsp_cop2(rsp_device &rsp, running_machine &machine) |
| 126 | | : m_rsp(rsp) |
| 127 | | , m_machine(machine) |
| 128 | | , m_reciprocal_res(0) |
| 129 | | , m_reciprocal_high(0) |
| 130 | | , m_dp_allowed(0) |
| 131 | | { |
| 132 | | memset(m_vres, 0, sizeof(m_vres)); |
| 133 | | memset(m_v, 0, sizeof(m_v)); |
| 134 | | memset(m_vflag, 0, sizeof(m_vflag)); |
| 135 | | memset(m_accum, 0, sizeof(m_accum)); |
| 136 | | } |
| 137 | | |
| 138 | | void rsp_cop2::init() |
| 139 | | { |
| 140 | | CLEAR_CARRY_FLAGS(); |
| 141 | | CLEAR_COMPARE_FLAGS(); |
| 142 | | CLEAR_CLIP1_FLAGS(); |
| 143 | | CLEAR_ZERO_FLAGS(); |
| 144 | | CLEAR_CLIP2_FLAGS(); |
| 145 | | } |
| 146 | | |
| 147 | | void rsp_cop2::start() |
| 148 | | { |
| 149 | | for(int regIdx = 0; regIdx < 32; regIdx++ ) |
| 150 | | { |
| 151 | | m_v[regIdx].d[0] = 0; |
| 152 | | m_v[regIdx].d[1] = 0; |
| 153 | | } |
| 154 | | |
| 155 | | CLEAR_CARRY_FLAGS(); |
| 156 | | CLEAR_COMPARE_FLAGS(); |
| 157 | | CLEAR_CLIP1_FLAGS(); |
| 158 | | CLEAR_ZERO_FLAGS(); |
| 159 | | CLEAR_CLIP2_FLAGS(); |
| 160 | | m_reciprocal_res = 0; |
| 161 | | m_reciprocal_high = 0; |
| 162 | | |
| 163 | | // Accumulators do not power on to a random state |
| 164 | | for(int accumIdx = 0; accumIdx < 8; accumIdx++ ) |
| 165 | | { |
| 166 | | m_accum[accumIdx].q = 0; |
| 167 | | } |
| 168 | | } |
| 169 | | |
| 170 | | void rsp_cop2::state_string_export(const int index, astring &string) |
| 171 | | { |
| 172 | | switch (index) |
| 173 | | { |
| 174 | | case RSP_V0: |
| 175 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); |
| 176 | | break; |
| 177 | | case RSP_V1: |
| 178 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); |
| 179 | | break; |
| 180 | | case RSP_V2: |
| 181 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); |
| 182 | | break; |
| 183 | | case RSP_V3: |
| 184 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); |
| 185 | | break; |
| 186 | | case RSP_V4: |
| 187 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); |
| 188 | | break; |
| 189 | | case RSP_V5: |
| 190 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); |
| 191 | | break; |
| 192 | | case RSP_V6: |
| 193 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); |
| 194 | | break; |
| 195 | | case RSP_V7: |
| 196 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); |
| 197 | | break; |
| 198 | | case RSP_V8: |
| 199 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); |
| 200 | | break; |
| 201 | | case RSP_V9: |
| 202 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); |
| 203 | | break; |
| 204 | | case RSP_V10: |
| 205 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); |
| 206 | | break; |
| 207 | | case RSP_V11: |
| 208 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); |
| 209 | | break; |
| 210 | | case RSP_V12: |
| 211 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); |
| 212 | | break; |
| 213 | | case RSP_V13: |
| 214 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); |
| 215 | | break; |
| 216 | | case RSP_V14: |
| 217 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); |
| 218 | | break; |
| 219 | | case RSP_V15: |
| 220 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); |
| 221 | | break; |
| 222 | | case RSP_V16: |
| 223 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); |
| 224 | | break; |
| 225 | | case RSP_V17: |
| 226 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); |
| 227 | | break; |
| 228 | | case RSP_V18: |
| 229 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); |
| 230 | | break; |
| 231 | | case RSP_V19: |
| 232 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); |
| 233 | | break; |
| 234 | | case RSP_V20: |
| 235 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); |
| 236 | | break; |
| 237 | | case RSP_V21: |
| 238 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); |
| 239 | | break; |
| 240 | | case RSP_V22: |
| 241 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); |
| 242 | | break; |
| 243 | | case RSP_V23: |
| 244 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); |
| 245 | | break; |
| 246 | | case RSP_V24: |
| 247 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); |
| 248 | | break; |
| 249 | | case RSP_V25: |
| 250 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); |
| 251 | | break; |
| 252 | | case RSP_V26: |
| 253 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); |
| 254 | | break; |
| 255 | | case RSP_V27: |
| 256 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); |
| 257 | | break; |
| 258 | | case RSP_V28: |
| 259 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); |
| 260 | | break; |
| 261 | | case RSP_V29: |
| 262 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); |
| 263 | | break; |
| 264 | | case RSP_V30: |
| 265 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); |
| 266 | | break; |
| 267 | | case RSP_V31: |
| 268 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); |
| 269 | | break; |
| 270 | | } |
| 271 | | } |
| 272 | | |
| 273 | | /*************************************************************************** |
| 274 | | Vector Load Instructions |
| 275 | | ***************************************************************************/ |
| 276 | | |
| 277 | | void rsp_cop2::handle_lwc2(UINT32 op) |
| 278 | | { |
| 279 | | int i, end; |
| 280 | | UINT32 ea; |
| 281 | | int dest = (op >> 16) & 0x1f; |
| 282 | | int base = (op >> 21) & 0x1f; |
| 283 | | int index = (op >> 7) & 0xf; |
| 284 | | int offset = (op & 0x7f); |
| 285 | | if (offset & 0x40) |
| 286 | | offset |= 0xffffffc0; |
| 287 | | |
| 288 | | switch ((op >> 11) & 0x1f) |
| 289 | | { |
| 290 | | case 0x00: /* LBV */ |
| 291 | | { |
| 292 | | // 31 25 20 15 10 6 0 |
| 293 | | // -------------------------------------------------- |
| 294 | | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 295 | | // -------------------------------------------------- |
| 296 | | // |
| 297 | | // Load 1 byte to vector byte index |
| 298 | | |
| 299 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 300 | | VREG_B(dest, index) = m_rsp.READ8(ea); |
| 301 | | break; |
| 302 | | } |
| 303 | | case 0x01: /* LSV */ |
| 304 | | { |
| 305 | | // 31 25 20 15 10 6 0 |
| 306 | | // -------------------------------------------------- |
| 307 | | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 308 | | // -------------------------------------------------- |
| 309 | | // |
| 310 | | // Loads 2 bytes starting from vector byte index |
| 311 | | |
| 312 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 313 | | |
| 314 | | end = index + 2; |
| 315 | | |
| 316 | | for (i=index; i < end; i++) |
| 317 | | { |
| 318 | | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 319 | | ea++; |
| 320 | | } |
| 321 | | break; |
| 322 | | } |
| 323 | | case 0x02: /* LLV */ |
| 324 | | { |
| 325 | | // 31 25 20 15 10 6 0 |
| 326 | | // -------------------------------------------------- |
| 327 | | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 328 | | // -------------------------------------------------- |
| 329 | | // |
| 330 | | // Loads 4 bytes starting from vector byte index |
| 331 | | |
| 332 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 333 | | |
| 334 | | end = index + 4; |
| 335 | | |
| 336 | | for (i=index; i < end; i++) |
| 337 | | { |
| 338 | | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 339 | | ea++; |
| 340 | | } |
| 341 | | break; |
| 342 | | } |
| 343 | | case 0x03: /* LDV */ |
| 344 | | { |
| 345 | | // 31 25 20 15 10 6 0 |
| 346 | | // -------------------------------------------------- |
| 347 | | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 348 | | // -------------------------------------------------- |
| 349 | | // |
| 350 | | // Loads 8 bytes starting from vector byte index |
| 351 | | |
| 352 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 353 | | |
| 354 | | end = index + 8; |
| 355 | | |
| 356 | | for (i=index; i < end; i++) |
| 357 | | { |
| 358 | | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 359 | | ea++; |
| 360 | | } |
| 361 | | break; |
| 362 | | } |
| 363 | | case 0x04: /* LQV */ |
| 364 | | { |
| 365 | | // 31 25 20 15 10 6 0 |
| 366 | | // -------------------------------------------------- |
| 367 | | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 368 | | // -------------------------------------------------- |
| 369 | | // |
| 370 | | // Loads up to 16 bytes starting from vector byte index |
| 371 | | |
| 372 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 373 | | |
| 374 | | end = index + (16 - (ea & 0xf)); |
| 375 | | if (end > 16) end = 16; |
| 376 | | |
| 377 | | for (i=index; i < end; i++) |
| 378 | | { |
| 379 | | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 380 | | ea++; |
| 381 | | } |
| 382 | | break; |
| 383 | | } |
| 384 | | case 0x05: /* LRV */ |
| 385 | | { |
| 386 | | // 31 25 20 15 10 6 0 |
| 387 | | // -------------------------------------------------- |
| 388 | | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 389 | | // -------------------------------------------------- |
| 390 | | // |
| 391 | | // Loads up to 16 bytes starting from right side until 16-byte boundary |
| 392 | | |
| 393 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 394 | | |
| 395 | | index = 16 - ((ea & 0xf) - index); |
| 396 | | end = 16; |
| 397 | | ea &= ~0xf; |
| 398 | | |
| 399 | | for (i=index; i < end; i++) |
| 400 | | { |
| 401 | | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 402 | | ea++; |
| 403 | | } |
| 404 | | break; |
| 405 | | } |
| 406 | | case 0x06: /* LPV */ |
| 407 | | { |
| 408 | | // 31 25 20 15 10 6 0 |
| 409 | | // -------------------------------------------------- |
| 410 | | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 411 | | // -------------------------------------------------- |
| 412 | | // |
| 413 | | // Loads a byte as the upper 8 bits of each element |
| 414 | | |
| 415 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 416 | | |
| 417 | | for (i=0; i < 8; i++) |
| 418 | | { |
| 419 | | VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 420 | | } |
| 421 | | break; |
| 422 | | } |
| 423 | | case 0x07: /* LUV */ |
| 424 | | { |
| 425 | | // 31 25 20 15 10 6 0 |
| 426 | | // -------------------------------------------------- |
| 427 | | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 428 | | // -------------------------------------------------- |
| 429 | | // |
| 430 | | // Loads a byte as the bits 14-7 of each element |
| 431 | | |
| 432 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 433 | | |
| 434 | | for (i=0; i < 8; i++) |
| 435 | | { |
| 436 | | VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 437 | | } |
| 438 | | break; |
| 439 | | } |
| 440 | | case 0x08: /* LHV */ |
| 441 | | { |
| 442 | | // 31 25 20 15 10 6 0 |
| 443 | | // -------------------------------------------------- |
| 444 | | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 445 | | // -------------------------------------------------- |
| 446 | | // |
| 447 | | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 448 | | |
| 449 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 450 | | |
| 451 | | for (i=0; i < 8; i++) |
| 452 | | { |
| 453 | | VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 454 | | } |
| 455 | | break; |
| 456 | | } |
| 457 | | case 0x09: /* LFV */ |
| 458 | | { |
| 459 | | // 31 25 20 15 10 6 0 |
| 460 | | // -------------------------------------------------- |
| 461 | | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 462 | | // -------------------------------------------------- |
| 463 | | // |
| 464 | | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 465 | | |
| 466 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 467 | | |
| 468 | | // not sure what happens if 16-byte boundary is crossed... |
| 469 | | |
| 470 | | end = (index >> 1) + 4; |
| 471 | | |
| 472 | | for (i=index >> 1; i < end; i++) |
| 473 | | { |
| 474 | | VREG_S(dest, i) = m_rsp.READ8(ea) << 7; |
| 475 | | ea += 4; |
| 476 | | } |
| 477 | | break; |
| 478 | | } |
| 479 | | case 0x0a: /* LWV */ |
| 480 | | { |
| 481 | | // 31 25 20 15 10 6 0 |
| 482 | | // -------------------------------------------------- |
| 483 | | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 484 | | // -------------------------------------------------- |
| 485 | | // |
| 486 | | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 487 | | // after byte index 15 |
| 488 | | |
| 489 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 490 | | |
| 491 | | end = (16 - index) + 16; |
| 492 | | |
| 493 | | for (i=(16 - index); i < end; i++) |
| 494 | | { |
| 495 | | VREG_B(dest, i & 0xf) = m_rsp.READ8(ea); |
| 496 | | ea += 4; |
| 497 | | } |
| 498 | | break; |
| 499 | | } |
| 500 | | case 0x0b: /* LTV */ |
| 501 | | { |
| 502 | | // 31 25 20 15 10 6 0 |
| 503 | | // -------------------------------------------------- |
| 504 | | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 505 | | // -------------------------------------------------- |
| 506 | | // |
| 507 | | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 508 | | |
| 509 | | // FIXME: has a small problem with odd indices |
| 510 | | |
| 511 | | int element; |
| 512 | | int vs = dest; |
| 513 | | int ve = dest + 8; |
| 514 | | if (ve > 32) |
| 515 | | ve = 32; |
| 516 | | |
| 517 | | element = 7 - (index >> 1); |
| 518 | | |
| 519 | | if (index & 1) fatalerror("RSP: LTV: index = %d\n", index); |
| 520 | | |
| 521 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 522 | | |
| 523 | | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 524 | | for (i=vs; i < ve; i++) |
| 525 | | { |
| 526 | | element = ((8 - (index >> 1) + (i-vs)) << 1); |
| 527 | | VREG_B(i, (element & 0xf)) = m_rsp.READ8(ea); |
| 528 | | VREG_B(i, ((element + 1) & 0xf)) = m_rsp.READ8(ea + 1); |
| 529 | | |
| 530 | | ea += 2; |
| 531 | | } |
| 532 | | break; |
| 533 | | } |
| 534 | | |
| 535 | | default: |
| 536 | | { |
| 537 | | m_rsp.unimplemented_opcode(op); |
| 538 | | break; |
| 539 | | } |
| 540 | | } |
| 541 | | } |
| 542 | | |
| 543 | | |
| 544 | | /*************************************************************************** |
| 545 | | Vector Store Instructions |
| 546 | | ***************************************************************************/ |
| 547 | | |
| 548 | | void rsp_cop2::handle_swc2(UINT32 op) |
| 549 | | { |
| 550 | | int i, end; |
| 551 | | int eaoffset; |
| 552 | | UINT32 ea; |
| 553 | | int dest = (op >> 16) & 0x1f; |
| 554 | | int base = (op >> 21) & 0x1f; |
| 555 | | int index = (op >> 7) & 0xf; |
| 556 | | int offset = (op & 0x7f); |
| 557 | | if (offset & 0x40) |
| 558 | | offset |= 0xffffffc0; |
| 559 | | |
| 560 | | switch ((op >> 11) & 0x1f) |
| 561 | | { |
| 562 | | case 0x00: /* SBV */ |
| 563 | | { |
| 564 | | // 31 25 20 15 10 6 0 |
| 565 | | // -------------------------------------------------- |
| 566 | | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 567 | | // -------------------------------------------------- |
| 568 | | // |
| 569 | | // Stores 1 byte from vector byte index |
| 570 | | |
| 571 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 572 | | m_rsp.WRITE8(ea, VREG_B(dest, index)); |
| 573 | | break; |
| 574 | | } |
| 575 | | case 0x01: /* SSV */ |
| 576 | | { |
| 577 | | // 31 25 20 15 10 6 0 |
| 578 | | // -------------------------------------------------- |
| 579 | | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 580 | | // -------------------------------------------------- |
| 581 | | // |
| 582 | | // Stores 2 bytes starting from vector byte index |
| 583 | | |
| 584 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 585 | | |
| 586 | | end = index + 2; |
| 587 | | |
| 588 | | for (i=index; i < end; i++) |
| 589 | | { |
| 590 | | m_rsp.WRITE8(ea, VREG_B(dest, i)); |
| 591 | | ea++; |
| 592 | | } |
| 593 | | break; |
| 594 | | } |
| 595 | | case 0x02: /* SLV */ |
| 596 | | { |
| 597 | | // 31 25 20 15 10 6 0 |
| 598 | | // -------------------------------------------------- |
| 599 | | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 600 | | // -------------------------------------------------- |
| 601 | | // |
| 602 | | // Stores 4 bytes starting from vector byte index |
| 603 | | |
| 604 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 605 | | |
| 606 | | end = index + 4; |
| 607 | | |
| 608 | | for (i=index; i < end; i++) |
| 609 | | { |
| 610 | | m_rsp.WRITE8(ea, VREG_B(dest, i)); |
| 611 | | ea++; |
| 612 | | } |
| 613 | | break; |
| 614 | | } |
| 615 | | case 0x03: /* SDV */ |
| 616 | | { |
| 617 | | // 31 25 20 15 10 6 0 |
| 618 | | // -------------------------------------------------- |
| 619 | | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 620 | | // -------------------------------------------------- |
| 621 | | // |
| 622 | | // Stores 8 bytes starting from vector byte index |
| 623 | | |
| 624 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 625 | | |
| 626 | | end = index + 8; |
| 627 | | |
| 628 | | for (i=index; i < end; i++) |
| 629 | | { |
| 630 | | m_rsp.WRITE8(ea, VREG_B(dest, i)); |
| 631 | | ea++; |
| 632 | | } |
| 633 | | break; |
| 634 | | } |
| 635 | | case 0x04: /* SQV */ |
| 636 | | { |
| 637 | | // 31 25 20 15 10 6 0 |
| 638 | | // -------------------------------------------------- |
| 639 | | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 640 | | // -------------------------------------------------- |
| 641 | | // |
| 642 | | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 643 | | |
| 644 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 645 | | |
| 646 | | end = index + (16 - (ea & 0xf)); |
| 647 | | |
| 648 | | for (i=index; i < end; i++) |
| 649 | | { |
| 650 | | m_rsp.WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 651 | | ea++; |
| 652 | | } |
| 653 | | break; |
| 654 | | } |
| 655 | | case 0x05: /* SRV */ |
| 656 | | { |
| 657 | | // 31 25 20 15 10 6 0 |
| 658 | | // -------------------------------------------------- |
| 659 | | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 660 | | // -------------------------------------------------- |
| 661 | | // |
| 662 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 663 | | |
| 664 | | int o; |
| 665 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 666 | | |
| 667 | | end = index + (ea & 0xf); |
| 668 | | o = (16 - (ea & 0xf)) & 0xf; |
| 669 | | ea &= ~0xf; |
| 670 | | |
| 671 | | for (i=index; i < end; i++) |
| 672 | | { |
| 673 | | m_rsp.WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 674 | | ea++; |
| 675 | | } |
| 676 | | break; |
| 677 | | } |
| 678 | | case 0x06: /* SPV */ |
| 679 | | { |
| 680 | | // 31 25 20 15 10 6 0 |
| 681 | | // -------------------------------------------------- |
| 682 | | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 683 | | // -------------------------------------------------- |
| 684 | | // |
| 685 | | // Stores upper 8 bits of each element |
| 686 | | |
| 687 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 688 | | end = index + 8; |
| 689 | | |
| 690 | | for (i=index; i < end; i++) |
| 691 | | { |
| 692 | | if ((i & 0xf) < 8) |
| 693 | | { |
| 694 | | m_rsp.WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1))); |
| 695 | | } |
| 696 | | else |
| 697 | | { |
| 698 | | m_rsp.WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 699 | | } |
| 700 | | ea++; |
| 701 | | } |
| 702 | | break; |
| 703 | | } |
| 704 | | case 0x07: /* SUV */ |
| 705 | | { |
| 706 | | // 31 25 20 15 10 6 0 |
| 707 | | // -------------------------------------------------- |
| 708 | | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 709 | | // -------------------------------------------------- |
| 710 | | // |
| 711 | | // Stores bits 14-7 of each element |
| 712 | | |
| 713 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 714 | | end = index + 8; |
| 715 | | |
| 716 | | for (i=index; i < end; i++) |
| 717 | | { |
| 718 | | if ((i & 0xf) < 8) |
| 719 | | { |
| 720 | | m_rsp.WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 721 | | } |
| 722 | | else |
| 723 | | { |
| 724 | | m_rsp.WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 725 | | } |
| 726 | | ea++; |
| 727 | | } |
| 728 | | break; |
| 729 | | } |
| 730 | | case 0x08: /* SHV */ |
| 731 | | { |
| 732 | | // 31 25 20 15 10 6 0 |
| 733 | | // -------------------------------------------------- |
| 734 | | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 735 | | // -------------------------------------------------- |
| 736 | | // |
| 737 | | // Stores bits 14-7 of each element, with 2-byte stride |
| 738 | | |
| 739 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 740 | | |
| 741 | | for (i=0; i < 8; i++) |
| 742 | | { |
| 743 | | UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) | |
| 744 | | ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7); |
| 745 | | |
| 746 | | m_rsp.WRITE8(ea, d); |
| 747 | | ea += 2; |
| 748 | | } |
| 749 | | break; |
| 750 | | } |
| 751 | | case 0x09: /* SFV */ |
| 752 | | { |
| 753 | | // 31 25 20 15 10 6 0 |
| 754 | | // -------------------------------------------------- |
| 755 | | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 756 | | // -------------------------------------------------- |
| 757 | | // |
| 758 | | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 759 | | |
| 760 | | // FIXME: only works for index 0 and index 8 |
| 761 | | |
| 762 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 763 | | |
| 764 | | eaoffset = ea & 0xf; |
| 765 | | ea &= ~0xf; |
| 766 | | |
| 767 | | end = (index >> 1) + 4; |
| 768 | | |
| 769 | | for (i=index >> 1; i < end; i++) |
| 770 | | { |
| 771 | | m_rsp.WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 772 | | eaoffset += 4; |
| 773 | | } |
| 774 | | break; |
| 775 | | } |
| 776 | | case 0x0a: /* SWV */ |
| 777 | | { |
| 778 | | // 31 25 20 15 10 6 0 |
| 779 | | // -------------------------------------------------- |
| 780 | | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 781 | | // -------------------------------------------------- |
| 782 | | // |
| 783 | | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 784 | | // after byte index 15 |
| 785 | | |
| 786 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 787 | | |
| 788 | | eaoffset = ea & 0xf; |
| 789 | | ea &= ~0xf; |
| 790 | | |
| 791 | | end = index + 16; |
| 792 | | |
| 793 | | for (i=index; i < end; i++) |
| 794 | | { |
| 795 | | m_rsp.WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); |
| 796 | | eaoffset++; |
| 797 | | } |
| 798 | | break; |
| 799 | | } |
| 800 | | case 0x0b: /* STV */ |
| 801 | | { |
| 802 | | // 31 25 20 15 10 6 0 |
| 803 | | // -------------------------------------------------- |
| 804 | | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 805 | | // -------------------------------------------------- |
| 806 | | // |
| 807 | | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 808 | | |
| 809 | | int element; |
| 810 | | int vs = dest; |
| 811 | | int ve = dest + 8; |
| 812 | | if (ve > 32) |
| 813 | | ve = 32; |
| 814 | | |
| 815 | | element = 8 - (index >> 1); |
| 816 | | |
| 817 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 818 | | |
| 819 | | eaoffset = (ea & 0xf) + (element * 2); |
| 820 | | ea &= ~0xf; |
| 821 | | |
| 822 | | for (i=vs; i < ve; i++) |
| 823 | | { |
| 824 | | m_rsp.WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); |
| 825 | | eaoffset += 2; |
| 826 | | element++; |
| 827 | | } |
| 828 | | break; |
| 829 | | } |
| 830 | | |
| 831 | | default: |
| 832 | | { |
| 833 | | m_rsp.unimplemented_opcode(op); |
| 834 | | break; |
| 835 | | } |
| 836 | | } |
| 837 | | } |
| 838 | | |
| 839 | | /*************************************************************************** |
| 840 | | Vector Accumulator Helpers |
| 841 | | ***************************************************************************/ |
| 842 | | |
| 843 | | inline UINT16 rsp_cop2::SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive) |
| 844 | | { |
| 845 | | if ((INT16)ACCUM_H(accum) < 0) |
| 846 | | { |
| 847 | | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 848 | | { |
| 849 | | return negative; |
| 850 | | } |
| 851 | | else |
| 852 | | { |
| 853 | | if ((INT16)ACCUM_M(accum) >= 0) |
| 854 | | { |
| 855 | | return negative; |
| 856 | | } |
| 857 | | else |
| 858 | | { |
| 859 | | return ACCUM_M(accum); |
| 860 | | } |
| 861 | | } |
| 862 | | } |
| 863 | | else |
| 864 | | { |
| 865 | | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 866 | | { |
| 867 | | return positive; |
| 868 | | } |
| 869 | | else |
| 870 | | { |
| 871 | | if ((INT16)ACCUM_M(accum) < 0) |
| 872 | | { |
| 873 | | return positive; |
| 874 | | } |
| 875 | | else |
| 876 | | { |
| 877 | | return ACCUM_M(accum); |
| 878 | | } |
| 879 | | } |
| 880 | | } |
| 881 | | } |
| 882 | | |
| 883 | | UINT16 rsp_cop2::SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 884 | | { |
| 885 | | if ((INT16)ACCUM_H(accum) < 0) |
| 886 | | { |
| 887 | | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 888 | | { |
| 889 | | return negative; |
| 890 | | } |
| 891 | | else |
| 892 | | { |
| 893 | | if ((INT16)ACCUM_M(accum) >= 0) |
| 894 | | { |
| 895 | | return negative; |
| 896 | | } |
| 897 | | else |
| 898 | | { |
| 899 | | if (slice == 0) |
| 900 | | { |
| 901 | | return ACCUM_L(accum); |
| 902 | | } |
| 903 | | else if (slice == 1) |
| 904 | | { |
| 905 | | return ACCUM_M(accum); |
| 906 | | } |
| 907 | | } |
| 908 | | } |
| 909 | | } |
| 910 | | else |
| 911 | | { |
| 912 | | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 913 | | { |
| 914 | | return positive; |
| 915 | | } |
| 916 | | else |
| 917 | | { |
| 918 | | if ((INT16)ACCUM_M(accum) < 0) |
| 919 | | { |
| 920 | | return positive; |
| 921 | | } |
| 922 | | else |
| 923 | | { |
| 924 | | if (slice == 0) |
| 925 | | { |
| 926 | | return ACCUM_L(accum); |
| 927 | | } |
| 928 | | else |
| 929 | | { |
| 930 | | return ACCUM_M(accum); |
| 931 | | } |
| 932 | | } |
| 933 | | } |
| 934 | | } |
| 935 | | return 0; |
| 936 | | } |
| 937 | | |
| 938 | | |
| 939 | | /*************************************************************************** |
| 940 | | Vector Opcodes |
| 941 | | ***************************************************************************/ |
| 942 | | |
| 943 | | void rsp_cop2::handle_vector_ops(UINT32 op) |
| 944 | | { |
| 945 | | int i; |
| 946 | | |
| 947 | | // Opcode legend: |
| 948 | | // E = VS2 element type |
| 949 | | // S = VS1, Source vector 1 |
| 950 | | // T = VS2, Source vector 2 |
| 951 | | // D = Destination vector |
| 952 | | |
| 953 | | switch (op & 0x3f) |
| 954 | | { |
| 955 | | case 0x00: /* VMULF */ |
| 956 | | { |
| 957 | | // 31 25 24 20 15 10 5 0 |
| 958 | | // ------------------------------------------------------ |
| 959 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 960 | | // ------------------------------------------------------ |
| 961 | | // |
| 962 | | // Multiplies signed integer by signed integer * 2 |
| 963 | | |
| 964 | | for (i=0; i < 8; i++) |
| 965 | | { |
| 966 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 967 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 968 | | |
| 969 | | if (s1 == -32768 && s2 == -32768) |
| 970 | | { |
| 971 | | // overflow |
| 972 | | SET_ACCUM_H(0, i); |
| 973 | | SET_ACCUM_M(-32768, i); |
| 974 | | SET_ACCUM_L(-32768, i); |
| 975 | | m_vres[i] = 0x7fff; |
| 976 | | } |
| 977 | | else |
| 978 | | { |
| 979 | | INT64 r = s1 * s2 * 2; |
| 980 | | r += 0x8000; // rounding ? |
| 981 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 982 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 983 | | SET_ACCUM_L((UINT16)(r), i); |
| 984 | | m_vres[i] = ACCUM_M(i); |
| 985 | | } |
| 986 | | } |
| 987 | | WRITEBACK_RESULT(); |
| 988 | | |
| 989 | | break; |
| 990 | | } |
| 991 | | |
| 992 | | case 0x01: /* VMULU */ |
| 993 | | { |
| 994 | | // 31 25 24 20 15 10 5 0 |
| 995 | | // ------------------------------------------------------ |
| 996 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 997 | | // ------------------------------------------------------ |
| 998 | | // |
| 999 | | |
| 1000 | | for (i=0; i < 8; i++) |
| 1001 | | { |
| 1002 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1003 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1004 | | |
| 1005 | | INT64 r = s1 * s2 * 2; |
| 1006 | | r += 0x8000; // rounding ? |
| 1007 | | |
| 1008 | | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 1009 | | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 1010 | | SET_ACCUM_L((UINT16)(r), i); |
| 1011 | | |
| 1012 | | if (r < 0) |
| 1013 | | { |
| 1014 | | m_vres[i] = 0; |
| 1015 | | } |
| 1016 | | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 1017 | | { |
| 1018 | | m_vres[i] = -1; |
| 1019 | | } |
| 1020 | | else |
| 1021 | | { |
| 1022 | | m_vres[i] = ACCUM_M(i); |
| 1023 | | } |
| 1024 | | } |
| 1025 | | WRITEBACK_RESULT(); |
| 1026 | | break; |
| 1027 | | } |
| 1028 | | |
| 1029 | | case 0x04: /* VMUDL */ |
| 1030 | | { |
| 1031 | | // 31 25 24 20 15 10 5 0 |
| 1032 | | // ------------------------------------------------------ |
| 1033 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | |
| 1034 | | // ------------------------------------------------------ |
| 1035 | | // |
| 1036 | | // Multiplies unsigned fraction by unsigned fraction |
| 1037 | | // Stores the higher 16 bits of the 32-bit result to accumulator |
| 1038 | | // The low slice of accumulator is stored into destination element |
| 1039 | | |
| 1040 | | for (i=0; i < 8; i++) |
| 1041 | | { |
| 1042 | | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1043 | | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1044 | | UINT32 r = s1 * s2; |
| 1045 | | |
| 1046 | | SET_ACCUM_H(0, i); |
| 1047 | | SET_ACCUM_M(0, i); |
| 1048 | | SET_ACCUM_L((UINT16)(r >> 16), i); |
| 1049 | | |
| 1050 | | m_vres[i] = ACCUM_L(i); |
| 1051 | | } |
| 1052 | | WRITEBACK_RESULT(); |
| 1053 | | break; |
| 1054 | | } |
| 1055 | | |
| 1056 | | case 0x05: /* VMUDM */ |
| 1057 | | { |
| 1058 | | // 31 25 24 20 15 10 5 0 |
| 1059 | | // ------------------------------------------------------ |
| 1060 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1061 | | // ------------------------------------------------------ |
| 1062 | | // |
| 1063 | | // Multiplies signed integer by unsigned fraction |
| 1064 | | // The result is stored into accumulator |
| 1065 | | // The middle slice of accumulator is stored into destination element |
| 1066 | | |
| 1067 | | for (i=0; i < 8; i++) |
| 1068 | | { |
| 1069 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1070 | | INT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1071 | | INT32 r = s1 * s2; |
| 1072 | | |
| 1073 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 1074 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1075 | | SET_ACCUM_L((UINT16)(r), i); |
| 1076 | | |
| 1077 | | m_vres[i] = ACCUM_M(i); |
| 1078 | | } |
| 1079 | | WRITEBACK_RESULT(); |
| 1080 | | break; |
| 1081 | | |
| 1082 | | } |
| 1083 | | |
| 1084 | | case 0x06: /* VMUDN */ |
| 1085 | | { |
| 1086 | | // 31 25 24 20 15 10 5 0 |
| 1087 | | // ------------------------------------------------------ |
| 1088 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1089 | | // ------------------------------------------------------ |
| 1090 | | // |
| 1091 | | // Multiplies unsigned fraction by signed integer |
| 1092 | | // The result is stored into accumulator |
| 1093 | | // The low slice of accumulator is stored into destination element |
| 1094 | | |
| 1095 | | for (i=0; i < 8; i++) |
| 1096 | | { |
| 1097 | | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1098 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1099 | | INT32 r = s1 * s2; |
| 1100 | | |
| 1101 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 1102 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1103 | | SET_ACCUM_L((UINT16)(r), i); |
| 1104 | | |
| 1105 | | m_vres[i] = ACCUM_L(i); |
| 1106 | | } |
| 1107 | | WRITEBACK_RESULT(); |
| 1108 | | break; |
| 1109 | | } |
| 1110 | | |
| 1111 | | case 0x07: /* VMUDH */ |
| 1112 | | { |
| 1113 | | // 31 25 24 20 15 10 5 0 |
| 1114 | | // ------------------------------------------------------ |
| 1115 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1116 | | // ------------------------------------------------------ |
| 1117 | | // |
| 1118 | | // Multiplies signed integer by signed integer |
| 1119 | | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1120 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1121 | | |
| 1122 | | for (i=0; i < 8; i++) |
| 1123 | | { |
| 1124 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1125 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1126 | | INT32 r = s1 * s2; |
| 1127 | | |
| 1128 | | SET_ACCUM_H((INT16)(r >> 16), i); |
| 1129 | | SET_ACCUM_M((UINT16)(r), i); |
| 1130 | | SET_ACCUM_L(0, i); |
| 1131 | | |
| 1132 | | if (r < -32768) r = -32768; |
| 1133 | | if (r > 32767) r = 32767; |
| 1134 | | m_vres[i] = (INT16)(r); |
| 1135 | | } |
| 1136 | | WRITEBACK_RESULT(); |
| 1137 | | break; |
| 1138 | | } |
| 1139 | | |
| 1140 | | case 0x08: /* VMACF */ |
| 1141 | | { |
| 1142 | | // 31 25 24 20 15 10 5 0 |
| 1143 | | // ------------------------------------------------------ |
| 1144 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1145 | | // ------------------------------------------------------ |
| 1146 | | // |
| 1147 | | // Multiplies signed integer by signed integer * 2 |
| 1148 | | // The result is added to accumulator |
| 1149 | | |
| 1150 | | for (i=0; i < 8; i++) |
| 1151 | | { |
| 1152 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1153 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1154 | | INT32 r = s1 * s2; |
| 1155 | | |
| 1156 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1157 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1158 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1159 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1160 | | |
| 1161 | | q += (INT64)(r) << 17; |
| 1162 | | |
| 1163 | | SET_ACCUM_LL((UINT16)q, i); |
| 1164 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1165 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1166 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1167 | | |
| 1168 | | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1169 | | } |
| 1170 | | WRITEBACK_RESULT(); |
| 1171 | | break; |
| 1172 | | } |
| 1173 | | |
| 1174 | | case 0x09: /* VMACU */ |
| 1175 | | { |
| 1176 | | // 31 25 24 20 15 10 5 0 |
| 1177 | | // ------------------------------------------------------ |
| 1178 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1179 | | // ------------------------------------------------------ |
| 1180 | | // |
| 1181 | | |
| 1182 | | for (i = 0; i < 8; i++) |
| 1183 | | { |
| 1184 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1185 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1186 | | INT32 r1 = s1 * s2; |
| 1187 | | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1188 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1189 | | |
| 1190 | | SET_ACCUM_L((UINT16)(r2), i); |
| 1191 | | SET_ACCUM_M((UINT16)(r3), i); |
| 1192 | | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31), i); |
| 1193 | | |
| 1194 | | if ((INT16)ACCUM_H(i) < 0) |
| 1195 | | { |
| 1196 | | m_vres[i] = 0; |
| 1197 | | } |
| 1198 | | else |
| 1199 | | { |
| 1200 | | if (ACCUM_H(i) != 0) |
| 1201 | | { |
| 1202 | | m_vres[i] = 0xffff; |
| 1203 | | } |
| 1204 | | else |
| 1205 | | { |
| 1206 | | if ((INT16)ACCUM_M(i) < 0) |
| 1207 | | { |
| 1208 | | m_vres[i] = 0xffff; |
| 1209 | | } |
| 1210 | | else |
| 1211 | | { |
| 1212 | | m_vres[i] = ACCUM_M(i); |
| 1213 | | } |
| 1214 | | } |
| 1215 | | } |
| 1216 | | } |
| 1217 | | WRITEBACK_RESULT(); |
| 1218 | | break; |
| 1219 | | } |
| 1220 | | |
| 1221 | | case 0x0c: /* VMADL */ |
| 1222 | | { |
| 1223 | | // 31 25 24 20 15 10 5 0 |
| 1224 | | // ------------------------------------------------------ |
| 1225 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1226 | | // ------------------------------------------------------ |
| 1227 | | // |
| 1228 | | // Multiplies unsigned fraction by unsigned fraction |
| 1229 | | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1230 | | // The low slice of accumulator is stored into destination element |
| 1231 | | |
| 1232 | | for (i = 0; i < 8; i++) |
| 1233 | | { |
| 1234 | | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1235 | | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1236 | | UINT32 r1 = s1 * s2; |
| 1237 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1238 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1239 | | |
| 1240 | | SET_ACCUM_L((UINT16)(r2), i); |
| 1241 | | SET_ACCUM_M((UINT16)(r3), i); |
| 1242 | | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 1243 | | |
| 1244 | | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1245 | | } |
| 1246 | | WRITEBACK_RESULT(); |
| 1247 | | break; |
| 1248 | | } |
| 1249 | | |
| 1250 | | case 0x0d: /* VMADM */ |
| 1251 | | { |
| 1252 | | // 31 25 24 20 15 10 5 0 |
| 1253 | | // ------------------------------------------------------ |
| 1254 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1255 | | // ------------------------------------------------------ |
| 1256 | | // |
| 1257 | | // Multiplies signed integer by unsigned fraction |
| 1258 | | // The result is added into accumulator |
| 1259 | | // The middle slice of accumulator is stored into destination element |
| 1260 | | |
| 1261 | | for (i=0; i < 8; i++) |
| 1262 | | { |
| 1263 | | UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1264 | | UINT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1265 | | UINT32 r1 = s1 * s2; |
| 1266 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 1267 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 1268 | | |
| 1269 | | SET_ACCUM_L((UINT16)(r2), i); |
| 1270 | | SET_ACCUM_M((UINT16)(r3), i); |
| 1271 | | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16), i); |
| 1272 | | if ((INT32)(r1) < 0) |
| 1273 | | SET_ACCUM_H(i, ACCUM_H(i) - 1); |
| 1274 | | |
| 1275 | | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1276 | | } |
| 1277 | | WRITEBACK_RESULT(); |
| 1278 | | break; |
| 1279 | | } |
| 1280 | | |
| 1281 | | case 0x0e: /* VMADN */ |
| 1282 | | { |
| 1283 | | // 31 25 24 20 15 10 5 0 |
| 1284 | | // ------------------------------------------------------ |
| 1285 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | |
| 1286 | | // ------------------------------------------------------ |
| 1287 | | // |
| 1288 | | // Multiplies unsigned fraction by signed integer |
| 1289 | | // The result is added into accumulator |
| 1290 | | // The low slice of accumulator is stored into destination element |
| 1291 | | |
| 1292 | | for (i=0; i < 8; i++) |
| 1293 | | { |
| 1294 | | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1295 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1296 | | |
| 1297 | | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1298 | | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1299 | | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1300 | | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1301 | | q += (INT64)(s1*s2) << 16; |
| 1302 | | |
| 1303 | | SET_ACCUM_LL((UINT16)q, i); |
| 1304 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1305 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1306 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1307 | | |
| 1308 | | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1309 | | } |
| 1310 | | WRITEBACK_RESULT(); |
| 1311 | | |
| 1312 | | break; |
| 1313 | | } |
| 1314 | | |
| 1315 | | case 0x0f: /* VMADH */ |
| 1316 | | { |
| 1317 | | // 31 25 24 20 15 10 5 0 |
| 1318 | | // ------------------------------------------------------ |
| 1319 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 1320 | | // ------------------------------------------------------ |
| 1321 | | // |
| 1322 | | // Multiplies signed integer by signed integer |
| 1323 | | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 1324 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1325 | | |
| 1326 | | for (i = 0; i < 8; i++) |
| 1327 | | { |
| 1328 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1329 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1330 | | |
| 1331 | | INT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1332 | | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1333 | | accum += s1 * s2; |
| 1334 | | |
| 1335 | | SET_ACCUM_H((UINT16)(accum >> 16), i); |
| 1336 | | SET_ACCUM_M((UINT16)accum, i); |
| 1337 | | |
| 1338 | | m_vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 1339 | | } |
| 1340 | | WRITEBACK_RESULT(); |
| 1341 | | |
| 1342 | | break; |
| 1343 | | } |
| 1344 | | |
| 1345 | | case 0x10: /* VADD */ |
| 1346 | | { |
| 1347 | | // 31 25 24 20 15 10 5 0 |
| 1348 | | // ------------------------------------------------------ |
| 1349 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 1350 | | // ------------------------------------------------------ |
| 1351 | | // |
| 1352 | | // Adds two vector registers and carry flag, the result is saturated to 32767 |
| 1353 | | |
| 1354 | | // TODO: check VS2REG == VDREG |
| 1355 | | |
| 1356 | | for (i=0; i < 8; i++) |
| 1357 | | { |
| 1358 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1359 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1360 | | INT32 r = s1 + s2 + (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1361 | | |
| 1362 | | SET_ACCUM_L((INT16)(r), i); |
| 1363 | | |
| 1364 | | if (r > 32767) r = 32767; |
| 1365 | | if (r < -32768) r = -32768; |
| 1366 | | m_vres[i] = (INT16)(r); |
| 1367 | | } |
| 1368 | | CLEAR_ZERO_FLAGS(); |
| 1369 | | CLEAR_CARRY_FLAGS(); |
| 1370 | | WRITEBACK_RESULT(); |
| 1371 | | break; |
| 1372 | | } |
| 1373 | | |
| 1374 | | case 0x11: /* VSUB */ |
| 1375 | | { |
| 1376 | | // 31 25 24 20 15 10 5 0 |
| 1377 | | // ------------------------------------------------------ |
| 1378 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 1379 | | // ------------------------------------------------------ |
| 1380 | | // |
| 1381 | | // Subtracts two vector registers and carry flag, the result is saturated to -32768 |
| 1382 | | |
| 1383 | | // TODO: check VS2REG == VDREG |
| 1384 | | |
| 1385 | | for (i = 0; i < 8; i++) |
| 1386 | | { |
| 1387 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1388 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1389 | | INT32 r = s1 - s2 - (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1390 | | |
| 1391 | | SET_ACCUM_L((INT16)(r), i); |
| 1392 | | |
| 1393 | | if (r > 32767) r = 32767; |
| 1394 | | if (r < -32768) r = -32768; |
| 1395 | | |
| 1396 | | m_vres[i] = (INT16)(r); |
| 1397 | | } |
| 1398 | | CLEAR_ZERO_FLAGS(); |
| 1399 | | CLEAR_CARRY_FLAGS(); |
| 1400 | | WRITEBACK_RESULT(); |
| 1401 | | break; |
| 1402 | | } |
| 1403 | | |
| 1404 | | case 0x13: /* VABS */ |
| 1405 | | { |
| 1406 | | // 31 25 24 20 15 10 5 0 |
| 1407 | | // ------------------------------------------------------ |
| 1408 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 1409 | | // ------------------------------------------------------ |
| 1410 | | // |
| 1411 | | // Changes the sign of source register 2 if source register 1 is negative and stores |
| 1412 | | // the result to destination register |
| 1413 | | |
| 1414 | | for (i=0; i < 8; i++) |
| 1415 | | { |
| 1416 | | INT16 s1 = (INT16)VREG_S(VS1REG, i); |
| 1417 | | INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1418 | | |
| 1419 | | if (s1 < 0) |
| 1420 | | { |
| 1421 | | if (s2 == -32768) |
| 1422 | | { |
| 1423 | | m_vres[i] = 32767; |
| 1424 | | } |
| 1425 | | else |
| 1426 | | { |
| 1427 | | m_vres[i] = -s2; |
| 1428 | | } |
| 1429 | | } |
| 1430 | | else if (s1 > 0) |
| 1431 | | { |
| 1432 | | m_vres[i] = s2; |
| 1433 | | } |
| 1434 | | else |
| 1435 | | { |
| 1436 | | m_vres[i] = 0; |
| 1437 | | } |
| 1438 | | |
| 1439 | | SET_ACCUM_L(m_vres[i], i); |
| 1440 | | } |
| 1441 | | WRITEBACK_RESULT(); |
| 1442 | | break; |
| 1443 | | } |
| 1444 | | |
| 1445 | | case 0x14: /* VADDC */ |
| 1446 | | { |
| 1447 | | // 31 25 24 20 15 10 5 0 |
| 1448 | | // ------------------------------------------------------ |
| 1449 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 1450 | | // ------------------------------------------------------ |
| 1451 | | // |
| 1452 | | // Adds two vector registers, the carry out is stored into carry register |
| 1453 | | |
| 1454 | | // TODO: check VS2REG = VDREG |
| 1455 | | |
| 1456 | | CLEAR_ZERO_FLAGS(); |
| 1457 | | CLEAR_CARRY_FLAGS(); |
| 1458 | | |
| 1459 | | for (i=0; i < 8; i++) |
| 1460 | | { |
| 1461 | | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1462 | | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1463 | | INT32 r = s1 + s2; |
| 1464 | | |
| 1465 | | m_vres[i] = (INT16)(r); |
| 1466 | | SET_ACCUM_L((INT16)(r), i); |
| 1467 | | |
| 1468 | | if (r & 0xffff0000) |
| 1469 | | { |
| 1470 | | SET_CARRY_FLAG(i); |
| 1471 | | } |
| 1472 | | } |
| 1473 | | WRITEBACK_RESULT(); |
| 1474 | | break; |
| 1475 | | } |
| 1476 | | |
| 1477 | | case 0x15: /* VSUBC */ |
| 1478 | | { |
| 1479 | | // 31 25 24 20 15 10 5 0 |
| 1480 | | // ------------------------------------------------------ |
| 1481 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 1482 | | // ------------------------------------------------------ |
| 1483 | | // |
| 1484 | | // Subtracts two vector registers, the carry out is stored into carry register |
| 1485 | | |
| 1486 | | // TODO: check VS2REG = VDREG |
| 1487 | | |
| 1488 | | CLEAR_ZERO_FLAGS(); |
| 1489 | | CLEAR_CARRY_FLAGS(); |
| 1490 | | |
| 1491 | | for (i=0; i < 8; i++) |
| 1492 | | { |
| 1493 | | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1494 | | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1495 | | INT32 r = s1 - s2; |
| 1496 | | |
| 1497 | | m_vres[i] = (INT16)(r); |
| 1498 | | SET_ACCUM_L((UINT16)(r), i); |
| 1499 | | |
| 1500 | | if ((UINT16)(r) != 0) |
| 1501 | | { |
| 1502 | | SET_ZERO_FLAG(i); |
| 1503 | | } |
| 1504 | | if (r & 0xffff0000) |
| 1505 | | { |
| 1506 | | SET_CARRY_FLAG(i); |
| 1507 | | } |
| 1508 | | } |
| 1509 | | WRITEBACK_RESULT(); |
| 1510 | | break; |
| 1511 | | } |
| 1512 | | |
| 1513 | | case 0x1d: /* VSAW */ |
| 1514 | | { |
| 1515 | | // 31 25 24 20 15 10 5 0 |
| 1516 | | // ------------------------------------------------------ |
| 1517 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 1518 | | // ------------------------------------------------------ |
| 1519 | | // |
| 1520 | | // Stores high, middle or low slice of accumulator to destination vector |
| 1521 | | |
| 1522 | | switch (EL) |
| 1523 | | { |
| 1524 | | case 0x08: // VSAWH |
| 1525 | | { |
| 1526 | | for (i=0; i < 8; i++) |
| 1527 | | { |
| 1528 | | VREG_S(VDREG, i) = ACCUM_H(i); |
| 1529 | | } |
| 1530 | | break; |
| 1531 | | } |
| 1532 | | case 0x09: // VSAWM |
| 1533 | | { |
| 1534 | | for (i=0; i < 8; i++) |
| 1535 | | { |
| 1536 | | VREG_S(VDREG, i) = ACCUM_M(i); |
| 1537 | | } |
| 1538 | | break; |
| 1539 | | } |
| 1540 | | case 0x0a: // VSAWL |
| 1541 | | { |
| 1542 | | for (i=0; i < 8; i++) |
| 1543 | | { |
| 1544 | | VREG_S(VDREG, i) = ACCUM_L(i); |
| 1545 | | } |
| 1546 | | break; |
| 1547 | | } |
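| 1548 | | default: //fatalerror("RSP: VSAW: el = %d\n", EL); // fatalerror call is commented out; the printf + exit below run instead |
| 1549 | | printf("RSP: VSAW: el = %d\n", EL); // reached for EL values other than 0x08-0x0a |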
| 1550 | | exit(0); |
| 1551 | | } |
| 1552 | | break; |
| 1553 | | } |
| 1554 | | |
| 1555 | | case 0x20: /* VLT */ |
| 1556 | | { |
| 1557 | | // 31 25 24 20 15 10 5 0 |
| 1558 | | // ------------------------------------------------------ |
| 1559 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 1560 | | // ------------------------------------------------------ |
| 1561 | | // |
| 1562 | | // Sets compare flags if elements in VS1 are less than VS2 |
| 1563 | | // Stores the VS1 element in the destination where the compare flag is set, otherwise the VS2 element |
| 1564 | | |
| 1565 | | CLEAR_COMPARE_FLAGS(); |
| 1566 | | CLEAR_CLIP2_FLAGS(); |
| 1567 | | |
| 1568 | | for (i=0; i < 8; i++) |
| 1569 | | { |
| 1570 | | INT16 s1, s2; |
| 1571 | | s1 = VREG_S(VS1REG, i); |
| 1572 | | s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1573 | | if (s1 < s2) |
| 1574 | | { |
| 1575 | | SET_COMPARE_FLAG(i); |
| 1576 | | } |
| 1577 | | else if (s1 == s2) |
| 1578 | | { |
| 1579 | | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 1580 | | { |
| 1581 | | SET_COMPARE_FLAG(i); |
| 1582 | | } |
| 1583 | | } |
| 1584 | | |
| 1585 | | if (COMPARE_FLAG(i) != 0) |
| 1586 | | { |
| 1587 | | m_vres[i] = s1; |
| 1588 | | } |
| 1589 | | else |
| 1590 | | { |
| 1591 | | m_vres[i] = s2; |
| 1592 | | } |
| 1593 | | |
| 1594 | | SET_ACCUM_L(m_vres[i], i); |
| 1595 | | } |
| 1596 | | |
| 1597 | | CLEAR_CARRY_FLAGS(); |
| 1598 | | CLEAR_ZERO_FLAGS(); |
| 1599 | | WRITEBACK_RESULT(); |
| 1600 | | break; |
| 1601 | | } |
| 1602 | | |
| 1603 | | case 0x21: /* VEQ */ |
| 1604 | | { |
| 1605 | | // 31 25 24 20 15 10 5 0 |
| 1606 | | // ------------------------------------------------------ |
| 1607 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 1608 | | // ------------------------------------------------------ |
| 1609 | | // |
| 1610 | | // Sets compare flags if elements in VS1 are equal with VS2 |
| 1611 | | // Moves the element in VS2 to destination vector |
| 1612 | | |
| 1613 | | CLEAR_COMPARE_FLAGS(); |
| 1614 | | CLEAR_CLIP2_FLAGS(); |
| 1615 | | |
| 1616 | | for (i = 0; i < 8; i++) |
| 1617 | | { |
| 1618 | | INT16 s1 = VREG_S(VS1REG, i); |
| 1619 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1620 | | |
| 1621 | | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 1622 | | { |
| 1623 | | SET_COMPARE_FLAG(i); |
| 1624 | | m_vres[i] = s1; |
| 1625 | | } |
| 1626 | | else |
| 1627 | | { |
| 1628 | | m_vres[i] = s2; |
| 1629 | | } |
| 1630 | | SET_ACCUM_L(m_vres[i], i); |
| 1631 | | } |
| 1632 | | |
| 1633 | | CLEAR_ZERO_FLAGS(); |
| 1634 | | CLEAR_CARRY_FLAGS(); |
| 1635 | | WRITEBACK_RESULT(); |
| 1636 | | break; |
| 1637 | | } |
| 1638 | | |
| 1639 | | case 0x22: /* VNE */ |
| 1640 | | { |
| 1641 | | // 31 25 24 20 15 10 5 0 |
| 1642 | | // ------------------------------------------------------ |
| 1643 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 1644 | | // ------------------------------------------------------ |
| 1645 | | // |
| 1646 | | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 1647 | | // Stores the VS1 element in the destination where the compare flag is set, otherwise the VS2 element |
| 1648 | | |
| 1649 | | CLEAR_COMPARE_FLAGS(); |
| 1650 | | CLEAR_CLIP2_FLAGS(); |
| 1651 | | |
| 1652 | | for (i = 0; i < 8; i++) |
| 1653 | | { |
| 1654 | | INT16 s1 = VREG_S(VS1REG, i); |
| 1655 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1656 | | |
| 1657 | | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 1658 | | { |
| 1659 | | SET_COMPARE_FLAG(i); |
| 1660 | | m_vres[i] = s1; |
| 1661 | | } |
| 1662 | | else |
| 1663 | | { |
| 1664 | | m_vres[i] = s2; |
| 1665 | | } |
| 1666 | | |
| 1667 | | SET_ACCUM_L(m_vres[i], i); |
| 1668 | | } |
| 1669 | | |
| 1670 | | CLEAR_CARRY_FLAGS(); |
| 1671 | | CLEAR_ZERO_FLAGS(); |
| 1672 | | WRITEBACK_RESULT(); |
| 1673 | | break; |
| 1674 | | } |
| 1675 | | |
| 1676 | | case 0x23: /* VGE */ |
| 1677 | | { |
| 1678 | | // 31 25 24 20 15 10 5 0 |
| 1679 | | // ------------------------------------------------------ |
| 1680 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 1681 | | // ------------------------------------------------------ |
| 1682 | | // |
| 1683 | | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 1684 | | // Stores the VS1 element in the destination where the compare flag is set, otherwise the VS2 element |
| 1685 | | |
| 1686 | | CLEAR_COMPARE_FLAGS(); |
| 1687 | | CLEAR_CLIP2_FLAGS(); |
| 1688 | | |
| 1689 | | for (i=0; i < 8; i++) |
| 1690 | | { |
| 1691 | | INT16 s1 = VREG_S(VS1REG, i); |
| 1692 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1693 | | |
| 1694 | | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 1695 | | { |
| 1696 | | SET_COMPARE_FLAG(i); |
| 1697 | | m_vres[i] = s1; |
| 1698 | | } |
| 1699 | | else |
| 1700 | | { |
| 1701 | | m_vres[i] = s2; |
| 1702 | | } |
| 1703 | | |
| 1704 | | SET_ACCUM_L(m_vres[i], i); |
| 1705 | | } |
| 1706 | | |
| 1707 | | CLEAR_CARRY_FLAGS(); |
| 1708 | | CLEAR_ZERO_FLAGS(); |
| 1709 | | WRITEBACK_RESULT(); |
| 1710 | | break; |
| 1711 | | } |
| 1712 | | |
| 1713 | | case 0x24: /* VCL */ |
| 1714 | | { |
| 1715 | | // 31 25 24 20 15 10 5 0 |
| 1716 | | // ------------------------------------------------------ |
| 1717 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 1718 | | // ------------------------------------------------------ |
| 1719 | | // |
| 1720 | | // Vector clip low |
| 1721 | | |
| 1722 | | for (i = 0; i < 8; i++) |
| 1723 | | { |
| 1724 | | INT16 s1 = VREG_S(VS1REG, i); |
| 1725 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1726 | | |
| 1727 | | if (CARRY_FLAG(i) != 0) |
| 1728 | | { |
| 1729 | | if (ZERO_FLAG(i) != 0) |
| 1730 | | { |
| 1731 | | if (COMPARE_FLAG(i) != 0) |
| 1732 | | { |
| 1733 | | SET_ACCUM_L(-(UINT16)s2, i); |
| 1734 | | } |
| 1735 | | else |
| 1736 | | { |
| 1737 | | SET_ACCUM_L(s1, i); |
| 1738 | | } |
| 1739 | | } |
| 1740 | | else |
| 1741 | | { |
| 1742 | | if (CLIP1_FLAG(i) != 0) |
| 1743 | | { |
| 1744 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 1745 | | { |
| 1746 | | SET_ACCUM_L(s1, i); |
| 1747 | | CLEAR_COMPARE_FLAG(i); |
| 1748 | | } |
| 1749 | | else |
| 1750 | | { |
| 1751 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 1752 | | SET_COMPARE_FLAG(i); |
| 1753 | | } |
| 1754 | | } |
| 1755 | | else |
| 1756 | | { |
| 1757 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 1758 | | { |
| 1759 | | SET_ACCUM_L(s1, i); |
| 1760 | | CLEAR_COMPARE_FLAG(i); |
| 1761 | | } |
| 1762 | | else |
| 1763 | | { |
| 1764 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 1765 | | SET_COMPARE_FLAG(i); |
| 1766 | | } |
| 1767 | | } |
| 1768 | | } |
| 1769 | | } |
| 1770 | | else |
| 1771 | | { |
| 1772 | | if (ZERO_FLAG(i) != 0) |
| 1773 | | { |
| 1774 | | if (CLIP2_FLAG(i) != 0) |
| 1775 | | { |
| 1776 | | SET_ACCUM_L(s2, i); |
| 1777 | | } |
| 1778 | | else |
| 1779 | | { |
| 1780 | | SET_ACCUM_L(s1, i); |
| 1781 | | } |
| 1782 | | } |
| 1783 | | else |
| 1784 | | { |
| 1785 | | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 1786 | | { |
| 1787 | | SET_ACCUM_L(s2, i); |
| 1788 | | SET_CLIP2_FLAG(i); |
| 1789 | | } |
| 1790 | | else |
| 1791 | | { |
| 1792 | | SET_ACCUM_L(s1, i); |
| 1793 | | CLEAR_CLIP2_FLAG(i); |
| 1794 | | } |
| 1795 | | } |
| 1796 | | } |
| 1797 | | |
| 1798 | | m_vres[i] = ACCUM_L(i); |
| 1799 | | } |
| 1800 | | CLEAR_CARRY_FLAGS(); |
| 1801 | | CLEAR_ZERO_FLAGS(); |
| 1802 | | CLEAR_CLIP1_FLAGS(); |
| 1803 | | WRITEBACK_RESULT(); |
| 1804 | | break; |
| 1805 | | } |
| 1806 | | |
| 1807 | | case 0x25: /* VCH */ |
| 1808 | | { |
| 1809 | | // 31 25 24 20 15 10 5 0 |
| 1810 | | // ------------------------------------------------------ |
| 1811 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 1812 | | // ------------------------------------------------------ |
| 1813 | | // |
| 1814 | | // Vector clip high |
| 1815 | | |
| 1816 | | CLEAR_CARRY_FLAGS(); |
| 1817 | | CLEAR_COMPARE_FLAGS(); |
| 1818 | | CLEAR_CLIP1_FLAGS(); |
| 1819 | | CLEAR_ZERO_FLAGS(); |
| 1820 | | CLEAR_CLIP2_FLAGS(); |
| 1821 | | UINT32 vce = 0; |
| 1822 | | |
| 1823 | | for (i=0; i < 8; i++) |
| 1824 | | { |
| 1825 | | INT16 s1 = VREG_S(VS1REG, i); |
| 1826 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1827 | | |
| 1828 | | if ((s1 ^ s2) < 0) |
| 1829 | | { |
| 1830 | | vce = (s1 + s2 == -1); |
| 1831 | | SET_CARRY_FLAG(i); |
| 1832 | | if (s2 < 0) |
| 1833 | | { |
| 1834 | | SET_CLIP2_FLAG(i); |
| 1835 | | } |
| 1836 | | |
| 1837 | | if (s1 + s2 <= 0) |
| 1838 | | { |
| 1839 | | SET_COMPARE_FLAG(i); |
| 1840 | | m_vres[i] = -((UINT16)s2); |
| 1841 | | } |
| 1842 | | else |
| 1843 | | { |
| 1844 | | m_vres[i] = s1; |
| 1845 | | } |
| 1846 | | |
| 1847 | | if (s1 + s2 != 0) |
| 1848 | | { |
| 1849 | | if (s1 != ~s2) |
| 1850 | | { |
| 1851 | | SET_ZERO_FLAG(i); |
| 1852 | | } |
| 1853 | | } |
| 1854 | | } |
| 1855 | | else |
| 1856 | | { |
| 1857 | | vce = 0; |
| 1858 | | if (s2 < 0) |
| 1859 | | { |
| 1860 | | SET_COMPARE_FLAG(i); |
| 1861 | | } |
| 1862 | | if (s1 - s2 >= 0) |
| 1863 | | { |
| 1864 | | SET_CLIP2_FLAG(i); |
| 1865 | | m_vres[i] = s2; |
| 1866 | | } |
| 1867 | | else |
| 1868 | | { |
| 1869 | | m_vres[i] = s1; |
| 1870 | | } |
| 1871 | | |
| 1872 | | if ((s1 - s2) != 0) |
| 1873 | | { |
| 1874 | | if (s1 != ~s2) |
| 1875 | | { |
| 1876 | | SET_ZERO_FLAG(i); |
| 1877 | | } |
| 1878 | | } |
| 1879 | | } |
| 1880 | | if (vce != 0) |
| 1881 | | { |
| 1882 | | SET_CLIP1_FLAG(i); |
| 1883 | | } |
| 1884 | | |
| 1885 | | SET_ACCUM_L(m_vres[i], i); |
| 1886 | | } |
| 1887 | | WRITEBACK_RESULT(); |
| 1888 | | break; |
| 1889 | | } |
| 1890 | | |
| 1891 | | case 0x26: /* VCR */ |
| 1892 | | { |
| 1893 | | // 31 25 24 20 15 10 5 0 |
| 1894 | | // ------------------------------------------------------ |
| 1895 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 1896 | | // ------------------------------------------------------ |
| 1897 | | // |
| 1898 | | // Vector clip reverse |
| 1899 | | |
| 1900 | | CLEAR_CARRY_FLAGS(); |
| 1901 | | CLEAR_COMPARE_FLAGS(); |
| 1902 | | CLEAR_CLIP1_FLAGS(); |
| 1903 | | CLEAR_ZERO_FLAGS(); |
| 1904 | | CLEAR_CLIP2_FLAGS(); |
| 1905 | | |
| 1906 | | for (i=0; i < 8; i++) |
| 1907 | | { |
| 1908 | | INT16 s1 = VREG_S(VS1REG, i); |
| 1909 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1910 | | |
| 1911 | | if ((INT16)(s1 ^ s2) < 0) |
| 1912 | | { |
| 1913 | | if (s2 < 0) |
| 1914 | | { |
| 1915 | | SET_CLIP2_FLAG(i); |
| 1916 | | } |
| 1917 | | if ((s1 + s2) <= 0) |
| 1918 | | { |
| 1919 | | SET_ACCUM_L(~((UINT16)s2), i); |
| 1920 | | SET_COMPARE_FLAG(i); |
| 1921 | | } |
| 1922 | | else |
| 1923 | | { |
| 1924 | | SET_ACCUM_L(s1, i); |
| 1925 | | } |
| 1926 | | } |
| 1927 | | else |
| 1928 | | { |
| 1929 | | if (s2 < 0) |
| 1930 | | { |
| 1931 | | SET_COMPARE_FLAG(i); |
| 1932 | | } |
| 1933 | | if ((s1 - s2) >= 0) |
| 1934 | | { |
| 1935 | | SET_ACCUM_L(s2, i); |
| 1936 | | SET_CLIP2_FLAG(i); |
| 1937 | | } |
| 1938 | | else |
| 1939 | | { |
| 1940 | | SET_ACCUM_L(s1, i); |
| 1941 | | } |
| 1942 | | } |
| 1943 | | |
| 1944 | | m_vres[i] = ACCUM_L(i); |
| 1945 | | } |
| 1946 | | WRITEBACK_RESULT(); |
| 1947 | | break; |
| 1948 | | } |
| 1949 | | |
| 1950 | | case 0x27: /* VMRG */ |
| 1951 | | { |
| 1952 | | // 31 25 24 20 15 10 5 0 |
| 1953 | | // ------------------------------------------------------ |
| 1954 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 1955 | | // ------------------------------------------------------ |
| 1956 | | // |
| 1957 | | // Merges two vectors according to compare flags |
| 1958 | | |
| 1959 | | for (i = 0; i < 8; i++) |
| 1960 | | { |
| 1961 | | if (COMPARE_FLAG(i) != 0) |
| 1962 | | { |
| 1963 | | m_vres[i] = VREG_S(VS1REG, i); |
| 1964 | | } |
| 1965 | | else |
| 1966 | | { |
| 1967 | | m_vres[i] = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1968 | | } |
| 1969 | | |
| 1970 | | SET_ACCUM_L(m_vres[i], i); |
| 1971 | | } |
| 1972 | | WRITEBACK_RESULT(); |
| 1973 | | break; |
| 1974 | | } |
| 1975 | | case 0x28: /* VAND */ |
| 1976 | | { |
| 1977 | | // 31 25 24 20 15 10 5 0 |
| 1978 | | // ------------------------------------------------------ |
| 1979 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 1980 | | // ------------------------------------------------------ |
| 1981 | | // |
| 1982 | | // Bitwise AND of two vector registers |
| 1983 | | |
| 1984 | | for (i = 0; i < 8; i++) |
| 1985 | | { |
| 1986 | | m_vres[i] = VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1987 | | SET_ACCUM_L(m_vres[i], i); |
| 1988 | | } |
| 1989 | | WRITEBACK_RESULT(); |
| 1990 | | break; |
| 1991 | | } |
| 1992 | | case 0x29: /* VNAND */ |
| 1993 | | { |
| 1994 | | // 31 25 24 20 15 10 5 0 |
| 1995 | | // ------------------------------------------------------ |
| 1996 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 1997 | | // ------------------------------------------------------ |
| 1998 | | // |
| 1999 | | // Bitwise NOT AND of two vector registers |
| 2000 | | |
| 2001 | | for (i = 0; i < 8; i++) |
| 2002 | | { |
| 2003 | | m_vres[i] = ~((VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2004 | | SET_ACCUM_L(m_vres[i], i); |
| 2005 | | } |
| 2006 | | WRITEBACK_RESULT(); |
| 2007 | | break; |
| 2008 | | } |
| 2009 | | case 0x2a: /* VOR */ |
| 2010 | | { |
| 2011 | | // 31 25 24 20 15 10 5 0 |
| 2012 | | // ------------------------------------------------------ |
| 2013 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2014 | | // ------------------------------------------------------ |
| 2015 | | // |
| 2016 | | // Bitwise OR of two vector registers |
| 2017 | | |
| 2018 | | for (i = 0; i < 8; i++) |
| 2019 | | { |
| 2020 | | m_vres[i] = VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2021 | | SET_ACCUM_L(m_vres[i], i); |
| 2022 | | } |
| 2023 | | WRITEBACK_RESULT(); |
| 2024 | | break; |
| 2025 | | } |
| 2026 | | case 0x2b: /* VNOR */ |
| 2027 | | { |
| 2028 | | // 31 25 24 20 15 10 5 0 |
| 2029 | | // ------------------------------------------------------ |
| 2030 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2031 | | // ------------------------------------------------------ |
| 2032 | | // |
| 2033 | | // Bitwise NOT OR of two vector registers |
| 2034 | | |
| 2035 | | for (i=0; i < 8; i++) |
| 2036 | | { |
| 2037 | | m_vres[i] = ~((VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2038 | | SET_ACCUM_L(m_vres[i], i); |
| 2039 | | } |
| 2040 | | WRITEBACK_RESULT(); |
| 2041 | | break; |
| 2042 | | } |
| 2043 | | case 0x2c: /* VXOR */ |
| 2044 | | { |
| 2045 | | // 31 25 24 20 15 10 5 0 |
| 2046 | | // ------------------------------------------------------ |
| 2047 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2048 | | // ------------------------------------------------------ |
| 2049 | | // |
| 2050 | | // Bitwise XOR of two vector registers |
| 2051 | | |
| 2052 | | for (i=0; i < 8; i++) |
| 2053 | | { |
| 2054 | | m_vres[i] = VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2055 | | SET_ACCUM_L(m_vres[i], i); |
| 2056 | | } |
| 2057 | | WRITEBACK_RESULT(); |
| 2058 | | break; |
| 2059 | | } |
| 2060 | | case 0x2d: /* VNXOR */ |
| 2061 | | { |
| 2062 | | // 31 25 24 20 15 10 5 0 |
| 2063 | | // ------------------------------------------------------ |
| 2064 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2065 | | // ------------------------------------------------------ |
| 2066 | | // |
| 2067 | | // Bitwise NOT XOR of two vector registers |
| 2068 | | |
| 2069 | | for (i=0; i < 8; i++) |
| 2070 | | { |
| 2071 | | m_vres[i] = ~((VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2072 | | SET_ACCUM_L(m_vres[i], i); |
| 2073 | | } |
| 2074 | | WRITEBACK_RESULT(); |
| 2075 | | break; |
| 2076 | | } |
| 2077 | | |
| 2078 | | case 0x30: /* VRCP */ |
| 2079 | | { |
| 2080 | | // 31 25 24 20 15 10 5 0 |
| 2081 | | // ------------------------------------------------------ |
| 2082 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2083 | | // ------------------------------------------------------ |
| 2084 | | // |
| 2085 | | // Calculates reciprocal |
| 2086 | | INT32 shifter = 0; |
| 2087 | | |
| 2088 | | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2089 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2090 | | if (datainput) |
| 2091 | | { |
| 2092 | | for (i = 0; i < 32; i++) |
| 2093 | | { |
| 2094 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2095 | | { |
| 2096 | | shifter = i; |
| 2097 | | break; |
| 2098 | | } |
| 2099 | | } |
| 2100 | | } |
| 2101 | | else |
| 2102 | | { |
| 2103 | | shifter = 0x10; |
| 2104 | | } |
| 2105 | | |
| 2106 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2107 | | INT32 fetchval = rsp_divtable[address]; |
| 2108 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2109 | | if (rec < 0) |
| 2110 | | { |
| 2111 | | temp = ~temp; |
| 2112 | | } |
| 2113 | | if (!rec) |
| 2114 | | { |
| 2115 | | temp = 0x7fffffff; |
| 2116 | | } |
| 2117 | | else if (rec == 0xffff8000) |
| 2118 | | { |
| 2119 | | temp = 0xffff0000; |
| 2120 | | } |
| 2121 | | rec = temp; |
| 2122 | | |
| 2123 | | m_reciprocal_res = rec; |
| 2124 | | m_dp_allowed = 0; |
| 2125 | | |
| 2126 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2127 | | |
| 2128 | | for (i = 0; i < 8; i++) |
| 2129 | | { |
| 2130 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2131 | | } |
| 2132 | | |
| 2133 | | |
| 2134 | | break; |
| 2135 | | } |
| 2136 | | |
| 2137 | | case 0x31: /* VRCPL */ |
| 2138 | | { |
| 2139 | | // 31 25 24 20 15 10 5 0 |
| 2140 | | // ------------------------------------------------------ |
| 2141 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2142 | | // ------------------------------------------------------ |
| 2143 | | // |
| 2144 | | // Calculates reciprocal low part |
| 2145 | | |
| 2146 | | INT32 shifter = 0; |
| 2147 | | |
| 2148 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2149 | | INT32 datainput = rec; |
| 2150 | | |
| 2151 | | if (m_dp_allowed) |
| 2152 | | { |
| 2153 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2154 | | datainput = rec; |
| 2155 | | |
| 2156 | | if (rec < 0) |
| 2157 | | { |
| 2158 | | if (rec < -32768) |
| 2159 | | { |
| 2160 | | datainput = ~datainput; |
| 2161 | | } |
| 2162 | | else |
| 2163 | | { |
| 2164 | | datainput = -datainput; |
| 2165 | | } |
| 2166 | | } |
| 2167 | | } |
| 2168 | | else if (datainput < 0) |
| 2169 | | { |
| 2170 | | datainput = -datainput; |
| 2171 | | |
| 2172 | | shifter = 0x10; |
| 2173 | | } |
| 2174 | | |
| 2175 | | |
| 2176 | | for (i = 0; i < 32; i++) |
| 2177 | | { |
| 2178 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2179 | | { |
| 2180 | | shifter = i; |
| 2181 | | break; |
| 2182 | | } |
| 2183 | | } |
| 2184 | | |
| 2185 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2186 | | INT32 fetchval = rsp_divtable[address]; |
| 2187 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2188 | | temp ^= rec >> 31; |
| 2189 | | |
| 2190 | | if (!rec) |
| 2191 | | { |
| 2192 | | temp = 0x7fffffff; |
| 2193 | | } |
| 2194 | | else if (rec == 0xffff8000) |
| 2195 | | { |
| 2196 | | temp = 0xffff0000; |
| 2197 | | } |
| 2198 | | rec = temp; |
| 2199 | | |
| 2200 | | m_reciprocal_res = rec; |
| 2201 | | m_dp_allowed = 0; |
| 2202 | | |
| 2203 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2204 | | |
| 2205 | | for (i = 0; i < 8; i++) |
| 2206 | | { |
| 2207 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2208 | | } |
| 2209 | | |
| 2210 | | break; |
| 2211 | | } |
| 2212 | | |
| 2213 | | case 0x32: /* VRCPH */ |
| 2214 | | { |
| 2215 | | // 31 25 24 20 15 10 5 0 |
| 2216 | | // ------------------------------------------------------ |
| 2217 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 2218 | | // ------------------------------------------------------ |
| 2219 | | // |
| 2220 | | // Calculates reciprocal high part |
| 2221 | | |
| 2222 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2223 | | m_dp_allowed = 1; |
| 2224 | | |
| 2225 | | for (i = 0; i < 8; i++) |
| 2226 | | { |
| 2227 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2228 | | } |
| 2229 | | |
| 2230 | | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 2231 | | |
| 2232 | | break; |
| 2233 | | } |
| 2234 | | |
| 2235 | | case 0x33: /* VMOV */ |
| 2236 | | { |
| 2237 | | // 31 25 24 20 15 10 5 0 |
| 2238 | | // ------------------------------------------------------ |
| 2239 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 2240 | | // ------------------------------------------------------ |
| 2241 | | // |
| 2242 | | // Moves element from vector to destination vector |
| 2243 | | |
| 2244 | | VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 2245 | | for (i = 0; i < 8; i++) |
| 2246 | | { |
| 2247 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2248 | | } |
| 2249 | | break; |
| 2250 | | } |
| 2251 | | |
| 2252 | | case 0x34: /* VRSQ */ |
| 2253 | | { |
| 2254 | | // 31 25 24 20 15 10 5 0 |
| 2255 | | // ------------------------------------------------------ |
| 2256 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 2257 | | // ------------------------------------------------------ |
| 2258 | | // |
| 2259 | | // Calculates reciprocal square-root |
| 2260 | | |
| 2261 | | INT32 shifter = 0; |
| 2262 | | |
| 2263 | | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2264 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2265 | | if (datainput) |
| 2266 | | { |
| 2267 | | for (i = 0; i < 32; i++) |
| 2268 | | { |
| 2269 | | if (datainput & (1 << ((~i) & 0x1f))) // (~i) & 0x1f equals 31 - i here, so bits are tested from bit 31 downward |
| 2270 | | { |
| 2271 | | shifter = i; |
| 2272 | | break; |
| 2273 | | } |
| 2274 | | } |
| 2275 | | } |
| 2276 | | else |
| 2277 | | { |
| 2278 | | shifter = 0x10; |
| 2279 | | } |
| 2280 | | |
| 2281 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2282 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2283 | | |
| 2284 | | INT32 fetchval = rsp_divtable[address]; |
| 2285 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2286 | | if (rec < 0) |
| 2287 | | { |
| 2288 | | temp = ~temp; |
| 2289 | | } |
| 2290 | | if (!rec) |
| 2291 | | { |
| 2292 | | temp = 0x7fffffff; |
| 2293 | | } |
| 2294 | | else if (rec == 0xffff8000) |
| 2295 | | { |
| 2296 | | temp = 0xffff0000; |
| 2297 | | } |
| 2298 | | rec = temp; |
| 2299 | | |
| 2300 | | m_reciprocal_res = rec; |
| 2301 | | m_dp_allowed = 0; |
| 2302 | | |
| 2303 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2304 | | |
| 2305 | | for (i = 0; i < 8; i++) |
| 2306 | | { |
| 2307 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2308 | | } |
| 2309 | | |
| 2310 | | break; |
| 2311 | | } |
| 2312 | | |
| 2313 | | case 0x35: /* VRSQL */ |
| 2314 | | { |
| 2315 | | // 31 25 24 20 15 10 5 0 |
| 2316 | | // ------------------------------------------------------ |
| 2317 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 2318 | | // ------------------------------------------------------ |
| 2319 | | // |
| 2320 | | // Calculates reciprocal square-root low part |
| 2321 | | |
| 2322 | | INT32 shifter = 0; |
| 2323 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2324 | | INT32 datainput = rec; |
| 2325 | | |
| 2326 | | if (m_dp_allowed) |
| 2327 | | { |
| 2328 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2329 | | datainput = rec; |
| 2330 | | |
| 2331 | | if (rec < 0) |
| 2332 | | { |
| 2333 | | if (rec < -32768) |
| 2334 | | { |
| 2335 | | datainput = ~datainput; |
| 2336 | | } |
| 2337 | | else |
| 2338 | | { |
| 2339 | | datainput = -datainput; |
| 2340 | | } |
| 2341 | | } |
| 2342 | | } |
| 2343 | | else if (datainput < 0) |
| 2344 | | { |
| 2345 | | datainput = -datainput; |
| 2346 | | |
| 2347 | | shifter = 0x10; |
| 2348 | | } |
| 2349 | | |
| 2350 | | if (datainput) |
| 2351 | | { |
| 2352 | | for (i = 0; i < 32; i++) |
| 2353 | | { |
| 2354 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2355 | | { |
| 2356 | | shifter = i; |
| 2357 | | break; |
| 2358 | | } |
| 2359 | | } |
| 2360 | | } |
| 2361 | | |
| 2362 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2363 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2364 | | |
| 2365 | | INT32 fetchval = rsp_divtable[address]; |
| 2366 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2367 | | temp ^= rec >> 31; |
| 2368 | | |
| 2369 | | if (!rec) |
| 2370 | | { |
| 2371 | | temp = 0x7fffffff; |
| 2372 | | } |
| 2373 | | else if (rec == 0xffff8000) |
| 2374 | | { |
| 2375 | | temp = 0xffff0000; |
| 2376 | | } |
| 2377 | | rec = temp; |
| 2378 | | |
| 2379 | | m_reciprocal_res = rec; |
| 2380 | | m_dp_allowed = 0; |
| 2381 | | |
| 2382 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2383 | | |
| 2384 | | for (i = 0; i < 8; i++) |
| 2385 | | { |
| 2386 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2387 | | } |
| 2388 | | |
| 2389 | | break; |
| 2390 | | } |
| 2391 | | |
| 2392 | | case 0x36: /* VRSQH */ |
| 2393 | | { |
| 2394 | | // 31 25 24 20 15 10 5 0 |
| 2395 | | // ------------------------------------------------------ |
| 2396 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 2397 | | // ------------------------------------------------------ |
| 2398 | | // |
| 2399 | | // Calculates reciprocal square-root high part |
| 2400 | | |
| 2401 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2402 | | m_dp_allowed = 1; |
| 2403 | | |
| 2404 | | for (i=0; i < 8; i++) |
| 2405 | | { |
| 2406 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2407 | | } |
| 2408 | | |
| 2409 | | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 2410 | | break; |
| 2411 | | } |
| 2412 | | |
| 2413 | | case 0x37: /* VNOP */ |
| 2414 | | { |
| 2415 | | // 31 25 24 20 15 10 5 0 |
| 2416 | | // ------------------------------------------------------ |
| 2417 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110111 | |
| 2418 | | // ------------------------------------------------------ |
| 2419 | | // |
| 2420 | | // Vector null instruction |
| 2421 | | |
| 2422 | | break; |
| 2423 | | } |
| 2424 | | |
| 2425 | | default: m_rsp.unimplemented_opcode(op); break; |
| 2426 | | } |
| 2427 | | } |
| 2428 | | |
| 2429 | | /*************************************************************************** |
| 2430 | | Vector Flag Reading/Writing |
| 2431 | | ***************************************************************************/ |
| 2432 | | |
| 2433 | | void rsp_cop2::handle_cop2(UINT32 op) /* Execute one COP2 instruction: bits 25..21 of op select a register move, a flag-register transfer or a vector operation. */ |
| 2434 | | { |
| 2435 | | switch ((op >> 21) & 0x1f) |
| 2436 | | { |
| 2437 | | case 0x00: /* MFC2 */ |
| 2438 | | { |
| 2439 | | // 31 25 20 15 10 6 0 |
| 2440 | | // --------------------------------------------------- |
| 2441 | | // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | |
| 2442 | | // --------------------------------------------------- |
| 2443 | | // Loads a sign-extended 16-bit slice of a vector register into RT. |
| 2444 | | int el = (op >> 7) & 0xf; /* byte offset from bits 10..7 of op */ |
| 2445 | | UINT16 b1 = VREG_B(RDREG, (el+0) & 0xf); /* becomes the high byte of the result */ |
| 2446 | | UINT16 b2 = VREG_B(RDREG, (el+1) & 0xf); /* becomes the low byte; the offset wraps at 16 */ |
| 2447 | | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); /* no write when RTREG is 0 */ |
| 2448 | | break; |
| 2449 | | } |
| 2450 | | |
| 2451 | | case 0x02: /* CFC2 */ |
| 2452 | | { |
| 2453 | | // 31 25 20 15 10 0 |
| 2454 | | // ------------------------------------------------ |
| 2455 | | // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | |
| 2456 | | // ------------------------------------------------ |
| 2457 | | // Packs one bit per flag lane into RT; nothing is written when RTREG is 0. |
| 2458 | | if (RTREG) |
| 2459 | | { |
| 2460 | | switch(RDREG) |
| 2461 | | { |
| 2462 | | case 0: /* carry flags -> bits 0..7, zero flags -> bits 8..15 */ |
| 2463 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 2464 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 2465 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 2466 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 2467 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 2468 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 2469 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 2470 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 2471 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 2472 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 2473 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 2474 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 2475 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 2476 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 2477 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 2478 | | ((ZERO_FLAG(7) & 1) << 15); |
| 2479 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend from bit 15 */ |
| 2480 | | break; |
| 2481 | | case 1: /* compare flags -> bits 0..7, clip2 flags -> bits 8..15 */ |
| 2482 | | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 2483 | | ((COMPARE_FLAG(1) & 1) << 1) | |
| 2484 | | ((COMPARE_FLAG(2) & 1) << 2) | |
| 2485 | | ((COMPARE_FLAG(3) & 1) << 3) | |
| 2486 | | ((COMPARE_FLAG(4) & 1) << 4) | |
| 2487 | | ((COMPARE_FLAG(5) & 1) << 5) | |
| 2488 | | ((COMPARE_FLAG(6) & 1) << 6) | |
| 2489 | | ((COMPARE_FLAG(7) & 1) << 7) | |
| 2490 | | ((CLIP2_FLAG(0) & 1) << 8) | |
| 2491 | | ((CLIP2_FLAG(1) & 1) << 9) | |
| 2492 | | ((CLIP2_FLAG(2) & 1) << 10) | |
| 2493 | | ((CLIP2_FLAG(3) & 1) << 11) | |
| 2494 | | ((CLIP2_FLAG(4) & 1) << 12) | |
| 2495 | | ((CLIP2_FLAG(5) & 1) << 13) | |
| 2496 | | ((CLIP2_FLAG(6) & 1) << 14) | |
| 2497 | | ((CLIP2_FLAG(7) & 1) << 15); |
| 2498 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend from bit 15 */ |
| 2499 | | break; |
| 2500 | | case 2: /* clip1 flags -> bits 0..7 only, so no sign extension is applied */ |
| 2501 | | // Ancillary clipping flags |
| 2502 | | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 2503 | | ((CLIP1_FLAG(1) & 1) << 1) | |
| 2504 | | ((CLIP1_FLAG(2) & 1) << 2) | |
| 2505 | | ((CLIP1_FLAG(3) & 1) << 3) | |
| 2506 | | ((CLIP1_FLAG(4) & 1) << 4) | |
| 2507 | | ((CLIP1_FLAG(5) & 1) << 5) | |
| 2508 | | ((CLIP1_FLAG(6) & 1) << 6) | |
| 2509 | | ((CLIP1_FLAG(7) & 1) << 7); |
| 2510 | | } /* any other RDREG value leaves RTVAL untouched: the switch has no default */ |
| 2511 | | } |
| 2512 | | break; |
| 2513 | | } |
| 2514 | | |
| 2515 | | case 0x04: /* MTC2 */ |
| 2516 | | { |
| 2517 | | // 31 25 20 15 10 6 0 |
| 2518 | | // --------------------------------------------------- |
| 2519 | | // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | |
| 2520 | | // --------------------------------------------------- |
| 2521 | | // Stores bits 15..0 of RT into two consecutive bytes of a vector register. |
| 2522 | | int el = (op >> 7) & 0xf; /* byte offset from bits 10..7 of op */ |
| 2523 | | W_VREG_B(RDREG, (el+0) & 0xf, (RTVAL >> 8) & 0xff); /* bits 15..8 of RTVAL */ |
| 2524 | | W_VREG_B(RDREG, (el+1) & 0xf, (RTVAL >> 0) & 0xff); /* bits 7..0 of RTVAL; the offset wraps at 16 */ |
| 2525 | | break; |
| 2526 | | } |
| 2527 | | |
| 2528 | | case 0x06: /* CTC2 */ |
| 2529 | | { |
| 2530 | | // 31 25 20 15 10 0 |
| 2531 | | // ------------------------------------------------ |
| 2532 | | // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | |
| 2533 | | // ------------------------------------------------ |
| 2534 | | // Clears the selected flag group, then sets each lane whose bit in RT is 1. |
| 2535 | | switch(RDREG) |
| 2536 | | { |
| 2537 | | case 0: /* bits 0..7 -> carry flags, bits 8..15 -> zero flags */ |
| 2538 | | CLEAR_CARRY_FLAGS(); |
| 2539 | | CLEAR_ZERO_FLAGS(); |
| 2540 | | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 2541 | | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 2542 | | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 2543 | | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 2544 | | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 2545 | | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 2546 | | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 2547 | | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 2548 | | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 2549 | | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 2550 | | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 2551 | | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 2552 | | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 2553 | | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 2554 | | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 2555 | | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 2556 | | break; |
| 2557 | | |
| 2558 | | case 1: /* bits 0..7 -> compare flags, bits 8..15 -> clip2 flags */ |
| 2559 | | CLEAR_COMPARE_FLAGS(); |
| 2560 | | CLEAR_CLIP2_FLAGS(); |
| 2561 | | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 2562 | | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 2563 | | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 2564 | | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 2565 | | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 2566 | | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 2567 | | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 2568 | | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 2569 | | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 2570 | | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 2571 | | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 2572 | | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 2573 | | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 2574 | | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 2575 | | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 2576 | | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 2577 | | break; |
| 2578 | | |
| 2579 | | case 2: /* bits 0..7 -> clip1 flags; higher bits of RTVAL are ignored */ |
| 2580 | | CLEAR_CLIP1_FLAGS(); |
| 2581 | | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 2582 | | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 2583 | | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 2584 | | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 2585 | | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 2586 | | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 2587 | | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 2588 | | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 2589 | | break; |
| 2590 | | } /* any other RDREG value changes no flags: the switch has no default */ |
| 2591 | | break; |
| 2592 | | } |
| 2593 | | |
| 2594 | | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 2595 | | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 2596 | | { |
| 2597 | | handle_vector_ops(op); /* selector values 0x10..0x1f are forwarded as vector operations */ |
| 2598 | | break; |
| 2599 | | } |
| 2600 | | |
| 2601 | | default: /* every remaining selector value is reported through the owning RSP object */ |
| 2602 | | m_rsp.unimplemented_opcode(op); |
| 2603 | | break; |
| 2604 | | } |
| 2605 | | } |
| 2606 | | |
| 2607 | | inline void rsp_cop2::mfc2() /* MFC2 for the stored opcode m_op: load a sign-extended 16-bit slice of a vector register into RT. */ |
| 2608 | | { |
| 2609 | | UINT32 op = m_op; /* local copy named op; presumably also consumed by the VS1REG and RTREG macros - confirm against their definitions */ |
| 2610 | | int el = (op >> 7) & 0xf; /* byte offset from bits 10..7 of op */ |
| 2611 | | |
| 2612 | | UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); /* becomes the high byte of the result */ |
| 2613 | | UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); /* becomes the low byte; the offset wraps at 16 */ |
| 2614 | | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); /* sign-extend to 32 bits; no write when RTREG is 0 */ |
| 2615 | | } |
| 2616 | | |
| 2617 | | inline void rsp_cop2::cfc2() /* CFC2 for the stored opcode m_op: pack one bit per flag lane of the group chosen by RDREG into RT. */ |
| 2618 | | { |
| 2619 | | UINT32 op = m_op; /* not referenced by name below; presumably consumed by the RTREG and RDREG macros - confirm */ |
| 2620 | | if (RTREG) /* nothing is written when RTREG is 0 */ |
| 2621 | | { |
| 2622 | | switch(RDREG) |
| 2623 | | { |
| 2624 | | case 0: /* carry flags -> bits 0..7, zero flags -> bits 8..15 */ |
| 2625 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 2626 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 2627 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 2628 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 2629 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 2630 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 2631 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 2632 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 2633 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 2634 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 2635 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 2636 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 2637 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 2638 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 2639 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 2640 | | ((ZERO_FLAG(7) & 1) << 15); |
| 2641 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend from bit 15 */ |
| 2642 | | break; |
| 2643 | | case 1: /* compare flags -> bits 0..7, clip2 flags -> bits 8..15 */ |
| 2644 | | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 2645 | | ((COMPARE_FLAG(1) & 1) << 1) | |
| 2646 | | ((COMPARE_FLAG(2) & 1) << 2) | |
| 2647 | | ((COMPARE_FLAG(3) & 1) << 3) | |
| 2648 | | ((COMPARE_FLAG(4) & 1) << 4) | |
| 2649 | | ((COMPARE_FLAG(5) & 1) << 5) | |
| 2650 | | ((COMPARE_FLAG(6) & 1) << 6) | |
| 2651 | | ((COMPARE_FLAG(7) & 1) << 7) | |
| 2652 | | ((CLIP2_FLAG(0) & 1) << 8) | |
| 2653 | | ((CLIP2_FLAG(1) & 1) << 9) | |
| 2654 | | ((CLIP2_FLAG(2) & 1) << 10) | |
| 2655 | | ((CLIP2_FLAG(3) & 1) << 11) | |
| 2656 | | ((CLIP2_FLAG(4) & 1) << 12) | |
| 2657 | | ((CLIP2_FLAG(5) & 1) << 13) | |
| 2658 | | ((CLIP2_FLAG(6) & 1) << 14) | |
| 2659 | | ((CLIP2_FLAG(7) & 1) << 15); |
| 2660 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend from bit 15 */ |
| 2661 | | break; |
| 2662 | | case 2: /* clip1 flags -> bits 0..7 only, so no sign extension is applied */ |
| 2663 | | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 2664 | | ((CLIP1_FLAG(1) & 1) << 1) | |
| 2665 | | ((CLIP1_FLAG(2) & 1) << 2) | |
| 2666 | | ((CLIP1_FLAG(3) & 1) << 3) | |
| 2667 | | ((CLIP1_FLAG(4) & 1) << 4) | |
| 2668 | | ((CLIP1_FLAG(5) & 1) << 5) | |
| 2669 | | ((CLIP1_FLAG(6) & 1) << 6) | |
| 2670 | | ((CLIP1_FLAG(7) & 1) << 7); |
| 2671 | | break; |
| 2672 | | } /* any other RDREG value leaves RTVAL untouched: the switch has no default */ |
| 2673 | | } |
| 2674 | | } |
| 2675 | | |
| 2676 | | inline void rsp_cop2::mtc2() /* MTC2 for the stored opcode m_op: store bits 15..0 of RT into two consecutive bytes of a vector register. */ |
| 2677 | | { |
| 2678 | | UINT32 op = m_op; /* local copy named op; presumably also consumed by the VS1REG and RTVAL macros - confirm */ |
| 2679 | | int el = (op >> 7) & 0xf; /* byte offset from bits 10..7 of op */ |
| 2680 | | VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; /* bits 15..8 of RTVAL */ |
| 2681 | | VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; /* bits 7..0 of RTVAL; the offset wraps at 16 */ |
| 2682 | | } |
| 2683 | | |
| 2684 | | inline void rsp_cop2::ctc2() /* CTC2 for the stored opcode m_op: rebuild the flag group chosen by RDREG from the bits of RT. */ |
| 2685 | | { |
| 2686 | | UINT32 op = m_op; /* not referenced by name below; presumably consumed by the RDREG and RTVAL macros - confirm */ |
| 2687 | | switch(RDREG) |
| 2688 | | { |
| 2689 | | case 0: /* bits 0..7 -> carry flags, bits 8..15 -> zero flags */ |
| 2690 | | CLEAR_CARRY_FLAGS(); |
| 2691 | | CLEAR_ZERO_FLAGS(); |
| 2692 | | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* each m_vflag lane is stored as 0xffff or 0; row 0 is presumably the carry group - confirm against this file's flag indices */ |
| 2693 | | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 2694 | | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 2695 | | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 2696 | | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 2697 | | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 2698 | | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 2699 | | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 2700 | | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } /* NOTE(review): may duplicate the direct m_vflag stores above if the macro writes the same array - confirm */ |
| 2701 | | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 2702 | | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 2703 | | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 2704 | | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 2705 | | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 2706 | | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 2707 | | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 2708 | | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; /* row 3 is presumably the zero group - confirm */ |
| 2709 | | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 2710 | | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 2711 | | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 2712 | | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 2713 | | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 2714 | | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 2715 | | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 2716 | | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 2717 | | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 2718 | | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 2719 | | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 2720 | | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 2721 | | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 2722 | | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 2723 | | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 2724 | | break; |
| 2725 | | case 1: /* bits 0..7 -> compare flags, bits 8..15 -> clip2 flags */ |
| 2726 | | CLEAR_COMPARE_FLAGS(); |
| 2727 | | CLEAR_CLIP2_FLAGS(); |
| 2728 | | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* row 1 is presumably the compare group - confirm */ |
| 2729 | | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 2730 | | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 2731 | | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 2732 | | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 2733 | | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 2734 | | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 2735 | | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 2736 | | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 2737 | | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 2738 | | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 2739 | | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 2740 | | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 2741 | | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 2742 | | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 2743 | | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 2744 | | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; /* row 4 is presumably the clip2 group - confirm */ |
| 2745 | | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 2746 | | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 2747 | | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 2748 | | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 2749 | | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 2750 | | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 2751 | | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 2752 | | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 2753 | | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 2754 | | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 2755 | | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 2756 | | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 2757 | | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 2758 | | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 2759 | | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 2760 | | break; |
| 2761 | | case 2: /* bits 0..7 -> clip1 flags; higher bits of RTVAL are ignored */ |
| 2762 | | CLEAR_CLIP1_FLAGS(); |
| 2763 | | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* row 2 is presumably the clip1 group - confirm */ |
| 2764 | | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 2765 | | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 2766 | | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 2767 | | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 2768 | | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 2769 | | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 2770 | | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 2771 | | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 2772 | | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 2773 | | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 2774 | | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 2775 | | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 2776 | | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 2777 | | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 2778 | | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 2779 | | break; |
| 2780 | | } /* any other RDREG value changes no flags: the switch has no default */ |
| 2781 | | } |
| 2782 | | |
| 2783 | | void rsp_cop2::log_instruction_execution() /* Write every vector register that changed since the previous call to m_rsp.m_exec_output. */ |
| 2784 | | { |
| 2785 | | static VECTOR_REG prev_vecs[32]; /* snapshot from the previous call; function-local static, so one copy serves every caller */ |
| 2786 | | |
| 2787 | | for (int i = 0; i < 32; i++) |
| 2788 | | { |
| 2789 | | if (m_v[i].d[0] != prev_vecs[i].d[0] || m_v[i].d[1] != prev_vecs[i].d[1]) /* compare both d[] halves of the register */ |
| 2790 | | { |
| 2791 | | fprintf(m_rsp.m_exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i, /* eight 16-bit lanes in hex; the format ends with a space and has no newline */ |
| 2792 | | (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7)); |
| 2793 | | } |
| 2794 | | prev_vecs[i].d[0] = m_v[i].d[0]; /* refresh the snapshot whether or not the register was printed */ |
| 2795 | | prev_vecs[i].d[1] = m_v[i].d[1]; |
| 2796 | | } |
| 2797 | | } |
trunk/src/emu/cpu/rsp/rspcp2d.c
| r241959 | r241960 | |
| 1 | | /*************************************************************************** |
| 2 | | |
| 3 | | rspcp2d.c |
| 4 | | |
| 5 | | Universal machine language-based Nintendo/SGI RSP COP2 emulator. |
| 6 | | Written by Harmony of the MESS team. |
| 7 | | |
| 8 | | Copyright the MESS team. |
| 9 | | Released for general non-commercial use under the MAME license |
| 10 | | Visit http://mamedev.org for licensing and usage restrictions. |
| 11 | | |
| 12 | | ***************************************************************************/ |
| 13 | | |
| 14 | | #include "emu.h" |
| 15 | | #include "rsp.h" |
| 16 | | #include "rspdiv.h" |
| 17 | | #include "rspcp2.h" |
| 18 | | #include "rspcp2d.h" |
| 19 | | #include "cpu/drcfe.h" |
| 20 | | #include "cpu/drcuml.h" |
| 21 | | #include "cpu/drcumlsh.h" |
| 22 | | |
| 23 | | using namespace uml; |
| 24 | | |
| 25 | | extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); |
| 26 | | |
| 27 | | /*************************************************************************** |
| 28 | | Helpful Defines |
| 29 | | ***************************************************************************/ |
| 30 | | |
| 31 | | #define VDREG ((op >> 6) & 0x1f) /* bits 10..6 of op; WRITEBACK_RESULT uses it as the destination register */ |
| 32 | | #define VS1REG ((op >> 11) & 0x1f) /* bits 15..11 of op */ |
| 33 | | #define VS2REG ((op >> 16) & 0x1f) /* bits 20..16 of op */ |
| 34 | | #define EL ((op >> 21) & 0xf) /* bits 24..21 of op; used as the row index of VEC_EL_2 */ |
| 35 | | |
| 36 | | #define RSVAL (m_rsp.m_rsp_state->r[RSREG]) /* entry RSREG of the r[] array held by the owning RSP state */ |
| 37 | | #define RTVAL (m_rsp.m_rsp_state->r[RTREG]) /* entry RTREG of the same array */ |
| 38 | | #define RDVAL (m_rsp.m_rsp_state->r[RDREG]) /* entry RDREG of the same array */ |
| 39 | | |
| 40 | | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] /* byte access; the index is XORed with 1, presumably to swap bytes within each 16-bit lane - confirm */ |
| 41 | | #define W_VREG_S(reg, offset) m_v[(reg)].s[(offset)] /* uncast 16-bit lane, usable as an assignment target */ |
| 42 | | #define VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] /* 16-bit lane read through a cast to INT16 */ |
| 43 | | |
| 44 | | #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) /* table lookup: row x, column z */ |
| 45 | | |
| 46 | | #define ACCUM(x) m_accum[x].q /* the q member of accumulator entry x */ |
| 47 | | |
| 48 | | #define CARRY 0 /* first index into m_vflag for each flag group */ |
| 49 | | #define COMPARE 1 |
| 50 | | #define CLIP1 2 |
| 51 | | #define ZERO 3 |
| 52 | | #define CLIP2 4 |
| 53 | | |
| 54 | | static void cfunc_mfc2(void *param); /* forward declarations of file-local functions taking an untyped pointer */ |
| 55 | | static void cfunc_cfc2(void *param); |
| 56 | | static void cfunc_mtc2(void *param); |
| 57 | | static void cfunc_ctc2(void *param); |
| 58 | | |
| 59 | | #define ACCUM_H(x) (UINT16)m_accum[x].w[3] /* word 3 of accumulator entry x, read as UINT16 */ |
| 60 | | #define ACCUM_M(x) (UINT16)m_accum[x].w[2] /* word 2 */ |
| 61 | | #define ACCUM_L(x) (UINT16)m_accum[x].w[1] /* word 1 */ |
| 62 | | #define ACCUM_LL(x) (UINT16)m_accum[x].w[0] /* word 0 */ |
| 63 | | |
| 64 | | #define SET_ACCUM_H(v, x) m_accum[x].w[3] = v; /* NOTE(review): the expansion already ends in a semicolon and v is not parenthesised */ |
| 65 | | #define SET_ACCUM_M(v, x) m_accum[x].w[2] = v; |
| 66 | | #define SET_ACCUM_L(v, x) m_accum[x].w[1] = v; |
| 67 | | #define SET_ACCUM_LL(v, x) m_accum[x].w[0] = v; |
| 68 | | |
| 69 | | #define GET_VS1(out, i) out = VREG_S(VS1REG, i) /* out receives lane i of register VS1REG */ |
| 70 | | #define GET_VS2(out, i) out = VREG_S(VS2REG, VEC_EL_2(EL, i)) /* out receives the lane of VS2REG chosen by the table row EL */ |
| 71 | | |
| 72 | | #define CARRY_FLAG(x) (m_vflag[CARRY][x & 7] != 0 ? 0xffff : 0) /* yields 0xffff when lane x & 7 is non-zero, otherwise 0; NOTE(review): x is not parenthesised */ |
| 73 | | #define COMPARE_FLAG(x) (m_vflag[COMPARE][x & 7] != 0 ? 0xffff : 0) |
| 74 | | #define CLIP1_FLAG(x) (m_vflag[CLIP1][x & 7] != 0 ? 0xffff : 0) |
| 75 | | #define ZERO_FLAG(x) (m_vflag[ZERO][x & 7] != 0 ? 0xffff : 0) |
| 76 | | #define CLIP2_FLAG(x) (m_vflag[CLIP2][x & 7] != 0 ? 0xffff : 0) |
| 77 | | |
| 78 | | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[CARRY], 0, 16); } /* zeroes 16 bytes of the group; presumably 8 lanes of 2 bytes each - confirm the element type of m_vflag */ |
| 79 | | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[COMPARE], 0, 16); } |
| 80 | | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[CLIP1], 0, 16); } |
| 81 | | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[ZERO], 0, 16); } |
| 82 | | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[CLIP2], 0, 16); } |
| 83 | | |
| 84 | | #define SET_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0xffff; } /* a set lane is stored as 0xffff */ |
| 85 | | #define SET_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0xffff; } |
| 86 | | #define SET_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0xffff; } |
| 87 | | #define SET_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0xffff; } |
| 88 | | #define SET_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0xffff; } |
| 89 | | |
| 90 | | #define CLEAR_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0; } /* a cleared lane is stored as 0 */ |
| 91 | | #define CLEAR_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0; } |
| 92 | | #define CLEAR_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0; } |
| 93 | | #define CLEAR_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0; } |
| 94 | | #define CLEAR_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0; } |
| 95 | | |
| 96 | | #define WRITEBACK_RESULT() { /* copies m_vres[0..7] into the eight lanes of register VDREG */ \ |
| 97 | | W_VREG_S(VDREG, 0) = m_vres[0]; \ |
| 98 | | W_VREG_S(VDREG, 1) = m_vres[1]; \ |
| 99 | | W_VREG_S(VDREG, 2) = m_vres[2]; \ |
| 100 | | W_VREG_S(VDREG, 3) = m_vres[3]; \ |
| 101 | | W_VREG_S(VDREG, 4) = m_vres[4]; \ |
| 102 | | W_VREG_S(VDREG, 5) = m_vres[5]; \ |
| 103 | | W_VREG_S(VDREG, 6) = m_vres[6]; \ |
| 104 | | W_VREG_S(VDREG, 7) = m_vres[7]; \ |
| 105 | | } |
| 106 | | |
| 107 | | static const int vector_elements_2[16][8] = /* Lane-selection table read through VEC_EL_2(EL, i): the row is the 4-bit EL field, the column is the lane index i, and the entry is the lane of the source register to read. */ |
| 108 | | { |
| 109 | | { 0, 1, 2, 3, 4, 5, 6, 7 }, // none |
| 110 | | { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? |
| 111 | | { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q |
| 112 | | { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q |
| 113 | | { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h |
| 114 | | { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h |
| 115 | | { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h |
| 116 | | { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h |
| 117 | | { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 |
| 118 | | { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 |
| 119 | | { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 |
| 120 | | { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 |
| 121 | | { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 |
| 122 | | { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 |
| 123 | | { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 |
| 124 | | { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 |
| 125 | | }; |
| 126 | | |
| 127 | | void rsp_cop2_drc::cfunc_unimplemented_opcode() /* Report the stored opcode m_op as unknown and stop through fatalerror. */ |
| 128 | | { |
| 129 | | const UINT32 ppc = m_rsp.m_ppc; /* address used in both messages, taken from the owning RSP object */ |
| 130 | | if ((m_machine.debug_flags & DEBUG_FLAG_ENABLED) != 0) /* the disassembly line is printed only when the debugger flag is set */ |
| 131 | | { |
| 132 | | char string[200]; /* fixed-size buffer filled by rsp_dasm_one, which receives no length argument */ |
| 133 | | rsp_dasm_one(string, ppc, m_op); |
| 134 | | osd_printf_debug("%08X: %s\n", ppc, string); |
| 135 | | } |
| 136 | | fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", m_op >> 26, m_op, ppc); /* prints the top six bits of m_op, the full word and the address */ |
| 137 | | } |
| 138 | | |
| 139 | | static void unimplemented_opcode(void *param) /* Untyped-pointer wrapper that forwards to cfunc_unimplemented_opcode on the object passed in param. */ |
| 140 | | { |
| 141 | | ((rsp_cop2 *)param)->cfunc_unimplemented_opcode(); /* NOTE(review): casts to rsp_cop2 although the definition visible in this file belongs to rsp_cop2_drc - presumably a virtual of the base class; confirm */ |
| 142 | | } |
| 143 | | |
| 144 | | void rsp_cop2_drc::state_string_export(const int index, astring &string) /* Format the vector register selected by index as eight 4-digit hex lanes separated by a vertical bar. */ |
| 145 | | { |
| 146 | | switch (index) /* one case per register RSP_V0..RSP_V31; every case prints lanes 0..7 of that register */ |
| 147 | | { |
| 148 | | case RSP_V0: |
| 149 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); |
| 150 | | break; |
| 151 | | case RSP_V1: |
| 152 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); |
| 153 | | break; |
| 154 | | case RSP_V2: |
| 155 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); |
| 156 | | break; |
| 157 | | case RSP_V3: |
| 158 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); |
| 159 | | break; |
| 160 | | case RSP_V4: |
| 161 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); |
| 162 | | break; |
| 163 | | case RSP_V5: |
| 164 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); |
| 165 | | break; |
| 166 | | case RSP_V6: |
| 167 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); |
| 168 | | break; |
| 169 | | case RSP_V7: |
| 170 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); |
| 171 | | break; |
| 172 | | case RSP_V8: |
| 173 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); |
| 174 | | break; |
| 175 | | case RSP_V9: |
| 176 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); |
| 177 | | break; |
| 178 | | case RSP_V10: |
| 179 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); |
| 180 | | break; |
| 181 | | case RSP_V11: |
| 182 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); |
| 183 | | break; |
| 184 | | case RSP_V12: |
| 185 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); |
| 186 | | break; |
| 187 | | case RSP_V13: |
| 188 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); |
| 189 | | break; |
| 190 | | case RSP_V14: |
| 191 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); |
| 192 | | break; |
| 193 | | case RSP_V15: |
| 194 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); |
| 195 | | break; |
| 196 | | case RSP_V16: |
| 197 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); |
| 198 | | break; |
| 199 | | case RSP_V17: |
| 200 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); |
| 201 | | break; |
| 202 | | case RSP_V18: |
| 203 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); |
| 204 | | break; |
| 205 | | case RSP_V19: |
| 206 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); |
| 207 | | break; |
| 208 | | case RSP_V20: |
| 209 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); |
| 210 | | break; |
| 211 | | case RSP_V21: |
| 212 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); |
| 213 | | break; |
| 214 | | case RSP_V22: |
| 215 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); |
| 216 | | break; |
| 217 | | case RSP_V23: |
| 218 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); |
| 219 | | break; |
| 220 | | case RSP_V24: |
| 221 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); |
| 222 | | break; |
| 223 | | case RSP_V25: |
| 224 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); |
| 225 | | break; |
| 226 | | case RSP_V26: |
| 227 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); |
| 228 | | break; |
| 229 | | case RSP_V27: |
| 230 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); |
| 231 | | break; |
| 232 | | case RSP_V28: |
| 233 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); |
| 234 | | break; |
| 235 | | case RSP_V29: |
| 236 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); |
| 237 | | break; |
| 238 | | case RSP_V30: |
| 239 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); |
| 240 | | break; |
| 241 | | case RSP_V31: |
| 242 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); |
| 243 | | break; |
| 244 | | } /* any other index leaves string unchanged: the switch has no default */ |
| 245 | | } |
| 246 | | |
| 247 | | |
| 248 | | /*************************************************************************** |
| 249 | | Vector Load Instructions |
| 250 | | ***************************************************************************/ |
| 251 | | |
| 252 | | // LBV |
| 253 | | // |
| 254 | | // 31 25 20 15 10 6 0 |
| 255 | | // -------------------------------------------------- |
| 256 | | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 257 | | // -------------------------------------------------- |
| 258 | | // |
| 259 | | // Load 1 byte to vector byte index |
| 260 | | |
| 261 | | inline void rsp_cop2_drc::lbv() |
| 262 | | { |
| 263 | | UINT32 op = m_op; |
| 264 | | |
| 265 | | UINT32 ea = 0; |
| 266 | | int dest = (op >> 16) & 0x1f; |
| 267 | | int base = (op >> 21) & 0x1f; |
| 268 | | int index = (op >> 7) & 0xf; |
| 269 | | int offset = (op & 0x7f); |
| 270 | | if (offset & 0x40) |
| 271 | | { |
| 272 | | offset |= 0xffffffc0; |
| 273 | | } |
| 274 | | |
| 275 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 276 | | VREG_B(dest, index) = m_rsp.DM_READ8(ea); |
| 277 | | } |
| 278 | | |
| 279 | | static void cfunc_lbv(void *param) |
| 280 | | { |
| 281 | | ((rsp_cop2 *)param)->lbv(); |
| 282 | | } |
| 283 | | |
| 284 | | |
| 285 | | // LSV |
| 286 | | // |
| 287 | | // 31 25 20 15 10 6 0 |
| 288 | | // -------------------------------------------------- |
| 289 | | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 290 | | // -------------------------------------------------- |
| 291 | | // |
| 292 | | // Loads 2 bytes starting from vector byte index |
| 293 | | |
| 294 | | inline void rsp_cop2_drc::lsv() |
| 295 | | { |
| 296 | | UINT32 op = m_op; |
| 297 | | int dest = (op >> 16) & 0x1f; |
| 298 | | int base = (op >> 21) & 0x1f; |
| 299 | | int index = (op >> 7) & 0xe; |
| 300 | | int offset = (op & 0x7f); |
| 301 | | if (offset & 0x40) |
| 302 | | { |
| 303 | | offset |= 0xffffffc0; |
| 304 | | } |
| 305 | | |
| 306 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 307 | | int end = index + 2; |
| 308 | | for (int i = index; i < end; i++) |
| 309 | | { |
| 310 | | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 311 | | ea++; |
| 312 | | } |
| 313 | | } |
| 314 | | |
| 315 | | static void cfunc_lsv(void *param) |
| 316 | | { |
| 317 | | ((rsp_cop2 *)param)->lsv(); |
| 318 | | } |
| 319 | | |
| 320 | | |
| 321 | | // LLV |
| 322 | | // |
| 323 | | // 31 25 20 15 10 6 0 |
| 324 | | // -------------------------------------------------- |
| 325 | | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 326 | | // -------------------------------------------------- |
| 327 | | // |
| 328 | | // Loads 4 bytes starting from vector byte index |
| 329 | | |
| 330 | | inline void rsp_cop2_drc::llv() |
| 331 | | { |
| 332 | | UINT32 op = m_op; |
| 333 | | UINT32 ea = 0; |
| 334 | | int dest = (op >> 16) & 0x1f; |
| 335 | | int base = (op >> 21) & 0x1f; |
| 336 | | int index = (op >> 7) & 0xc; |
| 337 | | int offset = (op & 0x7f); |
| 338 | | if (offset & 0x40) |
| 339 | | { |
| 340 | | offset |= 0xffffffc0; |
| 341 | | } |
| 342 | | |
| 343 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 344 | | |
| 345 | | int end = index + 4; |
| 346 | | |
| 347 | | for (int i = index; i < end; i++) |
| 348 | | { |
| 349 | | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 350 | | ea++; |
| 351 | | } |
| 352 | | } |
| 353 | | |
| 354 | | static void cfunc_llv(void *param) |
| 355 | | { |
| 356 | | ((rsp_cop2 *)param)->llv(); |
| 357 | | } |
| 358 | | |
| 359 | | |
| 360 | | // LDV |
| 361 | | // |
| 362 | | // 31 25 20 15 10 6 0 |
| 363 | | // -------------------------------------------------- |
| 364 | | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 365 | | // -------------------------------------------------- |
| 366 | | // |
| 367 | | // Loads 8 bytes starting from vector byte index |
| 368 | | |
| 369 | | inline void rsp_cop2_drc::ldv() |
| 370 | | { |
| 371 | | UINT32 op = m_op; |
| 372 | | UINT32 ea = 0; |
| 373 | | int dest = (op >> 16) & 0x1f; |
| 374 | | int base = (op >> 21) & 0x1f; |
| 375 | | int index = (op >> 7) & 0x8; |
| 376 | | int offset = (op & 0x7f); |
| 377 | | if (offset & 0x40) |
| 378 | | { |
| 379 | | offset |= 0xffffffc0; |
| 380 | | } |
| 381 | | |
| 382 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 383 | | |
| 384 | | int end = index + 8; |
| 385 | | |
| 386 | | for (int i = index; i < end; i++) |
| 387 | | { |
| 388 | | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 389 | | ea++; |
| 390 | | } |
| 391 | | } |
| 392 | | |
| 393 | | static void cfunc_ldv(void *param) |
| 394 | | { |
| 395 | | ((rsp_cop2 *)param)->ldv(); |
| 396 | | } |
| 397 | | |
| 398 | | |
| 399 | | // LQV |
| 400 | | // |
| 401 | | // 31 25 20 15 10 6 0 |
| 402 | | // -------------------------------------------------- |
| 403 | | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 404 | | // -------------------------------------------------- |
| 405 | | // |
| 406 | | // Loads up to 16 bytes starting from vector byte index |
| 407 | | |
| 408 | | inline void rsp_cop2_drc::lqv() |
| 409 | | { |
| 410 | | UINT32 op = m_op; |
| 411 | | int dest = (op >> 16) & 0x1f; |
| 412 | | int base = (op >> 21) & 0x1f; |
| 413 | | int offset = (op & 0x7f); |
| 414 | | if (offset & 0x40) |
| 415 | | { |
| 416 | | offset |= 0xffffffc0; |
| 417 | | } |
| 418 | | |
| 419 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 420 | | |
| 421 | | int end = 16 - (ea & 0xf); |
| 422 | | if (end > 16) end = 16; |
| 423 | | |
| 424 | | for (int i = 0; i < end; i++) |
| 425 | | { |
| 426 | | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 427 | | ea++; |
| 428 | | } |
| 429 | | } |
| 430 | | |
| 431 | | static void cfunc_lqv(void *param) |
| 432 | | { |
| 433 | | ((rsp_cop2 *)param)->lqv(); |
| 434 | | } |
| 435 | | |
| 436 | | |
| 437 | | // LRV |
| 438 | | // |
| 439 | | // 31 25 20 15 10 6 0 |
| 440 | | // -------------------------------------------------- |
| 441 | | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 442 | | // -------------------------------------------------- |
| 443 | | // |
| 444 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 445 | | |
| 446 | | inline void rsp_cop2_drc::lrv() |
| 447 | | { |
| 448 | | UINT32 op = m_op; |
| 449 | | int dest = (op >> 16) & 0x1f; |
| 450 | | int base = (op >> 21) & 0x1f; |
| 451 | | int index = (op >> 7) & 0xf; |
| 452 | | int offset = (op & 0x7f); |
| 453 | | if (offset & 0x40) |
| 454 | | { |
| 455 | | offset |= 0xffffffc0; |
| 456 | | } |
| 457 | | |
| 458 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 459 | | |
| 460 | | index = 16 - ((ea & 0xf) - index); |
| 461 | | ea &= ~0xf; |
| 462 | | |
| 463 | | for (int i = index; i < 16; i++) |
| 464 | | { |
| 465 | | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 466 | | ea++; |
| 467 | | } |
| 468 | | } |
| 469 | | |
| 470 | | static void cfunc_lrv(void *param) |
| 471 | | { |
| 472 | | ((rsp_cop2 *)param)->lrv(); |
| 473 | | } |
| 474 | | |
| 475 | | |
| 476 | | // LPV |
| 477 | | // |
| 478 | | // 31 25 20 15 10 6 0 |
| 479 | | // -------------------------------------------------- |
| 480 | | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 481 | | // -------------------------------------------------- |
| 482 | | // |
| 483 | | // Loads a byte as the upper 8 bits of each element |
| 484 | | |
| 485 | | inline void rsp_cop2_drc::lpv() |
| 486 | | { |
| 487 | | UINT32 op = m_op; |
| 488 | | int dest = (op >> 16) & 0x1f; |
| 489 | | int base = (op >> 21) & 0x1f; |
| 490 | | int index = (op >> 7) & 0xf; |
| 491 | | int offset = (op & 0x7f); |
| 492 | | if (offset & 0x40) |
| 493 | | { |
| 494 | | offset |= 0xffffffc0; |
| 495 | | } |
| 496 | | |
| 497 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 498 | | |
| 499 | | for (int i = 0; i < 8; i++) |
| 500 | | { |
| 501 | | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 502 | | } |
| 503 | | } |
| 504 | | |
| 505 | | static void cfunc_lpv(void *param) |
| 506 | | { |
| 507 | | ((rsp_cop2 *)param)->lpv(); |
| 508 | | } |
| 509 | | |
| 510 | | |
| 511 | | // LUV |
| 512 | | // |
| 513 | | // 31 25 20 15 10 6 0 |
| 514 | | // -------------------------------------------------- |
| 515 | | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 516 | | // -------------------------------------------------- |
| 517 | | // |
| 518 | | // Loads a byte as the bits 14-7 of each element |
| 519 | | |
| 520 | | inline void rsp_cop2_drc::luv() |
| 521 | | { |
| 522 | | UINT32 op = m_op; |
| 523 | | int dest = (op >> 16) & 0x1f; |
| 524 | | int base = (op >> 21) & 0x1f; |
| 525 | | int index = (op >> 7) & 0xf; |
| 526 | | int offset = (op & 0x7f); |
| 527 | | if (offset & 0x40) |
| 528 | | { |
| 529 | | offset |= 0xffffffc0; |
| 530 | | } |
| 531 | | |
| 532 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 533 | | |
| 534 | | for (int i = 0; i < 8; i++) |
| 535 | | { |
| 536 | | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 537 | | } |
| 538 | | } |
| 539 | | |
| 540 | | static void cfunc_luv(void *param) |
| 541 | | { |
| 542 | | ((rsp_cop2 *)param)->luv(); |
| 543 | | } |
| 544 | | |
| 545 | | |
| 546 | | // LHV |
| 547 | | // |
| 548 | | // 31 25 20 15 10 6 0 |
| 549 | | // -------------------------------------------------- |
| 550 | | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 551 | | // -------------------------------------------------- |
| 552 | | // |
| 553 | | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 554 | | |
| 555 | | inline void rsp_cop2_drc::lhv() |
| 556 | | { |
| 557 | | UINT32 op = m_op; |
| 558 | | int dest = (op >> 16) & 0x1f; |
| 559 | | int base = (op >> 21) & 0x1f; |
| 560 | | int index = (op >> 7) & 0xf; |
| 561 | | int offset = (op & 0x7f); |
| 562 | | if (offset & 0x40) |
| 563 | | { |
| 564 | | offset |= 0xffffffc0; |
| 565 | | } |
| 566 | | |
| 567 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 568 | | |
| 569 | | for (int i = 0; i < 8; i++) |
| 570 | | { |
| 571 | | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 572 | | } |
| 573 | | } |
| 574 | | |
| 575 | | static void cfunc_lhv(void *param) |
| 576 | | { |
| 577 | | ((rsp_cop2 *)param)->lhv(); |
| 578 | | } |
| 579 | | |
| 580 | | |
| 581 | | // LFV |
| 582 | | // 31 25 20 15 10 6 0 |
| 583 | | // -------------------------------------------------- |
| 584 | | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 585 | | // -------------------------------------------------- |
| 586 | | // |
| 587 | | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 588 | | |
| 589 | | inline void rsp_cop2_drc::lfv() |
| 590 | | { |
| 591 | | UINT32 op = m_op; |
| 592 | | int dest = (op >> 16) & 0x1f; |
| 593 | | int base = (op >> 21) & 0x1f; |
| 594 | | int index = (op >> 7) & 0xf; |
| 595 | | int offset = (op & 0x7f); |
| 596 | | if (offset & 0x40) |
| 597 | | { |
| 598 | | offset |= 0xffffffc0; |
| 599 | | } |
| 600 | | |
| 601 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 602 | | |
| 603 | | // not sure what happens if 16-byte boundary is crossed... |
| 604 | | |
| 605 | | int end = (index >> 1) + 4; |
| 606 | | |
| 607 | | for (int i = index >> 1; i < end; i++) |
| 608 | | { |
| 609 | | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea) << 7; |
| 610 | | ea += 4; |
| 611 | | } |
| 612 | | } |
| 613 | | |
| 614 | | static void cfunc_lfv(void *param) |
| 615 | | { |
| 616 | | ((rsp_cop2 *)param)->lfv(); |
| 617 | | } |
| 618 | | |
| 619 | | |
| 620 | | // LWV |
| 621 | | // |
| 622 | | // 31 25 20 15 10 6 0 |
| 623 | | // -------------------------------------------------- |
| 624 | | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 625 | | // -------------------------------------------------- |
| 626 | | // |
| 627 | | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 628 | | // after byte index 15 |
| 629 | | |
| 630 | | inline void rsp_cop2_drc::lwv() |
| 631 | | { |
| 632 | | UINT32 op = m_op; |
| 633 | | int dest = (op >> 16) & 0x1f; |
| 634 | | int base = (op >> 21) & 0x1f; |
| 635 | | int index = (op >> 7) & 0xf; |
| 636 | | int offset = (op & 0x7f); |
| 637 | | if (offset & 0x40) |
| 638 | | { |
| 639 | | offset |= 0xffffffc0; |
| 640 | | } |
| 641 | | |
| 642 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 643 | | int end = (16 - index) + 16; |
| 644 | | |
| 645 | | for (int i = (16 - index); i < end; i++) |
| 646 | | { |
| 647 | | VREG_B(dest, i & 0xf) = m_rsp.DM_READ8(ea); |
| 648 | | ea += 4; |
| 649 | | } |
| 650 | | } |
| 651 | | |
| 652 | | static void cfunc_lwv(void *param) |
| 653 | | { |
| 654 | | ((rsp_cop2 *)param)->lwv(); |
| 655 | | } |
| 656 | | |
| 657 | | |
| 658 | | // LTV |
| 659 | | // |
| 660 | | // 31 25 20 15 10 6 0 |
| 661 | | // -------------------------------------------------- |
| 662 | | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 663 | | // -------------------------------------------------- |
| 664 | | // |
| 665 | | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 666 | | |
| 667 | | inline void rsp_cop2_drc::ltv() |
| 668 | | { |
| 669 | | UINT32 op = m_op; |
| 670 | | int dest = (op >> 16) & 0x1f; |
| 671 | | int base = (op >> 21) & 0x1f; |
| 672 | | int index = (op >> 7) & 0xf; |
| 673 | | int offset = (op & 0x7f); |
| 674 | | |
| 675 | | // FIXME: has a small problem with odd indices |
| 676 | | |
| 677 | | int vs = dest; |
| 678 | | int ve = dest + 8; |
| 679 | | if (ve > 32) |
| 680 | | { |
| 681 | | ve = 32; |
| 682 | | } |
| 683 | | |
| 684 | | int element = 7 - (index >> 1); |
| 685 | | |
| 686 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 687 | | |
| 688 | | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 689 | | for (int i = vs; i < ve; i++) |
| 690 | | { |
| 691 | | element = (8 - (index >> 1) + (i - vs)) << 1; |
| 692 | | VREG_B(i, (element & 0xf)) = m_rsp.DM_READ8(ea); |
| 693 | | VREG_B(i, ((element + 1) & 0xf)) = m_rsp.DM_READ8(ea + 1); |
| 694 | | ea += 2; |
| 695 | | } |
| 696 | | } |
| 697 | | |
| 698 | | static void cfunc_ltv(void *param) |
| 699 | | { |
| 700 | | ((rsp_cop2 *)param)->ltv(); |
| 701 | | } |
| 702 | | |
| 703 | | |
| 704 | | int rsp_cop2_drc::generate_lwc2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 705 | | { |
| 706 | | UINT32 op = desc->opptr.l[0]; |
| 707 | | int offset = (op & 0x7f); |
| 708 | | if (offset & 0x40) |
| 709 | | { |
| 710 | | offset |= 0xffffffc0; |
| 711 | | } |
| 712 | | |
| 713 | | switch ((op >> 11) & 0x1f) |
| 714 | | { |
| 715 | | case 0x00: /* LBV */ |
| 716 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 717 | | UML_CALLC(block, cfunc_lbv, this); |
| 718 | | return TRUE; |
| 719 | | |
| 720 | | case 0x01: /* LSV */ |
| 721 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 722 | | UML_CALLC(block, cfunc_lsv, this); |
| 723 | | return TRUE; |
| 724 | | |
| 725 | | case 0x02: /* LLV */ |
| 726 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 727 | | UML_CALLC(block, cfunc_llv, this); |
| 728 | | return TRUE; |
| 729 | | |
| 730 | | case 0x03: /* LDV */ |
| 731 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 732 | | UML_CALLC(block, cfunc_ldv, this); |
| 733 | | return TRUE; |
| 734 | | |
| 735 | | case 0x04: /* LQV */ |
| 736 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 737 | | UML_CALLC(block, cfunc_lqv, this); |
| 738 | | return TRUE; |
| 739 | | |
| 740 | | case 0x05: /* LRV */ |
| 741 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 742 | | UML_CALLC(block, cfunc_lrv, this); |
| 743 | | return TRUE; |
| 744 | | |
| 745 | | case 0x06: /* LPV */ |
| 746 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 747 | | UML_CALLC(block, cfunc_lpv, this); |
| 748 | | return TRUE; |
| 749 | | |
| 750 | | case 0x07: /* LUV */ |
| 751 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 752 | | UML_CALLC(block, cfunc_luv, this); |
| 753 | | return TRUE; |
| 754 | | |
| 755 | | case 0x08: /* LHV */ |
| 756 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 757 | | UML_CALLC(block, cfunc_lhv, this); |
| 758 | | return TRUE; |
| 759 | | |
| 760 | | case 0x09: /* LFV */ |
| 761 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 762 | | UML_CALLC(block, cfunc_lfv, this); |
| 763 | | return TRUE; |
| 764 | | |
| 765 | | case 0x0a: /* LWV */ |
| 766 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 767 | | UML_CALLC(block, cfunc_lwv, this); |
| 768 | | return TRUE; |
| 769 | | |
| 770 | | case 0x0b: /* LTV */ |
| 771 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 772 | | UML_CALLC(block, cfunc_ltv, this); |
| 773 | | return TRUE; |
| 774 | | |
| 775 | | default: |
| 776 | | return FALSE; |
| 777 | | } |
| 778 | | } |
| 779 | | |
| 780 | | |
| 781 | | /*************************************************************************** |
| 782 | | Vector Store Instructions |
| 783 | | ***************************************************************************/ |
| 784 | | |
| 785 | | // SBV |
| 786 | | // |
| 787 | | // 31 25 20 15 10 6 0 |
| 788 | | // -------------------------------------------------- |
| 789 | | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 790 | | // -------------------------------------------------- |
| 791 | | // |
| 792 | | // Stores 1 byte from vector byte index |
| 793 | | |
| 794 | | inline void rsp_cop2_drc::sbv() |
| 795 | | { |
| 796 | | UINT32 op = m_op; |
| 797 | | int dest = (op >> 16) & 0x1f; |
| 798 | | int base = (op >> 21) & 0x1f; |
| 799 | | int index = (op >> 7) & 0xf; |
| 800 | | int offset = (op & 0x7f); |
| 801 | | if (offset & 0x40) |
| 802 | | { |
| 803 | | offset |= 0xffffffc0; |
| 804 | | } |
| 805 | | |
| 806 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 807 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, index)); |
| 808 | | } |
| 809 | | |
| 810 | | static void cfunc_sbv(void *param) |
| 811 | | { |
| 812 | | ((rsp_cop2 *)param)->sbv(); |
| 813 | | } |
| 814 | | |
| 815 | | |
| 816 | | // SSV |
| 817 | | // |
| 818 | | // 31 25 20 15 10 6 0 |
| 819 | | // -------------------------------------------------- |
| 820 | | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 821 | | // -------------------------------------------------- |
| 822 | | // |
| 823 | | // Stores 2 bytes starting from vector byte index |
| 824 | | |
| 825 | | inline void rsp_cop2_drc::ssv() |
| 826 | | { |
| 827 | | UINT32 op = m_op; |
| 828 | | int dest = (op >> 16) & 0x1f; |
| 829 | | int base = (op >> 21) & 0x1f; |
| 830 | | int index = (op >> 7) & 0xf; |
| 831 | | int offset = (op & 0x7f); |
| 832 | | if (offset & 0x40) |
| 833 | | { |
| 834 | | offset |= 0xffffffc0; |
| 835 | | } |
| 836 | | |
| 837 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 838 | | |
| 839 | | int end = index + 2; |
| 840 | | for (int i = index; i < end; i++) |
| 841 | | { |
| 842 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, i)); |
| 843 | | ea++; |
| 844 | | } |
| 845 | | } |
| 846 | | |
| 847 | | static void cfunc_ssv(void *param) |
| 848 | | { |
| 849 | | ((rsp_cop2 *)param)->ssv(); |
| 850 | | } |
| 851 | | |
| 852 | | |
| 853 | | // SLV |
| 854 | | // |
| 855 | | // 31 25 20 15 10 6 0 |
| 856 | | // -------------------------------------------------- |
| 857 | | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 858 | | // -------------------------------------------------- |
| 859 | | // |
| 860 | | // Stores 4 bytes starting from vector byte index |
| 861 | | |
| 862 | | inline void rsp_cop2_drc::slv() |
| 863 | | { |
| 864 | | UINT32 op = m_op; |
| 865 | | int dest = (op >> 16) & 0x1f; |
| 866 | | int base = (op >> 21) & 0x1f; |
| 867 | | int index = (op >> 7) & 0xf; |
| 868 | | int offset = (op & 0x7f); |
| 869 | | if (offset & 0x40) |
| 870 | | { |
| 871 | | offset |= 0xffffffc0; |
| 872 | | } |
| 873 | | |
| 874 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 875 | | |
| 876 | | int end = index + 4; |
| 877 | | for (int i = index; i < end; i++) |
| 878 | | { |
| 879 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, i)); |
| 880 | | ea++; |
| 881 | | } |
| 882 | | } |
| 883 | | |
| 884 | | static void cfunc_slv(void *param) |
| 885 | | { |
| 886 | | ((rsp_cop2 *)param)->slv(); |
| 887 | | } |
| 888 | | |
| 889 | | |
| 890 | | // SDV |
| 891 | | // |
| 892 | | // 31 25 20 15 10 6 0 |
| 893 | | // -------------------------------------------------- |
| 894 | | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 895 | | // -------------------------------------------------- |
| 896 | | // |
| 897 | | // Stores 8 bytes starting from vector byte index |
| 898 | | |
| 899 | | inline void rsp_cop2_drc::sdv() |
| 900 | | { |
| 901 | | UINT32 op = m_op; |
| 902 | | int dest = (op >> 16) & 0x1f; |
| 903 | | int base = (op >> 21) & 0x1f; |
| 904 | | int index = (op >> 7) & 0x8; |
| 905 | | int offset = (op & 0x7f); |
| 906 | | if (offset & 0x40) |
| 907 | | { |
| 908 | | offset |= 0xffffffc0; |
| 909 | | } |
| 910 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 911 | | |
| 912 | | int end = index + 8; |
| 913 | | for (int i = index; i < end; i++) |
| 914 | | { |
| 915 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, i)); |
| 916 | | ea++; |
| 917 | | } |
| 918 | | } |
| 919 | | |
| 920 | | static void cfunc_sdv(void *param) |
| 921 | | { |
| 922 | | ((rsp_cop2 *)param)->sdv(); |
| 923 | | } |
| 924 | | |
| 925 | | |
| 926 | | // SQV |
| 927 | | // |
| 928 | | // 31 25 20 15 10 6 0 |
| 929 | | // -------------------------------------------------- |
| 930 | | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 931 | | // -------------------------------------------------- |
| 932 | | // |
| 933 | | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 934 | | |
| 935 | | inline void rsp_cop2_drc::sqv() |
| 936 | | { |
| 937 | | UINT32 op = m_op; |
| 938 | | int dest = (op >> 16) & 0x1f; |
| 939 | | int base = (op >> 21) & 0x1f; |
| 940 | | int index = (op >> 7) & 0xf; |
| 941 | | int offset = (op & 0x7f); |
| 942 | | if (offset & 0x40) |
| 943 | | { |
| 944 | | offset |= 0xffffffc0; |
| 945 | | } |
| 946 | | |
| 947 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 948 | | int end = index + (16 - (ea & 0xf)); |
| 949 | | for (int i=index; i < end; i++) |
| 950 | | { |
| 951 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 952 | | ea++; |
| 953 | | } |
| 954 | | } |
| 955 | | |
| 956 | | static void cfunc_sqv(void *param) |
| 957 | | { |
| 958 | | ((rsp_cop2 *)param)->sqv(); |
| 959 | | } |
| 960 | | |
| 961 | | |
| 962 | | // SRV |
| 963 | | // |
| 964 | | // 31 25 20 15 10 6 0 |
| 965 | | // -------------------------------------------------- |
| 966 | | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 967 | | // -------------------------------------------------- |
| 968 | | // |
| 969 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 970 | | |
| 971 | | inline void rsp_cop2_drc::srv() |
| 972 | | { |
| 973 | | UINT32 op = m_op; |
| 974 | | int dest = (op >> 16) & 0x1f; |
| 975 | | int base = (op >> 21) & 0x1f; |
| 976 | | int index = (op >> 7) & 0xf; |
| 977 | | int offset = (op & 0x7f); |
| 978 | | if (offset & 0x40) |
| 979 | | { |
| 980 | | offset |= 0xffffffc0; |
| 981 | | } |
| 982 | | |
| 983 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 984 | | |
| 985 | | int end = index + (ea & 0xf); |
| 986 | | int o = (16 - (ea & 0xf)) & 0xf; |
| 987 | | ea &= ~0xf; |
| 988 | | |
| 989 | | for (int i = index; i < end; i++) |
| 990 | | { |
| 991 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 992 | | ea++; |
| 993 | | } |
| 994 | | } |
| 995 | | |
| 996 | | static void cfunc_srv(void *param) |
| 997 | | { |
| 998 | | ((rsp_cop2 *)param)->srv(); |
| 999 | | } |
| 1000 | | |
| 1001 | | |
| 1002 | | // SPV |
| 1003 | | // |
| 1004 | | // 31 25 20 15 10 6 0 |
| 1005 | | // -------------------------------------------------- |
| 1006 | | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1007 | | // -------------------------------------------------- |
| 1008 | | // |
| 1009 | | // Stores upper 8 bits of each element |
| 1010 | | |
| 1011 | | inline void rsp_cop2_drc::spv() |
| 1012 | | { |
| 1013 | | UINT32 op = m_op; |
| 1014 | | int dest = (op >> 16) & 0x1f; |
| 1015 | | int base = (op >> 21) & 0x1f; |
| 1016 | | int index = (op >> 7) & 0xf; |
| 1017 | | int offset = (op & 0x7f); |
| 1018 | | if (offset & 0x40) |
| 1019 | | { |
| 1020 | | offset |= 0xffffffc0; |
| 1021 | | } |
| 1022 | | |
| 1023 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1024 | | int end = index + 8; |
| 1025 | | for (int i=index; i < end; i++) |
| 1026 | | { |
| 1027 | | if ((i & 0xf) < 8) |
| 1028 | | { |
| 1029 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, (i & 0xf) << 1)); |
| 1030 | | } |
| 1031 | | else |
| 1032 | | { |
| 1033 | | m_rsp.DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1034 | | } |
| 1035 | | ea++; |
| 1036 | | } |
| 1037 | | } |
| 1038 | | |
| 1039 | | static void cfunc_spv(void *param) |
| 1040 | | { |
| 1041 | | ((rsp_cop2 *)param)->spv(); |
| 1042 | | } |
| 1043 | | |
| 1044 | | |
| 1045 | | // SUV |
| 1046 | | // |
| 1047 | | // 31 25 20 15 10 6 0 |
| 1048 | | // -------------------------------------------------- |
| 1049 | | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1050 | | // -------------------------------------------------- |
| 1051 | | // |
| 1052 | | // Stores bits 14-7 of each element |
| 1053 | | |
| 1054 | | inline void rsp_cop2_drc::suv() |
| 1055 | | { |
| 1056 | | UINT32 op = m_op; |
| 1057 | | int dest = (op >> 16) & 0x1f; |
| 1058 | | int base = (op >> 21) & 0x1f; |
| 1059 | | int index = (op >> 7) & 0xf; |
| 1060 | | int offset = (op & 0x7f); |
| 1061 | | if (offset & 0x40) |
| 1062 | | { |
| 1063 | | offset |= 0xffffffc0; |
| 1064 | | } |
| 1065 | | |
| 1066 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1067 | | int end = index + 8; |
| 1068 | | for (int i=index; i < end; i++) |
| 1069 | | { |
| 1070 | | if ((i & 0xf) < 8) |
| 1071 | | { |
| 1072 | | m_rsp.DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1073 | | } |
| 1074 | | else |
| 1075 | | { |
| 1076 | | m_rsp.DM_WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 1077 | | } |
| 1078 | | ea++; |
| 1079 | | } |
| 1080 | | } |
| 1081 | | |
| 1082 | | static void cfunc_suv(void *param) |
| 1083 | | { |
| 1084 | | ((rsp_cop2 *)param)->suv(); |
| 1085 | | } |
| 1086 | | |
| 1087 | | |
| 1088 | | // SHV |
| 1089 | | // |
| 1090 | | // 31 25 20 15 10 6 0 |
| 1091 | | // -------------------------------------------------- |
| 1092 | | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1093 | | // -------------------------------------------------- |
| 1094 | | // |
| 1095 | | // Stores bits 14-7 of each element, with 2-byte stride |
| 1096 | | |
| 1097 | | inline void rsp_cop2_drc::shv() |
| 1098 | | { |
| 1099 | | UINT32 op = m_op; |
| 1100 | | int dest = (op >> 16) & 0x1f; |
| 1101 | | int base = (op >> 21) & 0x1f; |
| 1102 | | int index = (op >> 7) & 0xf; |
| 1103 | | int offset = (op & 0x7f); |
| 1104 | | if (offset & 0x40) |
| 1105 | | { |
| 1106 | | offset |= 0xffffffc0; |
| 1107 | | } |
| 1108 | | |
| 1109 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1110 | | for (int i=0; i < 8; i++) |
| 1111 | | { |
| 1112 | | int element = index + (i << 1); |
| 1113 | | UINT8 d = (VREG_B(dest, (element & 0xf)) << 1) | |
| 1114 | | (VREG_B(dest, ((element + 1) & 0xf)) >> 7); |
| 1115 | | m_rsp.DM_WRITE8(ea, d); |
| 1116 | | ea += 2; |
| 1117 | | } |
| 1118 | | } |
| 1119 | | |
| 1120 | | static void cfunc_shv(void *param) |
| 1121 | | { |
| 1122 | | ((rsp_cop2 *)param)->shv(); |
| 1123 | | } |
| 1124 | | |
| 1125 | | |
| 1126 | | // SFV |
| 1127 | | // |
| 1128 | | // 31 25 20 15 10 6 0 |
| 1129 | | // -------------------------------------------------- |
| 1130 | | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1131 | | // -------------------------------------------------- |
| 1132 | | // |
| 1133 | | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 1134 | | |
| 1135 | | inline void rsp_cop2_drc::sfv() |
| 1136 | | { |
| 1137 | | UINT32 op = m_op; |
| 1138 | | int dest = (op >> 16) & 0x1f; |
| 1139 | | int base = (op >> 21) & 0x1f; |
| 1140 | | int index = (op >> 7) & 0xf; |
| 1141 | | int offset = (op & 0x7f); |
| 1142 | | if (offset & 0x40) |
| 1143 | | { |
| 1144 | | offset |= 0xffffffc0; |
| 1145 | | } |
| 1146 | | |
| 1147 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1148 | | int eaoffset = ea & 0xf; |
| 1149 | | ea &= ~0xf; |
| 1150 | | |
| 1151 | | int end = (index >> 1) + 4; |
| 1152 | | |
| 1153 | | for (int i = index>>1; i < end; i++) |
| 1154 | | { |
| 1155 | | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 1156 | | eaoffset += 4; |
| 1157 | | } |
| 1158 | | } |
| 1159 | | |
| 1160 | | static void cfunc_sfv(void *param) |
| 1161 | | { |
| 1162 | | ((rsp_cop2 *)param)->sfv(); |
| 1163 | | } |
| 1164 | | |
| 1165 | | |
| 1166 | | // SWV |
| 1167 | | // |
| 1168 | | // 31 25 20 15 10 6 0 |
| 1169 | | // -------------------------------------------------- |
| 1170 | | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1171 | | // -------------------------------------------------- |
| 1172 | | // |
| 1173 | | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1174 | | // after byte index 15 |
| 1175 | | |
| 1176 | | inline void rsp_cop2_drc::swv() |
| 1177 | | { |
| 1178 | | // Decode register numbers, starting byte index and 7-bit offset from the latched opcode word |
| 1179 | | const UINT32 op = m_op; |
| 1180 | | const int vt = (op >> 16) & 0x1f; |
| 1181 | | const int base_reg = (op >> 21) & 0x1f; |
| 1182 | | const int start = (op >> 7) & 0xf; |
| 1183 | | int disp = (op & 0x7f); |
| 1184 | | if (disp & 0x40) disp |= 0xffffffc0; // sign-extend from bit 6 |
| 1185 | | |
| 1186 | | // Address is offset * 16, plus the base register unless the base field is 0 |
| 1187 | | UINT32 addr = disp * 16; |
| 1188 | | if (base_reg) addr = m_rsp.m_rsp_state->r[base_reg] + (disp * 16); |
| 1189 | | const int lane = addr & 0xf; |
| 1190 | | addr &= ~0xf; |
| 1191 | | |
| 1192 | | // All 16 bytes are written; both the register byte index and the memory lane wrap modulo 16 |
| 1193 | | for (int n = 0; n < 16; n++) |
| 1194 | | { |
| 1195 | | m_rsp.DM_WRITE8(addr + ((lane + n) & 0xf), VREG_B(vt, (start + n) & 0xf)); |
| 1196 | | } |
| 1197 | | } |
| 1199 | | |
| 1200 | | static void cfunc_swv(void *param) |
| 1201 | | { // C-callable trampoline: param is the object pointer handed to UML_CALLC |
| 1202 | | static_cast<rsp_cop2 *>(param)->swv(); |
| 1203 | | } |
| 1204 | | |
| 1205 | | |
| 1206 | | // STV |
| 1207 | | // |
| 1208 | | // 31 25 20 15 10 6 0 |
| 1209 | | // -------------------------------------------------- |
| 1210 | | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1211 | | // -------------------------------------------------- |
| 1212 | | // |
| 1213 | | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 1214 | | |
| 1215 | | inline void rsp_cop2_drc::stv() |
| 1216 | | { |
| 1217 | | // Decode first register, base register, element index and 7-bit offset from the latched opcode word |
| 1218 | | const UINT32 op = m_op; |
| 1219 | | const int first_reg = (op >> 16) & 0x1f; |
| 1220 | | const int base_reg = (op >> 21) & 0x1f; |
| 1221 | | const int index = (op >> 7) & 0xf; |
| 1222 | | int disp = (op & 0x7f); |
| 1223 | | if (disp & 0x40) disp |= 0xffffffc0; // sign-extend from bit 6 |
| 1224 | | |
| 1225 | | // Up to eight consecutive registers take part; the range is clipped so it never passes register 31 |
| 1226 | | const int reg_limit = (first_reg + 8 > 32) ? 32 : (first_reg + 8); |
| 1227 | | |
| 1228 | | // Starting halfword slot; it advances by one per register and is masked to 0..7 on use |
| 1229 | | int slot = 8 - (index >> 1); |
| 1230 | | |
| 1231 | | // Address is offset * 16, plus the base register unless the base field is 0 |
| 1232 | | UINT32 addr = disp * 16; |
| 1233 | | if (base_reg) addr = m_rsp.m_rsp_state->r[base_reg] + (disp * 16); |
| 1234 | | int lane = (addr & 0xf) + (slot * 2); |
| 1235 | | addr &= ~0xf; |
| 1236 | | |
| 1237 | | // One halfword per register, 2 bytes apart, wrapping inside the 16-byte line |
| 1238 | | for (int reg = first_reg; reg < reg_limit; reg++, slot++, lane += 2) |
| 1239 | | { |
| 1240 | | m_rsp.DM_WRITE16(addr + (lane & 0xf), VREG_S(reg, slot & 0x7)); |
| 1241 | | } |
| 1242 | | } |
| 1248 | | |
| 1249 | | static void cfunc_stv(void *param) |
| 1250 | | { // C-callable trampoline: param is the object pointer handed to UML_CALLC |
| 1251 | | static_cast<rsp_cop2 *>(param)->stv(); |
| 1252 | | } |
| 1253 | | |
| 1254 | | int rsp_cop2_drc::generate_swc2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 1255 | | { |
| 1256 | | // Emits UML for one SWC2 (vector store) instruction: the opcode word is latched into m_op |
| 1257 | | // and the matching C handler is called with this object as its parameter. |
| 1258 | | // Returns TRUE when code was generated, FALSE for an unrecognised sub-opcode. |
| 1259 | | // 'compiler' is not used by this function. |
| 1260 | | const UINT32 op = desc->opptr.l[0]; |
| 1261 | | void (*handler)(void *) = NULL; |
| 1262 | | |
| 1263 | | // Bits 15..11 of the opcode select the store variant |
| 1264 | | switch ((op >> 11) & 0x1f) |
| 1265 | | { |
| 1266 | | case 0x00: handler = cfunc_sbv; break; /* SBV */ |
| 1267 | | case 0x01: handler = cfunc_ssv; break; /* SSV */ |
| 1268 | | case 0x02: handler = cfunc_slv; break; /* SLV */ |
| 1269 | | case 0x03: handler = cfunc_sdv; break; /* SDV */ |
| 1270 | | case 0x04: handler = cfunc_sqv; break; /* SQV */ |
| 1271 | | case 0x05: handler = cfunc_srv; break; /* SRV */ |
| 1272 | | case 0x06: handler = cfunc_spv; break; /* SPV */ |
| 1273 | | case 0x07: handler = cfunc_suv; break; /* SUV */ |
| 1274 | | case 0x08: handler = cfunc_shv; break; /* SHV */ |
| 1275 | | case 0x09: handler = cfunc_sfv; break; /* SFV */ |
| 1276 | | case 0x0a: handler = cfunc_swv; break; /* SWV */ |
| 1277 | | case 0x0b: handler = cfunc_stv; break; /* STV */ |
| 1278 | | |
| 1279 | | default: |
| 1280 | | m_rsp.unimplemented_opcode(op); |
| 1281 | | return FALSE; |
| 1282 | | } |
| 1283 | | |
| 1284 | | // The emitted sequence is the same for every variant; only the handler differs |
| 1285 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1286 | | UML_CALLC(block, handler, this); |
| 1287 | | return TRUE; |
| 1288 | | } |
| 1332 | | |
| 1333 | | |
| 1334 | | /*************************************************************************** |
| 1335 | | Vector Opcodes |
| 1336 | | ***************************************************************************/ |
| 1337 | | |
| 1338 | | // VMULF |
| 1339 | | // |
| 1340 | | // 31 25 24 20 15 10 5 0 |
| 1341 | | // ------------------------------------------------------ |
| 1342 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 1343 | | // ------------------------------------------------------ |
| 1344 | | // |
| 1345 | | // Multiplies signed integer by signed integer * 2 |
| 1346 | | |
| 1347 | | inline void rsp_cop2_drc::vmulf() |
| 1348 | | { |
| 1349 | | int op = m_op; |
| 1350 | | |
| 1351 | | for (int i = 0; i < 8; i++) |
| 1352 | | { |
| 1353 | | UINT16 w1, w2; |
| 1354 | | GET_VS1(w1, i); |
| 1355 | | GET_VS2(w2, i); |
| 1356 | | const INT32 lhs = (INT16)w1; |
| 1357 | | const INT32 rhs = (INT16)w2; |
| 1358 | | |
| 1359 | | if (lhs != -32768 || rhs != -32768) |
| 1360 | | { |
| 1361 | | // doubled product plus 0x8000: bits 31..16 go to the middle slice, the sign fills the high slice |
| 1362 | | INT64 acc = lhs * rhs * 2; |
| 1363 | | acc += 0x8000; |
| 1364 | | SET_ACCUM_H((acc < 0) ? 0xffff : 0, i); |
| 1365 | | SET_ACCUM_M((INT16)(acc >> 16), i); |
| 1366 | | SET_ACCUM_L((UINT16)(acc), i); |
| 1367 | | m_vres[i] = ACCUM_M(i); |
| 1368 | | continue; |
| 1369 | | } |
| 1370 | | |
| 1371 | | // both operands are -32768: fixed accumulator pattern, result pinned to 0x7fff |
| 1372 | | SET_ACCUM_H(0, i); |
| 1373 | | SET_ACCUM_M(-32768, i); |
| 1374 | | SET_ACCUM_L(-32768, i); |
| 1375 | | m_vres[i] = 0x7fff; |
| 1376 | | } |
| 1377 | | WRITEBACK_RESULT(); |
| 1378 | | } |
| 1379 | | |
| 1380 | | static void cfunc_vmulf(void *param) |
| 1381 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMULF on |
| 1382 | | static_cast<rsp_cop2 *>(param)->vmulf(); |
| 1383 | | } |
| 1384 | | |
| 1385 | | |
| 1386 | | // VMULU |
| 1387 | | // |
| 1388 | | // 31 25 24 20 15 10 5 0 |
| 1389 | | // ------------------------------------------------------ |
| 1390 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 1391 | | // ------------------------------------------------------ |
| 1392 | | // |
| 1393 | | |
| 1394 | | inline void rsp_cop2_drc::vmulu() |
| 1395 | | { |
| 1396 | | // For each of the 8 elements: accumulator = signed(VS1) * signed(VS2) * 2 + 0x8000, |
| 1397 | | // destination = middle slice clamped to the unsigned range (0 if negative, 0xffff on overflow). |
| 1398 | | int op = m_op; |
| 1399 | | |
| 1400 | | for (int i = 0; i < 8; i++) |
| 1401 | | { |
| 1402 | | UINT16 w1, w2; |
| 1403 | | GET_VS1(w1, i); |
| 1404 | | GET_VS2(w2, i); |
| 1405 | | INT32 s1 = (INT32)(INT16)w1; |
| 1406 | | INT32 s2 = (INT32)(INT16)w2; |
| 1407 | | |
| 1408 | | // Widen before multiplying: -32768 * -32768 * 2 is 2^31, which does not fit in a 32-bit int |
| 1409 | | INT64 r = (INT64)s1 * (INT64)s2 * 2; |
| 1410 | | r += 0x8000; // rounding ? |
| 1411 | | |
| 1412 | | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 1413 | | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 1414 | | SET_ACCUM_L((UINT16)(r), i); |
| 1415 | | |
| 1416 | | if (r < 0) |
| 1417 | | { |
| 1418 | | m_vres[i] = 0; |
| 1419 | | } |
| 1420 | | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) // sign bits of high and middle slices differ |
| 1421 | | { |
| 1422 | | m_vres[i] = -1; |
| 1423 | | } |
| 1424 | | else { m_vres[i] = ACCUM_M(i); } |
| 1425 | | } |
| 1426 | | WRITEBACK_RESULT(); |
| 1427 | | } |
| 1428 | | |
| 1429 | | static void cfunc_vmulu(void *param) |
| 1430 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMULU on |
| 1431 | | static_cast<rsp_cop2 *>(param)->vmulu(); |
| 1432 | | } |
| 1433 | | |
| 1434 | | |
| 1435 | | // VMUDL |
| 1436 | | // |
| 1437 | | // 31 25 24 20 15 10 5 0 |
| 1438 | | // ------------------------------------------------------ |
| 1439 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1440 | | // ------------------------------------------------------ |
| 1441 | | // |
| 1442 | | // Multiplies unsigned fraction by unsigned fraction |
| 1443 | | // The high 16 bits of the 32-bit product are stored into the low slice of accumulator; the middle and high slices are cleared |
| 1444 | | // The low slice of accumulator is stored into destination element |
| 1445 | | |
| 1446 | | inline void rsp_cop2_drc::vmudl() |
| 1447 | | { |
| 1448 | | int op = m_op; |
| 1449 | | |
| 1450 | | for (int i = 0; i < 8; i++) |
| 1451 | | { |
| 1452 | | UINT16 w1, w2; |
| 1453 | | GET_VS1(w1, i); |
| 1454 | | GET_VS2(w2, i); |
| 1455 | | |
| 1456 | | // unsigned 16x16 -> 32-bit product; only its upper half is kept, in the low slice |
| 1457 | | const UINT32 product = (UINT32)w1 * (UINT32)w2; |
| 1458 | | |
| 1459 | | SET_ACCUM_H(0, i); |
| 1460 | | SET_ACCUM_M(0, i); |
| 1461 | | SET_ACCUM_L((UINT16)(product >> 16), i); |
| 1462 | | |
| 1463 | | m_vres[i] = ACCUM_L(i); |
| 1464 | | } |
| 1465 | | WRITEBACK_RESULT(); |
| 1466 | | } |
| 1468 | | |
| 1469 | | static void cfunc_vmudl(void *param) |
| 1470 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMUDL on |
| 1471 | | static_cast<rsp_cop2 *>(param)->vmudl(); |
| 1472 | | } |
| 1473 | | |
| 1474 | | |
| 1475 | | // VMUDM |
| 1476 | | // |
| 1477 | | // 31 25 24 20 15 10 5 0 |
| 1478 | | // ------------------------------------------------------ |
| 1479 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1480 | | // ------------------------------------------------------ |
| 1481 | | // |
| 1482 | | // Multiplies signed integer by unsigned fraction |
| 1483 | | // The result is stored into accumulator |
| 1484 | | // The middle slice of accumulator is stored into destination element |
| 1485 | | |
| 1486 | | inline void rsp_cop2_drc::vmudm() |
| 1487 | | { |
| 1488 | | int op = m_op; |
| 1489 | | |
| 1490 | | for (int i = 0; i < 8; i++) |
| 1491 | | { |
| 1492 | | UINT16 w1, w2; |
| 1493 | | GET_VS1(w1, i); |
| 1494 | | GET_VS2(w2, i); |
| 1495 | | |
| 1496 | | // VS1 element is taken as signed, VS2 element as unsigned |
| 1497 | | const INT32 product = (INT32)(INT16)w1 * (INT32)w2; |
| 1498 | | const int sign_fill = (product < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1499 | | |
| 1500 | | SET_ACCUM_H(sign_fill, i); |
| 1501 | | SET_ACCUM_M((INT16)(product >> 16), i); |
| 1502 | | SET_ACCUM_L((UINT16)product, i); |
| 1503 | | |
| 1504 | | m_vres[i] = ACCUM_M(i); |
| 1505 | | } |
| 1506 | | WRITEBACK_RESULT(); |
| 1507 | | } |
| 1508 | | |
| 1509 | | static void cfunc_vmudm(void *param) |
| 1510 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMUDM on |
| 1511 | | static_cast<rsp_cop2 *>(param)->vmudm(); |
| 1512 | | } |
| 1513 | | |
| 1514 | | |
| 1515 | | // VMUDN |
| 1516 | | // |
| 1517 | | // 31 25 24 20 15 10 5 0 |
| 1518 | | // ------------------------------------------------------ |
| 1519 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1520 | | // ------------------------------------------------------ |
| 1521 | | // |
| 1522 | | // Multiplies unsigned fraction by signed integer |
| 1523 | | // The result is stored into accumulator |
| 1524 | | // The low slice of accumulator is stored into destination element |
| 1525 | | |
| 1526 | | inline void rsp_cop2_drc::vmudn() |
| 1527 | | { |
| 1528 | | int op = m_op; |
| 1529 | | |
| 1530 | | for (int i = 0; i < 8; i++) |
| 1531 | | { |
| 1532 | | UINT16 w1, w2; |
| 1533 | | GET_VS1(w1, i); |
| 1534 | | GET_VS2(w2, i); |
| 1535 | | |
| 1536 | | // VS1 element is taken as unsigned, VS2 element as signed |
| 1537 | | const INT32 product = (INT32)w1 * (INT32)(INT16)w2; |
| 1538 | | const int sign_fill = (product < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1539 | | |
| 1540 | | SET_ACCUM_H(sign_fill, i); |
| 1541 | | SET_ACCUM_M((INT16)(product >> 16), i); |
| 1542 | | SET_ACCUM_L((UINT16)(product), i); |
| 1543 | | |
| 1544 | | m_vres[i] = (UINT16)(product); |
| 1545 | | } |
| 1546 | | WRITEBACK_RESULT(); |
| 1547 | | } |
| 1548 | | |
| 1549 | | static void cfunc_vmudn(void *param) |
| 1550 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMUDN on |
| 1551 | | static_cast<rsp_cop2 *>(param)->vmudn(); |
| 1552 | | } |
| 1553 | | |
| 1554 | | |
| 1555 | | // VMUDH |
| 1556 | | // |
| 1557 | | // 31 25 24 20 15 10 5 0 |
| 1558 | | // ------------------------------------------------------ |
| 1559 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1560 | | // ------------------------------------------------------ |
| 1561 | | // |
| 1562 | | // Multiplies signed integer by signed integer |
| 1563 | | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1564 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1565 | | |
| 1566 | | inline void rsp_cop2_drc::vmudh() |
| 1567 | | { |
| 1568 | | int op = m_op; |
| 1569 | | |
| 1570 | | for (int i = 0; i < 8; i++) |
| 1571 | | { |
| 1572 | | UINT16 w1, w2; |
| 1573 | | GET_VS1(w1, i); |
| 1574 | | GET_VS2(w2, i); |
| 1575 | | |
| 1576 | | // signed 16x16 product occupies the high and middle slices; the low slice is cleared |
| 1577 | | const INT32 product = (INT32)(INT16)w1 * (INT32)(INT16)w2; |
| 1578 | | |
| 1579 | | SET_ACCUM_H((INT16)(product >> 16), i); |
| 1580 | | SET_ACCUM_M((UINT16)(product), i); |
| 1581 | | SET_ACCUM_L(0, i); |
| 1582 | | |
| 1583 | | // destination receives the product clamped to the signed 16-bit range |
| 1584 | | const INT32 clamped = (product < -32768) ? -32768 : ((product > 32767) ? 32767 : product); |
| 1585 | | m_vres[i] = (INT16)(clamped); |
| 1586 | | } |
| 1587 | | WRITEBACK_RESULT(); |
| 1588 | | } |
| 1590 | | |
| 1591 | | static void cfunc_vmudh(void *param) |
| 1592 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMUDH on |
| 1593 | | static_cast<rsp_cop2 *>(param)->vmudh(); |
| 1594 | | } |
| 1595 | | |
| 1596 | | |
| 1597 | | // VMACF |
| 1598 | | // |
| 1599 | | // 31 25 24 20 15 10 5 0 |
| 1600 | | // ------------------------------------------------------ |
| 1601 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1602 | | // ------------------------------------------------------ |
| 1603 | | // |
| 1604 | | |
| 1605 | | inline void rsp_cop2_drc::vmacf() |
| 1606 | | { |
| 1607 | | // For each of the 8 elements: adds signed(VS1) * signed(VS2), scaled by 2^17, to the 64-bit value |
| 1608 | | // assembled from the four 16-bit accumulator slices, then stores the saturated accumulator as the result. |
| 1609 | | int op = m_op; |
| 1610 | | |
| 1611 | | for (int i = 0; i < 8; i++) |
| 1612 | | { |
| 1613 | | UINT16 w1, w2; |
| 1614 | | GET_VS1(w1, i); |
| 1615 | | GET_VS2(w2, i); |
| 1616 | | INT32 s1 = (INT32)(INT16)w1; |
| 1617 | | INT32 s2 = (INT32)(INT16)w2; |
| 1618 | | |
| 1619 | | INT32 r = s1 * s2; |
| 1620 | | |
| 1621 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1622 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1623 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1624 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1625 | | |
| 1626 | | // Multiply instead of shifting: r may be negative, and left-shifting a negative value is undefined before C++20 |
| 1627 | | q += (UINT64)((INT64)(r) * 0x20000); |
| 1628 | | SET_ACCUM_LL((UINT16)q, i); |
| 1629 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1630 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1631 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1632 | | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1633 | | } WRITEBACK_RESULT(); } |
| 1634 | | |
| 1635 | | static void cfunc_vmacf(void *param) |
| 1636 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMACF on |
| 1637 | | static_cast<rsp_cop2 *>(param)->vmacf(); |
| 1638 | | } |
| 1639 | | |
| 1640 | | |
| 1641 | | // VMACU |
| 1642 | | // |
| 1643 | | // 31 25 24 20 15 10 5 0 |
| 1644 | | // ------------------------------------------------------ |
| 1645 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1646 | | // ------------------------------------------------------ |
| 1647 | | // |
| 1648 | | |
| 1649 | | inline void rsp_cop2_drc::vmacu() |
| 1650 | | { |
| 1651 | | int op = m_op; |
| 1652 | | |
| 1653 | | for (int i = 0; i < 8; i++) |
| 1654 | | { |
| 1655 | | UINT16 w1, w2; |
| 1656 | | GET_VS1(w1, i); |
| 1657 | | GET_VS2(w2, i); |
| 1658 | | |
| 1659 | | // signed product, then a slice-by-slice add of (product * 2) with carries passed upward |
| 1660 | | const INT32 prod = (INT32)(INT16)w1 * (INT32)(INT16)w2; |
| 1661 | | const UINT32 lo = (UINT16)ACCUM_L(i) + ((UINT16)(prod) * 2); |
| 1662 | | const UINT32 mid = (UINT16)ACCUM_M(i) + (UINT16)((prod >> 16) * 2) + (UINT16)(lo >> 16); |
| 1663 | | |
| 1664 | | SET_ACCUM_L((UINT16)(lo), i); |
| 1665 | | SET_ACCUM_M((UINT16)(mid), i); |
| 1666 | | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(mid >> 16) + (UINT16)(prod >> 31), i); |
| 1667 | | |
| 1668 | | // result selection: negative high slice -> 0; non-zero high slice or negative middle slice -> 0xffff |
| 1669 | | if ((INT16)ACCUM_H(i) < 0) |
| 1670 | | { |
| 1671 | | m_vres[i] = 0; |
| 1672 | | } |
| 1673 | | else if (ACCUM_H(i) != 0 || (INT16)ACCUM_M(i) < 0) |
| 1674 | | { |
| 1675 | | m_vres[i] = (INT16)0xffff; |
| 1676 | | } |
| 1677 | | else |
| 1678 | | { |
| 1679 | | m_vres[i] = ACCUM_M(i); |
| 1680 | | } |
| 1681 | | } |
| 1682 | | WRITEBACK_RESULT(); |
| 1683 | | } |
| 1694 | | |
| 1695 | | static void cfunc_vmacu(void *param) |
| 1696 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMACU on |
| 1697 | | static_cast<rsp_cop2 *>(param)->vmacu(); |
| 1698 | | } |
| 1699 | | |
| 1700 | | |
| 1701 | | // VMADL |
| 1702 | | // |
| 1703 | | // 31 25 24 20 15 10 5 0 |
| 1704 | | // ------------------------------------------------------ |
| 1705 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1706 | | // ------------------------------------------------------ |
| 1707 | | // |
| 1708 | | // Multiplies unsigned fraction by unsigned fraction |
| 1709 | | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1710 | | // The low slice of accumulator is stored into destination element |
| 1711 | | |
| 1712 | | inline void rsp_cop2_drc::vmadl() |
| 1713 | | { |
| 1714 | | int op = m_op; |
| 1715 | | |
| 1716 | | for (int i = 0; i < 8; i++) |
| 1717 | | { |
| 1718 | | UINT16 w1, w2; |
| 1719 | | GET_VS1(w1, i); |
| 1720 | | GET_VS2(w2, i); |
| 1721 | | |
| 1722 | | // unsigned product; its upper half is added to the low slice and the carries ripple upward |
| 1723 | | const UINT32 product = (UINT32)w1 * (UINT32)w2; |
| 1724 | | const UINT32 lo = (UINT16)ACCUM_L(i) + (product >> 16); |
| 1725 | | const UINT32 mid = (UINT16)ACCUM_M(i) + (lo >> 16); |
| 1726 | | |
| 1727 | | SET_ACCUM_L((UINT16)lo, i); |
| 1728 | | SET_ACCUM_M((UINT16)mid, i); |
| 1729 | | SET_ACCUM_H(ACCUM_H(i) + (INT16)(mid >> 16), i); |
| 1730 | | |
| 1731 | | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1732 | | } |
| 1733 | | WRITEBACK_RESULT(); |
| 1734 | | } |
| 1736 | | |
| 1737 | | static void cfunc_vmadl(void *param) |
| 1738 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMADL on |
| 1739 | | static_cast<rsp_cop2 *>(param)->vmadl(); |
| 1740 | | } |
| 1741 | | |
| 1742 | | |
| 1743 | | // VMADM |
| 1744 | | // |
| 1745 | | |
| 1746 | | inline void rsp_cop2_drc::vmadm() |
| 1747 | | { |
| 1748 | | int op = m_op; |
| 1749 | | |
| 1750 | | for (int i = 0; i < 8; i++) |
| 1751 | | { |
| 1752 | | UINT16 w1, w2; |
| 1753 | | GET_VS1(w1, i); |
| 1754 | | GET_VS2(w2, i); |
| 1755 | | |
| 1756 | | // VS1 element sign-extended, VS2 element zero-extended; the multiply itself is done modulo 2^32 |
| 1757 | | const UINT32 lhs = (INT32)(INT16)w1; |
| 1758 | | const UINT32 rhs = (UINT16)w2; |
| 1759 | | const UINT32 product = lhs * rhs; |
| 1760 | | |
| 1761 | | // add the product to the low and middle slices, passing carries upward |
| 1762 | | const UINT32 lo = (UINT16)ACCUM_L(i) + (UINT16)(product); |
| 1763 | | const UINT32 mid = (UINT16)ACCUM_M(i) + (product >> 16) + (lo >> 16); |
| 1764 | | |
| 1765 | | SET_ACCUM_L((UINT16)lo, i); |
| 1766 | | SET_ACCUM_M((UINT16)mid, i); |
| 1767 | | SET_ACCUM_H((UINT16)ACCUM_H(i) + (UINT16)(mid >> 16), i); |
| 1768 | | // a negative product borrows one from the high slice |
| 1769 | | if ((INT32)(product) < 0) { SET_ACCUM_H((UINT16)ACCUM_H(i) - 1, i); } |
| 1770 | | |
| 1771 | | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1772 | | } |
| 1773 | | WRITEBACK_RESULT(); } |
| 1774 | | |
| 1775 | | static void cfunc_vmadm(void *param) |
| 1776 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMADM on |
| 1777 | | static_cast<rsp_cop2 *>(param)->vmadm(); |
| 1778 | | } |
| 1779 | | |
| 1780 | | |
| 1781 | | // VMADN |
| 1782 | | // |
| 1783 | | |
| 1784 | | inline void rsp_cop2_drc::vmadn() |
| 1785 | | { |
| 1786 | | // For each of the 8 elements: adds unsigned(VS1) * signed(VS2), scaled by 2^16, to the 64-bit value |
| 1787 | | // assembled from the four 16-bit accumulator slices, then stores the saturated accumulator as the result. |
| 1788 | | int op = m_op; |
| 1789 | | |
| 1790 | | for (int i = 0; i < 8; i++) |
| 1791 | | { |
| 1792 | | UINT16 w1, w2; |
| 1793 | | GET_VS1(w1, i); |
| 1794 | | GET_VS2(w2, i); |
| 1795 | | INT32 s1 = (UINT16)w1; |
| 1796 | | INT32 s2 = (INT32)(INT16)w2; |
| 1797 | | |
| 1798 | | // Truncate each slice to 16 bits before widening (as vmacf does) so a signed slice cannot sign-extend into q |
| 1799 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1800 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1801 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1802 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1803 | | // Multiply instead of shifting: the product may be negative, and left-shifting a negative value is undefined before C++20 |
| 1804 | | q += (UINT64)((INT64)(s1*s2) * 0x10000); |
| 1805 | | SET_ACCUM_LL((UINT16)q, i); |
| 1806 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1807 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1808 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1809 | | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1810 | | } WRITEBACK_RESULT(); } |
| 1811 | | |
| 1812 | | static void cfunc_vmadn(void *param) |
| 1813 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMADN on |
| 1814 | | static_cast<rsp_cop2 *>(param)->vmadn(); |
| 1815 | | } |
| 1816 | | |
| 1817 | | |
| 1818 | | // VMADH |
| 1819 | | // |
| 1820 | | // 31 25 24 20 15 10 5 0 |
| 1821 | | // ------------------------------------------------------ |
| 1822 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 1823 | | // ------------------------------------------------------ |
| 1824 | | // |
| 1825 | | // Multiplies signed integer by signed integer |
| 1826 | | // The result is added into highest 32 bits of accumulator, the low slice is left unchanged |
| 1827 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1828 | | |
| 1829 | | inline void rsp_cop2_drc::vmadh() |
| 1830 | | { |
| 1831 | | // For each of the 8 elements: adds signed(VS1) * signed(VS2) to the 32-bit value held in the |
| 1832 | | // high and middle accumulator slices, then stores the saturated accumulator as the result. |
| 1833 | | int op = m_op; |
| 1834 | | |
| 1835 | | for (int i = 0; i < 8; i++) |
| 1836 | | { |
| 1837 | | INT16 w1, w2; |
| 1838 | | GET_VS1(w1, i); |
| 1839 | | GET_VS2(w2, i); |
| 1840 | | INT32 s1 = (INT32)(INT16)w1; |
| 1841 | | INT32 s2 = (INT32)(INT16)w2; |
| 1842 | | |
| 1843 | | // Accumulate in unsigned arithmetic: the sum wraps modulo 2^32 instead of overflowing a signed int |
| 1844 | | UINT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1845 | | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1846 | | accum += (UINT32)(s1 * s2); |
| 1847 | | SET_ACCUM_H((UINT16)(accum >> 16), i); |
| 1848 | | SET_ACCUM_M((UINT16)accum, i); |
| 1849 | | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1850 | | } |
| 1851 | | WRITEBACK_RESULT(); } |
| 1852 | | |
| 1853 | | static void cfunc_vmadh(void *param) |
| 1854 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VMADH on |
| 1855 | | static_cast<rsp_cop2 *>(param)->vmadh(); |
| 1856 | | } |
| 1857 | | |
| 1858 | | |
| 1859 | | // VADD |
| 1860 | | // 31 25 24 20 15 10 5 0 |
| 1861 | | // ------------------------------------------------------ |
| 1862 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 1863 | | // ------------------------------------------------------ |
| 1864 | | // |
| 1865 | | // Adds two vector registers and carry flag, the result is saturated to the signed 16-bit range (-32768..32767) |
| 1866 | | |
| 1867 | | inline void rsp_cop2_drc::vadd() |
| 1868 | | { |
| 1869 | | int op = m_op; |
| 1870 | | |
| 1871 | | for (int i = 0; i < 8; i++) |
| 1872 | | { |
| 1873 | | INT16 w1, w2; |
| 1874 | | GET_VS1(w1, i); |
| 1875 | | GET_VS2(w2, i); |
| 1876 | | const INT32 carry_in = ((CARRY_FLAG(i)) != 0) ? 1 : 0; |
| 1877 | | const INT32 sum = (INT32)w1 + (INT32)w2 + carry_in; |
| 1878 | | |
| 1879 | | // the low accumulator slice keeps the unclamped low 16 bits |
| 1880 | | SET_ACCUM_L((INT16)(sum), i); |
| 1881 | | |
| 1882 | | // the destination gets the sum clamped to the signed 16-bit range |
| 1883 | | m_vres[i] = (INT16)((sum > 32767) ? 32767 : ((sum < -32768) ? -32768 : sum)); |
| 1884 | | } |
| 1885 | | CLEAR_ZERO_FLAGS(); |
| 1886 | | CLEAR_CARRY_FLAGS(); |
| 1887 | | WRITEBACK_RESULT(); |
| 1888 | | } |
| 1890 | | |
| 1891 | | static void cfunc_vadd(void *param) |
| 1892 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VADD on |
| 1893 | | static_cast<rsp_cop2 *>(param)->vadd(); |
| 1894 | | } |
| 1895 | | |
| 1896 | | |
| 1897 | | // VSUB |
| 1898 | | // |
| 1899 | | // 31 25 24 20 15 10 5 0 |
| 1900 | | // ------------------------------------------------------ |
| 1901 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 1902 | | // ------------------------------------------------------ |
| 1903 | | // |
| 1904 | | // Subtracts two vector registers and carry flag, the result is saturated to the signed 16-bit range (-32768..32767) |
| 1905 | | // TODO: check VS2REG == VDREG |
| 1906 | | |
| 1907 | | inline void rsp_cop2_drc::vsub() |
| 1908 | | { |
| 1909 | | int op = m_op; |
| 1910 | | |
| 1911 | | for (int i = 0; i < 8; i++) |
| 1912 | | { |
| 1913 | | INT16 w1, w2; |
| 1914 | | GET_VS1(w1, i); |
| 1915 | | GET_VS2(w2, i); |
| 1916 | | const INT32 borrow_in = ((CARRY_FLAG(i)) != 0) ? 1 : 0; |
| 1917 | | const INT32 diff = (INT32)w1 - (INT32)w2 - borrow_in; |
| 1918 | | |
| 1919 | | // the low accumulator slice keeps the unclamped low 16 bits |
| 1920 | | SET_ACCUM_L((INT16)(diff), i); |
| 1921 | | |
| 1922 | | // the destination gets the difference clamped to the signed 16-bit range |
| 1923 | | m_vres[i] = (INT16)((diff > 32767) ? 32767 : ((diff < -32768) ? -32768 : diff)); |
| 1924 | | } |
| 1925 | | CLEAR_ZERO_FLAGS(); |
| 1926 | | CLEAR_CARRY_FLAGS(); |
| 1927 | | WRITEBACK_RESULT(); |
| 1928 | | } |
| 1931 | | |
| 1932 | | static void cfunc_vsub(void *param) |
| 1933 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VSUB on |
| 1934 | | static_cast<rsp_cop2 *>(param)->vsub(); |
| 1935 | | } |
| 1936 | | |
| 1937 | | |
| 1938 | | // VABS |
| 1939 | | // |
| 1940 | | // 31 25 24 20 15 10 5 0 |
| 1941 | | // ------------------------------------------------------ |
| 1942 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 1943 | | // ------------------------------------------------------ |
| 1944 | | // |
| 1945 | | // Changes the sign of source register 2 if source register 1 is negative and stores the result to destination register |
| 1946 | | |
| 1947 | | inline void rsp_cop2_drc::vabs() |
| 1948 | | { |
| 1949 | | int op = m_op; |
| 1950 | | |
| 1951 | | for (int i = 0; i < 8; i++) |
| 1952 | | { |
| 1953 | | INT16 s1, s2; |
| 1954 | | GET_VS1(s1, i); |
| 1955 | | GET_VS2(s2, i); |
| 1956 | | |
| 1957 | | // VS1 == 0 gives 0, VS1 > 0 passes VS2 through, VS1 < 0 negates VS2 |
| 1958 | | int picked = 0; |
| 1959 | | if (s1 > 0) |
| 1960 | | { |
| 1961 | | picked = s2; |
| 1962 | | } |
| 1963 | | else if (s1 < 0) |
| 1964 | | { |
| 1965 | | // -(-32768) is not representable in 16 bits, so that case is pinned to 32767 |
| 1966 | | picked = (s2 == -32768) ? 32767 : -s2; |
| 1967 | | } |
| 1968 | | |
| 1969 | | m_vres[i] = picked; |
| 1970 | | SET_ACCUM_L(m_vres[i], i); |
| 1971 | | } |
| 1972 | | WRITEBACK_RESULT(); |
| 1973 | | } |
| 1981 | | |
| 1982 | | static void cfunc_vabs(void *param) |
| 1983 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VABS on |
| 1984 | | static_cast<rsp_cop2 *>(param)->vabs(); |
| 1985 | | } |
| 1986 | | |
| 1987 | | |
| 1988 | | // VADDC |
| 1989 | | // |
| 1990 | | // 31 25 24 20 15 10 5 0 |
| 1991 | | // ------------------------------------------------------ |
| 1992 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 1993 | | // ------------------------------------------------------ |
| 1994 | | // |
| 1995 | | // Adds two vector registers, the carry out is stored into carry register |
| 1996 | | // TODO: check VS2REG = VDREG |
| 1997 | | |
| 1998 | | inline void rsp_cop2_drc::vaddc() |
| 1999 | | { |
| 2000 | | int op = m_op; |
| 2001 | | |
| 2002 | | // flags are rebuilt from scratch by this instruction |
| 2003 | | CLEAR_ZERO_FLAGS(); |
| 2004 | | CLEAR_CARRY_FLAGS(); |
| 2005 | | |
| 2006 | | for (int i = 0; i < 8; i++) |
| 2007 | | { |
| 2008 | | INT16 w1, w2; |
| 2009 | | GET_VS1(w1, i); |
| 2010 | | GET_VS2(w2, i); |
| 2011 | | |
| 2012 | | // both operands are treated as unsigned 16-bit values, so the sum is never negative |
| 2013 | | const INT32 sum = (INT32)(UINT16)w1 + (INT32)(UINT16)w2; |
| 2014 | | |
| 2015 | | m_vres[i] = (INT16)(sum); |
| 2016 | | SET_ACCUM_L((INT16)sum, i); |
| 2017 | | |
| 2018 | | // anything above bit 15 is the carry out |
| 2019 | | if ((sum >> 16) != 0) { SET_CARRY_FLAG(i); } |
| 2020 | | } |
| 2021 | | WRITEBACK_RESULT(); |
| 2022 | | } |
| 2024 | | |
| 2025 | | static void cfunc_vaddc(void *param) |
| 2026 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VADDC on |
| 2027 | | static_cast<rsp_cop2 *>(param)->vaddc(); |
| 2028 | | } |
| 2029 | | |
| 2030 | | |
| 2031 | | // VSUBC |
| 2032 | | // |
| 2033 | | // 31 25 24 20 15 10 5 0 |
| 2034 | | // ------------------------------------------------------ |
| 2035 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 2036 | | // ------------------------------------------------------ |
| 2037 | | // |
| 2038 | | // Subtracts two vector registers, the carry out is stored into carry register |
| 2039 | | // TODO: check VS2REG = VDREG |
| 2040 | | |
| 2041 | | inline void rsp_cop2_drc::vsubc() |
| 2042 | | { |
| 2043 | | int op = m_op; |
| 2044 | | |
| 2045 | | // flags are rebuilt from scratch by this instruction |
| 2046 | | CLEAR_ZERO_FLAGS(); |
| 2047 | | CLEAR_CARRY_FLAGS(); |
| 2048 | | |
| 2049 | | for (int i = 0; i < 8; i++) |
| 2050 | | { |
| 2051 | | INT16 w1, w2; |
| 2052 | | GET_VS1(w1, i); |
| 2053 | | GET_VS2(w2, i); |
| 2054 | | |
| 2055 | | // both operands are treated as unsigned 16-bit values; diff lies in -65535..65535 |
| 2056 | | const INT32 diff = (INT32)(UINT16)w1 - (INT32)(UINT16)w2; |
| 2057 | | |
| 2058 | | m_vres[i] = (INT16)(diff); |
| 2059 | | SET_ACCUM_L((UINT16)diff, i); |
| 2060 | | |
| 2061 | | // the zero flag is raised when the low 16 bits are NOT zero |
| 2062 | | if ((UINT16)(diff) != 0) { SET_ZERO_FLAG(i); } |
| 2063 | | |
| 2064 | | // in this range the upper 16 bits are non-zero exactly when the difference is negative |
| 2065 | | if (diff < 0) { SET_CARRY_FLAG(i); } |
| 2066 | | } |
| 2067 | | WRITEBACK_RESULT(); |
| 2068 | | } |
| 2071 | | |
| 2072 | | static void cfunc_vsubc(void *param) |
| 2073 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VSUBC on |
| 2074 | | static_cast<rsp_cop2 *>(param)->vsubc(); |
| 2075 | | } |
| 2076 | | |
| 2077 | | |
| 2078 | | // VADDB |
| 2079 | | // |
| 2080 | | // 31 25 24 20 15 10 5 0 |
| 2081 | | // ------------------------------------------------------ |
| 2082 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010110 | |
| 2083 | | // ------------------------------------------------------ |
| 2084 | | // |
| 2085 | | // Adds two vector registers bytewise with rounding |
| 2086 | | |
| 2087 | | inline void rsp_cop2_drc::vaddb() |
| 2088 | | { |
| 2089 | | const int op = m_op; |
| 2090 | | const int round = (EL == 0) ? 0 : (1 << (EL - 1)); |
| 2091 | | |
| 2092 | | for (int i = 0; i < 8; i++) |
| 2093 | | { |
| 2094 | | UINT16 w1, w2; |
| 2095 | | GET_VS1(w1, i); |
| 2096 | | GET_VS2(w2, i); |
| 2097 | | |
| 2098 | | // high and low bytes are summed independently, each with the rounding term added |
| 2099 | | UINT16 hi_sum = (UINT8)(w1 >> 8) + (UINT8)(w2 >> 8) + round; |
| 2100 | | UINT16 lo_sum = (UINT8)(w1 & 0xff) + (UINT8)(w2 & 0xff) + round; |
| 2101 | | |
| 2102 | | SET_ACCUM_L((hi_sum << 8) | lo_sum, i); |
| 2103 | | |
| 2104 | | // scaled and saturated bytes; only the commented-out writeback below would consume them |
| 2105 | | hi_sum >>= EL; |
| 2106 | | if (hi_sum > 255) hi_sum = 255; |
| 2107 | | |
| 2108 | | lo_sum >>= EL; |
| 2109 | | if (lo_sum > 255) lo_sum = 255; |
| 2110 | | |
| 2111 | | m_vres[i] = 0; // VD writeback disabled on production hardware |
| 2112 | | // m_vres[i] = (hi_sum << 8) | lo_sum; |
| 2113 | | } |
| 2114 | | WRITEBACK_RESULT(); |
| 2115 | | } |
| 2125 | | |
| 2126 | | static void cfunc_vaddb(void *param) |
| 2127 | | { // C-callable trampoline: param is treated as the rsp_cop2 object to run VADDB on |
| 2128 | | static_cast<rsp_cop2 *>(param)->vaddb(); |
| 2129 | | } |
| 2130 | | |
| 2131 | | |
| 2132 | | // VSAW |
| 2133 | | // |
| 2134 | | // 31 25 24 20 15 10 5 0 |
| 2135 | | // ------------------------------------------------------ |
| 2136 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 2137 | | // ------------------------------------------------------ |
| 2138 | | // |
| 2139 | | // Stores high, middle or low slice of accumulator to destination vector |
| 2140 | | |
| 2141 | | inline void rsp_cop2_drc::vsaw() |
| 2142 | | { |
| 2143 | | int op = m_op; |
| 2144 | | |
| 2145 | | // Each destination element receives one accumulator slice, chosen by the element field; |
| 2146 | | // any other element value writes zero. |
| 2147 | | for (int i = 0; i < 8; i++) |
| 2148 | | { |
| 2149 | | switch (EL) |
| 2150 | | { |
| 2151 | | case 0x08: // VSAWH |
| 2152 | | W_VREG_S(VDREG, i) = ACCUM_H(i); |
| 2153 | | break; |
| 2154 | | case 0x09: // VSAWM |
| 2155 | | W_VREG_S(VDREG, i) = ACCUM_M(i); |
| 2156 | | break; |
| 2157 | | case 0x0a: // VSAWL |
| 2158 | | W_VREG_S(VDREG, i) = ACCUM_L(i); |
| 2159 | | break; |
| 2160 | | default: // Unsupported |
| 2161 | | W_VREG_S(VDREG, i) = 0; |
| 2162 | | break; |
| 2163 | | } |
| 2164 | | } |
| 2165 | | } |
| 2174 | | |
| 2175 | | static void cfunc_vsaw(void *param) /* Static trampoline: lets vsaw() be called through a plain function taking void* */ |
| 2176 | | { |
| 2177 | | ((rsp_cop2 *)param)->vsaw(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2178 | | } |
| 2179 | | |
| 2180 | | |
| 2181 | | // VLT |
| 2182 | | // |
| 2183 | | // 31 25 24 20 15 10 5 0 |
| 2184 | | // ------------------------------------------------------ |
| 2185 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 2186 | | // ------------------------------------------------------ |
| 2187 | | // |
| 2188 | | // Sets the compare flag for each element of VS1 that is less than the matching element of VS2 |
| 2189 | | // Destination receives the VS1 element where the flag is set, otherwise the VS2 element |
| 2190 | | |
| 2191 | | inline void rsp_cop2_drc::vlt() /* VLT: per-element signed less-than compare and select over 8 elements */ |
| 2192 | | { |
| 2193 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2194 | | |
| 2195 | | CLEAR_COMPARE_FLAGS(); /* compare and clip2 flags are reset before the per-element test */ |
| 2196 | | CLEAR_CLIP2_FLAGS(); |
| 2197 | | |
| 2198 | | for (int i = 0; i < 8; i++) |
| 2199 | | { |
| 2200 | | INT16 s1, s2; /* signed 16-bit, so the comparisons below are signed */ |
| 2201 | | GET_VS1(s1, i); |
| 2202 | | GET_VS2(s2, i); |
| 2203 | | |
| 2204 | | if (s1 < s2) |
| 2205 | | { |
| 2206 | | SET_COMPARE_FLAG(i); |
| 2207 | | } |
| 2208 | | else if (s1 == s2) /* tie: treated as "less" only when both the zero and carry flags are set for this element */ |
| 2209 | | { |
| 2210 | | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 2211 | | { |
| 2212 | | SET_COMPARE_FLAG(i); |
| 2213 | | } |
| 2214 | | } |
| 2215 | | |
| 2216 | | if (COMPARE_FLAG(i) != 0) /* select: VS1 element when flagged, otherwise VS2 element */ |
| 2217 | | { |
| 2218 | | m_vres[i] = s1; |
| 2219 | | } |
| 2220 | | else |
| 2221 | | { |
| 2222 | | m_vres[i] = s2; |
| 2223 | | } |
| 2224 | | |
| 2225 | | SET_ACCUM_L(m_vres[i], i); /* the selected value is also handed to SET_ACCUM_L for this element */ |
| 2226 | | } |
| 2227 | | |
| 2228 | | CLEAR_ZERO_FLAGS(); /* zero and carry flags are cleared after being consulted above */ |
| 2229 | | CLEAR_CARRY_FLAGS(); |
| 2230 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2231 | | } |
| 2232 | | |
| 2233 | | static void cfunc_vlt(void *param) /* Static trampoline: lets vlt() be called through a plain function taking void* */ |
| 2234 | | { |
| 2235 | | ((rsp_cop2 *)param)->vlt(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2236 | | } |
| 2237 | | |
| 2238 | | |
| 2239 | | // VEQ |
| 2240 | | // |
| 2241 | | // 31 25 24 20 15 10 5 0 |
| 2242 | | // ------------------------------------------------------ |
| 2243 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 2244 | | // ------------------------------------------------------ |
| 2245 | | // |
| 2246 | | // Sets the compare flag where the VS1 element equals the VS2 element and the zero flag for that element is clear |
| 2247 | | // Destination receives the VS1 element where the flag is set, otherwise the VS2 element |
| 2248 | | |
| 2249 | | inline void rsp_cop2_drc::veq() /* VEQ: per-element equality compare and select over 8 elements */ |
| 2250 | | { |
| 2251 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2252 | | |
| 2253 | | CLEAR_COMPARE_FLAGS(); /* compare and clip2 flags are reset before the per-element test */ |
| 2254 | | CLEAR_CLIP2_FLAGS(); |
| 2255 | | |
| 2256 | | for (int i = 0; i < 8; i++) |
| 2257 | | { |
| 2258 | | INT16 s1, s2; |
| 2259 | | GET_VS1(s1, i); |
| 2260 | | GET_VS2(s2, i); |
| 2261 | | |
| 2262 | | if ((s1 == s2) && ZERO_FLAG(i) == 0) /* equality only counts while the zero flag for this element is clear */ |
| 2263 | | { |
| 2264 | | SET_COMPARE_FLAG(i); |
| 2265 | | m_vres[i] = s1; /* same value as s2 in this branch, since s1 == s2 */ |
| 2266 | | } |
| 2267 | | else |
| 2268 | | { |
| 2269 | | m_vres[i] = s2; |
| 2270 | | } |
| 2271 | | |
| 2272 | | SET_ACCUM_L(m_vres[i], i); /* the selected value is also handed to SET_ACCUM_L for this element */ |
| 2273 | | } |
| 2274 | | |
| 2275 | | CLEAR_ZERO_FLAGS(); /* zero and carry flags are cleared after the loop */ |
| 2276 | | CLEAR_CARRY_FLAGS(); |
| 2277 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2278 | | } |
| 2279 | | |
| 2280 | | static void cfunc_veq(void *param) /* Static trampoline: lets veq() be called through a plain function taking void* */ |
| 2281 | | { |
| 2282 | | ((rsp_cop2 *)param)->veq(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2283 | | } |
| 2284 | | |
| 2285 | | |
| 2286 | | // VNE |
| 2287 | | // |
| 2288 | | // 31 25 24 20 15 10 5 0 |
| 2289 | | // ------------------------------------------------------ |
| 2290 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 2291 | | // ------------------------------------------------------ |
| 2292 | | // |
| 2293 | | // Sets the compare flag where the VS1 element differs from the VS2 element or the zero flag for that element is set |
| 2294 | | // Destination receives the VS1 element where the flag is set, otherwise the VS2 element |
| 2295 | | |
| 2296 | | inline void rsp_cop2_drc::vne() /* VNE: per-element inequality compare and select over 8 elements */ |
| 2297 | | { |
| 2298 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2299 | | |
| 2300 | | CLEAR_COMPARE_FLAGS(); /* compare and clip2 flags are reset before the per-element test */ |
| 2301 | | CLEAR_CLIP2_FLAGS(); |
| 2302 | | |
| 2303 | | for (int i = 0; i < 8; i++) |
| 2304 | | { |
| 2305 | | INT16 s1, s2; |
| 2306 | | GET_VS1(s1, i); |
| 2307 | | GET_VS2(s2, i); |
| 2308 | | |
| 2309 | | if (s1 != s2 || ZERO_FLAG(i) != 0) /* a set zero flag forces "not equal" even when the values match */ |
| 2310 | | { |
| 2311 | | SET_COMPARE_FLAG(i); |
| 2312 | | m_vres[i] = s1; |
| 2313 | | } |
| 2314 | | else |
| 2315 | | { |
| 2316 | | m_vres[i] = s2; /* only reached when s1 == s2, so this is the same value as s1 */ |
| 2317 | | } |
| 2318 | | |
| 2319 | | SET_ACCUM_L(m_vres[i], i); /* the selected value is also handed to SET_ACCUM_L for this element */ |
| 2320 | | } |
| 2321 | | |
| 2322 | | CLEAR_ZERO_FLAGS(); /* zero and carry flags are cleared after the loop */ |
| 2323 | | CLEAR_CARRY_FLAGS(); |
| 2324 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2325 | | } |
| 2326 | | |
| 2327 | | static void cfunc_vne(void *param) /* Static trampoline: lets vne() be called through a plain function taking void* */ |
| 2328 | | { |
| 2329 | | ((rsp_cop2 *)param)->vne(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2330 | | } |
| 2331 | | |
| 2332 | | |
| 2333 | | // VGE |
| 2334 | | // |
| 2335 | | // 31 25 24 20 15 10 5 0 |
| 2336 | | // ------------------------------------------------------ |
| 2337 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 2338 | | // ------------------------------------------------------ |
| 2339 | | // |
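| 2340 | | // Sets the compare flag where the VS1 element is greater than or equal to the VS2 element (equality does not count when both zero and carry flags are set) |
| 2341 | | // Destination receives the VS1 element where the flag is set, otherwise the VS2 element |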
| 2342 | | |
| 2343 | | inline void rsp_cop2_drc::vge() /* VGE: per-element signed greater-or-equal compare and select over 8 elements */ |
| 2344 | | { |
| 2345 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2346 | | |
| 2347 | | CLEAR_COMPARE_FLAGS(); /* compare and clip2 flags are reset before the per-element test */ |
| 2348 | | CLEAR_CLIP2_FLAGS(); |
| 2349 | | |
| 2350 | | for (int i = 0; i < 8; i++) |
| 2351 | | { |
| 2352 | | INT16 s1, s2; /* signed 16-bit, so the comparisons below are signed */ |
| 2353 | | GET_VS1(s1, i); |
| 2354 | | GET_VS2(s2, i); |
| 2355 | | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) /* equality counts unless both zero and carry flags are set */ |
| 2356 | | { |
| 2357 | | SET_COMPARE_FLAG(i); |
| 2358 | | m_vres[i] = s1; |
| 2359 | | } |
| 2360 | | else |
| 2361 | | { |
| 2362 | | m_vres[i] = s2; |
| 2363 | | } |
| 2364 | | |
| 2365 | | SET_ACCUM_L(m_vres[i], i); /* the selected value is also handed to SET_ACCUM_L for this element */ |
| 2366 | | } |
| 2367 | | |
| 2368 | | CLEAR_ZERO_FLAGS(); /* zero and carry flags are cleared after the loop */ |
| 2369 | | CLEAR_CARRY_FLAGS(); |
| 2370 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2371 | | } |
| 2372 | | |
| 2373 | | static void cfunc_vge(void *param) /* Static trampoline: lets vge() be called through a plain function taking void* */ |
| 2374 | | { |
| 2375 | | ((rsp_cop2 *)param)->vge(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2376 | | } |
| 2377 | | |
| 2378 | | |
| 2379 | | // VCL |
| 2380 | | // |
| 2381 | | // 31 25 24 20 15 10 5 0 |
| 2382 | | // ------------------------------------------------------ |
| 2383 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 2384 | | // ------------------------------------------------------ |
| 2385 | | // |
| 2386 | | // Vector clip low |
| 2387 | | |
| 2388 | | inline void rsp_cop2_drc::vcl() /* VCL: per-element clip driven by the carry, zero, compare, clip1 and clip2 flags already in place on entry */ |
| 2389 | | { |
| 2390 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2391 | | |
| 2392 | | for (int i = 0; i < 8; i++) |
| 2393 | | { |
| 2394 | | INT16 s1, s2; |
| 2395 | | GET_VS1(s1, i); |
| 2396 | | GET_VS2(s2, i); |
| 2397 | | |
| 2398 | | if (CARRY_FLAG(i) != 0) /* carry set: candidate results are s1 or the negation of s2 */ |
| 2399 | | { |
| 2400 | | if (ZERO_FLAG(i) != 0) /* zero flag set: the existing compare flag decides and no flag is modified */ |
| 2401 | | { |
| 2402 | | if (COMPARE_FLAG(i) != 0) |
| 2403 | | { |
| 2404 | | SET_ACCUM_L(-(UINT16)s2, i); |
| 2405 | | } |
| 2406 | | else |
| 2407 | | { |
| 2408 | | SET_ACCUM_L(s1, i); |
| 2409 | | } |
| 2410 | | } |
| 2411 | | else |
| 2412 | | { |
| 2413 | | if (CLIP1_FLAG(i) != 0) |
| 2414 | | { |
| 2415 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) /* unsigned 16-bit sum, kept in 32 bits, strictly above 0x10000 */ |
| 2416 | | { |
| 2417 | | SET_ACCUM_L(s1, i); |
| 2418 | | CLEAR_COMPARE_FLAG(i); |
| 2419 | | } |
| 2420 | | else |
| 2421 | | { |
| 2422 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 2423 | | SET_COMPARE_FLAG(i); |
| 2424 | | } |
| 2425 | | } |
| 2426 | | else |
| 2427 | | { |
| 2428 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) /* sum is kept in 32 bits, so it is zero only when both operands are zero */ |
| 2429 | | { |
| 2430 | | SET_ACCUM_L(s1, i); |
| 2431 | | CLEAR_COMPARE_FLAG(i); |
| 2432 | | } |
| 2433 | | else |
| 2434 | | { |
| 2435 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 2436 | | SET_COMPARE_FLAG(i); |
| 2437 | | } |
| 2438 | | } |
| 2439 | | } |
| 2440 | | } |
| 2441 | | else /* carry clear: candidate results are s1 or s2 */ |
| 2442 | | { |
| 2443 | | if (ZERO_FLAG(i) != 0) /* zero flag set: the existing clip2 flag decides and no flag is modified */ |
| 2444 | | { |
| 2445 | | if (CLIP2_FLAG(i) != 0) |
| 2446 | | { |
| 2447 | | SET_ACCUM_L(s2, i); |
| 2448 | | } |
| 2449 | | else |
| 2450 | | { |
| 2451 | | SET_ACCUM_L(s1, i); |
| 2452 | | } |
| 2453 | | } |
| 2454 | | else |
| 2455 | | { |
| 2456 | | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) /* unsigned 16-bit comparison: s1 >= s2 */ |
| 2457 | | { |
| 2458 | | SET_ACCUM_L(s2, i); |
| 2459 | | SET_CLIP2_FLAG(i); |
| 2460 | | } |
| 2461 | | else |
| 2462 | | { |
| 2463 | | SET_ACCUM_L(s1, i); |
| 2464 | | CLEAR_CLIP2_FLAG(i); |
| 2465 | | } |
| 2466 | | } |
| 2467 | | } |
| 2468 | | m_vres[i] = ACCUM_L(i); /* result is read back from the accumulator slice just written */ |
| 2469 | | } |
| 2470 | | CLEAR_ZERO_FLAGS(); /* zero, carry and clip1 flags are cleared once all elements are processed */ |
| 2471 | | CLEAR_CARRY_FLAGS(); |
| 2472 | | CLEAR_CLIP1_FLAGS(); |
| 2473 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2474 | | } |
| 2475 | | |
| 2476 | | static void cfunc_vcl(void *param) /* Static trampoline: lets vcl() be called through a plain function taking void* */ |
| 2477 | | { |
| 2478 | | ((rsp_cop2 *)param)->vcl(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2479 | | } |
| 2480 | | |
| 2481 | | |
| 2482 | | // VCH |
| 2483 | | // |
| 2484 | | // 31 25 24 20 15 10 5 0 |
| 2485 | | // ------------------------------------------------------ |
| 2486 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 2487 | | // ------------------------------------------------------ |
| 2488 | | // |
| 2489 | | // Vector clip high |
| 2490 | | |
| 2491 | | inline void rsp_cop2_drc::vch() /* VCH: per-element clip that recomputes the carry, compare, clip1, zero and clip2 flags from scratch */ |
| 2492 | | { |
| 2493 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2494 | | |
| 2495 | | CLEAR_CARRY_FLAGS(); /* all five flag groups are reset, then set per element below */ |
| 2496 | | CLEAR_COMPARE_FLAGS(); |
| 2497 | | CLEAR_CLIP1_FLAGS(); |
| 2498 | | CLEAR_ZERO_FLAGS(); |
| 2499 | | CLEAR_CLIP2_FLAGS(); |
| 2500 | | |
| 2501 | | UINT32 vce = 0; /* reassigned on every iteration by whichever branch runs */ |
| 2502 | | for (int i = 0; i < 8; i++) |
| 2503 | | { |
| 2504 | | INT16 s1, s2; |
| 2505 | | GET_VS1(s1, i); |
| 2506 | | GET_VS2(s2, i); |
| 2507 | | |
| 2508 | | if ((s1 ^ s2) < 0) /* operands have opposite signs: XOR of the sign-extended values is negative */ |
| 2509 | | { |
| 2510 | | vce = (s1 + s2 == -1); |
| 2511 | | SET_CARRY_FLAG(i); |
| 2512 | | if (s2 < 0) |
| 2513 | | { |
| 2514 | | SET_CLIP2_FLAG(i); |
| 2515 | | } |
| 2516 | | |
| 2517 | | if ((s1 + s2) <= 0) |
| 2518 | | { |
| 2519 | | SET_COMPARE_FLAG(i); |
| 2520 | | m_vres[i] = -((UINT16)s2); /* negation of s2 is the clipped result */ |
| 2521 | | } |
| 2522 | | else |
| 2523 | | { |
| 2524 | | m_vres[i] = s1; |
| 2525 | | } |
| 2526 | | |
| 2527 | | if ((s1 + s2) != 0 && s1 != ~s2) /* zero flag is set unless the sum is 0 or -1 (s1 == ~s2 means s1 + s2 == -1) */ |
| 2528 | | { |
| 2529 | | SET_ZERO_FLAG(i); |
| 2530 | | } |
| 2531 | | }// end of the opposite-sign branch |
| 2532 | | else |
| 2533 | | { |
| 2534 | | vce = 0; |
| 2535 | | if (s2 < 0) |
| 2536 | | { |
| 2537 | | SET_COMPARE_FLAG(i); |
| 2538 | | } |
| 2539 | | if ((s1 - s2) >= 0) |
| 2540 | | { |
| 2541 | | SET_CLIP2_FLAG(i); |
| 2542 | | m_vres[i] = s2; |
| 2543 | | } |
| 2544 | | else |
| 2545 | | { |
| 2546 | | m_vres[i] = s1; |
| 2547 | | } |
| 2548 | | |
| 2549 | | if ((s1 - s2) != 0 && s1 != ~s2) /* zero flag is set when the operands differ and s1 is not the bitwise complement of s2 */ |
| 2550 | | { |
| 2551 | | SET_ZERO_FLAG(i); |
| 2552 | | } |
| 2553 | | } |
| 2554 | | if (vce) |
| 2555 | | { |
| 2556 | | SET_CLIP1_FLAG(i); |
| 2557 | | } |
| 2558 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2559 | | } |
| 2560 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2561 | | } |
| 2562 | | |
| 2563 | | static void cfunc_vch(void *param) /* Static trampoline: lets vch() be called through a plain function taking void* */ |
| 2564 | | { |
| 2565 | | ((rsp_cop2 *)param)->vch(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2566 | | } |
| 2567 | | |
| 2568 | | |
| 2569 | | // VCR |
| 2570 | | // |
| 2571 | | // 31 25 24 20 15 10 5 0 |
| 2572 | | // ------------------------------------------------------ |
| 2573 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 2574 | | // ------------------------------------------------------ |
| 2575 | | // |
| 2576 | | // Vector clip reverse |
| 2577 | | |
| 2578 | | inline void rsp_cop2_drc::vcr() /* VCR: per-element clip using the bitwise complement of s2; only compare and clip2 flags are set here */ |
| 2579 | | { |
| 2580 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2581 | | |
| 2582 | | CLEAR_CARRY_FLAGS(); /* all five flag groups are reset; carry, clip1 and zero are not set again in this function */ |
| 2583 | | CLEAR_COMPARE_FLAGS(); |
| 2584 | | CLEAR_CLIP1_FLAGS(); |
| 2585 | | CLEAR_ZERO_FLAGS(); |
| 2586 | | CLEAR_CLIP2_FLAGS(); |
| 2587 | | |
| 2588 | | for (int i = 0; i < 8; i++) |
| 2589 | | { |
| 2590 | | INT16 s1, s2; |
| 2591 | | GET_VS1(s1, i); |
| 2592 | | GET_VS2(s2, i); |
| 2593 | | |
| 2594 | | if ((INT16)(s1 ^ s2) < 0) /* operands have opposite signs */ |
| 2595 | | { |
| 2596 | | if (s2 < 0) |
| 2597 | | { |
| 2598 | | SET_CLIP2_FLAG(i); |
| 2599 | | } |
| 2600 | | if ((s1 + s2) <= 0) |
| 2601 | | { |
| 2602 | | SET_ACCUM_L(~((UINT16)s2), i); /* bitwise complement of s2, not the arithmetic negation */ |
| 2603 | | SET_COMPARE_FLAG(i); |
| 2604 | | } |
| 2605 | | else |
| 2606 | | { |
| 2607 | | SET_ACCUM_L(s1, i); |
| 2608 | | } |
| 2609 | | } |
| 2610 | | else /* operands have the same sign */ |
| 2611 | | { |
| 2612 | | if (s2 < 0) |
| 2613 | | { |
| 2614 | | SET_COMPARE_FLAG(i); |
| 2615 | | } |
| 2616 | | if ((s1 - s2) >= 0) |
| 2617 | | { |
| 2618 | | SET_ACCUM_L(s2, i); |
| 2619 | | SET_CLIP2_FLAG(i); |
| 2620 | | } |
| 2621 | | else |
| 2622 | | { |
| 2623 | | SET_ACCUM_L(s1, i); |
| 2624 | | } |
| 2625 | | } |
| 2626 | | |
| 2627 | | m_vres[i] = ACCUM_L(i); /* result is read back from the accumulator slice just written */ |
| 2628 | | } |
| 2629 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2630 | | } |
| 2631 | | |
| 2632 | | static void cfunc_vcr(void *param) /* Static trampoline: lets vcr() be called through a plain function taking void* */ |
| 2633 | | { |
| 2634 | | ((rsp_cop2 *)param)->vcr(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2635 | | } |
| 2636 | | |
| 2637 | | |
| 2638 | | // VMRG |
| 2639 | | // |
| 2640 | | // 31 25 24 20 15 10 5 0 |
| 2641 | | // ------------------------------------------------------ |
| 2642 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 2643 | | // ------------------------------------------------------ |
| 2644 | | // |
| 2645 | | // Merges two vectors according to compare flags |
| 2646 | | |
| 2647 | | inline void rsp_cop2_drc::vmrg() /* VMRG: per-element select between VS1 and VS2 using the compare flags already in place; no flag is modified here */ |
| 2648 | | { |
| 2649 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2650 | | |
| 2651 | | for (int i = 0; i < 8; i++) |
| 2652 | | { |
| 2653 | | INT16 s1, s2; |
| 2654 | | GET_VS1(s1, i); |
| 2655 | | GET_VS2(s2, i); |
| 2656 | | if (COMPARE_FLAG(i) != 0) /* flag set picks the VS1 element, flag clear picks the VS2 element */ |
| 2657 | | { |
| 2658 | | m_vres[i] = s1; |
| 2659 | | } |
| 2660 | | else |
| 2661 | | { |
| 2662 | | m_vres[i] = s2; |
| 2663 | | } |
| 2664 | | |
| 2665 | | SET_ACCUM_L(m_vres[i], i); /* the selected value is also handed to SET_ACCUM_L for this element */ |
| 2666 | | } |
| 2667 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2668 | | } |
| 2669 | | |
| 2670 | | static void cfunc_vmrg(void *param) /* Static trampoline: lets vmrg() be called through a plain function taking void* */ |
| 2671 | | { |
| 2672 | | ((rsp_cop2 *)param)->vmrg(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2673 | | } |
| 2674 | | |
| 2675 | | |
| 2676 | | // VAND |
| 2677 | | // |
| 2678 | | // 31 25 24 20 15 10 5 0 |
| 2679 | | // ------------------------------------------------------ |
| 2680 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 2681 | | // ------------------------------------------------------ |
| 2682 | | // |
| 2683 | | // Bitwise AND of two vector registers |
| 2684 | | |
| 2685 | | inline void rsp_cop2_drc::vand() /* VAND: per-element bitwise AND of VS1 and VS2 over 8 elements */ |
| 2686 | | { |
| 2687 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2688 | | |
| 2689 | | for (int i = 0; i < 8; i++) |
| 2690 | | { |
| 2691 | | UINT16 s1, s2; |
| 2692 | | GET_VS1(s1, i); |
| 2693 | | GET_VS2(s2, i); |
| 2694 | | m_vres[i] = s1 & s2; |
| 2695 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2696 | | } |
| 2697 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2698 | | } |
| 2699 | | |
| 2700 | | static void cfunc_vand(void *param) /* Static trampoline: lets vand() be called through a plain function taking void* */ |
| 2701 | | { |
| 2702 | | ((rsp_cop2 *)param)->vand(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2703 | | } |
| 2704 | | |
| 2705 | | |
| 2706 | | // VNAND |
| 2707 | | // |
| 2708 | | // 31 25 24 20 15 10 5 0 |
| 2709 | | // ------------------------------------------------------ |
| 2710 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 2711 | | // ------------------------------------------------------ |
| 2712 | | // |
| 2713 | | // Bitwise NOT AND of two vector registers |
| 2714 | | |
| 2715 | | inline void rsp_cop2_drc::vnand() /* VNAND: per-element complement of the bitwise AND of VS1 and VS2 over 8 elements */ |
| 2716 | | { |
| 2717 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2718 | | |
| 2719 | | for (int i = 0; i < 8; i++) |
| 2720 | | { |
| 2721 | | UINT16 s1, s2; |
| 2722 | | GET_VS1(s1, i); |
| 2723 | | GET_VS2(s2, i); |
| 2724 | | m_vres[i] = ~((s1 & s2)); /* complement is computed after integer promotion; assumes m_vres elements are 16 bits wide so the upper bits are dropped - TODO confirm */ |
| 2725 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2726 | | } |
| 2727 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2728 | | } |
| 2729 | | |
| 2730 | | static void cfunc_vnand(void *param) /* Static trampoline: lets vnand() be called through a plain function taking void* */ |
| 2731 | | { |
| 2732 | | ((rsp_cop2 *)param)->vnand(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2733 | | } |
| 2734 | | |
| 2735 | | |
| 2736 | | // VOR |
| 2737 | | // |
| 2738 | | // 31 25 24 20 15 10 5 0 |
| 2739 | | // ------------------------------------------------------ |
| 2740 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2741 | | // ------------------------------------------------------ |
| 2742 | | // |
| 2743 | | // Bitwise OR of two vector registers |
| 2744 | | |
| 2745 | | inline void rsp_cop2_drc::vor() /* VOR: per-element bitwise OR of VS1 and VS2 over 8 elements */ |
| 2746 | | { |
| 2747 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2748 | | |
| 2749 | | for (int i = 0; i < 8; i++) |
| 2750 | | { |
| 2751 | | UINT16 s1, s2; |
| 2752 | | GET_VS1(s1, i); |
| 2753 | | GET_VS2(s2, i); |
| 2754 | | m_vres[i] = s1 | s2; |
| 2755 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2756 | | } |
| 2757 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2758 | | } |
| 2759 | | |
| 2760 | | static void cfunc_vor(void *param) /* Static trampoline: lets vor() be called through a plain function taking void* */ |
| 2761 | | { |
| 2762 | | ((rsp_cop2 *)param)->vor(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2763 | | } |
| 2764 | | |
| 2765 | | |
| 2766 | | // VNOR |
| 2767 | | // |
| 2768 | | // 31 25 24 20 15 10 5 0 |
| 2769 | | // ------------------------------------------------------ |
| 2770 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2771 | | // ------------------------------------------------------ |
| 2772 | | // |
| 2773 | | // Bitwise NOT OR of two vector registers |
| 2774 | | |
| 2775 | | inline void rsp_cop2_drc::vnor() /* VNOR: per-element complement of the bitwise OR of VS1 and VS2 over 8 elements */ |
| 2776 | | { |
| 2777 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2778 | | |
| 2779 | | for (int i = 0; i < 8; i++) |
| 2780 | | { |
| 2781 | | UINT16 s1, s2; |
| 2782 | | GET_VS1(s1, i); |
| 2783 | | GET_VS2(s2, i); |
| 2784 | | m_vres[i] = ~(s1 | s2); /* complement is computed after integer promotion; assumes m_vres elements are 16 bits wide so the upper bits are dropped - TODO confirm */ |
| 2785 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2786 | | } |
| 2787 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2788 | | } |
| 2789 | | |
| 2790 | | static void cfunc_vnor(void *param) /* Static trampoline: lets vnor() be called through a plain function taking void* */ |
| 2791 | | { |
| 2792 | | ((rsp_cop2 *)param)->vnor(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2793 | | } |
| 2794 | | |
| 2795 | | |
| 2796 | | // VXOR |
| 2797 | | // |
| 2798 | | // 31 25 24 20 15 10 5 0 |
| 2799 | | // ------------------------------------------------------ |
| 2800 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2801 | | // ------------------------------------------------------ |
| 2802 | | // |
| 2803 | | // Bitwise XOR of two vector registers |
| 2804 | | |
| 2805 | | inline void rsp_cop2_drc::vxor() /* VXOR: per-element bitwise XOR of VS1 and VS2 over 8 elements */ |
| 2806 | | { |
| 2807 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2808 | | |
| 2809 | | for (int i = 0; i < 8; i++) |
| 2810 | | { |
| 2811 | | UINT16 s1, s2; |
| 2812 | | GET_VS1(s1, i); |
| 2813 | | GET_VS2(s2, i); |
| 2814 | | m_vres[i] = s1 ^ s2; |
| 2815 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2816 | | } |
| 2817 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2818 | | } |
| 2819 | | |
| 2820 | | static void cfunc_vxor(void *param) /* Static trampoline: lets vxor() be called through a plain function taking void* */ |
| 2821 | | { |
| 2822 | | ((rsp_cop2 *)param)->vxor(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2823 | | } |
| 2824 | | |
| 2825 | | |
| 2826 | | // VNXOR |
| 2827 | | // |
| 2828 | | // 31 25 24 20 15 10 5 0 |
| 2829 | | // ------------------------------------------------------ |
| 2830 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2831 | | // ------------------------------------------------------ |
| 2832 | | // |
| 2833 | | // Bitwise NOT XOR of two vector registers |
| 2834 | | |
| 2835 | | inline void rsp_cop2_drc::vnxor() /* VNXOR: per-element complement of the bitwise XOR of VS1 and VS2 over 8 elements */ |
| 2836 | | { |
| 2837 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2838 | | |
| 2839 | | for (int i = 0; i < 8; i++) |
| 2840 | | { |
| 2841 | | UINT16 s1, s2; |
| 2842 | | GET_VS1(s1, i); |
| 2843 | | GET_VS2(s2, i); |
| 2844 | | m_vres[i] = ~(s1 ^ s2); /* complement is computed after integer promotion; assumes m_vres elements are 16 bits wide so the upper bits are dropped - TODO confirm */ |
| 2845 | | SET_ACCUM_L(m_vres[i], i); /* the result is also handed to SET_ACCUM_L for this element */ |
| 2846 | | } |
| 2847 | | WRITEBACK_RESULT(); /* presumably copies m_vres into the destination register - TODO confirm */ |
| 2848 | | } |
| 2849 | | |
| 2850 | | static void cfunc_vnxor(void *param) /* Static trampoline: lets vnxor() be called through a plain function taking void* */ |
| 2851 | | { |
| 2852 | | ((rsp_cop2 *)param)->vnxor(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2853 | | } |
| 2854 | | |
| 2855 | | |
| 2856 | | // VRCP |
| 2857 | | // |
| 2858 | | // 31 25 24 20 15 10 5 0 |
| 2859 | | // ------------------------------------------------------ |
| 2860 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2861 | | // ------------------------------------------------------ |
| 2862 | | // |
| 2863 | | // Calculates reciprocal |
| 2864 | | |
| 2865 | | inline void rsp_cop2_drc::vrcp() /* VRCP: table-driven reciprocal of one 16-bit source element; low 16 bits of the result go to one destination element */ |
| 2866 | | { |
| 2867 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2868 | | |
| 2869 | | INT32 shifter = 0; |
| 2870 | | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); /* source element, sign-extended to 32 bits */ |
| 2871 | | INT32 datainput = (rec < 0) ? (-rec) : rec; /* absolute value of the input */ |
| 2872 | | if (datainput) |
| 2873 | | { |
| 2874 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downward; shifter ends up as the number of leading zero bits */ |
| 2875 | | { |
| 2876 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2877 | | { |
| 2878 | | shifter = i; |
| 2879 | | break; |
| 2880 | | } |
| 2881 | | } |
| 2882 | | } |
| 2883 | | else |
| 2884 | | { |
| 2885 | | shifter = 0x10; /* zero input; the computed value is overridden by the zero special case below */ |
| 2886 | | } |
| 2887 | | |
| 2888 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* the 9 bits directly below the normalised top bit form the table index */ |
| 2889 | | INT32 fetchval = rsp_divtable[address]; |
| 2890 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); /* shift count equals 31 - shifter */ |
| 2891 | | if (rec < 0) |
| 2892 | | { |
| 2893 | | temp = ~temp; /* negative input: result is bitwise-inverted */ |
| 2894 | | } |
| 2895 | | if (!rec) |
| 2896 | | { |
| 2897 | | temp = 0x7fffffff; /* zero input yields the largest positive value */ |
| 2898 | | } |
| 2899 | | else if (rec == 0xffff8000) /* true when rec is -32768: rec is converted to unsigned for this comparison */ |
| 2900 | | { |
| 2901 | | temp = 0xffff0000; |
| 2902 | | } |
| 2903 | | rec = temp; |
| 2904 | | |
| 2905 | | m_reciprocal_res = rec; /* full 32-bit result is kept; vrcph() later stores its upper 16 bits */ |
| 2906 | | m_dp_allowed = 0; |
| 2907 | | |
| 2908 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; /* low 16 bits go to destination element (VS1REG & 7) */ |
| 2909 | | for (int i = 0; i < 8; i++) |
| 2910 | | { |
| 2911 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 2912 | | } |
| 2913 | | } |
| 2914 | | |
| 2915 | | static void cfunc_vrcp(void *param) /* Static trampoline: lets vrcp() be called through a plain function taking void* */ |
| 2916 | | { |
| 2917 | | ((rsp_cop2 *)param)->vrcp(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 2918 | | } |
| 2919 | | |
| 2920 | | |
| 2921 | | // VRCPL |
| 2922 | | // |
| 2923 | | // 31 25 24 20 15 10 5 0 |
| 2924 | | // ------------------------------------------------------ |
| 2925 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2926 | | // ------------------------------------------------------ |
| 2927 | | // |
| 2928 | | // Calculates reciprocal low part |
| 2929 | | |
| 2930 | | inline void rsp_cop2_drc::vrcpl() /* VRCPL: reciprocal of a 32-bit input when m_dp_allowed is set (high half from m_reciprocal_high), else of the 16-bit element alone */ |
| 2931 | | { |
| 2932 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 2933 | | |
| 2934 | | INT32 shifter = 0; |
| 2935 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); /* source element, sign-extended to 32 bits */ |
| 2936 | | INT32 datainput = rec; |
| 2937 | | |
| 2938 | | if (m_dp_allowed) |
| 2939 | | { |
| 2940 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; /* low 16 bits from this element, high 16 bits from the stored high part */ |
| 2941 | | datainput = rec; |
| 2942 | | |
| 2943 | | if (rec < 0) |
| 2944 | | { |
| 2945 | | if (rec < -32768) /* values below the 16-bit range are complemented rather than negated */ |
| 2946 | | { |
| 2947 | | datainput = ~datainput; |
| 2948 | | } |
| 2949 | | else |
| 2950 | | { |
| 2951 | | datainput = -datainput; |
| 2952 | | } |
| 2953 | | } |
| 2954 | | } |
| 2955 | | else if (datainput < 0) |
| 2956 | | { |
| 2957 | | datainput = -datainput; |
| 2958 | | |
| 2959 | | shifter = 0x10; /* overwritten by the scan below, since datainput is non-zero on this path */ |
| 2960 | | } |
| 2961 | | |
| 2962 | | if (datainput) |
| 2963 | | { |
| 2964 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downward; shifter ends up as the number of leading zero bits */ |
| 2965 | | { |
| 2966 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2967 | | { |
| 2968 | | shifter = i; |
| 2969 | | break; |
| 2970 | | } |
| 2971 | | } |
| 2972 | | } |
| 2973 | | |
| 2974 | | UINT32 address = (datainput << shifter) >> 22; |
| 2975 | | INT32 fetchval = rsp_divtable[address & 0x1ff]; /* index is limited to 9 bits */ |
| 2976 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); /* shift count equals 31 - shifter */ |
| 2977 | | temp ^= rec >> 31; /* assumes an arithmetic right shift, giving all ones for negative rec and so inverting temp - TODO confirm */ |
| 2978 | | |
| 2979 | | if (!rec) |
| 2980 | | { |
| 2981 | | temp = 0x7fffffff; /* zero input yields the largest positive value */ |
| 2982 | | } |
| 2983 | | else if (rec == 0xffff8000) /* true when rec is -32768: rec is converted to unsigned for this comparison */ |
| 2984 | | { |
| 2985 | | temp = 0xffff0000; |
| 2986 | | } |
| 2987 | | rec = temp; |
| 2988 | | |
| 2989 | | m_reciprocal_res = rec; /* full 32-bit result is kept; vrcph() later stores its upper 16 bits */ |
| 2990 | | m_dp_allowed = 0; |
| 2991 | | |
| 2992 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; /* low 16 bits go to destination element (VS1REG & 7) */ |
| 2993 | | |
| 2994 | | for (int i = 0; i < 8; i++) |
| 2995 | | { |
| 2996 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 2997 | | } |
| 2998 | | } |
| 2999 | | |
| 3000 | | static void cfunc_vrcpl(void *param) /* Static trampoline: lets vrcpl() be called through a plain function taking void* */ |
| 3001 | | { |
| 3002 | | ((rsp_cop2 *)param)->vrcpl(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 3003 | | } |
| 3004 | | |
| 3005 | | |
| 3006 | | // VRCPH |
| 3007 | | // |
| 3008 | | // 31 25 24 20 15 10 5 0 |
| 3009 | | // ------------------------------------------------------ |
| 3010 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 3011 | | // ------------------------------------------------------ |
| 3012 | | // |
| 3013 | | // Calculates reciprocal high part |
| 3014 | | |
| 3015 | | inline void rsp_cop2_drc::vrcph() /* VRCPH: latches the high half of a 32-bit input and stores the upper 16 bits of the previous result */ |
| 3016 | | { |
| 3017 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 3018 | | |
| 3019 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; /* source element becomes bits 31..16 of the pending input */ |
| 3020 | | m_dp_allowed = 1; /* tells the following low-part opcode to combine with m_reciprocal_high */ |
| 3021 | | |
| 3022 | | for (int i = 0; i < 8; i++) |
| 3023 | | { |
| 3024 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 3025 | | } |
| 3026 | | |
| 3027 | | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); /* upper 16 bits of the stored result go to destination element (VS1REG & 7) */ |
| 3028 | | } |
| 3029 | | |
| 3030 | | static void cfunc_vrcph(void *param) /* Static trampoline: lets vrcph() be called through a plain function taking void* */ |
| 3031 | | { |
| 3032 | | ((rsp_cop2 *)param)->vrcph(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 3033 | | } |
| 3034 | | |
| 3035 | | |
| 3036 | | // VMOV |
| 3037 | | // |
| 3038 | | // 31 25 24 20 15 10 5 0 |
| 3039 | | // ------------------------------------------------------ |
| 3040 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 3041 | | // ------------------------------------------------------ |
| 3042 | | // |
| 3043 | | // Moves element from vector to destination vector |
| 3044 | | |
| 3045 | | inline void rsp_cop2_drc::vmov() /* VMOV: copies a single element from VS2 into a single element of VD */ |
| 3046 | | { |
| 3047 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 3048 | | |
| 3049 | | W_VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); /* source element index is EL & 7, destination element index is VS1REG & 7 */ |
| 3050 | | for (int i = 0; i < 8; i++) |
| 3051 | | { |
| 3052 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 3053 | | } |
| 3054 | | } |
| 3055 | | |
| 3056 | | static void cfunc_vmov(void *param) /* Static trampoline: lets vmov() be called through a plain function taking void* */ |
| 3057 | | { |
| 3058 | | ((rsp_cop2 *)param)->vmov(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 3059 | | } |
| 3060 | | |
| 3061 | | |
| 3062 | | // VRSQ |
| 3063 | | // |
| 3064 | | // 31 25 24 20 15 10 5 0 |
| 3065 | | // ------------------------------------------------------ |
| 3066 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 3067 | | // ------------------------------------------------------ |
| 3068 | | // |
| 3069 | | // Calculates reciprocal square-root |
| 3070 | | |
| 3071 | | inline void rsp_cop2_drc::vrsq() /* VRSQ: table-driven reciprocal square root of one source element, done as two lookup passes; reads m_dp_allowed but writes neither it nor m_reciprocal_res */ |
| 3072 | | { |
| 3073 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 3074 | | |
| 3075 | | INT32 shifter = 0; |
| 3076 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); /* source element, sign-extended to 32 bits */ |
| 3077 | | INT32 datainput = (rec < 0) ? (-rec) : (rec); /* absolute value of the input */ |
| 3078 | | |
| 3079 | | if (rec < 0) /* NOTE(review): rec is a sign-extended INT16, so the "rec < -32768" arm cannot run; the else arm negates the absolute value again, leaving datainput equal to the negative rec - confirm this is intended */ |
| 3080 | | { |
| 3081 | | if (rec < -32768) |
| 3082 | | { |
| 3083 | | datainput = ~datainput; |
| 3084 | | } |
| 3085 | | else |
| 3086 | | { |
| 3087 | | datainput = -datainput; |
| 3088 | | } |
| 3089 | | } |
| 3090 | | |
| 3091 | | if (datainput) |
| 3092 | | { |
| 3093 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downward; shifter ends up as the number of leading zero bits */ |
| 3094 | | { |
| 3095 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3096 | | { |
| 3097 | | shifter = i; |
| 3098 | | break; |
| 3099 | | } |
| 3100 | | } |
| 3101 | | } |
| 3102 | | else |
| 3103 | | { |
| 3104 | | shifter = 0; |
| 3105 | | } |
| 3106 | | |
| 3107 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* the 9 bits directly below the normalised top bit */ |
| 3108 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* index is forced into 0x200..0x3ff and its lowest bit is replaced by the parity of shifter */ |
| 3109 | | |
| 3110 | | INT32 fetchval = rsp_divtable[address]; |
| 3111 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* shift count is (31 - shifter) halved, rounded down */ |
| 3112 | | if (rec < 0) |
| 3113 | | { |
| 3114 | | temp = ~temp; |
| 3115 | | } |
| 3116 | | if (!rec) |
| 3117 | | { |
| 3118 | | temp = 0x7fffffff; |
| 3119 | | } |
| 3120 | | else if (rec == 0xffff8000) /* true when rec is -32768: rec is converted to unsigned for this comparison */ |
| 3121 | | { |
| 3122 | | temp = 0xffff0000; |
| 3123 | | } |
| 3124 | | rec = temp; /* first-pass result; from here on rec no longer holds the original input */ |
| 3125 | | |
| 3126 | | if (rec < 0) /* second pass: datainput is adjusted according to the sign of the first-pass result, then the lookup is repeated */ |
| 3127 | | { |
| 3128 | | if (m_dp_allowed) |
| 3129 | | { |
| 3130 | | if (rec < -32768) |
| 3131 | | { |
| 3132 | | datainput = ~datainput; |
| 3133 | | } |
| 3134 | | else |
| 3135 | | { |
| 3136 | | datainput = -datainput; |
| 3137 | | } |
| 3138 | | } |
| 3139 | | else |
| 3140 | | { |
| 3141 | | datainput = -datainput; |
| 3142 | | } |
| 3143 | | } |
| 3144 | | |
| 3145 | | if (datainput) |
| 3146 | | { |
| 3147 | | for (int i = 0; i < 32; i++) /* same leading-zero scan as in the first pass */ |
| 3148 | | { |
| 3149 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3150 | | { |
| 3151 | | shifter = i; |
| 3152 | | break; |
| 3153 | | } |
| 3154 | | } |
| 3155 | | } |
| 3156 | | else |
| 3157 | | { |
| 3158 | | shifter = 0; |
| 3159 | | } |
| 3160 | | |
| 3161 | | address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3162 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3163 | | |
| 3164 | | fetchval = rsp_divtable[address]; |
| 3165 | | temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3166 | | if (rec < 0) |
| 3167 | | { |
| 3168 | | temp = ~temp; |
| 3169 | | } |
| 3170 | | if (!rec) /* rec here is the first-pass result, not the original source element */ |
| 3171 | | { |
| 3172 | | temp = 0x7fff; |
| 3173 | | } |
| 3174 | | else if (rec == 0xffff8000) |
| 3175 | | { |
| 3176 | | temp = 0x0000; |
| 3177 | | } |
| 3178 | | rec = temp; |
| 3179 | | |
| 3180 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; /* low 16 bits go to destination element (VS1REG & 7) */ |
| 3181 | | for (int i = 0; i < 8; i++) |
| 3182 | | { |
| 3183 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 3184 | | } |
| 3185 | | } |
| 3186 | | |
| 3187 | | static void cfunc_vrsq(void *param) /* Static trampoline: lets vrsq() be called through a plain function taking void* */ |
| 3188 | | { |
| 3189 | | ((rsp_cop2 *)param)->vrsq(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 3190 | | } |
| 3191 | | |
| 3192 | | |
| 3193 | | // VRSQL |
| 3194 | | // |
| 3195 | | // 31 25 24 20 15 10 5 0 |
| 3196 | | // ------------------------------------------------------ |
| 3197 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 3198 | | // ------------------------------------------------------ |
| 3199 | | // |
| 3200 | | // Calculates reciprocal square-root low part |
| 3201 | | |
| 3202 | | inline void rsp_cop2_drc::vrsql() /* VRSQL: reciprocal square root of a 32-bit input when m_dp_allowed is set (high half from m_reciprocal_high), else of the 16-bit element alone */ |
| 3203 | | { |
| 3204 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 3205 | | |
| 3206 | | INT32 shifter = 0; |
| 3207 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); /* source element, sign-extended to 32 bits */ |
| 3208 | | INT32 datainput = rec; |
| 3209 | | |
| 3210 | | if (m_dp_allowed) |
| 3211 | | { |
| 3212 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; /* low 16 bits from this element, high 16 bits from the stored high part */ |
| 3213 | | datainput = rec; |
| 3214 | | |
| 3215 | | if (rec < 0) |
| 3216 | | { |
| 3217 | | if (rec < -32768) /* values below the 16-bit range are complemented rather than negated */ |
| 3218 | | { |
| 3219 | | datainput = ~datainput; |
| 3220 | | } |
| 3221 | | else |
| 3222 | | { |
| 3223 | | datainput = -datainput; |
| 3224 | | } |
| 3225 | | } |
| 3226 | | } |
| 3227 | | else if (datainput < 0) |
| 3228 | | { |
| 3229 | | datainput = -datainput; |
| 3230 | | |
| 3231 | | shifter = 0x10; /* overwritten by the scan below, since datainput is non-zero on this path */ |
| 3232 | | } |
| 3233 | | |
| 3234 | | if (datainput) |
| 3235 | | { |
| 3236 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downward; shifter ends up as the number of leading zero bits */ |
| 3237 | | { |
| 3238 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3239 | | { |
| 3240 | | shifter = i; |
| 3241 | | break; |
| 3242 | | } |
| 3243 | | } |
| 3244 | | } |
| 3245 | | |
| 3246 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* the 9 bits directly below the normalised top bit */ |
| 3247 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* index is forced into 0x200..0x3ff and its lowest bit is replaced by the parity of shifter */ |
| 3248 | | |
| 3249 | | INT32 fetchval = rsp_divtable[address]; |
| 3250 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* shift count is (31 - shifter) halved, rounded down */ |
| 3251 | | temp ^= rec >> 31; /* assumes an arithmetic right shift, giving all ones for negative rec and so inverting temp - TODO confirm */ |
| 3252 | | |
| 3253 | | if (!rec) |
| 3254 | | { |
| 3255 | | temp = 0x7fffffff; /* zero input yields the largest positive value */ |
| 3256 | | } |
| 3257 | | else if (rec == 0xffff8000) /* true when rec is -32768: rec is converted to unsigned for this comparison */ |
| 3258 | | { |
| 3259 | | temp = 0xffff0000; |
| 3260 | | } |
| 3261 | | rec = temp; |
| 3262 | | |
| 3263 | | m_reciprocal_res = rec; /* full 32-bit result is kept; vrsqh() later stores its upper 16 bits */ |
| 3264 | | m_dp_allowed = 0; |
| 3265 | | |
| 3266 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); /* low 16 bits go to destination element (VS1REG & 7) */ |
| 3267 | | for (int i = 0; i < 8; i++) |
| 3268 | | { |
| 3269 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 3270 | | } |
| 3271 | | } |
| 3272 | | |
| 3273 | | static void cfunc_vrsql(void *param) /* Static trampoline: lets vrsql() be called through a plain function taking void* */ |
| 3274 | | { |
| 3275 | | ((rsp_cop2 *)param)->vrsql(); /* NOTE(review): param is cast to rsp_cop2*; presumably the COP2 object handed to UML_CALLC - confirm */ |
| 3276 | | } |
| 3277 | | |
| 3278 | | |
| 3279 | | // VRSQH |
| 3280 | | // |
| 3281 | | // 31 25 24 20 15 10 5 0 |
| 3282 | | // ------------------------------------------------------ |
| 3283 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 3284 | | // ------------------------------------------------------ |
| 3285 | | // |
| 3286 | | // Calculates reciprocal square-root high part |
| 3287 | | |
| 3288 | | inline void rsp_cop2_drc::vrsqh() /* VRSQH: latches the high half of a 32-bit input and stores the upper 16 bits of the previous result */ |
| 3289 | | { |
| 3290 | | int op = m_op; /* opcode word; presumably read by the operand macros - TODO confirm */ |
| 3291 | | |
| 3292 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; /* source element becomes bits 31..16 of the pending input */ |
| 3293 | | m_dp_allowed = 1; /* tells the following low-part opcode to combine with m_reciprocal_high */ |
| 3294 | | |
| 3295 | | for (int i = 0; i < 8; i++) |
| 3296 | | { |
| 3297 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* VS2 elements, picked through the VEC_EL_2 lookup, are handed to SET_ACCUM_L */ |
| 3298 | | } |
| 3299 | | |
| 3300 | | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store the upper 16 bits of the saved result in destination element (VS1REG & 7) |
| 3301 | | } |
| 3302 | | |
| 3303 | | static void cfunc_vrsqh(void *param) |
| 3304 | | { |
| 3305 | | ((rsp_cop2 *)param)->vrsqh(); |
| 3306 | | } |
| 3307 | | |
| 3308 | | |
| 3309 | | /*------------------------------------------------- |
| 3310 | | generate_vector_opcode - generate code for a |
| 3311 | | vector opcode |
| 3312 | | -------------------------------------------------*/ |
| 3313 | | |
| 3314 | | int rsp_cop2_drc::generate_vector_opcode(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 3315 | | { |
| 3316 | | UINT32 op = desc->opptr.l[0]; |
| 3317 | | // Opcode legend: |
| 3318 | | // E = VS2 element type |
| 3319 | | // S = VS1, Source vector 1 |
| 3320 | | // T = VS2, Source vector 2 |
| 3321 | | // D = Destination vector |
| 3322 | | |
| 3323 | | switch (op & 0x3f) |
| 3324 | | { |
| 3325 | | case 0x00: /* VMULF */ |
| 3326 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3327 | | UML_CALLC(block, cfunc_vmulf, this); |
| 3328 | | return TRUE; |
| 3329 | | |
| 3330 | | case 0x01: /* VMULU */ |
| 3331 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3332 | | UML_CALLC(block, cfunc_vmulu, this); |
| 3333 | | return TRUE; |
| 3334 | | |
| 3335 | | case 0x04: /* VMUDL */ |
| 3336 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3337 | | UML_CALLC(block, cfunc_vmudl, this); |
| 3338 | | return TRUE; |
| 3339 | | |
| 3340 | | case 0x05: /* VMUDM */ |
| 3341 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3342 | | UML_CALLC(block, cfunc_vmudm, this); |
| 3343 | | return TRUE; |
| 3344 | | |
| 3345 | | case 0x06: /* VMUDN */ |
| 3346 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3347 | | UML_CALLC(block, cfunc_vmudn, this); |
| 3348 | | return TRUE; |
| 3349 | | |
| 3350 | | case 0x07: /* VMUDH */ |
| 3351 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3352 | | UML_CALLC(block, cfunc_vmudh, this); |
| 3353 | | return TRUE; |
| 3354 | | |
| 3355 | | case 0x08: /* VMACF */ |
| 3356 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3357 | | UML_CALLC(block, cfunc_vmacf, this); |
| 3358 | | return TRUE; |
| 3359 | | |
| 3360 | | case 0x09: /* VMACU */ |
| 3361 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3362 | | UML_CALLC(block, cfunc_vmacu, this); |
| 3363 | | return TRUE; |
| 3364 | | |
| 3365 | | case 0x0c: /* VMADL */ |
| 3366 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3367 | | UML_CALLC(block, cfunc_vmadl, this); |
| 3368 | | return TRUE; |
| 3369 | | |
| 3370 | | case 0x0d: /* VMADM */ |
| 3371 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3372 | | UML_CALLC(block, cfunc_vmadm, this); |
| 3373 | | return TRUE; |
| 3374 | | |
| 3375 | | case 0x0e: /* VMADN */ |
| 3376 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3377 | | UML_CALLC(block, cfunc_vmadn, this); |
| 3378 | | return TRUE; |
| 3379 | | |
| 3380 | | case 0x0f: /* VMADH */ |
| 3381 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3382 | | UML_CALLC(block, cfunc_vmadh, this); |
| 3383 | | return TRUE; |
| 3384 | | |
| 3385 | | case 0x10: /* VADD */ |
| 3386 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3387 | | UML_CALLC(block, cfunc_vadd, this); |
| 3388 | | return TRUE; |
| 3389 | | |
| 3390 | | case 0x11: /* VSUB */ |
| 3391 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3392 | | UML_CALLC(block, cfunc_vsub, this); |
| 3393 | | return TRUE; |
| 3394 | | |
| 3395 | | case 0x13: /* VABS */ |
| 3396 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3397 | | UML_CALLC(block, cfunc_vabs, this); |
| 3398 | | return TRUE; |
| 3399 | | |
| 3400 | | case 0x14: /* VADDC */ |
| 3401 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3402 | | UML_CALLC(block, cfunc_vaddc, this); |
| 3403 | | return TRUE; |
| 3404 | | |
| 3405 | | case 0x15: /* VSUBC */ |
| 3406 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3407 | | UML_CALLC(block, cfunc_vsubc, this); |
| 3408 | | return TRUE; |
| 3409 | | |
| 3410 | | case 0x16: /* VADDB */ |
| 3411 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3412 | | UML_CALLC(block, cfunc_vaddb, this); |
| 3413 | | return TRUE; |
| 3414 | | |
| 3415 | | case 0x17: /* VSUBB (reserved, functionally identical to VADDB) */ |
| 3416 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3417 | | UML_CALLC(block, cfunc_vaddb, this); |
| 3418 | | return TRUE; |
| 3419 | | |
| 3420 | | case 0x18: /* VACCB (reserved, functionally identical to VADDB) */ |
| 3421 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3422 | | UML_CALLC(block, cfunc_vaddb, this); |
| 3423 | | return TRUE; |
| 3424 | | |
| 3425 | | case 0x19: /* VSUCB (reserved, functionally identical to VADDB) */ |
| 3426 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3427 | | UML_CALLC(block, cfunc_vaddb, this); |
| 3428 | | return TRUE; |
| 3429 | | |
| 3430 | | case 0x1d: /* VSAW */ |
| 3431 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3432 | | UML_CALLC(block, cfunc_vsaw, this); |
| 3433 | | return TRUE; |
| 3434 | | |
| 3435 | | case 0x20: /* VLT */ |
| 3436 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3437 | | UML_CALLC(block, cfunc_vlt, this); |
| 3438 | | return TRUE; |
| 3439 | | |
| 3440 | | case 0x21: /* VEQ */ |
| 3441 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3442 | | UML_CALLC(block, cfunc_veq, this); |
| 3443 | | return TRUE; |
| 3444 | | |
| 3445 | | case 0x22: /* VNE */ |
| 3446 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3447 | | UML_CALLC(block, cfunc_vne, this); |
| 3448 | | return TRUE; |
| 3449 | | |
| 3450 | | case 0x23: /* VGE */ |
| 3451 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3452 | | UML_CALLC(block, cfunc_vge, this); |
| 3453 | | return TRUE; |
| 3454 | | |
| 3455 | | case 0x24: /* VCL */ |
| 3456 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3457 | | UML_CALLC(block, cfunc_vcl, this); |
| 3458 | | return TRUE; |
| 3459 | | |
| 3460 | | case 0x25: /* VCH */ |
| 3461 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3462 | | UML_CALLC(block, cfunc_vch, this); |
| 3463 | | return TRUE; |
| 3464 | | |
| 3465 | | case 0x26: /* VCR */ |
| 3466 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3467 | | UML_CALLC(block, cfunc_vcr, this); |
| 3468 | | return TRUE; |
| 3469 | | |
| 3470 | | case 0x27: /* VMRG */ |
| 3471 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3472 | | UML_CALLC(block, cfunc_vmrg, this); |
| 3473 | | return TRUE; |
| 3474 | | |
| 3475 | | case 0x28: /* VAND */ |
| 3476 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3477 | | UML_CALLC(block, cfunc_vand, this); |
| 3478 | | return TRUE; |
| 3479 | | |
| 3480 | | case 0x29: /* VNAND */ |
| 3481 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3482 | | UML_CALLC(block, cfunc_vnand, this); |
| 3483 | | return TRUE; |
| 3484 | | |
| 3485 | | case 0x2a: /* VOR */ |
| 3486 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3487 | | UML_CALLC(block, cfunc_vor, this); |
| 3488 | | return TRUE; |
| 3489 | | |
| 3490 | | case 0x2b: /* VNOR */ |
| 3491 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3492 | | UML_CALLC(block, cfunc_vnor, this); |
| 3493 | | return TRUE; |
| 3494 | | |
| 3495 | | case 0x2c: /* VXOR */ |
| 3496 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3497 | | UML_CALLC(block, cfunc_vxor, this); |
| 3498 | | return TRUE; |
| 3499 | | |
| 3500 | | case 0x2d: /* VNXOR */ |
| 3501 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3502 | | UML_CALLC(block, cfunc_vnxor, this); |
| 3503 | | return TRUE; |
| 3504 | | |
| 3505 | | case 0x30: /* VRCP */ |
| 3506 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3507 | | UML_CALLC(block, cfunc_vrcp, this); |
| 3508 | | return TRUE; |
| 3509 | | |
| 3510 | | case 0x31: /* VRCPL */ |
| 3511 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3512 | | UML_CALLC(block, cfunc_vrcpl, this); |
| 3513 | | return TRUE; |
| 3514 | | |
| 3515 | | case 0x32: /* VRCPH */ |
| 3516 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3517 | | UML_CALLC(block, cfunc_vrcph, this); |
| 3518 | | return TRUE; |
| 3519 | | |
| 3520 | | case 0x33: /* VMOV */ |
| 3521 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3522 | | UML_CALLC(block, cfunc_vmov, this); |
| 3523 | | return TRUE; |
| 3524 | | |
| 3525 | | case 0x34: /* VRSQ */ |
| 3526 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3527 | | UML_CALLC(block, cfunc_vrsq, this); |
| 3528 | | return TRUE; |
| 3529 | | |
| 3530 | | case 0x35: /* VRSQL */ |
| 3531 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3532 | | UML_CALLC(block, cfunc_vrsql, this); |
| 3533 | | return TRUE; |
| 3534 | | |
| 3535 | | case 0x36: /* VRSQH */ |
| 3536 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3537 | | UML_CALLC(block, cfunc_vrsqh, this); |
| 3538 | | return TRUE; |
| 3539 | | |
| 3540 | | case 0x37: /* VNOP */ |
| 3541 | | case 0x3F: /* VNULL */ |
| 3542 | | return TRUE; |
| 3543 | | |
| 3544 | | default: |
| 3545 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3546 | | UML_CALLC(block, unimplemented_opcode, &m_rsp); |
| 3547 | | return FALSE; |
| 3548 | | } |
| 3549 | | } |
| 3550 | | |
| 3551 | | |
| 3552 | | /*************************************************************************** |
| 3553 | | Vector Flag Reading/Writing |
| 3554 | | ***************************************************************************/ |
| 3555 | | |
| 3556 | | inline void rsp_cop2_drc::mfc2() |
| 3557 | | { |
| 3558 | | UINT32 op = m_op; |
| 3559 | | int el = (op >> 7) & 0xf; |
| 3560 | | |
| 3561 | | UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); |
| 3562 | | UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); |
| 3563 | | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); |
| 3564 | | } |
| 3565 | | |
| 3566 | | static void cfunc_mfc2(void *param) |
| 3567 | | { |
| 3568 | | ((rsp_cop2 *)param)->mfc2(); |
| 3569 | | } |
| 3570 | | |
| 3571 | | inline void rsp_cop2_drc::cfc2() |
| 3572 | | { |
| 3573 | | UINT32 op = m_op; |
| 3574 | | if (RTREG) |
| 3575 | | { |
| 3576 | | switch(RDREG) |
| 3577 | | { |
| 3578 | | case 0: |
| 3579 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3580 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 3581 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 3582 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 3583 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 3584 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 3585 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 3586 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 3587 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 3588 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 3589 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 3590 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 3591 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 3592 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 3593 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 3594 | | ((ZERO_FLAG(7) & 1) << 15); |
| 3595 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3596 | | break; |
| 3597 | | case 1: |
| 3598 | | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 3599 | | ((COMPARE_FLAG(1) & 1) << 1) | |
| 3600 | | ((COMPARE_FLAG(2) & 1) << 2) | |
| 3601 | | ((COMPARE_FLAG(3) & 1) << 3) | |
| 3602 | | ((COMPARE_FLAG(4) & 1) << 4) | |
| 3603 | | ((COMPARE_FLAG(5) & 1) << 5) | |
| 3604 | | ((COMPARE_FLAG(6) & 1) << 6) | |
| 3605 | | ((COMPARE_FLAG(7) & 1) << 7) | |
| 3606 | | ((CLIP2_FLAG(0) & 1) << 8) | |
| 3607 | | ((CLIP2_FLAG(1) & 1) << 9) | |
| 3608 | | ((CLIP2_FLAG(2) & 1) << 10) | |
| 3609 | | ((CLIP2_FLAG(3) & 1) << 11) | |
| 3610 | | ((CLIP2_FLAG(4) & 1) << 12) | |
| 3611 | | ((CLIP2_FLAG(5) & 1) << 13) | |
| 3612 | | ((CLIP2_FLAG(6) & 1) << 14) | |
| 3613 | | ((CLIP2_FLAG(7) & 1) << 15); |
| 3614 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3615 | | break; |
| 3616 | | case 2: |
| 3617 | | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3618 | | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3619 | | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3620 | | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3621 | | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3622 | | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3623 | | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3624 | | ((CLIP1_FLAG(7) & 1) << 7); |
| 3625 | | break; |
| 3626 | | } |
| 3627 | | } |
| 3628 | | } |
| 3629 | | |
| 3630 | | static void cfunc_cfc2(void *param) |
| 3631 | | { |
| 3632 | | ((rsp_cop2 *)param)->cfc2(); |
| 3633 | | } |
| 3634 | | |
| 3635 | | |
| 3636 | | inline void rsp_cop2_drc::mtc2() |
| 3637 | | { |
| 3638 | | UINT32 op = m_op; |
| 3639 | | int el = (op >> 7) & 0xf; |
| 3640 | | VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; |
| 3641 | | VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; |
| 3642 | | } |
| 3643 | | |
| 3644 | | static void cfunc_mtc2(void *param) |
| 3645 | | { |
| 3646 | | ((rsp_cop2 *)param)->mtc2(); |
| 3647 | | } |
| 3648 | | |
| 3649 | | |
| 3650 | | inline void rsp_cop2_drc::ctc2() |
| 3651 | | { |
| 3652 | | UINT32 op = m_op; |
| 3653 | | switch(RDREG) |
| 3654 | | { |
| 3655 | | case 0: |
| 3656 | | CLEAR_CARRY_FLAGS(); |
| 3657 | | CLEAR_ZERO_FLAGS(); |
| 3658 | | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3659 | | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3660 | | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3661 | | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3662 | | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3663 | | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3664 | | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3665 | | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3666 | | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 3667 | | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 3668 | | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 3669 | | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 3670 | | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 3671 | | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 3672 | | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 3673 | | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 3674 | | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 3675 | | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3676 | | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3677 | | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3678 | | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3679 | | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3680 | | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3681 | | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3682 | | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 3683 | | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 3684 | | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 3685 | | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 3686 | | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 3687 | | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 3688 | | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 3689 | | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 3690 | | break; |
| 3691 | | case 1: |
| 3692 | | CLEAR_COMPARE_FLAGS(); |
| 3693 | | CLEAR_CLIP2_FLAGS(); |
| 3694 | | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3695 | | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3696 | | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3697 | | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3698 | | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3699 | | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3700 | | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3701 | | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3702 | | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 3703 | | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 3704 | | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 3705 | | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 3706 | | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 3707 | | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 3708 | | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 3709 | | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 3710 | | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 3711 | | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3712 | | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3713 | | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3714 | | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3715 | | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3716 | | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3717 | | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3718 | | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 3719 | | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 3720 | | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 3721 | | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 3722 | | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 3723 | | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 3724 | | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 3725 | | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 3726 | | break; |
| 3727 | | case 2: |
| 3728 | | CLEAR_CLIP1_FLAGS(); |
| 3729 | | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3730 | | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3731 | | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3732 | | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3733 | | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3734 | | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3735 | | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3736 | | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3737 | | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 3738 | | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 3739 | | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 3740 | | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 3741 | | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 3742 | | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 3743 | | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 3744 | | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 3745 | | break; |
| 3746 | | } |
| 3747 | | } |
| 3748 | | |
| 3749 | | static void cfunc_ctc2(void *param) |
| 3750 | | { |
| 3751 | | ((rsp_cop2 *)param)->ctc2(); |
| 3752 | | } |
| 3753 | | |
| 3754 | | /*************************************************************************** |
| 3755 | | COP2 Opcode Compilation |
| 3756 | | ***************************************************************************/ |
| 3757 | | |
| 3758 | | int rsp_cop2_drc::generate_cop2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 3759 | | { |
| 3760 | | UINT32 op = desc->opptr.l[0]; |
| 3761 | | UINT8 opswitch = RSREG; |
| 3762 | | |
| 3763 | | switch (opswitch) |
| 3764 | | { |
| 3765 | | case 0x00: /* MFCz */ |
| 3766 | | if (RTREG != 0) |
| 3767 | | { |
| 3768 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3769 | | UML_CALLC(block, cfunc_mfc2, this); // callc mfc2 |
| 3770 | | } |
| 3771 | | return TRUE; |
| 3772 | | |
| 3773 | | case 0x02: /* CFCz */ |
| 3774 | | if (RTREG != 0) |
| 3775 | | { |
| 3776 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3777 | | UML_CALLC(block, cfunc_cfc2, this); // callc cfc2 |
| 3778 | | } |
| 3779 | | return TRUE; |
| 3780 | | |
| 3781 | | case 0x04: /* MTCz */ |
| 3782 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3783 | | UML_CALLC(block, cfunc_mtc2, this); // callc mtc2 |
| 3784 | | return TRUE; |
| 3785 | | |
| 3786 | | case 0x06: /* CTCz */ |
| 3787 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3788 | | UML_CALLC(block, cfunc_ctc2, this); // callc ctc2 |
| 3789 | | return TRUE; |
| 3790 | | |
| 3791 | | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 3792 | | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 3793 | | return generate_vector_opcode(block, compiler, desc); |
| 3794 | | } |
| 3795 | | return FALSE; |
| 3796 | | } |
| | No newline at end of file |
trunk/src/emu/cpu/rsp/rspcp2s.c
| r241959 | r241960 | |
| 1 | | /*************************************************************************** |
| 2 | | |
| 3 | | rspcp2s.c |
| 4 | | |
| 5 | | Universal machine language-based Nintendo/SGI RSP COP2 emulator, with |
| 6 | | SSSE3 SIMD optimizations. |
| 7 | | Written by Harmony of the MESS team. |
| 8 | | |
| 9 | | Copyright the MESS team. |
| 10 | | Released for general non-commercial use under the MAME license |
| 11 | | Visit http://mamedev.org for licensing and usage restrictions. |
| 12 | | |
| 13 | | ***************************************************************************/ |
| 14 | | |
| 15 | | #include "emu.h" |
| 16 | | #include "rsp.h" |
| 17 | | #include "rspdiv.h" |
| 18 | | #include "rspcp2.h" |
| 19 | | #include "cpu/drcfe.h" |
| 20 | | #include "cpu/drcuml.h" |
| 21 | | #include "cpu/drcumlsh.h" |
| 22 | | |
| 23 | | using namespace uml; |
| 24 | | |
/***************************************************************************
    Helpful Defines
***************************************************************************/

// Field extractors for a COP2 vector opcode. All of them read a variable
// named 'op' from the enclosing scope.
#define VDREG   ((op >> 6) & 0x1f)      // bits  6..10
#define VS1REG  ((op >> 11) & 0x1f)     // bits 11..15
#define VS2REG  ((op >> 16) & 0x1f)     // bits 16..20
#define EL      ((op >> 21) & 0xf)      // bits 21..24

// Scalar register values, indexed by the RSREG/RTREG/RDREG opcode fields.
#define RSVAL   (m_rsp.m_rsp_state->r[RSREG])
#define RTVAL   (m_rsp.m_rsp_state->r[RTREG])
#define RDVAL   (m_rsp.m_rsp_state->r[RDREG])
| 37 | | |
// Read 16-bit lane (element & 7) of SSE register 'reg' into 'value'.
// Each lane gets its own case so that _mm_extract_epi16 always receives a
// literal lane index. Expands to a bare switch statement.
#define EXTRACT16(reg, value, element) \
	switch((element) & 7) \
	{ \
		case 0: value = _mm_extract_epi16(reg, 0); break; \
		case 1: value = _mm_extract_epi16(reg, 1); break; \
		case 2: value = _mm_extract_epi16(reg, 2); break; \
		case 3: value = _mm_extract_epi16(reg, 3); break; \
		case 4: value = _mm_extract_epi16(reg, 4); break; \
		case 5: value = _mm_extract_epi16(reg, 5); break; \
		case 6: value = _mm_extract_epi16(reg, 6); break; \
		case 7: value = _mm_extract_epi16(reg, 7); break; \
	}
| 50 | | |
| 51 | | |
// Replace 16-bit lane (element & 7) of SSE register 'reg' with 'value',
// assigning the updated register back to 'reg'. Each lane gets its own case
// so that _mm_insert_epi16 always receives a literal lane index. Expands to
// a bare switch statement.
#define INSERT16(reg, value, element) \
	switch((element) & 7) \
	{ \
		case 0: reg = _mm_insert_epi16(reg, value, 0); break; \
		case 1: reg = _mm_insert_epi16(reg, value, 1); break; \
		case 2: reg = _mm_insert_epi16(reg, value, 2); break; \
		case 3: reg = _mm_insert_epi16(reg, value, 3); break; \
		case 4: reg = _mm_insert_epi16(reg, value, 4); break; \
		case 5: reg = _mm_insert_epi16(reg, value, 5); break; \
		case 6: reg = _mm_insert_epi16(reg, value, 6); break; \
		case 7: reg = _mm_insert_epi16(reg, value, 7); break; \
	}
| 64 | | |
| 65 | | |
// Scalar views of the vector register file m_v.
#define VREG_B(reg, offset)     m_v[(reg)].b[(offset)^1]        // byte access; index is XORed with 1
#define W_VREG_S(reg, offset)   m_v[(reg)].s[(offset)]          // 16-bit lane as an lvalue, for writes
#define VREG_S(reg, offset)     (INT16)m_v[(reg)].s[(offset)]   // 16-bit lane read as signed

// Source lane selected for output lane z under element specifier x.
#define VEC_EL_2(x,z)           (vector_elements_2[(x)][(z)])

#define ACCUM(x)                m_accum[x].q

// Indices of the vector flag groups.
#define CARRY       0
#define COMPARE     1
#define CLIP1       2
#define ZERO        3
#define CLIP2       4
| 79 | | |
// Forward declarations of the C trampolines for the COP2 move/control
// transfer handlers.
static void cfunc_mfc2(void *param);
static void cfunc_cfc2(void *param);
static void cfunc_mtc2(void *param);
static void cfunc_ctc2(void *param);
| 84 | | |
| 85 | | inline UINT16 rsp_cop2_simd::ACCUM_H(int x) |
| 86 | | { |
| 87 | | UINT16 out; |
| 88 | | EXTRACT16(m_accum_h, out, x); |
| 89 | | return out; |
| 90 | | } |
| 91 | | |
| 92 | | inline UINT16 rsp_cop2_simd::ACCUM_M(int x) |
| 93 | | { |
| 94 | | UINT16 out; |
| 95 | | EXTRACT16(m_accum_m, out, x); |
| 96 | | return out; |
| 97 | | } |
| 98 | | |
| 99 | | inline UINT16 rsp_cop2_simd::ACCUM_L(int x) |
| 100 | | { |
| 101 | | UINT16 out; |
| 102 | | EXTRACT16(m_accum_l, out, x); |
| 103 | | return out; |
| 104 | | } |
| 105 | | |
| 106 | | inline UINT16 rsp_cop2_simd::ACCUM_LL(int x) |
| 107 | | { |
| 108 | | UINT16 out; |
| 109 | | EXTRACT16(m_accum_ll, out, x); |
| 110 | | return out; |
| 111 | | } |
| 112 | | |
// Write 16-bit value v into lane (x & 7) of one accumulator slice.
// Each macro targets the same member that the matching ACCUM_* reader uses.
// The definitions keep their trailing ';', so they are only usable as
// statements.
#define SET_ACCUM_H(v, x)       INSERT16(m_accum_h, v, x);
#define SET_ACCUM_M(v, x)       INSERT16(m_accum_m, v, x);
#define SET_ACCUM_L(v, x)       INSERT16(m_accum_l, v, x);
#define SET_ACCUM_LL(v, x)      INSERT16(m_accum_ll, v, x);
| 117 | | |
// Fetch lane i of the first source vector, and the lane of the second source
// vector that the opcode's element specifier maps to output lane i.
// Both read 'op' through VS1REG/VS2REG/EL and keep their trailing ';'.
#define GET_VS1(out, i)         EXTRACT16(m_xv[VS1REG], out, i);
#define GET_VS2(out, i)         EXTRACT16(m_xv[VS2REG], out, VEC_EL_2(EL, i));
| 120 | | |
| 121 | | inline UINT16 rsp_cop2_simd::CARRY_FLAG(const int x) |
| 122 | | { |
| 123 | | UINT16 out; |
| 124 | | EXTRACT16(m_xvflag[CARRY], out, x); |
| 125 | | return out; |
| 126 | | } |
| 127 | | |
| 128 | | inline UINT16 rsp_cop2_simd::COMPARE_FLAG(const int x) |
| 129 | | { |
| 130 | | UINT16 out; |
| 131 | | EXTRACT16(m_xvflag[COMPARE], out, x); |
| 132 | | return out; |
| 133 | | } |
| 134 | | |
| 135 | | inline UINT16 rsp_cop2_simd::CLIP1_FLAG(const int x) |
| 136 | | { |
| 137 | | UINT16 out; |
| 138 | | EXTRACT16(m_xvflag[CLIP1], out, x); |
| 139 | | return out; |
| 140 | | } |
| 141 | | |
| 142 | | inline UINT16 rsp_cop2_simd::ZERO_FLAG(const int x) |
| 143 | | { |
| 144 | | UINT16 out; |
| 145 | | EXTRACT16(m_xvflag[ZERO], out, x); |
| 146 | | return out; |
| 147 | | } |
| 148 | | |
| 149 | | inline UINT16 rsp_cop2_simd::CLIP2_FLAG(const int x) |
| 150 | | { |
| 151 | | UINT16 out; |
| 152 | | EXTRACT16(m_xvflag[CLIP2], out, x); |
| 153 | | return out; |
| 154 | | } |
| 155 | | |
// Clear every lane of a flag group.
#define CLEAR_CARRY_FLAGS()         { m_xvflag[CARRY] = _mm_setzero_si128(); }
#define CLEAR_COMPARE_FLAGS()       { m_xvflag[COMPARE] = _mm_setzero_si128(); }
#define CLEAR_CLIP1_FLAGS()         { m_xvflag[CLIP1] = _mm_setzero_si128(); }
#define CLEAR_ZERO_FLAGS()          { m_xvflag[ZERO] = _mm_setzero_si128(); }
#define CLEAR_CLIP2_FLAGS()         { m_xvflag[CLIP2] = _mm_setzero_si128(); }

// Set lane (x & 7) of a flag group: a set flag is stored as 0xffff.
#define SET_CARRY_FLAG(x)           { INSERT16(m_xvflag[CARRY], 0xffff, x); }
#define SET_COMPARE_FLAG(x)         { INSERT16(m_xvflag[COMPARE], 0xffff, x); }
#define SET_CLIP1_FLAG(x)           { INSERT16(m_xvflag[CLIP1], 0xffff, x); }
#define SET_ZERO_FLAG(x)            { INSERT16(m_xvflag[ZERO], 0xffff, x); }
#define SET_CLIP2_FLAG(x)           { INSERT16(m_xvflag[CLIP2], 0xffff, x); }

// Clear lane (x & 7) of a flag group: a clear flag is stored as 0.
#define CLEAR_CARRY_FLAG(x)         { INSERT16(m_xvflag[CARRY], 0, x); }
#define CLEAR_COMPARE_FLAG(x)       { INSERT16(m_xvflag[COMPARE], 0, x); }
#define CLEAR_CLIP1_FLAG(x)         { INSERT16(m_xvflag[CLIP1], 0, x); }
#define CLEAR_ZERO_FLAG(x)          { INSERT16(m_xvflag[ZERO], 0, x); }
#define CLEAR_CLIP2_FLAG(x)         { INSERT16(m_xvflag[CLIP2], 0, x); }
| 173 | | |
// Copy the eight staged result lanes in m_vres into the destination vector
// register selected by the opcode's VD field (reads 'op' through VDREG).
//
// NOTE(review): the original was followed by an '#endif' with no matching
// '#if' anywhere earlier in this file, which is a preprocessor error; it has
// been dropped. If the file was meant to be wrapped in a build-time guard,
// the opening '#if' needs to be restored at the top instead.
#define WRITEBACK_RESULT() { \
		INSERT16(m_xv[VDREG], m_vres[0], 0); \
		INSERT16(m_xv[VDREG], m_vres[1], 1); \
		INSERT16(m_xv[VDREG], m_vres[2], 2); \
		INSERT16(m_xv[VDREG], m_vres[3], 3); \
		INSERT16(m_xv[VDREG], m_vres[4], 4); \
		INSERT16(m_xv[VDREG], m_vres[5], 5); \
		INSERT16(m_xv[VDREG], m_vres[6], 6); \
		INSERT16(m_xv[VDREG], m_vres[7], 7); \
}
| 185 | | |
// Lane-selection table used by VEC_EL_2: row = 4-bit element specifier from
// the opcode, column = output lane, value = source lane of VS2 to read.
static const int vector_elements_2[16][8] =
{
	// whole vector
	{ 0, 1, 2, 3, 4, 5, 6, 7 },     // none
	{ 0, 1, 2, 3, 4, 5, 6, 7 },     // ???

	// one lane broadcast within each pair
	{ 0, 0, 2, 2, 4, 4, 6, 6 },     // 0q
	{ 1, 1, 3, 3, 5, 5, 7, 7 },     // 1q

	// one lane broadcast within each group of four
	{ 0, 0, 0, 0, 4, 4, 4, 4 },     // 0h
	{ 1, 1, 1, 1, 5, 5, 5, 5 },     // 1h
	{ 2, 2, 2, 2, 6, 6, 6, 6 },     // 2h
	{ 3, 3, 3, 3, 7, 7, 7, 7 },     // 3h

	// one lane broadcast to all eight
	{ 0, 0, 0, 0, 0, 0, 0, 0 },     // 0
	{ 1, 1, 1, 1, 1, 1, 1, 1 },     // 1
	{ 2, 2, 2, 2, 2, 2, 2, 2 },     // 2
	{ 3, 3, 3, 3, 3, 3, 3, 3 },     // 3
	{ 4, 4, 4, 4, 4, 4, 4, 4 },     // 4
	{ 5, 5, 5, 5, 5, 5, 5, 5 },     // 5
	{ 6, 6, 6, 6, 6, 6, 6, 6 },     // 6
	{ 7, 7, 7, 7, 7, 7, 7, 7 },     // 7
};
| 205 | | |
| 206 | | static __m128i vec_himask; |
| 207 | | static __m128i vec_lomask; |
| 208 | | static __m128i vec_hibit; |
| 209 | | static __m128i vec_lobit; |
| 210 | | static __m128i vec_n32768; |
| 211 | | static __m128i vec_32767; |
| 212 | | static __m128i vec_flagmask; |
| 213 | | static __m128i vec_shiftmask2; |
| 214 | | static __m128i vec_shiftmask4; |
| 215 | | static __m128i vec_flag_reverse; |
| 216 | | static __m128i vec_neg1; |
| 217 | | static __m128i vec_zero; |
| 218 | | static __m128i vec_shuf[16]; |
| 219 | | static __m128i vec_shuf_inverse[16]; |
| 220 | | |
| 221 | | rsp_cop2_simd::rsp_cop2_simd(rsp_device &rsp, running_machine &machine) : rsp_cop2(rsp, machine) |
| 222 | | : m_accum_h(0) |
| 223 | | , m_accum_m(0) |
| 224 | | , m_accum_l(0) |
| 225 | | , m_accum_ll(0) |
| 226 | | #if SIMUL_SIMD |
| 227 | | , m_old_reciprocal_res(0) |
| 228 | | , m_old_reciprocal_high(0) |
| 229 | | , m_old_dp_allowed(0) |
| 230 | | , m_scalar_reciprocal_res(0) |
| 231 | | , m_scalar_reciprocal_high(0) |
| 232 | | , m_scalar_dp_allowed(0) |
| 233 | | , m_simd_reciprocal_res(0) |
| 234 | | , m_simd_reciprocal_high(0) |
| 235 | | , m_simd_dp_allowed(0) |
| 236 | | #endif |
| 237 | | { |
| 238 | | #if SIMUL_SIMD |
| 239 | | memset(m_old_r, 0, sizeof(m_old_r)); |
| 240 | | memset(m_old_dmem, 0, sizeof(m_old_dmem)); |
| 241 | | memset(m_scalar_r, 0, sizeof(m_scalar_r)); |
| 242 | | memset(m_scalar_dmem, 0, sizeof(m_scalar_dmem)); |
| 243 | | #endif |
| 244 | | memset(m_xv, 0, sizeof(m_xv)); |
| 245 | | memset(m_xvflag, 0, sizeof(m_xvflag)); |
| 246 | | |
| 247 | | vec_shuf_inverse[ 0] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // none |
| 248 | | vec_shuf_inverse[ 1] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // ??? |
| 249 | | vec_shuf_inverse[ 2] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0908, 0x0908, 0x0504, 0x0504, 0x0100, 0x0100); // 0q |
| 250 | | vec_shuf_inverse[ 3] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0b0a, 0x0b0a, 0x0706, 0x0706, 0x0302, 0x0302); // 1q |
| 251 | | vec_shuf_inverse[ 4] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0100, 0x0100, 0x0100, 0x0100); // 0h |
| 252 | | vec_shuf_inverse[ 5] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0302, 0x0302, 0x0302, 0x0302); // 1h |
| 253 | | vec_shuf_inverse[ 6] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0504, 0x0504, 0x0504, 0x0504); // 2h |
| 254 | | vec_shuf_inverse[ 7] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0706, 0x0706, 0x0706, 0x0706); // 3h |
| 255 | | vec_shuf_inverse[ 8] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 0 |
| 256 | | vec_shuf_inverse[ 9] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 1 |
| 257 | | vec_shuf_inverse[10] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 2 |
| 258 | | vec_shuf_inverse[11] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 3 |
| 259 | | vec_shuf_inverse[12] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 4 |
| 260 | | vec_shuf_inverse[13] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 5 |
| 261 | | vec_shuf_inverse[14] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 6 |
| 262 | | vec_shuf_inverse[15] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 7 |
| 263 | | |
| 264 | | vec_shuf[ 0] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // none |
| 265 | | vec_shuf[ 1] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // ??? |
| 266 | | vec_shuf[ 2] = _mm_set_epi16(0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e); // 0q |
| 267 | | vec_shuf[ 3] = _mm_set_epi16(0x0100, 0x0100, 0x0504, 0x0706, 0x0908, 0x0908, 0x0d0c, 0x0d0c); // 1q |
| 268 | | vec_shuf[ 4] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0q |
| 269 | | vec_shuf[ 5] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1q |
| 270 | | vec_shuf[ 6] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2q |
| 271 | | vec_shuf[ 7] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908); // 3q |
| 272 | | vec_shuf[ 8] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0 |
| 273 | | vec_shuf[ 9] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1 |
| 274 | | vec_shuf[10] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2 |
| 275 | | vec_shuf[11] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 3 |
| 276 | | vec_shuf[12] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 4 |
| 277 | | vec_shuf[13] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 5 |
| 278 | | vec_shuf[14] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 6 |
| 279 | | vec_shuf[15] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 7 |
| 280 | | m_accum_h = _mm_setzero_si128(); |
| 281 | | m_accum_m = _mm_setzero_si128(); |
| 282 | | m_accum_l = _mm_setzero_si128(); |
| 283 | | m_accum_ll = _mm_setzero_si128(); |
| 284 | | vec_neg1 = _mm_set_epi64x(0xffffffffffffffffL, 0xffffffffffffffffL); |
| 285 | | vec_zero = _mm_setzero_si128(); |
| 286 | | vec_himask = _mm_set_epi64x(0xffff0000ffff0000L, 0xffff0000ffff0000L); |
| 287 | | vec_lomask = _mm_set_epi64x(0x0000ffff0000ffffL, 0x0000ffff0000ffffL); |
| 288 | | vec_hibit = _mm_set_epi64x(0x0001000000010000L, 0x0001000000010000L); |
| 289 | | vec_lobit = _mm_set_epi64x(0x0000000100000001L, 0x0000000100000001L); |
| 290 | | vec_32767 = _mm_set_epi64x(0x7fff7fff7fff7fffL, 0x7fff7fff7fff7fffL); |
| 291 | | vec_n32768 = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L); |
| 292 | | vec_flagmask = _mm_set_epi64x(0x0001000100010001L, 0x0001000100010001L); |
| 293 | | vec_shiftmask2 = _mm_set_epi64x(0x0000000300000003L, 0x0000000300000003L); |
| 294 | | vec_shiftmask4 = _mm_set_epi64x(0x000000000000000fL, 0x000000000000000fL); |
| 295 | | vec_flag_reverse = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); |
| 296 | | } |
| 297 | | |
| 298 | | void rsp_cop2_simd::state_string_export(const int index, astring &string) |
| 299 | | { |
| 300 | | switch (index) |
| 301 | | { |
| 302 | | case RSP_V0: |
| 303 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 0], 7), (UINT16)_mm_extract_epi16(m_xv[ 0], 6), (UINT16)_mm_extract_epi16(m_xv[ 0], 5), (UINT16)_mm_extract_epi16(m_xv[ 0], 4), (UINT16)_mm_extract_epi16(m_xv[ 0], 3), (UINT16)_mm_extract_epi16(m_xv[ 0], 2), (UINT16)_mm_extract_epi16(m_xv[ 0], 1), (UINT16)_mm_extract_epi16(m_xv[ 0], 0)); |
| 304 | | break; |
| 305 | | case RSP_V1: |
| 306 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 1], 7), (UINT16)_mm_extract_epi16(m_xv[ 1], 6), (UINT16)_mm_extract_epi16(m_xv[ 1], 5), (UINT16)_mm_extract_epi16(m_xv[ 1], 4), (UINT16)_mm_extract_epi16(m_xv[ 1], 3), (UINT16)_mm_extract_epi16(m_xv[ 1], 2), (UINT16)_mm_extract_epi16(m_xv[ 1], 1), (UINT16)_mm_extract_epi16(m_xv[ 1], 0)); |
| 307 | | break; |
| 308 | | case RSP_V2: |
| 309 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 2], 7), (UINT16)_mm_extract_epi16(m_xv[ 2], 6), (UINT16)_mm_extract_epi16(m_xv[ 2], 5), (UINT16)_mm_extract_epi16(m_xv[ 2], 4), (UINT16)_mm_extract_epi16(m_xv[ 2], 3), (UINT16)_mm_extract_epi16(m_xv[ 2], 2), (UINT16)_mm_extract_epi16(m_xv[ 2], 1), (UINT16)_mm_extract_epi16(m_xv[ 2], 0)); |
| 310 | | break; |
| 311 | | case RSP_V3: |
| 312 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 3], 7), (UINT16)_mm_extract_epi16(m_xv[ 3], 6), (UINT16)_mm_extract_epi16(m_xv[ 3], 5), (UINT16)_mm_extract_epi16(m_xv[ 3], 4), (UINT16)_mm_extract_epi16(m_xv[ 3], 3), (UINT16)_mm_extract_epi16(m_xv[ 3], 2), (UINT16)_mm_extract_epi16(m_xv[ 3], 1), (UINT16)_mm_extract_epi16(m_xv[ 3], 0)); |
| 313 | | break; |
| 314 | | case RSP_V4: |
| 315 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 4], 7), (UINT16)_mm_extract_epi16(m_xv[ 4], 6), (UINT16)_mm_extract_epi16(m_xv[ 4], 5), (UINT16)_mm_extract_epi16(m_xv[ 4], 4), (UINT16)_mm_extract_epi16(m_xv[ 4], 3), (UINT16)_mm_extract_epi16(m_xv[ 4], 2), (UINT16)_mm_extract_epi16(m_xv[ 4], 1), (UINT16)_mm_extract_epi16(m_xv[ 4], 0)); |
| 316 | | break; |
| 317 | | case RSP_V5: |
| 318 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 5], 7), (UINT16)_mm_extract_epi16(m_xv[ 5], 6), (UINT16)_mm_extract_epi16(m_xv[ 5], 5), (UINT16)_mm_extract_epi16(m_xv[ 5], 4), (UINT16)_mm_extract_epi16(m_xv[ 5], 3), (UINT16)_mm_extract_epi16(m_xv[ 5], 2), (UINT16)_mm_extract_epi16(m_xv[ 5], 1), (UINT16)_mm_extract_epi16(m_xv[ 5], 0)); |
| 319 | | break; |
| 320 | | case RSP_V6: |
| 321 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 6], 7), (UINT16)_mm_extract_epi16(m_xv[ 6], 6), (UINT16)_mm_extract_epi16(m_xv[ 6], 5), (UINT16)_mm_extract_epi16(m_xv[ 6], 4), (UINT16)_mm_extract_epi16(m_xv[ 6], 3), (UINT16)_mm_extract_epi16(m_xv[ 6], 2), (UINT16)_mm_extract_epi16(m_xv[ 6], 1), (UINT16)_mm_extract_epi16(m_xv[ 6], 0)); |
| 322 | | break; |
| 323 | | case RSP_V7: |
| 324 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 7], 7), (UINT16)_mm_extract_epi16(m_xv[ 7], 6), (UINT16)_mm_extract_epi16(m_xv[ 7], 5), (UINT16)_mm_extract_epi16(m_xv[ 7], 4), (UINT16)_mm_extract_epi16(m_xv[ 7], 3), (UINT16)_mm_extract_epi16(m_xv[ 7], 2), (UINT16)_mm_extract_epi16(m_xv[ 7], 1), (UINT16)_mm_extract_epi16(m_xv[ 7], 0)); |
| 325 | | break; |
| 326 | | case RSP_V8: |
| 327 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 8], 7), (UINT16)_mm_extract_epi16(m_xv[ 8], 6), (UINT16)_mm_extract_epi16(m_xv[ 8], 5), (UINT16)_mm_extract_epi16(m_xv[ 8], 4), (UINT16)_mm_extract_epi16(m_xv[ 8], 3), (UINT16)_mm_extract_epi16(m_xv[ 8], 2), (UINT16)_mm_extract_epi16(m_xv[ 8], 1), (UINT16)_mm_extract_epi16(m_xv[ 8], 0)); |
| 328 | | break; |
| 329 | | case RSP_V9: |
| 330 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 9], 7), (UINT16)_mm_extract_epi16(m_xv[ 9], 6), (UINT16)_mm_extract_epi16(m_xv[ 9], 5), (UINT16)_mm_extract_epi16(m_xv[ 9], 4), (UINT16)_mm_extract_epi16(m_xv[ 9], 3), (UINT16)_mm_extract_epi16(m_xv[ 9], 2), (UINT16)_mm_extract_epi16(m_xv[ 9], 1), (UINT16)_mm_extract_epi16(m_xv[ 9], 0)); |
| 331 | | break; |
| 332 | | case RSP_V10: |
| 333 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[10], 7), (UINT16)_mm_extract_epi16(m_xv[10], 6), (UINT16)_mm_extract_epi16(m_xv[10], 5), (UINT16)_mm_extract_epi16(m_xv[10], 4), (UINT16)_mm_extract_epi16(m_xv[10], 3), (UINT16)_mm_extract_epi16(m_xv[10], 2), (UINT16)_mm_extract_epi16(m_xv[10], 1), (UINT16)_mm_extract_epi16(m_xv[10], 0)); |
| 334 | | break; |
| 335 | | case RSP_V11: |
| 336 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[11], 7), (UINT16)_mm_extract_epi16(m_xv[11], 6), (UINT16)_mm_extract_epi16(m_xv[11], 5), (UINT16)_mm_extract_epi16(m_xv[11], 4), (UINT16)_mm_extract_epi16(m_xv[11], 3), (UINT16)_mm_extract_epi16(m_xv[11], 2), (UINT16)_mm_extract_epi16(m_xv[11], 1), (UINT16)_mm_extract_epi16(m_xv[11], 0)); |
| 337 | | break; |
| 338 | | case RSP_V12: |
| 339 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[12], 7), (UINT16)_mm_extract_epi16(m_xv[12], 6), (UINT16)_mm_extract_epi16(m_xv[12], 5), (UINT16)_mm_extract_epi16(m_xv[12], 4), (UINT16)_mm_extract_epi16(m_xv[12], 3), (UINT16)_mm_extract_epi16(m_xv[12], 2), (UINT16)_mm_extract_epi16(m_xv[12], 1), (UINT16)_mm_extract_epi16(m_xv[12], 0)); |
| 340 | | break; |
| 341 | | case RSP_V13: |
| 342 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[13], 7), (UINT16)_mm_extract_epi16(m_xv[13], 6), (UINT16)_mm_extract_epi16(m_xv[13], 5), (UINT16)_mm_extract_epi16(m_xv[13], 4), (UINT16)_mm_extract_epi16(m_xv[13], 3), (UINT16)_mm_extract_epi16(m_xv[13], 2), (UINT16)_mm_extract_epi16(m_xv[13], 1), (UINT16)_mm_extract_epi16(m_xv[13], 0)); |
| 343 | | break; |
| 344 | | case RSP_V14: |
| 345 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[14], 7), (UINT16)_mm_extract_epi16(m_xv[14], 6), (UINT16)_mm_extract_epi16(m_xv[14], 5), (UINT16)_mm_extract_epi16(m_xv[14], 4), (UINT16)_mm_extract_epi16(m_xv[14], 3), (UINT16)_mm_extract_epi16(m_xv[14], 2), (UINT16)_mm_extract_epi16(m_xv[14], 1), (UINT16)_mm_extract_epi16(m_xv[14], 0)); |
| 346 | | break; |
| 347 | | case RSP_V15: |
| 348 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[15], 7), (UINT16)_mm_extract_epi16(m_xv[15], 6), (UINT16)_mm_extract_epi16(m_xv[15], 5), (UINT16)_mm_extract_epi16(m_xv[15], 4), (UINT16)_mm_extract_epi16(m_xv[15], 3), (UINT16)_mm_extract_epi16(m_xv[15], 2), (UINT16)_mm_extract_epi16(m_xv[15], 1), (UINT16)_mm_extract_epi16(m_xv[15], 0)); |
| 349 | | break; |
| 350 | | case RSP_V16: |
| 351 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[16], 7), (UINT16)_mm_extract_epi16(m_xv[16], 6), (UINT16)_mm_extract_epi16(m_xv[16], 5), (UINT16)_mm_extract_epi16(m_xv[16], 4), (UINT16)_mm_extract_epi16(m_xv[16], 3), (UINT16)_mm_extract_epi16(m_xv[16], 2), (UINT16)_mm_extract_epi16(m_xv[16], 1), (UINT16)_mm_extract_epi16(m_xv[16], 0)); |
| 352 | | break; |
| 353 | | case RSP_V17: |
| 354 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[17], 7), (UINT16)_mm_extract_epi16(m_xv[17], 6), (UINT16)_mm_extract_epi16(m_xv[17], 5), (UINT16)_mm_extract_epi16(m_xv[17], 4), (UINT16)_mm_extract_epi16(m_xv[17], 3), (UINT16)_mm_extract_epi16(m_xv[17], 2), (UINT16)_mm_extract_epi16(m_xv[17], 1), (UINT16)_mm_extract_epi16(m_xv[17], 0)); |
| 355 | | break; |
| 356 | | case RSP_V18: |
| 357 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[18], 7), (UINT16)_mm_extract_epi16(m_xv[18], 6), (UINT16)_mm_extract_epi16(m_xv[18], 5), (UINT16)_mm_extract_epi16(m_xv[18], 4), (UINT16)_mm_extract_epi16(m_xv[18], 3), (UINT16)_mm_extract_epi16(m_xv[18], 2), (UINT16)_mm_extract_epi16(m_xv[18], 1), (UINT16)_mm_extract_epi16(m_xv[18], 0)); |
| 358 | | break; |
| 359 | | case RSP_V19: |
| 360 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[19], 7), (UINT16)_mm_extract_epi16(m_xv[19], 6), (UINT16)_mm_extract_epi16(m_xv[19], 5), (UINT16)_mm_extract_epi16(m_xv[19], 4), (UINT16)_mm_extract_epi16(m_xv[19], 3), (UINT16)_mm_extract_epi16(m_xv[19], 2), (UINT16)_mm_extract_epi16(m_xv[19], 1), (UINT16)_mm_extract_epi16(m_xv[19], 0)); |
| 361 | | break; |
| 362 | | case RSP_V20: |
| 363 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[20], 7), (UINT16)_mm_extract_epi16(m_xv[20], 6), (UINT16)_mm_extract_epi16(m_xv[20], 5), (UINT16)_mm_extract_epi16(m_xv[20], 4), (UINT16)_mm_extract_epi16(m_xv[20], 3), (UINT16)_mm_extract_epi16(m_xv[20], 2), (UINT16)_mm_extract_epi16(m_xv[20], 1), (UINT16)_mm_extract_epi16(m_xv[20], 0)); |
| 364 | | break; |
| 365 | | case RSP_V21: |
| 366 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[21], 7), (UINT16)_mm_extract_epi16(m_xv[21], 6), (UINT16)_mm_extract_epi16(m_xv[21], 5), (UINT16)_mm_extract_epi16(m_xv[21], 4), (UINT16)_mm_extract_epi16(m_xv[21], 3), (UINT16)_mm_extract_epi16(m_xv[21], 2), (UINT16)_mm_extract_epi16(m_xv[21], 1), (UINT16)_mm_extract_epi16(m_xv[21], 0)); |
| 367 | | break; |
| 368 | | case RSP_V22: |
| 369 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[22], 7), (UINT16)_mm_extract_epi16(m_xv[22], 6), (UINT16)_mm_extract_epi16(m_xv[22], 5), (UINT16)_mm_extract_epi16(m_xv[22], 4), (UINT16)_mm_extract_epi16(m_xv[22], 3), (UINT16)_mm_extract_epi16(m_xv[22], 2), (UINT16)_mm_extract_epi16(m_xv[22], 1), (UINT16)_mm_extract_epi16(m_xv[22], 0)); |
| 370 | | break; |
| 371 | | case RSP_V23: |
| 372 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[23], 7), (UINT16)_mm_extract_epi16(m_xv[23], 6), (UINT16)_mm_extract_epi16(m_xv[23], 5), (UINT16)_mm_extract_epi16(m_xv[23], 4), (UINT16)_mm_extract_epi16(m_xv[23], 3), (UINT16)_mm_extract_epi16(m_xv[23], 2), (UINT16)_mm_extract_epi16(m_xv[23], 1), (UINT16)_mm_extract_epi16(m_xv[23], 0)); |
| 373 | | break; |
| 374 | | case RSP_V24: |
| 375 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[24], 7), (UINT16)_mm_extract_epi16(m_xv[24], 6), (UINT16)_mm_extract_epi16(m_xv[24], 5), (UINT16)_mm_extract_epi16(m_xv[24], 4), (UINT16)_mm_extract_epi16(m_xv[24], 3), (UINT16)_mm_extract_epi16(m_xv[24], 2), (UINT16)_mm_extract_epi16(m_xv[24], 1), (UINT16)_mm_extract_epi16(m_xv[24], 0)); |
| 376 | | break; |
| 377 | | case RSP_V25: |
| 378 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[25], 7), (UINT16)_mm_extract_epi16(m_xv[25], 6), (UINT16)_mm_extract_epi16(m_xv[25], 5), (UINT16)_mm_extract_epi16(m_xv[25], 4), (UINT16)_mm_extract_epi16(m_xv[25], 3), (UINT16)_mm_extract_epi16(m_xv[25], 2), (UINT16)_mm_extract_epi16(m_xv[25], 1), (UINT16)_mm_extract_epi16(m_xv[25], 0)); |
| 379 | | break; |
| 380 | | case RSP_V26: |
| 381 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[26], 7), (UINT16)_mm_extract_epi16(m_xv[26], 6), (UINT16)_mm_extract_epi16(m_xv[26], 5), (UINT16)_mm_extract_epi16(m_xv[26], 4), (UINT16)_mm_extract_epi16(m_xv[26], 3), (UINT16)_mm_extract_epi16(m_xv[26], 2), (UINT16)_mm_extract_epi16(m_xv[26], 1), (UINT16)_mm_extract_epi16(m_xv[26], 0)); |
| 382 | | break; |
| 383 | | case RSP_V27: |
| 384 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[27], 7), (UINT16)_mm_extract_epi16(m_xv[27], 6), (UINT16)_mm_extract_epi16(m_xv[27], 5), (UINT16)_mm_extract_epi16(m_xv[27], 4), (UINT16)_mm_extract_epi16(m_xv[27], 3), (UINT16)_mm_extract_epi16(m_xv[27], 2), (UINT16)_mm_extract_epi16(m_xv[27], 1), (UINT16)_mm_extract_epi16(m_xv[27], 0)); |
| 385 | | break; |
| 386 | | case RSP_V28: |
| 387 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[28], 7), (UINT16)_mm_extract_epi16(m_xv[28], 6), (UINT16)_mm_extract_epi16(m_xv[28], 5), (UINT16)_mm_extract_epi16(m_xv[28], 4), (UINT16)_mm_extract_epi16(m_xv[28], 3), (UINT16)_mm_extract_epi16(m_xv[28], 2), (UINT16)_mm_extract_epi16(m_xv[28], 1), (UINT16)_mm_extract_epi16(m_xv[28], 0)); |
| 388 | | break; |
| 389 | | case RSP_V29: |
| 390 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[29], 7), (UINT16)_mm_extract_epi16(m_xv[29], 6), (UINT16)_mm_extract_epi16(m_xv[29], 5), (UINT16)_mm_extract_epi16(m_xv[29], 4), (UINT16)_mm_extract_epi16(m_xv[29], 3), (UINT16)_mm_extract_epi16(m_xv[29], 2), (UINT16)_mm_extract_epi16(m_xv[29], 1), (UINT16)_mm_extract_epi16(m_xv[29], 0)); |
| 391 | | break; |
| 392 | | case RSP_V30: |
| 393 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[30], 7), (UINT16)_mm_extract_epi16(m_xv[30], 6), (UINT16)_mm_extract_epi16(m_xv[30], 5), (UINT16)_mm_extract_epi16(m_xv[30], 4), (UINT16)_mm_extract_epi16(m_xv[30], 3), (UINT16)_mm_extract_epi16(m_xv[30], 2), (UINT16)_mm_extract_epi16(m_xv[30], 1), (UINT16)_mm_extract_epi16(m_xv[30], 0)); |
| 394 | | break; |
| 395 | | case RSP_V31: |
| 396 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[31], 7), (UINT16)_mm_extract_epi16(m_xv[31], 6), (UINT16)_mm_extract_epi16(m_xv[31], 5), (UINT16)_mm_extract_epi16(m_xv[31], 4), (UINT16)_mm_extract_epi16(m_xv[31], 3), (UINT16)_mm_extract_epi16(m_xv[31], 2), (UINT16)_mm_extract_epi16(m_xv[31], 1), (UINT16)_mm_extract_epi16(m_xv[31], 0)); |
| 397 | | break; |
| 398 | | } |
| 399 | | } |
| 400 | | |
| 401 | | /*************************************************************************** |
| 402 | | Vector Load Instructions |
| 403 | | ***************************************************************************/ |
| 404 | | |
| 405 | | // LBV |
| 406 | | // |
| 407 | | // 31 25 20 15 10 6 0 |
| 408 | | // -------------------------------------------------- |
| 409 | | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 410 | | // -------------------------------------------------- |
| 411 | | // |
| 412 | | // Load 1 byte to vector byte index |
| 413 | | |
| 414 | | inline void rsp_cop2_simd::lbv() |
| 415 | | { |
| 416 | | UINT32 op = m_op; |
| 417 | | |
| 418 | | UINT32 ea = 0; |
| 419 | | int dest = (op >> 16) & 0x1f; |
| 420 | | int base = (op >> 21) & 0x1f; |
| 421 | | int index = (op >> 7) & 0xf; |
| 422 | | int offset = (op & 0x7f); |
| 423 | | if (offset & 0x40) |
| 424 | | { |
| 425 | | offset |= 0xffffffc0; |
| 426 | | } |
| 427 | | |
| 428 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 429 | | |
| 430 | | UINT16 element; |
| 431 | | EXTRACT16(m_xv[dest], element, (index >> 1)); |
| 432 | | element &= 0xff00 >> ((1-(index & 1)) * 8); |
| 433 | | element |= m_rsp.DM_READ8(ea) << ((1-(index & 1)) * 8); |
| 434 | | INSERT16(m_xv[dest], element, (index >> 1)); |
| 435 | | } |
| 436 | | |
| 437 | | static void cfunc_lbv(void *param) |
| 438 | | { |
| 439 | | ((rsp_cop2 *)param)->lbv(); |
| 440 | | } |
| 441 | | |
| 442 | | |
| 443 | | // LSV |
| 444 | | // |
| 445 | | // 31 25 20 15 10 6 0 |
| 446 | | // -------------------------------------------------- |
| 447 | | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 448 | | // -------------------------------------------------- |
| 449 | | // |
| 450 | | // Loads 2 bytes starting from vector byte index |
| 451 | | |
| 452 | | inline void rsp_cop2_simd::lsv() |
| 453 | | { |
| 454 | | UINT32 op = m_op; |
| 455 | | int dest = (op >> 16) & 0x1f; |
| 456 | | int base = (op >> 21) & 0x1f; |
| 457 | | int index = (op >> 7) & 0xe; |
| 458 | | int offset = (op & 0x7f); |
| 459 | | if (offset & 0x40) |
| 460 | | { |
| 461 | | offset |= 0xffffffc0; |
| 462 | | } |
| 463 | | |
| 464 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 465 | | int end = index + 2; |
| 466 | | for (int i = index; i < end; i++) |
| 467 | | { |
| 468 | | UINT16 element; |
| 469 | | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 470 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 471 | | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 472 | | INSERT16(m_xv[dest], element, (i >> 1)); |
| 473 | | ea++; |
| 474 | | } |
| 475 | | } |
| 476 | | |
| 477 | | static void cfunc_lsv(void *param) |
| 478 | | { |
| 479 | | ((rsp_cop2 *)param)->lsv(); |
| 480 | | } |
| 481 | | |
| 482 | | |
| 483 | | // LLV |
| 484 | | // |
| 485 | | // 31 25 20 15 10 6 0 |
| 486 | | // -------------------------------------------------- |
| 487 | | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 488 | | // -------------------------------------------------- |
| 489 | | // |
| 490 | | // Loads 4 bytes starting from vector byte index |
| 491 | | |
| 492 | | inline void rsp_cop2_simd::llv() |
| 493 | | { |
| 494 | | UINT32 op = m_op; |
| 495 | | UINT32 ea = 0; |
| 496 | | int dest = (op >> 16) & 0x1f; |
| 497 | | int base = (op >> 21) & 0x1f; |
| 498 | | int index = (op >> 7) & 0xc; |
| 499 | | int offset = (op & 0x7f); |
| 500 | | if (offset & 0x40) |
| 501 | | { |
| 502 | | offset |= 0xffffffc0; |
| 503 | | } |
| 504 | | |
| 505 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 506 | | |
| 507 | | int end = index + 4; |
| 508 | | |
| 509 | | for (int i = index; i < end; i++) |
| 510 | | { |
| 511 | | UINT16 element; |
| 512 | | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 513 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 514 | | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 515 | | INSERT16(m_xv[dest], element, (i >> 1)); |
| 516 | | ea++; |
| 517 | | } |
| 518 | | } |
| 519 | | |
| 520 | | static void cfunc_llv(void *param) |
| 521 | | { |
| 522 | | ((rsp_cop2 *)param)->llv(); |
| 523 | | } |
| 524 | | #endif |
| 525 | | |
| 526 | | |
| 527 | | // LDV |
| 528 | | // |
| 529 | | // 31 25 20 15 10 6 0 |
| 530 | | // -------------------------------------------------- |
| 531 | | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 532 | | // -------------------------------------------------- |
| 533 | | // |
| 534 | | // Loads 8 bytes starting from vector byte index |
| 535 | | |
| 536 | | inline void rsp_cop2_simd::ldv() |
| 537 | | { |
| 538 | | UINT32 op = m_op; |
| 539 | | UINT32 ea = 0; |
| 540 | | int dest = (op >> 16) & 0x1f; |
| 541 | | int base = (op >> 21) & 0x1f; |
| 542 | | int index = (op >> 7) & 0x8; |
| 543 | | int offset = (op & 0x7f); |
| 544 | | if (offset & 0x40) |
| 545 | | { |
| 546 | | offset |= 0xffffffc0; |
| 547 | | } |
| 548 | | |
| 549 | | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 550 | | |
| 551 | | int end = index + 8; |
| 552 | | |
| 553 | | for (int i = index; i < end; i++) |
| 554 | | { |
| 555 | | UINT16 element; |
| 556 | | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 557 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 558 | | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 559 | | INSERT16(m_xv[dest], element, (i >> 1)); |
| 560 | | ea++; |
| 561 | | } |
| 562 | | } |
| 563 | | |
| 564 | | static void cfunc_ldv(void *param) |
| 565 | | { |
| 566 | | ((rsp_cop2 *)param)->ldv(); |
| 567 | | } |
| 568 | | #endif |
| 569 | | |
| 570 | | |
| 571 | | // LQV |
| 572 | | // |
| 573 | | // 31 25 20 15 10 6 0 |
| 574 | | // -------------------------------------------------- |
| 575 | | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 576 | | // -------------------------------------------------- |
| 577 | | // |
| 578 | | // Loads up to 16 bytes starting from vector byte index |
| 579 | | |
| 580 | | inline void rsp_cop2_simd::lqv() |
| 581 | | { |
| 582 | | UINT32 op = m_op; |
| 583 | | int dest = (op >> 16) & 0x1f; |
| 584 | | int base = (op >> 21) & 0x1f; |
| 585 | | int offset = (op & 0x7f); |
| 586 | | if (offset & 0x40) |
| 587 | | { |
| 588 | | offset |= 0xffffffc0; |
| 589 | | } |
| 590 | | |
| 591 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 592 | | |
| 593 | | int end = 16 - (ea & 0xf); |
| 594 | | if (end > 16) end = 16; |
| 595 | | |
| 596 | | for (int i = 0; i < end; i++) |
| 597 | | { |
| 598 | | UINT16 element; |
| 599 | | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 600 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 601 | | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 602 | | INSERT16(m_xv[dest], element, (i >> 1)); |
| 603 | | ea++; |
| 604 | | } |
| 605 | | } |
| 606 | | |
| 607 | | static void cfunc_lqv(void *param) |
| 608 | | { |
| 609 | | ((rsp_cop2 *)param)->lqv(); |
| 610 | | } |
| 611 | | |
| 612 | | |
| 613 | | // LRV |
| 614 | | // |
| 615 | | // 31 25 20 15 10 6 0 |
| 616 | | // -------------------------------------------------- |
| 617 | | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 618 | | // -------------------------------------------------- |
| 619 | | // |
| 620 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 621 | | |
| 622 | | inline void rsp_cop2_simd::lrv() |
| 623 | | { |
| 624 | | UINT32 op = m_op; |
| 625 | | int dest = (op >> 16) & 0x1f; |
| 626 | | int base = (op >> 21) & 0x1f; |
| 627 | | int index = (op >> 7) & 0xf; |
| 628 | | int offset = (op & 0x7f); |
| 629 | | if (offset & 0x40) |
| 630 | | { |
| 631 | | offset |= 0xffffffc0; |
| 632 | | } |
| 633 | | |
| 634 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 635 | | |
| 636 | | index = 16 - ((ea & 0xf) - index); |
| 637 | | ea &= ~0xf; |
| 638 | | |
| 639 | | for (int i = index; i < 16; i++) |
| 640 | | { |
| 641 | | UINT16 element; |
| 642 | | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 643 | | element &= 0xff00 >> ((1-(i & 1)) * 8); |
| 644 | | element |= m_rsp.DM_READ8(ea) << ((1-(i & 1)) * 8); |
| 645 | | INSERT16(m_xv[dest], element, (i >> 1)); |
| 646 | | ea++; |
| 647 | | } |
| 648 | | } |
| 649 | | |
| 650 | | static void cfunc_lrv(void *param) |
| 651 | | { |
| 652 | | ((rsp_cop2 *)param)->lrv(); |
| 653 | | } |
| 654 | | |
| 655 | | |
| 656 | | // LPV |
| 657 | | // |
| 658 | | // 31 25 20 15 10 6 0 |
| 659 | | // -------------------------------------------------- |
| 660 | | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 661 | | // -------------------------------------------------- |
| 662 | | // |
| 663 | | // Loads a byte as the upper 8 bits of each element |
| 664 | | |
| 665 | | inline void rsp_cop2_simd::lpv() |
| 666 | | { |
| 667 | | UINT32 op = m_op; |
| 668 | | int dest = (op >> 16) & 0x1f; |
| 669 | | int base = (op >> 21) & 0x1f; |
| 670 | | int index = (op >> 7) & 0xf; |
| 671 | | int offset = (op & 0x7f); |
| 672 | | if (offset & 0x40) |
| 673 | | { |
| 674 | | offset |= 0xffffffc0; |
| 675 | | } |
| 676 | | |
| 677 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 678 | | |
| 679 | | for (int i = 0; i < 8; i++) |
| 680 | | { |
| 681 | | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 8, i); |
| 682 | | } |
| 683 | | } |
| 684 | | |
| 685 | | static void cfunc_lpv(void *param) |
| 686 | | { |
| 687 | | ((rsp_cop2 *)param)->lpv(); |
| 688 | | } |
| 689 | | |
| 690 | | |
| 691 | | // LUV |
| 692 | | // |
| 693 | | // 31 25 20 15 10 6 0 |
| 694 | | // -------------------------------------------------- |
| 695 | | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 696 | | // -------------------------------------------------- |
| 697 | | // |
| 698 | | // Loads a byte as the bits 14-7 of each element |
| 699 | | |
| 700 | | inline void rsp_cop2_simd::luv() |
| 701 | | { |
| 702 | | UINT32 op = m_op; |
| 703 | | int dest = (op >> 16) & 0x1f; |
| 704 | | int base = (op >> 21) & 0x1f; |
| 705 | | int index = (op >> 7) & 0xf; |
| 706 | | int offset = (op & 0x7f); |
| 707 | | if (offset & 0x40) |
| 708 | | { |
| 709 | | offset |= 0xffffffc0; |
| 710 | | } |
| 711 | | |
| 712 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 713 | | |
| 714 | | for (int i = 0; i < 8; i++) |
| 715 | | { |
| 716 | | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 7, i); |
| 717 | | } |
| 718 | | } |
| 719 | | |
| 720 | | static void cfunc_luv(void *param) |
| 721 | | { |
| 722 | | ((rsp_cop2 *)param)->luv(); |
| 723 | | } |
| 724 | | |
| 725 | | |
| 726 | | // LHV |
| 727 | | // |
| 728 | | // 31 25 20 15 10 6 0 |
| 729 | | // -------------------------------------------------- |
| 730 | | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 731 | | // -------------------------------------------------- |
| 732 | | // |
| 733 | | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 734 | | |
| 735 | | inline void rsp_cop2_simd::lhv() |
| 736 | | { |
| 737 | | UINT32 op = m_op; |
| 738 | | int dest = (op >> 16) & 0x1f; |
| 739 | | int base = (op >> 21) & 0x1f; |
| 740 | | int index = (op >> 7) & 0xf; |
| 741 | | int offset = (op & 0x7f); |
| 742 | | if (offset & 0x40) |
| 743 | | { |
| 744 | | offset |= 0xffffffc0; |
| 745 | | } |
| 746 | | |
| 747 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 748 | | |
| 749 | | for (int i = 0; i < 8; i++) |
| 750 | | { |
| 751 | | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7, i); |
| 752 | | } |
| 753 | | } |
| 754 | | |
| 755 | | static void cfunc_lhv(void *param) |
| 756 | | { |
| 757 | | ((rsp_cop2 *)param)->lhv(); |
| 758 | | } |
| 759 | | |
| 760 | | |
| 761 | | // LFV |
| 762 | | // 31 25 20 15 10 6 0 |
| 763 | | // -------------------------------------------------- |
| 764 | | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 765 | | // -------------------------------------------------- |
| 766 | | // |
| 767 | | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 768 | | |
| 769 | | inline void rsp_cop2_simd::lfv() |
| 770 | | { |
| 771 | | UINT32 op = m_op; |
| 772 | | int dest = (op >> 16) & 0x1f; |
| 773 | | int base = (op >> 21) & 0x1f; |
| 774 | | int index = (op >> 7) & 0xf; |
| 775 | | int offset = (op & 0x7f); |
| 776 | | if (offset & 0x40) |
| 777 | | { |
| 778 | | offset |= 0xffffffc0; |
| 779 | | } |
| 780 | | |
| 781 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 782 | | |
| 783 | | // not sure what happens if 16-byte boundary is crossed... |
| 784 | | |
| 785 | | int end = (index >> 1) + 4; |
| 786 | | |
| 787 | | for (int i = index >> 1; i < end; i++) |
| 788 | | { |
| 789 | | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea) << 7, i); |
| 790 | | ea += 4; |
| 791 | | } |
| 792 | | } |
| 793 | | |
| 794 | | static void cfunc_lfv(void *param) |
| 795 | | { |
| 796 | | ((rsp_cop2 *)param)->lfv(); |
| 797 | | } |
| 798 | | |
| 799 | | |
| 800 | | // LWV |
| 801 | | // |
| 802 | | // 31 25 20 15 10 6 0 |
| 803 | | // -------------------------------------------------- |
| 804 | | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 805 | | // -------------------------------------------------- |
| 806 | | // |
| 807 | | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 808 | | // after byte index 15 |
| 809 | | |
| 810 | | inline void rsp_cop2_simd::lwv() |
| 811 | | { |
| 812 | | UINT32 op = m_op; |
| 813 | | int dest = (op >> 16) & 0x1f; |
| 814 | | int base = (op >> 21) & 0x1f; |
| 815 | | int index = (op >> 7) & 0xf; |
| 816 | | int offset = (op & 0x7f); |
| 817 | | if (offset & 0x40) |
| 818 | | { |
| 819 | | offset |= 0xffffffc0; |
| 820 | | } |
| 821 | | |
| 822 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 823 | | int end = (16 - index) + 16; |
| 824 | | |
| 825 | | UINT8 val[16]; |
| 826 | | for (int i = (16 - index); i < end; i++) |
| 827 | | { |
| 828 | | val[i & 0xf] = m_rsp.DM_READ8(ea); |
| 829 | | ea += 4; |
| 830 | | } |
| 831 | | |
| 832 | | m_xv[dest] = _mm_set_epi8(val[15], val[14], val[13], val[12], val[11], val[10], val[ 9], val[ 8], |
| 833 | | val[ 7], val[ 6], val[ 5], val[ 4], val[ 3], val[ 2], val[ 1], val[ 0]); |
| 834 | | } |
| 835 | | |
| 836 | | static void cfunc_lwv(void *param) |
| 837 | | { |
| 838 | | ((rsp_cop2 *)param)->lwv(); |
| 839 | | } |
| 840 | | |
| 841 | | |
| 842 | | // LTV |
| 843 | | // |
| 844 | | // 31 25 20 15 10 6 0 |
| 845 | | // -------------------------------------------------- |
| 846 | | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 847 | | // -------------------------------------------------- |
| 848 | | // |
| 849 | | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 850 | | |
| 851 | | inline void rsp_cop2_simd::ltv() |
| 852 | | { |
| 853 | | UINT32 op = m_op; |
| 854 | | int dest = (op >> 16) & 0x1f; |
| 855 | | int base = (op >> 21) & 0x1f; |
| 856 | | int index = (op >> 7) & 0xf; |
| 857 | | int offset = (op & 0x7f); |
| 858 | | |
| 859 | | // FIXME: has a small problem with odd indices |
| 860 | | |
| 861 | | int vs = dest; |
| 862 | | int ve = dest + 8; |
| 863 | | if (ve > 32) |
| 864 | | { |
| 865 | | ve = 32; |
| 866 | | } |
| 867 | | |
| 868 | | int element = 7 - (index >> 1); |
| 869 | | |
| 870 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 871 | | |
| 872 | | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 873 | | for (int i = vs; i < ve; i++) |
| 874 | | { |
| 875 | | element = (8 - (index >> 1) + (i - vs)) << 1; |
| 876 | | UINT16 value = (m_rsp.DM_READ8(ea) << 8) | m_rsp.DM_READ8(ea + 1); |
| 877 | | INSERT16(m_xv[i], value, (element >> 1)); |
| 878 | | ea += 2; |
| 879 | | } |
| 880 | | } |
| 881 | | |
| 882 | | static void cfunc_ltv(void *param) |
| 883 | | { |
| 884 | | ((rsp_cop2 *)param)->ltv(); |
| 885 | | } |
| 886 | | |
| 887 | | |
| 888 | | /*************************************************************************** |
| 889 | | Vector Store Instructions |
| 890 | | ***************************************************************************/ |
| 891 | | |
| 892 | | // SBV |
| 893 | | // |
| 894 | | // 31 25 20 15 10 6 0 |
| 895 | | // -------------------------------------------------- |
| 896 | | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 897 | | // -------------------------------------------------- |
| 898 | | // |
| 899 | | // Stores 1 byte from vector byte index |
| 900 | | |
| 901 | | inline void rsp_cop2_simd::sbv() |
| 902 | | { |
| 903 | | UINT32 op = m_op; |
| 904 | | int dest = (op >> 16) & 0x1f; |
| 905 | | int base = (op >> 21) & 0x1f; |
| 906 | | int index = (op >> 7) & 0xf; |
| 907 | | int offset = (op & 0x7f); |
| 908 | | if (offset & 0x40) |
| 909 | | { |
| 910 | | offset |= 0xffffffc0; |
| 911 | | } |
| 912 | | |
| 913 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 914 | | UINT16 value; |
| 915 | | EXTRACT16(m_xv[dest], value, (index >> 1)); |
| 916 | | value >>= (1-(index & 1)) * 8; |
| 917 | | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 918 | | } |
| 919 | | |
| 920 | | static void cfunc_sbv(void *param) |
| 921 | | { |
| 922 | | ((rsp_cop2 *)param)->sbv(); |
| 923 | | } |
| 924 | | |
| 925 | | |
| 926 | | // SSV |
| 927 | | // |
| 928 | | // 31 25 20 15 10 6 0 |
| 929 | | // -------------------------------------------------- |
| 930 | | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 931 | | // -------------------------------------------------- |
| 932 | | // |
| 933 | | // Stores 2 bytes starting from vector byte index |
| 934 | | |
| 935 | | inline void rsp_cop2_simd::ssv() |
| 936 | | { |
| 937 | | UINT32 op = m_op; |
| 938 | | int dest = (op >> 16) & 0x1f; |
| 939 | | int base = (op >> 21) & 0x1f; |
| 940 | | int index = (op >> 7) & 0xf; |
| 941 | | int offset = (op & 0x7f); |
| 942 | | if (offset & 0x40) |
| 943 | | { |
| 944 | | offset |= 0xffffffc0; |
| 945 | | } |
| 946 | | |
| 947 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 948 | | |
| 949 | | int end = index + 2; |
| 950 | | for (int i = index; i < end; i++) |
| 951 | | { |
| 952 | | UINT16 value; |
| 953 | | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 954 | | value >>= (1 - (i & 1)) * 8; |
| 955 | | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 956 | | ea++; |
| 957 | | } |
| 958 | | } |
| 959 | | |
| 960 | | static void cfunc_ssv(void *param) |
| 961 | | { |
| 962 | | ((rsp_cop2 *)param)->ssv(); |
| 963 | | } |
| 964 | | |
| 965 | | |
| 966 | | // SLV |
| 967 | | // |
| 968 | | // 31 25 20 15 10 6 0 |
| 969 | | // -------------------------------------------------- |
| 970 | | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 971 | | // -------------------------------------------------- |
| 972 | | // |
| 973 | | // Stores 4 bytes starting from vector byte index |
| 974 | | |
| 975 | | inline void rsp_cop2_simd::slv() |
| 976 | | { |
| 977 | | UINT32 op = m_op; |
| 978 | | int dest = (op >> 16) & 0x1f; |
| 979 | | int base = (op >> 21) & 0x1f; |
| 980 | | int index = (op >> 7) & 0xf; |
| 981 | | int offset = (op & 0x7f); |
| 982 | | if (offset & 0x40) |
| 983 | | { |
| 984 | | offset |= 0xffffffc0; |
| 985 | | } |
| 986 | | |
| 987 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 988 | | |
| 989 | | int end = index + 4; |
| 990 | | for (int i = index; i < end; i++) |
| 991 | | { |
| 992 | | UINT16 value; |
| 993 | | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 994 | | value >>= (1 - (i & 1)) * 8; |
| 995 | | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 996 | | ea++; |
| 997 | | } |
| 998 | | } |
| 999 | | |
| 1000 | | static void cfunc_slv(void *param) |
| 1001 | | { |
| 1002 | | ((rsp_cop2 *)param)->slv(); |
| 1003 | | } |
| 1004 | | |
| 1005 | | |
| 1006 | | // SDV |
| 1007 | | // |
| 1008 | | // 31 25 20 15 10 6 0 |
| 1009 | | // -------------------------------------------------- |
| 1010 | | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 1011 | | // -------------------------------------------------- |
| 1012 | | // |
| 1013 | | // Stores 8 bytes starting from vector byte index |
| 1014 | | |
| 1015 | | inline void rsp_cop2_simd::sdv() |
| 1016 | | { |
| 1017 | | UINT32 op = m_op; |
| 1018 | | int dest = (op >> 16) & 0x1f; |
| 1019 | | int base = (op >> 21) & 0x1f; |
| 1020 | | int index = (op >> 7) & 0x8; |
| 1021 | | int offset = (op & 0x7f); |
| 1022 | | if (offset & 0x40) |
| 1023 | | { |
| 1024 | | offset |= 0xffffffc0; |
| 1025 | | } |
| 1026 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1027 | | |
| 1028 | | int end = index + 8; |
| 1029 | | for (int i = index; i < end; i++) |
| 1030 | | { |
| 1031 | | UINT16 value; |
| 1032 | | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 1033 | | value >>= (1 - (i & 1)) * 8; |
| 1034 | | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 1035 | | ea++; |
| 1036 | | } |
| 1037 | | } |
| 1038 | | |
| 1039 | | static void cfunc_sdv(void *param) |
| 1040 | | { |
| 1041 | | ((rsp_cop2 *)param)->sdv(); |
| 1042 | | } |
| 1043 | | |
| 1044 | | |
| 1045 | | // SQV |
| 1046 | | // |
| 1047 | | // 31 25 20 15 10 6 0 |
| 1048 | | // -------------------------------------------------- |
| 1049 | | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 1050 | | // -------------------------------------------------- |
| 1051 | | // |
| 1052 | | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 1053 | | |
| 1054 | | inline void rsp_cop2_simd::sqv() |
| 1055 | | { |
| 1056 | | UINT32 op = m_op; |
| 1057 | | int dest = (op >> 16) & 0x1f; |
| 1058 | | int base = (op >> 21) & 0x1f; |
| 1059 | | int index = (op >> 7) & 0xf; |
| 1060 | | int offset = (op & 0x7f); |
| 1061 | | if (offset & 0x40) |
| 1062 | | { |
| 1063 | | offset |= 0xffffffc0; |
| 1064 | | } |
| 1065 | | |
| 1066 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1067 | | int end = index + (16 - (ea & 0xf)); |
| 1068 | | for (int i=index; i < end; i++) |
| 1069 | | { |
| 1070 | | UINT16 value; |
| 1071 | | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 1072 | | value >>= (1-(i & 1)) * 8; |
| 1073 | | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 1074 | | ea++; |
| 1075 | | } |
| 1076 | | } |
| 1077 | | |
| 1078 | | static void cfunc_sqv(void *param) |
| 1079 | | { |
| 1080 | | ((rsp_cop2 *)param)->sqv(); |
| 1081 | | } |
| 1082 | | |
| 1083 | | |
| 1084 | | // SRV |
| 1085 | | // |
| 1086 | | // 31 25 20 15 10 6 0 |
| 1087 | | // -------------------------------------------------- |
| 1088 | | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 1089 | | // -------------------------------------------------- |
| 1090 | | // |
| 1091 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 1092 | | |
| 1093 | | inline void rsp_cop2_simd::srv() |
| 1094 | | { |
| 1095 | | UINT32 op = m_op; |
| 1096 | | int dest = (op >> 16) & 0x1f; |
| 1097 | | int base = (op >> 21) & 0x1f; |
| 1098 | | int index = (op >> 7) & 0xf; |
| 1099 | | int offset = (op & 0x7f); |
| 1100 | | if (offset & 0x40) |
| 1101 | | { |
| 1102 | | offset |= 0xffffffc0; |
| 1103 | | } |
| 1104 | | |
| 1105 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1106 | | |
| 1107 | | int end = index + (ea & 0xf); |
| 1108 | | int o = (16 - (ea & 0xf)) & 0xf; |
| 1109 | | ea &= ~0xf; |
| 1110 | | |
| 1111 | | for (int i = index; i < end; i++) |
| 1112 | | { |
| 1113 | | UINT32 bi = (i + o) & 0xf; |
| 1114 | | UINT16 value; |
| 1115 | | EXTRACT16(m_xv[dest], value, (bi >> 1)); |
| 1116 | | value >>= (1-(bi & 1)) * 8; |
| 1117 | | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 1118 | | ea++; |
| 1119 | | } |
| 1120 | | } |
| 1121 | | |
| 1122 | | static void cfunc_srv(void *param) |
| 1123 | | { |
| 1124 | | ((rsp_cop2 *)param)->srv(); |
| 1125 | | } |
| 1126 | | |
| 1127 | | |
| 1128 | | // SPV |
| 1129 | | // |
| 1130 | | // 31 25 20 15 10 6 0 |
| 1131 | | // -------------------------------------------------- |
| 1132 | | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1133 | | // -------------------------------------------------- |
| 1134 | | // |
| 1135 | | // Stores upper 8 bits of each element |
| 1136 | | |
| 1137 | | inline void rsp_cop2_simd::spv() |
| 1138 | | { |
| 1139 | | UINT32 op = m_op; |
| 1140 | | int dest = (op >> 16) & 0x1f; |
| 1141 | | int base = (op >> 21) & 0x1f; |
| 1142 | | int index = (op >> 7) & 0xf; |
| 1143 | | int offset = (op & 0x7f); |
| 1144 | | if (offset & 0x40) |
| 1145 | | { |
| 1146 | | offset |= 0xffffffc0; |
| 1147 | | } |
| 1148 | | |
| 1149 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1150 | | int end = index + 8; |
| 1151 | | for (int i=index; i < end; i++) |
| 1152 | | { |
| 1153 | | if ((i & 0xf) < 8) |
| 1154 | | { |
| 1155 | | UINT16 value; |
| 1156 | | EXTRACT16(m_xv[dest], value, i); |
| 1157 | | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 8)); |
| 1158 | | } |
| 1159 | | else |
| 1160 | | { |
| 1161 | | UINT16 value; |
| 1162 | | EXTRACT16(m_xv[dest], value, i); |
| 1163 | | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 7)); |
| 1164 | | } |
| 1165 | | ea++; |
| 1166 | | } |
| 1167 | | } |
| 1168 | | |
| 1169 | | static void cfunc_spv(void *param) |
| 1170 | | { |
| 1171 | | ((rsp_cop2 *)param)->spv(); |
| 1172 | | } |
| 1173 | | |
| 1174 | | |
| 1175 | | // SUV |
| 1176 | | // |
| 1177 | | // 31 25 20 15 10 6 0 |
| 1178 | | // -------------------------------------------------- |
| 1179 | | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1180 | | // -------------------------------------------------- |
| 1181 | | // |
| 1182 | | // Stores bits 14-7 of each element |
| 1183 | | |
| 1184 | | inline void rsp_cop2_simd::suv() |
| 1185 | | { |
| 1186 | | UINT32 op = m_op; |
| 1187 | | int dest = (op >> 16) & 0x1f; |
| 1188 | | int base = (op >> 21) & 0x1f; |
| 1189 | | int index = (op >> 7) & 0xf; |
| 1190 | | int offset = (op & 0x7f); |
| 1191 | | if (offset & 0x40) |
| 1192 | | { |
| 1193 | | offset |= 0xffffffc0; |
| 1194 | | } |
| 1195 | | |
| 1196 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1197 | | int end = index + 8; |
| 1198 | | for (int i=index; i < end; i++) |
| 1199 | | { |
| 1200 | | if ((i & 0xf) < 8) |
| 1201 | | { |
| 1202 | | UINT16 value; |
| 1203 | | EXTRACT16(m_xv[dest], value, i); |
| 1204 | | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 7)); |
| 1205 | | } |
| 1206 | | else |
| 1207 | | { |
| 1208 | | UINT16 value; |
| 1209 | | EXTRACT16(m_xv[dest], value, i); |
| 1210 | | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 8)); |
| 1211 | | } |
| 1212 | | ea++; |
| 1213 | | } |
| 1214 | | } |
| 1215 | | |
| 1216 | | static void cfunc_suv(void *param) |
| 1217 | | { |
| 1218 | | ((rsp_cop2 *)param)->suv(); |
| 1219 | | } |
| 1220 | | |
| 1221 | | |
| 1222 | | // SHV |
| 1223 | | // |
| 1224 | | // 31 25 20 15 10 6 0 |
| 1225 | | // -------------------------------------------------- |
| 1226 | | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1227 | | // -------------------------------------------------- |
| 1228 | | // |
| 1229 | | // Stores bits 14-7 of each element, with 2-byte stride |
| 1230 | | |
| 1231 | | inline void rsp_cop2_simd::shv() |
| 1232 | | { |
| 1233 | | UINT32 op = m_op; |
| 1234 | | int dest = (op >> 16) & 0x1f; |
| 1235 | | int base = (op >> 21) & 0x1f; |
| 1236 | | int index = (op >> 7) & 0xf; |
| 1237 | | int offset = (op & 0x7f); |
| 1238 | | if (offset & 0x40) |
| 1239 | | { |
| 1240 | | offset |= 0xffffffc0; |
| 1241 | | } |
| 1242 | | |
| 1243 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1244 | | for (int i=0; i < 8; i++) |
| 1245 | | { |
| 1246 | | int element = index + (i << 1); |
| 1247 | | UINT16 value; |
| 1248 | | EXTRACT16(m_xv[dest], value, element >> 1); |
| 1249 | | m_rsp.DM_WRITE8(ea, (value >> 7) & 0x00ff); |
| 1250 | | ea += 2; |
| 1251 | | } |
| 1252 | | } |
| 1253 | | |
| 1254 | | static void cfunc_shv(void *param) |
| 1255 | | { |
| 1256 | | ((rsp_cop2 *)param)->shv(); |
| 1257 | | } |
| 1258 | | |
| 1259 | | |
| 1260 | | // SFV |
| 1261 | | // |
| 1262 | | // 31 25 20 15 10 6 0 |
| 1263 | | // -------------------------------------------------- |
| 1264 | | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1265 | | // -------------------------------------------------- |
| 1266 | | // |
| 1267 | | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 1268 | | |
| 1269 | | inline void rsp_cop2_simd::sfv() |
| 1270 | | { |
| 1271 | | UINT32 op = m_op; |
| 1272 | | int dest = (op >> 16) & 0x1f; |
| 1273 | | int base = (op >> 21) & 0x1f; |
| 1274 | | int index = (op >> 7) & 0xf; |
| 1275 | | int offset = (op & 0x7f); |
| 1276 | | if (offset & 0x40) |
| 1277 | | { |
| 1278 | | offset |= 0xffffffc0; |
| 1279 | | } |
| 1280 | | |
| 1281 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1282 | | int eaoffset = ea & 0xf; |
| 1283 | | ea &= ~0xf; |
| 1284 | | |
| 1285 | | int end = (index >> 1) + 4; |
| 1286 | | |
| 1287 | | for (int i = index>>1; i < end; i++) |
| 1288 | | { |
| 1289 | | UINT16 value; |
| 1290 | | EXTRACT16(m_xv[dest], value, i); |
| 1291 | | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), (value >> 7) & 0x00ff); |
| 1292 | | eaoffset += 4; |
| 1293 | | } |
| 1294 | | } |
| 1295 | | |
| 1296 | | static void cfunc_sfv(void *param) |
| 1297 | | { |
| 1298 | | ((rsp_cop2 *)param)->sfv(); |
| 1299 | | } |
| 1300 | | |
| 1301 | | |
| 1302 | | // SWV |
| 1303 | | // |
| 1304 | | // 31 25 20 15 10 6 0 |
| 1305 | | // -------------------------------------------------- |
| 1306 | | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1307 | | // -------------------------------------------------- |
| 1308 | | // |
| 1309 | | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1310 | | // after byte index 15 |
| 1311 | | |
| 1312 | | inline void rsp_cop2_simd::swv() |
| 1313 | | { |
| 1314 | | UINT32 op = m_op; |
| 1315 | | int dest = (op >> 16) & 0x1f; |
| 1316 | | int base = (op >> 21) & 0x1f; |
| 1317 | | int index = (op >> 7) & 0xf; |
| 1318 | | int offset = (op & 0x7f); |
| 1319 | | if (offset & 0x40) |
| 1320 | | { |
| 1321 | | offset |= 0xffffffc0; |
| 1322 | | } |
| 1323 | | |
| 1324 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1325 | | int eaoffset = ea & 0xf; |
| 1326 | | ea &= ~0xf; |
| 1327 | | |
| 1328 | | int end = index + 16; |
| 1329 | | for (int i = index; i < end; i++) |
| 1330 | | { |
| 1331 | | UINT16 value; |
| 1332 | | EXTRACT16(m_xv[dest], value, i >> 1); |
| 1333 | | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), (value >> ((1-(i & 1)) * 8)) & 0xff); |
| 1334 | | eaoffset++; |
| 1335 | | } |
| 1336 | | } |
| 1337 | | |
| 1338 | | static void cfunc_swv(void *param) |
| 1339 | | { |
| 1340 | | ((rsp_cop2 *)param)->swv(); |
| 1341 | | } |
| 1342 | | |
| 1343 | | |
| 1344 | | // STV |
| 1345 | | // |
| 1346 | | // 31 25 20 15 10 6 0 |
| 1347 | | // -------------------------------------------------- |
| 1348 | | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1349 | | // -------------------------------------------------- |
| 1350 | | // |
| 1351 | | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 1352 | | |
| 1353 | | inline void rsp_cop2_simd::stv() |
| 1354 | | { |
| 1355 | | UINT32 op = m_op; |
| 1356 | | int dest = (op >> 16) & 0x1f; |
| 1357 | | int base = (op >> 21) & 0x1f; |
| 1358 | | int index = (op >> 7) & 0xf; |
| 1359 | | int offset = (op & 0x7f); |
| 1360 | | |
| 1361 | | if (offset & 0x40) |
| 1362 | | { |
| 1363 | | offset |= 0xffffffc0; |
| 1364 | | } |
| 1365 | | |
| 1366 | | int vs = dest; |
| 1367 | | int ve = dest + 8; |
| 1368 | | if (ve > 32) |
| 1369 | | { |
| 1370 | | ve = 32; |
| 1371 | | } |
| 1372 | | |
| 1373 | | int element = 8 - (index >> 1); |
| 1374 | | |
| 1375 | | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1376 | | int eaoffset = (ea & 0xf) + (element * 2); |
| 1377 | | ea &= ~0xf; |
| 1378 | | |
| 1379 | | for (int i = vs; i < ve; i++) |
| 1380 | | { |
| 1381 | | UINT16 value; |
| 1382 | | EXTRACT16(m_xv[i], value, element); |
| 1383 | | m_rsp.DM_WRITE16(ea + (eaoffset & 0xf), value); |
| 1384 | | eaoffset += 2; |
| 1385 | | element++; |
| 1386 | | } |
| 1387 | | } |
| 1388 | | |
| 1389 | | static void cfunc_stv(void *param) |
| 1390 | | { |
| 1391 | | ((rsp_cop2 *)param)->stv(); |
| 1392 | | } |
| 1393 | | |
| 1394 | | |
| 1395 | | /*************************************************************************** |
| 1396 | | SIMD Accelerators |
| 1397 | | ***************************************************************************/ |
| 1398 | | |
| 1399 | | /* ============================================================================ |
| 1400 | | * RSPPackLo32to16: Pack LSBs of 32-bit vectors to 16-bits without saturation. |
| 1401 | | * TODO: 5 SSE2 operations is kind of expensive just to truncate values? |
| 1402 | | * ========================================================================= */ |
| 1403 | | INLINE __m128i RSPPackLo32to16(__m128i vectorLow, __m128i vectorHigh) |
| 1404 | | { |
| 1405 | | vectorLow = _mm_slli_epi32(vectorLow, 16); |
| 1406 | | vectorHigh = _mm_slli_epi32(vectorHigh, 16); |
| 1407 | | vectorLow = _mm_srai_epi32(vectorLow, 16); |
| 1408 | | vectorHigh = _mm_srai_epi32(vectorHigh, 16); |
| 1409 | | return _mm_packs_epi32(vectorLow, vectorHigh); |
| 1410 | | } |
| 1411 | | |
| 1412 | | /* ============================================================================ |
| 1413 | | * RSPPackHi32to16: Pack MSBs of 32-bit vectors to 16-bits without saturation. |
| 1414 | | * ========================================================================= */ |
| 1415 | | INLINE __m128i RSPPackHi32to16(__m128i vectorLow, __m128i vectorHigh) |
| 1416 | | { |
| 1417 | | vectorLow = _mm_srai_epi32(vectorLow, 16); |
| 1418 | | vectorHigh = _mm_srai_epi32(vectorHigh, 16); |
| 1419 | | return _mm_packs_epi32(vectorLow, vectorHigh); |
| 1420 | | } |
| 1421 | | |
| 1422 | | /* ============================================================================ |
| 1423 | | * RSPSignExtend16to32: Sign-extend 16-bit slices to 32-bit slices. |
| 1424 | | * ========================================================================= */ |
| 1425 | | INLINE void RSPSignExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 1426 | | { |
| 1427 | | __m128i vMask = _mm_srai_epi16(source, 15); |
| 1428 | | *vectorHigh = _mm_unpackhi_epi16(source, vMask); |
| 1429 | | *vectorLow = _mm_unpacklo_epi16(source, vMask); |
| 1430 | | } |
| 1431 | | |
| 1432 | | /* ============================================================================ |
| 1433 | | * RSPZeroExtend16to32: Zero-extend 16-bit slices to 32-bit slices. |
| 1434 | | * ========================================================================= */ |
| 1435 | | INLINE void RSPZeroExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 1436 | | { |
| 1437 | | *vectorHigh = _mm_unpackhi_epi16(source, _mm_setzero_si128()); |
| 1438 | | *vectorLow = _mm_unpacklo_epi16(source, _mm_setzero_si128()); |
| 1439 | | } |
| 1440 | | |
| 1441 | | /* ============================================================================ |
| 1442 | | * _mm_mullo_epi32: SSE2 lacks _mm_mullo_epi32, define it manually. |
| 1443 | | * TODO/WARNING/DISCLAIMER: Assumes one argument is positive. |
| 1444 | | * ========================================================================= */ |
| 1445 | | INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) |
| 1446 | | { |
| 1447 | | __m128i a4 = _mm_srli_si128(a, 4); |
| 1448 | | __m128i b4 = _mm_srli_si128(b, 4); |
| 1449 | | __m128i ba = _mm_mul_epu32(b, a); |
| 1450 | | __m128i b4a4 = _mm_mul_epu32(b4, a4); |
| 1451 | | |
| 1452 | | __m128i mask = _mm_setr_epi32(~0, 0, ~0, 0); |
| 1453 | | __m128i baMask = _mm_and_si128(ba, mask); |
| 1454 | | __m128i b4a4Mask = _mm_and_si128(b4a4, mask); |
| 1455 | | __m128i b4a4MaskShift = _mm_slli_si128(b4a4Mask, 4); |
| 1456 | | |
| 1457 | | return _mm_or_si128(baMask, b4a4MaskShift); |
| 1458 | | } |
| 1459 | | |
| 1460 | | /* ============================================================================ |
| 1461 | | * RSPClampLowToVal: Clamps the low word of the accumulator. |
| 1462 | | * ========================================================================= */ |
| 1463 | | INLINE __m128i RSPClampLowToVal(__m128i vaccLow, __m128i vaccMid, __m128i vaccHigh) |
| 1464 | | { |
| 1465 | | __m128i setMask = _mm_cmpeq_epi16(_mm_setzero_si128(), _mm_setzero_si128()); |
| 1466 | | __m128i negCheck, useValMask, negVal, posVal; |
| 1467 | | |
| 1468 | | /* Compute some common values ahead of time. */ |
| 1469 | | negCheck = _mm_cmplt_epi16(vaccHigh, _mm_setzero_si128()); |
| 1470 | | |
| 1471 | | /* If accmulator < 0, clamp to val if val != TMin. */ |
| 1472 | | useValMask = _mm_and_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15)); |
| 1473 | | useValMask = _mm_cmpeq_epi16(useValMask, setMask); |
| 1474 | | negVal = _mm_and_si128(useValMask, vaccLow); |
| 1475 | | |
| 1476 | | /* Otherwise, clamp to ~0 if any high bits are set. */ |
| 1477 | | useValMask = _mm_or_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15)); |
| 1478 | | useValMask = _mm_cmpeq_epi16(useValMask, _mm_setzero_si128()); |
| 1479 | | posVal = _mm_and_si128(useValMask, vaccLow); |
| 1480 | | |
| 1481 | | negVal = _mm_and_si128(negCheck, negVal); |
| 1482 | | posVal = _mm_andnot_si128(negCheck, posVal); |
| 1483 | | return _mm_or_si128(negVal, posVal); |
| 1484 | | } |
| 1485 | | |
| 1486 | | |
| 1487 | | /*************************************************************************** |
| 1488 | | Vector Opcodes |
| 1489 | | ***************************************************************************/ |
| 1490 | | |
| 1491 | | // VMULF |
| 1492 | | // |
| 1493 | | // 31 25 24 20 15 10 5 0 |
| 1494 | | // ------------------------------------------------------ |
| 1495 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 1496 | | // ------------------------------------------------------ |
| 1497 | | // |
| 1498 | | // Multiplies signed integer by signed integer * 2 |
| 1499 | | |
| 1500 | | inline void rsp_cop2_simd::vmulf() |
| 1501 | | { |
| 1502 | | int op = m_op; |
| 1503 | | |
| 1504 | | for (int i = 0; i < 8; i++) |
| 1505 | | { |
| 1506 | | UINT16 w1, w2; |
| 1507 | | GET_VS1(w1, i); |
| 1508 | | GET_VS2(w2, i); |
| 1509 | | INT32 s1 = (INT32)(INT16)w1; |
| 1510 | | INT32 s2 = (INT32)(INT16)w2; |
| 1511 | | |
| 1512 | | if (s1 == -32768 && s2 == -32768) |
| 1513 | | { |
| 1514 | | // overflow |
| 1515 | | SET_ACCUM_H(0, i); |
| 1516 | | SET_ACCUM_M(-32768, i); |
| 1517 | | SET_ACCUM_L(-32768, i); |
| 1518 | | m_vres[i] = 0x7fff; |
| 1519 | | } |
| 1520 | | else |
| 1521 | | { |
| 1522 | | INT64 r = s1 * s2 * 2; |
| 1523 | | r += 0x8000; // rounding ? |
| 1524 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); |
| 1525 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1526 | | SET_ACCUM_L((UINT16)(r), i); |
| 1527 | | m_vres[i] = ACCUM_M(i); |
| 1528 | | } |
| 1529 | | } |
| 1530 | | WRITEBACK_RESULT(); |
| 1531 | | } |
| 1532 | | |
| 1533 | | static void cfunc_vmulf(void *param) |
| 1534 | | { |
| 1535 | | ((rsp_cop2 *)param)->vmulf(); |
| 1536 | | } |
| 1537 | | |
| 1538 | | |
| 1539 | | // VMULU |
| 1540 | | // |
| 1541 | | // 31 25 24 20 15 10 5 0 |
| 1542 | | // ------------------------------------------------------ |
| 1543 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 1544 | | // ------------------------------------------------------ |
| 1545 | | // |
| 1546 | | |
| 1547 | | inline void rsp_cop2_simd::vmulu() |
| 1548 | | { |
| 1549 | | int op = m_op; |
| 1550 | | |
| 1551 | | for (int i = 0; i < 8; i++) |
| 1552 | | { |
| 1553 | | UINT16 w1, w2; |
| 1554 | | GET_VS1(w1, i); |
| 1555 | | GET_VS2(w2, i); |
| 1556 | | INT32 s1 = (INT32)(INT16)w1; |
| 1557 | | INT32 s2 = (INT32)(INT16)w2; |
| 1558 | | |
| 1559 | | INT64 r = s1 * s2 * 2; |
| 1560 | | r += 0x8000; // rounding ? |
| 1561 | | |
| 1562 | | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 1563 | | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 1564 | | SET_ACCUM_L((UINT16)(r), i); |
| 1565 | | |
| 1566 | | if (r < 0) |
| 1567 | | { |
| 1568 | | m_vres[i] = 0; |
| 1569 | | } |
| 1570 | | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 1571 | | { |
| 1572 | | m_vres[i] = -1; |
| 1573 | | } |
| 1574 | | else |
| 1575 | | { |
| 1576 | | m_vres[i] = ACCUM_M(i); |
| 1577 | | } |
| 1578 | | } |
| 1579 | | WRITEBACK_RESULT(); |
| 1580 | | } |
| 1581 | | |
| 1582 | | static void cfunc_vmulu(void *param) |
| 1583 | | { |
| 1584 | | ((rsp_cop2 *)param)->vmulu(); |
| 1585 | | } |
| 1586 | | |
| 1587 | | |
| 1588 | | // VMUDL |
| 1589 | | // |
| 1590 | | // 31 25 24 20 15 10 5 0 |
| 1591 | | // ------------------------------------------------------ |
| 1592 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | |
| 1593 | | // ------------------------------------------------------ |
| 1594 | | // |
| 1595 | | // Multiplies unsigned fraction by unsigned fraction |
| 1596 | | // The result is stored into accumulator (middle and high slices are cleared) |
| 1597 | | // The low slice of accumulator is stored into destination element |
| 1598 | | |
| 1599 | | inline void rsp_cop2_simd::vmudl() |
| 1600 | | { |
| 1601 | | int op = m_op; |
| 1602 | | |
| 1603 | | __m128i vsReg = m_xv[VS1REG]; |
| 1604 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 1605 | | |
| 1606 | | /* Unpack to obtain for 32-bit precision. */ |
| 1607 | | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 1608 | | __m128i unpackHi = _mm_mulhi_epu16(vsReg, vtReg); |
| 1609 | | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 1610 | | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 1611 | | |
| 1612 | | m_xv[VDREG] = m_accum_l = RSPPackHi32to16(loProduct, hiProduct); |
| 1613 | | |
| 1614 | | m_accum_m = _mm_setzero_si128(); |
| 1615 | | m_accum_h = _mm_setzero_si128(); |
| 1616 | | } |
| 1617 | | |
| 1618 | | static void cfunc_vmudl(void *param) |
| 1619 | | { |
| 1620 | | ((rsp_cop2 *)param)->vmudl(); |
| 1621 | | } |
| 1622 | | |
| 1623 | | |
| 1624 | | // VMUDM |
| 1625 | | // |
| 1626 | | // 31 25 24 20 15 10 5 0 |
| 1627 | | // ------------------------------------------------------ |
| 1628 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1629 | | // ------------------------------------------------------ |
| 1630 | | // |
| 1631 | | // Multiplies signed integer by unsigned fraction |
| 1632 | | // The result is stored into accumulator |
| 1633 | | // The middle slice of accumulator is stored into destination element |
| 1634 | | |
| 1635 | | inline void rsp_cop2_simd::vmudm() |
| 1636 | | { |
| 1637 | | int op = m_op; |
| 1638 | | |
| 1639 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 1640 | | |
| 1641 | | __m128i vsReg = m_xv[VS1REG]; |
| 1642 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 1643 | | |
| 1644 | | /* Unpack to obtain for 32-bit precision. */ |
| 1645 | | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 1646 | | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 1647 | | |
| 1648 | | /* Begin accumulating the products. */ |
| 1649 | | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 1650 | | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 1651 | | m_accum_l = RSPPackLo32to16(loProduct, hiProduct); |
| 1652 | | m_accum_m = m_xv[VDREG] = RSPPackHi32to16(loProduct, hiProduct); |
| 1653 | | |
| 1654 | | loProduct = _mm_cmplt_epi32(loProduct, _mm_setzero_si128()); |
| 1655 | | hiProduct = _mm_cmplt_epi32(hiProduct, _mm_setzero_si128()); |
| 1656 | | m_accum_h = _mm_packs_epi32(loProduct, hiProduct); |
| 1657 | | } |
| 1658 | | |
| 1659 | | static void cfunc_vmudm(void *param) |
| 1660 | | { |
| 1661 | | ((rsp_cop2 *)param)->vmudm(); |
| 1662 | | } |
| 1663 | | |
| 1664 | | |
| 1665 | | // VMUDN |
| 1666 | | // |
| 1667 | | // 31 25 24 20 15 10 5 0 |
| 1668 | | // ------------------------------------------------------ |
| 1669 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1670 | | // ------------------------------------------------------ |
| 1671 | | // |
| 1672 | | // Multiplies unsigned fraction by signed integer |
| 1673 | | // The result is stored into accumulator |
| 1674 | | // The low slice of accumulator is stored into destination element |
| 1675 | | |
| 1676 | | inline void rsp_cop2_simd::vmudn() |
| 1677 | | { |
| 1678 | | int op = m_op; |
| 1679 | | |
| 1680 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 1681 | | |
| 1682 | | __m128i vsReg = m_xv[VS1REG]; |
| 1683 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 1684 | | |
| 1685 | | /* Unpack to obtain for 32-bit precision. */ |
| 1686 | | RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 1687 | | RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 1688 | | |
| 1689 | | /* Begin accumulating the products. */ |
| 1690 | | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 1691 | | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 1692 | | m_xv[VDREG] = m_accum_l = RSPPackLo32to16(loProduct, hiProduct); |
| 1693 | | m_accum_m = RSPPackHi32to16(loProduct, hiProduct); |
| 1694 | | m_accum_h = _mm_cmplt_epi16(m_accum_m, _mm_setzero_si128()); |
| 1695 | | } |
| 1696 | | |
| 1697 | | static void cfunc_vmudn(void *param) |
| 1698 | | { |
| 1699 | | ((rsp_cop2 *)param)->vmudn(); |
| 1700 | | } |
| 1701 | | |
| 1702 | | |
| 1703 | | // VMUDH |
| 1704 | | // |
| 1705 | | // 31 25 24 20 15 10 5 0 |
| 1706 | | // ------------------------------------------------------ |
| 1707 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1708 | | // ------------------------------------------------------ |
| 1709 | | // |
| 1710 | | // Multiplies signed integer by signed integer |
| 1711 | | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1712 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1713 | | |
| 1714 | | inline void rsp_cop2_simd::vmudh() |
| 1715 | | { |
| 1716 | | int op = m_op; |
| 1717 | | |
| 1718 | | __m128i vaccLow, vaccHigh; |
| 1719 | | __m128i unpackLo, unpackHi; |
| 1720 | | |
| 1721 | | __m128i vsReg = m_xv[VS1REG]; |
| 1722 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 1723 | | |
| 1724 | | /* Multiply the sources, accumulate the product. */ |
| 1725 | | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 1726 | | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 1727 | | vaccHigh = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 1728 | | vaccLow = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 1729 | | |
| 1730 | | /* Pack the accumulator and result back up. */ |
| 1731 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 1732 | | m_accum_l = _mm_setzero_si128(); |
| 1733 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1734 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 1735 | | } |
| 1736 | | |
| 1737 | | static void cfunc_vmudh(void *param) |
| 1738 | | { |
| 1739 | | ((rsp_cop2 *)param)->vmudh(); |
| 1740 | | } |
| 1741 | | |
| 1742 | | |
| 1743 | | // VMACF |
| 1744 | | // |
| 1745 | | // 31 25 24 20 15 10 5 0 |
| 1746 | | // ------------------------------------------------------ |
| 1747 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1748 | | // ------------------------------------------------------ |
| 1749 | | // |
| 1750 | | |
| 1751 | | inline void rsp_cop2_simd::vmacf() |
| 1752 | | { |
| 1753 | | int op = m_op; |
| 1754 | | |
| 1755 | | for (int i = 0; i < 8; i++) |
| 1756 | | { |
| 1757 | | UINT16 w1, w2; |
| 1758 | | GET_VS1(w1, i); |
| 1759 | | GET_VS2(w2, i); |
| 1760 | | INT32 s1 = (INT32)(INT16)w1; |
| 1761 | | INT32 s2 = (INT32)(INT16)w2; |
| 1762 | | |
| 1763 | | INT32 r = s1 * s2; |
| 1764 | | |
| 1765 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1766 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1767 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1768 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1769 | | |
| 1770 | | q += (INT64)(r) << 17; |
| 1771 | | SET_ACCUM_LL((UINT16)q, i); |
| 1772 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1773 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1774 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1775 | | |
| 1776 | | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1777 | | } |
| 1778 | | WRITEBACK_RESULT(); |
| 1779 | | } |
| 1780 | | |
| 1781 | | static void cfunc_vmacf(void *param) |
| 1782 | | { |
| 1783 | | ((rsp_cop2 *)param)->vmacf(); |
| 1784 | | } |
| 1785 | | |
| 1786 | | |
| 1787 | | // VMACU |
| 1788 | | // |
| 1789 | | // 31 25 24 20 15 10 5 0 |
| 1790 | | // ------------------------------------------------------ |
| 1791 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1792 | | // ------------------------------------------------------ |
| 1793 | | // |
| 1794 | | |
| 1795 | | inline void rsp_cop2_simd::vmacu() |
| 1796 | | { |
| 1797 | | int op = m_op; |
| 1798 | | |
| 1799 | | __m128i loProduct, hiProduct, unpackLo, unpackHi; |
| 1800 | | __m128i vaccHigh; |
| 1801 | | __m128i vdReg, vdRegLo, vdRegHi; |
| 1802 | | |
| 1803 | | __m128i vsReg = m_xv[VS1REG]; |
| 1804 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 1805 | | |
| 1806 | | __m128i vaccLow = m_accum_l; |
| 1807 | | |
| 1808 | | /* Unpack to obtain for 32-bit precision. */ |
| 1809 | | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 1810 | | |
| 1811 | | /* Begin accumulating the products. */ |
| 1812 | | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 1813 | | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 1814 | | loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 1815 | | hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 1816 | | loProduct = _mm_slli_epi32(loProduct, 1); |
| 1817 | | hiProduct = _mm_slli_epi32(hiProduct, 1); |
| 1818 | | |
| 1819 | | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 1820 | | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 1821 | | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 1822 | | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 1823 | | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 1824 | | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 1825 | | |
| 1826 | | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 1827 | | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 1828 | | |
| 1829 | | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1830 | | |
| 1831 | | /* Multiply the MSB of sources, accumulate the product. */ |
| 1832 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 1833 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 1834 | | |
| 1835 | | loProduct = _mm_srai_epi32(loProduct, 16); |
| 1836 | | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 1837 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 1838 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 1839 | | |
| 1840 | | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 1841 | | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 1842 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 1843 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 1844 | | |
| 1845 | | /* Clamp the accumulator and write it all out. */ |
| 1846 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1847 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 1848 | | } |
| 1849 | | |
| 1850 | | static void cfunc_vmacu(void *param) |
| 1851 | | { |
| 1852 | | ((rsp_cop2 *)param)->vmacu(); |
| 1853 | | } |
| 1854 | | |
| 1855 | | |
| 1856 | | // VMADL |
| 1857 | | // |
| 1858 | | // 31 25 24 20 15 10 5 0 |
| 1859 | | // ------------------------------------------------------ |
| 1860 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1861 | | // ------------------------------------------------------ |
| 1862 | | // |
| 1863 | | // Multiplies unsigned fraction by unsigned fraction |
| 1864 | | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1865 | | // The low slice of accumulator is stored into destination element |
| 1866 | | |
| 1867 | | inline void rsp_cop2_simd::vmadl() |
| 1868 | | { |
| 1869 | | int op = m_op; |
| 1870 | | |
| 1871 | | for (int i = 0; i < 8; i++) |
| 1872 | | { |
| 1873 | | UINT16 w1, w2; |
| 1874 | | GET_VS1(w1, i); |
| 1875 | | GET_VS2(w2, i); |
| 1876 | | UINT32 s1 = w1; |
| 1877 | | UINT32 s2 = w2; |
| 1878 | | |
| 1879 | | UINT32 r1 = s1 * s2; |
| 1880 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1881 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1882 | | |
| 1883 | | SET_ACCUM_L((UINT16)r2, i); |
| 1884 | | SET_ACCUM_M((UINT16)r3, i); |
| 1885 | | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 1886 | | |
| 1887 | | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1888 | | } |
| 1889 | | WRITEBACK_RESULT(); |
| 1890 | | } |
| 1891 | | |
| 1892 | | static void cfunc_vmadl(void *param) |
| 1893 | | { |
| 1894 | | ((rsp_cop2 *)param)->vmadl(); |
| 1895 | | } |
| 1896 | | |
| 1897 | | |
| 1898 | | // VMADM |
| 1899 | | // |
| 1900 | | // 31 25 24 20 15 10 5 0 |
| 1901 | | // ------------------------------------------------------ |
| 1902 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1903 | | // ------------------------------------------------------ |
| 1904 | | // |
| 1905 | | // Multiplies signed fraction by unsigned fraction |
| 1906 | | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1907 | | // The medium slice of accumulator is stored into destination element |
| 1908 | | |
| 1909 | | inline void rsp_cop2_simd::vmadm() |
| 1910 | | { |
| 1911 | | int op = m_op; |
| 1912 | | |
| 1913 | | __m128i vaccLow, vaccHigh, loProduct, hiProduct; |
| 1914 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi; |
| 1915 | | |
| 1916 | | __m128i vsReg = m_xv[VS1REG]; |
| 1917 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 1918 | | |
| 1919 | | /* Unpack to obtain for 32-bit precision. */ |
| 1920 | | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 1921 | | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 1922 | | RSPZeroExtend16to32(m_accum_l, &vaccLow, &vaccHigh); |
| 1923 | | |
| 1924 | | /* Begin accumulating the products. */ |
| 1925 | | loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 1926 | | hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 1927 | | |
| 1928 | | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 1929 | | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 1930 | | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 1931 | | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 1932 | | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 1933 | | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 1934 | | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 1935 | | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 1936 | | |
| 1937 | | m_accum_l = m_xv[VDREG] = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1938 | | |
| 1939 | | /* Multiply the MSB of sources, accumulate the product. */ |
| 1940 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 1941 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 1942 | | |
| 1943 | | loProduct = _mm_srai_epi32(loProduct, 16); |
| 1944 | | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 1945 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 1946 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 1947 | | |
| 1948 | | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 1949 | | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 1950 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 1951 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 1952 | | |
| 1953 | | /* Clamp the accumulator and write it all out. */ |
| 1954 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 1955 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1956 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 1957 | | } |
| 1958 | | |
| 1959 | | static void cfunc_vmadm(void *param) |
| 1960 | | { |
| 1961 | | ((rsp_cop2 *)param)->vmadm(); |
| 1962 | | } |
| 1963 | | |
| 1964 | | |
| 1965 | | // VMADN |
| 1966 | | // |
| 1967 | | // 31 25 24 20 15 10 5 0 |
| 1968 | | // ------------------------------------------------------ |
| 1969 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | |
| 1970 | | // ------------------------------------------------------ |
| 1971 | | // |
| 1972 | | // Multiplies unsigned fraction by signed fraction |
| 1973 | | // Adds the 32-bit result to the medium and high slices of the accumulator |
| 1974 | | // The low slice of accumulator is saturated into destination element |
| 1975 | | |
| 1976 | | inline void rsp_cop2_simd::vmadn() |
| 1977 | | { |
| 1978 | | int op = m_op; |
| 1979 | | |
| 1980 | | for (int i = 0; i < 8; i++) |
| 1981 | | { |
| 1982 | | UINT16 w1, w2; |
| 1983 | | GET_VS1(w1, i); |
| 1984 | | GET_VS2(w2, i); |
| 1985 | | INT32 s1 = (UINT16)w1; |
| 1986 | | INT32 s2 = (INT32)(INT16)w2; |
| 1987 | | |
| 1988 | | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1989 | | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1990 | | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1991 | | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1992 | | q += (INT64)(s1*s2) << 16; |
| 1993 | | |
| 1994 | | SET_ACCUM_LL((UINT16)q, i); |
| 1995 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1996 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1997 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1998 | | |
| 1999 | | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 2000 | | } |
| 2001 | | WRITEBACK_RESULT(); |
| 2002 | | } |
| 2003 | | |
| 2004 | | static void cfunc_vmadn(void *param) |
| 2005 | | { |
| 2006 | | ((rsp_cop2 *)param)->vmadn(); |
| 2007 | | } |
| 2008 | | |
| 2009 | | |
| 2010 | | // VMADH |
| 2011 | | // |
| 2012 | | // 31 25 24 20 15 10 5 0 |
| 2013 | | // ------------------------------------------------------ |
| 2014 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 2015 | | // ------------------------------------------------------ |
| 2016 | | // |
| 2017 | | // Multiplies signed integer by signed integer |
| 2018 | | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 2019 | | // The highest 32 bits of accumulator is saturated into destination element |
| 2020 | | |
| 2021 | | inline void rsp_cop2_simd::vmadh() |
| 2022 | | { |
| 2023 | | int op = m_op; |
| 2024 | | |
| 2025 | | __m128i vsReg = m_xv[VS1REG]; |
| 2026 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2027 | | |
| 2028 | | /* Unpack to obtain for 32-bit precision. */ |
| 2029 | | __m128i vaccLow = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 2030 | | __m128i vaccHigh = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 2031 | | |
| 2032 | | /* Multiply the sources, accumulate the product. */ |
| 2033 | | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 2034 | | __m128i unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 2035 | | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 2036 | | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 2037 | | vaccLow = _mm_add_epi32(vaccLow, loProduct); |
| 2038 | | vaccHigh = _mm_add_epi32(vaccHigh, hiProduct); |
| 2039 | | |
| 2040 | | /* Pack the accumulator and result back up. */ |
| 2041 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 2042 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 2043 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 2044 | | } |
| 2045 | | |
| 2046 | | static void cfunc_vmadh(void *param) |
| 2047 | | { |
| 2048 | | ((rsp_cop2 *)param)->vmadh(); |
| 2049 | | } |
| 2050 | | |
| 2051 | | |
| 2052 | | // VADD |
| 2053 | | // 31 25 24 20 15 10 5 0 |
| 2054 | | // ------------------------------------------------------ |
| 2055 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 2056 | | // ------------------------------------------------------ |
| 2057 | | // |
| 2058 | | // Adds two vector registers and carry flag, the result is saturated to 32767 |
| 2059 | | |
| 2060 | | inline void rsp_cop2_simd::vadd() |
| 2061 | | { |
| 2062 | | int op = m_op; |
| 2063 | | |
| 2064 | | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2065 | | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 2066 | | m_accum_l = _mm_add_epi16(_mm_add_epi16(m_xv[VS1REG], shuffled), carry); |
| 2067 | | |
| 2068 | | __m128i addvec = _mm_adds_epi16(m_xv[VS1REG], shuffled); |
| 2069 | | |
| 2070 | | carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_32767), vec_neg1)); |
| 2071 | | carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_n32768), vec_neg1)); |
| 2072 | | |
| 2073 | | m_xv[VDREG] = _mm_add_epi16(addvec, carry); |
| 2074 | | |
| 2075 | | m_xvflag[ZERO] = vec_zero; |
| 2076 | | m_xvflag[CARRY] = vec_zero; |
| 2077 | | } |
| 2078 | | |
| 2079 | | static void cfunc_vadd(void *param) |
| 2080 | | { |
| 2081 | | ((rsp_cop2 *)param)->vadd(); |
| 2082 | | } |
| 2083 | | |
| 2084 | | |
| 2085 | | // VSUB |
| 2086 | | // |
| 2087 | | // 31 25 24 20 15 10 5 0 |
| 2088 | | // ------------------------------------------------------ |
| 2089 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 2090 | | // ------------------------------------------------------ |
| 2091 | | // |
| 2092 | | // Subtracts two vector registers and carry flag, the result is saturated to -32768 |
| 2093 | | // TODO: check VS2REG == VDREG |
| 2094 | | |
| 2095 | | inline void rsp_cop2_simd::vsub() |
| 2096 | | { |
| 2097 | | int op = m_op; |
| 2098 | | |
| 2099 | | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2100 | | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 2101 | | __m128i unsat = _mm_sub_epi16(m_xv[VS1REG], shuffled); |
| 2102 | | |
| 2103 | | __m128i vs2neg = _mm_cmplt_epi16(shuffled, vec_zero); |
| 2104 | | __m128i vs2pos = _mm_cmpeq_epi16(vs2neg, vec_zero); |
| 2105 | | |
| 2106 | | __m128i saturated = _mm_subs_epi16(m_xv[VS1REG], shuffled); |
| 2107 | | __m128i carry_mask = _mm_cmpeq_epi16(unsat, saturated); |
| 2108 | | carry_mask = _mm_and_si128(vs2neg, carry_mask); |
| 2109 | | |
| 2110 | | vs2neg = _mm_and_si128(carry_mask, carry); |
| 2111 | | vs2pos = _mm_and_si128(vs2pos, carry); |
| 2112 | | __m128i dest_carry = _mm_or_si128(vs2neg, vs2pos); |
| 2113 | | m_xv[VDREG] = _mm_subs_epi16(saturated, dest_carry); |
| 2114 | | |
| 2115 | | m_accum_l = _mm_sub_epi16(unsat, carry); |
| 2116 | | |
| 2117 | | m_xvflag[ZERO] = _mm_setzero_si128(); |
| 2118 | | m_xvflag[CARRY] = _mm_setzero_si128(); |
| 2119 | | } |
| 2120 | | |
| 2121 | | static void cfunc_vsub(void *param) |
| 2122 | | { |
| 2123 | | ((rsp_cop2 *)param)->vsub(); |
| 2124 | | } |
| 2125 | | |
| 2126 | | |
| 2127 | | // VABS |
| 2128 | | // |
| 2129 | | // 31 25 24 20 15 10 5 0 |
| 2130 | | // ------------------------------------------------------ |
| 2131 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 2132 | | // ------------------------------------------------------ |
| 2133 | | // |
| 2134 | | // Changes the sign of source register 2 if source register 1 is negative and stores the result to destination register |
| 2135 | | |
| 2136 | | inline void rsp_cop2_simd::vabs() |
| 2137 | | { |
| 2138 | | int op = m_op; |
| 2139 | | |
| 2140 | | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2141 | | __m128i negs2 = _mm_sub_epi16(_mm_setzero_si128(), shuf2); |
| 2142 | | __m128i s2_n32768 = _mm_cmpeq_epi16(shuf2, vec_n32768); |
| 2143 | | __m128i s1_lz = _mm_cmplt_epi16(m_xv[VS1REG], _mm_setzero_si128()); |
| 2144 | | |
| 2145 | | __m128i result_gz = _mm_and_si128(shuf2, _mm_cmpgt_epi16(m_xv[VS1REG], _mm_setzero_si128())); |
| 2146 | | __m128i result_n32768 = _mm_and_si128(s1_lz, _mm_and_si128(vec_32767, s2_n32768)); |
| 2147 | | __m128i result_negs2 = _mm_and_si128(s1_lz, _mm_and_si128(negs2, _mm_xor_si128(s2_n32768, vec_neg1))); |
| 2148 | | m_xv[VDREG] = m_accum_l = _mm_or_si128(result_gz, _mm_or_si128(result_n32768, result_negs2)); |
| 2149 | | } |
| 2150 | | |
| 2151 | | static void cfunc_vabs(void *param) |
| 2152 | | { |
| 2153 | | ((rsp_cop2 *)param)->vabs(); |
| 2154 | | } |
| 2155 | | |
| 2156 | | |
| 2157 | | // VADDC |
| 2158 | | // |
| 2159 | | // 31 25 24 20 15 10 5 0 |
| 2160 | | // ------------------------------------------------------ |
| 2161 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 2162 | | // ------------------------------------------------------ |
| 2163 | | // |
| 2164 | | // Adds two vector registers, the carry out is stored into carry register |
| 2165 | | // TODO: check VS2REG = VDREG |
| 2166 | | |
| 2167 | | inline void rsp_cop2_simd::vaddc() |
| 2168 | | { |
| 2169 | | int op = m_op; |
| 2170 | | |
| 2171 | | CLEAR_ZERO_FLAGS(); |
| 2172 | | CLEAR_CARRY_FLAGS(); |
| 2173 | | |
| 2174 | | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2175 | | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 2176 | | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 2177 | | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 2178 | | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 2179 | | __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531); |
| 2180 | | __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420); |
| 2181 | | |
| 2182 | | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2183 | | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2184 | | |
| 2185 | | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 2186 | | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 2187 | | |
| 2188 | | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 2189 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 2190 | | } |
| 2191 | | |
| 2192 | | static void cfunc_vaddc(void *param) |
| 2193 | | { |
| 2194 | | ((rsp_cop2 *)param)->vaddc(); |
| 2195 | | } |
| 2196 | | |
| 2197 | | |
| 2198 | | // VSUBC |
| 2199 | | // |
| 2200 | | // 31 25 24 20 15 10 5 0 |
| 2201 | | // ------------------------------------------------------ |
| 2202 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 2203 | | // ------------------------------------------------------ |
| 2204 | | // |
| 2205 | | // Subtracts two vector registers, the carry out is stored into carry register |
| 2206 | | // TODO: check VS2REG = VDREG |
| 2207 | | |
| 2208 | | inline void rsp_cop2_simd::vsubc() |
| 2209 | | { |
| 2210 | | int op = m_op; |
| 2211 | | |
| 2212 | | CLEAR_ZERO_FLAGS(); |
| 2213 | | CLEAR_CARRY_FLAGS(); |
| 2214 | | |
| 2215 | | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2216 | | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 2217 | | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 2218 | | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 2219 | | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 2220 | | __m128i sum7531 = _mm_sub_epi32(vec7531, shuf7531); |
| 2221 | | __m128i sum6420 = _mm_sub_epi32(vec6420, shuf6420); |
| 2222 | | |
| 2223 | | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2224 | | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2225 | | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 2226 | | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 2227 | | __m128i zero7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_lomask); |
| 2228 | | __m128i zero6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_lomask); |
| 2229 | | |
| 2230 | | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 2231 | | m_xvflag[ZERO] = _mm_or_si128(_mm_slli_epi32(zero6420, 16), zero7531); |
| 2232 | | |
| 2233 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 2234 | | } |
| 2235 | | |
| 2236 | | static void cfunc_vsubc(void *param) |
| 2237 | | { |
| 2238 | | ((rsp_cop2 *)param)->vsubc(); |
| 2239 | | } |
| 2240 | | |
| 2241 | | |
| 2242 | | // VADDB |
| 2243 | | // |
| 2244 | | // 31 25 24 20 15 10 5 0 |
| 2245 | | // ------------------------------------------------------ |
| 2246 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010110 | |
| 2247 | | // ------------------------------------------------------ |
| 2248 | | // |
| 2249 | | // Adds two vector registers bytewise with rounding |
| 2250 | | inline void rsp_cop2_simd::vaddb() |
| 2251 | | { |
| 2252 | | const int op = m_op; |
| 2253 | | const int round = (EL == 0) ? 0 : (1 << (EL - 1)); |
| 2254 | | |
| 2255 | | for (int i = 0; i < 8; i++) |
| 2256 | | { |
| 2257 | | UINT16 w1, w2; |
| 2258 | | GET_VS1(w1, i); |
| 2259 | | GET_VS2(w2, i); |
| 2260 | | |
| 2261 | | UINT8 hb1 = w1 >> 8; |
| 2262 | | UINT8 lb1 = w1 & 0xff; |
| 2263 | | UINT8 hb2 = w2 >> 8; |
| 2264 | | UINT8 lb2 = w2 & 0xff; |
| 2265 | | |
| 2266 | | UINT16 hs = hb1 + hb2 + round; |
| 2267 | | UINT16 ls = lb1 + lb2 + round; |
| 2268 | | |
| 2269 | | SET_ACCUM_L((hs << 8) | ls, i); |
| 2270 | | |
| 2271 | | hs >>= EL; |
| 2272 | | if (hs > 255) |
| 2273 | | { |
| 2274 | | hs = 255; |
| 2275 | | } |
| 2276 | | |
| 2277 | | ls >>= EL; |
| 2278 | | if (ls > 255) |
| 2279 | | { |
| 2280 | | ls = 255; |
| 2281 | | } |
| 2282 | | |
| 2283 | | m_vres[i] = 0; // VD writeback disabled on production hardware |
| 2284 | | // m_vres[i] = (hs << 8) | ls; |
| 2285 | | } |
| 2286 | | WRITEBACK_RESULT(); |
| 2287 | | } |
| 2288 | | |
| 2289 | | static void cfunc_vaddb(void *param) |
| 2290 | | { |
| 2291 | | ((rsp_cop2 *)param)->vaddb(); |
| 2292 | | } |
| 2293 | | |
| 2294 | | |
| 2295 | | // VSAW |
| 2296 | | // |
| 2297 | | // 31 25 24 20 15 10 5 0 |
| 2298 | | // ------------------------------------------------------ |
| 2299 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 2300 | | // ------------------------------------------------------ |
| 2301 | | // |
| 2302 | | // Stores high, middle or low slice of accumulator to destination vector |
| 2303 | | |
| 2304 | | inline void rsp_cop2_simd::vsaw() |
| 2305 | | { |
| 2306 | | int op = m_op; |
| 2307 | | |
| 2308 | | switch (EL) |
| 2309 | | { |
| 2310 | | case 0x08: // VSAWH |
| 2311 | | { |
| 2312 | | m_xv[VDREG] = m_accum_h; |
| 2313 | | break; |
| 2314 | | } |
| 2315 | | case 0x09: // VSAWM |
| 2316 | | { |
| 2317 | | m_xv[VDREG] = m_accum_m; |
| 2318 | | break; |
| 2319 | | } |
| 2320 | | case 0x0a: // VSAWL |
| 2321 | | { |
| 2322 | | m_xv[VDREG] = m_accum_l; |
| 2323 | | break; |
| 2324 | | } |
| 2325 | | default: // Unsupported, writes 0 to VD |
| 2326 | | { |
| 2327 | | |
| 2328 | | } |
| 2329 | | } |
| 2330 | | } |
| 2331 | | |
| 2332 | | static void cfunc_vsaw(void *param) |
| 2333 | | { |
| 2334 | | ((rsp_cop2 *)param)->vsaw(); |
| 2335 | | } |
| 2336 | | |
| 2337 | | |
| 2338 | | // VLT |
| 2339 | | // |
| 2340 | | // 31 25 24 20 15 10 5 0 |
| 2341 | | // ------------------------------------------------------ |
| 2342 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 2343 | | // ------------------------------------------------------ |
| 2344 | | // |
| 2345 | | // Sets compare flags if elements in VS1 are less than VS2 |
| 2346 | | // Moves the element in VS2 to destination vector |
| 2347 | | |
| 2348 | | inline void rsp_cop2_simd::vlt() |
| 2349 | | { |
| 2350 | | int op = m_op; |
| 2351 | | |
| 2352 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2353 | | |
| 2354 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2355 | | __m128i zc_mask = _mm_and_si128(m_xvflag[ZERO], m_xvflag[CARRY]); |
| 2356 | | __m128i lt_mask = _mm_cmplt_epi16(m_xv[VS1REG], shuf); |
| 2357 | | __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), zc_mask); |
| 2358 | | |
| 2359 | | m_xvflag[COMPARE] = _mm_or_si128(lt_mask, eq_mask); |
| 2360 | | |
| 2361 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2362 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 2363 | | |
| 2364 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 2365 | | } |
| 2366 | | |
| 2367 | | static void cfunc_void vlt(void *param) |
| 2368 | | { |
| 2369 | | ((rsp_cop2 *)param)->vlt(); |
| 2370 | | } |
| 2371 | | |
| 2372 | | |
| 2373 | | // VEQ |
| 2374 | | // |
| 2375 | | // 31 25 24 20 15 10 5 0 |
| 2376 | | // ------------------------------------------------------ |
| 2377 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 2378 | | // ------------------------------------------------------ |
| 2379 | | // |
| 2380 | | // Sets compare flags if elements in VS1 are equal with VS2 |
| 2381 | | // Moves the element in VS2 to destination vector |
| 2382 | | |
| 2383 | | inline void rsp_cop2_simd::veq() |
| 2384 | | { |
| 2385 | | int op = m_op; |
| 2386 | | |
| 2387 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2388 | | |
| 2389 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2390 | | __m128i zero_mask = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 2391 | | __m128i eq_mask = _mm_cmpeq_epi16(m_xv[VS1REG], shuf); |
| 2392 | | |
| 2393 | | m_xvflag[COMPARE] = _mm_and_si128(zero_mask, eq_mask); |
| 2394 | | |
| 2395 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2396 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 2397 | | |
| 2398 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 2399 | | } |
| 2400 | | |
| 2401 | | static void cfunc_veq(void *param) |
| 2402 | | { |
| 2403 | | ((rsp_cop2 *)param)->veq(); |
| 2404 | | } |
| 2405 | | |
| 2406 | | |
| 2407 | | // VNE |
| 2408 | | // |
| 2409 | | // 31 25 24 20 15 10 5 0 |
| 2410 | | // ------------------------------------------------------ |
| 2411 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 2412 | | // ------------------------------------------------------ |
| 2413 | | // |
| 2414 | | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 2415 | | // Moves the element in VS2 to destination vector |
| 2416 | | |
| 2417 | | inline void rsp_cop2_simd::vne() |
| 2418 | | { |
| 2419 | | int op = m_op; |
| 2420 | | |
| 2421 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2422 | | |
| 2423 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2424 | | __m128i neq_mask = _mm_xor_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), vec_neg1); |
| 2425 | | |
| 2426 | | m_xvflag[COMPARE] = _mm_or_si128(m_xvflag[ZERO], neq_mask); |
| 2427 | | |
| 2428 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2429 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 2430 | | |
| 2431 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 2432 | | } |
| 2433 | | |
| 2434 | | static void cfunc_vne(void *param) |
| 2435 | | { |
| 2436 | | ((rsp_cop2 *)param)->vne(); |
| 2437 | | } |
| 2438 | | |
| 2439 | | |
| 2440 | | // VGE |
| 2441 | | // |
| 2442 | | // 31 25 24 20 15 10 5 0 |
| 2443 | | // ------------------------------------------------------ |
| 2444 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 2445 | | // ------------------------------------------------------ |
| 2446 | | // |
| 2447 | | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 2448 | | // Moves the element in VS2 to destination vector |
| 2449 | | |
| 2450 | | inline void rsp_cop2_simd::vge() |
| 2451 | | { |
| 2452 | | int op = m_op; |
| 2453 | | |
| 2454 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2455 | | |
| 2456 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2457 | | __m128i zero_mask = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 2458 | | __m128i carry_mask = _mm_cmpeq_epi16(m_xvflag[CARRY], _mm_setzero_si128()); |
| 2459 | | __m128i flag_mask = _mm_or_si128(zero_mask, carry_mask); |
| 2460 | | __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), flag_mask); |
| 2461 | | __m128i gt_mask = _mm_cmpgt_epi16(m_xv[VS1REG], shuf); |
| 2462 | | m_xvflag[COMPARE] = _mm_or_si128(eq_mask, gt_mask); |
| 2463 | | |
| 2464 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2465 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 2466 | | |
| 2467 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 2468 | | } |
| 2469 | | |
| 2470 | | static void cfunc_vge(void *param) |
| 2471 | | { |
| 2472 | | ((rsp_cop2 *)param)->vge(); |
| 2473 | | } |
| 2474 | | |
| 2475 | | |
| 2476 | | // VCL |
| 2477 | | // |
| 2478 | | // 31 25 24 20 15 10 5 0 |
| 2479 | | // ------------------------------------------------------ |
| 2480 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 2481 | | // ------------------------------------------------------ |
| 2482 | | // |
| 2483 | | // Vector clip low |
| 2484 | | |
| 2485 | | inline void rsp_cop2_simd::vcl() |
| 2486 | | { |
| 2487 | | int op = m_op; |
| 2488 | | |
| 2489 | | for (int i = 0; i < 8; i++) |
| 2490 | | { |
| 2491 | | INT16 s1, s2; |
| 2492 | | GET_VS1(s1, i); |
| 2493 | | GET_VS2(s2, i); |
| 2494 | | |
| 2495 | | if (CARRY_FLAG(i) != 0) |
| 2496 | | { |
| 2497 | | if (ZERO_FLAG(i) != 0) |
| 2498 | | { |
| 2499 | | if (COMPARE_FLAG(i) != 0) |
| 2500 | | { |
| 2501 | | SET_ACCUM_L(-(UINT16)s2, i); |
| 2502 | | } |
| 2503 | | else |
| 2504 | | { |
| 2505 | | SET_ACCUM_L(s1, i); |
| 2506 | | } |
| 2507 | | } |
| 2508 | | else |
| 2509 | | { |
| 2510 | | if (CLIP1_FLAG(i) != 0) |
| 2511 | | { |
| 2512 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 2513 | | { |
| 2514 | | SET_ACCUM_L(s1, i); |
| 2515 | | CLEAR_COMPARE_FLAG(i); |
| 2516 | | } |
| 2517 | | else |
| 2518 | | { |
| 2519 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 2520 | | SET_COMPARE_FLAG(i); |
| 2521 | | } |
| 2522 | | } |
| 2523 | | else |
| 2524 | | { |
| 2525 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 2526 | | { |
| 2527 | | SET_ACCUM_L(s1, i); |
| 2528 | | CLEAR_COMPARE_FLAG(i); |
| 2529 | | } |
| 2530 | | else |
| 2531 | | { |
| 2532 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 2533 | | SET_COMPARE_FLAG(i); |
| 2534 | | } |
| 2535 | | } |
| 2536 | | } |
| 2537 | | } |
| 2538 | | else |
| 2539 | | { |
| 2540 | | if (ZERO_FLAG(i) != 0) |
| 2541 | | { |
| 2542 | | if (CLIP2_FLAG(i) != 0) |
| 2543 | | { |
| 2544 | | SET_ACCUM_L(s2, i); |
| 2545 | | } |
| 2546 | | else |
| 2547 | | { |
| 2548 | | SET_ACCUM_L(s1, i); |
| 2549 | | } |
| 2550 | | } |
| 2551 | | else |
| 2552 | | { |
| 2553 | | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 2554 | | { |
| 2555 | | SET_ACCUM_L(s2, i); |
| 2556 | | SET_CLIP2_FLAG(i); |
| 2557 | | } |
| 2558 | | else |
| 2559 | | { |
| 2560 | | SET_ACCUM_L(s1, i); |
| 2561 | | CLEAR_CLIP2_FLAG(i); |
| 2562 | | } |
| 2563 | | } |
| 2564 | | } |
| 2565 | | m_vres[i] = ACCUM_L(i); |
| 2566 | | } |
| 2567 | | CLEAR_ZERO_FLAGS(); |
| 2568 | | CLEAR_CARRY_FLAGS(); |
| 2569 | | CLEAR_CLIP1_FLAGS(); |
| 2570 | | WRITEBACK_RESULT(); |
| 2571 | | } |
| 2572 | | |
| 2573 | | static void cfunc_vcl(void *param) |
| 2574 | | { |
| 2575 | | ((rsp_cop2 *)param)->vcl(); |
| 2576 | | } |
| 2577 | | |
| 2578 | | |
| 2579 | | // VCH |
| 2580 | | // |
| 2581 | | // 31 25 24 20 15 10 5 0 |
| 2582 | | // ------------------------------------------------------ |
| 2583 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 2584 | | // ------------------------------------------------------ |
| 2585 | | // |
| 2586 | | // Vector clip high |
| 2587 | | |
| 2588 | | inline void rsp_cop2_simd::vch() |
| 2589 | | { |
| 2590 | | int op = m_op; |
| 2591 | | |
| 2592 | | CLEAR_CARRY_FLAGS(); |
| 2593 | | CLEAR_COMPARE_FLAGS(); |
| 2594 | | CLEAR_CLIP1_FLAGS(); |
| 2595 | | CLEAR_ZERO_FLAGS(); |
| 2596 | | CLEAR_CLIP2_FLAGS(); |
| 2597 | | |
| 2598 | | UINT32 vce = 0; |
| 2599 | | for (int i = 0; i < 8; i++) |
| 2600 | | { |
| 2601 | | INT16 s1, s2; |
| 2602 | | GET_VS1(s1, i); |
| 2603 | | GET_VS2(s2, i); |
| 2604 | | |
| 2605 | | if ((s1 ^ s2) < 0) |
| 2606 | | { |
| 2607 | | vce = (s1 + s2 == -1); |
| 2608 | | SET_CARRY_FLAG(i); |
| 2609 | | if (s2 < 0) |
| 2610 | | { |
| 2611 | | SET_CLIP2_FLAG(i); |
| 2612 | | } |
| 2613 | | |
| 2614 | | if ((s1 + s2) <= 0) |
| 2615 | | { |
| 2616 | | SET_COMPARE_FLAG(i); |
| 2617 | | m_vres[i] = -((UINT16)s2); |
| 2618 | | } |
| 2619 | | else |
| 2620 | | { |
| 2621 | | m_vres[i] = s1; |
| 2622 | | } |
| 2623 | | |
| 2624 | | if ((s1 + s2) != 0 && s1 != ~s2) |
| 2625 | | { |
| 2626 | | SET_ZERO_FLAG(i); |
| 2627 | | } |
| 2628 | | }//sign |
| 2629 | | else |
| 2630 | | { |
| 2631 | | vce = 0; |
| 2632 | | if (s2 < 0) |
| 2633 | | { |
| 2634 | | SET_COMPARE_FLAG(i); |
| 2635 | | } |
| 2636 | | if ((s1 - s2) >= 0) |
| 2637 | | { |
| 2638 | | SET_CLIP2_FLAG(i); |
| 2639 | | m_vres[i] = s2; |
| 2640 | | } |
| 2641 | | else |
| 2642 | | { |
| 2643 | | m_vres[i] = s1; |
| 2644 | | } |
| 2645 | | |
| 2646 | | if ((s1 - s2) != 0 && s1 != ~s2) |
| 2647 | | { |
| 2648 | | SET_ZERO_FLAG(i); |
| 2649 | | } |
| 2650 | | } |
| 2651 | | if (vce) |
| 2652 | | { |
| 2653 | | SET_CLIP1_FLAG(i); |
| 2654 | | } |
| 2655 | | SET_ACCUM_L(m_vres[i], i); |
| 2656 | | } |
| 2657 | | WRITEBACK_RESULT(); |
| 2658 | | } |
| 2659 | | |
| 2660 | | static void cfunc_vch(void *param) |
| 2661 | | { |
| 2662 | | ((rsp_cop2 *)param)->vch(); |
| 2663 | | } |
| 2664 | | |
| 2665 | | |
| 2666 | | // VCR |
| 2667 | | // |
| 2668 | | // 31 25 24 20 15 10 5 0 |
| 2669 | | // ------------------------------------------------------ |
| 2670 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 2671 | | // ------------------------------------------------------ |
| 2672 | | // |
| 2673 | | // Vector clip reverse |
| 2674 | | |
| 2675 | | inline void rsp_cop2_simd::vcr() |
| 2676 | | { |
| 2677 | | int op = m_op; |
| 2678 | | |
| 2679 | | CLEAR_CARRY_FLAGS(); |
| 2680 | | CLEAR_COMPARE_FLAGS(); |
| 2681 | | CLEAR_CLIP1_FLAGS(); |
| 2682 | | CLEAR_ZERO_FLAGS(); |
| 2683 | | CLEAR_CLIP2_FLAGS(); |
| 2684 | | |
| 2685 | | for (int i = 0; i < 8; i++) |
| 2686 | | { |
| 2687 | | INT16 s1, s2; |
| 2688 | | GET_VS1(s1, i); |
| 2689 | | GET_VS2(s2, i); |
| 2690 | | |
| 2691 | | if ((INT16)(s1 ^ s2) < 0) |
| 2692 | | { |
| 2693 | | if (s2 < 0) |
| 2694 | | { |
| 2695 | | SET_CLIP2_FLAG(i); |
| 2696 | | } |
| 2697 | | if ((s1 + s2) <= 0) |
| 2698 | | { |
| 2699 | | SET_ACCUM_L(~((UINT16)s2), i); |
| 2700 | | SET_COMPARE_FLAG(i); |
| 2701 | | } |
| 2702 | | else |
| 2703 | | { |
| 2704 | | SET_ACCUM_L(s1, i); |
| 2705 | | } |
| 2706 | | } |
| 2707 | | else |
| 2708 | | { |
| 2709 | | if (s2 < 0) |
| 2710 | | { |
| 2711 | | SET_COMPARE_FLAG(i); |
| 2712 | | } |
| 2713 | | if ((s1 - s2) >= 0) |
| 2714 | | { |
| 2715 | | SET_ACCUM_L(s2, i); |
| 2716 | | SET_CLIP2_FLAG(i); |
| 2717 | | } |
| 2718 | | else |
| 2719 | | { |
| 2720 | | SET_ACCUM_L(s1, i); |
| 2721 | | } |
| 2722 | | } |
| 2723 | | |
| 2724 | | m_vres[i] = ACCUM_L(i); |
| 2725 | | } |
| 2726 | | WRITEBACK_RESULT(); |
| 2727 | | } |
| 2728 | | |
| 2729 | | static void cfunc_vcr(void *param) |
| 2730 | | { |
| 2731 | | ((rsp_cop2 *)param)->vcr(); |
| 2732 | | } |
| 2733 | | |
| 2734 | | |
| 2735 | | // VMRG |
| 2736 | | // |
| 2737 | | // 31 25 24 20 15 10 5 0 |
| 2738 | | // ------------------------------------------------------ |
| 2739 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 2740 | | // ------------------------------------------------------ |
| 2741 | | // |
| 2742 | | // Merges two vectors according to compare flags |
| 2743 | | |
| 2744 | | inline void rsp_cop2_simd::vmrg() |
| 2745 | | { |
| 2746 | | int op = m_op; |
| 2747 | | |
| 2748 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2749 | | __m128i s2mask = _mm_cmpeq_epi16(m_xvflag[COMPARE], _mm_setzero_si128()); |
| 2750 | | __m128i s1mask = _mm_xor_si128(s2mask, vec_neg1); |
| 2751 | | __m128i result = _mm_and_si128(m_xv[VS1REG], s1mask); |
| 2752 | | m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, s2mask)); |
| 2753 | | m_accum_l = m_xv[VDREG]; |
| 2754 | | } |
| 2755 | | |
| 2756 | | static void cfunc_vmrg(void *param) |
| 2757 | | { |
| 2758 | | ((rsp_cop2 *)param)->vmrg(); |
| 2759 | | } |
| 2760 | | |
| 2761 | | |
| 2762 | | // VAND |
| 2763 | | // |
| 2764 | | // 31 25 24 20 15 10 5 0 |
| 2765 | | // ------------------------------------------------------ |
| 2766 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 2767 | | // ------------------------------------------------------ |
| 2768 | | // |
| 2769 | | // Bitwise AND of two vector registers |
| 2770 | | |
| 2771 | | inline void rsp_cop2_simd::vand() |
| 2772 | | { |
| 2773 | | int op = m_op; |
| 2774 | | |
| 2775 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2776 | | m_accum_l = m_xv[VDREG] = _mm_and_si128(m_xv[VS1REG], shuf); |
| 2777 | | } |
| 2778 | | |
| 2779 | | static void cfunc_vand(void *param) |
| 2780 | | { |
| 2781 | | ((rsp_cop2 *)param)->vand(); |
| 2782 | | } |
| 2783 | | |
| 2784 | | |
| 2785 | | // VNAND |
| 2786 | | // |
| 2787 | | // 31 25 24 20 15 10 5 0 |
| 2788 | | // ------------------------------------------------------ |
| 2789 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 2790 | | // ------------------------------------------------------ |
| 2791 | | // |
| 2792 | | // Bitwise NOT AND of two vector registers |
| 2793 | | |
| 2794 | | inline void rsp_cop2_simd::vnand() |
| 2795 | | { |
| 2796 | | int op = m_op; |
| 2797 | | |
| 2798 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2799 | | m_accum_l = m_xv[VDREG] = _mm_xor_si128(_mm_and_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 2800 | | } |
| 2801 | | |
| 2802 | | static void cfunc_vnand(void *param) |
| 2803 | | { |
| 2804 | | ((rsp_cop2 *)param)->vnand(); |
| 2805 | | } |
| 2806 | | |
| 2807 | | |
| 2808 | | // VOR |
| 2809 | | // |
| 2810 | | // 31 25 24 20 15 10 5 0 |
| 2811 | | // ------------------------------------------------------ |
| 2812 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2813 | | // ------------------------------------------------------ |
| 2814 | | // |
| 2815 | | // Bitwise OR of two vector registers |
| 2816 | | |
| 2817 | | inline void rsp_cop2_simd::vor() |
| 2818 | | { |
| 2819 | | int op = m_op; |
| 2820 | | |
| 2821 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2822 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(m_xv[VS1REG], shuf); |
| 2823 | | } |
| 2824 | | |
| 2825 | | static void cfunc_vor_simd(void *param) |
| 2826 | | { |
| 2827 | | ((rsp_cop2 *)param)->vor(); |
| 2828 | | } |
| 2829 | | |
| 2830 | | |
| 2831 | | // VNOR |
| 2832 | | // |
| 2833 | | // 31 25 24 20 15 10 5 0 |
| 2834 | | // ------------------------------------------------------ |
| 2835 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2836 | | // ------------------------------------------------------ |
| 2837 | | // |
| 2838 | | // Bitwise NOT OR of two vector registers |
| 2839 | | |
| 2840 | | inline void rsp_cop2_simd::vnor() |
| 2841 | | { |
| 2842 | | int op = m_op; |
| 2843 | | |
| 2844 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2845 | | m_accum_l = m_xv[VDREG] = _mm_xor_si128(_mm_or_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 2846 | | } |
| 2847 | | |
| 2848 | | static void cfunc_vnor(void *param) |
| 2849 | | { |
| 2850 | | ((rsp_cop2 *)param)->vnor(); |
| 2851 | | } |
| 2852 | | |
| 2853 | | |
| 2854 | | // VXOR |
| 2855 | | // |
| 2856 | | // 31 25 24 20 15 10 5 0 |
| 2857 | | // ------------------------------------------------------ |
| 2858 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2859 | | // ------------------------------------------------------ |
| 2860 | | // |
| 2861 | | // Bitwise XOR of two vector registers |
| 2862 | | |
| 2863 | | inline void rsp_cop2_simd::vxor() |
| 2864 | | { |
| 2865 | | int op = m_op; |
| 2866 | | |
| 2867 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2868 | | m_accum_l = m_xv[VDREG] = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 2869 | | } |
| 2870 | | |
| 2871 | | static void cfunc_vxor(void *param) |
| 2872 | | { |
| 2873 | | ((rsp_cop2 *)param)->vxor(); |
| 2874 | | } |
| 2875 | | |
| 2876 | | |
| 2877 | | // VNXOR |
| 2878 | | // |
| 2879 | | // 31 25 24 20 15 10 5 0 |
| 2880 | | // ------------------------------------------------------ |
| 2881 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2882 | | // ------------------------------------------------------ |
| 2883 | | // |
| 2884 | | // Bitwise NOT XOR of two vector registers |
| 2885 | | |
| 2886 | | inline void rsp_cop2_simd::vnxor() |
| 2887 | | { |
| 2888 | | int op = m_op; |
| 2889 | | |
| 2890 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2891 | | m_accum_l = m_xv[VDREG] = _mm_xor_si128(_mm_xor_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 2892 | | } |
| 2893 | | |
| 2894 | | static void cfunc_vnxor(void *param) |
| 2895 | | { |
| 2896 | | ((rsp_cop2 *)param)->vnxor(); |
| 2897 | | } |
| 2898 | | |
| 2899 | | |
| 2900 | | // VRCP |
| 2901 | | // |
| 2902 | | // 31 25 24 20 15 10 5 0 |
| 2903 | | // ------------------------------------------------------ |
| 2904 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2905 | | // ------------------------------------------------------ |
| 2906 | | // |
| 2907 | | // Calculates reciprocal |
| 2908 | | |
| 2909 | | inline void rsp_cop2_simd::vrcp() |
| 2910 | | { |
| 2911 | | int op = m_op; |
| 2912 | | |
| 2913 | | INT32 shifter = 0; |
| 2914 | | UINT16 urec; |
| 2915 | | INT32 rec; |
| 2916 | | EXTRACT16(m_xv[VS2REG], urec, EL); |
| 2917 | | rec = (INT16)urec; |
| 2918 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2919 | | if (datainput) |
| 2920 | | { |
| 2921 | | for (int i = 0; i < 32; i++) |
| 2922 | | { |
| 2923 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2924 | | { |
| 2925 | | shifter = i; |
| 2926 | | break; |
| 2927 | | } |
| 2928 | | } |
| 2929 | | } |
| 2930 | | else |
| 2931 | | { |
| 2932 | | shifter = 0x10; |
| 2933 | | } |
| 2934 | | |
| 2935 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2936 | | INT32 fetchval = rsp_divtable[address]; |
| 2937 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2938 | | if (rec < 0) |
| 2939 | | { |
| 2940 | | temp = ~temp; |
| 2941 | | } |
| 2942 | | if (!rec) |
| 2943 | | { |
| 2944 | | temp = 0x7fffffff; |
| 2945 | | } |
| 2946 | | else if (rec == 0xffff8000) |
| 2947 | | { |
| 2948 | | temp = 0xffff0000; |
| 2949 | | } |
| 2950 | | rec = temp; |
| 2951 | | |
| 2952 | | m_reciprocal_res = rec; |
| 2953 | | m_dp_allowed = 0; |
| 2954 | | |
| 2955 | | INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 2956 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2957 | | } |
| 2958 | | |
| 2959 | | static void cfunc_vrcp(void *param) |
| 2960 | | { |
| 2961 | | ((rsp_cop2 *)param)->vrcp(); |
| 2962 | | } |
| 2963 | | |
| 2964 | | |
| 2965 | | // VRCPL |
| 2966 | | // |
| 2967 | | // 31 25 24 20 15 10 5 0 |
| 2968 | | // ------------------------------------------------------ |
| 2969 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2970 | | // ------------------------------------------------------ |
| 2971 | | // |
| 2972 | | // Calculates reciprocal low part |
| 2973 | | |
| 2974 | | inline void rsp_cop2_simd::vrcpl() |
| 2975 | | { |
| 2976 | | int op = m_op; |
| 2977 | | |
| 2978 | | #if SIMUL_SIMD |
| 2979 | | m_old_reciprocal_res = m_reciprocal_res; |
| 2980 | | m_old_reciprocal_high = m_reciprocal_high; |
| 2981 | | m_old_dp_allowed = m_dp_allowed; |
| 2982 | | #endif |
| 2983 | | |
| 2984 | | INT32 shifter = 0; |
| 2985 | | |
| 2986 | | UINT16 urec; |
| 2987 | | EXTRACT16(m_xv[VS2REG], urec, EL); |
| 2988 | | INT32 rec = (INT16)urec; |
| 2989 | | INT32 datainput = rec; |
| 2990 | | |
| 2991 | | if (m_dp_allowed) |
| 2992 | | { |
| 2993 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2994 | | datainput = rec; |
| 2995 | | |
| 2996 | | if (rec < 0) |
| 2997 | | { |
| 2998 | | if (rec < -32768) |
| 2999 | | { |
| 3000 | | datainput = ~datainput; |
| 3001 | | } |
| 3002 | | else |
| 3003 | | { |
| 3004 | | datainput = -datainput; |
| 3005 | | } |
| 3006 | | } |
| 3007 | | } |
| 3008 | | else if (datainput < 0) |
| 3009 | | { |
| 3010 | | datainput = -datainput; |
| 3011 | | |
| 3012 | | shifter = 0x10; |
| 3013 | | } |
| 3014 | | |
| 3015 | | if (datainput) |
| 3016 | | { |
| 3017 | | for (int i = 0; i < 32; i++) |
| 3018 | | { |
| 3019 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3020 | | { |
| 3021 | | shifter = i; |
| 3022 | | break; |
| 3023 | | } |
| 3024 | | } |
| 3025 | | } |
| 3026 | | |
| 3027 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3028 | | INT32 fetchval = rsp_divtable[address]; |
| 3029 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 3030 | | temp ^= rec >> 31; |
| 3031 | | |
| 3032 | | if (!rec) |
| 3033 | | { |
| 3034 | | temp = 0x7fffffff; |
| 3035 | | } |
| 3036 | | else if (rec == 0xffff8000) |
| 3037 | | { |
| 3038 | | temp = 0xffff0000; |
| 3039 | | } |
| 3040 | | rec = temp; |
| 3041 | | |
| 3042 | | m_reciprocal_res = rec; |
| 3043 | | m_dp_allowed = 0; |
| 3044 | | |
| 3045 | | INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 3046 | | |
| 3047 | | for (int i = 0; i < 8; i++) |
| 3048 | | { |
| 3049 | | INT16 val; |
| 3050 | | EXTRACT16(m_xv[VS2REG], val, VEC_EL_2(EL, i)); |
| 3051 | | SET_ACCUM_L(val, i); |
| 3052 | | } |
| 3053 | | } |
| 3054 | | |
| 3055 | | static void cfunc_vrcpl(void *param) |
| 3056 | | { |
| 3057 | | ((rsp_cop2 *)param)->vrcpl(); |
| 3058 | | } |
| 3059 | | |
| 3060 | | |
| 3061 | | // VRCPH |
| 3062 | | // |
| 3063 | | // 31 25 24 20 15 10 5 0 |
| 3064 | | // ------------------------------------------------------ |
| 3065 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 3066 | | // ------------------------------------------------------ |
| 3067 | | // |
| 3068 | | // Calculates reciprocal high part |
| 3069 | | |
| 3070 | | inline void rsp_cop2_simd::vrcph() |
| 3071 | | { |
| 3072 | | int op = m_op; |
| 3073 | | |
| 3074 | | #if SIMUL_SIMD |
| 3075 | | m_old_reciprocal_res = m_reciprocal_res; |
| 3076 | | m_old_reciprocal_high = m_reciprocal_high; |
| 3077 | | m_old_dp_allowed = m_dp_allowed; |
| 3078 | | #endif |
| 3079 | | |
| 3080 | | UINT16 rcph; |
| 3081 | | EXTRACT16(m_xv[VS2REG], rcph, EL); |
| 3082 | | m_reciprocal_high = rcph << 16; |
| 3083 | | m_dp_allowed = 1; |
| 3084 | | |
| 3085 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3086 | | |
| 3087 | | INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); |
| 3088 | | } |
| 3089 | | |
| 3090 | | static void cfunc_vrcph(void *param) |
| 3091 | | { |
| 3092 | | ((rsp_cop2 *)param)->vrcph(); |
| 3093 | | } |
| 3094 | | |
| 3095 | | |
| 3096 | | // VMOV |
| 3097 | | // |
| 3098 | | // 31 25 24 20 15 10 5 0 |
| 3099 | | // ------------------------------------------------------ |
| 3100 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 3101 | | // ------------------------------------------------------ |
| 3102 | | // |
| 3103 | | // Moves element from vector to destination vector |
| 3104 | | |
| 3105 | | inline void rsp_cop2_simd::vmov() |
| 3106 | | { |
| 3107 | | int op = m_op; |
| 3108 | | |
| 3109 | | INT16 val; |
| 3110 | | EXTRACT16(m_xv[VS2REG], val, EL); |
| 3111 | | INSERT16(m_xv[VDREG], val, VS1REG); |
| 3112 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3113 | | } |
| 3114 | | |
| 3115 | | static void cfunc_vmov(void *param) |
| 3116 | | { |
| 3117 | | ((rsp_cop2 *)param)->vmov(); |
| 3118 | | } |
| 3119 | | |
| 3120 | | |
| 3121 | | // VRSQ |
| 3122 | | // |
| 3123 | | // 31 25 24 20 15 10 5 0 |
| 3124 | | // ------------------------------------------------------ |
| 3125 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 3126 | | // ------------------------------------------------------ |
| 3127 | | // |
| 3128 | | // Calculates reciprocal square-root |
| 3129 | | |
| 3130 | | inline void rsp_cop2_simd::vrsq() |
| 3131 | | { |
| 3132 | | int op = m_op; |
| 3133 | | |
| 3134 | | INT32 shifter = 0; |
| 3135 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 3136 | | INT32 datainput = (rec < 0) ? (-rec) : (rec); |
| 3137 | | |
| 3138 | | if (rec < 0) |
| 3139 | | { |
| 3140 | | if (rec < -32768) |
| 3141 | | { |
| 3142 | | datainput = ~datainput; |
| 3143 | | } |
| 3144 | | else |
| 3145 | | { |
| 3146 | | datainput = -datainput; |
| 3147 | | } |
| 3148 | | } |
| 3149 | | |
| 3150 | | if (datainput) |
| 3151 | | { |
| 3152 | | for (int i = 0; i < 32; i++) |
| 3153 | | { |
| 3154 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3155 | | { |
| 3156 | | shifter = i; |
| 3157 | | break; |
| 3158 | | } |
| 3159 | | } |
| 3160 | | } |
| 3161 | | else |
| 3162 | | { |
| 3163 | | shifter = 0; |
| 3164 | | } |
| 3165 | | |
| 3166 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3167 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3168 | | |
| 3169 | | INT32 fetchval = rsp_divtable[address]; |
| 3170 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3171 | | if (rec < 0) |
| 3172 | | { |
| 3173 | | temp = ~temp; |
| 3174 | | } |
| 3175 | | if (!rec) |
| 3176 | | { |
| 3177 | | temp = 0x7fffffff; |
| 3178 | | } |
| 3179 | | else if (rec == 0xffff8000) |
| 3180 | | { |
| 3181 | | temp = 0xffff0000; |
| 3182 | | } |
| 3183 | | rec = temp; |
| 3184 | | |
| 3185 | | if (rec < 0) |
| 3186 | | { |
| 3187 | | if (m_dp_allowed) |
| 3188 | | { |
| 3189 | | if (rec < -32768) |
| 3190 | | { |
| 3191 | | datainput = ~datainput; |
| 3192 | | } |
| 3193 | | else |
| 3194 | | { |
| 3195 | | datainput = -datainput; |
| 3196 | | } |
| 3197 | | } |
| 3198 | | else |
| 3199 | | { |
| 3200 | | datainput = -datainput; |
| 3201 | | } |
| 3202 | | } |
| 3203 | | |
| 3204 | | if (datainput) |
| 3205 | | { |
| 3206 | | for (int i = 0; i < 32; i++) |
| 3207 | | { |
| 3208 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3209 | | { |
| 3210 | | shifter = i; |
| 3211 | | break; |
| 3212 | | } |
| 3213 | | } |
| 3214 | | } |
| 3215 | | else |
| 3216 | | { |
| 3217 | | shifter = 0; |
| 3218 | | } |
| 3219 | | |
| 3220 | | address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3221 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3222 | | |
| 3223 | | fetchval = rsp_divtable[address]; |
| 3224 | | temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3225 | | if (rec < 0) |
| 3226 | | { |
| 3227 | | temp = ~temp; |
| 3228 | | } |
| 3229 | | if (!rec) |
| 3230 | | { |
| 3231 | | temp = 0x7fff; |
| 3232 | | } |
| 3233 | | else if (rec == 0xffff8000) |
| 3234 | | { |
| 3235 | | temp = 0x0000; |
| 3236 | | } |
| 3237 | | rec = temp; |
| 3238 | | |
| 3239 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 3240 | | for (int i = 0; i < 8; i++) |
| 3241 | | { |
| 3242 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3243 | | } |
| 3244 | | } |
| 3245 | | |
| 3246 | | static void cfunc_vrsq(void *param) |
| 3247 | | { |
| 3248 | | ((rsp_cop2 *)param)->vrsq(); |
| 3249 | | } |
| 3250 | | |
| 3251 | | |
| 3252 | | // VRSQL |
| 3253 | | // |
| 3254 | | // 31 25 24 20 15 10 5 0 |
| 3255 | | // ------------------------------------------------------ |
| 3256 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 3257 | | // ------------------------------------------------------ |
| 3258 | | // |
| 3259 | | // Calculates reciprocal square-root low part |
| 3260 | | |
| 3261 | | inline void rsp_cop2_simd::vrsql() |
| 3262 | | { |
| 3263 | | int op = m_op; |
| 3264 | | |
| 3265 | | #if SIMUL_SIMD |
| 3266 | | m_old_reciprocal_res = m_reciprocal_res; |
| 3267 | | m_old_reciprocal_high = m_reciprocal_high; |
| 3268 | | m_old_dp_allowed = m_dp_allowed; |
| 3269 | | #endif |
| 3270 | | |
| 3271 | | INT32 shifter = 0; |
| 3272 | | UINT16 val; |
| 3273 | | EXTRACT16(m_xv[VS2REG], val, EL); |
| 3274 | | INT32 rec = (INT16)val; |
| 3275 | | INT32 datainput = rec; |
| 3276 | | |
| 3277 | | if (m_dp_allowed) |
| 3278 | | { |
| 3279 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 3280 | | datainput = rec; |
| 3281 | | |
| 3282 | | if (rec < 0) |
| 3283 | | { |
| 3284 | | if (rec < -32768) |
| 3285 | | { |
| 3286 | | datainput = ~datainput; |
| 3287 | | } |
| 3288 | | else |
| 3289 | | { |
| 3290 | | datainput = -datainput; |
| 3291 | | } |
| 3292 | | } |
| 3293 | | } |
| 3294 | | else if (datainput < 0) |
| 3295 | | { |
| 3296 | | datainput = -datainput; |
| 3297 | | |
| 3298 | | shifter = 0x10; |
| 3299 | | } |
| 3300 | | |
| 3301 | | if (datainput) |
| 3302 | | { |
| 3303 | | for (int i = 0; i < 32; i++) |
| 3304 | | { |
| 3305 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 3306 | | { |
| 3307 | | shifter = i; |
| 3308 | | break; |
| 3309 | | } |
| 3310 | | } |
| 3311 | | } |
| 3312 | | |
| 3313 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3314 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3315 | | |
| 3316 | | INT32 fetchval = rsp_divtable[address]; |
| 3317 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3318 | | temp ^= rec >> 31; |
| 3319 | | |
| 3320 | | if (!rec) |
| 3321 | | { |
| 3322 | | temp = 0x7fffffff; |
| 3323 | | } |
| 3324 | | else if (rec == 0xffff8000) |
| 3325 | | { |
| 3326 | | temp = 0xffff0000; |
| 3327 | | } |
| 3328 | | rec = temp; |
| 3329 | | |
| 3330 | | m_reciprocal_res = rec; |
| 3331 | | m_dp_allowed = 0; |
| 3332 | | |
| 3333 | | INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 3334 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3335 | | } |
| 3336 | | |
| 3337 | | static void cfunc_vrsql(void *param) |
| 3338 | | { |
| 3339 | | ((rsp_cop2 *)param)->vrsql(); |
| 3340 | | } |
| 3341 | | |
| 3342 | | |
| 3343 | | // VRSQH |
| 3344 | | // |
| 3345 | | // 31 25 24 20 15 10 5 0 |
| 3346 | | // ------------------------------------------------------ |
| 3347 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 3348 | | // ------------------------------------------------------ |
| 3349 | | // |
| 3350 | | // Calculates reciprocal square-root high part |
| 3351 | | |
| 3352 | | inline void rsp_cop2_simd::vrsqh() |
| 3353 | | { |
| 3354 | | int op = m_op; |
| 3355 | | |
| 3356 | | #if SIMUL_SIMD |
| 3357 | | m_old_reciprocal_res = m_reciprocal_res; |
| 3358 | | m_old_reciprocal_high = m_reciprocal_high; |
| 3359 | | m_old_dp_allowed = m_dp_allowed; |
| 3360 | | #endif |
| 3361 | | |
| 3362 | | UINT16 val; |
| 3363 | | EXTRACT16(m_xv[VS2REG], val, EL); |
| 3364 | | m_reciprocal_high = val << 16; |
| 3365 | | m_dp_allowed = 1; |
| 3366 | | |
| 3367 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3368 | | |
| 3369 | | INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); // store high part |
| 3370 | | } |
| 3371 | | |
| 3372 | | static void cfunc_vrsqh(void *param) |
| 3373 | | { |
| 3374 | | ((rsp_cop2 *)param)->vrsqh(); |
| 3375 | | } |
| 3376 | | |
| 3377 | | |
| 3378 | | /*************************************************************************** |
| 3379 | | Vector Flag Reading/Writing |
| 3380 | | ***************************************************************************/ |
| 3381 | | |
| 3382 | | inline void rsp_cop2_simd::mfc2() |
| 3383 | | { |
| 3384 | | UINT32 op = m_op; |
| 3385 | | int el = (op >> 7) & 0xf; |
| 3386 | | |
| 3387 | | UINT16 out; |
| 3388 | | EXTRACT16(m_xv[VS1REG], out, (el >> 1)); |
| 3389 | | out >>= (1 - (el & 1)) * 8; |
| 3390 | | out &= 0x00ff; |
| 3391 | | |
| 3392 | | el++; |
| 3393 | | |
| 3394 | | UINT16 temp; |
| 3395 | | EXTRACT16(m_xv[VS1REG], temp, (el >> 1)); |
| 3396 | | temp >>= (1 - (el & 1)) * 8; |
| 3397 | | temp &= 0x00ff; |
| 3398 | | |
| 3399 | | m_rsp.m_rsp_state->r[RTREG] = (INT32)(INT16)((out << 8) | temp); |
| 3400 | | } |
| 3401 | | |
| 3402 | | static void cfunc_mfc2(void *param) |
| 3403 | | { |
| 3404 | | ((rsp_cop2 *)param)->mfc2(); |
| 3405 | | } |
| 3406 | | |
| 3407 | | |
| 3408 | | inline void rsp_cop2_simd::cfc2() |
| 3409 | | { |
| 3410 | | UINT32 op = m_op; |
| 3411 | | if (RTREG) |
| 3412 | | { |
| 3413 | | switch(RDREG) |
| 3414 | | { |
| 3415 | | case 0: |
| 3416 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3417 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 3418 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 3419 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 3420 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 3421 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 3422 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 3423 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 3424 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 3425 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 3426 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 3427 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 3428 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 3429 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 3430 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 3431 | | ((ZERO_FLAG(7) & 1) << 15); |
| 3432 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3433 | | break; |
| 3434 | | case 1: |
| 3435 | | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 3436 | | ((COMPARE_FLAG(1) & 1) << 1) | |
| 3437 | | ((COMPARE_FLAG(2) & 1) << 2) | |
| 3438 | | ((COMPARE_FLAG(3) & 1) << 3) | |
| 3439 | | ((COMPARE_FLAG(4) & 1) << 4) | |
| 3440 | | ((COMPARE_FLAG(5) & 1) << 5) | |
| 3441 | | ((COMPARE_FLAG(6) & 1) << 6) | |
| 3442 | | ((COMPARE_FLAG(7) & 1) << 7) | |
| 3443 | | ((CLIP2_FLAG(0) & 1) << 8) | |
| 3444 | | ((CLIP2_FLAG(1) & 1) << 9) | |
| 3445 | | ((CLIP2_FLAG(2) & 1) << 10) | |
| 3446 | | ((CLIP2_FLAG(3) & 1) << 11) | |
| 3447 | | ((CLIP2_FLAG(4) & 1) << 12) | |
| 3448 | | ((CLIP2_FLAG(5) & 1) << 13) | |
| 3449 | | ((CLIP2_FLAG(6) & 1) << 14) | |
| 3450 | | ((CLIP2_FLAG(7) & 1) << 15); |
| 3451 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3452 | | break; |
| 3453 | | case 2: |
| 3454 | | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3455 | | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3456 | | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3457 | | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3458 | | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3459 | | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3460 | | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3461 | | ((CLIP1_FLAG(7) & 1) << 7); |
| 3462 | | break; |
| 3463 | | } |
| 3464 | | } |
| 3465 | | } |
| 3466 | | |
| 3467 | | static void cfunc_cfc2(void *param) |
| 3468 | | { |
| 3469 | | ((rsp_cop2 *)param)->cfc2(); |
| 3470 | | } |
| 3471 | | |
| 3472 | | |
| 3473 | | inline void rsp_cop2_simd::mtc2() |
| 3474 | | { |
| 3475 | | UINT32 op = m_op; |
| 3476 | | int el = (op >> 7) & 0xf; |
| 3477 | | INSERT16(m_xv[VS1REG], RTVAL, el >> 1); |
| 3478 | | } |
| 3479 | | |
| 3480 | | static void cfunc_mtc2(void *param) |
| 3481 | | { |
| 3482 | | ((rsp_cop2 *)param)->mtc2(); |
| 3483 | | } |
| 3484 | | |
| 3485 | | |
| 3486 | | inline void rsp_cop2_simd::ctc2() /* Copies individual bits of RTVAL into the vector flag arrays selected by RDREG; RDREG values other than 0, 1 and 2 change nothing (the switch has no default). */ |
| 3487 | | { |
| 3488 | | UINT32 op = m_op; /* presumably read implicitly by the RDREG / RTVAL field macros - confirm against their definitions */ |
| 3489 | | switch(RDREG) |
| 3490 | | { |
| 3491 | | case 0: /* RTVAL bits 0-7 -> m_vflag[0] and carry flags; bits 8-15 -> m_vflag[3] and zero flags */ |
| 3492 | | CLEAR_CARRY_FLAGS(); |
| 3493 | | CLEAR_ZERO_FLAGS(); |
| 3494 | | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* each flag is stored as a full 0xffff / 0 word, not as a single bit */ |
| 3495 | | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3496 | | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3497 | | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3498 | | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3499 | | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3500 | | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3501 | | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3502 | | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } /* NOTE(review): the same bits were already written to m_vflag[0] above; confirm whether SET_CARRY_FLAG targets different storage in this class */ |
| 3503 | | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 3504 | | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 3505 | | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 3506 | | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 3507 | | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 3508 | | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 3509 | | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 3510 | | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; /* upper byte of the 16-bit value feeds flag array 3 */ |
| 3511 | | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3512 | | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3513 | | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3514 | | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3515 | | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3516 | | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3517 | | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3518 | | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } /* bit (8 + n) of RTVAL maps to zero flag n */ |
| 3519 | | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 3520 | | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 3521 | | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 3522 | | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 3523 | | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 3524 | | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 3525 | | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 3526 | | break; |
| 3527 | | case 1: /* RTVAL bits 0-7 -> m_vflag[1] and compare flags; bits 8-15 -> m_vflag[4] and clip2 flags */ |
| 3528 | | CLEAR_COMPARE_FLAGS(); |
| 3529 | | CLEAR_CLIP2_FLAGS(); |
| 3530 | | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3531 | | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3532 | | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3533 | | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3534 | | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3535 | | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3536 | | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3537 | | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3538 | | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 3539 | | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 3540 | | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 3541 | | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 3542 | | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 3543 | | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 3544 | | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 3545 | | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 3546 | | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 3547 | | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3548 | | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3549 | | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3550 | | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3551 | | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3552 | | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3553 | | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3554 | | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 3555 | | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 3556 | | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 3557 | | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 3558 | | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 3559 | | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 3560 | | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 3561 | | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 3562 | | break; |
| 3563 | | case 2: /* only RTVAL bits 0-7 are used: they go to m_vflag[2] and the clip1 flags */ |
| 3564 | | CLEAR_CLIP1_FLAGS(); |
| 3565 | | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3566 | | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3567 | | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3568 | | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3569 | | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3570 | | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3571 | | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3572 | | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3573 | | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 3574 | | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 3575 | | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 3576 | | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 3577 | | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 3578 | | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 3579 | | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 3580 | | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 3581 | | break; |
| 3582 | | } |
| 3583 | | } |
| 3584 | | |
| 3585 | | static void cfunc_ctc2(void *param) /* Static trampoline: casts the opaque param to rsp_cop2* and calls its ctc2() member. */ |
| 3586 | | { |
| 3587 | | ((rsp_cop2 *)param)->ctc2(); |
| 3588 | | } |
| 3589 | | |
| 3590 | | |
| 3591 | | /*************************************************************************** |
| 3592 | | COP2 Opcode Compilation |
| 3593 | | ***************************************************************************/ |
| 3594 | | |
| 3595 | | int rsp_cop2_simd::generate_cop2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) /* Emits UML into block for one COP2 instruction, dispatching on the RSREG field of the opcode word; returns TRUE when the opcode was handled here, FALSE for any RSREG value not listed. compiler is only forwarded to generate_vector_opcode. */ |
| 3596 | | { |
| 3597 | | UINT32 op = desc->opptr.l[0]; |
| 3598 | | UINT8 opswitch = RSREG; |
| 3599 | | |
| 3600 | | switch (opswitch) |
| 3601 | | { |
| 3602 | | case 0x00: /* MFCz: nothing is emitted when the RTREG field is 0, but the opcode still counts as handled */ |
| 3603 | | if (RTREG != 0) |
| 3604 | | { |
| 3605 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] |
| 3606 | | UML_CALLC(block, cfunc_mfc2, this); // callc mfc2 |
| 3607 | | } |
| 3608 | | return TRUE; |
| 3609 | | |
| 3610 | | case 0x02: /* CFCz: same RTREG == 0 shortcut as MFCz */ |
| 3611 | | if (RTREG != 0) |
| 3612 | | { |
| 3613 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] |
| 3614 | | UML_CALLC(block, cfunc_cfc2, this); // callc cfc2 |
| 3615 | | } |
| 3616 | | return TRUE; |
| 3617 | | |
| 3618 | | case 0x04: /* MTCz: always emitted */ |
| 3619 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] |
| 3620 | | UML_CALLC(block, cfunc_mtc2, this); // callc mtc2 |
| 3621 | | return TRUE; |
| 3622 | | |
| 3623 | | case 0x06: /* CTCz: always emitted */ |
| 3624 | | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] |
| 3625 | | UML_CALLC(block, cfunc_ctc2, this); // callc ctc2 |
| 3626 | | return TRUE; |
| 3627 | | |
| 3628 | | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 3629 | | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 3630 | | return generate_vector_opcode(block, compiler, desc); /* RSREG 0x10-0x1f: delegated, and that function's result is returned unchanged */ |
| 3631 | | } |
| 3632 | | return FALSE; |
| 3633 | | } |
trunk/src/emu/cpu/rsp/rspdrc.c
| r241959 | r241960 | |
| 25 | 25 | #include "rsp.h" |
| 26 | 26 | #include "rspdiv.h" |
| 27 | 27 | #include "rspfe.h" |
| 28 | | #include "rspcp2.h" |
| 29 | 28 | #include "cpu/drcfe.h" |
| 30 | 29 | #include "cpu/drcuml.h" |
| 31 | 30 | #include "cpu/drcumlsh.h" |
| r241959 | r241960 | |
| 53 | 52 | |
| 54 | 53 | |
| 55 | 54 | /*************************************************************************** |
| 56 | | Macros |
| 55 | MACROS |
| 57 | 56 | ***************************************************************************/ |
| 58 | 57 | |
| 59 | 58 | #define R32(reg) m_regmap[reg] |
| 60 | 59 | |
| 61 | 60 | /*************************************************************************** |
| 62 | | Inline Functions |
| 61 | HELPFUL DEFINES |
| 63 | 62 | ***************************************************************************/ |
| 64 | 63 | |
| 64 | #define VDREG ((op >> 6) & 0x1f) /* bits 6-10 of op; like the three macros below, expects a variable named op in scope */ |
| 65 | #define VS1REG ((op >> 11) & 0x1f) /* bits 11-15 of op */ |
| 66 | #define VS2REG ((op >> 16) & 0x1f) /* bits 16-20 of op */ |
| 67 | #define EL ((op >> 21) & 0xf) /* bits 21-24 of op; used as the row index passed to VEC_EL_2 */ |
| 68 | |
| 69 | #define SIMD_EXTRACT16(reg, value, element) /* value = 16-bit lane (element & 7) of reg; one case per lane so that each _mm_extract_epi16 call is written with a literal lane index */ \ |
| 70 | switch((element) & 7) \ |
| 71 | { \ |
| 72 | case 0: value = _mm_extract_epi16(reg, 0); break; \ |
| 73 | case 1: value = _mm_extract_epi16(reg, 1); break; \ |
| 74 | case 2: value = _mm_extract_epi16(reg, 2); break; \ |
| 75 | case 3: value = _mm_extract_epi16(reg, 3); break; \ |
| 76 | case 4: value = _mm_extract_epi16(reg, 4); break; \ |
| 77 | case 5: value = _mm_extract_epi16(reg, 5); break; \ |
| 78 | case 6: value = _mm_extract_epi16(reg, 6); break; \ |
| 79 | case 7: value = _mm_extract_epi16(reg, 7); break; \ |
| 80 | } |
| 81 | |
| 82 | |
| 83 | #define SIMD_INSERT16(reg, value, element) /* overwrites 16-bit lane (element & 7) of reg with value, leaving the other lanes as they were; reg must be an lvalue because it is reassigned */ \ |
| 84 | switch((element) & 7) \ |
| 85 | { \ |
| 86 | case 0: reg = _mm_insert_epi16(reg, value, 0); break; \ |
| 87 | case 1: reg = _mm_insert_epi16(reg, value, 1); break; \ |
| 88 | case 2: reg = _mm_insert_epi16(reg, value, 2); break; \ |
| 89 | case 3: reg = _mm_insert_epi16(reg, value, 3); break; \ |
| 90 | case 4: reg = _mm_insert_epi16(reg, value, 4); break; \ |
| 91 | case 5: reg = _mm_insert_epi16(reg, value, 5); break; \ |
| 92 | case 6: reg = _mm_insert_epi16(reg, value, 6); break; \ |
| 93 | case 7: reg = _mm_insert_epi16(reg, value, 7); break; \ |
| 94 | } |
| 95 | |
| 96 | |
| 97 | #define SIMD_EXTRACT16C(reg, value, element) value = _mm_extract_epi16(reg, element); /* variant without the switch: element is passed straight to the intrinsic and is not masked */ |
| 98 | #define SIMD_INSERT16C(reg, value, element) reg = _mm_insert_epi16(reg, value, element); /* insert counterpart of SIMD_EXTRACT16C */ |
| 99 | |
| 100 | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] /* byte access; the index is XORed with 1, i.e. the two bytes of each 16-bit pair are swapped */ |
| 101 | #define W_VREG_S(reg, offset) m_v[(reg)].s[(offset)] /* 16-bit element as an lvalue (no cast) */ |
| 102 | #define VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] /* 16-bit element read as a signed value */ |
| 103 | |
| 104 | #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) /* table lookup: row x, column z of vector_elements_2 */ |
| 105 | |
| 106 | #define ACCUM(x) m_accum[x].q /* whole accumulator entry x through its .q member */ |
| 107 | |
| 108 | #define CARRY 0 /* CARRY..CLIP2 are the first index into m_vflag / m_xvflag */ |
| 109 | #define COMPARE 1 |
| 110 | #define CLIP1 2 |
| 111 | #define ZERO 3 |
| 112 | #define CLIP2 4 |
| 113 | |
| 114 | |
| 115 | #if USE_SIMD |
| 116 | static void cfunc_mfc2_simd(void *param); |
| 117 | static void cfunc_cfc2_simd(void *param); |
| 118 | static void cfunc_mtc2_simd(void *param); |
| 119 | static void cfunc_ctc2_simd(void *param); |
| 120 | #endif |
| 121 | |
| 122 | #if (!USE_SIMD || SIMUL_SIMD) |
| 123 | static void cfunc_mfc2_scalar(void *param); |
| 124 | static void cfunc_cfc2_scalar(void *param); |
| 125 | static void cfunc_mtc2_scalar(void *param); |
| 126 | static void cfunc_ctc2_scalar(void *param); |
| 127 | #endif |
| 128 | |
| 129 | |
| 130 | #if USE_SIMD |
| 131 | inline UINT16 rsp_device::VEC_ACCUM_H(int x) /* Returns 16-bit lane (x & 7) of m_accum_h. */ |
| 132 | { |
| 133 | UINT16 out; /* always assigned: the switch inside SIMD_EXTRACT16 covers all eight values of (x & 7) */ |
| 134 | SIMD_EXTRACT16(m_accum_h, out, x); |
| 135 | return out; |
| 136 | } |
| 137 | |
| 138 | inline UINT16 rsp_device::VEC_ACCUM_M(int x) /* Returns 16-bit lane (x & 7) of m_accum_m. */ |
| 139 | { |
| 140 | UINT16 out; /* always assigned: the switch inside SIMD_EXTRACT16 covers all eight values of (x & 7) */ |
| 141 | SIMD_EXTRACT16(m_accum_m, out, x); |
| 142 | return out; |
| 143 | } |
| 144 | |
| 145 | inline UINT16 rsp_device::VEC_ACCUM_L(int x) /* Returns 16-bit lane (x & 7) of m_accum_l. */ |
| 146 | { |
| 147 | UINT16 out; /* always assigned: the switch inside SIMD_EXTRACT16 covers all eight values of (x & 7) */ |
| 148 | SIMD_EXTRACT16(m_accum_l, out, x); |
| 149 | return out; |
| 150 | } |
| 151 | |
| 152 | inline UINT16 rsp_device::VEC_ACCUM_LL(int x) /* Returns 16-bit lane (x & 7) of m_accum_ll. */ |
| 153 | { |
| 154 | UINT16 out; /* always assigned: the switch inside SIMD_EXTRACT16 covers all eight values of (x & 7) */ |
| 155 | SIMD_EXTRACT16(m_accum_ll, out, x); |
| 156 | return out; |
| 157 | } |
| 158 | |
| 159 | #define VEC_SET_ACCUM_H(v, x) SIMD_INSERT16(m_accum_h, v, x); /* writes v into 16-bit lane (x & 7) of m_accum_h */ |
| 160 | #define VEC_SET_ACCUM_M(v, x) SIMD_INSERT16(m_accum_m, v, x); /* writes v into lane (x & 7) of m_accum_m; previously spelled "m_>accum_m", which failed to compile at any expansion site */ |
| 161 | #define VEC_SET_ACCUM_L(v, x) SIMD_INSERT16(m_accum_l, v, x); /* writes v into lane (x & 7) of m_accum_l */ |
| 162 | #define VEC_SET_ACCUM_LL(v, x) SIMD_INSERT16(m_accum_ll, v, x); /* writes v into lane (x & 7) of m_accum_ll */ |
| 163 | |
| 164 | #define VEC_GET_SCALAR_VS1(out, i) SIMD_EXTRACT16(m_xv[VS1REG], out, i); /* out = lane i of the register named by the VS1REG field; needs op in scope */ |
| 165 | #define VEC_GET_SCALAR_VS2(out, i) SIMD_EXTRACT16(m_xv[VS2REG], out, VEC_EL_2(EL, i)); /* out = lane of the VS2REG register chosen by the vector_elements_2 table for (EL, i) */ |
| 166 | |
| 167 | inline UINT16 rsp_device::VEC_CARRY_FLAG(const int x) /* Returns 16-bit lane (x & 7) of m_xvflag[CARRY]. */ |
| 168 | { |
| 169 | UINT16 out; /* always assigned by the exhaustive switch in SIMD_EXTRACT16 */ |
| 170 | SIMD_EXTRACT16(m_xvflag[CARRY], out, x); |
| 171 | return out; |
| 172 | } |
| 173 | |
| 174 | inline UINT16 rsp_device::VEC_COMPARE_FLAG(const int x) /* Returns 16-bit lane (x & 7) of m_xvflag[COMPARE]. */ |
| 175 | { |
| 176 | UINT16 out; /* always assigned by the exhaustive switch in SIMD_EXTRACT16 */ |
| 177 | SIMD_EXTRACT16(m_xvflag[COMPARE], out, x); |
| 178 | return out; |
| 179 | } |
| 180 | |
| 181 | inline UINT16 rsp_device::VEC_CLIP1_FLAG(const int x) /* Returns 16-bit lane (x & 7) of m_xvflag[CLIP1]. */ |
| 182 | { |
| 183 | UINT16 out; /* always assigned by the exhaustive switch in SIMD_EXTRACT16 */ |
| 184 | SIMD_EXTRACT16(m_xvflag[CLIP1], out, x); |
| 185 | return out; |
| 186 | } |
| 187 | |
| 188 | inline UINT16 rsp_device::VEC_ZERO_FLAG(const int x) /* Returns 16-bit lane (x & 7) of m_xvflag[ZERO]. */ |
| 189 | { |
| 190 | UINT16 out; /* always assigned by the exhaustive switch in SIMD_EXTRACT16 */ |
| 191 | SIMD_EXTRACT16(m_xvflag[ZERO], out, x); |
| 192 | return out; |
| 193 | } |
| 194 | |
| 195 | inline UINT16 rsp_device::VEC_CLIP2_FLAG(const int x) /* Returns 16-bit lane (x & 7) of m_xvflag[CLIP2]. */ |
| 196 | { |
| 197 | UINT16 out; /* always assigned by the exhaustive switch in SIMD_EXTRACT16 */ |
| 198 | SIMD_EXTRACT16(m_xvflag[CLIP2], out, x); |
| 199 | return out; |
| 200 | } |
| 201 | |
| 202 | #define VEC_CLEAR_CARRY_FLAGS() { m_xvflag[CARRY] = _mm_setzero_si128(); } /* VEC_CLEAR_*_FLAGS: zero all eight lanes of one flag register at once */ |
| 203 | #define VEC_CLEAR_COMPARE_FLAGS() { m_xvflag[COMPARE] = _mm_setzero_si128(); } |
| 204 | #define VEC_CLEAR_CLIP1_FLAGS() { m_xvflag[CLIP1] = _mm_setzero_si128(); } |
| 205 | #define VEC_CLEAR_ZERO_FLAGS() { m_xvflag[ZERO] = _mm_setzero_si128(); } |
| 206 | #define VEC_CLEAR_CLIP2_FLAGS() { m_xvflag[CLIP2] = _mm_setzero_si128(); } |
| 207 | |
| 208 | #define VEC_SET_CARRY_FLAG(x) { SIMD_INSERT16(m_xvflag[CARRY], 0xffff, x); } /* VEC_SET_*_FLAG: a set flag is stored as 0xffff in lane (x & 7) */ |
| 209 | #define VEC_SET_COMPARE_FLAG(x) { SIMD_INSERT16(m_xvflag[COMPARE], 0xffff, x); } |
| 210 | #define VEC_SET_CLIP1_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP1], 0xffff, x); } |
| 211 | #define VEC_SET_ZERO_FLAG(x) { SIMD_INSERT16(m_xvflag[ZERO], 0xffff, x); } |
| 212 | #define VEC_SET_CLIP2_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP2], 0xffff, x); } |
| 213 | |
| 214 | #define VEC_CLEAR_CARRY_FLAG(x) { SIMD_INSERT16(m_xvflag[CARRY], 0, x); } /* VEC_CLEAR_*_FLAG (singular): store 0 in lane (x & 7) only */ |
| 215 | #define VEC_CLEAR_COMPARE_FLAG(x) { SIMD_INSERT16(m_xvflag[COMPARE], 0, x); } |
| 216 | #define VEC_CLEAR_CLIP1_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP1], 0, x); } |
| 217 | #define VEC_CLEAR_ZERO_FLAG(x) { SIMD_INSERT16(m_xvflag[ZERO], 0, x); } |
| 218 | #define VEC_CLEAR_CLIP2_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP2], 0, x); } |
| 219 | |
| 220 | #endif |
| 221 | |
| 222 | #define ACCUM_H(x) (UINT16)m_accum[x].w[3] /* 16-bit slices of accumulator entry x: H, M, L and LL read .w[3], .w[2], .w[1] and .w[0] respectively */ |
| 223 | #define ACCUM_M(x) (UINT16)m_accum[x].w[2] |
| 224 | #define ACCUM_L(x) (UINT16)m_accum[x].w[1] |
| 225 | #define ACCUM_LL(x) (UINT16)m_accum[x].w[0] /* NOTE(review): which slice is most significant depends on how .w overlays .q - confirm against the accumulator type */ |
| 226 | |
| 227 | #define SET_ACCUM_H(v, x) m_accum[x].w[3] = v; /* setters mirror the getters above; each expansion already ends in a semicolon */ |
| 228 | #define SET_ACCUM_M(v, x) m_accum[x].w[2] = v; |
| 229 | #define SET_ACCUM_L(v, x) m_accum[x].w[1] = v; |
| 230 | #define SET_ACCUM_LL(v, x) m_accum[x].w[0] = v; |
| 231 | |
| 232 | #define SCALAR_GET_VS1(out, i) out = VREG_S(VS1REG, i) /* out = element i of the VS1REG register; needs op in scope; no trailing semicolon in the expansion */ |
| 233 | #define SCALAR_GET_VS2(out, i) out = VREG_S(VS2REG, VEC_EL_2(EL, i)) /* out = element of the VS2REG register chosen by the vector_elements_2 table for (EL, i) */ |
| 234 | |
| 235 | #define CARRY_FLAG(x) (m_vflag[CARRY][x & 7] != 0 ? 0xffff : 0) /* getters normalise any non-zero stored value to 0xffff; the index wraps to 0-7 */ |
| 236 | #define COMPARE_FLAG(x) (m_vflag[COMPARE][x & 7] != 0 ? 0xffff : 0) |
| 237 | #define CLIP1_FLAG(x) (m_vflag[CLIP1][x & 7] != 0 ? 0xffff : 0) |
| 238 | #define ZERO_FLAG(x) (m_vflag[ZERO][x & 7] != 0 ? 0xffff : 0) |
| 239 | #define CLIP2_FLAG(x) (m_vflag[CLIP2][x & 7] != 0 ? 0xffff : 0) /* NOTE(review): x is not parenthesised in "x & 7", so pass a simple expression */ |
| 240 | |
| 241 | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[CARRY], 0, 16); } /* clears 16 bytes; assumes each m_vflag row is eight 16-bit entries - confirm against the member declaration */ |
| 242 | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[COMPARE], 0, 16); } |
| 243 | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[CLIP1], 0, 16); } |
| 244 | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[ZERO], 0, 16); } |
| 245 | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[CLIP2], 0, 16); } |
| 246 | |
| 247 | #define SET_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0xffff; } /* SET_*_FLAG: store 0xffff in entry (x & 7) */ |
| 248 | #define SET_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0xffff; } |
| 249 | #define SET_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0xffff; } |
| 250 | #define SET_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0xffff; } |
| 251 | #define SET_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0xffff; } |
| 252 | |
| 253 | #define CLEAR_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0; } /* CLEAR_*_FLAG (singular): store 0 in entry (x & 7) only */ |
| 254 | #define CLEAR_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0; } |
| 255 | #define CLEAR_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0; } |
| 256 | #define CLEAR_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0; } |
| 257 | #define CLEAR_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0; } |
| 258 | |
| 259 | |
| 260 | /*************************************************************************** |
| 261 | INLINE FUNCTIONS |
| 262 | ***************************************************************************/ |
| 263 | |
| 65 | 264 | /*------------------------------------------------- |
| 66 | 265 | epc - compute the exception PC from a |
| 67 | 266 | descriptor |
| r241959 | r241960 | |
| 132 | 331 | m_dmem8 = (UINT8*)base; |
| 133 | 332 | } |
| 134 | 333 | |
| 135 | | UINT8 rsp_device::DM_READ8(UINT32 address) |
| 334 | inline UINT8 rsp_device::DM_READ8(UINT32 address) |
| 136 | 335 | { |
| 137 | 336 | UINT8 ret = m_dmem8[BYTE4_XOR_BE(address & 0xfff)]; |
| 138 | | //printf("R8:%08x=%02x\n", address, ret); |
| 139 | 337 | return ret; |
| 140 | 338 | } |
| 141 | 339 | |
| r241959 | r241960 | |
| 149 | 347 | ((rsp_device *)param)->ccfunc_read8(); |
| 150 | 348 | } |
| 151 | 349 | |
| 152 | | UINT16 rsp_device::DM_READ16(UINT32 address) |
| 350 | inline UINT16 rsp_device::DM_READ16(UINT32 address) |
| 153 | 351 | { |
| 154 | 352 | UINT16 ret; |
| 155 | 353 | address &= 0xfff; |
| 156 | 354 | ret = m_dmem8[BYTE4_XOR_BE(address)] << 8; |
| 157 | 355 | ret |= m_dmem8[BYTE4_XOR_BE(address + 1)]; |
| 158 | | //printf("R16:%08x=%04x\n", address, ret); |
| 159 | 356 | return ret; |
| 160 | 357 | } |
| 161 | 358 | |
| r241959 | r241960 | |
| 169 | 366 | ((rsp_device *)param)->ccfunc_read16(); |
| 170 | 367 | } |
| 171 | 368 | |
| 172 | | UINT32 rsp_device::DM_READ32(UINT32 address) |
| 369 | inline UINT32 rsp_device::DM_READ32(UINT32 address) |
| 173 | 370 | { |
| 174 | 371 | UINT32 ret; |
| 175 | 372 | address &= 0xfff; |
| r241959 | r241960 | |
| 177 | 374 | ret |= m_dmem8[BYTE4_XOR_BE(address + 1)] << 16; |
| 178 | 375 | ret |= m_dmem8[BYTE4_XOR_BE(address + 2)] << 8; |
| 179 | 376 | ret |= m_dmem8[BYTE4_XOR_BE(address + 3)]; |
| 180 | | //printf("R32:%08x=%08x\n", address, ret); |
| 181 | 377 | return ret; |
| 182 | 378 | } |
| 183 | 379 | |
| r241959 | r241960 | |
| 191 | 387 | ((rsp_device *)param)->ccfunc_read32();; |
| 192 | 388 | } |
| 193 | 389 | |
| 194 | | void rsp_device::DM_WRITE8(UINT32 address, UINT8 data) |
| 390 | inline void rsp_device::DM_WRITE8(UINT32 address, UINT8 data) |
| 195 | 391 | { |
| 196 | 392 | address &= 0xfff; |
| 197 | 393 | m_dmem8[BYTE4_XOR_BE(address)] = data; |
| 198 | | //printf("W8:%08x=%02x\n", address, data); |
| 199 | 394 | } |
| 200 | 395 | |
| 201 | 396 | inline void rsp_device::ccfunc_write8() |
| r241959 | r241960 | |
| 208 | 403 | ((rsp_device *)param)->ccfunc_write8();; |
| 209 | 404 | } |
| 210 | 405 | |
| 211 | | void rsp_device::DM_WRITE16(UINT32 address, UINT16 data) |
| 406 | inline void rsp_device::DM_WRITE16(UINT32 address, UINT16 data) |
| 212 | 407 | { |
| 213 | 408 | address &= 0xfff; |
| 214 | 409 | m_dmem8[BYTE4_XOR_BE(address)] = data >> 8; |
| 215 | 410 | m_dmem8[BYTE4_XOR_BE(address + 1)] = data & 0xff; |
| 216 | | //printf("W16:%08x=%04x\n", address, data); |
| 217 | 411 | } |
| 218 | 412 | |
| 219 | 413 | inline void rsp_device::ccfunc_write16() |
| r241959 | r241960 | |
| 226 | 420 | ((rsp_device *)param)->ccfunc_write16();; |
| 227 | 421 | } |
| 228 | 422 | |
| 229 | | void rsp_device::DM_WRITE32(UINT32 address, UINT32 data) |
| 423 | inline void rsp_device::DM_WRITE32(UINT32 address, UINT32 data) |
| 230 | 424 | { |
| 231 | 425 | address &= 0xfff; |
| 232 | 426 | m_dmem8[BYTE4_XOR_BE(address)] = data >> 24; |
| 233 | 427 | m_dmem8[BYTE4_XOR_BE(address + 1)] = (data >> 16) & 0xff; |
| 234 | 428 | m_dmem8[BYTE4_XOR_BE(address + 2)] = (data >> 8) & 0xff; |
| 235 | 429 | m_dmem8[BYTE4_XOR_BE(address + 3)] = data & 0xff; |
| 236 | | //printf("W32:%08x=%08x\n", address, data); |
| 237 | 430 | } |
| 238 | 431 | |
| 239 | 432 | inline void rsp_device::ccfunc_write32() |
| r241959 | r241960 | |
| 259 | 452 | } |
| 260 | 453 | |
| 261 | 454 | |
| 455 | /*------------------------------------------------- |
| 456 | cfunc_printf_debug - generic printf for |
| 457 | debugging |
| 458 | -------------------------------------------------*/ |
| 459 | |
| 460 | #ifdef UNUSED_CODE |
| 461 | inline void rsp_device::ccfunc_printf_debug() /* Debug helper (only built under UNUSED_CODE): prints arg0/arg1 of m_rsp_state in a format chosen by m_arg2. Renamed from rs_device::cfunc_printf_debug so that it belongs to rsp_device and matches the ccfunc_printf_debug() call made by the static wrapper. */ |
| 462 | { |
| 463 | switch(m_arg2) /* NOTE(review): other handlers read their arguments from m_rsp_state->argN; confirm that m_arg2 exists before enabling UNUSED_CODE */ |
| 464 | { |
| 465 | case 0: // WRITE8 |
| 466 | printf("%04x:%02x\n", m_rsp_state->arg0 & 0xffff, (UINT8)m_rsp_state->arg1); |
| 467 | break; |
| 468 | case 1: // WRITE16 |
| 469 | printf("%04x:%04x\n", m_rsp_state->arg0 & 0xffff, (UINT16)m_rsp_state->arg1); |
| 470 | break; |
| 471 | case 2: // WRITE32 |
| 472 | printf("%04x:%08x\n", m_rsp_state->arg0 & 0xffff, m_rsp_state->arg1); |
| 473 | break; |
| 474 | case 3: // READ8 |
| 475 | printf("%04xr%02x\n", m_rsp_state->arg0 & 0xffff, (UINT8)m_rsp_state->arg1); |
| 476 | break; |
| 477 | case 4: // READ16 |
| 478 | printf("%04xr%04x\n", m_rsp_state->arg0 & 0xffff, (UINT16)m_rsp_state->arg1); |
| 479 | break; |
| 480 | case 5: // READ32 |
| 481 | printf("%04xr%08x\n", m_rsp_state->arg0 & 0xffff, m_rsp_state->arg1); |
| 482 | break; |
| 483 | case 6: // Checksum |
| 484 | printf("Sum: %08x\n", m_rsp_state->arg0); |
| 485 | break; |
| 486 | case 7: // Expected checksum |
| 487 | printf("Correct Sum: %08x\n", m_rsp_state->arg0); |
| 488 | break; |
| 489 | default: // any other selector: dump both arguments |
| 490 | printf("%08x %08x\n", m_rsp_state->arg0 & 0xffff, m_rsp_state->arg1); |
| 491 | break; |
| 492 | } |
| 493 | } |
| 494 | |
| 495 | static void cfunc_printf_debug(void *param) /* Static trampoline: casts the opaque param to rsp_device* and calls ccfunc_printf_debug(). */ |
| 496 | { |
| 497 | ((rsp_device *)param)->ccfunc_printf_debug(); |
| 498 | } |
| 499 | #endif |
| 500 | |
| 262 | 501 | inline void rsp_device::ccfunc_get_cop0_reg() |
| 263 | 502 | { |
| 264 | 503 | int reg = m_rsp_state->arg0; |
| r241959 | r241960 | |
| 313 | 552 | ((rsp_device *)param)->ccfunc_set_cop0_reg(); |
| 314 | 553 | } |
| 315 | 554 | |
| 555 | inline void rsp_device::ccfunc_unimplemented_opcode() /* Reports an opcode that could not be handled: optionally prints its disassembly, then aborts through fatalerror(). The opcode word is taken from m_rsp_state->arg0. */ |
| 556 | { |
| 557 | UINT32 op = m_rsp_state->arg0; /* unsigned (as in the other handlers) so that op >> 26 yields the 6-bit major opcode even when bit 31 is set */ |
| 558 | if ((machine().debug_flags & DEBUG_FLAG_ENABLED) != 0) |
| 559 | { |
| 560 | char string[200]; |
| 561 | rsp_dasm_one(string, m_ppc, op); |
| 562 | osd_printf_debug("%08X: %s\n", m_ppc, string); /* disassembly is only produced when the debugger flag is enabled */ |
| 563 | } |
| 564 | |
| 565 | fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", op >> 26, op, m_ppc); |
| 566 | } |
| 567 | |
| 568 | static void cfunc_unimplemented_opcode(void *param) /* Static trampoline: casts the opaque param to rsp_device* and calls ccfunc_unimplemented_opcode(). */ |
| 569 | { |
| 570 | ((rsp_device *)param)->ccfunc_unimplemented_opcode(); |
| 571 | } |
| 572 | |
| 316 | 573 | /*****************************************************************************/ |
| 317 | 574 | |
| 575 | /* Legacy. Going forward, this will be transitioned into unrolled opcode decodes. Indexed through VEC_EL_2(EL, i): the row is the 4-bit element specifier, the column is the lane being produced, and the entry is the source lane to read. */ |
| 576 | static const int vector_elements_2[16][8] = |
| 577 | { |
| 578 | { 0, 1, 2, 3, 4, 5, 6, 7 }, // none |
| 579 | { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? (same mapping as "none") |
| 580 | { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q |
| 581 | { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q |
| 582 | { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h |
| 583 | { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h |
| 584 | { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h |
| 585 | { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h |
| 586 | { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 |
| 587 | { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 |
| 588 | { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 |
| 589 | { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 |
| 590 | { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 |
| 591 | { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 |
| 592 | { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 |
| 593 | { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 |
| 594 | }; |
| 595 | |
| 596 | #if USE_SIMD |
| 597 | static __m128i vec_himask; |
| 598 | static __m128i vec_lomask; |
| 599 | static __m128i vec_hibit; |
| 600 | static __m128i vec_lobit; |
| 601 | static __m128i vec_n32768; |
| 602 | static __m128i vec_32767; |
| 603 | static __m128i vec_flagmask; |
| 604 | static __m128i vec_shiftmask2; |
| 605 | static __m128i vec_shiftmask4; |
| 606 | static __m128i vec_flag_reverse; |
| 607 | static __m128i vec_neg1; |
| 608 | static __m128i vec_zero; |
| 609 | static __m128i vec_shuf[16]; |
| 610 | static __m128i vec_shuf_inverse[16]; |
| 611 | #endif |
| 612 | |
| 318 | 613 | void rsp_device::rspcom_init() /* Resets the vector flag state and, in SIMD builds, fills the file-level shuffle tables and constant vectors and zeroes the SIMD accumulators. */ |
| 319 | 614 | { |
| 615 | #if USE_SIMD |
| 616 | VEC_CLEAR_CARRY_FLAGS(); |
| 617 | VEC_CLEAR_COMPARE_FLAGS(); |
| 618 | VEC_CLEAR_CLIP1_FLAGS(); |
| 619 | VEC_CLEAR_ZERO_FLAGS(); |
| 620 | VEC_CLEAR_CLIP2_FLAGS(); |
| 621 | #endif |
| 622 | |
| 623 | #if (!USE_SIMD || SIMUL_SIMD) |
| 624 | CLEAR_CARRY_FLAGS(); |
| 625 | CLEAR_COMPARE_FLAGS(); |
| 626 | CLEAR_CLIP1_FLAGS(); |
| 627 | CLEAR_ZERO_FLAGS(); |
| 628 | CLEAR_CLIP2_FLAGS(); |
| 629 | #endif |
| 630 | |
| 631 | #if USE_SIMD |
| 632 | vec_shuf_inverse[ 0] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // none |
| 633 | vec_shuf_inverse[ 1] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // ??? |
| 634 | vec_shuf_inverse[ 2] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0908, 0x0908, 0x0504, 0x0504, 0x0100, 0x0100); // 0q |
| 635 | vec_shuf_inverse[ 3] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0b0a, 0x0b0a, 0x0706, 0x0706, 0x0302, 0x0302); // 1q |
| 636 | vec_shuf_inverse[ 4] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0100, 0x0100, 0x0100, 0x0100); // 0h |
| 637 | vec_shuf_inverse[ 5] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0302, 0x0302, 0x0302, 0x0302); // 1h |
| 638 | vec_shuf_inverse[ 6] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0504, 0x0504, 0x0504, 0x0504); // 2h |
| 639 | vec_shuf_inverse[ 7] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0706, 0x0706, 0x0706, 0x0706); // 3h |
| 640 | vec_shuf_inverse[ 8] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 0 |
| 641 | vec_shuf_inverse[ 9] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 1 |
| 642 | vec_shuf_inverse[10] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 2 |
| 643 | vec_shuf_inverse[11] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 3 |
| 644 | vec_shuf_inverse[12] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 4 |
| 645 | vec_shuf_inverse[13] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 5 |
| 646 | vec_shuf_inverse[14] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 6 |
| 647 | vec_shuf_inverse[15] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 7 |
| 648 | |
| 649 | vec_shuf[ 0] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // none |
| 650 | vec_shuf[ 1] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // ??? |
| 651 | vec_shuf[ 2] = _mm_set_epi16(0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e); // 0q |
| 652 | vec_shuf[ 3] = _mm_set_epi16(0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c); // 1q (fourth selector was 0x0706, breaking the pairwise pattern every other quarter row follows) |
| 653 | vec_shuf[ 4] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0h |
| 654 | vec_shuf[ 5] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1h |
| 655 | vec_shuf[ 6] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2h |
| 656 | vec_shuf[ 7] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908); // 3h |
| 657 | vec_shuf[ 8] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0 |
| 658 | vec_shuf[ 9] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1 |
| 659 | vec_shuf[10] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2 |
| 660 | vec_shuf[11] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 3 |
| 661 | vec_shuf[12] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 4 |
| 662 | vec_shuf[13] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 5 |
| 663 | vec_shuf[14] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 6 |
| 664 | vec_shuf[15] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 7 |
| 665 | m_accum_h = _mm_setzero_si128(); |
| 666 | m_accum_m = _mm_setzero_si128(); |
| 667 | m_accum_l = _mm_setzero_si128(); |
| 668 | m_accum_ll = _mm_setzero_si128(); |
| 669 | vec_neg1 = _mm_set_epi64x(0xffffffffffffffffL, 0xffffffffffffffffL); |
| 670 | vec_zero = _mm_setzero_si128(); |
| 671 | vec_himask = _mm_set_epi64x(0xffff0000ffff0000L, 0xffff0000ffff0000L); |
| 672 | vec_lomask = _mm_set_epi64x(0x0000ffff0000ffffL, 0x0000ffff0000ffffL); |
| 673 | vec_hibit = _mm_set_epi64x(0x0001000000010000L, 0x0001000000010000L); |
| 674 | vec_lobit = _mm_set_epi64x(0x0000000100000001L, 0x0000000100000001L); |
| 675 | vec_32767 = _mm_set_epi64x(0x7fff7fff7fff7fffL, 0x7fff7fff7fff7fffL); |
| 676 | vec_n32768 = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L); |
| 677 | vec_flagmask = _mm_set_epi64x(0x0001000100010001L, 0x0001000100010001L); |
| 678 | vec_shiftmask2 = _mm_set_epi64x(0x0000000300000003L, 0x0000000300000003L); |
| 679 | vec_shiftmask4 = _mm_set_epi64x(0x000000000000000fL, 0x000000000000000fL); |
| 680 | vec_flag_reverse = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); /* same byte selectors as vec_shuf[0] */ |
| 681 | #endif |
| 320 | 682 | } |
| 321 | 683 | |
| 684 | |
| 685 | #if USE_SIMD |
| 686 | // LBV |
| 687 | // |
| 688 | // 31 25 20 15 10 6 0 |
| 689 | // -------------------------------------------------- |
| 690 | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 691 | // -------------------------------------------------- |
| 692 | // |
| 693 | // Load 1 byte to vector byte index |
| 694 | |
| 695 | inline void rsp_device::ccfunc_rsp_lbv_simd() /* LBV, SIMD register file: loads one byte from DMEM into byte position index of m_xv[dest]. The opcode word comes from m_rsp_state->arg0. */ |
| 696 | { |
| 697 | UINT32 op = m_rsp_state->arg0; |
| 698 | |
| 699 | UINT32 ea = 0; |
| 700 | int dest = (op >> 16) & 0x1f; |
| 701 | int base = (op >> 21) & 0x1f; |
| 702 | int index = (op >> 7) & 0xf; |
| 703 | int offset = (op & 0x7f); |
| 704 | if (offset & 0x40) /* sign-extend the 7-bit offset field */ |
| 705 | { |
| 706 | offset |= 0xffffffc0; |
| 707 | } |
| 708 | |
| 709 | ea = (base) ? m_rsp_state->r[base] + offset : offset; /* a base field of 0 contributes nothing to the address */ |
| 710 | |
| 711 | UINT16 element; |
| 712 | SIMD_EXTRACT16(m_xv[dest], element, (index >> 1)); /* two byte positions share one 16-bit lane */ |
| 713 | element &= 0xff00 >> ((1-(index & 1)) * 8); /* keep the half that is not replaced: mask 0x00ff for even index, 0xff00 for odd index */ |
| 714 | element |= DM_READ8(ea) << ((1-(index & 1)) * 8); /* even index -> high half of the lane, odd index -> low half */ |
| 715 | SIMD_INSERT16(m_xv[dest], element, (index >> 1)); |
| 716 | } |
| 717 | |
| 718 | static void cfunc_rsp_lbv_simd(void *param) /* Static trampoline: casts the opaque param to rsp_device* and calls ccfunc_rsp_lbv_simd(). */ |
| 719 | { |
| 720 | ((rsp_device *)param)->ccfunc_rsp_lbv_simd(); |
| 721 | } |
| 722 | #endif |
| 723 | |
| 724 | #if (!USE_SIMD || SIMUL_SIMD) |
| 725 | inline void rsp_device::ccfunc_rsp_lbv_scalar() /* LBV, scalar register file: loads one byte from DMEM into byte position index of vector register dest through VREG_B. The opcode word comes from m_rsp_state->arg0. */ |
| 726 | { |
| 727 | UINT32 op = m_rsp_state->arg0; |
| 728 | |
| 729 | UINT32 ea = 0; |
| 730 | int dest = (op >> 16) & 0x1f; |
| 731 | int base = (op >> 21) & 0x1f; |
| 732 | int index = (op >> 7) & 0xf; |
| 733 | int offset = (op & 0x7f); |
| 734 | if (offset & 0x40) /* sign-extend the 7-bit offset field */ |
| 735 | { |
| 736 | offset |= 0xffffffc0; |
| 737 | } |
| 738 | |
| 739 | ea = (base) ? m_rsp_state->r[base] + offset : offset; /* a base field of 0 contributes nothing to the address */ |
| 740 | VREG_B(dest, index) = DM_READ8(ea); /* VREG_B applies its own index ^ 1 byte swizzle */ |
| 741 | } |
| 742 | |
| 743 | static void cfunc_rsp_lbv_scalar(void *param) /* Static trampoline: casts the opaque param to rsp_device* and calls ccfunc_rsp_lbv_scalar(). */ |
| 744 | { |
| 745 | ((rsp_device *)param)->ccfunc_rsp_lbv_scalar(); |
| 746 | } |
| 747 | #endif |
| 748 | |
| 749 | #if USE_SIMD |
| 750 | // LSV |
| 751 | // |
| 752 | // 31 25 20 15 10 6 0 |
| 753 | // -------------------------------------------------- |
| 754 | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 755 | // -------------------------------------------------- |
| 756 | // |
| 757 | // Loads 2 bytes starting from vector byte index |
| 758 | |
| 759 | inline void rsp_device::ccfunc_rsp_lsv_simd() /* LSV, SIMD register file: loads two consecutive DMEM bytes into byte positions index and index + 1 of m_xv[dest]. The opcode word comes from m_rsp_state->arg0. */ |
| 760 | { |
| 761 | UINT32 op = m_rsp_state->arg0; |
| 762 | int dest = (op >> 16) & 0x1f; |
| 763 | int base = (op >> 21) & 0x1f; |
| 764 | int index = (op >> 7) & 0xe; /* low bit of the element field is dropped, so index is always even */ |
| 765 | int offset = (op & 0x7f); |
| 766 | if (offset & 0x40) /* sign-extend the 7-bit offset field */ |
| 767 | { |
| 768 | offset |= 0xffffffc0; |
| 769 | } |
| 770 | |
| 771 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); /* offset is counted in 2-byte units */ |
| 772 | int end = index + 2; |
| 773 | for (int i = index; i < end; i++) /* one read-modify-write of the containing 16-bit lane per byte */ |
| 774 | { |
| 775 | UINT16 element; |
| 776 | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 777 | element &= 0xff00 >> ((1 - (i & 1)) * 8); /* keep the half that is not replaced */ |
| 778 | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); /* even i -> high half, odd i -> low half */ |
| 779 | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 780 | ea++; |
| 781 | } |
| 782 | } |
| 783 | |
| 784 | static void cfunc_rsp_lsv_simd(void *param) /* Static trampoline: casts the opaque param to rsp_device* and calls ccfunc_rsp_lsv_simd(). */ |
| 785 | { |
| 786 | ((rsp_device *)param)->ccfunc_rsp_lsv_simd(); |
| 787 | } |
| 788 | #endif |
| 789 | |
| 790 | #if (!USE_SIMD || SIMUL_SIMD) |
| 791 | inline void rsp_device::ccfunc_rsp_lsv_scalar() /* LSV, scalar register file: loads two consecutive DMEM bytes into byte positions index and index + 1 of vector register dest. The opcode word comes from m_rsp_state->arg0. */ |
| 792 | { |
| 793 | UINT32 op = m_rsp_state->arg0; |
| 794 | int dest = (op >> 16) & 0x1f; |
| 795 | int base = (op >> 21) & 0x1f; |
| 796 | int index = (op >> 7) & 0xe; /* low bit of the element field is dropped, so index is always even */ |
| 797 | int offset = (op & 0x7f); |
| 798 | if (offset & 0x40) /* sign-extend the 7-bit offset field */ |
| 799 | { |
| 800 | offset |= 0xffffffc0; |
| 801 | } |
| 802 | |
| 803 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); /* offset is counted in 2-byte units */ |
| 804 | int end = index + 2; |
| 805 | for (int i = index; i < end; i++) |
| 806 | { |
| 807 | VREG_B(dest, i) = DM_READ8(ea); /* VREG_B applies its own index ^ 1 byte swizzle */ |
| 808 | ea++; |
| 809 | } |
| 810 | } |
| 811 | |
| 812 | static void cfunc_rsp_lsv_scalar(void *param) |
| 813 | { |
| 814 | ((rsp_device *)param)->ccfunc_rsp_lsv_scalar(); |
| 815 | } |
| 816 | #endif |
| 817 | |
| 818 | #if USE_SIMD |
| 819 | // LLV |
| 820 | // |
| 821 | // 31 25 20 15 10 6 0 |
| 822 | // -------------------------------------------------- |
| 823 | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 824 | // -------------------------------------------------- |
| 825 | // |
| 826 | // Loads 4 bytes starting from vector byte index |
| 827 | |
| 828 | inline void rsp_device::ccfunc_rsp_llv_simd() |
| 829 | { |
| 830 | UINT32 op = m_rsp_state->arg0; |
| 831 | UINT32 ea = 0; |
| 832 | int dest = (op >> 16) & 0x1f; |
| 833 | int base = (op >> 21) & 0x1f; |
| 834 | int index = (op >> 7) & 0xc; |
| 835 | int offset = (op & 0x7f); |
| 836 | if (offset & 0x40) |
| 837 | { |
| 838 | offset |= 0xffffffc0; |
| 839 | } |
| 840 | |
| 841 | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 842 | |
| 843 | int end = index + 4; |
| 844 | |
| 845 | for (int i = index; i < end; i++) |
| 846 | { |
| 847 | UINT16 element; |
| 848 | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 849 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 850 | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 851 | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 852 | ea++; |
| 853 | } |
| 854 | } |
| 855 | |
| 856 | static void cfunc_rsp_llv_simd(void *param) |
| 857 | { |
| 858 | ((rsp_device *)param)->ccfunc_rsp_llv_simd(); |
| 859 | } |
| 860 | #endif |
| 861 | |
| 862 | #if (!USE_SIMD || SIMUL_SIMD) |
| 863 | |
| 864 | inline void rsp_device::ccfunc_rsp_llv_scalar() |
| 865 | { |
| 866 | UINT32 op = m_rsp_state->arg0; |
| 867 | UINT32 ea = 0; |
| 868 | int dest = (op >> 16) & 0x1f; |
| 869 | int base = (op >> 21) & 0x1f; |
| 870 | int index = (op >> 7) & 0xc; |
| 871 | int offset = (op & 0x7f); |
| 872 | if (offset & 0x40) |
| 873 | { |
| 874 | offset |= 0xffffffc0; |
| 875 | } |
| 876 | |
| 877 | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 878 | |
| 879 | int end = index + 4; |
| 880 | |
| 881 | for (int i = index; i < end; i++) |
| 882 | { |
| 883 | VREG_B(dest, i) = DM_READ8(ea); |
| 884 | ea++; |
| 885 | } |
| 886 | } |
| 887 | |
| 888 | static void cfunc_rsp_llv_scalar(void *param) |
| 889 | { |
| 890 | ((rsp_device *)param)->ccfunc_rsp_llv_scalar(); |
| 891 | } |
| 892 | #endif |
| 893 | |
| 894 | #if USE_SIMD |
| 895 | // LDV |
| 896 | // |
| 897 | // 31 25 20 15 10 6 0 |
| 898 | // -------------------------------------------------- |
| 899 | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 900 | // -------------------------------------------------- |
| 901 | // |
| 902 | // Loads 8 bytes starting from vector byte index |
| 903 | |
| 904 | inline void rsp_device::ccfunc_rsp_ldv_simd() |
| 905 | { |
| 906 | UINT32 op = m_rsp_state->arg0; |
| 907 | UINT32 ea = 0; |
| 908 | int dest = (op >> 16) & 0x1f; |
| 909 | int base = (op >> 21) & 0x1f; |
| 910 | int index = (op >> 7) & 0x8; |
| 911 | int offset = (op & 0x7f); |
| 912 | if (offset & 0x40) |
| 913 | { |
| 914 | offset |= 0xffffffc0; |
| 915 | } |
| 916 | |
| 917 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 918 | |
| 919 | int end = index + 8; |
| 920 | |
| 921 | for (int i = index; i < end; i++) |
| 922 | { |
| 923 | UINT16 element; |
| 924 | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 925 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 926 | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 927 | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 928 | ea++; |
| 929 | } |
| 930 | } |
| 931 | |
| 932 | static void cfunc_rsp_ldv_simd(void *param) |
| 933 | { |
| 934 | ((rsp_device *)param)->ccfunc_rsp_ldv_simd(); |
| 935 | } |
| 936 | #endif |
| 937 | |
| 938 | #if (!USE_SIMD || SIMUL_SIMD) |
| 939 | |
| 940 | inline void rsp_device::ccfunc_rsp_ldv_scalar() |
| 941 | { |
| 942 | UINT32 op = m_rsp_state->arg0; |
| 943 | UINT32 ea = 0; |
| 944 | int dest = (op >> 16) & 0x1f; |
| 945 | int base = (op >> 21) & 0x1f; |
| 946 | int index = (op >> 7) & 0x8; |
| 947 | int offset = (op & 0x7f); |
| 948 | if (offset & 0x40) |
| 949 | { |
| 950 | offset |= 0xffffffc0; |
| 951 | } |
| 952 | |
| 953 | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 954 | |
| 955 | int end = index + 8; |
| 956 | |
| 957 | for (int i = index; i < end; i++) |
| 958 | { |
| 959 | VREG_B(dest, i) = DM_READ8(ea); |
| 960 | ea++; |
| 961 | } |
| 962 | } |
| 963 | |
| 964 | static void cfunc_rsp_ldv_scalar(void *param) |
| 965 | { |
| 966 | ((rsp_device *)param)->ccfunc_rsp_ldv_scalar(); |
| 967 | } |
| 968 | #endif |
| 969 | |
| 970 | #if USE_SIMD |
| 971 | // LQV |
| 972 | // |
| 973 | // 31 25 20 15 10 6 0 |
| 974 | // -------------------------------------------------- |
| 975 | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 976 | // -------------------------------------------------- |
| 977 | // |
| 978 | // Loads up to 16 bytes starting from vector byte index |
| 979 | |
| 980 | inline void rsp_device::ccfunc_rsp_lqv_simd() |
| 981 | { |
| 982 | UINT32 op = m_rsp_state->arg0; |
| 983 | int dest = (op >> 16) & 0x1f; |
| 984 | int base = (op >> 21) & 0x1f; |
| 985 | int offset = (op & 0x7f); |
| 986 | if (offset & 0x40) |
| 987 | { |
| 988 | offset |= 0xffffffc0; |
| 989 | } |
| 990 | |
| 991 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 992 | |
| 993 | int end = 16 - (ea & 0xf); |
| 994 | if (end > 16) end = 16; |
| 995 | |
| 996 | for (int i = 0; i < end; i++) |
| 997 | { |
| 998 | UINT16 element; |
| 999 | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 1000 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 1001 | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 1002 | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 1003 | ea++; |
| 1004 | } |
| 1005 | } |
| 1006 | |
| 1007 | static void cfunc_rsp_lqv_simd(void *param) |
| 1008 | { |
| 1009 | ((rsp_device *)param)->ccfunc_rsp_lqv_simd(); |
| 1010 | } |
| 1011 | #endif |
| 1012 | |
| 1013 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1014 | |
| 1015 | inline void rsp_device::ccfunc_rsp_lqv_scalar() |
| 1016 | { |
| 1017 | UINT32 op = m_rsp_state->arg0; |
| 1018 | int dest = (op >> 16) & 0x1f; |
| 1019 | int base = (op >> 21) & 0x1f; |
| 1020 | int offset = (op & 0x7f); |
| 1021 | if (offset & 0x40) |
| 1022 | { |
| 1023 | offset |= 0xffffffc0; |
| 1024 | } |
| 1025 | |
| 1026 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1027 | |
| 1028 | int end = 16 - (ea & 0xf); |
| 1029 | if (end > 16) end = 16; |
| 1030 | |
| 1031 | for (int i = 0; i < end; i++) |
| 1032 | { |
| 1033 | VREG_B(dest, i) = DM_READ8(ea); |
| 1034 | ea++; |
| 1035 | } |
| 1036 | } |
| 1037 | |
| 1038 | static void cfunc_rsp_lqv_scalar(void *param) |
| 1039 | { |
| 1040 | ((rsp_device *)param)->ccfunc_rsp_lqv_scalar(); |
| 1041 | } |
| 1042 | #endif |
| 1043 | |
| 1044 | #if USE_SIMD |
| 1045 | // LRV |
| 1046 | // |
| 1047 | // 31 25 20 15 10 6 0 |
| 1048 | // -------------------------------------------------- |
| 1049 | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 1050 | // -------------------------------------------------- |
| 1051 | // |
| 1052 | // Loads up to 16 bytes into the right side of the vector, reading from the preceding 16-byte boundary |
| 1053 | |
| 1054 | inline void rsp_device::ccfunc_rsp_lrv_simd() |
| 1055 | { |
| 1056 | UINT32 op = m_rsp_state->arg0; |
| 1057 | int dest = (op >> 16) & 0x1f; |
| 1058 | int base = (op >> 21) & 0x1f; |
| 1059 | int index = (op >> 7) & 0xf; |
| 1060 | int offset = (op & 0x7f); |
| 1061 | if (offset & 0x40) |
| 1062 | { |
| 1063 | offset |= 0xffffffc0; |
| 1064 | } |
| 1065 | |
| 1066 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1067 | |
| 1068 | index = 16 - ((ea & 0xf) - index); |
| 1069 | ea &= ~0xf; |
| 1070 | |
| 1071 | for (int i = index; i < 16; i++) |
| 1072 | { |
| 1073 | UINT16 element; |
| 1074 | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 1075 | element &= 0xff00 >> ((1-(i & 1)) * 8); |
| 1076 | element |= DM_READ8(ea) << ((1-(i & 1)) * 8); |
| 1077 | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 1078 | ea++; |
| 1079 | } |
| 1080 | } |
| 1081 | |
| 1082 | static void cfunc_rsp_lrv_simd(void *param) |
| 1083 | { |
| 1084 | ((rsp_device *)param)->ccfunc_rsp_lrv_simd(); |
| 1085 | } |
| 1086 | #endif |
| 1087 | |
| 1088 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1089 | |
| 1090 | inline void rsp_device::ccfunc_rsp_lrv_scalar() |
| 1091 | { |
| 1092 | UINT32 op = m_rsp_state->arg0; |
| 1093 | int dest = (op >> 16) & 0x1f; |
| 1094 | int base = (op >> 21) & 0x1f; |
| 1095 | int index = (op >> 7) & 0xf; |
| 1096 | int offset = (op & 0x7f); |
| 1097 | if (offset & 0x40) |
| 1098 | { |
| 1099 | offset |= 0xffffffc0; |
| 1100 | } |
| 1101 | |
| 1102 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1103 | |
| 1104 | index = 16 - ((ea & 0xf) - index); |
| 1105 | ea &= ~0xf; |
| 1106 | |
| 1107 | for (int i = index; i < 16; i++) |
| 1108 | { |
| 1109 | VREG_B(dest, i) = DM_READ8(ea); |
| 1110 | ea++; |
| 1111 | } |
| 1112 | } |
| 1113 | |
| 1114 | static void cfunc_rsp_lrv_scalar(void *param) |
| 1115 | { |
| 1116 | ((rsp_device *)param)->ccfunc_rsp_lrv_scalar(); |
| 1117 | } |
| 1118 | #endif |
| 1119 | |
| 1120 | #if USE_SIMD |
| 1121 | // LPV |
| 1122 | // |
| 1123 | // 31 25 20 15 10 6 0 |
| 1124 | // -------------------------------------------------- |
| 1125 | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1126 | // -------------------------------------------------- |
| 1127 | // |
| 1128 | // Loads a byte as the upper 8 bits of each element |
| 1129 | |
| 1130 | inline void rsp_device::ccfunc_rsp_lpv_simd() |
| 1131 | { |
| 1132 | UINT32 op = m_rsp_state->arg0; |
| 1133 | int dest = (op >> 16) & 0x1f; |
| 1134 | int base = (op >> 21) & 0x1f; |
| 1135 | int index = (op >> 7) & 0xf; |
| 1136 | int offset = (op & 0x7f); |
| 1137 | if (offset & 0x40) |
| 1138 | { |
| 1139 | offset |= 0xffffffc0; |
| 1140 | } |
| 1141 | |
| 1142 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1143 | |
| 1144 | for (int i = 0; i < 8; i++) |
| 1145 | { |
| 1146 | SIMD_INSERT16(m_xv[dest], DM_READ8(ea + (((16-index) + i) & 0xf)) << 8, i); |
| 1147 | } |
| 1148 | } |
| 1149 | |
| 1150 | static void cfunc_rsp_lpv_simd(void *param) |
| 1151 | { |
| 1152 | ((rsp_device *)param)->ccfunc_rsp_lpv_simd(); |
| 1153 | } |
| 1154 | #endif |
| 1155 | |
| 1156 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1157 | |
| 1158 | inline void rsp_device::ccfunc_rsp_lpv_scalar() |
| 1159 | { |
| 1160 | UINT32 op = m_rsp_state->arg0; |
| 1161 | int dest = (op >> 16) & 0x1f; |
| 1162 | int base = (op >> 21) & 0x1f; |
| 1163 | int index = (op >> 7) & 0xf; |
| 1164 | int offset = (op & 0x7f); |
| 1165 | if (offset & 0x40) |
| 1166 | { |
| 1167 | offset |= 0xffffffc0; |
| 1168 | } |
| 1169 | |
| 1170 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1171 | |
| 1172 | for (int i = 0; i < 8; i++) |
| 1173 | { |
| 1174 | W_VREG_S(dest, i) = DM_READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 1175 | } |
| 1176 | } |
| 1177 | |
| 1178 | static void cfunc_rsp_lpv_scalar(void *param) |
| 1179 | { |
| 1180 | ((rsp_device *)param)->ccfunc_rsp_lpv_scalar(); |
| 1181 | } |
| 1182 | #endif |
| 1183 | |
| 1184 | #if USE_SIMD |
| 1185 | // LUV |
| 1186 | // |
| 1187 | // 31 25 20 15 10 6 0 |
| 1188 | // -------------------------------------------------- |
| 1189 | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1190 | // -------------------------------------------------- |
| 1191 | // |
| 1192 | // Loads a byte as the bits 14-7 of each element |
| 1193 | |
| 1194 | inline void rsp_device::ccfunc_rsp_luv_simd() |
| 1195 | { |
| 1196 | UINT32 op = m_rsp_state->arg0; |
| 1197 | int dest = (op >> 16) & 0x1f; |
| 1198 | int base = (op >> 21) & 0x1f; |
| 1199 | int index = (op >> 7) & 0xf; |
| 1200 | int offset = (op & 0x7f); |
| 1201 | if (offset & 0x40) |
| 1202 | { |
| 1203 | offset |= 0xffffffc0; |
| 1204 | } |
| 1205 | |
| 1206 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1207 | |
| 1208 | for (int i = 0; i < 8; i++) |
| 1209 | { |
| 1210 | SIMD_INSERT16(m_xv[dest], DM_READ8(ea + (((16-index) + i) & 0xf)) << 7, i); |
| 1211 | } |
| 1212 | } |
| 1213 | |
| 1214 | static void cfunc_rsp_luv_simd(void *param) |
| 1215 | { |
| 1216 | ((rsp_device *)param)->ccfunc_rsp_luv_simd(); |
| 1217 | } |
| 1218 | #endif |
| 1219 | |
| 1220 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1221 | |
| 1222 | inline void rsp_device::ccfunc_rsp_luv_scalar() |
| 1223 | { |
| 1224 | UINT32 op = m_rsp_state->arg0; |
| 1225 | int dest = (op >> 16) & 0x1f; |
| 1226 | int base = (op >> 21) & 0x1f; |
| 1227 | int index = (op >> 7) & 0xf; |
| 1228 | int offset = (op & 0x7f); |
| 1229 | if (offset & 0x40) |
| 1230 | { |
| 1231 | offset |= 0xffffffc0; |
| 1232 | } |
| 1233 | |
| 1234 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1235 | |
| 1236 | for (int i = 0; i < 8; i++) |
| 1237 | { |
| 1238 | W_VREG_S(dest, i) = DM_READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 1239 | } |
| 1240 | } |
| 1241 | |
| 1242 | static void cfunc_rsp_luv_scalar(void *param) |
| 1243 | { |
| 1244 | ((rsp_device *)param)->ccfunc_rsp_luv_scalar(); |
| 1245 | } |
| 1246 | #endif |
| 1247 | |
| 1248 | #if USE_SIMD |
| 1249 | // LHV |
| 1250 | // |
| 1251 | // 31 25 20 15 10 6 0 |
| 1252 | // -------------------------------------------------- |
| 1253 | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1254 | // -------------------------------------------------- |
| 1255 | // |
| 1256 | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 1257 | |
| 1258 | inline void rsp_device::ccfunc_rsp_lhv_simd() |
| 1259 | { |
| 1260 | UINT32 op = m_rsp_state->arg0; |
| 1261 | int dest = (op >> 16) & 0x1f; |
| 1262 | int base = (op >> 21) & 0x1f; |
| 1263 | int index = (op >> 7) & 0xf; |
| 1264 | int offset = (op & 0x7f); |
| 1265 | if (offset & 0x40) |
| 1266 | { |
| 1267 | offset |= 0xffffffc0; |
| 1268 | } |
| 1269 | |
| 1270 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1271 | |
| 1272 | for (int i = 0; i < 8; i++) |
| 1273 | { |
| 1274 | SIMD_INSERT16(m_xv[dest], DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7, i); |
| 1275 | } |
| 1276 | } |
| 1277 | |
| 1278 | static void cfunc_rsp_lhv_simd(void *param) |
| 1279 | { |
| 1280 | ((rsp_device *)param)->ccfunc_rsp_lhv_simd(); |
| 1281 | } |
| 1282 | #endif |
| 1283 | |
| 1284 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1285 | |
| 1286 | inline void rsp_device::ccfunc_rsp_lhv_scalar() |
| 1287 | { |
| 1288 | UINT32 op = m_rsp_state->arg0; |
| 1289 | int dest = (op >> 16) & 0x1f; |
| 1290 | int base = (op >> 21) & 0x1f; |
| 1291 | int index = (op >> 7) & 0xf; |
| 1292 | int offset = (op & 0x7f); |
| 1293 | if (offset & 0x40) |
| 1294 | { |
| 1295 | offset |= 0xffffffc0; |
| 1296 | } |
| 1297 | |
| 1298 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1299 | |
| 1300 | for (int i = 0; i < 8; i++) |
| 1301 | { |
| 1302 | W_VREG_S(dest, i) = DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 1303 | } |
| 1304 | } |
| 1305 | |
| 1306 | static void cfunc_rsp_lhv_scalar(void *param) |
| 1307 | { |
| 1308 | ((rsp_device *)param)->ccfunc_rsp_lhv_scalar(); |
| 1309 | } |
| 1310 | #endif |
| 1311 | |
| 1312 | #if USE_SIMD |
| 1313 | // LFV |
| 1314 | // 31 25 20 15 10 6 0 |
| 1315 | // -------------------------------------------------- |
| 1316 | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1317 | // -------------------------------------------------- |
| 1318 | // |
| 1319 | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 1320 | |
| 1321 | inline void rsp_device::ccfunc_rsp_lfv_simd() |
| 1322 | { |
| 1323 | UINT32 op = m_rsp_state->arg0; |
| 1324 | int dest = (op >> 16) & 0x1f; |
| 1325 | int base = (op >> 21) & 0x1f; |
| 1326 | int index = (op >> 7) & 0xf; |
| 1327 | int offset = (op & 0x7f); |
| 1328 | if (offset & 0x40) |
| 1329 | { |
| 1330 | offset |= 0xffffffc0; |
| 1331 | } |
| 1332 | |
| 1333 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1334 | |
| 1335 | // not sure what happens if 16-byte boundary is crossed... |
| 1336 | |
| 1337 | int end = (index >> 1) + 4; |
| 1338 | |
| 1339 | for (int i = index >> 1; i < end; i++) |
| 1340 | { |
| 1341 | SIMD_INSERT16(m_xv[dest], DM_READ8(ea) << 7, i); |
| 1342 | ea += 4; |
| 1343 | } |
| 1344 | } |
| 1345 | |
| 1346 | static void cfunc_rsp_lfv_simd(void *param) |
| 1347 | { |
| 1348 | ((rsp_device *)param)->ccfunc_rsp_lfv_simd(); |
| 1349 | } |
| 1350 | #endif |
| 1351 | |
| 1352 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1353 | |
| 1354 | inline void rsp_device::ccfunc_rsp_lfv_scalar() |
| 1355 | { |
| 1356 | UINT32 op = m_rsp_state->arg0; |
| 1357 | int dest = (op >> 16) & 0x1f; |
| 1358 | int base = (op >> 21) & 0x1f; |
| 1359 | int index = (op >> 7) & 0xf; |
| 1360 | int offset = (op & 0x7f); |
| 1361 | if (offset & 0x40) |
| 1362 | { |
| 1363 | offset |= 0xffffffc0; |
| 1364 | } |
| 1365 | |
| 1366 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1367 | |
| 1368 | // not sure what happens if 16-byte boundary is crossed... |
| 1369 | |
| 1370 | int end = (index >> 1) + 4; |
| 1371 | |
| 1372 | for (int i = index >> 1; i < end; i++) |
| 1373 | { |
| 1374 | W_VREG_S(dest, i) = DM_READ8(ea) << 7; |
| 1375 | ea += 4; |
| 1376 | } |
| 1377 | } |
| 1378 | |
| 1379 | static void cfunc_rsp_lfv_scalar(void *param) |
| 1380 | { |
| 1381 | ((rsp_device *)param)->ccfunc_rsp_lfv_scalar(); |
| 1382 | } |
| 1383 | #endif |
| 1384 | |
| 1385 | #if USE_SIMD |
| 1386 | // LWV |
| 1387 | // |
| 1388 | // 31 25 20 15 10 6 0 |
| 1389 | // -------------------------------------------------- |
| 1390 | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1391 | // -------------------------------------------------- |
| 1392 | // |
| 1393 | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1394 | // after byte index 15 |
| 1395 | |
| 1396 | inline void rsp_device::ccfunc_rsp_lwv_simd() |
| 1397 | { |
| 1398 | UINT32 op = m_rsp_state->arg0; |
| 1399 | int dest = (op >> 16) & 0x1f; |
| 1400 | int base = (op >> 21) & 0x1f; |
| 1401 | int index = (op >> 7) & 0xf; |
| 1402 | int offset = (op & 0x7f); |
| 1403 | if (offset & 0x40) |
| 1404 | { |
| 1405 | offset |= 0xffffffc0; |
| 1406 | } |
| 1407 | |
| 1408 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1409 | int end = (16 - index) + 16; |
| 1410 | |
| 1411 | UINT8 val[16]; |
| 1412 | for (int i = (16 - index); i < end; i++) |
| 1413 | { |
| 1414 | val[i & 0xf] = DM_READ8(ea); |
| 1415 | ea += 4; |
| 1416 | } |
| 1417 | |
| 1418 | m_xv[dest] = _mm_set_epi8(val[15], val[14], val[13], val[12], val[11], val[10], val[ 9], val[ 8], |
| 1419 | val[ 7], val[ 6], val[ 5], val[ 4], val[ 3], val[ 2], val[ 1], val[ 0]); |
| 1420 | } |
| 1421 | |
| 1422 | static void cfunc_rsp_lwv_simd(void *param) |
| 1423 | { |
| 1424 | ((rsp_device *)param)->ccfunc_rsp_lwv_simd(); |
| 1425 | } |
| 1426 | #endif |
| 1427 | |
| 1428 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1429 | |
| 1430 | inline void rsp_device::ccfunc_rsp_lwv_scalar() |
| 1431 | { |
| 1432 | UINT32 op = m_rsp_state->arg0; |
| 1433 | int dest = (op >> 16) & 0x1f; |
| 1434 | int base = (op >> 21) & 0x1f; |
| 1435 | int index = (op >> 7) & 0xf; |
| 1436 | int offset = (op & 0x7f); |
| 1437 | if (offset & 0x40) |
| 1438 | { |
| 1439 | offset |= 0xffffffc0; |
| 1440 | } |
| 1441 | |
| 1442 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1443 | int end = (16 - index) + 16; |
| 1444 | |
| 1445 | for (int i = (16 - index); i < end; i++) |
| 1446 | { |
| 1447 | VREG_B(dest, i & 0xf) = DM_READ8(ea); |
| 1448 | ea += 4; |
| 1449 | } |
| 1450 | } |
| 1451 | |
| 1452 | static void cfunc_rsp_lwv_scalar(void *param) |
| 1453 | { |
| 1454 | ((rsp_device *)param)->ccfunc_rsp_lwv_scalar(); |
| 1455 | } |
| 1456 | #endif |
| 1457 | |
| 1458 | #if USE_SIMD |
| 1459 | // LTV |
| 1460 | // |
| 1461 | // 31 25 20 15 10 6 0 |
| 1462 | // -------------------------------------------------- |
| 1463 | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1464 | // -------------------------------------------------- |
| 1465 | // |
| 1466 | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 1467 | |
| 1468 | inline void rsp_device::ccfunc_rsp_ltv_simd() |
| 1469 | { |
| 1470 | UINT32 op = m_rsp_state->arg0; |
| 1471 | int dest = (op >> 16) & 0x1f; |
| 1472 | int base = (op >> 21) & 0x1f; |
| 1473 | int index = (op >> 7) & 0xf; |
| 1474 | int offset = (op & 0x7f); |
| 1475 | |
| 1476 | // FIXME: has a small problem with odd indices |
| 1477 | |
| 1478 | int vs = dest; |
| 1479 | int ve = dest + 8; |
| 1480 | if (ve > 32) |
| 1481 | { |
| 1482 | ve = 32; |
| 1483 | } |
| 1484 | |
| 1485 | int element = 7 - (index >> 1); |
| 1486 | |
| 1487 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1488 | |
| 1489 | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 1490 | for (int i = vs; i < ve; i++) |
| 1491 | { |
| 1492 | element = (8 - (index >> 1) + (i - vs)) << 1; |
| 1493 | UINT16 value = (DM_READ8(ea) << 8) | DM_READ8(ea + 1); |
| 1494 | SIMD_INSERT16(m_xv[i], value, (element >> 1)); |
| 1495 | ea += 2; |
| 1496 | } |
| 1497 | } |
| 1498 | |
| 1499 | static void cfunc_rsp_ltv_simd(void *param) |
| 1500 | { |
| 1501 | ((rsp_device *)param)->ccfunc_rsp_ltv_simd(); |
| 1502 | } |
| 1503 | #endif |
| 1504 | |
| 1505 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1506 | |
| 1507 | inline void rsp_device::ccfunc_rsp_ltv_scalar() |
| 1508 | { |
| 1509 | UINT32 op = m_rsp_state->arg0; |
| 1510 | int dest = (op >> 16) & 0x1f; |
| 1511 | int base = (op >> 21) & 0x1f; |
| 1512 | int index = (op >> 7) & 0xf; |
| 1513 | int offset = (op & 0x7f); |
| 1514 | |
| 1515 | // FIXME: has a small problem with odd indices |
| 1516 | |
| 1517 | int vs = dest; |
| 1518 | int ve = dest + 8; |
| 1519 | if (ve > 32) |
| 1520 | { |
| 1521 | ve = 32; |
| 1522 | } |
| 1523 | |
| 1524 | int element = 7 - (index >> 1); |
| 1525 | |
| 1526 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1527 | |
| 1528 | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 1529 | for (int i = vs; i < ve; i++) |
| 1530 | { |
| 1531 | element = (8 - (index >> 1) + (i - vs)) << 1; |
| 1532 | VREG_B(i, (element & 0xf)) = DM_READ8(ea); |
| 1533 | VREG_B(i, ((element + 1) & 0xf)) = DM_READ8(ea + 1); |
| 1534 | ea += 2; |
| 1535 | } |
| 1536 | } |
| 1537 | |
| 1538 | static void cfunc_rsp_ltv_scalar(void *param) |
| 1539 | { |
| 1540 | ((rsp_device *)param)->ccfunc_rsp_ltv_scalar(); |
| 1541 | } |
| 1542 | #endif |
| 1543 | |
| 1544 | #if USE_SIMD && SIMUL_SIMD |
| 1545 | inline void rsp_device::ccfunc_backup_regs() |
| 1546 | { |
| 1547 | memcpy(m_old_dmem, m_dmem8, sizeof(m_old_dmem)); |
| 1548 | memcpy(m_old_r, m_r, sizeof(m_r)); |
| 1549 | |
| 1550 | m_simd_reciprocal_res = m_reciprocal_res; |
| 1551 | m_simd_reciprocal_high = m_reciprocal_high; |
| 1552 | m_simd_dp_allowed = m_dp_allowed; |
| 1553 | |
| 1554 | m_reciprocal_res = m_old_reciprocal_res; |
| 1555 | m_reciprocal_high = m_old_reciprocal_high; |
| 1556 | m_dp_allowed = m_old_dp_allowed; |
| 1557 | } |
| 1558 | |
| 1559 | static void cfunc_backup_regs(void *param) |
| 1560 | { |
| 1561 | ((rsp_device *)param)->ccfunc_backup_regs(); |
| 1562 | } |
| 1563 | |
| 1564 | inline void rsp_device::ccfunc_restore_regs() |
| 1565 | { |
| 1566 | memcpy(m_scalar_r, m_r, sizeof(m_r)); |
| 1567 | memcpy(m_r, m_old_r, sizeof(m_r)); |
| 1568 | memcpy(m_scalar_dmem, m_dmem8, sizeof(m_scalar_dmem)); |
| 1569 | memcpy(m_dmem8, m_old_dmem, sizeof(m_old_dmem)); |
| 1570 | |
| 1571 | m_scalar_reciprocal_res = m_reciprocal_res; |
| 1572 | m_scalar_reciprocal_high = m_reciprocal_high; |
| 1573 | m_scalar_dp_allowed = m_dp_allowed; |
| 1574 | |
| 1575 | m_reciprocal_res = m_simd_reciprocal_res; |
| 1576 | m_reciprocal_high = m_simd_reciprocal_high; |
| 1577 | m_dp_allowed = m_simd_dp_allowed; |
| 1578 | } |
| 1579 | |
| 1580 | static void cfunc_restore_regs(void *param) |
| 1581 | { |
| 1582 | ((rsp_device *)param)->ccfunc_restore_regs(); |
| 1583 | } |
| 1584 | |
| 1585 | inline void rsp_device::ccfunc_verify_regs() |
| 1586 | { |
| 1587 | int op = m_rsp_state->arg0; |
| 1588 | if (VEC_ACCUM_H(0) != ACCUM_H(0)) fatalerror("ACCUM_H element 0 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(0), ACCUM_H(0), op); |
| 1589 | if (VEC_ACCUM_H(1) != ACCUM_H(1)) fatalerror("ACCUM_H element 1 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(1), ACCUM_H(1), op); |
| 1590 | if (VEC_ACCUM_H(2) != ACCUM_H(2)) fatalerror("ACCUM_H element 2 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(2), ACCUM_H(2), op); |
| 1591 | if (VEC_ACCUM_H(3) != ACCUM_H(3)) fatalerror("ACCUM_H element 3 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(3), ACCUM_H(3), op); |
| 1592 | if (VEC_ACCUM_H(4) != ACCUM_H(4)) fatalerror("ACCUM_H element 4 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(4), ACCUM_H(4), op); |
| 1593 | if (VEC_ACCUM_H(5) != ACCUM_H(5)) fatalerror("ACCUM_H element 5 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(5), ACCUM_H(5), op); |
| 1594 | if (VEC_ACCUM_H(6) != ACCUM_H(6)) fatalerror("ACCUM_H element 6 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(6), ACCUM_H(6), op); |
| 1595 | if (VEC_ACCUM_H(7) != ACCUM_H(7)) fatalerror("ACCUM_H element 7 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(7), ACCUM_H(7), op); |
| 1596 | if (VEC_ACCUM_M(0) != ACCUM_M(0)) fatalerror("ACCUM_M element 0 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(0), ACCUM_M(0), op); |
| 1597 | if (VEC_ACCUM_M(1) != ACCUM_M(1)) fatalerror("ACCUM_M element 1 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(1), ACCUM_M(1), op); |
| 1598 | if (VEC_ACCUM_M(2) != ACCUM_M(2)) fatalerror("ACCUM_M element 2 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(2), ACCUM_M(2), op); |
| 1599 | if (VEC_ACCUM_M(3) != ACCUM_M(3)) fatalerror("ACCUM_M element 3 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(3), ACCUM_M(3), op); |
| 1600 | if (VEC_ACCUM_M(4) != ACCUM_M(4)) fatalerror("ACCUM_M element 4 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(4), ACCUM_M(4), op); |
| 1601 | if (VEC_ACCUM_M(5) != ACCUM_M(5)) fatalerror("ACCUM_M element 5 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(5), ACCUM_M(5), op); |
| 1602 | if (VEC_ACCUM_M(6) != ACCUM_M(6)) fatalerror("ACCUM_M element 6 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(6), ACCUM_M(6), op); |
| 1603 | if (VEC_ACCUM_M(7) != ACCUM_M(7)) fatalerror("ACCUM_M element 7 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(7), ACCUM_M(7), op); |
| 1604 | if (VEC_ACCUM_L(0) != ACCUM_L(0)) fatalerror("ACCUM_L element 0 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(0), ACCUM_L(0), op); |
| 1605 | if (VEC_ACCUM_L(1) != ACCUM_L(1)) fatalerror("ACCUM_L element 1 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(1), ACCUM_L(1), op); |
| 1606 | if (VEC_ACCUM_L(2) != ACCUM_L(2)) fatalerror("ACCUM_L element 2 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(2), ACCUM_L(2), op); |
| 1607 | if (VEC_ACCUM_L(3) != ACCUM_L(3)) fatalerror("ACCUM_L element 3 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(3), ACCUM_L(3), op); |
| 1608 | if (VEC_ACCUM_L(4) != ACCUM_L(4)) fatalerror("ACCUM_L element 4 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(4), ACCUM_L(4), op); |
| 1609 | if (VEC_ACCUM_L(5) != ACCUM_L(5)) fatalerror("ACCUM_L element 5 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(5), ACCUM_L(5), op); |
| 1610 | if (VEC_ACCUM_L(6) != ACCUM_L(6)) fatalerror("ACCUM_L element 6 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(6), ACCUM_L(6), op); |
| 1611 | if (VEC_ACCUM_L(7) != ACCUM_L(7)) fatalerror("ACCUM_L element 7 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(7), ACCUM_L(7), op); |
| 1612 | for (int i = 0; i < 32; i++) |
| 1613 | { |
| 1614 | if (m_rsp_state->r[i] != m_scalar_r[i]) fatalerror("r[%d] mismatch (SIMD %08x vs. Scalar %08x) after op: %08x\n", i, m_rsp_state->r[i], m_scalar_r[i], op); |
| 1615 | for (int el = 0; el < 8; el++) |
| 1616 | { |
| 1617 | UINT16 out; |
| 1618 | SIMD_EXTRACT16(m_xv[i], out, el); |
| 1619 | if ((UINT16)VREG_S(i, el) != out) fatalerror("Vector %d element %d mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", i, el, out, (UINT16)VREG_S(i, el), op); |
| 1620 | } |
| 1621 | } |
| 1622 | for (int i = 0; i < 4096; i++) |
| 1623 | { |
| 1624 | if (m_dmem8[i] != m_scalar_dmem[i]) fatalerror("dmem[%d] mismatch (SIMD %02x vs. Scalar %02x) after op: %08x\n", i, m_dmem8[i], m_scalar_dmem[i], op); |
| 1625 | } |
| 1626 | for (int i = 0; i < 5; i++) |
| 1627 | { |
| 1628 | for (int el = 0; el < 8; el++) |
| 1629 | { |
| 1630 | UINT16 out; |
| 1631 | SIMD_EXTRACT16(m_xvflag[i], out, el); |
| 1632 | if (m_vflag[i][el] != out) fatalerror("flag[%d][%d] mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", i, el, out, m_vflag[i][el], op); |
| 1633 | } |
| 1634 | } |
| 1635 | } |
| 1636 | |
| 1637 | static void cfunc_verify_regs(void *param) |
| 1638 | { |
| 1639 | ((rsp_device *)param)->ccfunc_verify_regs(); |
| 1640 | } |
| 1641 | #endif |
| 1642 | |
| 1643 | #if USE_SIMD |
| 1644 | int rsp_device::generate_lwc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 1645 | { |
| 1646 | //int loopdest; |
| 1647 | UINT32 op = desc->opptr.l[0]; |
| 1648 | //int dest = (op >> 16) & 0x1f; |
| 1649 | //int base = (op >> 21) & 0x1f; |
| 1650 | //int index = (op >> 7) & 0xf; |
| 1651 | int offset = (op & 0x7f); |
| 1652 | //int skip; |
| 1653 | if (offset & 0x40) |
| 1654 | { |
| 1655 | offset |= 0xffffffc0; |
| 1656 | } |
| 1657 | |
| 1658 | switch ((op >> 11) & 0x1f) |
| 1659 | { |
| 1660 | case 0x00: /* LBV */ |
| 1661 | //UML_ADD(block, I0, R32(RSREG), offset); |
| 1662 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1663 | UML_CALLC(block, cfunc_rsp_lbv_simd, this); |
| 1664 | #if SIMUL_SIMD |
| 1665 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1666 | UML_CALLC(block, cfunc_rsp_lbv_scalar, this); |
| 1667 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1668 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1669 | #endif |
| 1670 | return TRUE; |
| 1671 | case 0x01: /* LSV */ |
| 1672 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1673 | UML_CALLC(block, cfunc_rsp_lsv_simd, this); |
| 1674 | #if SIMUL_SIMD |
| 1675 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1676 | UML_CALLC(block, cfunc_rsp_lsv_scalar, this); |
| 1677 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1678 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1679 | #endif |
| 1680 | return TRUE; |
| 1681 | case 0x02: /* LLV */ |
| 1682 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1683 | UML_CALLC(block, cfunc_rsp_llv_simd, this); |
| 1684 | #if SIMUL_SIMD |
| 1685 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1686 | UML_CALLC(block, cfunc_rsp_llv_scalar, this); |
| 1687 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1688 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1689 | #endif |
| 1690 | return TRUE; |
| 1691 | case 0x03: /* LDV */ |
| 1692 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1693 | UML_CALLC(block, cfunc_rsp_ldv_simd, this); |
| 1694 | #if SIMUL_SIMD |
| 1695 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1696 | UML_CALLC(block, cfunc_rsp_ldv_scalar, this); |
| 1697 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1698 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1699 | #endif |
| 1700 | return TRUE; |
| 1701 | case 0x04: /* LQV */ |
| 1702 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1703 | UML_CALLC(block, cfunc_rsp_lqv_simd, this); |
| 1704 | #if SIMUL_SIMD |
| 1705 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1706 | UML_CALLC(block, cfunc_rsp_lqv_scalar, this); |
| 1707 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1708 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1709 | #endif |
| 1710 | return TRUE; |
| 1711 | case 0x05: /* LRV */ |
| 1712 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1713 | UML_CALLC(block, cfunc_rsp_lrv_simd, this); |
| 1714 | #if SIMUL_SIMD |
| 1715 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1716 | UML_CALLC(block, cfunc_rsp_lrv_scalar, this); |
| 1717 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1718 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1719 | #endif |
| 1720 | return TRUE; |
| 1721 | case 0x06: /* LPV */ |
| 1722 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1723 | UML_CALLC(block, cfunc_rsp_lpv_simd, this); |
| 1724 | #if SIMUL_SIMD |
| 1725 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1726 | UML_CALLC(block, cfunc_rsp_lpv_scalar, this); |
| 1727 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1728 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1729 | #endif |
| 1730 | return TRUE; |
| 1731 | case 0x07: /* LUV */ |
| 1732 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1733 | UML_CALLC(block, cfunc_rsp_luv_simd, this); |
| 1734 | #if SIMUL_SIMD |
| 1735 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1736 | UML_CALLC(block, cfunc_rsp_luv_scalar, this); |
| 1737 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1738 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1739 | #endif |
| 1740 | return TRUE; |
| 1741 | case 0x08: /* LHV */ |
| 1742 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1743 | UML_CALLC(block, cfunc_rsp_lhv_simd, this); |
| 1744 | #if SIMUL_SIMD |
| 1745 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1746 | UML_CALLC(block, cfunc_rsp_lhv_scalar, this); |
| 1747 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1748 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1749 | #endif |
| 1750 | return TRUE; |
| 1751 | case 0x09: /* LFV */ |
| 1752 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1753 | UML_CALLC(block, cfunc_rsp_lfv_simd, this); |
| 1754 | #if SIMUL_SIMD |
| 1755 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1756 | UML_CALLC(block, cfunc_rsp_lfv_scalar, this); |
| 1757 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1758 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1759 | #endif |
| 1760 | return TRUE; |
| 1761 | case 0x0a: /* LWV */ |
| 1762 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1763 | UML_CALLC(block, cfunc_rsp_lwv_simd, this); |
| 1764 | #if SIMUL_SIMD |
| 1765 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1766 | UML_CALLC(block, cfunc_rsp_lwv_scalar, this); |
| 1767 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1768 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1769 | #endif |
| 1770 | return TRUE; |
| 1771 | case 0x0b: /* LTV */ |
| 1772 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1773 | UML_CALLC(block, cfunc_rsp_ltv_simd, this); |
| 1774 | #if SIMUL_SIMD |
| 1775 | UML_CALLC(block, cfunc_backup_regs, this); |
| 1776 | UML_CALLC(block, cfunc_rsp_ltv_scalar, this); |
| 1777 | UML_CALLC(block, cfunc_restore_regs, this); |
| 1778 | UML_CALLC(block, cfunc_verify_regs, this); |
| 1779 | #endif |
| 1780 | return TRUE; |
| 1781 | |
| 1782 | default: |
| 1783 | return FALSE; |
| 1784 | } |
| 1785 | } |
| 1786 | |
| 1787 | #else |
| 1788 | |
| 1789 | int rsp_device::generate_lwc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 1790 | { |
| 1791 | //int loopdest; |
| 1792 | UINT32 op = desc->opptr.l[0]; |
| 1793 | //int dest = (op >> 16) & 0x1f; |
| 1794 | //int base = (op >> 21) & 0x1f; |
| 1795 | //int index = (op >> 7) & 0xf; |
| 1796 | int offset = (op & 0x7f); |
| 1797 | //int skip; |
| 1798 | if (offset & 0x40) |
| 1799 | { |
| 1800 | offset |= 0xffffffc0; |
| 1801 | } |
| 1802 | |
| 1803 | switch ((op >> 11) & 0x1f) |
| 1804 | { |
| 1805 | case 0x00: /* LBV */ |
| 1806 | //UML_ADD(block, I0, R32(RSREG), offset); |
| 1807 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1808 | UML_CALLC(block, cfunc_rsp_lbv_scalar, this); |
| 1809 | return TRUE; |
| 1810 | case 0x01: /* LSV */ |
| 1811 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1812 | UML_CALLC(block, cfunc_rsp_lsv_scalar, this); |
| 1813 | return TRUE; |
| 1814 | case 0x02: /* LLV */ |
| 1815 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1816 | UML_CALLC(block, cfunc_rsp_llv_scalar, this); |
| 1817 | return TRUE; |
| 1818 | case 0x03: /* LDV */ |
| 1819 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1820 | UML_CALLC(block, cfunc_rsp_ldv_scalar, this); |
| 1821 | return TRUE; |
| 1822 | case 0x04: /* LQV */ |
| 1823 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1824 | UML_CALLC(block, cfunc_rsp_lqv_scalar, this); |
| 1825 | return TRUE; |
| 1826 | case 0x05: /* LRV */ |
| 1827 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1828 | UML_CALLC(block, cfunc_rsp_lrv_scalar, this); |
| 1829 | return TRUE; |
| 1830 | case 0x06: /* LPV */ |
| 1831 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1832 | UML_CALLC(block, cfunc_rsp_lpv_scalar, this); |
| 1833 | return TRUE; |
| 1834 | case 0x07: /* LUV */ |
| 1835 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1836 | UML_CALLC(block, cfunc_rsp_luv_scalar, this); |
| 1837 | return TRUE; |
| 1838 | case 0x08: /* LHV */ |
| 1839 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1840 | UML_CALLC(block, cfunc_rsp_lhv_scalar, this); |
| 1841 | return TRUE; |
| 1842 | case 0x09: /* LFV */ |
| 1843 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1844 | UML_CALLC(block, cfunc_rsp_lfv_scalar, this); |
| 1845 | return TRUE; |
| 1846 | case 0x0a: /* LWV */ |
| 1847 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1848 | UML_CALLC(block, cfunc_rsp_lwv_scalar, this); |
| 1849 | return TRUE; |
| 1850 | case 0x0b: /* LTV */ |
| 1851 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1852 | UML_CALLC(block, cfunc_rsp_ltv_scalar, this); |
| 1853 | return TRUE; |
| 1854 | |
| 1855 | default: |
| 1856 | return FALSE; |
| 1857 | } |
| 1858 | } |
| 1859 | #endif |
| 1860 | |
| 1861 | #if USE_SIMD |
| 1862 | // SBV |
| 1863 | // |
| 1864 | // 31 25 20 15 10 6 0 |
| 1865 | // -------------------------------------------------- |
| 1866 | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 1867 | // -------------------------------------------------- |
| 1868 | // |
| 1869 | // Stores 1 byte from vector byte index |
| 1870 | |
| 1871 | inline void rsp_device::ccfunc_rsp_sbv_simd() |
| 1872 | { |
| 1873 | UINT32 op = m_rsp_state->arg0; |
| 1874 | int dest = (op >> 16) & 0x1f; |
| 1875 | int base = (op >> 21) & 0x1f; |
| 1876 | int index = (op >> 7) & 0xf; |
| 1877 | int offset = (op & 0x7f); |
| 1878 | if (offset & 0x40) |
| 1879 | { |
| 1880 | offset |= 0xffffffc0; |
| 1881 | } |
| 1882 | |
| 1883 | UINT32 ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 1884 | UINT16 value; |
| 1885 | SIMD_EXTRACT16(m_xv[dest], value, (index >> 1)); |
| 1886 | value >>= (1-(index & 1)) * 8; |
| 1887 | DM_WRITE8(ea, (UINT8)value); |
| 1888 | } |
| 1889 | |
| 1890 | static void cfunc_rsp_sbv_simd(void *param) |
| 1891 | { |
| 1892 | ((rsp_device *)param)->ccfunc_rsp_sbv_simd(); |
| 1893 | } |
| 1894 | #endif |
| 1895 | |
| 1896 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1897 | |
| 1898 | inline void rsp_device::ccfunc_rsp_sbv_scalar() |
| 1899 | { |
| 1900 | UINT32 op = m_rsp_state->arg0; |
| 1901 | int dest = (op >> 16) & 0x1f; |
| 1902 | int base = (op >> 21) & 0x1f; |
| 1903 | int index = (op >> 7) & 0xf; |
| 1904 | int offset = (op & 0x7f); |
| 1905 | if (offset & 0x40) |
| 1906 | { |
| 1907 | offset |= 0xffffffc0; |
| 1908 | } |
| 1909 | |
| 1910 | UINT32 ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 1911 | DM_WRITE8(ea, VREG_B(dest, index)); |
| 1912 | } |
| 1913 | |
| 1914 | static void cfunc_rsp_sbv_scalar(void *param) |
| 1915 | { |
| 1916 | ((rsp_device *)param)->ccfunc_rsp_sbv_scalar(); |
| 1917 | } |
| 1918 | #endif |
| 1919 | |
| 1920 | #if USE_SIMD |
| 1921 | // SSV |
| 1922 | // |
| 1923 | // 31 25 20 15 10 6 0 |
| 1924 | // -------------------------------------------------- |
| 1925 | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 1926 | // -------------------------------------------------- |
| 1927 | // |
| 1928 | // Stores 2 bytes starting from vector byte index |
| 1929 | |
| 1930 | inline void rsp_device::ccfunc_rsp_ssv_simd() |
| 1931 | { |
| 1932 | UINT32 op = m_rsp_state->arg0; |
| 1933 | int dest = (op >> 16) & 0x1f; |
| 1934 | int base = (op >> 21) & 0x1f; |
| 1935 | int index = (op >> 7) & 0xf; |
| 1936 | int offset = (op & 0x7f); |
| 1937 | if (offset & 0x40) |
| 1938 | { |
| 1939 | offset |= 0xffffffc0; |
| 1940 | } |
| 1941 | |
| 1942 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 1943 | |
| 1944 | int end = index + 2; |
| 1945 | for (int i = index; i < end; i++) |
| 1946 | { |
| 1947 | UINT16 value; |
| 1948 | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 1949 | value >>= (1 - (i & 1)) * 8; |
| 1950 | DM_WRITE8(ea, (UINT8)value); |
| 1951 | ea++; |
| 1952 | } |
| 1953 | } |
| 1954 | |
| 1955 | static void cfunc_rsp_ssv_simd(void *param) |
| 1956 | { |
| 1957 | ((rsp_device *)param)->ccfunc_rsp_ssv_simd(); |
| 1958 | } |
| 1959 | #endif |
| 1960 | |
| 1961 | #if (!USE_SIMD || SIMUL_SIMD) |
| 1962 | |
| 1963 | inline void rsp_device::ccfunc_rsp_ssv_scalar() |
| 1964 | { |
| 1965 | UINT32 op = m_rsp_state->arg0; |
| 1966 | int dest = (op >> 16) & 0x1f; |
| 1967 | int base = (op >> 21) & 0x1f; |
| 1968 | int index = (op >> 7) & 0xf; |
| 1969 | int offset = (op & 0x7f); |
| 1970 | if (offset & 0x40) |
| 1971 | { |
| 1972 | offset |= 0xffffffc0; |
| 1973 | } |
| 1974 | |
| 1975 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 1976 | |
| 1977 | int end = index + 2; |
| 1978 | for (int i = index; i < end; i++) |
| 1979 | { |
| 1980 | DM_WRITE8(ea, VREG_B(dest, i)); |
| 1981 | ea++; |
| 1982 | } |
| 1983 | } |
| 1984 | |
| 1985 | static void cfunc_rsp_ssv_scalar(void *param) |
| 1986 | { |
| 1987 | ((rsp_device *)param)->ccfunc_rsp_ssv_scalar(); |
| 1988 | } |
| 1989 | #endif |
| 1990 | |
| 1991 | #if USE_SIMD |
| 1992 | // SLV |
| 1993 | // |
| 1994 | // 31 25 20 15 10 6 0 |
| 1995 | // -------------------------------------------------- |
| 1996 | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 1997 | // -------------------------------------------------- |
| 1998 | // |
| 1999 | // Stores 4 bytes starting from vector byte index |
| 2000 | |
| 2001 | inline void rsp_device::ccfunc_rsp_slv_simd() |
| 2002 | { |
| 2003 | UINT32 op = m_rsp_state->arg0; |
| 2004 | int dest = (op >> 16) & 0x1f; |
| 2005 | int base = (op >> 21) & 0x1f; |
| 2006 | int index = (op >> 7) & 0xf; |
| 2007 | int offset = (op & 0x7f); |
| 2008 | if (offset & 0x40) |
| 2009 | { |
| 2010 | offset |= 0xffffffc0; |
| 2011 | } |
| 2012 | |
| 2013 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 2014 | |
| 2015 | int end = index + 4; |
| 2016 | for (int i = index; i < end; i++) |
| 2017 | { |
| 2018 | UINT16 value; |
| 2019 | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 2020 | value >>= (1 - (i & 1)) * 8; |
| 2021 | DM_WRITE8(ea, (UINT8)value); |
| 2022 | ea++; |
| 2023 | } |
| 2024 | } |
| 2025 | |
| 2026 | static void cfunc_rsp_slv_simd(void *param) |
| 2027 | { |
| 2028 | ((rsp_device *)param)->ccfunc_rsp_slv_simd(); |
| 2029 | } |
| 2030 | #endif |
| 2031 | |
| 2032 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2033 | |
| 2034 | inline void rsp_device::ccfunc_rsp_slv_scalar() |
| 2035 | { |
| 2036 | UINT32 op = m_rsp_state->arg0; |
| 2037 | int dest = (op >> 16) & 0x1f; |
| 2038 | int base = (op >> 21) & 0x1f; |
| 2039 | int index = (op >> 7) & 0xf; |
| 2040 | int offset = (op & 0x7f); |
| 2041 | if (offset & 0x40) |
| 2042 | { |
| 2043 | offset |= 0xffffffc0; |
| 2044 | } |
| 2045 | |
| 2046 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 2047 | |
| 2048 | int end = index + 4; |
| 2049 | for (int i = index; i < end; i++) |
| 2050 | { |
| 2051 | DM_WRITE8(ea, VREG_B(dest, i)); |
| 2052 | ea++; |
| 2053 | } |
| 2054 | } |
| 2055 | |
| 2056 | static void cfunc_rsp_slv_scalar(void *param) |
| 2057 | { |
| 2058 | ((rsp_device *)param)->ccfunc_rsp_slv_scalar(); |
| 2059 | } |
| 2060 | #endif |
| 2061 | |
| 2062 | #if USE_SIMD |
| 2063 | // SDV |
| 2064 | // |
| 2065 | // 31 25 20 15 10 6 0 |
| 2066 | // -------------------------------------------------- |
| 2067 | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 2068 | // -------------------------------------------------- |
| 2069 | // |
| 2070 | // Stores 8 bytes starting from vector byte index |
| 2071 | |
| 2072 | inline void rsp_device::ccfunc_rsp_sdv_simd() |
| 2073 | { |
| 2074 | UINT32 op = m_rsp_state->arg0; |
| 2075 | int dest = (op >> 16) & 0x1f; |
| 2076 | int base = (op >> 21) & 0x1f; |
| 2077 | int index = (op >> 7) & 0x8; |
| 2078 | int offset = (op & 0x7f); |
| 2079 | if (offset & 0x40) |
| 2080 | { |
| 2081 | offset |= 0xffffffc0; |
| 2082 | } |
| 2083 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2084 | |
| 2085 | int end = index + 8; |
| 2086 | for (int i = index; i < end; i++) |
| 2087 | { |
| 2088 | UINT16 value; |
| 2089 | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 2090 | value >>= (1 - (i & 1)) * 8; |
| 2091 | DM_WRITE8(ea, (UINT8)value); |
| 2092 | ea++; |
| 2093 | } |
| 2094 | } |
| 2095 | |
| 2096 | static void cfunc_rsp_sdv_simd(void *param) |
| 2097 | { |
| 2098 | ((rsp_device *)param)->ccfunc_rsp_sdv_simd(); |
| 2099 | } |
| 2100 | #endif |
| 2101 | |
| 2102 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2103 | |
| 2104 | inline void rsp_device::ccfunc_rsp_sdv_scalar() |
| 2105 | { |
| 2106 | UINT32 op = m_rsp_state->arg0; |
| 2107 | int dest = (op >> 16) & 0x1f; |
| 2108 | int base = (op >> 21) & 0x1f; |
| 2109 | int index = (op >> 7) & 0x8; |
| 2110 | int offset = (op & 0x7f); |
| 2111 | if (offset & 0x40) |
| 2112 | { |
| 2113 | offset |= 0xffffffc0; |
| 2114 | } |
| 2115 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2116 | |
| 2117 | int end = index + 8; |
| 2118 | for (int i = index; i < end; i++) |
| 2119 | { |
| 2120 | DM_WRITE8(ea, VREG_B(dest, i)); |
| 2121 | ea++; |
| 2122 | } |
| 2123 | } |
| 2124 | |
| 2125 | static void cfunc_rsp_sdv_scalar(void *param) |
| 2126 | { |
| 2127 | ((rsp_device *)param)->ccfunc_rsp_sdv_scalar(); |
| 2128 | } |
| 2129 | #endif |
| 2130 | |
| 2131 | #if USE_SIMD |
| 2132 | // SQV |
| 2133 | // |
| 2134 | // 31 25 20 15 10 6 0 |
| 2135 | // -------------------------------------------------- |
| 2136 | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 2137 | // -------------------------------------------------- |
| 2138 | // |
| 2139 | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 2140 | |
| 2141 | inline void rsp_device::ccfunc_rsp_sqv_simd() |
| 2142 | { |
| 2143 | UINT32 op = m_rsp_state->arg0; |
| 2144 | int dest = (op >> 16) & 0x1f; |
| 2145 | int base = (op >> 21) & 0x1f; |
| 2146 | int index = (op >> 7) & 0xf; |
| 2147 | int offset = (op & 0x7f); |
| 2148 | if (offset & 0x40) |
| 2149 | { |
| 2150 | offset |= 0xffffffc0; |
| 2151 | } |
| 2152 | |
| 2153 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2154 | int end = index + (16 - (ea & 0xf)); |
| 2155 | for (int i=index; i < end; i++) |
| 2156 | { |
| 2157 | UINT16 value; |
| 2158 | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 2159 | value >>= (1-(i & 1)) * 8; |
| 2160 | DM_WRITE8(ea, (UINT8)value); |
| 2161 | ea++; |
| 2162 | } |
| 2163 | } |
| 2164 | |
| 2165 | static void cfunc_rsp_sqv_simd(void *param) |
| 2166 | { |
| 2167 | ((rsp_device *)param)->ccfunc_rsp_sqv_simd(); |
| 2168 | } |
| 2169 | #endif |
| 2170 | |
| 2171 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2172 | |
| 2173 | inline void rsp_device::ccfunc_rsp_sqv_scalar() |
| 2174 | { |
| 2175 | UINT32 op = m_rsp_state->arg0; |
| 2176 | int dest = (op >> 16) & 0x1f; |
| 2177 | int base = (op >> 21) & 0x1f; |
| 2178 | int index = (op >> 7) & 0xf; |
| 2179 | int offset = (op & 0x7f); |
| 2180 | if (offset & 0x40) |
| 2181 | { |
| 2182 | offset |= 0xffffffc0; |
| 2183 | } |
| 2184 | |
| 2185 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2186 | int end = index + (16 - (ea & 0xf)); |
| 2187 | for (int i=index; i < end; i++) |
| 2188 | { |
| 2189 | DM_WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 2190 | ea++; |
| 2191 | } |
| 2192 | } |
| 2193 | |
| 2194 | static void cfunc_rsp_sqv_scalar(void *param) |
| 2195 | { |
| 2196 | ((rsp_device *)param)->ccfunc_rsp_sqv_scalar(); |
| 2197 | } |
| 2198 | #endif |
| 2199 | |
| 2200 | #if USE_SIMD |
| 2201 | // SRV |
| 2202 | // |
| 2203 | // 31 25 20 15 10 6 0 |
| 2204 | // -------------------------------------------------- |
| 2205 | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 2206 | // -------------------------------------------------- |
| 2207 | // |
| 2208 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 2209 | |
| 2210 | inline void rsp_device::ccfunc_rsp_srv_simd() |
| 2211 | { |
| 2212 | UINT32 op = m_rsp_state->arg0; |
| 2213 | int dest = (op >> 16) & 0x1f; |
| 2214 | int base = (op >> 21) & 0x1f; |
| 2215 | int index = (op >> 7) & 0xf; |
| 2216 | int offset = (op & 0x7f); |
| 2217 | if (offset & 0x40) |
| 2218 | { |
| 2219 | offset |= 0xffffffc0; |
| 2220 | } |
| 2221 | |
| 2222 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2223 | |
| 2224 | int end = index + (ea & 0xf); |
| 2225 | int o = (16 - (ea & 0xf)) & 0xf; |
| 2226 | ea &= ~0xf; |
| 2227 | |
| 2228 | for (int i = index; i < end; i++) |
| 2229 | { |
| 2230 | UINT32 bi = (i + o) & 0xf; |
| 2231 | UINT16 value; |
| 2232 | SIMD_EXTRACT16(m_xv[dest], value, (bi >> 1)); |
| 2233 | value >>= (1-(bi & 1)) * 8; |
| 2234 | DM_WRITE8(ea, (UINT8)value); |
| 2235 | ea++; |
| 2236 | } |
| 2237 | } |
| 2238 | |
| 2239 | static void cfunc_rsp_srv_simd(void *param) |
| 2240 | { |
| 2241 | ((rsp_device *)param)->ccfunc_rsp_srv_simd(); |
| 2242 | } |
| 2243 | #endif |
| 2244 | |
| 2245 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2246 | |
| 2247 | inline void rsp_device::ccfunc_rsp_srv_scalar() |
| 2248 | { |
| 2249 | UINT32 op = m_rsp_state->arg0; |
| 2250 | int dest = (op >> 16) & 0x1f; |
| 2251 | int base = (op >> 21) & 0x1f; |
| 2252 | int index = (op >> 7) & 0xf; |
| 2253 | int offset = (op & 0x7f); |
| 2254 | if (offset & 0x40) |
| 2255 | { |
| 2256 | offset |= 0xffffffc0; |
| 2257 | } |
| 2258 | |
| 2259 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2260 | |
| 2261 | int end = index + (ea & 0xf); |
| 2262 | int o = (16 - (ea & 0xf)) & 0xf; |
| 2263 | ea &= ~0xf; |
| 2264 | |
| 2265 | for (int i = index; i < end; i++) |
| 2266 | { |
| 2267 | DM_WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 2268 | ea++; |
| 2269 | } |
| 2270 | } |
| 2271 | |
| 2272 | static void cfunc_rsp_srv_scalar(void *param) |
| 2273 | { |
| 2274 | ((rsp_device *)param)->ccfunc_rsp_srv_scalar(); |
| 2275 | } |
| 2276 | #endif |
| 2277 | |
| 2278 | #if USE_SIMD |
| 2279 | // SPV |
| 2280 | // |
| 2281 | // 31 25 20 15 10 6 0 |
| 2282 | // -------------------------------------------------- |
| 2283 | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 2284 | // -------------------------------------------------- |
| 2285 | // |
| 2286 | // Stores upper 8 bits of each element |
| 2287 | |
| 2288 | inline void rsp_device::ccfunc_rsp_spv_simd() |
| 2289 | { |
| 2290 | UINT32 op = m_rsp_state->arg0; |
| 2291 | int dest = (op >> 16) & 0x1f; |
| 2292 | int base = (op >> 21) & 0x1f; |
| 2293 | int index = (op >> 7) & 0xf; |
| 2294 | int offset = (op & 0x7f); |
| 2295 | if (offset & 0x40) |
| 2296 | { |
| 2297 | offset |= 0xffffffc0; |
| 2298 | } |
| 2299 | |
| 2300 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2301 | int end = index + 8; |
| 2302 | for (int i=index; i < end; i++) |
| 2303 | { |
| 2304 | if ((i & 0xf) < 8) |
| 2305 | { |
| 2306 | UINT16 value; |
| 2307 | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2308 | DM_WRITE8(ea, (UINT8)(value >> 8)); |
| 2309 | } |
| 2310 | else |
| 2311 | { |
| 2312 | UINT16 value; |
| 2313 | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2314 | DM_WRITE8(ea, (UINT8)(value >> 7)); |
| 2315 | } |
| 2316 | ea++; |
| 2317 | } |
| 2318 | } |
| 2319 | |
| 2320 | static void cfunc_rsp_spv_simd(void *param) |
| 2321 | { |
| 2322 | ((rsp_device *)param)->ccfunc_rsp_spv_simd(); |
| 2323 | } |
| 2324 | #endif |
| 2325 | |
| 2326 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2327 | |
| 2328 | inline void rsp_device::ccfunc_rsp_spv_scalar() |
| 2329 | { |
| 2330 | UINT32 op = m_rsp_state->arg0; |
| 2331 | int dest = (op >> 16) & 0x1f; |
| 2332 | int base = (op >> 21) & 0x1f; |
| 2333 | int index = (op >> 7) & 0xf; |
| 2334 | int offset = (op & 0x7f); |
| 2335 | if (offset & 0x40) |
| 2336 | { |
| 2337 | offset |= 0xffffffc0; |
| 2338 | } |
| 2339 | |
| 2340 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2341 | int end = index + 8; |
| 2342 | for (int i=index; i < end; i++) |
| 2343 | { |
| 2344 | if ((i & 0xf) < 8) |
| 2345 | { |
| 2346 | DM_WRITE8(ea, VREG_B(dest, (i & 0xf) << 1)); |
| 2347 | } |
| 2348 | else |
| 2349 | { |
| 2350 | DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 2351 | } |
| 2352 | ea++; |
| 2353 | } |
| 2354 | } |
| 2355 | |
| 2356 | static void cfunc_rsp_spv_scalar(void *param) |
| 2357 | { |
| 2358 | ((rsp_device *)param)->ccfunc_rsp_spv_scalar(); |
| 2359 | } |
| 2360 | #endif |
| 2361 | |
| 2362 | #if USE_SIMD |
| 2363 | // SUV |
| 2364 | // |
| 2365 | // 31 25 20 15 10 6 0 |
| 2366 | // -------------------------------------------------- |
| 2367 | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 2368 | // -------------------------------------------------- |
| 2369 | // |
| 2370 | // Stores bits 14-7 of each element |
| 2371 | |
| 2372 | inline void rsp_device::ccfunc_rsp_suv_simd() |
| 2373 | { |
| 2374 | UINT32 op = m_rsp_state->arg0; |
| 2375 | int dest = (op >> 16) & 0x1f; |
| 2376 | int base = (op >> 21) & 0x1f; |
| 2377 | int index = (op >> 7) & 0xf; |
| 2378 | int offset = (op & 0x7f); |
| 2379 | if (offset & 0x40) |
| 2380 | { |
| 2381 | offset |= 0xffffffc0; |
| 2382 | } |
| 2383 | |
| 2384 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2385 | int end = index + 8; |
| 2386 | for (int i=index; i < end; i++) |
| 2387 | { |
| 2388 | if ((i & 0xf) < 8) |
| 2389 | { |
| 2390 | UINT16 value; |
| 2391 | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2392 | DM_WRITE8(ea, (UINT8)(value >> 7)); |
| 2393 | } |
| 2394 | else |
| 2395 | { |
| 2396 | UINT16 value; |
| 2397 | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2398 | DM_WRITE8(ea, (UINT8)(value >> 8)); |
| 2399 | } |
| 2400 | ea++; |
| 2401 | } |
| 2402 | } |
| 2403 | |
| 2404 | static void cfunc_rsp_suv_simd(void *param) |
| 2405 | { |
| 2406 | ((rsp_device *)param)->ccfunc_rsp_suv_simd(); |
| 2407 | } |
| 2408 | #endif |
| 2409 | |
| 2410 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2411 | |
| 2412 | inline void rsp_device::ccfunc_rsp_suv_scalar() |
| 2413 | { |
| 2414 | UINT32 op = m_rsp_state->arg0; |
| 2415 | int dest = (op >> 16) & 0x1f; |
| 2416 | int base = (op >> 21) & 0x1f; |
| 2417 | int index = (op >> 7) & 0xf; |
| 2418 | int offset = (op & 0x7f); |
| 2419 | if (offset & 0x40) |
| 2420 | { |
| 2421 | offset |= 0xffffffc0; |
| 2422 | } |
| 2423 | |
| 2424 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2425 | int end = index + 8; |
| 2426 | for (int i=index; i < end; i++) |
| 2427 | { |
| 2428 | if ((i & 0xf) < 8) |
| 2429 | { |
| 2430 | DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 2431 | } |
| 2432 | else |
| 2433 | { |
| 2434 | DM_WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 2435 | } |
| 2436 | ea++; |
| 2437 | } |
| 2438 | } |
| 2439 | |
| 2440 | static void cfunc_rsp_suv_scalar(void *param) |
| 2441 | { |
| 2442 | ((rsp_device *)param)->ccfunc_rsp_suv_scalar(); |
| 2443 | } |
| 2444 | #endif |
| 2445 | |
| 2446 | #if USE_SIMD |
| 2447 | // SHV |
| 2448 | // |
| 2449 | // 31 25 20 15 10 6 0 |
| 2450 | // -------------------------------------------------- |
| 2451 | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 2452 | // -------------------------------------------------- |
| 2453 | // |
| 2454 | // Stores bits 14-7 of each element, with 2-byte stride |
| 2455 | |
| 2456 | inline void rsp_device::ccfunc_rsp_shv_simd() |
| 2457 | { |
| 2458 | UINT32 op = m_rsp_state->arg0; |
| 2459 | int dest = (op >> 16) & 0x1f; |
| 2460 | int base = (op >> 21) & 0x1f; |
| 2461 | int index = (op >> 7) & 0xf; |
| 2462 | int offset = (op & 0x7f); |
| 2463 | if (offset & 0x40) |
| 2464 | { |
| 2465 | offset |= 0xffffffc0; |
| 2466 | } |
| 2467 | |
| 2468 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2469 | for (int i=0; i < 8; i++) |
| 2470 | { |
| 2471 | int element = index + (i << 1); |
| 2472 | UINT16 value; |
| 2473 | SIMD_EXTRACT16(m_xv[dest], value, element >> 1); |
| 2474 | DM_WRITE8(ea, (value >> 7) & 0x00ff); |
| 2475 | ea += 2; |
| 2476 | } |
| 2477 | } |
| 2478 | |
| 2479 | static void cfunc_rsp_shv_simd(void *param) |
| 2480 | { |
| 2481 | ((rsp_device *)param)->ccfunc_rsp_shv_simd(); |
| 2482 | } |
| 2483 | #endif |
| 2484 | |
| 2485 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2486 | |
| 2487 | inline void rsp_device::ccfunc_rsp_shv_scalar() |
| 2488 | { |
| 2489 | UINT32 op = m_rsp_state->arg0; |
| 2490 | int dest = (op >> 16) & 0x1f; |
| 2491 | int base = (op >> 21) & 0x1f; |
| 2492 | int index = (op >> 7) & 0xf; |
| 2493 | int offset = (op & 0x7f); |
| 2494 | if (offset & 0x40) |
| 2495 | { |
| 2496 | offset |= 0xffffffc0; |
| 2497 | } |
| 2498 | |
| 2499 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2500 | for (int i=0; i < 8; i++) |
| 2501 | { |
| 2502 | int element = index + (i << 1); |
| 2503 | UINT8 d = (VREG_B(dest, (element & 0xf)) << 1) | |
| 2504 | (VREG_B(dest, ((element + 1) & 0xf)) >> 7); |
| 2505 | DM_WRITE8(ea, d); |
| 2506 | ea += 2; |
| 2507 | } |
| 2508 | } |
| 2509 | |
| 2510 | static void cfunc_rsp_shv_scalar(void *param) |
| 2511 | { |
| 2512 | ((rsp_device *)param)->ccfunc_rsp_shv_scalar(); |
| 2513 | } |
| 2514 | #endif |
| 2515 | |
| 2516 | #if USE_SIMD |
| 2517 | // SFV |
| 2518 | // |
| 2519 | // 31 25 20 15 10 6 0 |
| 2520 | // -------------------------------------------------- |
| 2521 | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 2522 | // -------------------------------------------------- |
| 2523 | // |
| 2524 | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 2525 | |
| 2526 | inline void rsp_device::ccfunc_rsp_sfv_simd() |
| 2527 | { |
| 2528 | UINT32 op = m_rsp_state->arg0; |
| 2529 | int dest = (op >> 16) & 0x1f; |
| 2530 | int base = (op >> 21) & 0x1f; |
| 2531 | int index = (op >> 7) & 0xf; |
| 2532 | int offset = (op & 0x7f); |
| 2533 | if (offset & 0x40) |
| 2534 | { |
| 2535 | offset |= 0xffffffc0; |
| 2536 | } |
| 2537 | |
| 2538 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2539 | int eaoffset = ea & 0xf; |
| 2540 | ea &= ~0xf; |
| 2541 | |
| 2542 | int end = (index >> 1) + 4; |
| 2543 | |
| 2544 | for (int i = index>>1; i < end; i++) |
| 2545 | { |
| 2546 | UINT16 value; |
| 2547 | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2548 | DM_WRITE8(ea + (eaoffset & 0xf), (value >> 7) & 0x00ff); |
| 2549 | eaoffset += 4; |
| 2550 | } |
| 2551 | } |
| 2552 | |
| 2553 | static void cfunc_rsp_sfv_simd(void *param) |
| 2554 | { |
| 2555 | ((rsp_device *)param)->ccfunc_rsp_sfv_simd(); |
| 2556 | } |
| 2557 | #endif |
| 2558 | |
| 2559 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2560 | |
| 2561 | inline void rsp_device::ccfunc_rsp_sfv_scalar() |
| 2562 | { |
| 2563 | UINT32 op = m_rsp_state->arg0; |
| 2564 | int dest = (op >> 16) & 0x1f; |
| 2565 | int base = (op >> 21) & 0x1f; |
| 2566 | int index = (op >> 7) & 0xf; |
| 2567 | int offset = (op & 0x7f); |
| 2568 | if (offset & 0x40) |
| 2569 | { |
| 2570 | offset |= 0xffffffc0; |
| 2571 | } |
| 2572 | |
| 2573 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2574 | int eaoffset = ea & 0xf; |
| 2575 | ea &= ~0xf; |
| 2576 | |
| 2577 | int end = (index >> 1) + 4; |
| 2578 | |
| 2579 | for (int i = index>>1; i < end; i++) |
| 2580 | { |
| 2581 | DM_WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 2582 | eaoffset += 4; |
| 2583 | } |
| 2584 | } |
| 2585 | |
| 2586 | static void cfunc_rsp_sfv_scalar(void *param) |
| 2587 | { |
| 2588 | ((rsp_device *)param)->ccfunc_rsp_sfv_scalar(); |
| 2589 | } |
| 2590 | #endif |
| 2591 | |
| 2592 | #if USE_SIMD |
| 2593 | // SWV |
| 2594 | // |
| 2595 | // 31 25 20 15 10 6 0 |
| 2596 | // -------------------------------------------------- |
| 2597 | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 2598 | // -------------------------------------------------- |
| 2599 | // |
| 2600 | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 2601 | // after byte index 15 |
| 2602 | |
| 2603 | inline void rsp_device::ccfunc_rsp_swv_simd() |
| 2604 | { |
| 2605 | UINT32 op = m_rsp_state->arg0; |
| 2606 | int dest = (op >> 16) & 0x1f; |
| 2607 | int base = (op >> 21) & 0x1f; |
| 2608 | int index = (op >> 7) & 0xf; |
| 2609 | int offset = (op & 0x7f); |
| 2610 | if (offset & 0x40) |
| 2611 | { |
| 2612 | offset |= 0xffffffc0; |
| 2613 | } |
| 2614 | |
| 2615 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2616 | int eaoffset = ea & 0xf; |
| 2617 | ea &= ~0xf; |
| 2618 | |
| 2619 | int end = index + 16; |
| 2620 | for (int i = index; i < end; i++) |
| 2621 | { |
| 2622 | UINT16 value; |
| 2623 | SIMD_EXTRACT16(m_xv[dest], value, i >> 1); |
| 2624 | DM_WRITE8(ea + (eaoffset & 0xf), (value >> ((1-(i & 1)) * 8)) & 0xff); |
| 2625 | eaoffset++; |
| 2626 | } |
| 2627 | } |
| 2628 | |
| 2629 | static void cfunc_rsp_swv_simd(void *param) |
| 2630 | { |
| 2631 | ((rsp_device *)param)->ccfunc_rsp_swv_simd(); |
| 2632 | } |
| 2633 | #endif |
| 2634 | |
| 2635 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2636 | |
| 2637 | inline void rsp_device::ccfunc_rsp_swv_scalar() |
| 2638 | { |
| 2639 | // SWV, scalar register file: write the 16 bytes of one vector register to data memory, |
| 2640 | // beginning at the encoded element byte and wrapping around to byte 0. |
| 2641 | const UINT32 op = m_rsp_state->arg0; |
| 2642 | const int vreg = (op >> 16) & 0x1f; |
| 2643 | const int basereg = (op >> 21) & 0x1f; |
| 2644 | const int first = (op >> 7) & 0xf; |
| 2645 | int disp = (op & 0x7f); |
| 2646 | if (disp & 0x40) disp |= 0xffffffc0; // sign-extend the 7-bit offset field |
| 2647 | |
| 2648 | UINT32 addr = (basereg == 0) ? (disp * 16) : m_rsp_state->r[basereg] + (disp * 16); |
| 2649 | int wrap = addr & 0xf; |
| 2650 | addr &= ~0xf; |
| 2651 | |
| 2652 | // Sixteen consecutive bytes; both the register byte index and the address wrap modulo 16. |
| 2653 | for (int n = 0; n < 16; n++, wrap++) |
| 2654 | { |
| 2655 | DM_WRITE8(addr + (wrap & 0xf), VREG_B(vreg, (first + n) & 0xf)); |
| 2656 | } |
| 2657 | } |
| 2658 | |
| 2659 | static void cfunc_rsp_swv_scalar(void *param) |
| 2660 | { |
| 2661 | static_cast<rsp_device *>(param)->ccfunc_rsp_swv_scalar(); |
| 2662 | } |
| 2665 | #endif |
| 2666 | |
| 2667 | #if USE_SIMD |
| 2668 | // STV |
| 2669 | // |
| 2670 | // 31 25 20 15 10 6 0 |
| 2671 | // -------------------------------------------------- |
| 2672 | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 2673 | // -------------------------------------------------- |
| 2674 | // |
| 2675 | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 2676 | |
| 2677 | inline void rsp_device::ccfunc_rsp_stv_simd() |
| 2678 | { |
| 2679 | // STV, SIMD register file: store one 16-bit element from each of up to 8 consecutive |
| 2680 | // vector registers starting at `dest` (stopping after register 31), advancing the element |
| 2681 | // index by one per register. Writes wrap within the 16-byte-aligned line containing ea. |
| 2682 | UINT32 op = m_rsp_state->arg0; |
| 2683 | int dest = (op >> 16) & 0x1f; |
| 2684 | int base = (op >> 21) & 0x1f; |
| 2685 | int index = (op >> 7) & 0xf; |
| 2686 | int offset = (op & 0x7f); |
| 2687 | if (offset & 0x40) offset |= 0xffffffc0; // sign-extend the 7-bit offset field |
| 2688 | |
| 2689 | int vs = dest; |
| 2690 | int ve = dest + 8; |
| 2691 | if (ve > 32) ve = 32; |
| 2692 | |
| 2693 | int element = 8 - (index >> 1); |
| 2694 | |
| 2695 | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2696 | int eaoffset = (ea & 0xf) + (element * 2); |
| 2697 | ea &= ~0xf; |
| 2698 | |
| 2699 | for (int i = vs; i < ve; i++) |
| 2700 | { |
| 2701 | // element starts as high as 8 and keeps counting up, so wrap it to a valid lane (0-7) |
| 2702 | // exactly as ccfunc_rsp_stv_scalar does with `element & 0x7`. |
| 2703 | UINT16 value; |
| 2704 | SIMD_EXTRACT16(m_xv[i], value, element & 0x7); |
| 2705 | DM_WRITE16(ea + (eaoffset & 0xf), value); |
| 2706 | eaoffset += 2; |
| 2707 | element++; |
| 2708 | } |
| 2709 | } |
| 2710 | |
| 2711 | static void cfunc_rsp_stv_simd(void *param) |
| 2712 | { |
| 2713 | ((rsp_device *)param)->ccfunc_rsp_stv_simd(); |
| 2714 | } |
| 2717 | #endif |
| 2718 | |
| 2719 | #if (!USE_SIMD || SIMUL_SIMD) |
| 2720 | |
| 2721 | inline void rsp_device::ccfunc_rsp_stv_scalar() |
| 2722 | { |
| 2723 | // STV, scalar register file: for up to 8 registers beginning at the encoded one, write a |
| 2724 | // single 16-bit element per register, stepping the element and the address by one slot each. |
| 2725 | const UINT32 op = m_rsp_state->arg0; |
| 2726 | const int first_reg = (op >> 16) & 0x1f; |
| 2727 | const int basereg = (op >> 21) & 0x1f; |
| 2728 | const int index = (op >> 7) & 0xf; |
| 2729 | int disp = (op & 0x7f); |
| 2730 | if (disp & 0x40) disp |= 0xffffffc0; // sign-extend the 7-bit offset field |
| 2731 | |
| 2732 | // One past the last register to store, capped at 32. |
| 2733 | const int end_reg = (first_reg + 8 > 32) ? 32 : first_reg + 8; |
| 2734 | |
| 2735 | int lane = 8 - (index >> 1); |
| 2736 | |
| 2737 | UINT32 addr = (basereg == 0) ? (disp * 16) : m_rsp_state->r[basereg] + (disp * 16); |
| 2738 | int wrap = (addr & 0xf) + (lane * 2); |
| 2739 | addr &= ~0xf; |
| 2740 | |
| 2741 | for (int reg = first_reg; reg < end_reg; reg++, lane++, wrap += 2) |
| 2742 | { |
| 2743 | DM_WRITE16(addr + (wrap & 0xf), VREG_S(reg, lane & 0x7)); |
| 2744 | } |
| 2745 | } |
| 2746 | |
| 2747 | static void cfunc_rsp_stv_scalar(void *param) |
| 2748 | { |
| 2749 | static_cast<rsp_device *>(param)->ccfunc_rsp_stv_scalar(); |
| 2750 | } |
| 2759 | #endif |
| 2760 | |
| 2761 | #if USE_SIMD |
| 2762 | int rsp_device::generate_swc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 2763 | { |
| 2764 | // int loopdest; |
| 2765 | UINT32 op = desc->opptr.l[0]; |
| 2766 | //int dest = (op >> 16) & 0x1f; |
| 2767 | //int base = (op >> 21) & 0x1f; |
| 2768 | //int index = (op >> 7) & 0xf; |
| 2769 | int offset = (op & 0x7f); |
| 2770 | //int skip; |
| 2771 | if (offset & 0x40) |
| 2772 | { |
| 2773 | offset |= 0xffffffc0; |
| 2774 | } |
| 2775 | |
| 2776 | switch ((op >> 11) & 0x1f) |
| 2777 | { |
| 2778 | case 0x00: /* SBV */ |
| 2779 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2780 | UML_CALLC(block, cfunc_rsp_sbv_simd, this); |
| 2781 | #if SIMUL_SIMD |
| 2782 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2783 | UML_CALLC(block, cfunc_rsp_sbv_scalar, this); |
| 2784 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2785 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2786 | #endif |
| 2787 | return TRUE; |
| 2788 | case 0x01: /* SSV */ |
| 2789 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2790 | UML_CALLC(block, cfunc_rsp_ssv_simd, this); |
| 2791 | #if SIMUL_SIMD |
| 2792 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2793 | UML_CALLC(block, cfunc_rsp_ssv_scalar, this); |
| 2794 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2795 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2796 | #endif |
| 2797 | return TRUE; |
| 2798 | case 0x02: /* SLV */ |
| 2799 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2800 | UML_CALLC(block, cfunc_rsp_slv_simd, this); |
| 2801 | #if SIMUL_SIMD |
| 2802 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2803 | UML_CALLC(block, cfunc_rsp_slv_scalar, this); |
| 2804 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2805 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2806 | #endif |
| 2807 | return TRUE; |
| 2808 | case 0x03: /* SDV */ |
| 2809 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2810 | UML_CALLC(block, cfunc_rsp_sdv_simd, this); |
| 2811 | #if SIMUL_SIMD |
| 2812 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2813 | UML_CALLC(block, cfunc_rsp_sdv_scalar, this); |
| 2814 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2815 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2816 | #endif |
| 2817 | return TRUE; |
| 2818 | case 0x04: /* SQV */ |
| 2819 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2820 | UML_CALLC(block, cfunc_rsp_sqv_simd, this); |
| 2821 | #if SIMUL_SIMD |
| 2822 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2823 | UML_CALLC(block, cfunc_rsp_sqv_scalar, this); |
| 2824 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2825 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2826 | #endif |
| 2827 | return TRUE; |
| 2828 | case 0x05: /* SRV */ |
| 2829 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2830 | UML_CALLC(block, cfunc_rsp_srv_simd, this); |
| 2831 | #if SIMUL_SIMD |
| 2832 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2833 | UML_CALLC(block, cfunc_rsp_srv_scalar, this); |
| 2834 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2835 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2836 | #endif |
| 2837 | return TRUE; |
| 2838 | case 0x06: /* SPV */ |
| 2839 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2840 | UML_CALLC(block, cfunc_rsp_spv_simd, this); |
| 2841 | #if SIMUL_SIMD |
| 2842 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2843 | UML_CALLC(block, cfunc_rsp_spv_scalar, this); |
| 2844 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2845 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2846 | #endif |
| 2847 | return TRUE; |
| 2848 | case 0x07: /* SUV */ |
| 2849 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2850 | UML_CALLC(block, cfunc_rsp_suv_simd, this); |
| 2851 | #if SIMUL_SIMD |
| 2852 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2853 | UML_CALLC(block, cfunc_rsp_suv_scalar, this); |
| 2854 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2855 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2856 | #endif |
| 2857 | return TRUE; |
| 2858 | case 0x08: /* SHV */ |
| 2859 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2860 | UML_CALLC(block, cfunc_rsp_shv_simd, this); |
| 2861 | #if SIMUL_SIMD |
| 2862 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2863 | UML_CALLC(block, cfunc_rsp_shv_scalar, this); |
| 2864 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2865 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2866 | #endif |
| 2867 | return TRUE; |
| 2868 | case 0x09: /* SFV */ |
| 2869 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2870 | UML_CALLC(block, cfunc_rsp_sfv_simd, this); |
| 2871 | #if SIMUL_SIMD |
| 2872 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2873 | UML_CALLC(block, cfunc_rsp_sfv_scalar, this); |
| 2874 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2875 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2876 | #endif |
| 2877 | return TRUE; |
| 2878 | case 0x0a: /* SWV */ |
| 2879 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2880 | UML_CALLC(block, cfunc_rsp_swv_simd, this); |
| 2881 | #if SIMUL_SIMD |
| 2882 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2883 | UML_CALLC(block, cfunc_rsp_swv_scalar, this); |
| 2884 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2885 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2886 | #endif |
| 2887 | return TRUE; |
| 2888 | case 0x0b: /* STV */ |
| 2889 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2890 | UML_CALLC(block, cfunc_rsp_stv_simd, this); |
| 2891 | #if SIMUL_SIMD |
| 2892 | UML_CALLC(block, cfunc_backup_regs, this); |
| 2893 | UML_CALLC(block, cfunc_rsp_stv_scalar, this); |
| 2894 | UML_CALLC(block, cfunc_restore_regs, this); |
| 2895 | UML_CALLC(block, cfunc_verify_regs, this); |
| 2896 | #endif |
| 2897 | return TRUE; |
| 2898 | |
| 2899 | default: |
| 2900 | unimplemented_opcode(op); |
| 2901 | return FALSE; |
| 2902 | } |
| 2903 | |
| 2904 | return TRUE; |
| 2905 | } |
| 2906 | |
| 2907 | #else |
| 2908 | |
| 2909 | int rsp_device::generate_swc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 2910 | { |
| 2911 | // int loopdest; |
| 2912 | UINT32 op = desc->opptr.l[0]; |
| 2913 | //int dest = (op >> 16) & 0x1f; |
| 2914 | //int base = (op >> 21) & 0x1f; |
| 2915 | //int index = (op >> 7) & 0xf; |
| 2916 | int offset = (op & 0x7f); |
| 2917 | //int skip; |
| 2918 | if (offset & 0x40) |
| 2919 | { |
| 2920 | offset |= 0xffffffc0; |
| 2921 | } |
| 2922 | |
| 2923 | switch ((op >> 11) & 0x1f) |
| 2924 | { |
| 2925 | case 0x00: /* SBV */ |
| 2926 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2927 | UML_CALLC(block, cfunc_rsp_sbv_scalar, this); |
| 2928 | return TRUE; |
| 2929 | case 0x01: /* SSV */ |
| 2930 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2931 | UML_CALLC(block, cfunc_rsp_ssv_scalar, this); |
| 2932 | return TRUE; |
| 2933 | case 0x02: /* SLV */ |
| 2934 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2935 | UML_CALLC(block, cfunc_rsp_slv_scalar, this); |
| 2936 | return TRUE; |
| 2937 | case 0x03: /* SDV */ |
| 2938 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2939 | UML_CALLC(block, cfunc_rsp_sdv_scalar, this); |
| 2940 | return TRUE; |
| 2941 | case 0x04: /* SQV */ |
| 2942 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2943 | UML_CALLC(block, cfunc_rsp_sqv_scalar, this); |
| 2944 | return TRUE; |
| 2945 | case 0x05: /* SRV */ |
| 2946 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2947 | UML_CALLC(block, cfunc_rsp_srv_scalar, this); |
| 2948 | return TRUE; |
| 2949 | case 0x06: /* SPV */ |
| 2950 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2951 | UML_CALLC(block, cfunc_rsp_spv_scalar, this); |
| 2952 | return TRUE; |
| 2953 | case 0x07: /* SUV */ |
| 2954 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2955 | UML_CALLC(block, cfunc_rsp_suv_scalar, this); |
| 2956 | return TRUE; |
| 2957 | case 0x08: /* SHV */ |
| 2958 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2959 | UML_CALLC(block, cfunc_rsp_shv_scalar, this); |
| 2960 | return TRUE; |
| 2961 | case 0x09: /* SFV */ |
| 2962 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2963 | UML_CALLC(block, cfunc_rsp_sfv_scalar, this); |
| 2964 | return TRUE; |
| 2965 | case 0x0a: /* SWV */ |
| 2966 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2967 | UML_CALLC(block, cfunc_rsp_swv_scalar, this); |
| 2968 | return TRUE; |
| 2969 | case 0x0b: /* STV */ |
| 2970 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2971 | UML_CALLC(block, cfunc_rsp_stv_scalar, this); |
| 2972 | return TRUE; |
| 2973 | |
| 2974 | default: |
| 2975 | unimplemented_opcode(op); |
| 2976 | return FALSE; |
| 2977 | } |
| 2978 | |
| 2979 | return TRUE; |
| 2980 | } |
| 2981 | #endif |
| 2982 | |
| 2983 | #if USE_SIMD |
| 2984 | inline UINT16 rsp_device::VEC_SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 2985 | { |
| 2986 | // Clamps accumulator element `accum` by inspecting its high (H) and middle (M) slices: |
| 2987 | // H < 0 and (H != 0xffff or M >= 0) -> returns `negative` |
| 2988 | // H >= 0 and (H != 0 or M < 0) -> returns `positive` |
| 2989 | // Otherwise the low slice is returned for slice 0 and the middle slice for slice 1. |
| 2990 | if ((INT16)VEC_ACCUM_H(accum) < 0) |
| 2991 | { |
| 2992 | if (((UINT16)(VEC_ACCUM_H(accum)) != 0xffff) || ((INT16)VEC_ACCUM_M(accum) >= 0)) |
| 2993 | { |
| 2994 | return negative; |
| 2995 | } |
| 2996 | switch (slice) |
| 2997 | { |
| 2998 | case 0: return VEC_ACCUM_L(accum); |
| 2999 | case 1: return VEC_ACCUM_M(accum); |
| 3000 | default: return 0; // any other slice value yields 0 on the negative path |
| 3001 | } |
| 3002 | } |
| 3003 | |
| 3004 | if (((UINT16)(VEC_ACCUM_H(accum)) != 0) || ((INT16)VEC_ACCUM_M(accum) < 0)) |
| 3005 | { |
| 3006 | return positive; |
| 3007 | } |
| 3008 | if (slice == 0) |
| 3009 | { |
| 3010 | return VEC_ACCUM_L(accum); |
| 3011 | } |
| 3012 | return VEC_ACCUM_M(accum); // every non-zero slice value selects the middle slice here |
| 3013 | } |
| 3038 | #endif |
| 3039 | |
| 3040 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3041 | inline UINT16 rsp_device::SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 3042 | { |
| 3043 | // Clamps accumulator element `accum` by inspecting its high (H) and middle (M) slices: |
| 3044 | // H < 0 and (H != 0xffff or M >= 0) -> returns `negative` |
| 3045 | // H >= 0 and (H != 0 or M < 0) -> returns `positive` |
| 3046 | // Otherwise the low slice is returned for slice 0 and the middle slice for slice 1. |
| 3047 | if ((INT16)ACCUM_H(accum) < 0) |
| 3048 | { |
| 3049 | if (((UINT16)(ACCUM_H(accum)) != 0xffff) || ((INT16)ACCUM_M(accum) >= 0)) |
| 3050 | { |
| 3051 | return negative; |
| 3052 | } |
| 3053 | switch (slice) |
| 3054 | { |
| 3055 | case 0: return ACCUM_L(accum); |
| 3056 | case 1: return ACCUM_M(accum); |
| 3057 | default: return 0; // any other slice value yields 0 on the negative path |
| 3058 | } |
| 3059 | } |
| 3060 | |
| 3061 | if (((UINT16)(ACCUM_H(accum)) != 0) || ((INT16)ACCUM_M(accum) < 0)) |
| 3062 | { |
| 3063 | return positive; |
| 3064 | } |
| 3065 | if (slice == 0) |
| 3066 | { |
| 3067 | return ACCUM_L(accum); |
| 3068 | } |
| 3069 | return ACCUM_M(accum); // every non-zero slice value selects the middle slice here |
| 3070 | } |
| 3095 | #endif |
| 3096 | |
| 3097 | inline UINT16 rsp_device::SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive) |
| 3098 | { |
| 3099 | // Middle-slice clamp of accumulator element `accum` (H = high slice, M = middle slice): |
| 3100 | // H < 0 and (H != 0xffff or M >= 0) -> negative |
| 3101 | // H >= 0 and (H != 0 or M < 0) -> positive |
| 3102 | // anything else -> the middle slice itself |
| 3103 | if ((INT16)ACCUM_H(accum) < 0) |
| 3104 | { |
| 3105 | if (((UINT16)(ACCUM_H(accum)) != 0xffff) || ((INT16)ACCUM_M(accum) >= 0)) |
| 3106 | { |
| 3107 | return negative; |
| 3108 | } |
| 3109 | return ACCUM_M(accum); |
| 3110 | } |
| 3111 | |
| 3112 | if (((UINT16)(ACCUM_H(accum)) != 0) || ((INT16)ACCUM_M(accum) < 0)) |
| 3113 | { |
| 3114 | return positive; |
| 3115 | } |
| 3116 | return ACCUM_M(accum); |
| 3117 | } |
| 3142 | |
| 3143 | #if USE_SIMD |
| 3144 | #define VEC_WRITEBACK_RESULT() { /* stores vres[0..7] to elements 0..7 of m_xv[VDREG]; the caller must have vres[] in scope */ \ |
| 3145 | SIMD_INSERT16(m_xv[VDREG], vres[0], 0); \ |
| 3146 | SIMD_INSERT16(m_xv[VDREG], vres[1], 1); \ |
| 3147 | SIMD_INSERT16(m_xv[VDREG], vres[2], 2); \ |
| 3148 | SIMD_INSERT16(m_xv[VDREG], vres[3], 3); \ |
| 3149 | SIMD_INSERT16(m_xv[VDREG], vres[4], 4); \ |
| 3150 | SIMD_INSERT16(m_xv[VDREG], vres[5], 5); \ |
| 3151 | SIMD_INSERT16(m_xv[VDREG], vres[6], 6); \ |
| 3152 | SIMD_INSERT16(m_xv[VDREG], vres[7], 7); \ |
| 3153 | } |
| 3154 | #endif |
| 3155 | |
| 3156 | #define WRITEBACK_RESULT() { /* assigns vres[0..7] to W_VREG_S(VDREG, 0..7); the caller must have vres[] in scope */ \ |
| 3157 | W_VREG_S(VDREG, 0) = vres[0]; \ |
| 3158 | W_VREG_S(VDREG, 1) = vres[1]; \ |
| 3159 | W_VREG_S(VDREG, 2) = vres[2]; \ |
| 3160 | W_VREG_S(VDREG, 3) = vres[3]; \ |
| 3161 | W_VREG_S(VDREG, 4) = vres[4]; \ |
| 3162 | W_VREG_S(VDREG, 5) = vres[5]; \ |
| 3163 | W_VREG_S(VDREG, 6) = vres[6]; \ |
| 3164 | W_VREG_S(VDREG, 7) = vres[7]; \ |
| 3165 | } |
| 3166 | |
| 3167 | #if USE_SIMD |
| 3168 | /* ============================================================================ |
| 3169 | * RSPPackLo32to16: Narrow eight 32-bit lanes (four per input) to 16 bits, keeping |
| 3170 | * only the low half of each lane. Each lane is first sign-extended from bit 15 |
| 3171 | * (shift left, arithmetic shift right) so the saturating pack does not clamp it. |
| 3172 | * TODO: 5 SSE2 operations is kind of expensive just to truncate values? |
| 3173 | * ========================================================================= */ |
| 3174 | INLINE __m128i RSPPackLo32to16(__m128i vectorLow, __m128i vectorHigh) |
| 3175 | { |
| 3176 | const __m128i lowHalf = _mm_srai_epi32(_mm_slli_epi32(vectorLow, 16), 16); |
| 3177 | const __m128i highHalf = _mm_srai_epi32(_mm_slli_epi32(vectorHigh, 16), 16); |
| 3178 | return _mm_packs_epi32(lowHalf, highHalf); |
| 3179 | } |
| 3180 | |
| 3181 | /* ============================================================================ |
| 3182 | * RSPPackHi32to16: Narrow eight 32-bit lanes (four per input) to 16 bits, keeping |
| 3183 | * the upper half of each lane (arithmetic shift right by 16, then pack). |
| 3184 | * ========================================================================= */ |
| 3185 | INLINE __m128i RSPPackHi32to16(__m128i vectorLow, __m128i vectorHigh) |
| 3186 | { |
| 3187 | const __m128i upperOfLow = _mm_srai_epi32(vectorLow, 16); |
| 3188 | return _mm_packs_epi32(upperOfLow, _mm_srai_epi32(vectorHigh, 16)); |
| 3189 | } |
| 3190 | |
| 3191 | /* ============================================================================ |
| 3192 | * RSPSignExtend16to32: Sign-extend 16-bit slices to 32-bit slices (0-3 -> low, 4-7 -> high). |
| 3193 | * ========================================================================= */ |
| 3194 | INLINE void RSPSignExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 3195 | { |
| 3196 | const __m128i signBits = _mm_srai_epi16(source, 15); /* 0xffff for negative slices, 0 otherwise */ |
| 3197 | *vectorHigh = _mm_unpackhi_epi16(source, signBits); |
| 3198 | *vectorLow = _mm_unpacklo_epi16(source, signBits); |
| 3199 | } |
| 3200 | |
| 3201 | /* ============================================================================ |
| 3202 | * RSPZeroExtend16to32: Zero-extend 16-bit slices to 32-bit slices (0-3 -> low, 4-7 -> high). |
| 3203 | * ========================================================================= */ |
| 3204 | INLINE void RSPZeroExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 3205 | { |
| 3206 | const __m128i zero = _mm_setzero_si128(); |
| 3207 | *vectorHigh = _mm_unpackhi_epi16(source, zero); |
| 3208 | *vectorLow = _mm_unpacklo_epi16(source, zero); |
| 3209 | } |
| 3210 | /* ============================================================================ |
| 3211 | * _mm_mullo_epi32: SSE2 lacks _mm_mullo_epi32, define it manually. Even and odd |
| 3212 | * 32-bit lanes are multiplied separately with _mm_mul_epu32 and the low 32 bits |
| 3213 | * of each 64-bit product are merged back into one vector. |
| 3214 | * TODO/WARNING/DISCLAIMER: Assumes one argument is positive. |
| 3215 | * ========================================================================= */ |
| 3216 | INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) |
| 3217 | { |
| 3218 | const __m128i keepLow32 = _mm_setr_epi32(~0, 0, ~0, 0); |
| 3219 | |
| 3220 | /* Lanes 0 and 2: the wanted bits are the low dword of each 64-bit product. */ |
| 3221 | const __m128i evenProducts = _mm_and_si128(_mm_mul_epu32(b, a), keepLow32); |
| 3222 | |
| 3223 | /* Lanes 1 and 3: shift them down to the even positions, multiply, mask. */ |
| 3224 | const __m128i oddProducts = _mm_and_si128(_mm_mul_epu32(_mm_srli_si128(b, 4), _mm_srli_si128(a, 4)), keepLow32); |
| 3225 | |
| 3226 | return _mm_or_si128(evenProducts, _mm_slli_si128(oddProducts, 4)); |
| 3227 | } |
| 3228 | |
| 3229 | /* ============================================================================ |
| 3230 | * RSPClampLowToVal: Clamps the low word of the accumulator, per 16-bit lane. |
| 3231 | * In-range lanes return vaccLow; lanes that overflow on the negative side return |
| 3232 | * 0 and lanes that overflow on the positive side return 0xffff. |
| 3233 | * ========================================================================= */ |
| 3234 | INLINE __m128i RSPClampLowToVal(__m128i vaccLow, __m128i vaccMid, __m128i vaccHigh) |
| 3235 | { |
| 3236 | const __m128i zero = _mm_setzero_si128(); |
| 3237 | const __m128i allOnes = _mm_cmpeq_epi16(zero, zero); |
| 3238 | const __m128i midSign = _mm_srai_epi16(vaccMid, 15); |
| 3239 | |
| 3240 | /* Lanes whose high accumulator word is negative. */ |
| 3241 | const __m128i isNegative = _mm_cmplt_epi16(vaccHigh, zero); |
| 3242 | |
| 3243 | /* If accumulator < 0, keep vaccLow only when high & sign(mid) is all ones; else 0. */ |
| 3244 | const __m128i negInRange = _mm_cmpeq_epi16(_mm_and_si128(vaccHigh, midSign), allOnes); |
| 3245 | const __m128i negResult = _mm_and_si128(negInRange, vaccLow); |
| 3246 | |
| 3247 | /* Otherwise, clamp to ~0 if any high bits are set (the ~0 was previously never OR-ed in). */ |
| 3248 | const __m128i posInRange = _mm_cmpeq_epi16(_mm_or_si128(vaccHigh, midSign), zero); |
| 3249 | const __m128i posResult = _mm_or_si128(_mm_and_si128(posInRange, vaccLow), _mm_xor_si128(posInRange, allOnes)); |
| 3250 | |
| 3251 | return _mm_or_si128(_mm_and_si128(isNegative, negResult), _mm_andnot_si128(isNegative, posResult)); |
| 3252 | } |
| 3254 | #endif |
| 3255 | |
| 3256 | #if USE_SIMD |
| 3257 | // VMULF |
| 3258 | // |
| 3259 | // 31 25 24 20 15 10 5 0 |
| 3260 | // ------------------------------------------------------ |
| 3261 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 3262 | // ------------------------------------------------------ |
| 3263 | // |
| 3264 | // Multiplies signed integer by signed integer * 2 |
| 3265 | |
| 3266 | inline void rsp_device::ccfunc_rsp_vmulf_simd() |
| 3267 | { |
| 3268 | // VMULF, SIMD register file: per element, signed 16x16 multiply, doubled, plus 0x8000. |
| 3269 | // The accumulator slices are set from the sum and the result is the middle slice. |
| 3270 | int op = m_rsp_state->arg0; |
| 3271 | |
| 3272 | INT16 vres[8]; |
| 3273 | for (int i = 0; i < 8; i++) |
| 3274 | { |
| 3275 | UINT16 raw_vs, raw_vt; |
| 3276 | VEC_GET_SCALAR_VS1(raw_vs, i); |
| 3277 | VEC_GET_SCALAR_VS2(raw_vt, i); |
| 3278 | const INT32 lhs = (INT32)(INT16)raw_vs; |
| 3279 | const INT32 rhs = (INT32)(INT16)raw_vt; |
| 3280 | |
| 3281 | if (lhs == -32768 && rhs == -32768) |
| 3282 | { |
| 3283 | // overflow case: fixed accumulator contents, result pinned to 0x7fff |
| 3284 | VEC_SET_ACCUM_H(0, i); |
| 3285 | VEC_SET_ACCUM_M(-32768, i); |
| 3286 | VEC_SET_ACCUM_L(-32768, i); |
| 3287 | vres[i] = 0x7fff; |
| 3288 | continue; |
| 3289 | } |
| 3290 | |
| 3291 | const INT64 rounded = (INT64)(lhs * rhs * 2) + 0x8000; // rounding ? |
| 3292 | VEC_SET_ACCUM_H((rounded < 0) ? 0xffff : 0, i); |
| 3293 | VEC_SET_ACCUM_M((INT16)(rounded >> 16), i); |
| 3294 | VEC_SET_ACCUM_L((UINT16)(rounded), i); |
| 3295 | vres[i] = VEC_ACCUM_M(i); |
| 3296 | } |
| 3297 | VEC_WRITEBACK_RESULT(); |
| 3298 | } |
| 3299 | |
| 3300 | static void cfunc_rsp_vmulf_simd(void *param) |
| 3301 | { |
| 3302 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmulf_simd(); |
| 3303 | } |
| 3304 | #endif |
| 3305 | |
| 3306 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3307 | |
| 3308 | inline void rsp_device::ccfunc_rsp_vmulf_scalar() |
| 3309 | { |
| 3310 | // VMULF, scalar register file: per element, signed 16x16 multiply, doubled, plus 0x8000. |
| 3311 | // The accumulator slices are set from the sum and the result is the middle slice. |
| 3312 | int op = m_rsp_state->arg0; |
| 3313 | |
| 3314 | INT16 vres[8]; |
| 3315 | for (int i = 0; i < 8; i++) |
| 3316 | { |
| 3317 | UINT16 raw_vs, raw_vt; |
| 3318 | SCALAR_GET_VS1(raw_vs, i); |
| 3319 | SCALAR_GET_VS2(raw_vt, i); |
| 3320 | const INT32 lhs = (INT32)(INT16)raw_vs; |
| 3321 | const INT32 rhs = (INT32)(INT16)raw_vt; |
| 3322 | |
| 3323 | if (lhs == -32768 && rhs == -32768) |
| 3324 | { |
| 3325 | // overflow case: fixed accumulator contents, result pinned to 0x7fff |
| 3326 | SET_ACCUM_H(0, i); |
| 3327 | SET_ACCUM_M(-32768, i); |
| 3328 | SET_ACCUM_L(-32768, i); |
| 3329 | vres[i] = 0x7fff; |
| 3330 | continue; |
| 3331 | } |
| 3332 | |
| 3333 | const INT64 rounded = (INT64)(lhs * rhs * 2) + 0x8000; // rounding ? |
| 3334 | SET_ACCUM_H((rounded < 0) ? 0xffff : 0, i); |
| 3335 | SET_ACCUM_M((INT16)(rounded >> 16), i); |
| 3336 | SET_ACCUM_L((UINT16)(rounded), i); |
| 3337 | vres[i] = ACCUM_M(i); |
| 3338 | } |
| 3339 | WRITEBACK_RESULT(); |
| 3340 | } |
| 3341 | |
| 3342 | static void cfunc_rsp_vmulf_scalar(void *param) |
| 3343 | { |
| 3344 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmulf_scalar(); |
| 3345 | } |
| 3346 | #endif |
| 3347 | |
| 3348 | #if USE_SIMD |
| 3349 | // VMULU |
| 3350 | // |
| 3351 | // 31 25 24 20 15 10 5 0 |
| 3352 | // ------------------------------------------------------ |
| 3353 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 3354 | // ------------------------------------------------------ |
| 3355 | // |
| 3356 | |
| 3357 | inline void rsp_device::ccfunc_rsp_vmulu_simd() |
| 3358 | { |
| 3359 | // VMULU, SIMD register file: per element, signed 16x16 multiply, doubled, plus 0x8000. |
| 3360 | // The accumulator slices take the sum; the result is the middle slice clamped to unsigned: |
| 3361 | // 0 for a negative sum, 0xffff when the high and middle slices disagree in sign. |
| 3362 | int op = m_rsp_state->arg0; |
| 3363 | |
| 3364 | INT16 vres[8]; |
| 3365 | for (int i = 0; i < 8; i++) |
| 3366 | { |
| 3367 | UINT16 w1, w2; |
| 3368 | VEC_GET_SCALAR_VS1(w1, i); |
| 3369 | VEC_GET_SCALAR_VS2(w2, i); |
| 3370 | INT32 s1 = (INT32)(INT16)w1; |
| 3371 | INT32 s2 = (INT32)(INT16)w2; |
| 3372 | |
| 3373 | // Widen before multiplying: -32768 * -32768 * 2 is 2^31, which overflows a 32-bit int. |
| 3374 | INT64 r = (INT64)s1 * (INT64)s2 * 2; |
| 3375 | r += 0x8000; // rounding ? |
| 3376 | |
| 3377 | VEC_SET_ACCUM_H((UINT16)(r >> 32), i); |
| 3378 | VEC_SET_ACCUM_M((UINT16)(r >> 16), i); |
| 3379 | VEC_SET_ACCUM_L((UINT16)(r), i); |
| 3380 | |
| 3381 | if (r < 0) vres[i] = 0; |
| 3382 | else if (((INT16)(VEC_ACCUM_H(i)) ^ (INT16)(VEC_ACCUM_M(i))) < 0) vres[i] = -1; |
| 3383 | else vres[i] = VEC_ACCUM_M(i); |
| 3384 | } |
| 3385 | VEC_WRITEBACK_RESULT(); |
| 3386 | } |
| 3387 | |
| 3388 | static void cfunc_rsp_vmulu_simd(void *param) |
| 3389 | { |
| 3390 | ((rsp_device *)param)->ccfunc_rsp_vmulu_simd(); |
| 3391 | } |
| 3397 | #endif |
| 3398 | |
| 3399 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3400 | |
| 3401 | inline void rsp_device::ccfunc_rsp_vmulu_scalar() |
| 3402 | { |
| 3403 | // VMULU, scalar register file: per element, signed 16x16 multiply, doubled, plus 0x8000. |
| 3404 | // The accumulator slices take the sum; the result is the middle slice clamped to unsigned: |
| 3405 | // 0 for a negative sum, 0xffff when the high and middle slices disagree in sign. |
| 3406 | int op = m_rsp_state->arg0; |
| 3407 | |
| 3408 | INT16 vres[8]; |
| 3409 | for (int i = 0; i < 8; i++) |
| 3410 | { |
| 3411 | UINT16 w1, w2; |
| 3412 | SCALAR_GET_VS1(w1, i); |
| 3413 | SCALAR_GET_VS2(w2, i); |
| 3414 | INT32 s1 = (INT32)(INT16)w1; |
| 3415 | INT32 s2 = (INT32)(INT16)w2; |
| 3416 | |
| 3417 | // Widen before multiplying: -32768 * -32768 * 2 is 2^31, which overflows a 32-bit int. |
| 3418 | INT64 r = (INT64)s1 * (INT64)s2 * 2; |
| 3419 | r += 0x8000; // rounding ? |
| 3420 | |
| 3421 | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 3422 | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 3423 | SET_ACCUM_L((UINT16)(r), i); |
| 3424 | |
| 3425 | if (r < 0) vres[i] = 0; |
| 3426 | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) vres[i] = -1; |
| 3427 | else vres[i] = ACCUM_M(i); |
| 3428 | } |
| 3429 | WRITEBACK_RESULT(); |
| 3430 | } |
| 3431 | |
| 3432 | static void cfunc_rsp_vmulu_scalar(void *param) |
| 3433 | { |
| 3434 | ((rsp_device *)param)->ccfunc_rsp_vmulu_scalar(); |
| 3435 | } |
| 3441 | #endif |
| 3442 | |
| 3443 | #if USE_SIMD |
| 3444 | // VMUDL |
| 3445 | // |
| 3446 | // 31 25 24 20 15 10 5 0 |
| 3447 | // ------------------------------------------------------ |
| 3448 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 3449 | // ------------------------------------------------------ |
| 3450 | // |
| 3451 | // Multiplies unsigned fraction by unsigned fraction |
| 3452 | // The result is stored into accumulator |
| 3453 | // The low slice of accumulator is stored into destination element |
| 3454 | |
| 3455 | inline void rsp_device::ccfunc_rsp_vmudl_simd() |
| 3456 | { |
| 3457 | // VMUDL, SIMD register file: unsigned 16x16 multiply per element; the upper 16 bits of each product go to the low accumulator slice and the destination. |
| 3458 | int op = m_rsp_state->arg0; |
| 3459 | |
| 3460 | const __m128i lhs = m_xv[VS1REG]; |
| 3461 | const __m128i rhs = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3462 | |
| 3463 | /* Rebuild the full 32-bit products from their low and high 16-bit halves. */ |
| 3464 | const __m128i prodLow16 = _mm_mullo_epi16(lhs, rhs); |
| 3465 | const __m128i prodHigh16 = _mm_mulhi_epu16(lhs, rhs); |
| 3466 | const __m128i elems0to3 = _mm_unpacklo_epi16(prodLow16, prodHigh16); |
| 3467 | const __m128i elems4to7 = _mm_unpackhi_epi16(prodLow16, prodHigh16); |
| 3468 | |
| 3469 | m_accum_l = RSPPackHi32to16(elems0to3, elems4to7); |
| 3470 | m_xv[VDREG] = m_accum_l; |
| 3471 | m_accum_m = _mm_setzero_si128(); /* middle and high slices are cleared */ |
| 3472 | m_accum_h = _mm_setzero_si128(); |
| 3473 | } |
| 3474 | |
| 3475 | static void cfunc_rsp_vmudl_simd(void *param) |
| 3476 | { |
| 3477 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudl_simd(); } |
| 3478 | #endif |
| 3479 | |
| 3480 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3481 | |
| 3482 | inline void rsp_device::ccfunc_rsp_vmudl_scalar() |
| 3483 | { |
| 3484 | // VMUDL, scalar register file: unsigned 16x16 multiply per element. Bits 31..16 of the |
| 3485 | // product become the low accumulator slice and the result; middle and high are zeroed. |
| 3486 | int op = m_rsp_state->arg0; |
| 3487 | |
| 3488 | INT16 vres[8]; |
| 3489 | for (int i = 0; i < 8; i++) |
| 3490 | { |
| 3491 | UINT16 raw_vs, raw_vt; |
| 3492 | SCALAR_GET_VS1(raw_vs, i); |
| 3493 | SCALAR_GET_VS2(raw_vt, i); |
| 3494 | |
| 3495 | const UINT32 product = (UINT32)(UINT16)raw_vs * (UINT32)(UINT16)raw_vt; |
| 3496 | |
| 3497 | SET_ACCUM_H(0, i); |
| 3498 | SET_ACCUM_M(0, i); |
| 3499 | SET_ACCUM_L((UINT16)(product >> 16), i); |
| 3500 | |
| 3501 | vres[i] = ACCUM_L(i); |
| 3502 | } |
| 3503 | WRITEBACK_RESULT(); |
| 3504 | } |
| 3505 | |
| 3506 | static void cfunc_rsp_vmudl_scalar(void *param) |
| 3507 | { |
| 3508 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudl_scalar(); |
| 3509 | } |
| 3510 | #endif |
| 3511 | |
| 3512 | #if USE_SIMD |
| 3513 | // VMUDM |
| 3514 | // |
| 3515 | // 31 25 24 20 15 10 5 0 |
| 3516 | // ------------------------------------------------------ |
| 3517 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 3518 | // ------------------------------------------------------ |
| 3519 | // |
| 3520 | // Multiplies signed integer by unsigned fraction |
| 3521 | // The result is stored into accumulator |
| 3522 | // The middle slice of accumulator is stored into destination element |
| 3523 | |
| 3524 | inline void rsp_device::ccfunc_rsp_vmudm_simd() |
| 3525 | { |
| 3526 | // VMUDM, SIMD register file: multiplies sign-extended VS1 elements by zero-extended VS2 |
| 3527 | // elements. Low/middle accumulator slices take the product halves, the high slice its sign. |
| 3528 | int op = m_rsp_state->arg0; |
| 3529 | |
| 3530 | const __m128i multiplicand = m_xv[VS1REG]; |
| 3531 | const __m128i multiplier = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3532 | |
| 3533 | /* Widen both operands to 32 bits: signed on the VS1 side, unsigned on the VS2 side. */ |
| 3534 | __m128i signedLo, signedHi, unsignedLo, unsignedHi; |
| 3535 | RSPSignExtend16to32(multiplicand, &signedLo, &signedHi); |
| 3536 | RSPZeroExtend16to32(multiplier, &unsignedLo, &unsignedHi); |
| 3537 | |
| 3538 | const __m128i productLo = _mm_mullo_epi32(signedLo, unsignedLo); |
| 3539 | const __m128i productHi = _mm_mullo_epi32(signedHi, unsignedHi); |
| 3540 | m_accum_l = RSPPackLo32to16(productLo, productHi); |
| 3541 | m_xv[VDREG] = RSPPackHi32to16(productLo, productHi); |
| 3542 | m_accum_m = m_xv[VDREG]; |
| 3543 | |
| 3544 | /* All ones in the high slice for negative products, zero otherwise. */ |
| 3545 | m_accum_h = _mm_packs_epi32(_mm_cmplt_epi32(productLo, _mm_setzero_si128()), _mm_cmplt_epi32(productHi, _mm_setzero_si128())); |
| 3546 | } |
| 3547 | |
| 3548 | static void cfunc_rsp_vmudm_simd(void *param) |
| 3549 | { |
| 3550 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudm_simd(); |
| 3551 | } |
| 3552 | #endif |
| 3553 | |
| 3554 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3555 | |
| 3556 | inline void rsp_device::ccfunc_rsp_vmudm_scalar() |
| 3557 | { |
| 3558 | int op = m_rsp_state->arg0; |
| 3559 | /* VMUDM, scalar path: for each of the 8 lanes, signed w1 times unsigned w2. op and vres are presumably picked up by name inside the SCALAR_GET_* and WRITEBACK_RESULT macros -- TODO confirm. */ |
| 3560 | INT16 vres[8]; |
| 3561 | for (int i = 0; i < 8; i++) |
| 3562 | { |
| 3563 | UINT16 w1, w2; |
| 3564 | SCALAR_GET_VS1(w1, i); |
| 3565 | SCALAR_GET_VS2(w2, i); |
| 3566 | INT32 s1 = (INT32)(INT16)w1; |
| 3567 | INT32 s2 = (UINT16)w2; |
| 3568 | /* s1 lies in [-32768, 32767] and s2 in [0, 65535], so the product fits in an INT32. */ |
| 3569 | INT32 r = s1 * s2; |
| 3570 | |
| 3571 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 3572 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 3573 | SET_ACCUM_L((UINT16)r, i); |
| 3574 | /* The lane result is the middle slice that was just stored (r >> 16). */ |
| 3575 | vres[i] = ACCUM_M(i); |
| 3576 | } |
| 3577 | WRITEBACK_RESULT(); |
| 3578 | } |
| 3579 | |
| 3580 | static void cfunc_rsp_vmudm_scalar(void *param) |
| 3581 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMUDM scalar member. */ |
| 3582 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudm_scalar(); |
| 3583 | } |
| 3584 | #endif |
| 3585 | |
| 3586 | #if USE_SIMD |
| 3587 | // VMUDN |
| 3588 | // |
| 3589 | // 31 25 24 20 15 10 5 0 |
| 3590 | // ------------------------------------------------------ |
| 3591 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 3592 | // ------------------------------------------------------ |
| 3593 | // |
| 3594 | // Multiplies unsigned fraction by signed integer |
| 3595 | // The result is stored into accumulator |
| 3596 | // The low slice of accumulator is stored into destination element |
| 3597 | |
| 3598 | inline void rsp_device::ccfunc_rsp_vmudn_simd() |
| 3599 | { |
| 3600 | int op = m_rsp_state->arg0; |
| 3601 | /* VMUDN, SIMD path. op is never named below; presumably the VS1REG/VS2REG/VDREG/EL macros read it -- TODO confirm. */ |
| 3602 | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 3603 | |
| 3604 | __m128i vsReg = m_xv[VS1REG]; |
| 3605 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3606 | |
| 3607 | /* Widen the 16-bit lanes to 32 bits for full-precision products: VS1 through the zero-extend helper, the shuffled VS2 through the sign-extend helper. */ |
| 3608 | RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 3609 | RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 3610 | |
| 3611 | /* Multiply; the destination register receives the same value as the low accumulator slice. */ |
| 3612 | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 3613 | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 3614 | m_xv[VDREG] = m_accum_l = RSPPackLo32to16(loProduct, hiProduct); |
| 3615 | m_accum_m = RSPPackHi32to16(loProduct, hiProduct); |
| 3616 | m_accum_h = _mm_cmplt_epi16(m_accum_m, _mm_setzero_si128()); /* all-ones where the middle slice is negative, i.e. sign fill */ |
| 3617 | } |
| 3618 | |
| 3619 | static void cfunc_rsp_vmudn_simd(void *param) |
| 3620 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMUDN SIMD member. */ |
| 3621 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudn_simd(); |
| 3622 | } |
| 3623 | #endif |
| 3624 | |
| 3625 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3626 | |
| 3627 | inline void rsp_device::ccfunc_rsp_vmudn_scalar() |
| 3628 | { |
| 3629 | int op = m_rsp_state->arg0; |
| 3630 | /* VMUDN, scalar path: for each of the 8 lanes, unsigned w1 times signed w2. op and vres are presumably picked up by name inside the SCALAR_GET_* and WRITEBACK_RESULT macros -- TODO confirm. */ |
| 3631 | INT16 vres[8] = { 0 }; |
| 3632 | for (int i = 0; i < 8; i++) |
| 3633 | { |
| 3634 | UINT16 w1, w2; |
| 3635 | SCALAR_GET_VS1(w1, i); |
| 3636 | SCALAR_GET_VS2(w2, i); |
| 3637 | INT32 s1 = (UINT16)w1; |
| 3638 | INT32 s2 = (INT32)(INT16)w2; |
| 3639 | /* s1 lies in [0, 65535] and s2 in [-32768, 32767], so the product fits in an INT32. */ |
| 3640 | INT32 r = s1 * s2; |
| 3641 | |
| 3642 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 3643 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 3644 | SET_ACCUM_L((UINT16)(r), i); |
| 3645 | /* The lane result is the low 16 bits of the product. */ |
| 3646 | vres[i] = (UINT16)(r); |
| 3647 | } |
| 3648 | WRITEBACK_RESULT(); |
| 3649 | } |
| 3650 | |
| 3651 | static void cfunc_rsp_vmudn_scalar(void *param) |
| 3652 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMUDN scalar member. */ |
| 3653 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudn_scalar(); |
| 3654 | } |
| 3655 | #endif |
| 3656 | |
| 3657 | #if USE_SIMD |
| 3658 | // VMUDH |
| 3659 | // |
| 3660 | // 31 25 24 20 15 10 5 0 |
| 3661 | // ------------------------------------------------------ |
| 3662 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 3663 | // ------------------------------------------------------ |
| 3664 | // |
| 3665 | // Multiplies signed integer by signed integer |
| 3666 | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 3667 | // The highest 32 bits of accumulator is saturated into destination element |
| 3668 | |
| 3669 | inline void rsp_device::ccfunc_rsp_vmudh_simd() |
| 3670 | { |
| 3671 | int op = m_rsp_state->arg0; |
| 3672 | /* VMUDH, SIMD path. op is never named below; presumably the VS1REG/VS2REG/VDREG/EL macros read it -- TODO confirm. */ |
| 3673 | __m128i vaccLow, vaccHigh; |
| 3674 | __m128i unpackLo, unpackHi; |
| 3675 | |
| 3676 | __m128i vsReg = m_xv[VS1REG]; |
| 3677 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3678 | |
| 3679 | /* Signed 16x16 multiply: mullo/mulhi give the low/high product halves, and interleaving them rebuilds 32-bit products (lanes 0-3 in vaccLow, 4-7 in vaccHigh). */ |
| 3680 | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3681 | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 3682 | vaccHigh = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3683 | vaccLow = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3684 | |
| 3685 | /* Destination = products packed with signed saturation; accum_l is cleared and accum_m/accum_h take the two pack helpers' outputs. */ |
| 3686 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 3687 | m_accum_l = _mm_setzero_si128(); |
| 3688 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3689 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 3690 | } |
| 3691 | |
| 3692 | static void cfunc_rsp_vmudh_simd(void *param) |
| 3693 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMUDH SIMD member. */ |
| 3694 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudh_simd(); |
| 3695 | } |
| 3696 | #endif |
| 3697 | |
| 3698 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3699 | |
| 3700 | inline void rsp_device::ccfunc_rsp_vmudh_scalar() |
| 3701 | { |
| 3702 | int op = m_rsp_state->arg0; |
| 3703 | /* VMUDH, scalar path: for each of the 8 lanes, signed w1 times signed w2. op and vres are presumably picked up by name inside the SCALAR_GET_* and WRITEBACK_RESULT macros -- TODO confirm. */ |
| 3704 | INT16 vres[8]; |
| 3705 | for (int i = 0; i < 8; i++) |
| 3706 | { |
| 3707 | UINT16 w1, w2; |
| 3708 | SCALAR_GET_VS1(w1, i); |
| 3709 | SCALAR_GET_VS2(w2, i); |
| 3710 | INT32 s1 = (INT32)(INT16)w1; |
| 3711 | INT32 s2 = (INT32)(INT16)w2; |
| 3712 | |
| 3713 | INT32 r = s1 * s2; |
| 3714 | /* The 32-bit product goes to the high (upper half) and middle (lower half) slices; the low slice is zeroed. */ |
| 3715 | SET_ACCUM_H((INT16)(r >> 16), i); |
| 3716 | SET_ACCUM_M((UINT16)(r), i); |
| 3717 | SET_ACCUM_L(0, i); |
| 3718 | /* The lane result is the product clamped to the signed 16-bit range. */ |
| 3719 | if (r < -32768) r = -32768; |
| 3720 | if (r > 32767) r = 32767; |
| 3721 | vres[i] = (INT16)(r); |
| 3722 | } |
| 3723 | WRITEBACK_RESULT(); |
| 3724 | } |
| 3725 | |
| 3726 | static void cfunc_rsp_vmudh_scalar(void *param) |
| 3727 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMUDH scalar member. */ |
| 3728 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmudh_scalar(); |
| 3729 | } |
| 3730 | #endif |
| 3731 | |
| 3732 | #if USE_SIMD |
| 3733 | // VMACF |
| 3734 | // |
| 3735 | // 31 25 24 20 15 10 5 0 |
| 3736 | // ------------------------------------------------------ |
| 3737 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 3738 | // ------------------------------------------------------ |
| 3739 | // |
| 3740 | |
| 3741 | inline void rsp_device::ccfunc_rsp_vmacf_simd() |
| 3742 | { |
| 3743 | int op = m_rsp_state->arg0; |
| 3744 | /* VMACF for the SIMD build. The live code is a per-lane scalar loop over the VEC_* accessor macros; the intrinsic implementation is kept commented out at the end of the function. */ |
| 3745 | INT16 vres[8]; |
| 3746 | for (int i = 0; i < 8; i++) |
| 3747 | { |
| 3748 | UINT16 w1, w2; |
| 3749 | VEC_GET_SCALAR_VS1(w1, i); |
| 3750 | VEC_GET_SCALAR_VS2(w2, i); |
| 3751 | INT32 s1 = (INT32)(INT16)w1; |
| 3752 | INT32 s2 = (INT32)(INT16)w2; |
| 3753 | |
| 3754 | INT32 r = s1 * s2; |
| 3755 | /* Assemble the four 16-bit accumulator slices (LL, L, M, H) into one 64-bit value; each slice is masked to 16 bits before widening. */ |
| 3756 | UINT64 q = (UINT64)(UINT16)VEC_ACCUM_LL(i); |
| 3757 | q |= (((UINT64)(UINT16)VEC_ACCUM_L(i)) << 16); |
| 3758 | q |= (((UINT64)(UINT16)VEC_ACCUM_M(i)) << 32); |
| 3759 | q |= (((UINT64)(UINT16)VEC_ACCUM_H(i)) << 48); |
| 3760 | /* Add the signed product shifted left by 17 (twice the product, starting at the L slice), then scatter the sum back into the slices. */ |
| 3761 | q += (INT64)(r) << 17; |
| 3762 | VEC_SET_ACCUM_LL((UINT16)q, i); |
| 3763 | VEC_SET_ACCUM_L((UINT16)(q >> 16), i); |
| 3764 | VEC_SET_ACCUM_M((UINT16)(q >> 32), i); |
| 3765 | VEC_SET_ACCUM_H((UINT16)(q >> 48), i); |
| 3766 | /* VEC_SATURATE_ACCUM is defined elsewhere; presumably it clamps the selected slice between the given negative/positive limits -- TODO confirm. */ |
| 3767 | vres[i] = VEC_SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 3768 | } |
| 3769 | VEC_WRITEBACK_RESULT(); |
| 3770 | /* |
| 3771 | __m128i loProduct, hiProduct, unpackLo, unpackHi; |
| 3772 | __m128i vaccHigh; |
| 3773 | __m128i vdReg, vdRegLo, vdRegHi; |
| 3774 | |
| 3775 | __m128i vsReg = m_xv[VS1REG]; |
| 3776 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3777 | |
| 3778 | __m128i vaccLow = m_accum_l; |
| 3779 | |
| 3780 | // Unpack to obtain for 32-bit precision. |
| 3781 | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 3782 | |
| 3783 | // Begin accumulating the products. |
| 3784 | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3785 | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 3786 | loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3787 | hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3788 | loProduct = _mm_slli_epi32(loProduct, 1); |
| 3789 | hiProduct = _mm_slli_epi32(hiProduct, 1); |
| 3790 | |
| 3791 | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 3792 | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 3793 | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 3794 | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 3795 | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 3796 | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 3797 | |
| 3798 | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 3799 | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 3800 | |
| 3801 | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3802 | |
| 3803 | // Multiply the MSB of sources, accumulate the product. |
| 3804 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 3805 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 3806 | |
| 3807 | loProduct = _mm_srai_epi32(loProduct, 16); |
| 3808 | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 3809 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 3810 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 3811 | |
| 3812 | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 3813 | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 3814 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 3815 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 3816 | |
| 3817 | // Clamp the accumulator and write it all out. |
| 3818 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 3819 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3820 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 3821 | */ |
| 3822 | } |
| 3823 | |
| 3824 | static void cfunc_rsp_vmacf_simd(void *param) |
| 3825 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMACF SIMD member. */ |
| 3826 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmacf_simd(); |
| 3827 | } |
| 3828 | #endif |
| 3829 | |
| 3830 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3831 | |
| 3832 | inline void rsp_device::ccfunc_rsp_vmacf_scalar() |
| 3833 | { |
| 3834 | int op = m_rsp_state->arg0; |
| 3835 | /* VMACF, scalar path: for each of the 8 lanes, adds twice the signed w1*w2 product into the accumulator. op and vres are presumably picked up by name inside the macros -- TODO confirm. */ |
| 3836 | INT16 vres[8]; |
| 3837 | for (int i = 0; i < 8; i++) |
| 3838 | { |
| 3839 | UINT16 w1, w2; |
| 3840 | SCALAR_GET_VS1(w1, i); |
| 3841 | SCALAR_GET_VS2(w2, i); |
| 3842 | INT32 s1 = (INT32)(INT16)w1; |
| 3843 | INT32 s2 = (INT32)(INT16)w2; |
| 3844 | |
| 3845 | INT32 r = s1 * s2; |
| 3846 | /* Assemble the four 16-bit accumulator slices (LL, L, M, H) into one 64-bit value; each slice is masked to 16 bits before widening. */ |
| 3847 | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 3848 | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 3849 | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 3850 | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 3851 | /* Add the signed product shifted left by 17 (twice the product, starting at the L slice), then scatter the sum back into the slices. */ |
| 3852 | q += (INT64)(r) << 17; |
| 3853 | SET_ACCUM_LL((UINT16)q, i); |
| 3854 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 3855 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 3856 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 3857 | /* SATURATE_ACCUM is defined elsewhere; presumably it clamps the selected slice between the given negative/positive limits -- TODO confirm. */ |
| 3858 | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 3859 | } |
| 3860 | WRITEBACK_RESULT(); |
| 3861 | } |
| 3862 | |
| 3863 | static void cfunc_rsp_vmacf_scalar(void *param) |
| 3864 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMACF scalar member. */ |
| 3865 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmacf_scalar(); |
| 3866 | } |
| 3867 | #endif |
| 3868 | |
| 3869 | #if USE_SIMD |
| 3870 | // VMACU |
| 3871 | // |
| 3872 | // 31 25 24 20 15 10 5 0 |
| 3873 | // ------------------------------------------------------ |
| 3874 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 3875 | // ------------------------------------------------------ |
| 3876 | // |
| 3877 | |
| 3878 | inline void rsp_device::ccfunc_rsp_vmacu_simd() |
| 3879 | { |
| 3880 | int op = m_rsp_state->arg0; |
| 3881 | /* VMACU, SIMD path: adds twice the signed VS1*VS2 lane products into accum_l/m/h, then stores the unsigned-clamped middle slice in VDREG. op is presumably read by the register/element macros -- TODO confirm. */ |
| 3882 | __m128i loProduct, hiProduct, unpackLo, unpackHi; |
| 3883 | __m128i vaccHigh, vdRegLo, vdRegHi; |
| 3884 | |
| 3885 | __m128i vsReg = m_xv[VS1REG]; |
| 3886 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3887 | |
| 3888 | __m128i vaccLow = m_accum_l; |
| 3889 | /* Widen the low accumulator slice to 32-bit lanes so carries out of bit 15 are kept. */ |
| 3890 | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 3891 | |
| 3892 | /* Build the signed 32-bit products from their 16-bit halves and double them. */ |
| 3893 | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3894 | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 3895 | loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3896 | hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3897 | loProduct = _mm_slli_epi32(loProduct, 1); |
| 3898 | hiProduct = _mm_slli_epi32(hiProduct, 1); |
| 3899 | /* x ^ ((x >> 16) << 16) keeps only the low 16 bits of each doubled product. */ |
| 3900 | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 3901 | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 3902 | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 3903 | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 3904 | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 3905 | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 3906 | |
| 3907 | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 3908 | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 3909 | m_accum_l = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3910 | |
| 3911 | /* Rebuild 32-bit lanes from accum_m (low half) and accum_h (high half). */ |
| 3912 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 3913 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 3914 | |
| 3915 | loProduct = _mm_srai_epi32(loProduct, 16); |
| 3916 | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 3917 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 3918 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 3919 | /* Sum: upper half of the doubled product + carry out of the low slice + previous accum_h:accum_m. */ |
| 3920 | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 3921 | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 3922 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 3923 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 3924 | |
| 3925 | /* Write the updated middle and high accumulator slices. */ |
| 3926 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3927 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 3928 | /* Destination write, same rule as ccfunc_rsp_vmacu_scalar: accum_h < 0 gives 0; accum_h > 0, or accum_m with bit 15 set, gives 0xffff; otherwise accum_m. */ |
| 3929 | __m128i vdOver = _mm_or_si128(_mm_cmpgt_epi16(m_accum_h, _mm_setzero_si128()), _mm_cmplt_epi16(m_accum_m, _mm_setzero_si128())); |
| 3930 | m_xv[VDREG] = _mm_andnot_si128(_mm_cmplt_epi16(m_accum_h, _mm_setzero_si128()), _mm_or_si128(m_accum_m, vdOver)); |
| 3931 | } |
| 3932 | |
| 3933 | static void cfunc_rsp_vmacu_simd(void *param) |
| 3934 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMACU SIMD member. */ |
| 3935 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmacu_simd(); |
| 3936 | } |
| 3937 | #endif |
| 3938 | |
| 3939 | #if (!USE_SIMD || SIMUL_SIMD) |
| 3940 | |
| 3941 | inline void rsp_device::ccfunc_rsp_vmacu_scalar() |
| 3942 | { |
| 3943 | int op = m_rsp_state->arg0; |
| 3944 | /* VMACU, scalar path: for each of the 8 lanes, adds twice the signed w1*w2 product into the L/M/H slices, then clamps to an unsigned 16-bit result. op and vres are presumably picked up by name inside the macros -- TODO confirm. */ |
| 3945 | INT16 vres[8]; |
| 3946 | for (int i = 0; i < 8; i++) |
| 3947 | { |
| 3948 | UINT16 w1, w2; |
| 3949 | SCALAR_GET_VS1(w1, i); |
| 3950 | SCALAR_GET_VS2(w2, i); |
| 3951 | INT32 s1 = (INT32)(INT16)w1; |
| 3952 | INT32 s2 = (INT32)(INT16)w2; |
| 3953 | /* r2 and r3 are the new low/middle slices held in 32 bits; their bits above 15 are the carries into the next slice. */ |
| 3954 | INT32 r1 = s1 * s2; |
| 3955 | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 3956 | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 3957 | /* (UINT16)(r1 >> 31) contributes 0xffff (-1 modulo 2^16) for a negative product, assuming an arithmetic right shift, and 0 otherwise. */ |
| 3958 | SET_ACCUM_L((UINT16)(r2), i); |
| 3959 | SET_ACCUM_M((UINT16)(r3), i); |
| 3960 | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31), i); |
| 3961 | /* Unsigned clamp: negative high slice gives 0; non-zero high slice or a middle slice with bit 15 set gives 0xffff; otherwise the middle slice. */ |
| 3962 | if ((INT16)ACCUM_H(i) < 0) |
| 3963 | { |
| 3964 | vres[i] = 0; |
| 3965 | } |
| 3966 | else |
| 3967 | { |
| 3968 | if (ACCUM_H(i) != 0) |
| 3969 | { |
| 3970 | vres[i] = (INT16)0xffff; |
| 3971 | } |
| 3972 | else |
| 3973 | { |
| 3974 | if ((INT16)ACCUM_M(i) < 0) |
| 3975 | { |
| 3976 | vres[i] = (INT16)0xffff; |
| 3977 | } |
| 3978 | else |
| 3979 | { |
| 3980 | vres[i] = ACCUM_M(i); |
| 3981 | } |
| 3982 | } |
| 3983 | } |
| 3984 | } |
| 3985 | WRITEBACK_RESULT(); |
| 3986 | } |
| 3987 | |
| 3988 | static void cfunc_rsp_vmacu_scalar(void *param) |
| 3989 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMACU scalar member. */ |
| 3990 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmacu_scalar(); |
| 3991 | } |
| 3992 | #endif |
| 3993 | |
| 3994 | #if USE_SIMD |
| 3995 | // VMADL |
| 3996 | // |
| 3997 | // 31 25 24 20 15 10 5 0 |
| 3998 | // ------------------------------------------------------ |
| 3999 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 4000 | // ------------------------------------------------------ |
| 4001 | // |
| 4002 | // Multiplies unsigned fraction by unsigned fraction |
| 4003 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 4004 | // The low slice of accumulator is stored into destination element |
| 4005 | |
| 4006 | inline void rsp_device::ccfunc_rsp_vmadl_simd() |
| 4007 | { |
| 4008 | int op = m_rsp_state->arg0; |
| 4009 | /* VMADL for the SIMD build. The live code is a per-lane scalar loop over the VEC_* accessor macros; the intrinsic implementation is kept commented out at the end of the function. */ |
| 4010 | INT16 vres[8]; |
| 4011 | for (int i = 0; i < 8; i++) |
| 4012 | { |
| 4013 | UINT16 w1, w2; |
| 4014 | VEC_GET_SCALAR_VS1(w1, i); |
| 4015 | VEC_GET_SCALAR_VS2(w2, i); |
| 4016 | UINT32 s1 = w1; |
| 4017 | UINT32 s2 = w2; |
| 4018 | /* Only the upper 16 bits of the unsigned product are added (to the low slice); r2 >> 16 and r3 >> 16 carry into the middle and high slices. */ |
| 4019 | UINT32 r1 = s1 * s2; |
| 4020 | UINT32 r2 = (UINT16)VEC_ACCUM_L(i) + (r1 >> 16); |
| 4021 | UINT32 r3 = (UINT16)VEC_ACCUM_M(i) + (r2 >> 16); |
| 4022 | |
| 4023 | VEC_SET_ACCUM_L((UINT16)r2, i); |
| 4024 | VEC_SET_ACCUM_M((UINT16)r3, i); |
| 4025 | VEC_SET_ACCUM_H(VEC_ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 4026 | /* VEC_SATURATE_ACCUM is defined elsewhere; its exact clamping rule is not visible here -- TODO confirm. */ |
| 4027 | vres[i] = VEC_SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4028 | } |
| 4029 | VEC_WRITEBACK_RESULT(); |
| 4030 | |
| 4031 | /*__m128i vaccHigh; |
| 4032 | __m128i unpackHi, loProduct, hiProduct; |
| 4033 | __m128i vdReg, vdRegLo, vdRegHi; |
| 4034 | |
| 4035 | __m128i vsReg = m_xv[VS1REG]; |
| 4036 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4037 | |
| 4038 | __m128i vaccLow = m_accum_l; |
| 4039 | |
| 4040 | // Unpack to obtain for 32-bit precision. |
| 4041 | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 4042 | |
| 4043 | // Begin accumulating the products. |
| 4044 | unpackHi = _mm_mulhi_epu16(vsReg, vtReg); |
| 4045 | loProduct = _mm_unpacklo_epi16(unpackHi, _mm_setzero_si128()); |
| 4046 | hiProduct = _mm_unpackhi_epi16(unpackHi, _mm_setzero_si128()); |
| 4047 | |
| 4048 | vaccLow = _mm_add_epi32(vaccLow, loProduct); |
| 4049 | vaccHigh = _mm_add_epi32(vaccHigh, hiProduct); |
| 4050 | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4051 | |
| 4052 | // Finish accumulating whatever is left. |
| 4053 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4054 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4055 | |
| 4056 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 4057 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 4058 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 4059 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 4060 | |
| 4061 | // Clamp the accumulator and write it all out. |
| 4062 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4063 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4064 | m_xv[VDREG] = RSPClampLowToVal(vdReg, m_accum_m, m_accum_h);*/ |
| 4065 | } |
| 4066 | |
| 4067 | static void cfunc_rsp_vmadl_simd(void *param) |
| 4068 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADL SIMD member. */ |
| 4069 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadl_simd(); |
| 4070 | } |
| 4071 | #endif |
| 4072 | |
| 4073 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4074 | |
| 4075 | inline void rsp_device::ccfunc_rsp_vmadl_scalar() |
| 4076 | { |
| 4077 | int op = m_rsp_state->arg0; |
| 4078 | /* VMADL, scalar path: for each of the 8 lanes, unsigned w1 times unsigned w2. op and vres are presumably picked up by name inside the macros -- TODO confirm. */ |
| 4079 | INT16 vres[8]; |
| 4080 | for (int i = 0; i < 8; i++) |
| 4081 | { |
| 4082 | UINT16 w1, w2; |
| 4083 | SCALAR_GET_VS1(w1, i); |
| 4084 | SCALAR_GET_VS2(w2, i); |
| 4085 | UINT32 s1 = w1; |
| 4086 | UINT32 s2 = w2; |
| 4087 | /* Only the upper 16 bits of the unsigned product are added (to the low slice); r2 >> 16 and r3 >> 16 carry into the middle and high slices. */ |
| 4088 | UINT32 r1 = s1 * s2; |
| 4089 | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 4090 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 4091 | |
| 4092 | SET_ACCUM_L((UINT16)r2, i); |
| 4093 | SET_ACCUM_M((UINT16)r3, i); |
| 4094 | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 4095 | /* SATURATE_ACCUM is defined elsewhere; its exact clamping rule is not visible here -- TODO confirm. */ |
| 4096 | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4097 | } |
| 4098 | WRITEBACK_RESULT(); |
| 4099 | } |
| 4100 | |
| 4101 | static void cfunc_rsp_vmadl_scalar(void *param) |
| 4102 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADL scalar member. */ |
| 4103 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadl_scalar(); |
| 4104 | } |
| 4105 | #endif |
| 4106 | |
| 4107 | #if USE_SIMD |
| 4108 | // VMADM |
| 4109 | // |
| 4110 | |
| 4111 | inline void rsp_device::ccfunc_rsp_vmadm_simd() |
| 4112 | { |
| 4113 | int op = m_rsp_state->arg0; |
| 4114 | /* VMADM, SIMD path: accumulates sign-extended VS1 times zero-extended VS2 into accum_l/m/h; VDREG gets the 32-bit accum_h:accum_m lanes packed with signed saturation. op is presumably read by the register/element macros -- TODO confirm. */ |
| 4115 | __m128i vaccLow, vaccHigh, loProduct, hiProduct; |
| 4116 | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi; |
| 4117 | |
| 4118 | __m128i vsReg = m_xv[VS1REG]; |
| 4119 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4120 | |
| 4121 | /* Widen the sources and the low accumulator slice to 32-bit lanes. */ |
| 4122 | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 4123 | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 4124 | RSPZeroExtend16to32(m_accum_l, &vaccLow, &vaccHigh); |
| 4125 | |
| 4126 | /* Begin accumulating the products. */ |
| 4127 | loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 4128 | hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 4129 | /* x ^ ((x >> 16) << 16) keeps only the low 16 bits of each product, which are added to the widened low slice. */ |
| 4130 | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 4131 | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 4132 | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 4133 | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 4134 | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 4135 | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 4136 | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 4137 | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 4138 | /* Only accum_l is stored here; VDREG is written once, at the end of the function. */ |
| 4139 | m_accum_l = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4140 | |
| 4141 | /* Rebuild 32-bit lanes from accum_m (low half) and accum_h (high half). */ |
| 4142 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4143 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4144 | |
| 4145 | loProduct = _mm_srai_epi32(loProduct, 16); |
| 4146 | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 4147 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 4148 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 4149 | /* Sum: upper product half + carry out of the low slice + previous accum_h:accum_m. */ |
| 4150 | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 4151 | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 4152 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 4153 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 4154 | |
| 4155 | /* Clamp the accumulator and write it all out. */ |
| 4156 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 4157 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4158 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4159 | } |
| 4160 | |
| 4161 | static void cfunc_rsp_vmadm_simd(void *param) |
| 4162 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADM SIMD member. */ |
| 4163 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadm_simd(); |
| 4164 | } |
| 4165 | #endif |
| 4166 | |
| 4167 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4168 | |
| 4169 | inline void rsp_device::ccfunc_rsp_vmadm_scalar() |
| 4170 | { |
| 4171 | int op = m_rsp_state->arg0; |
| 4172 | /* VMADM, scalar path: for each of the 8 lanes, accumulates sign-extended w1 times unsigned w2. op and vres are presumably picked up by name inside the macros -- TODO confirm. */ |
| 4173 | INT16 vres[8]; |
| 4174 | for (int i = 0; i < 8; i++) |
| 4175 | { |
| 4176 | UINT16 w1, w2; |
| 4177 | SCALAR_GET_VS1(w1, i); |
| 4178 | SCALAR_GET_VS2(w2, i); |
| 4179 | UINT32 s1 = (INT32)(INT16)w1; |
| 4180 | UINT32 s2 = (UINT16)w2; |
| 4181 | /* The multiply is done in unsigned 32-bit arithmetic (wraps modulo 2^32); r1 is reinterpreted as signed further down. */ |
| 4182 | UINT32 r1 = s1 * s2; |
| 4183 | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 4184 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 4185 | |
| 4186 | SET_ACCUM_L((UINT16)r2, i); |
| 4187 | SET_ACCUM_M((UINT16)r3, i); |
| 4188 | SET_ACCUM_H((UINT16)ACCUM_H(i) + (UINT16)(r3 >> 16), i); |
| 4189 | if ((INT32)(r1) < 0) |
| 4190 | { |
| 4191 | SET_ACCUM_H((UINT16)ACCUM_H(i) - 1, i); /* r1 >> 16 above was a logical shift, so a negative product is sign-corrected in the high slice */ |
| 4192 | } |
| 4193 | /* SATURATE_ACCUM is defined elsewhere; its exact clamping rule is not visible here -- TODO confirm. */ |
| 4194 | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 4195 | } |
| 4196 | WRITEBACK_RESULT(); |
| 4197 | } |
| 4198 | |
| 4199 | static void cfunc_rsp_vmadm_scalar(void *param) |
| 4200 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADM scalar member. */ |
| 4201 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadm_scalar(); |
| 4202 | } |
| 4203 | #endif |
| 4204 | |
| 4205 | #if USE_SIMD |
| 4206 | // VMADN |
| 4207 | // |
| 4208 | |
| 4209 | inline void rsp_device::ccfunc_rsp_vmadn_simd() |
| 4210 | { |
| 4211 | int op = m_rsp_state->arg0; |
| 4212 | /* VMADN for the SIMD build. The live code is a per-lane scalar loop over the VEC_* accessor macros: unsigned w1 times signed w2, accumulated. */ |
| 4213 | INT16 vres[8]; |
| 4214 | for (int i = 0; i < 8; i++) |
| 4215 | { |
| 4216 | UINT16 w1, w2; |
| 4217 | VEC_GET_SCALAR_VS1(w1, i); |
| 4218 | VEC_GET_SCALAR_VS2(w2, i); |
| 4219 | INT32 s1 = (UINT16)w1; |
| 4220 | INT32 s2 = (INT32)(INT16)w2; |
| 4221 | /* NOTE(review): the slices are widened without the (UINT16) mask that ccfunc_rsp_vmacf_simd applies; this is only safe if the VEC_ACCUM_* macros yield unsigned 16-bit values -- confirm. */ |
| 4222 | UINT64 q = (UINT64)VEC_ACCUM_LL(i); |
| 4223 | q |= (((UINT64)VEC_ACCUM_L(i)) << 16); |
| 4224 | q |= (((UINT64)VEC_ACCUM_M(i)) << 32); |
| 4225 | q |= (((UINT64)VEC_ACCUM_H(i)) << 48); |
| 4226 | q += (INT64)(s1*s2) << 16; |
| 4227 | /* Scatter the 64-bit sum back into the four 16-bit slices. */ |
| 4228 | VEC_SET_ACCUM_LL((UINT16)q, i); |
| 4229 | VEC_SET_ACCUM_L((UINT16)(q >> 16), i); |
| 4230 | VEC_SET_ACCUM_M((UINT16)(q >> 32), i); |
| 4231 | VEC_SET_ACCUM_H((UINT16)(q >> 48), i); |
| 4232 | /* VEC_SATURATE_ACCUM is defined elsewhere; its exact clamping rule is not visible here -- TODO confirm. */ |
| 4233 | vres[i] = VEC_SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4234 | } |
| 4235 | VEC_WRITEBACK_RESULT(); |
| 4236 | } |
| 4237 | /*INLINE void cfunc_rsp_vmadn_simd(void *param) |
| 4238 | { |
| 4239 | rsp_state *rsp = (rsp_state*)param; |
| 4240 | int op = m_rsp_state->arg0; |
| 4241 | |
| 4242 | __m128i vaccLow, vaccHigh, loProduct, hiProduct; |
| 4243 | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi; |
| 4244 | |
| 4245 | __m128i vsReg = m_xv[VS1REG]; |
| 4246 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4247 | |
| 4248 | vaccLow = m_accum_l; |
| 4249 | |
| 4250 | RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 4251 | RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 4252 | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 4253 | |
| 4254 | // Begin accumulating the products. |
| 4255 | loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 4256 | hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 4257 | |
| 4258 | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 4259 | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 4260 | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 4261 | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 4262 | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 4263 | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 4264 | |
| 4265 | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 4266 | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 4267 | |
| 4268 | m_accum_l = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4269 | |
| 4270 | // Multiply the MSB of sources, accumulate the product. |
| 4271 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4272 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4273 | |
| 4274 | loProduct = _mm_srai_epi32(loProduct, 16); |
| 4275 | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 4276 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 4277 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 4278 | |
| 4279 | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 4280 | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 4281 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 4282 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 4283 | |
| 4284 | // Clamp the accumulator and write it all out. |
| 4285 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4286 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4287 | m_xv[VDREG] = RSPClampLowToVal(m_accum_l, m_accum_m, m_accum_h); |
| 4288 | }*/ |
| 4289 | |
| 4290 | static void cfunc_rsp_vmadn_simd(void *param) |
| 4291 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADN SIMD member. */ |
| 4292 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadn_simd(); |
| 4293 | } |
| 4294 | #endif |
| 4295 | |
| 4296 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4297 | |
| 4298 | inline void rsp_device::ccfunc_rsp_vmadn_scalar() |
| 4299 | { |
| 4300 | int op = m_rsp_state->arg0; |
| 4301 | /* VMADN, scalar path: for each of the 8 lanes, accumulates unsigned w1 times signed w2. op and vres are presumably picked up by name inside the macros -- TODO confirm. */ |
| 4302 | INT16 vres[8]; |
| 4303 | for (int i = 0; i < 8; i++) |
| 4304 | { |
| 4305 | UINT16 w1, w2; |
| 4306 | SCALAR_GET_VS1(w1, i); |
| 4307 | SCALAR_GET_VS2(w2, i); |
| 4308 | INT32 s1 = (UINT16)w1; |
| 4309 | INT32 s2 = (INT32)(INT16)w2; |
| 4310 | /* NOTE(review): the slices are widened without the (UINT16) mask that ccfunc_rsp_vmacf_scalar applies; this is only safe if the ACCUM_* macros yield unsigned 16-bit values -- confirm. */ |
| 4311 | UINT64 q = (UINT64)ACCUM_LL(i); |
| 4312 | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 4313 | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 4314 | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 4315 | q += (INT64)(s1*s2) << 16; |
| 4316 | /* Scatter the 64-bit sum back into the four 16-bit slices. */ |
| 4317 | SET_ACCUM_LL((UINT16)q, i); |
| 4318 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 4319 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 4320 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 4321 | /* SATURATE_ACCUM is defined elsewhere; its exact clamping rule is not visible here -- TODO confirm. */ |
| 4322 | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4323 | } |
| 4324 | WRITEBACK_RESULT(); |
| 4325 | } |
| 4326 | |
| 4327 | static void cfunc_rsp_vmadn_scalar(void *param) |
| 4328 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADN scalar member. */ |
| 4329 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadn_scalar(); |
| 4330 | } |
| 4331 | #endif |
| 4332 | |
| 4333 | #if USE_SIMD |
| 4334 | // VMADH |
| 4335 | // |
| 4336 | // 31 25 24 20 15 10 5 0 |
| 4337 | // ------------------------------------------------------ |
| 4338 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 4339 | // ------------------------------------------------------ |
| 4340 | // |
| 4341 | // Multiplies signed integer by signed integer |
| 4342 | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 4343 | // The highest 32 bits of accumulator is saturated into destination element |
| 4344 | |
| 4345 | inline void rsp_device::ccfunc_rsp_vmadh_simd() |
| 4346 | { |
| 4347 | int op = m_rsp_state->arg0; |
| 4348 | /* VMADH, SIMD path: adds signed VS1*VS2 lane products to the 32-bit accum_h:accum_m lanes; accum_l is not touched here. op is presumably read by the register/element macros -- TODO confirm. */ |
| 4349 | __m128i vsReg = m_xv[VS1REG]; |
| 4350 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4351 | |
| 4352 | /* Interleave accum_m (low half) with accum_h (high half) to form 32-bit accumulator lanes. */ |
| 4353 | __m128i vaccLow = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4354 | __m128i vaccHigh = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4355 | |
| 4356 | /* Multiply the sources, accumulate the product (the 32-bit add wraps on overflow). */ |
| 4357 | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 4358 | __m128i unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 4359 | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 4360 | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 4361 | vaccLow = _mm_add_epi32(vaccLow, loProduct); |
| 4362 | vaccHigh = _mm_add_epi32(vaccHigh, hiProduct); |
| 4363 | |
| 4364 | /* Destination = sums packed with signed saturation; accum_m/accum_h take the two pack helpers' outputs. */ |
| 4365 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 4366 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4367 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4368 | } |
| 4369 | |
| 4370 | static void cfunc_rsp_vmadh_simd(void *param) |
| 4371 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADH SIMD member. */ |
| 4372 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadh_simd(); |
| 4373 | } |
| 4374 | #endif |
| 4375 | |
| 4376 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4377 | |
| 4378 | inline void rsp_device::ccfunc_rsp_vmadh_scalar() |
| 4379 | { |
| 4380 | int op = m_rsp_state->arg0; |
| 4381 | /* VMADH, scalar path: for each of the 8 lanes, adds the signed w1*w2 product to the 32-bit value formed by the high and middle slices. op and vres are presumably picked up by name inside the macros -- TODO confirm. */ |
| 4382 | INT16 vres[8]; |
| 4383 | for (int i = 0; i < 8; i++) |
| 4384 | { |
| 4385 | INT16 w1, w2; |
| 4386 | SCALAR_GET_VS1(w1, i); |
| 4387 | SCALAR_GET_VS2(w2, i); |
| 4388 | INT32 s1 = (INT32)(INT16)w1; |
| 4389 | INT32 s2 = (INT32)(INT16)w2; |
| 4390 | /* Accumulate in unsigned 32-bit arithmetic: same bits as a wrapping signed add, without signed-overflow undefined behaviour. */ |
| 4391 | UINT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 4392 | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 4393 | accum += (UINT32)(s1 * s2); |
| 4394 | |
| 4395 | SET_ACCUM_H((UINT16)(accum >> 16), i); |
| 4396 | SET_ACCUM_M((UINT16)accum, i); |
| 4397 | /* SATURATE_ACCUM1 is defined elsewhere; its exact clamping rule is not visible here -- TODO confirm. */ |
| 4398 | vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 4399 | } |
| 4400 | WRITEBACK_RESULT(); |
| 4401 | } |
| 4402 | |
| 4403 | static void cfunc_rsp_vmadh_scalar(void *param) |
| 4404 | { /* Static thunk: param is treated as the rsp_device instance and the call is forwarded to its VMADH scalar member. */ |
| 4405 | static_cast<rsp_device *>(param)->ccfunc_rsp_vmadh_scalar(); |
| 4406 | } |
| 4407 | #endif |
| 4408 | |
| 4409 | #if USE_SIMD |
| 4410 | // VADD |
| 4411 | // 31 25 24 20 15 10 5 0 |
| 4412 | // ------------------------------------------------------ |
| 4413 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 4414 | // ------------------------------------------------------ |
| 4415 | // |
| 4416 | // Adds two vector registers and the carry flag; the result is saturated to the signed 16-bit range (-32768..32767) |
| 4417 | |
| 4418 | inline void rsp_device::ccfunc_rsp_vadd_simd() |
| 4419 | { |
| 4420 | int op = m_rsp_state->arg0; |
| 4421 | /* VADD, SIMD path: accum_l gets the wrapping sum VS1 + shuffled VS2 + carry; VDREG gets a saturating sum. op is presumably read by the register/element macros -- TODO confirm. */ |
| 4422 | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4423 | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 4424 | m_accum_l = _mm_add_epi16(_mm_add_epi16(m_xv[VS1REG], shuffled), carry); |
| 4425 | |
| 4426 | __m128i addvec = _mm_adds_epi16(m_xv[VS1REG], shuffled); |
| 4427 | /* Drop the carry in lanes where the saturating sum equals vec_32767 or vec_n32768 (the XOR with vec_neg1 presumably inverts the compare mask), so adding it below cannot wrap. */ |
| 4428 | carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_32767), vec_neg1)); |
| 4429 | carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_n32768), vec_neg1)); |
| 4430 | /* NOTE(review): a lane whose un-carried sum is exactly -32768 also loses its carry here, whereas ccfunc_rsp_vadd_scalar would produce -32767 for it -- confirm this is intended. */ |
| 4431 | m_xv[VDREG] = _mm_add_epi16(addvec, carry); |
| 4432 | |
| 4433 | m_xvflag[ZERO] = vec_zero; |
| 4434 | m_xvflag[CARRY] = vec_zero; |
| 4435 | } |
| 4436 | |
| 4437 | static void cfunc_rsp_vadd_simd(void *param) |
| 4438 | { |
| 4439 | ((rsp_device *)param)->ccfunc_rsp_vadd_simd(); |
| 4440 | } |
| 4441 | #endif |
| 4442 | |
| 4443 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4444 | |
| 4445 | inline void rsp_device::ccfunc_rsp_vadd_scalar() |
| 4446 | { |
| 4447 | int op = m_rsp_state->arg0; |
| 4448 | |
| 4449 | INT16 vres[8] = { 0 }; |
| 4450 | for (int i = 0; i < 8; i++) |
| 4451 | { |
| 4452 | INT16 w1, w2; |
| 4453 | SCALAR_GET_VS1(w1, i); |
| 4454 | SCALAR_GET_VS2(w2, i); |
| 4455 | INT32 s1 = (INT32)(INT16)w1; |
| 4456 | INT32 s2 = (INT32)(INT16)w2; |
| 4457 | INT32 r = s1 + s2 + (((CARRY_FLAG(i)) != 0) ? 1 : 0); |
| 4458 | |
| 4459 | SET_ACCUM_L((INT16)(r), i); |
| 4460 | |
| 4461 | if (r > 32767) r = 32767; |
| 4462 | if (r < -32768) r = -32768; |
| 4463 | vres[i] = (INT16)(r); |
| 4464 | } |
| 4465 | CLEAR_ZERO_FLAGS(); |
| 4466 | CLEAR_CARRY_FLAGS(); |
| 4467 | WRITEBACK_RESULT(); |
| 4468 | } |
| 4469 | |
| 4470 | static void cfunc_rsp_vadd_scalar(void *param) |
| 4471 | { |
| 4472 | ((rsp_device *)param)->ccfunc_rsp_vadd_scalar(); |
| 4473 | } |
| 4474 | #endif |
| 4475 | |
| 4476 | #if USE_SIMD |
| 4477 | // VSUB |
| 4478 | // |
| 4479 | // 31 25 24 20 15 10 5 0 |
| 4480 | // ------------------------------------------------------ |
| 4481 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 4482 | // ------------------------------------------------------ |
| 4483 | // |
| 4484 | // Subtracts two vector registers and carry flag, the result is saturated to -32768 |
| 4485 | // TODO: check VS2REG == VDREG |
| 4486 | |
| 4487 | inline void rsp_device::ccfunc_rsp_vsub_simd() |
| 4488 | { |
| 4489 | int op = m_rsp_state->arg0; |
| 4490 | |
| 4491 | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4492 | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 4493 | __m128i unsat = _mm_sub_epi16(m_xv[VS1REG], shuffled); |
| 4494 | |
| 4495 | __m128i vs2neg = _mm_cmplt_epi16(shuffled, vec_zero); |
| 4496 | __m128i vs2pos = _mm_cmpeq_epi16(vs2neg, vec_zero); |
| 4497 | |
| 4498 | __m128i saturated = _mm_subs_epi16(m_xv[VS1REG], shuffled); |
| 4499 | __m128i carry_mask = _mm_cmpeq_epi16(unsat, saturated); |
| 4500 | carry_mask = _mm_and_si128(vs2neg, carry_mask); |
| 4501 | |
| 4502 | vs2neg = _mm_and_si128(carry_mask, carry); |
| 4503 | vs2pos = _mm_and_si128(vs2pos, carry); |
| 4504 | __m128i dest_carry = _mm_or_si128(vs2neg, vs2pos); |
| 4505 | m_xv[VDREG] = _mm_subs_epi16(saturated, dest_carry); |
| 4506 | |
| 4507 | m_accum_l = _mm_sub_epi16(unsat, carry); |
| 4508 | |
| 4509 | m_xvflag[ZERO] = _mm_setzero_si128(); |
| 4510 | m_xvflag[CARRY] = _mm_setzero_si128(); |
| 4511 | } |
| 4512 | |
| 4513 | static void cfunc_rsp_vsub_simd(void *param) |
| 4514 | { |
| 4515 | ((rsp_device *)param)->ccfunc_rsp_vsub_simd(); |
| 4516 | } |
| 4517 | #endif |
| 4518 | |
| 4519 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4520 | |
| 4521 | inline void rsp_device::ccfunc_rsp_vsub_scalar() |
| 4522 | { |
| 4523 | int op = m_rsp_state->arg0; |
| 4524 | |
| 4525 | INT16 vres[8]; |
| 4526 | for (int i = 0; i < 8; i++) |
| 4527 | { |
| 4528 | INT16 w1, w2; |
| 4529 | SCALAR_GET_VS1(w1, i); |
| 4530 | SCALAR_GET_VS2(w2, i); |
| 4531 | INT32 s1 = (INT32)(INT16)w1; |
| 4532 | INT32 s2 = (INT32)(INT16)w2; |
| 4533 | INT32 r = s1 - s2 - (((CARRY_FLAG(i)) != 0) ? 1 : 0); |
| 4534 | |
| 4535 | SET_ACCUM_L((INT16)(r), i); |
| 4536 | |
| 4537 | if (r > 32767) r = 32767; |
| 4538 | if (r < -32768) r = -32768; |
| 4539 | |
| 4540 | vres[i] = (INT16)(r); |
| 4541 | } |
| 4542 | CLEAR_ZERO_FLAGS(); |
| 4543 | CLEAR_CARRY_FLAGS(); |
| 4544 | WRITEBACK_RESULT(); |
| 4545 | } |
| 4546 | |
| 4547 | static void cfunc_rsp_vsub_scalar(void *param) |
| 4548 | { |
| 4549 | ((rsp_device *)param)->ccfunc_rsp_vsub_scalar(); |
| 4550 | } |
| 4551 | #endif |
| 4552 | |
| 4553 | #if USE_SIMD |
| 4554 | // VABS |
| 4555 | // |
| 4556 | // 31 25 24 20 15 10 5 0 |
| 4557 | // ------------------------------------------------------ |
| 4558 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 4559 | // ------------------------------------------------------ |
| 4560 | // |
| 4561 | // Changes the sign of source register 2 if source register 1 is negative and stores the result to destination register |
| 4562 | |
| 4563 | inline void rsp_device::ccfunc_rsp_vabs_simd() |
| 4564 | { |
| 4565 | int op = m_rsp_state->arg0; |
| 4566 | |
| 4567 | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4568 | __m128i negs2 = _mm_sub_epi16(_mm_setzero_si128(), shuf2); |
| 4569 | __m128i s2_n32768 = _mm_cmpeq_epi16(shuf2, vec_n32768); |
| 4570 | __m128i s1_lz = _mm_cmplt_epi16(m_xv[VS1REG], _mm_setzero_si128()); |
| 4571 | |
| 4572 | __m128i result_gz = _mm_and_si128(shuf2, _mm_cmpgt_epi16(m_xv[VS1REG], _mm_setzero_si128())); |
| 4573 | __m128i result_n32768 = _mm_and_si128(s1_lz, _mm_and_si128(vec_32767, s2_n32768)); |
| 4574 | __m128i result_negs2 = _mm_and_si128(s1_lz, _mm_and_si128(negs2, _mm_xor_si128(s2_n32768, vec_neg1))); |
| 4575 | m_xv[VDREG] = m_accum_l = _mm_or_si128(result_gz, _mm_or_si128(result_n32768, result_negs2)); |
| 4576 | } |
| 4577 | |
| 4578 | static void cfunc_rsp_vabs_simd(void *param) |
| 4579 | { |
| 4580 | ((rsp_device *)param)->ccfunc_rsp_vabs_simd(); |
| 4581 | } |
| 4582 | #endif |
| 4583 | |
| 4584 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4585 | |
| 4586 | inline void rsp_device::ccfunc_rsp_vabs_scalar() |
| 4587 | { |
| 4588 | int op = m_rsp_state->arg0; |
| 4589 | |
| 4590 | INT16 vres[8]; |
| 4591 | for (int i = 0; i < 8; i++) |
| 4592 | { |
| 4593 | INT16 s1, s2; |
| 4594 | SCALAR_GET_VS1(s1, i); |
| 4595 | SCALAR_GET_VS2(s2, i); |
| 4596 | |
| 4597 | if (s1 < 0) |
| 4598 | { |
| 4599 | if (s2 == -32768) |
| 4600 | { |
| 4601 | vres[i] = 32767; |
| 4602 | } |
| 4603 | else |
| 4604 | { |
| 4605 | vres[i] = -s2; |
| 4606 | } |
| 4607 | } |
| 4608 | else if (s1 > 0) |
| 4609 | { |
| 4610 | vres[i] = s2; |
| 4611 | } |
| 4612 | else |
| 4613 | { |
| 4614 | vres[i] = 0; |
| 4615 | } |
| 4616 | |
| 4617 | SET_ACCUM_L(vres[i], i); |
| 4618 | } |
| 4619 | WRITEBACK_RESULT(); |
| 4620 | } |
| 4621 | |
| 4622 | static void cfunc_rsp_vabs_scalar(void *param) |
| 4623 | { |
| 4624 | ((rsp_device *)param)->ccfunc_rsp_vabs_scalar(); |
| 4625 | } |
| 4626 | #endif |
| 4627 | |
| 4628 | #if USE_SIMD |
| 4629 | // VADDC |
| 4630 | // |
| 4631 | // 31 25 24 20 15 10 5 0 |
| 4632 | // ------------------------------------------------------ |
| 4633 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 4634 | // ------------------------------------------------------ |
| 4635 | // |
| 4636 | // Adds two vector registers, the carry out is stored into carry register |
| 4637 | // TODO: check VS2REG = VDREG |
| 4638 | |
| 4639 | inline void rsp_device::ccfunc_rsp_vaddc_simd() |
| 4640 | { |
| 4641 | int op = m_rsp_state->arg0; |
| 4642 | |
| 4643 | VEC_CLEAR_ZERO_FLAGS(); |
| 4644 | VEC_CLEAR_CARRY_FLAGS(); |
| 4645 | |
| 4646 | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4647 | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 4648 | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 4649 | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 4650 | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 4651 | __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531); |
| 4652 | __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420); |
| 4653 | |
| 4654 | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4655 | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4656 | |
| 4657 | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 4658 | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 4659 | |
| 4660 | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 4661 | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 4662 | } |
| 4663 | |
| 4664 | static void cfunc_rsp_vaddc_simd(void *param) |
| 4665 | { |
| 4666 | ((rsp_device *)param)->ccfunc_rsp_vaddc_simd(); |
| 4667 | } |
| 4668 | #endif |
| 4669 | |
| 4670 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4671 | |
| 4672 | inline void rsp_device::ccfunc_rsp_vaddc_scalar() |
| 4673 | { |
| 4674 | int op = m_rsp_state->arg0; |
| 4675 | |
| 4676 | CLEAR_ZERO_FLAGS(); |
| 4677 | CLEAR_CARRY_FLAGS(); |
| 4678 | |
| 4679 | INT16 vres[8] = { 0 }; |
| 4680 | for (int i = 0; i < 8; i++) |
| 4681 | { |
| 4682 | INT16 w1, w2; |
| 4683 | SCALAR_GET_VS1(w1, i); |
| 4684 | SCALAR_GET_VS2(w2, i); |
| 4685 | INT32 s1 = (UINT32)(UINT16)w1; |
| 4686 | INT32 s2 = (UINT32)(UINT16)w2; |
| 4687 | INT32 r = s1 + s2; |
| 4688 | |
| 4689 | vres[i] = (INT16)(r); |
| 4690 | SET_ACCUM_L((INT16)r, i); |
| 4691 | |
| 4692 | if (r & 0xffff0000) |
| 4693 | { |
| 4694 | SET_CARRY_FLAG(i); |
| 4695 | } |
| 4696 | } |
| 4697 | WRITEBACK_RESULT(); |
| 4698 | } |
| 4699 | |
| 4700 | static void cfunc_rsp_vaddc_scalar(void *param) |
| 4701 | { |
| 4702 | ((rsp_device *)param)->ccfunc_rsp_vaddc_scalar(); |
| 4703 | } |
| 4704 | #endif |
| 4705 | |
| 4706 | #if USE_SIMD |
| 4707 | // VSUBC |
| 4708 | // |
| 4709 | // 31 25 24 20 15 10 5 0 |
| 4710 | // ------------------------------------------------------ |
| 4711 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 4712 | // ------------------------------------------------------ |
| 4713 | // |
| 4714 | // Subtracts two vector registers, the carry out is stored into carry register |
| 4715 | // TODO: check VS2REG = VDREG |
| 4716 | |
| 4717 | inline void rsp_device::ccfunc_rsp_vsubc_simd() |
| 4718 | { |
| 4719 | int op = m_rsp_state->arg0; |
| 4720 | |
| 4721 | VEC_CLEAR_ZERO_FLAGS(); |
| 4722 | VEC_CLEAR_CARRY_FLAGS(); |
| 4723 | |
| 4724 | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4725 | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 4726 | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 4727 | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 4728 | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 4729 | __m128i sum7531 = _mm_sub_epi32(vec7531, shuf7531); |
| 4730 | __m128i sum6420 = _mm_sub_epi32(vec6420, shuf6420); |
| 4731 | |
| 4732 | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4733 | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4734 | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 4735 | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 4736 | __m128i zero7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_lomask); |
| 4737 | __m128i zero6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_lomask); |
| 4738 | |
| 4739 | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 4740 | m_xvflag[ZERO] = _mm_or_si128(_mm_slli_epi32(zero6420, 16), zero7531); |
| 4741 | |
| 4742 | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 4743 | } |
| 4744 | |
| 4745 | static void cfunc_rsp_vsubc_simd(void *param) |
| 4746 | { |
| 4747 | ((rsp_device *)param)->ccfunc_rsp_vsubc_simd(); |
| 4748 | } |
| 4749 | #endif |
| 4750 | |
| 4751 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4752 | |
| 4753 | inline void rsp_device::ccfunc_rsp_vsubc_scalar() |
| 4754 | { |
| 4755 | int op = m_rsp_state->arg0; |
| 4756 | |
| 4757 | CLEAR_ZERO_FLAGS(); |
| 4758 | CLEAR_CARRY_FLAGS(); |
| 4759 | |
| 4760 | INT16 vres[8]; |
| 4761 | for (int i = 0; i < 8; i++) |
| 4762 | { |
| 4763 | INT16 w1, w2; |
| 4764 | SCALAR_GET_VS1(w1, i); |
| 4765 | SCALAR_GET_VS2(w2, i); |
| 4766 | INT32 s1 = (UINT32)(UINT16)w1; |
| 4767 | INT32 s2 = (UINT32)(UINT16)w2; |
| 4768 | INT32 r = s1 - s2; |
| 4769 | |
| 4770 | vres[i] = (INT16)(r); |
| 4771 | SET_ACCUM_L((UINT16)r, i); |
| 4772 | |
| 4773 | if ((UINT16)(r) != 0) |
| 4774 | { |
| 4775 | SET_ZERO_FLAG(i); |
| 4776 | } |
| 4777 | if (r & 0xffff0000) |
| 4778 | { |
| 4779 | SET_CARRY_FLAG(i); |
| 4780 | } |
| 4781 | } |
| 4782 | WRITEBACK_RESULT(); |
| 4783 | } |
| 4784 | |
| 4785 | static void cfunc_rsp_vsubc_scalar(void *param) |
| 4786 | { |
| 4787 | ((rsp_device *)param)->ccfunc_rsp_vsubc_scalar(); |
| 4788 | } |
| 4789 | #endif |
| 4790 | |
| 4791 | // VADDB |
| 4792 | // |
| 4793 | // 31 25 24 20 15 10 5 0 |
| 4794 | // ------------------------------------------------------ |
| 4795 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010110 | |
| 4796 | // ------------------------------------------------------ |
| 4797 | // |
| 4798 | // Adds two vector registers bytewise with rounding |
| 4799 | inline void rsp_device::ccfunc_rsp_vaddb_scalar() |
| 4800 | { |
| 4801 | const int op = m_rsp_state->arg0; |
| 4802 | const int round = (EL == 0) ? 0 : (1 << (EL - 1)); |
| 4803 | |
| 4804 | INT16 vres[8]; |
| 4805 | for (int i = 0; i < 8; i++) |
| 4806 | { |
| 4807 | UINT16 w1, w2; |
| 4808 | SCALAR_GET_VS1(w1, i); |
| 4809 | SCALAR_GET_VS2(w2, i); |
| 4810 | |
| 4811 | UINT8 hb1 = w1 >> 8; |
| 4812 | UINT8 lb1 = w1 & 0xff; |
| 4813 | UINT8 hb2 = w2 >> 8; |
| 4814 | UINT8 lb2 = w2 & 0xff; |
| 4815 | |
| 4816 | UINT16 hs = hb1 + hb2 + round; |
| 4817 | UINT16 ls = lb1 + lb2 + round; |
| 4818 | |
| 4819 | SET_ACCUM_L((hs << 8) | ls, i); |
| 4820 | |
| 4821 | hs >>= EL; |
| 4822 | if (hs > 255) |
| 4823 | { |
| 4824 | hs = 255; |
| 4825 | } |
| 4826 | /*else if (hs < 0) |
| 4827 | { |
| 4828 | hs = 0; |
| 4829 | }*/ |
| 4830 | |
| 4831 | ls >>= EL; |
| 4832 | if (ls > 255) |
| 4833 | { |
| 4834 | ls = 255; |
| 4835 | } |
| 4836 | /*else if (ls < 0) |
| 4837 | { |
| 4838 | ls = 0; |
| 4839 | }*/ |
| 4840 | |
| 4841 | vres[i] = 0; // VD writeback disabled on production hardware |
| 4842 | // vres[i] = (hs << 8) | ls; |
| 4843 | } |
| 4844 | WRITEBACK_RESULT(); |
| 4845 | } |
| 4846 | |
| 4847 | static void cfunc_rsp_vaddb_scalar(void *param) |
| 4848 | { |
| 4849 | ((rsp_device *)param)->ccfunc_rsp_vaddb_scalar(); |
| 4850 | } |
| 4851 | |
| 4852 | #if USE_SIMD |
| 4853 | // VSAW |
| 4854 | // |
| 4855 | // 31 25 24 20 15 10 5 0 |
| 4856 | // ------------------------------------------------------ |
| 4857 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 4858 | // ------------------------------------------------------ |
| 4859 | // |
| 4860 | // Stores high, middle or low slice of accumulator to destination vector |
| 4861 | |
| 4862 | inline void rsp_device::ccfunc_rsp_vsaw_simd() |
| 4863 | { |
| 4864 | int op = m_rsp_state->arg0; |
| 4865 | |
| 4866 | switch (EL) |
| 4867 | { |
| 4868 | case 0x08: // VSAWH |
| 4869 | { |
| 4870 | m_xv[VDREG] = m_accum_h; |
| 4871 | break; |
| 4872 | } |
| 4873 | case 0x09: // VSAWM |
| 4874 | { |
| 4875 | m_xv[VDREG] = m_accum_m; |
| 4876 | break; |
| 4877 | } |
| 4878 | case 0x0a: // VSAWL |
| 4879 | { |
| 4880 | m_xv[VDREG] = m_accum_l; |
| 4881 | break; |
| 4882 | } |
| 4883 | default: // Unsupported, writes 0 to VD |
| 4884 | { |
| 4885 | |
| 4886 | } |
| 4887 | } |
| 4888 | } |
| 4889 | |
| 4890 | static void cfunc_rsp_vsaw_simd(void *param) |
| 4891 | { |
| 4892 | ((rsp_device *)param)->ccfunc_rsp_vsaw_simd(); |
| 4893 | } |
| 4894 | #endif |
| 4895 | |
| 4896 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4897 | |
| 4898 | inline void rsp_device::ccfunc_rsp_vsaw_scalar() |
| 4899 | { |
| 4900 | int op = m_rsp_state->arg0; |
| 4901 | |
| 4902 | switch (EL) |
| 4903 | { |
| 4904 | case 0x08: // VSAWH |
| 4905 | for (int i = 0; i < 8; i++) |
| 4906 | { |
| 4907 | W_VREG_S(VDREG, i) = ACCUM_H(i); |
| 4908 | } |
| 4909 | break; |
| 4910 | case 0x09: // VSAWM |
| 4911 | for (int i = 0; i < 8; i++) |
| 4912 | { |
| 4913 | W_VREG_S(VDREG, i) = ACCUM_M(i); |
| 4914 | } |
| 4915 | break; |
| 4916 | case 0x0a: // VSAWL |
| 4917 | for (int i = 0; i < 8; i++) |
| 4918 | { |
| 4919 | W_VREG_S(VDREG, i) = ACCUM_L(i); |
| 4920 | } |
| 4921 | break; |
| 4922 | default: // Unsupported |
| 4923 | { |
| 4924 | for (int i = 0; i < 8; i++) |
| 4925 | { |
| 4926 | W_VREG_S(VDREG, i) = 0; |
| 4927 | } |
| 4928 | } |
| 4929 | } |
| 4930 | } |
| 4931 | |
| 4932 | static void cfunc_rsp_vsaw_scalar(void *param) |
| 4933 | { |
| 4934 | ((rsp_device *)param)->ccfunc_rsp_vsaw_scalar(); |
| 4935 | } |
| 4936 | #endif |
| 4937 | |
| 4938 | #if USE_SIMD |
| 4939 | // VLT |
| 4940 | // |
| 4941 | // 31 25 24 20 15 10 5 0 |
| 4942 | // ------------------------------------------------------ |
| 4943 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 4944 | // ------------------------------------------------------ |
| 4945 | // |
| 4946 | // Sets compare flags if elements in VS1 are less than VS2 |
| 4947 | // Moves the element in VS2 to destination vector |
| 4948 | |
| 4949 | inline void rsp_device::ccfunc_rsp_vlt_simd() |
| 4950 | { |
| 4951 | int op = m_rsp_state->arg0; |
| 4952 | |
| 4953 | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 4954 | |
| 4955 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4956 | __m128i zc_mask = _mm_and_si128(m_xvflag[ZERO], m_xvflag[CARRY]); |
| 4957 | __m128i lt_mask = _mm_cmplt_epi16(m_xv[VS1REG], shuf); |
| 4958 | __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), zc_mask); |
| 4959 | |
| 4960 | m_xvflag[COMPARE] = _mm_or_si128(lt_mask, eq_mask); |
| 4961 | |
| 4962 | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 4963 | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 4964 | |
| 4965 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 4966 | } |
| 4967 | |
| 4968 | static void cfunc_rsp_vlt_simd(void *param) |
| 4969 | { |
| 4970 | ((rsp_device *)param)->ccfunc_rsp_vlt_simd(); |
| 4971 | } |
| 4972 | #endif |
| 4973 | |
| 4974 | #if (!USE_SIMD || SIMUL_SIMD) |
| 4975 | |
| 4976 | inline void rsp_device::ccfunc_rsp_vlt_scalar() |
| 4977 | { |
| 4978 | int op = m_rsp_state->arg0; |
| 4979 | |
| 4980 | CLEAR_COMPARE_FLAGS(); |
| 4981 | CLEAR_CLIP2_FLAGS(); |
| 4982 | |
| 4983 | INT16 vres[8]; |
| 4984 | for (int i = 0; i < 8; i++) |
| 4985 | { |
| 4986 | INT16 s1, s2; |
| 4987 | SCALAR_GET_VS1(s1, i); |
| 4988 | SCALAR_GET_VS2(s2, i); |
| 4989 | |
| 4990 | if (s1 < s2) |
| 4991 | { |
| 4992 | SET_COMPARE_FLAG(i); |
| 4993 | } |
| 4994 | else if (s1 == s2) |
| 4995 | { |
| 4996 | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 4997 | { |
| 4998 | SET_COMPARE_FLAG(i); |
| 4999 | } |
| 5000 | } |
| 5001 | |
| 5002 | if (COMPARE_FLAG(i) != 0) |
| 5003 | { |
| 5004 | vres[i] = s1; |
| 5005 | } |
| 5006 | else |
| 5007 | { |
| 5008 | vres[i] = s2; |
| 5009 | } |
| 5010 | |
| 5011 | SET_ACCUM_L(vres[i], i); |
| 5012 | } |
| 5013 | |
| 5014 | CLEAR_ZERO_FLAGS(); |
| 5015 | CLEAR_CARRY_FLAGS(); |
| 5016 | WRITEBACK_RESULT(); |
| 5017 | } |
| 5018 | |
| 5019 | static void cfunc_rsp_vlt_scalar(void *param) |
| 5020 | { |
| 5021 | ((rsp_device *)param)->ccfunc_rsp_vlt_scalar(); |
| 5022 | } |
| 5023 | #endif |
| 5024 | |
| 5025 | #if USE_SIMD |
| 5026 | // VEQ |
| 5027 | // |
| 5028 | // 31 25 24 20 15 10 5 0 |
| 5029 | // ------------------------------------------------------ |
| 5030 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 5031 | // ------------------------------------------------------ |
| 5032 | // |
| 5033 | // Sets compare flags if elements in VS1 are equal with VS2 |
| 5034 | // Moves the element in VS2 to destination vector |
| 5035 | |
| 5036 | inline void rsp_device::ccfunc_rsp_veq_simd() |
| 5037 | { |
| 5038 | int op = m_rsp_state->arg0; |
| 5039 | |
| 5040 | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 5041 | |
| 5042 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5043 | __m128i zero_mask = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 5044 | __m128i eq_mask = _mm_cmpeq_epi16(m_xv[VS1REG], shuf); |
| 5045 | |
| 5046 | m_xvflag[COMPARE] = _mm_and_si128(zero_mask, eq_mask); |
| 5047 | |
| 5048 | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 5049 | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 5050 | |
| 5051 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 5052 | } |
| 5053 | |
| 5054 | static void cfunc_rsp_veq_simd(void *param) |
| 5055 | { |
| 5056 | ((rsp_device *)param)->ccfunc_rsp_veq_simd(); |
| 5057 | } |
| 5058 | #endif |
| 5059 | |
| 5060 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5061 | |
| 5062 | inline void rsp_device::ccfunc_rsp_veq_scalar() |
| 5063 | { |
| 5064 | int op = m_rsp_state->arg0; |
| 5065 | |
| 5066 | CLEAR_COMPARE_FLAGS(); |
| 5067 | CLEAR_CLIP2_FLAGS(); |
| 5068 | |
| 5069 | INT16 vres[8]; |
| 5070 | for (int i = 0; i < 8; i++) |
| 5071 | { |
| 5072 | INT16 s1, s2; |
| 5073 | SCALAR_GET_VS1(s1, i); |
| 5074 | SCALAR_GET_VS2(s2, i); |
| 5075 | |
| 5076 | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 5077 | { |
| 5078 | SET_COMPARE_FLAG(i); |
| 5079 | vres[i] = s1; |
| 5080 | } |
| 5081 | else |
| 5082 | { |
| 5083 | vres[i] = s2; |
| 5084 | } |
| 5085 | |
| 5086 | SET_ACCUM_L(vres[i], i); |
| 5087 | } |
| 5088 | |
| 5089 | CLEAR_ZERO_FLAGS(); |
| 5090 | CLEAR_CARRY_FLAGS(); |
| 5091 | WRITEBACK_RESULT(); |
| 5092 | } |
| 5093 | |
| 5094 | static void cfunc_rsp_veq_scalar(void *param) |
| 5095 | { |
| 5096 | ((rsp_device *)param)->ccfunc_rsp_veq_scalar(); |
| 5097 | } |
| 5098 | #endif |
| 5099 | |
| 5100 | #if USE_SIMD |
| 5101 | // VNE |
| 5102 | // |
| 5103 | // 31 25 24 20 15 10 5 0 |
| 5104 | // ------------------------------------------------------ |
| 5105 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 5106 | // ------------------------------------------------------ |
| 5107 | // |
| 5108 | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 5109 | // Moves the element in VS2 to destination vector |
| 5110 | |
| 5111 | inline void rsp_device::ccfunc_rsp_vne_simd() |
| 5112 | { |
| 5113 | int op = m_rsp_state->arg0; |
| 5114 | |
| 5115 | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 5116 | |
| 5117 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5118 | __m128i neq_mask = _mm_xor_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), vec_neg1); |
| 5119 | |
| 5120 | m_xvflag[COMPARE] = _mm_or_si128(m_xvflag[ZERO], neq_mask); |
| 5121 | |
| 5122 | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 5123 | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 5124 | |
| 5125 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 5126 | } |
| 5127 | |
| 5128 | static void cfunc_rsp_vne_simd(void *param) |
| 5129 | { |
| 5130 | ((rsp_device *)param)->ccfunc_rsp_vne_simd(); |
| 5131 | } |
| 5132 | #endif |
| 5133 | |
| 5134 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5135 | |
| 5136 | inline void rsp_device::ccfunc_rsp_vne_scalar() |
| 5137 | { |
| 5138 | int op = m_rsp_state->arg0; |
| 5139 | |
| 5140 | CLEAR_COMPARE_FLAGS(); |
| 5141 | CLEAR_CLIP2_FLAGS(); |
| 5142 | |
| 5143 | INT16 vres[8]; |
| 5144 | for (int i = 0; i < 8; i++) |
| 5145 | { |
| 5146 | INT16 s1, s2; |
| 5147 | SCALAR_GET_VS1(s1, i); |
| 5148 | SCALAR_GET_VS2(s2, i); |
| 5149 | |
| 5150 | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 5151 | { |
| 5152 | SET_COMPARE_FLAG(i); |
| 5153 | vres[i] = s1; |
| 5154 | } |
| 5155 | else |
| 5156 | { |
| 5157 | vres[i] = s2; |
| 5158 | } |
| 5159 | |
| 5160 | SET_ACCUM_L(vres[i], i); |
| 5161 | } |
| 5162 | |
| 5163 | CLEAR_ZERO_FLAGS(); |
| 5164 | CLEAR_CARRY_FLAGS(); |
| 5165 | WRITEBACK_RESULT(); |
| 5166 | } |
| 5167 | |
| 5168 | static void cfunc_rsp_vne_scalar(void *param) |
| 5169 | { |
| 5170 | ((rsp_device *)param)->ccfunc_rsp_vne_scalar(); |
| 5171 | } |
| 5172 | #endif |
| 5173 | |
| 5174 | #if USE_SIMD |
| 5175 | // VGE |
| 5176 | // |
| 5177 | // 31 25 24 20 15 10 5 0 |
| 5178 | // ------------------------------------------------------ |
| 5179 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 5180 | // ------------------------------------------------------ |
| 5181 | // |
| 5182 | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 5183 | // Moves the element in VS2 to destination vector |
| 5184 | |
| 5185 | inline void rsp_device::ccfunc_rsp_vge_simd() |
| 5186 | { |
| 5187 | int op = m_rsp_state->arg0; |
| 5188 | |
| 5189 | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 5190 | |
| 5191 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5192 | __m128i zero_mask = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 5193 | __m128i carry_mask = _mm_cmpeq_epi16(m_xvflag[CARRY], _mm_setzero_si128()); |
| 5194 | __m128i flag_mask = _mm_or_si128(zero_mask, carry_mask); |
| 5195 | __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), flag_mask); |
| 5196 | __m128i gt_mask = _mm_cmpgt_epi16(m_xv[VS1REG], shuf); |
| 5197 | m_xvflag[COMPARE] = _mm_or_si128(eq_mask, gt_mask); |
| 5198 | |
| 5199 | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 5200 | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 5201 | |
| 5202 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 5203 | } |
| 5204 | |
| 5205 | static void cfunc_rsp_vge_simd(void *param) |
| 5206 | { |
| 5207 | ((rsp_device *)param)->ccfunc_rsp_vge_simd(); |
| 5208 | } |
| 5209 | #endif |
| 5210 | |
| 5211 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5212 | |
| 5213 | inline void rsp_device::ccfunc_rsp_vge_scalar() |
| 5214 | { |
| 5215 | int op = m_rsp_state->arg0; |
| 5216 | |
| 5217 | CLEAR_COMPARE_FLAGS(); |
| 5218 | CLEAR_CLIP2_FLAGS(); |
| 5219 | |
| 5220 | INT16 vres[8]; |
| 5221 | for (int i = 0; i < 8; i++) |
| 5222 | { |
| 5223 | INT16 s1, s2; |
| 5224 | SCALAR_GET_VS1(s1, i); |
| 5225 | SCALAR_GET_VS2(s2, i); |
| 5226 | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 5227 | { |
| 5228 | SET_COMPARE_FLAG(i); |
| 5229 | vres[i] = s1; |
| 5230 | } |
| 5231 | else |
| 5232 | { |
| 5233 | vres[i] = s2; |
| 5234 | } |
| 5235 | |
| 5236 | SET_ACCUM_L(vres[i], i); |
| 5237 | } |
| 5238 | |
| 5239 | CLEAR_ZERO_FLAGS(); |
| 5240 | CLEAR_CARRY_FLAGS(); |
| 5241 | WRITEBACK_RESULT(); |
| 5242 | } |
| 5243 | |
| 5244 | static void cfunc_rsp_vge_scalar(void *param) |
| 5245 | { |
| 5246 | ((rsp_device *)param)->ccfunc_rsp_vge_scalar(); |
| 5247 | } |
| 5248 | #endif |
| 5249 | |
| 5250 | #if USE_SIMD |
| 5251 | // VCL |
| 5252 | // |
| 5253 | // 31 25 24 20 15 10 5 0 |
| 5254 | // ------------------------------------------------------ |
| 5255 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 5256 | // ------------------------------------------------------ |
| 5257 | // |
| 5258 | // Vector clip low |
| 5259 | |
| 5260 | inline void rsp_device::ccfunc_rsp_vcl_simd() |
| 5261 | { |
| 5262 | int op = m_rsp_state->arg0; |
| 5263 | INT16 vres[8]; |
| 5264 | |
| 5265 | for (int i = 0; i < 8; i++) |
| 5266 | { |
| 5267 | INT16 s1, s2; |
| 5268 | VEC_GET_SCALAR_VS1(s1, i); |
| 5269 | VEC_GET_SCALAR_VS2(s2, i); |
| 5270 | |
| 5271 | if (VEC_CARRY_FLAG(i) != 0) |
| 5272 | { |
| 5273 | if (VEC_ZERO_FLAG(i) != 0) |
| 5274 | { |
| 5275 | if (VEC_COMPARE_FLAG(i) != 0) |
| 5276 | { |
| 5277 | VEC_SET_ACCUM_L(-(UINT16)s2, i); |
| 5278 | } |
| 5279 | else |
| 5280 | { |
| 5281 | VEC_SET_ACCUM_L(s1, i); |
| 5282 | } |
| 5283 | } |
| 5284 | else |
| 5285 | { |
| 5286 | if (VEC_CLIP1_FLAG(i) != 0) |
| 5287 | { |
| 5288 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 5289 | { |
| 5290 | VEC_SET_ACCUM_L(s1, i); |
| 5291 | VEC_CLEAR_COMPARE_FLAG(i); |
| 5292 | } |
| 5293 | else |
| 5294 | { |
| 5295 | VEC_SET_ACCUM_L(-((UINT16)s2), i); |
| 5296 | VEC_SET_COMPARE_FLAG(i); |
| 5297 | } |
| 5298 | } |
| 5299 | else |
| 5300 | { |
| 5301 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 5302 | { |
| 5303 | VEC_SET_ACCUM_L(s1, i); |
| 5304 | VEC_CLEAR_COMPARE_FLAG(i); |
| 5305 | } |
| 5306 | else |
| 5307 | { |
| 5308 | VEC_SET_ACCUM_L(-((UINT16)s2), i); |
| 5309 | VEC_SET_COMPARE_FLAG(i); |
| 5310 | } |
| 5311 | } |
| 5312 | } |
| 5313 | } |
| 5314 | else |
| 5315 | { |
| 5316 | if (VEC_ZERO_FLAG(i) != 0) |
| 5317 | { |
| 5318 | if (VEC_CLIP2_FLAG(i) != 0) |
| 5319 | { |
| 5320 | VEC_SET_ACCUM_L(s2, i); |
| 5321 | } |
| 5322 | else |
| 5323 | { |
| 5324 | VEC_SET_ACCUM_L(s1, i); |
| 5325 | } |
| 5326 | } |
| 5327 | else |
| 5328 | { |
| 5329 | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 5330 | { |
| 5331 | VEC_SET_ACCUM_L(s2, i); |
| 5332 | VEC_SET_CLIP2_FLAG(i); |
| 5333 | } |
| 5334 | else |
| 5335 | { |
| 5336 | VEC_SET_ACCUM_L(s1, i); |
| 5337 | VEC_CLEAR_CLIP2_FLAG(i); |
| 5338 | } |
| 5339 | } |
| 5340 | } |
| 5341 | vres[i] = VEC_ACCUM_L(i); |
| 5342 | } |
| 5343 | VEC_CLEAR_ZERO_FLAGS(); |
| 5344 | VEC_CLEAR_CARRY_FLAGS(); |
| 5345 | VEC_CLEAR_CLIP1_FLAGS(); |
| 5346 | VEC_WRITEBACK_RESULT(); |
| 5347 | } |
| 5348 | |
| 5349 | static void cfunc_rsp_vcl_simd(void *param) |
| 5350 | { |
| 5351 | ((rsp_device *)param)->ccfunc_rsp_vcl_simd(); |
| 5352 | } |
| 5353 | #endif |
| 5354 | |
| 5355 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5356 | |
| 5357 | inline void rsp_device::ccfunc_rsp_vcl_scalar() |
| 5358 | { |
| 5359 | int op = m_rsp_state->arg0; |
| 5360 | INT16 vres[8]; |
| 5361 | |
| 5362 | for (int i = 0; i < 8; i++) |
| 5363 | { |
| 5364 | INT16 s1, s2; |
| 5365 | SCALAR_GET_VS1(s1, i); |
| 5366 | SCALAR_GET_VS2(s2, i); |
| 5367 | |
| 5368 | if (CARRY_FLAG(i) != 0) |
| 5369 | { |
| 5370 | if (ZERO_FLAG(i) != 0) |
| 5371 | { |
| 5372 | if (COMPARE_FLAG(i) != 0) |
| 5373 | { |
| 5374 | SET_ACCUM_L(-(UINT16)s2, i); |
| 5375 | } |
| 5376 | else |
| 5377 | { |
| 5378 | SET_ACCUM_L(s1, i); |
| 5379 | } |
| 5380 | } |
| 5381 | else |
| 5382 | { |
| 5383 | if (CLIP1_FLAG(i) != 0) |
| 5384 | { |
| 5385 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 5386 | { |
| 5387 | SET_ACCUM_L(s1, i); |
| 5388 | CLEAR_COMPARE_FLAG(i); |
| 5389 | } |
| 5390 | else |
| 5391 | { |
| 5392 | SET_ACCUM_L(-((UINT16)s2), i); |
| 5393 | SET_COMPARE_FLAG(i); |
| 5394 | } |
| 5395 | } |
| 5396 | else |
| 5397 | { |
| 5398 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 5399 | { |
| 5400 | SET_ACCUM_L(s1, i); |
| 5401 | CLEAR_COMPARE_FLAG(i); |
| 5402 | } |
| 5403 | else |
| 5404 | { |
| 5405 | SET_ACCUM_L(-((UINT16)s2), i); |
| 5406 | SET_COMPARE_FLAG(i); |
| 5407 | } |
| 5408 | } |
| 5409 | } |
| 5410 | } |
| 5411 | else |
| 5412 | { |
| 5413 | if (ZERO_FLAG(i) != 0) |
| 5414 | { |
| 5415 | if (CLIP2_FLAG(i) != 0) |
| 5416 | { |
| 5417 | SET_ACCUM_L(s2, i); |
| 5418 | } |
| 5419 | else |
| 5420 | { |
| 5421 | SET_ACCUM_L(s1, i); |
| 5422 | } |
| 5423 | } |
| 5424 | else |
| 5425 | { |
| 5426 | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 5427 | { |
| 5428 | SET_ACCUM_L(s2, i); |
| 5429 | SET_CLIP2_FLAG(i); |
| 5430 | } |
| 5431 | else |
| 5432 | { |
| 5433 | SET_ACCUM_L(s1, i); |
| 5434 | CLEAR_CLIP2_FLAG(i); |
| 5435 | } |
| 5436 | } |
| 5437 | } |
| 5438 | vres[i] = ACCUM_L(i); |
| 5439 | } |
| 5440 | CLEAR_ZERO_FLAGS(); |
| 5441 | CLEAR_CARRY_FLAGS(); |
| 5442 | CLEAR_CLIP1_FLAGS(); |
| 5443 | WRITEBACK_RESULT(); |
| 5444 | } |
| 5445 | |
| 5446 | static void cfunc_rsp_vcl_scalar(void *param) |
| 5447 | { |
| 5448 | ((rsp_device *)param)->ccfunc_rsp_vcl_scalar(); |
| 5449 | } |
| 5450 | #endif |
| 5451 | |
| 5452 | #if USE_SIMD |
| 5453 | // VCH |
| 5454 | // |
| 5455 | // 31 25 24 20 15 10 5 0 |
| 5456 | // ------------------------------------------------------ |
| 5457 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 5458 | // ------------------------------------------------------ |
| 5459 | // |
| 5460 | // Vector clip high |
| 5461 | |
| 5462 | inline void rsp_device::ccfunc_rsp_vch_simd() |
| 5463 | { |
| 5464 | int op = m_rsp_state->arg0; |
| 5465 | |
| 5466 | VEC_CLEAR_CARRY_FLAGS(); |
| 5467 | VEC_CLEAR_COMPARE_FLAGS(); |
| 5468 | VEC_CLEAR_CLIP1_FLAGS(); |
| 5469 | VEC_CLEAR_ZERO_FLAGS(); |
| 5470 | VEC_CLEAR_CLIP2_FLAGS(); |
| 5471 | |
| 5472 | #if 0 |
| 5473 | // Compare flag |
| 5474 | // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5475 | // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0) |
| 5476 | |
| 5477 | // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0) |
| 5478 | // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5479 | |
| 5480 | // Carry flag |
| 5481 | // flag[0] bit [0- 7] set if (s1 ^ s2) < 0 |
| 5482 | |
| 5483 | // Zero flag |
| 5484 | // flag[0] bit [8-15] set if (s1 ^ s2) < 0 && (s1 + s2) != 0 && (s1 != ~s2) |
| 5485 | // flag[0] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) != 0 && (s1 != ~s2) |
| 5486 | |
| 5487 | // flag[2] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) == -1 |
| 5488 | |
| 5489 | // accum set to -s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5490 | // accum set to -s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5491 | |
| 5492 | // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0) |
| 5493 | // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0 |
| 5494 | |
| 5495 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5496 | __m128i s1_xor_s2 = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 5497 | __m128i s1_plus_s2 = _mm_add_epi16(m_xv[VS1REG], shuf); |
| 5498 | __m128i s1_sub_s2 = _mm_sub_epi16(m_xv[VS1REG], shuf); |
| 5499 | __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1); |
| 5500 | |
| 5501 | __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128()); |
| 5502 | __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128()); |
| 5503 | __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1); |
| 5504 | __m128i s1s2_plus_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_plus_s2, _mm_setzero_si128()), vec_neg1); |
| 5505 | __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128()); |
| 5506 | __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1); |
| 5507 | __m128i s1s2_plus_n1 = _mm_cmpeq_epi16(s1_plus_s2, vec_neg1); |
| 5508 | __m128i s1s2_sub_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_sub_s2, _mm_setzero_si128()), vec_neg1); |
| 5509 | __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128()); |
| 5510 | __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1); |
| 5511 | __m128i s1_nens2 = _mm_xor_si128(_mm_cmpeq_epi16(m_xv[VS1REG], s2_neg), vec_neg1); |
| 5512 | |
| 5513 | __m128i ext_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_n1), vec_flagmask); |
| 5514 | m_flag[2] |= _mm_extract_epi16(ext_mask, 0) << 0; |
| 5515 | m_flag[2] |= _mm_extract_epi16(ext_mask, 1) << 1; |
| 5516 | m_flag[2] |= _mm_extract_epi16(ext_mask, 2) << 2; |
| 5517 | m_flag[2] |= _mm_extract_epi16(ext_mask, 3) << 3; |
| 5518 | m_flag[2] |= _mm_extract_epi16(ext_mask, 4) << 4; |
| 5519 | m_flag[2] |= _mm_extract_epi16(ext_mask, 5) << 5; |
| 5520 | m_flag[2] |= _mm_extract_epi16(ext_mask, 6) << 6; |
| 5521 | m_flag[2] |= _mm_extract_epi16(ext_mask, 7) << 7; |
| 5522 | |
| 5523 | __m128i carry_mask = _mm_and_si128(s1s2_xor_lz, vec_flagmask); |
| 5524 | m_flag[0] |= _mm_extract_epi16(carry_mask, 0) << 0; |
| 5525 | m_flag[0] |= _mm_extract_epi16(carry_mask, 1) << 1; |
| 5526 | m_flag[0] |= _mm_extract_epi16(carry_mask, 2) << 2; |
| 5527 | m_flag[0] |= _mm_extract_epi16(carry_mask, 3) << 3; |
| 5528 | m_flag[0] |= _mm_extract_epi16(carry_mask, 4) << 4; |
| 5529 | m_flag[0] |= _mm_extract_epi16(carry_mask, 5) << 5; |
| 5530 | m_flag[0] |= _mm_extract_epi16(carry_mask, 6) << 6; |
| 5531 | m_flag[0] |= _mm_extract_epi16(carry_mask, 7) << 7; |
| 5532 | |
| 5533 | __m128i z0_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_nz), s1_nens2); |
| 5534 | __m128i z1_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_nz), s1_nens2); |
| 5535 | __m128i z_mask = _mm_and_si128(_mm_or_si128(z0_mask, z1_mask), vec_flagmask); |
| 5536 | z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi32(z_mask, 15)), vec_shiftmask2); |
| 5537 | z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi64(z_mask, 30)), vec_shiftmask4); |
| 5538 | z_mask = _mm_or_si128(z_mask, _mm_srli_si128(z_mask, 7)); |
| 5539 | z_mask = _mm_or_si128(z_mask, _mm_srli_epi16(z_mask, 4)); |
| 5540 | m_flag[0] |= (_mm_extract_epi16(z_mask, 0) << 8) & 0x00ff00; |
| 5541 | |
| 5542 | __m128i f0_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz), _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)), vec_flagmask); |
| 5543 | __m128i f8_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez), _mm_and_si128(s1s2_xor_lz, s2_lz)), vec_flagmask); |
| 5544 | f0_mask = _mm_and_si128(f0_mask, vec_flagmask); |
| 5545 | f8_mask = _mm_and_si128(f8_mask, vec_flagmask); |
| 5546 | m_flag[1] |= _mm_extract_epi16(f0_mask, 0) << 0; |
| 5547 | m_flag[1] |= _mm_extract_epi16(f0_mask, 1) << 1; |
| 5548 | m_flag[1] |= _mm_extract_epi16(f0_mask, 2) << 2; |
| 5549 | m_flag[1] |= _mm_extract_epi16(f0_mask, 3) << 3; |
| 5550 | m_flag[1] |= _mm_extract_epi16(f0_mask, 4) << 4; |
| 5551 | m_flag[1] |= _mm_extract_epi16(f0_mask, 5) << 5; |
| 5552 | m_flag[1] |= _mm_extract_epi16(f0_mask, 6) << 6; |
| 5553 | m_flag[1] |= _mm_extract_epi16(f0_mask, 7) << 7; |
| 5554 | |
| 5555 | m_flag[1] |= _mm_extract_epi16(f8_mask, 0) << 8; |
| 5556 | m_flag[1] |= _mm_extract_epi16(f8_mask, 1) << 9; |
| 5557 | m_flag[1] |= _mm_extract_epi16(f8_mask, 2) << 10; |
| 5558 | m_flag[1] |= _mm_extract_epi16(f8_mask, 3) << 11; |
| 5559 | m_flag[1] |= _mm_extract_epi16(f8_mask, 4) << 12; |
| 5560 | m_flag[1] |= _mm_extract_epi16(f8_mask, 5) << 13; |
| 5561 | m_flag[1] |= _mm_extract_epi16(f8_mask, 6) << 14; |
| 5562 | m_flag[1] |= _mm_extract_epi16(f8_mask, 7) << 15; |
| 5563 | #endif |
| 5564 | INT16 vres[8]; |
| 5565 | UINT32 vce = 0; |
| 5566 | for (int i = 0; i < 8; i++) |
| 5567 | { |
| 5568 | INT16 s1, s2; |
| 5569 | VEC_GET_SCALAR_VS1(s1, i); |
| 5570 | VEC_GET_SCALAR_VS2(s2, i); |
| 5571 | |
| 5572 | if ((s1 ^ s2) < 0) |
| 5573 | { |
| 5574 | vce = (s1 + s2 == -1); |
| 5575 | VEC_SET_CARRY_FLAG(i); |
| 5576 | if (s2 < 0) |
| 5577 | { |
| 5578 | VEC_SET_CLIP2_FLAG(i); |
| 5579 | } |
| 5580 | |
| 5581 | if ((s1 + s2) <= 0) |
| 5582 | { |
| 5583 | VEC_SET_COMPARE_FLAG(i); |
| 5584 | vres[i] = -((UINT16)s2); |
| 5585 | } |
| 5586 | else |
| 5587 | { |
| 5588 | vres[i] = s1; |
| 5589 | } |
| 5590 | |
| 5591 | if ((s1 + s2) != 0 && s1 != ~s2) |
| 5592 | { |
| 5593 | VEC_SET_ZERO_FLAG(i); |
| 5594 | } |
| 5595 | }//sign |
| 5596 | else |
| 5597 | { |
| 5598 | vce = 0; |
| 5599 | if (s2 < 0) |
| 5600 | { |
| 5601 | VEC_SET_COMPARE_FLAG(i); |
| 5602 | } |
| 5603 | if ((s1 - s2) >= 0) |
| 5604 | { |
| 5605 | VEC_SET_CLIP2_FLAG(i); |
| 5606 | vres[i] = s2; |
| 5607 | } |
| 5608 | else |
| 5609 | { |
| 5610 | vres[i] = s1; |
| 5611 | } |
| 5612 | |
| 5613 | if ((s1 - s2) != 0 && s1 != ~s2) |
| 5614 | { |
| 5615 | VEC_SET_ZERO_FLAG(i); |
| 5616 | } |
| 5617 | } |
| 5618 | if (vce) |
| 5619 | { |
| 5620 | VEC_SET_CLIP1_FLAG(i); |
| 5621 | } |
| 5622 | VEC_SET_ACCUM_L(vres[i], i); |
| 5623 | } |
| 5624 | VEC_WRITEBACK_RESULT(); |
| 5625 | } |
| 5626 | |
| 5627 | static void cfunc_rsp_vch_simd(void *param) |
| 5628 | { |
| 5629 | ((rsp_device *)param)->ccfunc_rsp_vch_simd(); |
| 5630 | } |
| 5631 | #endif |
| 5632 | |
| 5633 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5634 | |
| 5635 | inline void rsp_device::ccfunc_rsp_vch_scalar() |
| 5636 | { |
| 5637 | int op = m_rsp_state->arg0; |
| 5638 | |
| 5639 | CLEAR_CARRY_FLAGS(); |
| 5640 | CLEAR_COMPARE_FLAGS(); |
| 5641 | CLEAR_CLIP1_FLAGS(); |
| 5642 | CLEAR_ZERO_FLAGS(); |
| 5643 | CLEAR_CLIP2_FLAGS(); |
| 5644 | |
| 5645 | INT16 vres[8]; |
| 5646 | UINT32 vce = 0; |
| 5647 | for (int i = 0; i < 8; i++) |
| 5648 | { |
| 5649 | INT16 s1, s2; |
| 5650 | SCALAR_GET_VS1(s1, i); |
| 5651 | SCALAR_GET_VS2(s2, i); |
| 5652 | |
| 5653 | if ((s1 ^ s2) < 0) |
| 5654 | { |
| 5655 | vce = (s1 + s2 == -1); |
| 5656 | SET_CARRY_FLAG(i); |
| 5657 | if (s2 < 0) |
| 5658 | { |
| 5659 | SET_CLIP2_FLAG(i); |
| 5660 | } |
| 5661 | |
| 5662 | if ((s1 + s2) <= 0) |
| 5663 | { |
| 5664 | SET_COMPARE_FLAG(i); |
| 5665 | vres[i] = -((UINT16)s2); |
| 5666 | } |
| 5667 | else |
| 5668 | { |
| 5669 | vres[i] = s1; |
| 5670 | } |
| 5671 | |
| 5672 | if ((s1 + s2) != 0 && s1 != ~s2) |
| 5673 | { |
| 5674 | SET_ZERO_FLAG(i); |
| 5675 | } |
| 5676 | }//sign |
| 5677 | else |
| 5678 | { |
| 5679 | vce = 0; |
| 5680 | if (s2 < 0) |
| 5681 | { |
| 5682 | SET_COMPARE_FLAG(i); |
| 5683 | } |
| 5684 | if ((s1 - s2) >= 0) |
| 5685 | { |
| 5686 | SET_CLIP2_FLAG(i); |
| 5687 | vres[i] = s2; |
| 5688 | } |
| 5689 | else |
| 5690 | { |
| 5691 | vres[i] = s1; |
| 5692 | } |
| 5693 | |
| 5694 | if ((s1 - s2) != 0 && s1 != ~s2) |
| 5695 | { |
| 5696 | SET_ZERO_FLAG(i); |
| 5697 | } |
| 5698 | } |
| 5699 | if (vce) |
| 5700 | { |
| 5701 | SET_CLIP1_FLAG(i); |
| 5702 | } |
| 5703 | SET_ACCUM_L(vres[i], i); |
| 5704 | } |
| 5705 | WRITEBACK_RESULT(); |
| 5706 | } |
| 5707 | |
| 5708 | static void cfunc_rsp_vch_scalar(void *param) |
| 5709 | { |
| 5710 | ((rsp_device *)param)->ccfunc_rsp_vch_scalar(); |
| 5711 | } |
| 5712 | #endif |
| 5713 | |
| 5714 | #if USE_SIMD |
| 5715 | // VCR |
| 5716 | // |
| 5717 | // 31 25 24 20 15 10 5 0 |
| 5718 | // ------------------------------------------------------ |
| 5719 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 5720 | // ------------------------------------------------------ |
| 5721 | // |
| 5722 | // Vector clip reverse |
| 5723 | |
| 5724 | inline void rsp_device::ccfunc_rsp_vcr_simd() |
| 5725 | { |
| 5726 | int op = m_rsp_state->arg0; |
| 5727 | |
| 5728 | VEC_CLEAR_CARRY_FLAGS(); |
| 5729 | VEC_CLEAR_COMPARE_FLAGS(); |
| 5730 | VEC_CLEAR_CLIP1_FLAGS(); |
| 5731 | VEC_CLEAR_ZERO_FLAGS(); |
| 5732 | VEC_CLEAR_CLIP2_FLAGS(); |
| 5733 | |
| 5734 | #if 0 |
| 5735 | // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5736 | // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0) |
| 5737 | |
| 5738 | // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0) |
| 5739 | // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5740 | |
| 5741 | // accum set to ~s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5742 | // accum set to ~s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5743 | |
| 5744 | // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0) |
| 5745 | // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0 |
| 5746 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5747 | __m128i s1_xor_s2 = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 5748 | __m128i s1_plus_s2 = _mm_add_epi16(m_xv[VS1REG], shuf); |
| 5749 | __m128i s1_sub_s2 = _mm_sub_epi16(m_xv[VS1REG], shuf); |
| 5750 | __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1); |
| 5751 | |
| 5752 | __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128()); |
| 5753 | __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128()); |
| 5754 | __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1); |
| 5755 | __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128()); |
| 5756 | __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1); |
| 5757 | __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128()); |
| 5758 | __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1); |
| 5759 | |
| 5760 | __m128i s1_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_lz), _mm_and_si128(s1s2_xor_lz, s1s2_plus_gz)); |
| 5761 | __m128i s2_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez), _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)); |
| 5762 | m_accum_l = _mm_or_si128(_mm_and_si128(m_xv[VS1REG], s1_mask), _mm_and_si128(s2_neg, s2_mask)); |
| 5763 | m_xv[VDREG] = m_accum_l; |
| 5764 | |
| 5765 | m_xvflag[COMPARE] = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz), _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)); |
| 5766 | m_xvflag[CLIP2] = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez), _mm_and_si128(s1s2_xor_lz, s2_lz)); |
| 5767 | #endif |
| 5768 | INT16 vres[8]; |
| 5769 | for (int i = 0; i < 8; i++) |
| 5770 | { |
| 5771 | INT16 s1, s2; |
| 5772 | VEC_GET_SCALAR_VS1(s1, i); |
| 5773 | VEC_GET_SCALAR_VS2(s2, i); |
| 5774 | |
| 5775 | if ((INT16)(s1 ^ s2) < 0) |
| 5776 | { |
| 5777 | if (s2 < 0) |
| 5778 | { |
| 5779 | VEC_SET_CLIP2_FLAG(i); |
| 5780 | } |
| 5781 | if ((s1 + s2) <= 0) |
| 5782 | { |
| 5783 | VEC_SET_ACCUM_L(~((UINT16)s2), i); |
| 5784 | VEC_SET_COMPARE_FLAG(i); |
| 5785 | } |
| 5786 | else |
| 5787 | { |
| 5788 | VEC_SET_ACCUM_L(s1, i); |
| 5789 | } |
| 5790 | } |
| 5791 | else |
| 5792 | { |
| 5793 | if (s2 < 0) |
| 5794 | { |
| 5795 | VEC_SET_COMPARE_FLAG(i); |
| 5796 | } |
| 5797 | if ((s1 - s2) >= 0) |
| 5798 | { |
| 5799 | VEC_SET_ACCUM_L(s2, i); |
| 5800 | VEC_SET_CLIP2_FLAG(i); |
| 5801 | } |
| 5802 | else |
| 5803 | { |
| 5804 | VEC_SET_ACCUM_L(s1, i); |
| 5805 | } |
| 5806 | } |
| 5807 | |
| 5808 | vres[i] = VEC_ACCUM_L(i); |
| 5809 | } |
| 5810 | VEC_WRITEBACK_RESULT(); |
| 5811 | } |
| 5812 | |
| 5813 | static void cfunc_rsp_vcr_simd(void *param) |
| 5814 | { |
| 5815 | ((rsp_device *)param)->ccfunc_rsp_vcr_simd(); |
| 5816 | } |
| 5817 | #endif |
| 5818 | |
| 5819 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5820 | |
| 5821 | inline void rsp_device::ccfunc_rsp_vcr_scalar() |
| 5822 | { |
| 5823 | int op = m_rsp_state->arg0; |
| 5824 | |
| 5825 | CLEAR_CARRY_FLAGS(); |
| 5826 | CLEAR_COMPARE_FLAGS(); |
| 5827 | CLEAR_CLIP1_FLAGS(); |
| 5828 | CLEAR_ZERO_FLAGS(); |
| 5829 | CLEAR_CLIP2_FLAGS(); |
| 5830 | |
| 5831 | INT16 vres[8]; |
| 5832 | for (int i = 0; i < 8; i++) |
| 5833 | { |
| 5834 | INT16 s1, s2; |
| 5835 | SCALAR_GET_VS1(s1, i); |
| 5836 | SCALAR_GET_VS2(s2, i); |
| 5837 | |
| 5838 | if ((INT16)(s1 ^ s2) < 0) |
| 5839 | { |
| 5840 | if (s2 < 0) |
| 5841 | { |
| 5842 | SET_CLIP2_FLAG(i); |
| 5843 | } |
| 5844 | if ((s1 + s2) <= 0) |
| 5845 | { |
| 5846 | SET_ACCUM_L(~((UINT16)s2), i); |
| 5847 | SET_COMPARE_FLAG(i); |
| 5848 | } |
| 5849 | else |
| 5850 | { |
| 5851 | SET_ACCUM_L(s1, i); |
| 5852 | } |
| 5853 | } |
| 5854 | else |
| 5855 | { |
| 5856 | if (s2 < 0) |
| 5857 | { |
| 5858 | SET_COMPARE_FLAG(i); |
| 5859 | } |
| 5860 | if ((s1 - s2) >= 0) |
| 5861 | { |
| 5862 | SET_ACCUM_L(s2, i); |
| 5863 | SET_CLIP2_FLAG(i); |
| 5864 | } |
| 5865 | else |
| 5866 | { |
| 5867 | SET_ACCUM_L(s1, i); |
| 5868 | } |
| 5869 | } |
| 5870 | |
| 5871 | vres[i] = ACCUM_L(i); |
| 5872 | } |
| 5873 | WRITEBACK_RESULT(); |
| 5874 | } |
| 5875 | |
| 5876 | static void cfunc_rsp_vcr_scalar(void *param) |
| 5877 | { |
| 5878 | ((rsp_device *)param)->ccfunc_rsp_vcr_scalar(); |
| 5879 | } |
| 5880 | #endif |
| 5881 | |
| 5882 | #if USE_SIMD |
| 5883 | // VMRG |
| 5884 | // |
| 5885 | // 31 25 24 20 15 10 5 0 |
| 5886 | // ------------------------------------------------------ |
| 5887 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 5888 | // ------------------------------------------------------ |
| 5889 | // |
| 5890 | // Merges two vectors according to compare flags |
| 5891 | |
| 5892 | inline void rsp_device::ccfunc_rsp_vmrg_simd() |
| 5893 | { |
| 5894 | int op = m_rsp_state->arg0; |
| 5895 | |
| 5896 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5897 | __m128i s2mask = _mm_cmpeq_epi16(m_xvflag[COMPARE], _mm_setzero_si128()); |
| 5898 | __m128i s1mask = _mm_xor_si128(s2mask, vec_neg1); |
| 5899 | __m128i result = _mm_and_si128(m_xv[VS1REG], s1mask); |
| 5900 | m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, s2mask)); |
| 5901 | m_accum_l = m_xv[VDREG]; |
| 5902 | } |
| 5903 | |
| 5904 | static void cfunc_rsp_vmrg_simd(void *param) |
| 5905 | { |
| 5906 | ((rsp_device *)param)->ccfunc_rsp_vmrg_simd(); |
| 5907 | } |
| 5908 | #endif |
| 5909 | |
| 5910 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5911 | |
| 5912 | inline void rsp_device::ccfunc_rsp_vmrg_scalar() |
| 5913 | { |
| 5914 | int op = m_rsp_state->arg0; |
| 5915 | |
| 5916 | INT16 vres[8]; |
| 5917 | for (int i = 0; i < 8; i++) |
| 5918 | { |
| 5919 | INT16 s1, s2; |
| 5920 | SCALAR_GET_VS1(s1, i); |
| 5921 | SCALAR_GET_VS2(s2, i); |
| 5922 | if (COMPARE_FLAG(i) != 0) |
| 5923 | { |
| 5924 | vres[i] = s1; |
| 5925 | } |
| 5926 | else |
| 5927 | { |
| 5928 | vres[i] = s2; |
| 5929 | } |
| 5930 | |
| 5931 | SET_ACCUM_L(vres[i], i); |
| 5932 | } |
| 5933 | WRITEBACK_RESULT(); |
| 5934 | } |
| 5935 | |
| 5936 | static void cfunc_rsp_vmrg_scalar(void *param) |
| 5937 | { |
| 5938 | ((rsp_device *)param)->ccfunc_rsp_vmrg_scalar(); |
| 5939 | } |
| 5940 | #endif |
| 5941 | |
| 5942 | #if USE_SIMD |
| 5943 | // VAND |
| 5944 | // |
| 5945 | // 31 25 24 20 15 10 5 0 |
| 5946 | // ------------------------------------------------------ |
| 5947 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 5948 | // ------------------------------------------------------ |
| 5949 | // |
| 5950 | // Bitwise AND of two vector registers |
| 5951 | |
| 5952 | inline void rsp_device::ccfunc_rsp_vand_simd() |
| 5953 | { |
| 5954 | int op = m_rsp_state->arg0; |
| 5955 | |
| 5956 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5957 | m_xv[VDREG] = _mm_and_si128(m_xv[VS1REG], shuf); |
| 5958 | m_accum_l = m_xv[VDREG]; |
| 5959 | } |
| 5960 | |
| 5961 | static void cfunc_rsp_vand_simd(void *param) |
| 5962 | { |
| 5963 | ((rsp_device *)param)->ccfunc_rsp_vand_simd(); |
| 5964 | } |
| 5965 | #endif |
| 5966 | |
| 5967 | #if (!USE_SIMD || SIMUL_SIMD) |
| 5968 | |
| 5969 | inline void rsp_device::ccfunc_rsp_vand_scalar() |
| 5970 | { |
| 5971 | int op = m_rsp_state->arg0; |
| 5972 | |
| 5973 | INT16 vres[8]; |
| 5974 | for (int i = 0; i < 8; i++) |
| 5975 | { |
| 5976 | UINT16 s1, s2; |
| 5977 | SCALAR_GET_VS1(s1, i); |
| 5978 | SCALAR_GET_VS2(s2, i); |
| 5979 | vres[i] = s1 & s2; |
| 5980 | SET_ACCUM_L(vres[i], i); |
| 5981 | } |
| 5982 | WRITEBACK_RESULT(); |
| 5983 | } |
| 5984 | |
| 5985 | static void cfunc_rsp_vand_scalar(void *param) |
| 5986 | { |
| 5987 | ((rsp_device *)param)->ccfunc_rsp_vand_scalar(); |
| 5988 | } |
| 5989 | #endif |
| 5990 | |
| 5991 | #if USE_SIMD |
| 5992 | // VNAND |
| 5993 | // |
| 5994 | // 31 25 24 20 15 10 5 0 |
| 5995 | // ------------------------------------------------------ |
| 5996 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 5997 | // ------------------------------------------------------ |
| 5998 | // |
| 5999 | // Bitwise NOT AND of two vector registers |
| 6000 | |
| 6001 | inline void rsp_device::ccfunc_rsp_vnand_simd() |
| 6002 | { |
| 6003 | int op = m_rsp_state->arg0; |
| 6004 | |
| 6005 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6006 | m_xv[VDREG] = _mm_xor_si128(_mm_and_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 6007 | m_accum_l = m_xv[VDREG]; |
| 6008 | } |
| 6009 | |
| 6010 | static void cfunc_rsp_vnand_simd(void *param) |
| 6011 | { |
| 6012 | ((rsp_device *)param)->ccfunc_rsp_vnand_simd(); |
| 6013 | } |
| 6014 | #endif |
| 6015 | |
| 6016 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6017 | |
| 6018 | inline void rsp_device::ccfunc_rsp_vnand_scalar() |
| 6019 | { |
| 6020 | int op = m_rsp_state->arg0; |
| 6021 | |
| 6022 | INT16 vres[8]; |
| 6023 | for (int i = 0; i < 8; i++) |
| 6024 | { |
| 6025 | UINT16 s1, s2; |
| 6026 | SCALAR_GET_VS1(s1, i); |
| 6027 | SCALAR_GET_VS2(s2, i); |
| 6028 | vres[i] = ~((s1 & s2)); |
| 6029 | SET_ACCUM_L(vres[i], i); |
| 6030 | } |
| 6031 | WRITEBACK_RESULT(); |
| 6032 | } |
| 6033 | |
| 6034 | static void cfunc_rsp_vnand_scalar(void *param) |
| 6035 | { |
| 6036 | ((rsp_device *)param)->ccfunc_rsp_vnand_scalar(); |
| 6037 | } |
| 6038 | #endif |
| 6039 | |
| 6040 | #if USE_SIMD |
| 6041 | // VOR |
| 6042 | // |
| 6043 | // 31 25 24 20 15 10 5 0 |
| 6044 | // ------------------------------------------------------ |
| 6045 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 6046 | // ------------------------------------------------------ |
| 6047 | // |
| 6048 | // Bitwise OR of two vector registers |
| 6049 | |
| 6050 | inline void rsp_device::ccfunc_rsp_vor_simd() |
| 6051 | { |
| 6052 | int op = m_rsp_state->arg0; |
| 6053 | |
| 6054 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6055 | m_xv[VDREG] = _mm_or_si128(m_xv[VS1REG], shuf); |
| 6056 | m_accum_l = m_xv[VDREG]; |
| 6057 | } |
| 6058 | |
| 6059 | static void cfunc_rsp_vor_simd(void *param) |
| 6060 | { |
| 6061 | ((rsp_device *)param)->ccfunc_rsp_vor_simd(); |
| 6062 | } |
| 6063 | #endif |
| 6064 | |
| 6065 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6066 | |
| 6067 | inline void rsp_device::ccfunc_rsp_vor_scalar() |
| 6068 | { |
| 6069 | int op = m_rsp_state->arg0; |
| 6070 | |
| 6071 | INT16 vres[8]; |
| 6072 | for (int i = 0; i < 8; i++) |
| 6073 | { |
| 6074 | UINT16 s1, s2; |
| 6075 | SCALAR_GET_VS1(s1, i); |
| 6076 | SCALAR_GET_VS2(s2, i); |
| 6077 | vres[i] = s1 | s2; |
| 6078 | SET_ACCUM_L(vres[i], i); |
| 6079 | } |
| 6080 | WRITEBACK_RESULT(); |
| 6081 | } |
| 6082 | |
| 6083 | static void cfunc_rsp_vor_scalar(void *param) |
| 6084 | { |
| 6085 | ((rsp_device *)param)->ccfunc_rsp_vor_scalar(); |
| 6086 | } |
| 6087 | #endif |
| 6088 | |
| 6089 | #if USE_SIMD |
| 6090 | // VNOR |
| 6091 | // |
| 6092 | // 31 25 24 20 15 10 5 0 |
| 6093 | // ------------------------------------------------------ |
| 6094 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 6095 | // ------------------------------------------------------ |
| 6096 | // |
| 6097 | // Bitwise NOT OR of two vector registers |
| 6098 | |
| 6099 | inline void rsp_device::ccfunc_rsp_vnor_simd() |
| 6100 | { |
| 6101 | int op = m_rsp_state->arg0; |
| 6102 | |
| 6103 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6104 | m_xv[VDREG] = _mm_xor_si128(_mm_or_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 6105 | m_accum_l = m_xv[VDREG]; |
| 6106 | } |
| 6107 | |
| 6108 | static void cfunc_rsp_vnor_simd(void *param) |
| 6109 | { |
| 6110 | ((rsp_device *)param)->ccfunc_rsp_vnor_simd(); |
| 6111 | } |
| 6112 | #endif |
| 6113 | |
| 6114 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6115 | |
| 6116 | inline void rsp_device::ccfunc_rsp_vnor_scalar() |
| 6117 | { |
| 6118 | int op = m_rsp_state->arg0; |
| 6119 | |
| 6120 | INT16 vres[8]; |
| 6121 | for (int i = 0; i < 8; i++) |
| 6122 | { |
| 6123 | UINT16 s1, s2; |
| 6124 | SCALAR_GET_VS1(s1, i); |
| 6125 | SCALAR_GET_VS2(s2, i); |
| 6126 | vres[i] = ~(s1 | s2); |
| 6127 | SET_ACCUM_L(vres[i], i); |
| 6128 | } |
| 6129 | WRITEBACK_RESULT(); |
| 6130 | } |
| 6131 | |
| 6132 | static void cfunc_rsp_vnor_scalar(void *param) |
| 6133 | { |
| 6134 | ((rsp_device *)param)->ccfunc_rsp_vnor_scalar(); |
| 6135 | } |
| 6136 | #endif |
| 6137 | |
| 6138 | #if USE_SIMD |
| 6139 | // VXOR |
| 6140 | // |
| 6141 | // 31 25 24 20 15 10 5 0 |
| 6142 | // ------------------------------------------------------ |
| 6143 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 6144 | // ------------------------------------------------------ |
| 6145 | // |
| 6146 | // Bitwise XOR of two vector registers |
| 6147 | |
| 6148 | inline void rsp_device::ccfunc_rsp_vxor_simd() |
| 6149 | { |
| 6150 | int op = m_rsp_state->arg0; |
| 6151 | |
| 6152 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6153 | m_xv[VDREG] = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 6154 | m_accum_l = m_xv[VDREG]; |
| 6155 | } |
| 6156 | |
| 6157 | static void cfunc_rsp_vxor_simd(void *param) |
| 6158 | { |
| 6159 | ((rsp_device *)param)->ccfunc_rsp_vxor_simd(); |
| 6160 | } |
| 6161 | #endif |
| 6162 | |
| 6163 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6164 | |
| 6165 | inline void rsp_device::ccfunc_rsp_vxor_scalar() |
| 6166 | { |
| 6167 | int op = m_rsp_state->arg0; |
| 6168 | |
| 6169 | INT16 vres[8]; |
| 6170 | for (int i = 0; i < 8; i++) |
| 6171 | { |
| 6172 | UINT16 s1, s2; |
| 6173 | SCALAR_GET_VS1(s1, i); |
| 6174 | SCALAR_GET_VS2(s2, i); |
| 6175 | vres[i] = s1 ^ s2; |
| 6176 | SET_ACCUM_L(vres[i], i); |
| 6177 | } |
| 6178 | WRITEBACK_RESULT(); |
| 6179 | } |
| 6180 | |
| 6181 | static void cfunc_rsp_vxor_scalar(void *param) |
| 6182 | { |
| 6183 | ((rsp_device *)param)->ccfunc_rsp_vxor_scalar(); |
| 6184 | } |
| 6185 | #endif |
| 6186 | |
| 6187 | #if USE_SIMD |
| 6188 | // VNXOR |
| 6189 | // |
| 6190 | // 31 25 24 20 15 10 5 0 |
| 6191 | // ------------------------------------------------------ |
| 6192 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 6193 | // ------------------------------------------------------ |
| 6194 | // |
| 6195 | // Bitwise NOT XOR of two vector registers |
| 6196 | |
| 6197 | inline void rsp_device::ccfunc_rsp_vnxor_simd() |
| 6198 | { |
| 6199 | int op = m_rsp_state->arg0; |
| 6200 | |
| 6201 | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6202 | m_xv[VDREG] = _mm_xor_si128(_mm_xor_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 6203 | m_accum_l = m_xv[VDREG]; |
| 6204 | } |
| 6205 | |
| 6206 | static void cfunc_rsp_vnxor_simd(void *param) |
| 6207 | { |
| 6208 | ((rsp_device *)param)->ccfunc_rsp_vnxor_simd(); |
| 6209 | } |
| 6210 | #endif |
| 6211 | |
| 6212 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6213 | |
| 6214 | inline void rsp_device::ccfunc_rsp_vnxor_scalar() |
| 6215 | { |
| 6216 | int op = m_rsp_state->arg0; |
| 6217 | |
| 6218 | INT16 vres[8]; |
| 6219 | for (int i = 0; i < 8; i++) |
| 6220 | { |
| 6221 | UINT16 s1, s2; |
| 6222 | SCALAR_GET_VS1(s1, i); |
| 6223 | SCALAR_GET_VS2(s2, i); |
| 6224 | vres[i] = ~(s1 ^ s2); |
| 6225 | SET_ACCUM_L(vres[i], i); |
| 6226 | } |
| 6227 | WRITEBACK_RESULT(); |
| 6228 | } |
| 6229 | |
| 6230 | static void cfunc_rsp_vnxor_scalar(void *param) |
| 6231 | { |
| 6232 | ((rsp_device *)param)->ccfunc_rsp_vnxor_scalar(); |
| 6233 | } |
| 6234 | #endif |
| 6235 | |
| 6236 | #if USE_SIMD |
| 6237 | // VRCP |
| 6238 | // |
| 6239 | // 31 25 24 20 15 10 5 0 |
| 6240 | // ------------------------------------------------------ |
| 6241 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 6242 | // ------------------------------------------------------ |
| 6243 | // |
| 6244 | // Calculates reciprocal |
| 6245 | |
| 6246 | inline void rsp_device::ccfunc_rsp_vrcp_simd() |
| 6247 | { |
| 6248 | int op = m_rsp_state->arg0; |
| 6249 | |
| 6250 | INT32 shifter = 0; |
| 6251 | UINT16 urec; |
| 6252 | INT32 rec; |
| 6253 | SIMD_EXTRACT16(m_xv[VS2REG], urec, EL); |
| 6254 | rec = (INT16)urec; |
| 6255 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 6256 | if (datainput) |
| 6257 | { |
| 6258 | for (int i = 0; i < 32; i++) |
| 6259 | { |
| 6260 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6261 | { |
| 6262 | shifter = i; |
| 6263 | break; |
| 6264 | } |
| 6265 | } |
| 6266 | } |
| 6267 | else |
| 6268 | { |
| 6269 | shifter = 0x10; |
| 6270 | } |
| 6271 | |
| 6272 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6273 | INT32 fetchval = rsp_divtable[address]; |
| 6274 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6275 | if (rec < 0) |
| 6276 | { |
| 6277 | temp = ~temp; |
| 6278 | } |
| 6279 | if (!rec) |
| 6280 | { |
| 6281 | temp = 0x7fffffff; |
| 6282 | } |
| 6283 | else if (rec == 0xffff8000) |
| 6284 | { |
| 6285 | temp = 0xffff0000; |
| 6286 | } |
| 6287 | rec = temp; |
| 6288 | |
| 6289 | m_reciprocal_res = rec; |
| 6290 | m_dp_allowed = 0; |
| 6291 | |
| 6292 | SIMD_INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 6293 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6294 | } |
| 6295 | |
| 6296 | static void cfunc_rsp_vrcp_simd(void *param) |
| 6297 | { |
| 6298 | ((rsp_device *)param)->ccfunc_rsp_vrcp_simd(); |
| 6299 | } |
| 6300 | #endif |
| 6301 | |
| 6302 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6303 | |
| 6304 | inline void rsp_device::ccfunc_rsp_vrcp_scalar() |
| 6305 | { |
| 6306 | int op = m_rsp_state->arg0; |
| 6307 | |
| 6308 | INT32 shifter = 0; |
| 6309 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 6310 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 6311 | if (datainput) |
| 6312 | { |
| 6313 | for (int i = 0; i < 32; i++) |
| 6314 | { |
| 6315 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6316 | { |
| 6317 | shifter = i; |
| 6318 | break; |
| 6319 | } |
| 6320 | } |
| 6321 | } |
| 6322 | else |
| 6323 | { |
| 6324 | shifter = 0x10; |
| 6325 | } |
| 6326 | |
| 6327 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6328 | INT32 fetchval = rsp_divtable[address]; |
| 6329 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6330 | if (rec < 0) |
| 6331 | { |
| 6332 | temp = ~temp; |
| 6333 | } |
| 6334 | if (!rec) |
| 6335 | { |
| 6336 | temp = 0x7fffffff; |
| 6337 | } |
| 6338 | else if (rec == 0xffff8000) |
| 6339 | { |
| 6340 | temp = 0xffff0000; |
| 6341 | } |
| 6342 | rec = temp; |
| 6343 | |
| 6344 | m_reciprocal_res = rec; |
| 6345 | m_dp_allowed = 0; |
| 6346 | |
| 6347 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 6348 | for (int i = 0; i < 8; i++) |
| 6349 | { |
| 6350 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6351 | } |
| 6352 | } |
| 6353 | |
| 6354 | static void cfunc_rsp_vrcp_scalar(void *param) |
| 6355 | { |
| 6356 | ((rsp_device *)param)->ccfunc_rsp_vrcp_scalar(); |
| 6357 | } |
| 6358 | #endif |
| 6359 | |
| 6360 | #if USE_SIMD |
| 6361 | // VRCPL |
| 6362 | // |
| 6363 | // 31 25 24 20 15 10 5 0 |
| 6364 | // ------------------------------------------------------ |
| 6365 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 6366 | // ------------------------------------------------------ |
| 6367 | // |
| 6368 | // Calculates reciprocal low part |
| 6369 | |
| 6370 | inline void rsp_device::ccfunc_rsp_vrcpl_simd() |
| 6371 | { |
| 6372 | int op = m_rsp_state->arg0; |
| 6373 | |
| 6374 | #if SIMUL_SIMD |
| 6375 | m_old_reciprocal_res = m_reciprocal_res; |
| 6376 | m_old_reciprocal_high = m_reciprocal_high; |
| 6377 | m_old_dp_allowed = m_dp_allowed; |
| 6378 | #endif |
| 6379 | |
| 6380 | INT32 shifter = 0; |
| 6381 | |
| 6382 | UINT16 urec; |
| 6383 | SIMD_EXTRACT16(m_xv[VS2REG], urec, EL); |
| 6384 | INT32 rec = (INT16)urec; |
| 6385 | INT32 datainput = rec; |
| 6386 | |
| 6387 | if (m_dp_allowed) |
| 6388 | { |
| 6389 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 6390 | datainput = rec; |
| 6391 | |
| 6392 | if (rec < 0) |
| 6393 | { |
| 6394 | if (rec < -32768) |
| 6395 | { |
| 6396 | datainput = ~datainput; |
| 6397 | } |
| 6398 | else |
| 6399 | { |
| 6400 | datainput = -datainput; |
| 6401 | } |
| 6402 | } |
| 6403 | } |
| 6404 | else if (datainput < 0) |
| 6405 | { |
| 6406 | datainput = -datainput; |
| 6407 | |
| 6408 | shifter = 0x10; |
| 6409 | } |
| 6410 | |
| 6411 | if (datainput) |
| 6412 | { |
| 6413 | for (int i = 0; i < 32; i++) |
| 6414 | { |
| 6415 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6416 | { |
| 6417 | shifter = i; |
| 6418 | break; |
| 6419 | } |
| 6420 | } |
| 6421 | } |
| 6422 | |
| 6423 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6424 | INT32 fetchval = rsp_divtable[address]; |
| 6425 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6426 | temp ^= rec >> 31; |
| 6427 | |
| 6428 | if (!rec) |
| 6429 | { |
| 6430 | temp = 0x7fffffff; |
| 6431 | } |
| 6432 | else if (rec == 0xffff8000) |
| 6433 | { |
| 6434 | temp = 0xffff0000; |
| 6435 | } |
| 6436 | rec = temp; |
| 6437 | |
| 6438 | m_reciprocal_res = rec; |
| 6439 | m_dp_allowed = 0; |
| 6440 | |
| 6441 | SIMD_INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 6442 | |
| 6443 | for (int i = 0; i < 8; i++) |
| 6444 | { |
| 6445 | INT16 val; |
| 6446 | SIMD_EXTRACT16(m_xv[VS2REG], val, VEC_EL_2(EL, i)); |
| 6447 | VEC_SET_ACCUM_L(val, i); |
| 6448 | } |
| 6449 | } |
| 6450 | |
| 6451 | static void cfunc_rsp_vrcpl_simd(void *param) |
| 6452 | { |
| 6453 | ((rsp_device *)param)->ccfunc_rsp_vrcpl_simd(); |
| 6454 | } |
| 6455 | #endif |
| 6456 | |
| 6457 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6458 | |
| 6459 | inline void rsp_device::ccfunc_rsp_vrcpl_scalar() |
| 6460 | { |
| 6461 | int op = m_rsp_state->arg0; |
| 6462 | |
| 6463 | INT32 shifter = 0; |
| 6464 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 6465 | INT32 datainput = rec; |
| 6466 | |
| 6467 | if (m_dp_allowed) |
| 6468 | { |
| 6469 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 6470 | datainput = rec; |
| 6471 | |
| 6472 | if (rec < 0) |
| 6473 | { |
| 6474 | if (rec < -32768) |
| 6475 | { |
| 6476 | datainput = ~datainput; |
| 6477 | } |
| 6478 | else |
| 6479 | { |
| 6480 | datainput = -datainput; |
| 6481 | } |
| 6482 | } |
| 6483 | } |
| 6484 | else if (datainput < 0) |
| 6485 | { |
| 6486 | datainput = -datainput; |
| 6487 | |
| 6488 | shifter = 0x10; |
| 6489 | } |
| 6490 | |
| 6491 | if (datainput) |
| 6492 | { |
| 6493 | for (int i = 0; i < 32; i++) |
| 6494 | { |
| 6495 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6496 | { |
| 6497 | shifter = i; |
| 6498 | break; |
| 6499 | } |
| 6500 | } |
| 6501 | } |
| 6502 | |
| 6503 | UINT32 address = (datainput << shifter) >> 22; |
| 6504 | INT32 fetchval = rsp_divtable[address & 0x1ff]; |
| 6505 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6506 | temp ^= rec >> 31; |
| 6507 | |
| 6508 | if (!rec) |
| 6509 | { |
| 6510 | temp = 0x7fffffff; |
| 6511 | } |
| 6512 | else if (rec == 0xffff8000) |
| 6513 | { |
| 6514 | temp = 0xffff0000; |
| 6515 | } |
| 6516 | rec = temp; |
| 6517 | |
| 6518 | m_reciprocal_res = rec; |
| 6519 | m_dp_allowed = 0; |
| 6520 | |
| 6521 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 6522 | |
| 6523 | for (int i = 0; i < 8; i++) |
| 6524 | { |
| 6525 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6526 | } |
| 6527 | } |
| 6528 | |
| 6529 | static void cfunc_rsp_vrcpl_scalar(void *param) |
| 6530 | { |
| 6531 | ((rsp_device *)param)->ccfunc_rsp_vrcpl_scalar(); |
| 6532 | } |
| 6533 | #endif |
| 6534 | |
| 6535 | #if USE_SIMD |
| 6536 | // VRCPH |
| 6537 | // |
| 6538 | // 31 25 24 20 15 10 5 0 |
| 6539 | // ------------------------------------------------------ |
| 6540 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 6541 | // ------------------------------------------------------ |
| 6542 | // |
| 6543 | // Calculates reciprocal high part |
| 6544 | |
| 6545 | inline void rsp_device::ccfunc_rsp_vrcph_simd() |
| 6546 | { |
| 6547 | int op = m_rsp_state->arg0; |
| 6548 | |
| 6549 | #if SIMUL_SIMD |
| 6550 | m_old_reciprocal_res = m_reciprocal_res; |
| 6551 | m_old_reciprocal_high = m_reciprocal_high; |
| 6552 | m_old_dp_allowed = m_dp_allowed; |
| 6553 | #endif |
| 6554 | |
| 6555 | UINT16 rcph; |
| 6556 | SIMD_EXTRACT16(m_xv[VS2REG], rcph, EL); |
| 6557 | m_reciprocal_high = rcph << 16; |
| 6558 | m_dp_allowed = 1; |
| 6559 | |
| 6560 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6561 | |
| 6562 | SIMD_INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); |
| 6563 | } |
| 6564 | |
| 6565 | static void cfunc_rsp_vrcph_simd(void *param) |
| 6566 | { |
| 6567 | ((rsp_device *)param)->ccfunc_rsp_vrcph_simd(); |
| 6568 | } |
| 6569 | #endif |
| 6570 | |
| 6571 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6572 | |
| 6573 | inline void rsp_device::ccfunc_rsp_vrcph_scalar() |
| 6574 | { |
| 6575 | int op = m_rsp_state->arg0; |
| 6576 | |
| 6577 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 6578 | m_dp_allowed = 1; |
| 6579 | |
| 6580 | for (int i = 0; i < 8; i++) |
| 6581 | { |
| 6582 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6583 | } |
| 6584 | |
| 6585 | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 6586 | } |
| 6587 | |
| 6588 | static void cfunc_rsp_vrcph_scalar(void *param) |
| 6589 | { |
| 6590 | ((rsp_device *)param)->ccfunc_rsp_vrcph_scalar(); |
| 6591 | } |
| 6592 | #endif |
| 6593 | |
| 6594 | #if USE_SIMD |
| 6595 | // VMOV |
| 6596 | // |
| 6597 | // 31 25 24 20 15 10 5 0 |
| 6598 | // ------------------------------------------------------ |
| 6599 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 6600 | // ------------------------------------------------------ |
| 6601 | // |
| 6602 | // Moves element from vector to destination vector |
| 6603 | |
| 6604 | inline void rsp_device::ccfunc_rsp_vmov_simd() |
| 6605 | { |
| 6606 | int op = m_rsp_state->arg0; |
| 6607 | |
| 6608 | INT16 val; |
| 6609 | SIMD_EXTRACT16(m_xv[VS2REG], val, EL); |
| 6610 | SIMD_INSERT16(m_xv[VDREG], val, VS1REG); |
| 6611 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6612 | } |
| 6613 | |
| 6614 | static void cfunc_rsp_vmov_simd(void *param) |
| 6615 | { |
| 6616 | ((rsp_device *)param)->ccfunc_rsp_vmov_simd(); |
| 6617 | } |
| 6618 | #endif |
| 6619 | |
| 6620 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6621 | |
| 6622 | inline void rsp_device::ccfunc_rsp_vmov_scalar() |
| 6623 | { |
| 6624 | int op = m_rsp_state->arg0; |
| 6625 | |
| 6626 | W_VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 6627 | for (int i = 0; i < 8; i++) |
| 6628 | { |
| 6629 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6630 | } |
| 6631 | } |
| 6632 | |
| 6633 | static void cfunc_rsp_vmov_scalar(void *param) |
| 6634 | { |
| 6635 | ((rsp_device *)param)->ccfunc_rsp_vmov_scalar(); |
| 6636 | } |
| 6637 | #endif |
| 6638 | |
| 6639 | // VRSQ |
| 6640 | // |
| 6641 | // 31 25 24 20 15 10 5 0 |
| 6642 | // ------------------------------------------------------ |
| 6643 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 6644 | // ------------------------------------------------------ |
| 6645 | // |
| 6646 | // Calculates reciprocal square-root |
| 6647 | |
| 6648 | inline void rsp_device::ccfunc_rsp_vrsq_scalar() |
| 6649 | { |
| 6650 | int op = m_rsp_state->arg0; |
| 6651 | |
| 6652 | INT32 shifter = 0; |
| 6653 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 6654 | INT32 datainput = (rec < 0) ? (-rec) : (rec); |
| 6655 | |
| 6656 | if (rec < 0) |
| 6657 | { |
| 6658 | if (rec < -32768) |
| 6659 | { |
| 6660 | datainput = ~datainput; |
| 6661 | } |
| 6662 | else |
| 6663 | { |
| 6664 | datainput = -datainput; |
| 6665 | } |
| 6666 | } |
| 6667 | |
| 6668 | if (datainput) |
| 6669 | { |
| 6670 | for (int i = 0; i < 32; i++) |
| 6671 | { |
| 6672 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6673 | { |
| 6674 | shifter = i; |
| 6675 | break; |
| 6676 | } |
| 6677 | } |
| 6678 | } |
| 6679 | else |
| 6680 | { |
| 6681 | shifter = 0; |
| 6682 | } |
| 6683 | |
| 6684 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6685 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 6686 | |
| 6687 | INT32 fetchval = rsp_divtable[address]; |
| 6688 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 6689 | if (rec < 0) |
| 6690 | { |
| 6691 | temp = ~temp; |
| 6692 | } |
| 6693 | if (!rec) |
| 6694 | { |
| 6695 | temp = 0x7fffffff; |
| 6696 | } |
| 6697 | else if (rec == 0xffff8000) |
| 6698 | { |
| 6699 | temp = 0xffff0000; |
| 6700 | } |
| 6701 | rec = temp; |
| 6702 | |
| 6703 | if (rec < 0) |
| 6704 | { |
| 6705 | if (m_dp_allowed) |
| 6706 | { |
| 6707 | if (rec < -32768) |
| 6708 | { |
| 6709 | datainput = ~datainput; |
| 6710 | } |
| 6711 | else |
| 6712 | { |
| 6713 | datainput = -datainput; |
| 6714 | } |
| 6715 | } |
| 6716 | else |
| 6717 | { |
| 6718 | datainput = -datainput; |
| 6719 | } |
| 6720 | } |
| 6721 | |
| 6722 | if (datainput) |
| 6723 | { |
| 6724 | for (int i = 0; i < 32; i++) |
| 6725 | { |
| 6726 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6727 | { |
| 6728 | shifter = i; |
| 6729 | break; |
| 6730 | } |
| 6731 | } |
| 6732 | } |
| 6733 | else |
| 6734 | { |
| 6735 | shifter = 0; |
| 6736 | } |
| 6737 | |
| 6738 | address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6739 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 6740 | |
| 6741 | fetchval = rsp_divtable[address]; |
| 6742 | temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 6743 | if (rec < 0) |
| 6744 | { |
| 6745 | temp = ~temp; |
| 6746 | } |
| 6747 | if (!rec) |
| 6748 | { |
| 6749 | temp = 0x7fff; |
| 6750 | } |
| 6751 | else if (rec == 0xffff8000) |
| 6752 | { |
| 6753 | temp = 0x0000; |
| 6754 | } |
| 6755 | rec = temp; |
| 6756 | |
| 6757 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 6758 | for (int i = 0; i < 8; i++) |
| 6759 | { |
| 6760 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6761 | } |
| 6762 | } |
| 6763 | |
| 6764 | static void cfunc_rsp_vrsq_scalar(void *param) |
| 6765 | { |
| 6766 | ((rsp_device *)param)->ccfunc_rsp_vrsq_scalar(); |
| 6767 | } |
| 6768 | |
| 6769 | #if USE_SIMD |
| 6770 | // VRSQL |
| 6771 | // |
| 6772 | // 31 25 24 20 15 10 5 0 |
| 6773 | // ------------------------------------------------------ |
| 6774 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 6775 | // ------------------------------------------------------ |
| 6776 | // |
| 6777 | // Calculates reciprocal square-root low part |
| 6778 | |
| 6779 | inline void rsp_device::ccfunc_rsp_vrsql_simd() |
| 6780 | { |
| 6781 | int op = m_rsp_state->arg0; |
| 6782 | |
| 6783 | #if SIMUL_SIMD |
| 6784 | m_old_reciprocal_res = m_reciprocal_res; |
| 6785 | m_old_reciprocal_high = m_reciprocal_high; |
| 6786 | m_old_dp_allowed = m_dp_allowed; |
| 6787 | #endif |
| 6788 | |
| 6789 | INT32 shifter = 0; |
| 6790 | UINT16 val; |
| 6791 | SIMD_EXTRACT16(m_xv[VS2REG], val, EL); |
| 6792 | INT32 rec = (INT16)val; |
| 6793 | INT32 datainput = rec; |
| 6794 | |
| 6795 | if (m_dp_allowed) |
| 6796 | { |
| 6797 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 6798 | datainput = rec; |
| 6799 | |
| 6800 | if (rec < 0) |
| 6801 | { |
| 6802 | if (rec < -32768) |
| 6803 | { |
| 6804 | datainput = ~datainput; |
| 6805 | } |
| 6806 | else |
| 6807 | { |
| 6808 | datainput = -datainput; |
| 6809 | } |
| 6810 | } |
| 6811 | } |
| 6812 | else if (datainput < 0) |
| 6813 | { |
| 6814 | datainput = -datainput; |
| 6815 | |
| 6816 | shifter = 0x10; |
| 6817 | } |
| 6818 | |
| 6819 | if (datainput) |
| 6820 | { |
| 6821 | for (int i = 0; i < 32; i++) |
| 6822 | { |
| 6823 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6824 | { |
| 6825 | shifter = i; |
| 6826 | break; |
| 6827 | } |
| 6828 | } |
| 6829 | } |
| 6830 | |
| 6831 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6832 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 6833 | |
| 6834 | INT32 fetchval = rsp_divtable[address]; |
| 6835 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 6836 | temp ^= rec >> 31; |
| 6837 | |
| 6838 | if (!rec) |
| 6839 | { |
| 6840 | temp = 0x7fffffff; |
| 6841 | } |
| 6842 | else if (rec == 0xffff8000) |
| 6843 | { |
| 6844 | temp = 0xffff0000; |
| 6845 | } |
| 6846 | rec = temp; |
| 6847 | |
| 6848 | m_reciprocal_res = rec; |
| 6849 | m_dp_allowed = 0; |
| 6850 | |
| 6851 | SIMD_INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 6852 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6853 | } |
| 6854 | |
| 6855 | static void cfunc_rsp_vrsql_simd(void *param) |
| 6856 | { |
| 6857 | ((rsp_device *)param)->ccfunc_rsp_vrsql_simd(); |
| 6858 | } |
| 6859 | #endif |
| 6860 | |
| 6861 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6862 | |
| 6863 | inline void rsp_device::ccfunc_rsp_vrsql_scalar() |
| 6864 | { |
| 6865 | int op = m_rsp_state->arg0; |
| 6866 | |
| 6867 | INT32 shifter = 0; |
| 6868 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 6869 | INT32 datainput = rec; |
| 6870 | |
| 6871 | if (m_dp_allowed) |
| 6872 | { |
| 6873 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 6874 | datainput = rec; |
| 6875 | |
| 6876 | if (rec < 0) |
| 6877 | { |
| 6878 | if (rec < -32768) |
| 6879 | { |
| 6880 | datainput = ~datainput; |
| 6881 | } |
| 6882 | else |
| 6883 | { |
| 6884 | datainput = -datainput; |
| 6885 | } |
| 6886 | } |
| 6887 | } |
| 6888 | else if (datainput < 0) |
| 6889 | { |
| 6890 | datainput = -datainput; |
| 6891 | |
| 6892 | shifter = 0x10; |
| 6893 | } |
| 6894 | |
| 6895 | if (datainput) |
| 6896 | { |
| 6897 | for (int i = 0; i < 32; i++) |
| 6898 | { |
| 6899 | if (datainput & (1 << ((~i) & 0x1f))) |
| 6900 | { |
| 6901 | shifter = i; |
| 6902 | break; |
| 6903 | } |
| 6904 | } |
| 6905 | } |
| 6906 | |
| 6907 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6908 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 6909 | |
| 6910 | INT32 fetchval = rsp_divtable[address]; |
| 6911 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 6912 | temp ^= rec >> 31; |
| 6913 | |
| 6914 | if (!rec) |
| 6915 | { |
| 6916 | temp = 0x7fffffff; |
| 6917 | } |
| 6918 | else if (rec == 0xffff8000) |
| 6919 | { |
| 6920 | temp = 0xffff0000; |
| 6921 | } |
| 6922 | rec = temp; |
| 6923 | |
| 6924 | m_reciprocal_res = rec; |
| 6925 | m_dp_allowed = 0; |
| 6926 | |
| 6927 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 6928 | for (int i = 0; i < 8; i++) |
| 6929 | { |
| 6930 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6931 | } |
| 6932 | } |
| 6933 | |
| 6934 | static void cfunc_rsp_vrsql_scalar(void *param) |
| 6935 | { |
| 6936 | ((rsp_device *)param)->ccfunc_rsp_vrsql_scalar(); |
| 6937 | } |
| 6938 | #endif |
| 6939 | |
| 6940 | #if USE_SIMD |
| 6941 | // VRSQH |
| 6942 | // |
| 6943 | // 31 25 24 20 15 10 5 0 |
| 6944 | // ------------------------------------------------------ |
| 6945 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 6946 | // ------------------------------------------------------ |
| 6947 | // |
| 6948 | // Calculates reciprocal square-root high part |
| 6949 | |
| 6950 | inline void rsp_device::ccfunc_rsp_vrsqh_simd() |
| 6951 | { |
| 6952 | int op = m_rsp_state->arg0; |
| 6953 | |
| 6954 | #if SIMUL_SIMD |
| 6955 | m_old_reciprocal_res = m_reciprocal_res; |
| 6956 | m_old_reciprocal_high = m_reciprocal_high; |
| 6957 | m_old_dp_allowed = m_dp_allowed; |
| 6958 | #endif |
| 6959 | |
| 6960 | UINT16 val; |
| 6961 | SIMD_EXTRACT16(m_xv[VS2REG], val, EL); |
| 6962 | m_reciprocal_high = val << 16; |
| 6963 | m_dp_allowed = 1; |
| 6964 | |
| 6965 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6966 | |
| 6967 | SIMD_INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); // store high part |
| 6968 | } |
| 6969 | |
| 6970 | static void cfunc_rsp_vrsqh_simd(void *param) |
| 6971 | { |
| 6972 | ((rsp_device *)param)->ccfunc_rsp_vrsqh_simd(); |
| 6973 | } |
| 6974 | #endif |
| 6975 | |
| 6976 | #if (!USE_SIMD || SIMUL_SIMD) |
| 6977 | |
| 6978 | inline void rsp_device::ccfunc_rsp_vrsqh_scalar() |
| 6979 | { |
| 6980 | int op = m_rsp_state->arg0; |
| 6981 | |
| 6982 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 6983 | m_dp_allowed = 1; |
| 6984 | |
| 6985 | for (int i = 0; i < 8; i++) |
| 6986 | { |
| 6987 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6988 | } |
| 6989 | |
| 6990 | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 6991 | } |
| 6992 | |
| 6993 | static void cfunc_rsp_vrsqh_scalar(void *param) |
| 6994 | { |
| 6995 | ((rsp_device *)param)->ccfunc_rsp_vrsqh_scalar(); |
| 6996 | } |
| 6997 | #endif |
| 6998 | |
| 6999 | |
| 322 | 7000 | inline void rsp_device::ccfunc_sp_set_status_cb() |
| 323 | 7001 | { |
| 324 | 7002 | m_sp_set_status_func(0, m_rsp_state->arg0, 0xffffffff); |
| r241959 | r241960 | |
| 677 | 7355 | if (size == 1) |
| 678 | 7356 | { |
| 679 | 7357 | UML_MOV(block, mem(&m_rsp_state->arg0), I0); // mov [arg0],i0 ; address |
| 680 | | UML_CALLC(block, cfunc_read8, this); // callc read8 |
| 7358 | UML_CALLC(block, cfunc_read8, this); // callc cfunc_read8 |
| 681 | 7359 | UML_MOV(block, I0, mem(&m_rsp_state->arg0)); // mov i0,[arg0],i0 ; result |
| 682 | 7360 | } |
| 683 | 7361 | else if (size == 2) |
| r241959 | r241960 | |
| 903 | 7581 | UML_MAPVAR(block, MAPVAR_CYCLES, compiler->cycles); // mapvar CYCLES,compiler->cycles |
| 904 | 7582 | } |
| 905 | 7583 | |
| 7584 | |
| 7585 | /*------------------------------------------------- |
| 7586 | generate_vector_opcode - generate code for a |
| 7587 | vector opcode |
| 7588 | -------------------------------------------------*/ |
| 7589 | |
| 7590 | #if USE_SIMD |
| 7591 | |
| 7592 | int rsp_device::generate_vector_opcode(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 7593 | { |
| 7594 | UINT32 op = desc->opptr.l[0]; |
| 7595 | // Opcode legend: |
| 7596 | // E = VS2 element type |
| 7597 | // S = VS1, Source vector 1 |
| 7598 | // T = VS2, Source vector 2 |
| 7599 | // D = Destination vector |
| 7600 | |
| 7601 | switch (op & 0x3f) |
| 7602 | { |
| 7603 | case 0x00: /* VMULF */ |
| 7604 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7605 | UML_CALLC(block, cfunc_rsp_vmulf_simd, this); |
| 7606 | #if SIMUL_SIMD |
| 7607 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7608 | UML_CALLC(block, cfunc_rsp_vmulf_scalar, this); |
| 7609 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7610 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7611 | #endif |
| 7612 | return TRUE; |
| 7613 | |
| 7614 | case 0x01: /* VMULU */ |
| 7615 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7616 | UML_CALLC(block, cfunc_rsp_vmulu_simd, this); |
| 7617 | #if SIMUL_SIMD |
| 7618 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7619 | UML_CALLC(block, cfunc_rsp_vmulu_scalar, this); |
| 7620 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7621 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7622 | #endif |
| 7623 | return TRUE; |
| 7624 | |
| 7625 | case 0x04: /* VMUDL */ |
| 7626 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7627 | UML_CALLC(block, cfunc_rsp_vmudl_simd, this); |
| 7628 | #if SIMUL_SIMD |
| 7629 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7630 | UML_CALLC(block, cfunc_rsp_vmudl_scalar, this); |
| 7631 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7632 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7633 | #endif |
| 7634 | return TRUE; |
| 7635 | |
| 7636 | case 0x05: /* VMUDM */ |
| 7637 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7638 | UML_CALLC(block, cfunc_rsp_vmudm_simd, this); |
| 7639 | #if SIMUL_SIMD |
| 7640 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7641 | UML_CALLC(block, cfunc_rsp_vmudm_scalar, this); |
| 7642 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7643 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7644 | #endif |
| 7645 | return TRUE; |
| 7646 | |
| 7647 | case 0x06: /* VMUDN */ |
| 7648 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7649 | UML_CALLC(block, cfunc_rsp_vmudn_simd, this); |
| 7650 | #if SIMUL_SIMD |
| 7651 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7652 | UML_CALLC(block, cfunc_rsp_vmudn_scalar, this); |
| 7653 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7654 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7655 | #endif |
| 7656 | return TRUE; |
| 7657 | |
| 7658 | case 0x07: /* VMUDH */ |
| 7659 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7660 | UML_CALLC(block, cfunc_rsp_vmudh_simd, this); |
| 7661 | #if SIMUL_SIMD |
| 7662 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7663 | UML_CALLC(block, cfunc_rsp_vmudh_scalar, this); |
| 7664 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7665 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7666 | #endif |
| 7667 | return TRUE; |
| 7668 | |
| 7669 | case 0x08: /* VMACF */ |
| 7670 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7671 | UML_CALLC(block, cfunc_rsp_vmacf_simd, this); |
| 7672 | #if SIMUL_SIMD |
| 7673 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7674 | UML_CALLC(block, cfunc_rsp_vmacf_scalar, this); |
| 7675 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7676 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7677 | #endif |
| 7678 | return TRUE; |
| 7679 | |
| 7680 | case 0x09: /* VMACU */ |
| 7681 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7682 | UML_CALLC(block, cfunc_rsp_vmacu_simd, this); |
| 7683 | #if SIMUL_SIMD |
| 7684 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7685 | UML_CALLC(block, cfunc_rsp_vmacu_scalar, this); |
| 7686 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7687 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7688 | #endif |
| 7689 | return TRUE; |
| 7690 | |
| 7691 | case 0x0c: /* VMADL */ |
| 7692 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7693 | UML_CALLC(block, cfunc_rsp_vmadl_simd, this); |
| 7694 | #if SIMUL_SIMD |
| 7695 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7696 | UML_CALLC(block, cfunc_rsp_vmadl_scalar, this); |
| 7697 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7698 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7699 | #endif |
| 7700 | return TRUE; |
| 7701 | |
| 7702 | case 0x0d: /* VMADM */ |
| 7703 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7704 | UML_CALLC(block, cfunc_rsp_vmadm_simd, this); |
| 7705 | #if SIMUL_SIMD |
| 7706 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7707 | UML_CALLC(block, cfunc_rsp_vmadm_scalar, this); |
| 7708 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7709 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7710 | #endif |
| 7711 | return TRUE; |
| 7712 | |
| 7713 | case 0x0e: /* VMADN */ |
| 7714 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7715 | UML_CALLC(block, cfunc_rsp_vmadn_simd, this); |
| 7716 | #if SIMUL_SIMD |
| 7717 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7718 | UML_CALLC(block, cfunc_rsp_vmadn_scalar, this); |
| 7719 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7720 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7721 | #endif |
| 7722 | return TRUE; |
| 7723 | |
| 7724 | case 0x0f: /* VMADH */ |
| 7725 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7726 | UML_CALLC(block, cfunc_rsp_vmadh_simd, this); |
| 7727 | #if SIMUL_SIMD |
| 7728 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7729 | UML_CALLC(block, cfunc_rsp_vmadh_scalar, this); |
| 7730 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7731 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7732 | #endif |
| 7733 | return TRUE; |
| 7734 | |
| 7735 | case 0x10: /* VADD */ |
| 7736 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7737 | UML_CALLC(block, cfunc_rsp_vadd_simd, this); |
| 7738 | #if SIMUL_SIMD |
| 7739 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7740 | UML_CALLC(block, cfunc_rsp_vadd_scalar, this); |
| 7741 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7742 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7743 | #endif |
| 7744 | return TRUE; |
| 7745 | |
| 7746 | case 0x11: /* VSUB */ |
| 7747 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7748 | UML_CALLC(block, cfunc_rsp_vsub_simd, this); |
| 7749 | #if SIMUL_SIMD |
| 7750 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7751 | UML_CALLC(block, cfunc_rsp_vsub_scalar, this); |
| 7752 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7753 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7754 | #endif |
| 7755 | return TRUE; |
| 7756 | |
| 7757 | case 0x13: /* VABS */ |
| 7758 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7759 | UML_CALLC(block, cfunc_rsp_vabs_simd, this); |
| 7760 | #if SIMUL_SIMD |
| 7761 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7762 | UML_CALLC(block, cfunc_rsp_vabs_scalar, this); |
| 7763 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7764 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7765 | #endif |
| 7766 | return TRUE; |
| 7767 | |
| 7768 | case 0x14: /* VADDC */ |
| 7769 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7770 | UML_CALLC(block, cfunc_rsp_vaddc_simd, this); |
| 7771 | #if SIMUL_SIMD |
| 7772 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7773 | UML_CALLC(block, cfunc_rsp_vaddc_scalar, this); |
| 7774 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7775 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7776 | #endif |
| 7777 | return TRUE; |
| 7778 | |
| 7779 | case 0x15: /* VSUBC */ |
| 7780 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7781 | UML_CALLC(block, cfunc_rsp_vsubc_simd, this); |
| 7782 | #if SIMUL_SIMD |
| 7783 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7784 | UML_CALLC(block, cfunc_rsp_vsubc_scalar, this); |
| 7785 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7786 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7787 | #endif |
| 7788 | return TRUE; |
| 7789 | |
| 7790 | case 0x16: /* VADDB */ |
| 7791 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7792 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7793 | return TRUE; |
| 7794 | |
| 7795 | case 0x17: /* VSUBB (reserved, functionally identical to VADDB) */ |
| 7796 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7797 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7798 | return TRUE; |
| 7799 | |
| 7800 | case 0x18: /* VACCB (reserved, functionally identical to VADDB) */ |
| 7801 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7802 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7803 | return TRUE; |
| 7804 | |
| 7805 | case 0x19: /* VSUCB (reserved, functionally identical to VADDB) */ |
| 7806 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7807 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7808 | return TRUE; |
| 7809 | |
| 7810 | case 0x1d: /* VSAW */ |
| 7811 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7812 | UML_CALLC(block, cfunc_rsp_vsaw_simd, this); |
| 7813 | #if SIMUL_SIMD |
| 7814 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7815 | UML_CALLC(block, cfunc_rsp_vsaw_scalar, this); |
| 7816 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7817 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7818 | #endif |
| 7819 | return TRUE; |
| 7820 | |
| 7821 | case 0x20: /* VLT */ |
| 7822 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7823 | UML_CALLC(block, cfunc_rsp_vlt_simd, this); |
| 7824 | #if SIMUL_SIMD |
| 7825 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7826 | UML_CALLC(block, cfunc_rsp_vlt_scalar, this); |
| 7827 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7828 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7829 | #endif |
| 7830 | return TRUE; |
| 7831 | |
| 7832 | case 0x21: /* VEQ */ |
| 7833 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7834 | UML_CALLC(block, cfunc_rsp_veq_simd, this); |
| 7835 | #if SIMUL_SIMD |
| 7836 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7837 | UML_CALLC(block, cfunc_rsp_veq_scalar, this); |
| 7838 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7839 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7840 | #endif |
| 7841 | return TRUE; |
| 7842 | |
| 7843 | case 0x22: /* VNE */ |
| 7844 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7845 | UML_CALLC(block, cfunc_rsp_vne_simd, this); |
| 7846 | #if SIMUL_SIMD |
| 7847 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7848 | UML_CALLC(block, cfunc_rsp_vne_scalar, this); |
| 7849 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7850 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7851 | #endif |
| 7852 | return TRUE; |
| 7853 | |
| 7854 | case 0x23: /* VGE */ |
| 7855 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7856 | UML_CALLC(block, cfunc_rsp_vge_simd, this); |
| 7857 | #if SIMUL_SIMD |
| 7858 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7859 | UML_CALLC(block, cfunc_rsp_vge_scalar, this); |
| 7860 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7861 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7862 | #endif |
| 7863 | return TRUE; |
| 7864 | |
| 7865 | case 0x24: /* VCL */ |
| 7866 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7867 | UML_CALLC(block, cfunc_rsp_vcl_simd, this); |
| 7868 | #if SIMUL_SIMD |
| 7869 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7870 | UML_CALLC(block, cfunc_rsp_vcl_scalar, this); |
| 7871 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7872 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7873 | #endif |
| 7874 | return TRUE; |
| 7875 | |
| 7876 | case 0x25: /* VCH */ |
| 7877 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7878 | UML_CALLC(block, cfunc_rsp_vch_simd, this); |
| 7879 | #if SIMUL_SIMD |
| 7880 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7881 | UML_CALLC(block, cfunc_rsp_vch_scalar, this); |
| 7882 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7883 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7884 | #endif |
| 7885 | return TRUE; |
| 7886 | |
| 7887 | case 0x26: /* VCR */ |
| 7888 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7889 | UML_CALLC(block, cfunc_rsp_vcr_simd, this); |
| 7890 | #if SIMUL_SIMD |
| 7891 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7892 | UML_CALLC(block, cfunc_rsp_vcr_scalar, this); |
| 7893 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7894 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7895 | #endif |
| 7896 | return TRUE; |
| 7897 | |
| 7898 | case 0x27: /* VMRG */ |
| 7899 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7900 | UML_CALLC(block, cfunc_rsp_vmrg_simd, this); |
| 7901 | #if SIMUL_SIMD |
| 7902 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7903 | UML_CALLC(block, cfunc_rsp_vmrg_scalar, this); |
| 7904 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7905 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7906 | #endif |
| 7907 | return TRUE; |
| 7908 | |
| 7909 | case 0x28: /* VAND */ |
| 7910 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7911 | UML_CALLC(block, cfunc_rsp_vand_simd, this); |
| 7912 | #if SIMUL_SIMD |
| 7913 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7914 | UML_CALLC(block, cfunc_rsp_vand_scalar, this); |
| 7915 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7916 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7917 | #endif |
| 7918 | return TRUE; |
| 7919 | |
| 7920 | case 0x29: /* VNAND */ |
| 7921 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7922 | UML_CALLC(block, cfunc_rsp_vnand_simd, this); |
| 7923 | #if SIMUL_SIMD |
| 7924 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7925 | UML_CALLC(block, cfunc_rsp_vnand_scalar, this); |
| 7926 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7927 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7928 | #endif |
| 7929 | return TRUE; |
| 7930 | |
| 7931 | case 0x2a: /* VOR */ |
| 7932 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7933 | UML_CALLC(block, cfunc_rsp_vor_simd, this); |
| 7934 | #if SIMUL_SIMD |
| 7935 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7936 | UML_CALLC(block, cfunc_rsp_vor_scalar, this); |
| 7937 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7938 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7939 | #endif |
| 7940 | return TRUE; |
| 7941 | |
| 7942 | case 0x2b: /* VNOR */ |
| 7943 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7944 | UML_CALLC(block, cfunc_rsp_vnor_simd, this); |
| 7945 | #if SIMUL_SIMD |
| 7946 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7947 | UML_CALLC(block, cfunc_rsp_vnor_scalar, this); |
| 7948 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7949 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7950 | #endif |
| 7951 | return TRUE; |
| 7952 | |
| 7953 | case 0x2c: /* VXOR */ |
| 7954 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7955 | UML_CALLC(block, cfunc_rsp_vxor_simd, this); |
| 7956 | #if SIMUL_SIMD |
| 7957 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7958 | UML_CALLC(block, cfunc_rsp_vxor_scalar, this); |
| 7959 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7960 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7961 | #endif |
| 7962 | return TRUE; |
| 7963 | |
| 7964 | case 0x2d: /* VNXOR */ |
| 7965 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7966 | UML_CALLC(block, cfunc_rsp_vnxor_simd, this); |
| 7967 | #if SIMUL_SIMD |
| 7968 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7969 | UML_CALLC(block, cfunc_rsp_vnxor_scalar, this); |
| 7970 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7971 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7972 | #endif |
| 7973 | return TRUE; |
| 7974 | |
| 7975 | case 0x30: /* VRCP */ |
| 7976 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7977 | UML_CALLC(block, cfunc_rsp_vrcp_simd, this); |
| 7978 | #if SIMUL_SIMD |
| 7979 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7980 | UML_CALLC(block, cfunc_rsp_vrcp_scalar, this); |
| 7981 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7982 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7983 | #endif |
| 7984 | return TRUE; |
| 7985 | |
| 7986 | case 0x31: /* VRCPL */ |
| 7987 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7988 | UML_CALLC(block, cfunc_rsp_vrcpl_simd, this); |
| 7989 | #if SIMUL_SIMD |
| 7990 | UML_CALLC(block, cfunc_backup_regs, this); |
| 7991 | UML_CALLC(block, cfunc_rsp_vrcpl_scalar, this); |
| 7992 | UML_CALLC(block, cfunc_restore_regs, this); |
| 7993 | UML_CALLC(block, cfunc_verify_regs, this); |
| 7994 | #endif |
| 7995 | return TRUE; |
| 7996 | |
| 7997 | case 0x32: /* VRCPH */ |
| 7998 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7999 | UML_CALLC(block, cfunc_rsp_vrcph_simd, this); |
| 8000 | #if SIMUL_SIMD |
| 8001 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8002 | UML_CALLC(block, cfunc_rsp_vrcph_scalar, this); |
| 8003 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8004 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8005 | #endif |
| 8006 | return TRUE; |
| 8007 | |
| 8008 | case 0x33: /* VMOV */ |
| 8009 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8010 | UML_CALLC(block, cfunc_rsp_vmov_simd, this); |
| 8011 | #if SIMUL_SIMD |
| 8012 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8013 | UML_CALLC(block, cfunc_rsp_vmov_scalar, this); |
| 8014 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8015 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8016 | #endif |
| 8017 | return TRUE; |
| 8018 | |
| 8019 | case 0x34: /* VRSQ */ |
| 8020 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8021 | UML_CALLC(block, cfunc_rsp_vrsq_scalar, this); // callc cfunc_rsp_vrsq_scalar (this case emits the scalar callback directly; no *_simd call or SIMUL_SIMD sequence is emitted here) |
| 8022 | return TRUE; |
| 8023 | |
| 8024 | case 0x35: /* VRSQL */ |
| 8025 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8026 | UML_CALLC(block, cfunc_rsp_vrsql_simd, this); |
| 8027 | #if SIMUL_SIMD |
| 8028 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8029 | UML_CALLC(block, cfunc_rsp_vrsql_scalar, this); |
| 8030 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8031 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8032 | #endif |
| 8033 | return TRUE; |
| 8034 | |
| 8035 | case 0x36: /* VRSQH */ |
| 8036 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8037 | UML_CALLC(block, cfunc_rsp_vrsqh_simd, this); |
| 8038 | #if SIMUL_SIMD |
| 8039 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8040 | UML_CALLC(block, cfunc_rsp_vrsqh_scalar, this); |
| 8041 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8042 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8043 | #endif |
| 8044 | return TRUE; |
| 8045 | |
| 8046 | case 0x37: /* VNOP */ |
| 8047 | case 0x3F: /* VNULL */ |
| 8048 | return TRUE; |
| 8049 | |
| 8050 | default: |
| 8051 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8052 | UML_CALLC(block, cfunc_unimplemented_opcode, this); |
| 8053 | return FALSE; |
| 8054 | } |
| 8055 | } |
| 8056 | |
| 8057 | #else |
| 8058 | |
| 8059 | int rsp_device::generate_vector_opcode(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 8060 | { // Emits the UML sequence for one vector-unit opcode described by desc; in this variant every operation is routed to a *_scalar callback. The compiler argument is not referenced in this body. |
| 8061 | UINT32 op = desc->opptr.l[0]; // first 32-bit word at desc->opptr; its low 6 bits select the operation in the switch below |
| 8062 | // Opcode legend: |
| 8063 | // E = VS2 element type |
| 8064 | // S = VS1, Source vector 1 |
| 8065 | // T = VS2, Source vector 2 |
| 8066 | // D = Destination vector |
| 8067 | // Each handled case stores the opcode word in m_rsp_state->arg0, emits a call to the matching callback with this as its parameter, and returns TRUE. |
| 8068 | switch (op & 0x3f) |
| 8069 | { |
| 8070 | case 0x00: /* VMULF */ |
| 8071 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8072 | UML_CALLC(block, cfunc_rsp_vmulf_scalar, this); |
| 8073 | return TRUE; |
| 8074 | |
| 8075 | case 0x01: /* VMULU */ |
| 8076 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8077 | UML_CALLC(block, cfunc_rsp_vmulu_scalar, this); |
| 8078 | return TRUE; |
| 8079 | |
| 8080 | case 0x04: /* VMUDL */ |
| 8081 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8082 | UML_CALLC(block, cfunc_rsp_vmudl_scalar, this); |
| 8083 | return TRUE; |
| 8084 | |
| 8085 | case 0x05: /* VMUDM */ |
| 8086 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8087 | UML_CALLC(block, cfunc_rsp_vmudm_scalar, this); |
| 8088 | return TRUE; |
| 8089 | |
| 8090 | case 0x06: /* VMUDN */ |
| 8091 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8092 | UML_CALLC(block, cfunc_rsp_vmudn_scalar, this); |
| 8093 | return TRUE; |
| 8094 | |
| 8095 | case 0x07: /* VMUDH */ |
| 8096 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8097 | UML_CALLC(block, cfunc_rsp_vmudh_scalar, this); |
| 8098 | return TRUE; |
| 8099 | |
| 8100 | case 0x08: /* VMACF */ |
| 8101 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8102 | UML_CALLC(block, cfunc_rsp_vmacf_scalar, this); |
| 8103 | return TRUE; |
| 8104 | |
| 8105 | case 0x09: /* VMACU */ |
| 8106 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8107 | UML_CALLC(block, cfunc_rsp_vmacu_scalar, this); |
| 8108 | return TRUE; |
| 8109 | |
| 8110 | case 0x0c: /* VMADL */ |
| 8111 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8112 | UML_CALLC(block, cfunc_rsp_vmadl_scalar, this); |
| 8113 | return TRUE; |
| 8114 | |
| 8115 | case 0x0d: /* VMADM */ |
| 8116 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8117 | UML_CALLC(block, cfunc_rsp_vmadm_scalar, this); |
| 8118 | return TRUE; |
| 8119 | |
| 8120 | case 0x0e: /* VMADN */ |
| 8121 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8122 | UML_CALLC(block, cfunc_rsp_vmadn_scalar, this); |
| 8123 | return TRUE; |
| 8124 | |
| 8125 | case 0x0f: /* VMADH */ |
| 8126 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8127 | UML_CALLC(block, cfunc_rsp_vmadh_scalar, this); |
| 8128 | return TRUE; |
| 8129 | |
| 8130 | case 0x10: /* VADD */ |
| 8131 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8132 | UML_CALLC(block, cfunc_rsp_vadd_scalar, this); |
| 8133 | return TRUE; |
| 8134 | |
| 8135 | case 0x11: /* VSUB */ |
| 8136 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8137 | UML_CALLC(block, cfunc_rsp_vsub_scalar, this); |
| 8138 | return TRUE; |
| 8139 | |
| 8140 | case 0x13: /* VABS */ |
| 8141 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8142 | UML_CALLC(block, cfunc_rsp_vabs_scalar, this); |
| 8143 | return TRUE; |
| 8144 | |
| 8145 | case 0x14: /* VADDC */ |
| 8146 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8147 | UML_CALLC(block, cfunc_rsp_vaddc_scalar, this); |
| 8148 | return TRUE; |
| 8149 | |
| 8150 | case 0x15: /* VSUBC */ |
| 8151 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8152 | UML_CALLC(block, cfunc_rsp_vsubc_scalar, this); |
| 8153 | return TRUE; |
| 8154 | |
| 8155 | case 0x16: /* VADDB */ |
| 8156 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8157 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8158 | return TRUE; |
| 8159 | |
| 8160 | case 0x17: /* VSUBB (reserved, functionally identical to VADDB) */ |
| 8161 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8162 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8163 | return TRUE; |
| 8164 | |
| 8165 | case 0x18: /* VACCB (reserved, functionally identical to VADDB) */ |
| 8166 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8167 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8168 | return TRUE; |
| 8169 | |
| 8170 | case 0x19: /* VSUCB (reserved, functionally identical to VADDB) */ |
| 8171 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8172 | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8173 | return TRUE; |
| 8174 | |
| 8175 | case 0x1d: /* VSAW */ |
| 8176 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8177 | UML_CALLC(block, cfunc_rsp_vsaw_scalar, this); |
| 8178 | return TRUE; |
| 8179 | |
| 8180 | case 0x20: /* VLT */ |
| 8181 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8182 | UML_CALLC(block, cfunc_rsp_vlt_scalar, this); |
| 8183 | return TRUE; |
| 8184 | |
| 8185 | case 0x21: /* VEQ */ |
| 8186 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8187 | UML_CALLC(block, cfunc_rsp_veq_scalar, this); |
| 8188 | return TRUE; |
| 8189 | |
| 8190 | case 0x22: /* VNE */ |
| 8191 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8192 | UML_CALLC(block, cfunc_rsp_vne_scalar, this); |
| 8193 | return TRUE; |
| 8194 | |
| 8195 | case 0x23: /* VGE */ |
| 8196 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8197 | UML_CALLC(block, cfunc_rsp_vge_scalar, this); |
| 8198 | return TRUE; |
| 8199 | |
| 8200 | case 0x24: /* VCL */ |
| 8201 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8202 | UML_CALLC(block, cfunc_rsp_vcl_scalar, this); |
| 8203 | return TRUE; |
| 8204 | |
| 8205 | case 0x25: /* VCH */ |
| 8206 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8207 | UML_CALLC(block, cfunc_rsp_vch_scalar, this); |
| 8208 | return TRUE; |
| 8209 | |
| 8210 | case 0x26: /* VCR */ |
| 8211 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8212 | UML_CALLC(block, cfunc_rsp_vcr_scalar, this); |
| 8213 | return TRUE; |
| 8214 | |
| 8215 | case 0x27: /* VMRG */ |
| 8216 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8217 | UML_CALLC(block, cfunc_rsp_vmrg_scalar, this); |
| 8218 | return TRUE; |
| 8219 | |
| 8220 | case 0x28: /* VAND */ |
| 8221 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8222 | UML_CALLC(block, cfunc_rsp_vand_scalar, this); |
| 8223 | return TRUE; |
| 8224 | |
| 8225 | case 0x29: /* VNAND */ |
| 8226 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8227 | UML_CALLC(block, cfunc_rsp_vnand_scalar, this); |
| 8228 | return TRUE; |
| 8229 | |
| 8230 | case 0x2a: /* VOR */ |
| 8231 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8232 | UML_CALLC(block, cfunc_rsp_vor_scalar, this); |
| 8233 | return TRUE; |
| 8234 | |
| 8235 | case 0x2b: /* VNOR */ |
| 8236 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8237 | UML_CALLC(block, cfunc_rsp_vnor_scalar, this); |
| 8238 | return TRUE; |
| 8239 | |
| 8240 | case 0x2c: /* VXOR */ |
| 8241 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8242 | UML_CALLC(block, cfunc_rsp_vxor_scalar, this); |
| 8243 | return TRUE; |
| 8244 | |
| 8245 | case 0x2d: /* VNXOR */ |
| 8246 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8247 | UML_CALLC(block, cfunc_rsp_vnxor_scalar, this); |
| 8248 | return TRUE; |
| 8249 | |
| 8250 | case 0x30: /* VRCP */ |
| 8251 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8252 | UML_CALLC(block, cfunc_rsp_vrcp_scalar, this); |
| 8253 | return TRUE; |
| 8254 | |
| 8255 | case 0x31: /* VRCPL */ |
| 8256 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8257 | UML_CALLC(block, cfunc_rsp_vrcpl_scalar, this); |
| 8258 | return TRUE; |
| 8259 | |
| 8260 | case 0x32: /* VRCPH */ |
| 8261 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8262 | UML_CALLC(block, cfunc_rsp_vrcph_scalar, this); |
| 8263 | return TRUE; |
| 8264 | |
| 8265 | case 0x33: /* VMOV */ |
| 8266 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8267 | UML_CALLC(block, cfunc_rsp_vmov_scalar, this); |
| 8268 | return TRUE; |
| 8269 | |
| 8270 | case 0x34: /* VRSQ */ |
| 8271 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8272 | UML_CALLC(block, cfunc_rsp_vrsq_scalar, this); |
| 8273 | return TRUE; |
| 8274 | |
| 8275 | case 0x35: /* VRSQL */ |
| 8276 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8277 | UML_CALLC(block, cfunc_rsp_vrsql_scalar, this); |
| 8278 | return TRUE; |
| 8279 | |
| 8280 | case 0x36: /* VRSQH */ |
| 8281 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8282 | UML_CALLC(block, cfunc_rsp_vrsqh_scalar, this); |
| 8283 | return TRUE; |
| 8284 | |
| 8285 | case 0x37: /* VNOP */ |
| 8286 | case 0x3F: /* VNULL */ |
| 8287 | return TRUE; // VNOP/VNULL: nothing is emitted into the block |
| 8288 | |
| 8289 | default: |
| 8290 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8291 | UML_CALLC(block, cfunc_unimplemented_opcode, this); |
| 8292 | return FALSE; // any other function code: the unimplemented-opcode callback is emitted and FALSE is returned |
| 8293 | } |
| 8294 | } |
| 8295 | #endif |
| 8296 | |
| 906 | 8297 | int rsp_device::generate_opcode(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 907 | 8298 | { |
| 908 | 8299 | int in_delay_slot = ((desc->flags & OPFLAG_IN_DELAY_SLOT) != 0); |
| r241959 | r241960 | |
| 1059 | 8450 | return TRUE; |
| 1060 | 8451 | |
| 1061 | 8452 | case 0x32: /* LWC2 - MIPS I */ |
| 1062 | | return m_cop2->generate_lwc2(block, compiler, desc); |
| 8453 | return generate_lwc2(block, compiler, desc); |
| 1063 | 8454 | |
| 1064 | 8455 | |
| 1065 | 8456 | /* ----- memory store operations ----- */ |
| r241959 | r241960 | |
| 1089 | 8480 | return TRUE; |
| 1090 | 8481 | |
| 1091 | 8482 | case 0x3a: /* SWC2 - MIPS I */ |
| 1092 | | return m_cop2->generate_swc2(block, compiler, desc); |
| 8483 | return generate_swc2(block, compiler, desc); |
| 8484 | //UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8485 | //UML_CALLC(block, cfunc_swc2, this); // callc cfunc_mfc2 |
| 8486 | //return TRUE; |
| 1093 | 8487 | |
| 1094 | 8488 | /* ----- coprocessor instructions ----- */ |
| 1095 | 8489 | |
| r241959 | r241960 | |
| 1097 | 8491 | return generate_cop0(block, compiler, desc); |
| 1098 | 8492 | |
| 1099 | 8493 | case 0x12: /* COP2 - MIPS I */ |
| 1100 | | return m_cop2->generate_cop2(block, compiler, desc); |
| 8494 | return generate_cop2(block, compiler, desc); |
| 8495 | //UML_EXH(block, m_exception[EXCEPTION_INVALIDOP], 0);// exh invalidop,0 |
| 8496 | //return TRUE; |
| 1101 | 8497 | |
| 1102 | 8498 | |
| 1103 | 8499 | /* ----- unimplemented/illegal instructions ----- */ |
| r241959 | r241960 | |
| 1309 | 8705 | |
| 1310 | 8706 | |
| 1311 | 8707 | /*------------------------------------------------- |
| 8708 | generate_cop2 - compile COP2 opcodes |
| 8709 | -------------------------------------------------*/ |
| 8710 | |
| 8711 | int rsp_device::generate_cop2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 8712 | { |
| 8713 | UINT32 op = desc->opptr.l[0]; // first 32-bit word at desc->opptr |
| 8714 | UINT8 opswitch = RSREG; // RSREG is a macro defined outside this chunk; presumably it extracts a field of op - TODO confirm |
| 8715 | // Returns TRUE for the handled selector values 0x00/0x02/0x04/0x06, the result of generate_vector_opcode for 0x10-0x1f, and FALSE otherwise. |
| 8716 | switch (opswitch) |
| 8717 | { |
| 8718 | case 0x00: /* MFCz */ |
| 8719 | if (RTREG != 0) // nothing is emitted when RTREG is 0, but TRUE is still returned |
| 8720 | { |
| 8721 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8722 | #if USE_SIMD |
| 8723 | UML_CALLC(block, cfunc_mfc2_simd, this); // callc cfunc_mfc2_simd |
| 8724 | #if SIMUL_SIMD |
| 8725 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8726 | UML_CALLC(block, cfunc_mfc2_scalar, this); |
| 8727 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8728 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8729 | #endif |
| 8730 | #else |
| 8731 | UML_CALLC(block, cfunc_mfc2_scalar, this); |
| 8732 | #endif |
| 8733 | //UML_SEXT(block, R32(RTREG), I0, DWORD); // dsext <rtreg>,i0,dword |
| 8734 | } |
| 8735 | return TRUE; |
| 8736 | |
| 8737 | case 0x02: /* CFCz */ |
| 8738 | if (RTREG != 0) // nothing is emitted when RTREG is 0, but TRUE is still returned |
| 8739 | { |
| 8740 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8741 | #if USE_SIMD |
| 8742 | UML_CALLC(block, cfunc_cfc2_simd, this); // callc cfunc_cfc2_simd |
| 8743 | #if SIMUL_SIMD |
| 8744 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8745 | UML_CALLC(block, cfunc_cfc2_scalar, this); |
| 8746 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8747 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8748 | #endif |
| 8749 | #else |
| 8750 | UML_CALLC(block, cfunc_cfc2_scalar, this); |
| 8751 | #endif |
| 8752 | //UML_SEXT(block, R32(RTREG), I0, DWORD); // dsext <rtreg>,i0,dword |
| 8753 | } |
| 8754 | return TRUE; |
| 8755 | |
| 8756 | case 0x04: /* MTCz */ |
| 8757 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8758 | #if USE_SIMD |
| 8759 | UML_CALLC(block, cfunc_mtc2_simd, this); // callc cfunc_mtc2_simd |
| 8760 | #if SIMUL_SIMD |
| 8761 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8762 | UML_CALLC(block, cfunc_mtc2_scalar, this); |
| 8763 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8764 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8765 | #endif |
| 8766 | #else |
| 8767 | UML_CALLC(block, cfunc_mtc2_scalar, this); |
| 8768 | #endif |
| 8769 | return TRUE; |
| 8770 | |
| 8771 | case 0x06: /* CTCz */ |
| 8772 | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8773 | #if USE_SIMD |
| 8774 | UML_CALLC(block, cfunc_ctc2_simd, this); // callc cfunc_ctc2_simd |
| 8775 | #if SIMUL_SIMD |
| 8776 | UML_CALLC(block, cfunc_backup_regs, this); |
| 8777 | UML_CALLC(block, cfunc_ctc2_scalar, this); |
| 8778 | UML_CALLC(block, cfunc_restore_regs, this); |
| 8779 | UML_CALLC(block, cfunc_verify_regs, this); |
| 8780 | #endif |
| 8781 | #else |
| 8782 | UML_CALLC(block, cfunc_ctc2_scalar, this); |
| 8783 | #endif |
| 8784 | return TRUE; |
| 8785 | |
| 8786 | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 8787 | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 8788 | return generate_vector_opcode(block, compiler, desc); // selector values 0x10-0x1f are forwarded to the vector-op generator |
| 8789 | } |
| 8790 | return FALSE; // opswitch matched none of the cases above |
| 8791 | } |
| 8792 | |
| 8793 | /*------------------------------------------------- |
| 1312 | 8794 | generate_cop0 - compile COP0 opcodes |
| 1313 | 8795 | -------------------------------------------------*/ |
| 1314 | 8796 | |
| r241959 | r241960 | |
| 1343 | 8825 | return FALSE; |
| 1344 | 8826 | } |
| 1345 | 8827 | |
| 8828 | #if USE_SIMD |
| 8829 | inline void rsp_device::ccfunc_mfc2_simd() |
| 8830 | { // Builds a 16-bit value from two consecutive bytes of m_xv[VS1REG], starting at byte index el, and stores it sign-extended in r[RTREG]. |
| 8831 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 8832 | int el = (op >> 7) & 0xf; // bits 7-10 of the opcode: starting byte index |
| 8833 | // First byte: taken from 16-bit lane el>>1. |
| 8834 | UINT16 out; |
| 8835 | SIMD_EXTRACT16(m_xv[VS1REG], out, (el >> 1)); |
| 8836 | out >>= (1 - (el & 1)) * 8; // even el: shift by 8 to select the lane's upper byte; odd el: shift by 0, lower byte |
| 8837 | out &= 0x00ff; |
| 8838 | |
| 8839 | el++; // NOTE(review): el is not wrapped here, so el == 15 yields lane index 8 below, whereas the scalar variant masks with 0xf - confirm SIMD_EXTRACT16 behaviour for that index |
| 8840 | // Second byte: same extraction for the following byte index. |
| 8841 | UINT16 temp; |
| 8842 | SIMD_EXTRACT16(m_xv[VS1REG], temp, (el >> 1)); |
| 8843 | temp >>= (1 - (el & 1)) * 8; |
| 8844 | temp &= 0x00ff; |
| 8845 | |
| 8846 | m_rsp_state->r[RTREG] = (INT32)(INT16)((out << 8) | temp); // first byte becomes the high byte; the 16-bit result is sign-extended to 32 bits |
| 8847 | } |
| 8848 | |
| 8849 | static void cfunc_mfc2_simd(void *param) |
| 8850 | { // Free-function entry point: casts param to rsp_device* and forwards to ccfunc_mfc2_simd(). |
| 8851 | ((rsp_device *)param)->ccfunc_mfc2_simd(); |
| 8852 | } |
| 8853 | #endif |
| 8854 | |
| 8855 | #if (!USE_SIMD || SIMUL_SIMD) |
| 8856 | inline void rsp_device::ccfunc_mfc2_scalar() |
| 8857 | { // Reads two consecutive bytes of vector register VS1REG, starting at byte index el, and stores them as a sign-extended 16-bit value in RTVAL. |
| 8858 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 8859 | int el = (op >> 7) & 0xf; // bits 7-10 of the opcode: starting byte index |
| 8860 | // The second index wraps modulo 16 through the & 0xf mask. |
| 8861 | UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); |
| 8862 | UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); |
| 8863 | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); // b1 is the high byte; no write happens when RTREG is 0 |
| 8864 | } |
| 8865 | |
| 8866 | static void cfunc_mfc2_scalar(void *param) |
| 8867 | { // Free-function entry point: casts param to rsp_device* and forwards to ccfunc_mfc2_scalar(). |
| 8868 | ((rsp_device *)param)->ccfunc_mfc2_scalar(); |
| 8869 | } |
| 8870 | #endif |
| 8871 | |
| 8872 | #if USE_SIMD |
| 8873 | inline void rsp_device::ccfunc_cfc2_simd() |
| 8874 | { // Packs per-element flag bits (read through the VEC_*_FLAG macros) into RTVAL; which flags are used depends on RDREG. |
| 8875 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 8876 | if (RTREG) // nothing is written when RTREG is 0 |
| 8877 | { |
| 8878 | switch(RDREG) // RDREG values other than 0, 1 and 2 leave RTVAL untouched |
| 8879 | { |
| 8880 | case 0: // bits 0-7 from VEC_CARRY_FLAG(0..7), bits 8-15 from VEC_ZERO_FLAG(0..7) |
| 8881 | RTVAL = ((VEC_CARRY_FLAG(0) & 1) << 0) | |
| 8882 | ((VEC_CARRY_FLAG(1) & 1) << 1) | |
| 8883 | ((VEC_CARRY_FLAG(2) & 1) << 2) | |
| 8884 | ((VEC_CARRY_FLAG(3) & 1) << 3) | |
| 8885 | ((VEC_CARRY_FLAG(4) & 1) << 4) | |
| 8886 | ((VEC_CARRY_FLAG(5) & 1) << 5) | |
| 8887 | ((VEC_CARRY_FLAG(6) & 1) << 6) | |
| 8888 | ((VEC_CARRY_FLAG(7) & 1) << 7) | |
| 8889 | ((VEC_ZERO_FLAG(0) & 1) << 8) | |
| 8890 | ((VEC_ZERO_FLAG(1) & 1) << 9) | |
| 8891 | ((VEC_ZERO_FLAG(2) & 1) << 10) | |
| 8892 | ((VEC_ZERO_FLAG(3) & 1) << 11) | |
| 8893 | ((VEC_ZERO_FLAG(4) & 1) << 12) | |
| 8894 | ((VEC_ZERO_FLAG(5) & 1) << 13) | |
| 8895 | ((VEC_ZERO_FLAG(6) & 1) << 14) | |
| 8896 | ((VEC_ZERO_FLAG(7) & 1) << 15); |
| 8897 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; // bit 15 set: fill bits 16-31 with ones (sign extension of the 16-bit result) |
| 8898 | break; |
| 8899 | case 1: // bits 0-7 from VEC_COMPARE_FLAG(0..7), bits 8-15 from VEC_CLIP2_FLAG(0..7) |
| 8900 | RTVAL = ((VEC_COMPARE_FLAG(0) & 1) << 0) | |
| 8901 | ((VEC_COMPARE_FLAG(1) & 1) << 1) | |
| 8902 | ((VEC_COMPARE_FLAG(2) & 1) << 2) | |
| 8903 | ((VEC_COMPARE_FLAG(3) & 1) << 3) | |
| 8904 | ((VEC_COMPARE_FLAG(4) & 1) << 4) | |
| 8905 | ((VEC_COMPARE_FLAG(5) & 1) << 5) | |
| 8906 | ((VEC_COMPARE_FLAG(6) & 1) << 6) | |
| 8907 | ((VEC_COMPARE_FLAG(7) & 1) << 7) | |
| 8908 | ((VEC_CLIP2_FLAG(0) & 1) << 8) | |
| 8909 | ((VEC_CLIP2_FLAG(1) & 1) << 9) | |
| 8910 | ((VEC_CLIP2_FLAG(2) & 1) << 10) | |
| 8911 | ((VEC_CLIP2_FLAG(3) & 1) << 11) | |
| 8912 | ((VEC_CLIP2_FLAG(4) & 1) << 12) | |
| 8913 | ((VEC_CLIP2_FLAG(5) & 1) << 13) | |
| 8914 | ((VEC_CLIP2_FLAG(6) & 1) << 14) | |
| 8915 | ((VEC_CLIP2_FLAG(7) & 1) << 15); |
| 8916 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; // bit 15 set: fill bits 16-31 with ones (sign extension of the 16-bit result) |
| 8917 | break; |
| 8918 | case 2: // bits 0-7 from VEC_CLIP1_FLAG(0..7); only 8 bits are produced and no sign extension is applied |
| 8919 | RTVAL = ((VEC_CLIP1_FLAG(0) & 1) << 0) | |
| 8920 | ((VEC_CLIP1_FLAG(1) & 1) << 1) | |
| 8921 | ((VEC_CLIP1_FLAG(2) & 1) << 2) | |
| 8922 | ((VEC_CLIP1_FLAG(3) & 1) << 3) | |
| 8923 | ((VEC_CLIP1_FLAG(4) & 1) << 4) | |
| 8924 | ((VEC_CLIP1_FLAG(5) & 1) << 5) | |
| 8925 | ((VEC_CLIP1_FLAG(6) & 1) << 6) | |
| 8926 | ((VEC_CLIP1_FLAG(7) & 1) << 7); |
| 8927 | break; |
| 8928 | } |
| 8929 | } |
| 8930 | } |
| 8931 | |
| 8932 | static void cfunc_cfc2_simd(void *param) |
| 8933 | { // Free-function entry point: casts param to rsp_device* and forwards to ccfunc_cfc2_simd(). |
| 8934 | ((rsp_device *)param)->ccfunc_cfc2_simd(); |
| 8935 | } |
| 8936 | #endif |
| 8937 | |
| 8938 | #if (!USE_SIMD || SIMUL_SIMD) |
| 8939 | inline void rsp_device::ccfunc_cfc2_scalar() |
| 8940 | { // Packs per-element flag bits (read through the *_FLAG macros) into RTVAL; which flags are used depends on RDREG. |
| 8941 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 8942 | if (RTREG) // nothing is written when RTREG is 0 |
| 8943 | { |
| 8944 | switch(RDREG) // RDREG values other than 0, 1 and 2 leave RTVAL untouched |
| 8945 | { |
| 8946 | case 0: // bits 0-7 from CARRY_FLAG(0..7), bits 8-15 from ZERO_FLAG(0..7) |
| 8947 | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 8948 | ((CARRY_FLAG(1) & 1) << 1) | |
| 8949 | ((CARRY_FLAG(2) & 1) << 2) | |
| 8950 | ((CARRY_FLAG(3) & 1) << 3) | |
| 8951 | ((CARRY_FLAG(4) & 1) << 4) | |
| 8952 | ((CARRY_FLAG(5) & 1) << 5) | |
| 8953 | ((CARRY_FLAG(6) & 1) << 6) | |
| 8954 | ((CARRY_FLAG(7) & 1) << 7) | |
| 8955 | ((ZERO_FLAG(0) & 1) << 8) | |
| 8956 | ((ZERO_FLAG(1) & 1) << 9) | |
| 8957 | ((ZERO_FLAG(2) & 1) << 10) | |
| 8958 | ((ZERO_FLAG(3) & 1) << 11) | |
| 8959 | ((ZERO_FLAG(4) & 1) << 12) | |
| 8960 | ((ZERO_FLAG(5) & 1) << 13) | |
| 8961 | ((ZERO_FLAG(6) & 1) << 14) | |
| 8962 | ((ZERO_FLAG(7) & 1) << 15); |
| 8963 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; // bit 15 set: fill bits 16-31 with ones (sign extension of the 16-bit result) |
| 8964 | break; |
| 8965 | case 1: // bits 0-7 from COMPARE_FLAG(0..7), bits 8-15 from CLIP2_FLAG(0..7) |
| 8966 | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 8967 | ((COMPARE_FLAG(1) & 1) << 1) | |
| 8968 | ((COMPARE_FLAG(2) & 1) << 2) | |
| 8969 | ((COMPARE_FLAG(3) & 1) << 3) | |
| 8970 | ((COMPARE_FLAG(4) & 1) << 4) | |
| 8971 | ((COMPARE_FLAG(5) & 1) << 5) | |
| 8972 | ((COMPARE_FLAG(6) & 1) << 6) | |
| 8973 | ((COMPARE_FLAG(7) & 1) << 7) | |
| 8974 | ((CLIP2_FLAG(0) & 1) << 8) | |
| 8975 | ((CLIP2_FLAG(1) & 1) << 9) | |
| 8976 | ((CLIP2_FLAG(2) & 1) << 10) | |
| 8977 | ((CLIP2_FLAG(3) & 1) << 11) | |
| 8978 | ((CLIP2_FLAG(4) & 1) << 12) | |
| 8979 | ((CLIP2_FLAG(5) & 1) << 13) | |
| 8980 | ((CLIP2_FLAG(6) & 1) << 14) | |
| 8981 | ((CLIP2_FLAG(7) & 1) << 15); |
| 8982 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; // bit 15 set: fill bits 16-31 with ones (sign extension of the 16-bit result) |
| 8983 | break; |
| 8984 | case 2: // bits 0-7 from CLIP1_FLAG(0..7); only 8 bits are produced and no sign extension is applied |
| 8985 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 8986 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 8987 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 8988 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 8989 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 8990 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 8991 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 8992 | ((CLIP1_FLAG(7) & 1) << 7); |
| 8993 | break; |
| 8994 | } |
| 8995 | } |
| 8996 | } |
| 8997 | |
| 8998 | static void cfunc_cfc2_scalar(void *param) |
| 8999 | { // Free-function entry point: casts param to rsp_device* and forwards to ccfunc_cfc2_scalar(). |
| 9000 | ((rsp_device *)param)->ccfunc_cfc2_scalar(); |
| 9001 | } |
| 9002 | #endif |
| 9003 | |
| 9004 | #if USE_SIMD |
| 9005 | inline void rsp_device::ccfunc_mtc2_simd() |
| 9006 | { // Passes RTVAL to SIMD_INSERT16 for m_xv[VS1REG] at lane index el>>1. |
| 9007 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 9008 | int el = (op >> 7) & 0xf; // bits 7-10 of the opcode |
| 9009 | SIMD_INSERT16(m_xv[VS1REG], RTVAL, el >> 1); // NOTE(review): the low bit of el is dropped here, while the scalar variant writes two separate bytes at el and el+1 - confirm odd el is handled as intended |
| 9010 | } |
| 9011 | |
| 9012 | static void cfunc_mtc2_simd(void *param) |
| 9013 | { // Free-function entry point: casts param to rsp_device* and forwards to ccfunc_mtc2_simd(). |
| 9014 | ((rsp_device *)param)->ccfunc_mtc2_simd(); |
| 9015 | } |
| 9016 | #endif |
| 9017 | |
| 9018 | #if (!USE_SIMD || SIMUL_SIMD) |
| 9019 | inline void rsp_device::ccfunc_mtc2_scalar() |
| 9020 | { // Writes the low 16 bits of RTVAL into two consecutive bytes of vector register VS1REG, starting at byte index el. |
| 9021 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 9022 | int el = (op >> 7) & 0xf; // bits 7-10 of the opcode: starting byte index |
| 9023 | VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; // bits 8-15 of RTVAL go to the first byte |
| 9024 | VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; // bits 0-7 go to the next byte; the index wraps modulo 16 |
| 9025 | } |
| 9026 | |
| 9027 | static void cfunc_mtc2_scalar(void *param) |
| 9028 | { // Free-function entry point: casts param to rsp_device* and forwards to ccfunc_mtc2_scalar(). |
| 9029 | ((rsp_device *)param)->ccfunc_mtc2_scalar(); |
| 9030 | } |
| 9031 | #endif |
| 9032 | |
| 9033 | #if USE_SIMD |
| 9034 | inline void rsp_device::ccfunc_ctc2_simd() |
| 9035 | { // Unpacks bits of RTVAL into per-element flags: each selected m_vflag row entry becomes 0xffff or 0, and the matching VEC_SET_*_FLAG macro is invoked for every set bit. RDREG selects the flag group. |
| 9036 | UINT32 op = m_rsp_state->arg0; // opcode word stored in arg0 by the generated code |
| 9037 | switch(RDREG) // RDREG values other than 0, 1 and 2 change nothing |
| 9038 | { |
| 9039 | case 0: // bits 0-7 -> m_vflag[0][0..7] and carry flags; bits 8-15 -> m_vflag[3][0..7] and zero flags |
| 9040 | VEC_CLEAR_CARRY_FLAGS(); |
| 9041 | VEC_CLEAR_ZERO_FLAGS(); |
| 9042 | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9043 | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9044 | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9045 | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9046 | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9047 | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9048 | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9049 | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9050 | if (RTVAL & (1 << 0)) { VEC_SET_CARRY_FLAG(0); } |
| 9051 | if (RTVAL & (1 << 1)) { VEC_SET_CARRY_FLAG(1); } |
| 9052 | if (RTVAL & (1 << 2)) { VEC_SET_CARRY_FLAG(2); } |
| 9053 | if (RTVAL & (1 << 3)) { VEC_SET_CARRY_FLAG(3); } |
| 9054 | if (RTVAL & (1 << 4)) { VEC_SET_CARRY_FLAG(4); } |
| 9055 | if (RTVAL & (1 << 5)) { VEC_SET_CARRY_FLAG(5); } |
| 9056 | if (RTVAL & (1 << 6)) { VEC_SET_CARRY_FLAG(6); } |
| 9057 | if (RTVAL & (1 << 7)) { VEC_SET_CARRY_FLAG(7); } |
| 9058 | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9059 | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9060 | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9061 | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9062 | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9063 | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9064 | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9065 | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9066 | if (RTVAL & (1 << 8)) { VEC_SET_ZERO_FLAG(0); } |
| 9067 | if (RTVAL & (1 << 9)) { VEC_SET_ZERO_FLAG(1); } |
| 9068 | if (RTVAL & (1 << 10)) { VEC_SET_ZERO_FLAG(2); } |
| 9069 | if (RTVAL & (1 << 11)) { VEC_SET_ZERO_FLAG(3); } |
| 9070 | if (RTVAL & (1 << 12)) { VEC_SET_ZERO_FLAG(4); } |
| 9071 | if (RTVAL & (1 << 13)) { VEC_SET_ZERO_FLAG(5); } |
| 9072 | if (RTVAL & (1 << 14)) { VEC_SET_ZERO_FLAG(6); } |
| 9073 | if (RTVAL & (1 << 15)) { VEC_SET_ZERO_FLAG(7); } |
| 9074 | break; |
| 9075 | case 1: // bits 0-7 -> m_vflag[1][0..7] and compare flags; bits 8-15 -> m_vflag[4][0..7] and clip2 flags |
| 9076 | VEC_CLEAR_COMPARE_FLAGS(); |
| 9077 | VEC_CLEAR_CLIP2_FLAGS(); |
| 9078 | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9079 | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9080 | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9081 | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9082 | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9083 | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9084 | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9085 | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9086 | if (RTVAL & (1 << 0)) { VEC_SET_COMPARE_FLAG(0); } |
| 9087 | if (RTVAL & (1 << 1)) { VEC_SET_COMPARE_FLAG(1); } |
| 9088 | if (RTVAL & (1 << 2)) { VEC_SET_COMPARE_FLAG(2); } |
| 9089 | if (RTVAL & (1 << 3)) { VEC_SET_COMPARE_FLAG(3); } |
| 9090 | if (RTVAL & (1 << 4)) { VEC_SET_COMPARE_FLAG(4); } |
| 9091 | if (RTVAL & (1 << 5)) { VEC_SET_COMPARE_FLAG(5); } |
| 9092 | if (RTVAL & (1 << 6)) { VEC_SET_COMPARE_FLAG(6); } |
| 9093 | if (RTVAL & (1 << 7)) { VEC_SET_COMPARE_FLAG(7); } |
| 9094 | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9095 | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9096 | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9097 | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9098 | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9099 | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9100 | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9101 | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9102 | if (RTVAL & (1 << 8)) { VEC_SET_CLIP2_FLAG(0); } |
| 9103 | if (RTVAL & (1 << 9)) { VEC_SET_CLIP2_FLAG(1); } |
| 9104 | if (RTVAL & (1 << 10)) { VEC_SET_CLIP2_FLAG(2); } |
| 9105 | if (RTVAL & (1 << 11)) { VEC_SET_CLIP2_FLAG(3); } |
| 9106 | if (RTVAL & (1 << 12)) { VEC_SET_CLIP2_FLAG(4); } |
| 9107 | if (RTVAL & (1 << 13)) { VEC_SET_CLIP2_FLAG(5); } |
| 9108 | if (RTVAL & (1 << 14)) { VEC_SET_CLIP2_FLAG(6); } |
| 9109 | if (RTVAL & (1 << 15)) { VEC_SET_CLIP2_FLAG(7); } |
| 9110 | break; |
| 9111 | case 2: // bits 0-7 -> m_vflag[2][0..7] and clip1 flags; bits 8 and above are not consulted |
| 9112 | VEC_CLEAR_CLIP1_FLAGS(); |
| 9113 | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9114 | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9115 | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9116 | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9117 | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9118 | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9119 | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9120 | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9121 | if (RTVAL & (1 << 0)) { VEC_SET_CLIP1_FLAG(0); } |
| 9122 | if (RTVAL & (1 << 1)) { VEC_SET_CLIP1_FLAG(1); } |
| 9123 | if (RTVAL & (1 << 2)) { VEC_SET_CLIP1_FLAG(2); } |
| 9124 | if (RTVAL & (1 << 3)) { VEC_SET_CLIP1_FLAG(3); } |
| 9125 | if (RTVAL & (1 << 4)) { VEC_SET_CLIP1_FLAG(4); } |
| 9126 | if (RTVAL & (1 << 5)) { VEC_SET_CLIP1_FLAG(5); } |
| 9127 | if (RTVAL & (1 << 6)) { VEC_SET_CLIP1_FLAG(6); } |
| 9128 | if (RTVAL & (1 << 7)) { VEC_SET_CLIP1_FLAG(7); } |
| 9129 | break; |
| 9130 | } |
| 9131 | } |
| 9132 | |
| 9133 | static void cfunc_ctc2_simd(void *param) |
| 9134 | { |
| 9135 | ((rsp_device *)param)->ccfunc_ctc2_simd(); |
| 9136 | } |
| 9137 | #endif |
| 9138 | |
| 9139 | #if (!USE_SIMD || SIMUL_SIMD) |
| 9140 | inline void rsp_device::ccfunc_ctc2_scalar() |
| 9141 | { |
| 9142 | UINT32 op = m_rsp_state->arg0; |
| 9143 | switch(RDREG) |
| 9144 | { |
| 9145 | case 0: |
| 9146 | CLEAR_CARRY_FLAGS(); |
| 9147 | CLEAR_ZERO_FLAGS(); |
| 9148 | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9149 | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9150 | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9151 | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9152 | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9153 | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9154 | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9155 | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9156 | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 9157 | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 9158 | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 9159 | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 9160 | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 9161 | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 9162 | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 9163 | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 9164 | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9165 | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9166 | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9167 | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9168 | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9169 | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9170 | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9171 | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9172 | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 9173 | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 9174 | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 9175 | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 9176 | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 9177 | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 9178 | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 9179 | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 9180 | break; |
| 9181 | case 1: |
| 9182 | CLEAR_COMPARE_FLAGS(); |
| 9183 | CLEAR_CLIP2_FLAGS(); |
| 9184 | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9185 | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9186 | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9187 | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9188 | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9189 | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9190 | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9191 | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9192 | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 9193 | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 9194 | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 9195 | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 9196 | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 9197 | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 9198 | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 9199 | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 9200 | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9201 | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9202 | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9203 | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9204 | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9205 | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9206 | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9207 | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9208 | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 9209 | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 9210 | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 9211 | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 9212 | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 9213 | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 9214 | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 9215 | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 9216 | break; |
| 9217 | case 2: |
| 9218 | CLEAR_CLIP1_FLAGS(); |
| 9219 | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9220 | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9221 | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9222 | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9223 | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9224 | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9225 | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9226 | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9227 | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 9228 | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 9229 | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 9230 | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 9231 | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 9232 | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 9233 | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 9234 | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 9235 | break; |
| 9236 | } |
| 9237 | } |
| 9238 | |
| 9239 | static void cfunc_ctc2_scalar(void *param) |
| 9240 | { |
| 9241 | ((rsp_device *)param)->ccfunc_ctc2_scalar(); |
| 9242 | } |
| 9243 | #endif |
| 9244 | |
| 1346 | 9245 | /*************************************************************************** |
| 1347 | 9246 | CODE LOGGING HELPERS |
| 1348 | 9247 | ***************************************************************************/ |