trunk/src/emu/cpu/rsp/rsp.c
| r241957 | r241958 | |
| 7 | 7 | #include "emu.h" |
| 8 | 8 | #include "debugger.h" |
| 9 | 9 | #include "rsp.h" |
| 10 | | #include "rspdiv.h" |
| 11 | 10 | #include "rspfe.h" |
| 11 | #include "rspcp2.h" |
| 12 | #include "rspcp2d.h" |
| 12 | 13 | |
| 13 | 14 | |
| 14 | 15 | const device_type RSP = &device_creator<rsp_device>; |
| r241957 | r241958 | |
| 36 | 37 | #define UIMM16 ((UINT16)(op)) |
| 37 | 38 | #define UIMM26 (op & 0x03ffffff) |
| 38 | 39 | |
| 40 | #define RSVAL (m_rsp_state->r[RSREG]) |
| 41 | #define RTVAL (m_rsp_state->r[RTREG]) |
| 42 | #define RDVAL (m_rsp_state->r[RDREG]) |
| 43 | |
| 39 | 44 | #define JUMP_ABS(addr) { m_nextpc = 0x04001000 | (((addr) << 2) & 0xfff); } |
| 40 | 45 | #define JUMP_ABS_L(addr,l) { m_nextpc = 0x04001000 | (((addr) << 2) & 0xfff); m_rsp_state->r[l] = m_rsp_state->pc + 4; } |
| 41 | 46 | #define JUMP_REL(offset) { m_nextpc = 0x04001000 | ((m_rsp_state->pc + ((offset) << 2)) & 0xfff); } |
| r241957 | r241958 | |
| 44 | 49 | #define JUMP_PC_L(addr,l) { m_nextpc = 0x04001000 | ((addr) & 0xfff); m_rsp_state->r[l] = m_rsp_state->pc + 4; } |
| 45 | 50 | #define LINK(l) { m_rsp_state->r[l] = m_rsp_state->pc + 4; } |
| 46 | 51 | |
| 47 | | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 48 | | #define VREG_S(reg, offset) m_v[(reg)].s[(offset)] |
| 49 | | #define VREG_L(reg, offset) m_v[(reg)].l[(offset)] |
| 50 | | |
| 51 | | #define R_VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 52 | | #define R_VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] |
| 53 | | #define R_VREG_L(reg, offset) m_v[(reg)].l[(offset)] |
| 54 | | |
| 55 | | #define W_VREG_B(reg, offset, val) (m_v[(reg)].b[(offset)^1] = val) |
| 56 | | #define W_VREG_S(reg, offset, val) (m_v[(reg)].s[(offset)] = val) |
| 57 | | #define W_VREG_L(reg, offset, val) (m_v[(reg)].l[(offset)] = val) |
| 58 | | |
| 59 | | #define VEC_EL_2(x,z) (vector_elements[(x)][(z)]) |
| 60 | | |
| 61 | | #define ACCUM(x) m_accum[((x))].q |
| 62 | | #define ACCUM_H(x) m_accum[((x))].w[3] |
| 63 | | #define ACCUM_M(x) m_accum[((x))].w[2] |
| 64 | | #define ACCUM_L(x) m_accum[((x))].w[1] |
| 65 | | #define ACCUM_LL(x) m_accum[((x))].w[0] |
| 66 | | |
| 67 | | #define CARRY 0 |
| 68 | | #define COMPARE 1 |
| 69 | | #define CLIP1 2 |
| 70 | | #define ZERO 3 |
| 71 | | #define CLIP2 4 |
| 72 | | |
| 73 | 52 | #define CARRY_FLAG(x) (m_vflag[CARRY][x & 7] != 0 ? 0xffff : 0) |
| 74 | 53 | #define COMPARE_FLAG(x) (m_vflag[COMPARE][x & 7] != 0 ? 0xffff : 0) |
| 75 | 54 | #define CLIP1_FLAG(x) (m_vflag[CLIP1][x & 7] != 0 ? 0xffff : 0) |
| r241957 | r241958 | |
| 141 | 120 | , m_write32(NULL) |
| 142 | 121 | , m_rsp_state(NULL) |
| 143 | 122 | , m_exec_output(NULL) |
| 144 | | #if SIMUL_SIMD |
| 145 | | , m_old_reciprocal_res(0) |
| 146 | | , m_old_reciprocal_high(0) |
| 147 | | , m_old_dp_allowed(0) |
| 148 | | , m_scalar_reciprocal_res(0) |
| 149 | | , m_scalar_reciprocal_high(0) |
| 150 | | , m_scalar_dp_allowed(0) |
| 151 | | , m_simd_reciprocal_res(0) |
| 152 | | , m_simd_reciprocal_high(0) |
| 153 | | , m_simd_dp_allowed(0) |
| 154 | | #endif |
| 155 | 123 | , m_sr(0) |
| 156 | 124 | , m_step_count(0) |
| 157 | | #if USE_SIMD |
| 158 | | , m_accum_h(0) |
| 159 | | , m_accum_m(0) |
| 160 | | , m_accum_l(0) |
| 161 | | , m_accum_ll(0) |
| 162 | | #endif |
| 163 | | , m_reciprocal_res(0) |
| 164 | | , m_reciprocal_high(0) |
| 165 | | , m_dp_allowed(0) |
| 166 | 125 | , m_ppc(0) |
| 167 | 126 | , m_nextpc(0) |
| 168 | 127 | , m_dmem32(NULL) |
| r241957 | r241958 | |
| 179 | 138 | , m_sp_set_status_func(*this) |
| 180 | 139 | { |
| 181 | 140 | m_isdrc = mconfig.options().drc() ? true : false; |
| 182 | | memset(m_vres, 0, sizeof(m_vres)); |
| 183 | | memset(m_v, 0, sizeof(m_v)); |
| 184 | | memset(m_vflag, 0, sizeof(m_vflag)); |
| 185 | | #if SIMUL_SIMD |
| 186 | | memset(m_old_r, 0, sizeof(m_old_r)); |
| 187 | | memset(m_old_dmem, 0, sizeof(m_old_dmem)); |
| 188 | | memset(m_scalar_r, 0, sizeof(m_scalar_r)); |
| 189 | | memset(m_scalar_dmem, 0, sizeof(m_scalar_dmem)); |
| 190 | | #endif |
| 191 | | #if USE_SIMD |
| 192 | | memset(m_xv, 0, sizeof(m_xv)); |
| 193 | | memset(m_xvflag, 0, sizeof(m_xvflag)); |
| 194 | | #endif |
| 195 | | memset(m_accum, 0, sizeof(m_accum)); |
| 196 | 141 | } |
| 197 | 142 | |
| 198 | 143 | offs_t rsp_device::disasm_disassemble(char *buffer, offs_t pc, const UINT8 *oprom, const UINT8 *opram, UINT32 options) |
| r241957 | r241958 | |
| 201 | 146 | return CPU_DISASSEMBLE_NAME( rsp )(this, buffer, pc, oprom, opram, options); |
| 202 | 147 | } |
| 203 | 148 | |
| 204 | | inline UINT8 rsp_device::READ8(UINT32 address) |
| 149 | UINT8 rsp_device::READ8(UINT32 address) |
| 205 | 150 | { |
| 206 | 151 | UINT8 ret; |
| 207 | 152 | address &= 0xfff; |
| 208 | 153 | ret = m_program->read_byte(address); |
| 154 | //printf("R8:%08x=%02x\n", address, ret); |
| 209 | 155 | return ret; |
| 210 | 156 | } |
| 211 | 157 | |
| 212 | | inline UINT16 rsp_device::READ16(UINT32 address) |
| 158 | UINT16 rsp_device::READ16(UINT32 address) |
| 213 | 159 | { |
| 214 | 160 | UINT16 ret; |
| 215 | 161 | address &= 0xfff; |
| 216 | 162 | |
| 217 | 163 | ret = (m_program->read_byte(address) << 8) | (m_program->read_byte(address + 1) & 0xff); |
| 218 | 164 | |
| 165 | //printf("R16:%08x=%04x\n", address, ret); |
| 219 | 166 | return ret; |
| 220 | 167 | } |
| 221 | 168 | |
| 222 | | inline UINT32 rsp_device::READ32(UINT32 address) |
| 169 | UINT32 rsp_device::READ32(UINT32 address) |
| 223 | 170 | { |
| 224 | 171 | UINT32 ret; |
| 225 | 172 | address &= 0xfff; |
| r241957 | r241958 | |
| 229 | 176 | (m_program->read_byte(address + 2) << 8) | |
| 230 | 177 | (m_program->read_byte(address + 3) << 0); |
| 231 | 178 | |
| 179 | //printf("R32:%08x=%08x\n", address, ret); |
| 232 | 180 | return ret; |
| 233 | 181 | } |
| 234 | 182 | |
| r241957 | r241958 | |
| 236 | 184 | { |
| 237 | 185 | address &= 0xfff; |
| 238 | 186 | m_program->write_byte(address, data); |
| 187 | //printf("W8:%08x=%02x\n", address, data); |
| 239 | 188 | } |
| 240 | 189 | |
| 241 | 190 | void rsp_device::WRITE16(UINT32 address, UINT16 data) |
| r241957 | r241958 | |
| 244 | 193 | |
| 245 | 194 | m_program->write_byte(address, data >> 8); |
| 246 | 195 | m_program->write_byte(address + 1, data & 0xff); |
| 196 | //printf("W16:%08x=%04x\n", address, data); |
| 247 | 197 | } |
| 248 | 198 | |
| 249 | 199 | void rsp_device::WRITE32(UINT32 address, UINT32 data) |
| r241957 | r241958 | |
| 254 | 204 | m_program->write_byte(address + 1, (data >> 16) & 0xff); |
| 255 | 205 | m_program->write_byte(address + 2, (data >> 8) & 0xff); |
| 256 | 206 | m_program->write_byte(address + 3, data & 0xff); |
| 207 | //printf("W32:%08x=%08x\n", address, data); |
| 257 | 208 | } |
| 258 | 209 | |
| 259 | 210 | /*****************************************************************************/ |
| r241957 | r241958 | |
| 370 | 321 | m_direct = &m_program->direct(); |
| 371 | 322 | resolve_cb(); |
| 372 | 323 | |
| 324 | if (m_isdrc) |
| 325 | { |
| 326 | m_cop2 = auto_alloc(machine(), rsp_cop2_drc(*this, machine())); |
| 327 | } |
| 328 | else |
| 329 | { |
| 330 | m_cop2 = auto_alloc(machine(), rsp_cop2(*this, machine())); |
| 331 | } |
| 332 | m_cop2->init(); |
| 333 | m_cop2->start(); |
| 334 | |
| 373 | 335 | // RSP registers should power on to a random state |
| 374 | 336 | for(int regIdx = 0; regIdx < 32; regIdx++ ) |
| 375 | 337 | { |
| 376 | 338 | m_rsp_state->r[regIdx] = 0; |
| 377 | | m_v[regIdx].d[0] = 0; |
| 378 | | m_v[regIdx].d[1] = 0; |
| 379 | 339 | } |
| 380 | | CLEAR_CARRY_FLAGS(); |
| 381 | | CLEAR_COMPARE_FLAGS(); |
| 382 | | CLEAR_CLIP1_FLAGS(); |
| 383 | | CLEAR_ZERO_FLAGS(); |
| 384 | | CLEAR_CLIP2_FLAGS(); |
| 385 | | m_reciprocal_res = 0; |
| 386 | | m_reciprocal_high = 0; |
| 387 | 340 | |
| 388 | | // Accumulators do not power on to a random state |
| 389 | | for(int accumIdx = 0; accumIdx < 8; accumIdx++ ) |
| 390 | | { |
| 391 | | m_accum[accumIdx].q = 0; |
| 392 | | } |
| 393 | | |
| 394 | 341 | m_sr = RSP_STATUS_HALT; |
| 395 | 342 | m_step_count = 0; |
| 396 | 343 | |
| r241957 | r241958 | |
| 544 | 491 | |
| 545 | 492 | void rsp_device::state_string_export(const device_state_entry &entry, astring &string) |
| 546 | 493 | { |
| 547 | | switch (entry.index()) |
| 494 | const int index = entry.index(); |
| 495 | if (index >= RSP_V0 && index <= RSP_V31) |
| 548 | 496 | { |
| 549 | | case STATE_GENFLAGS: |
| 550 | | string.printf("%s",""); |
| 551 | | break; |
| 552 | | |
| 553 | | #if USE_SIMD |
| 554 | | case RSP_V0: |
| 555 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 0], 7), (UINT16)_mm_extract_epi16(m_xv[ 0], 6), (UINT16)_mm_extract_epi16(m_xv[ 0], 5), (UINT16)_mm_extract_epi16(m_xv[ 0], 4), (UINT16)_mm_extract_epi16(m_xv[ 0], 3), (UINT16)_mm_extract_epi16(m_xv[ 0], 2), (UINT16)_mm_extract_epi16(m_xv[ 0], 1), (UINT16)_mm_extract_epi16(m_xv[ 0], 0)); |
| 556 | | break; |
| 557 | | case RSP_V1: |
| 558 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 1], 7), (UINT16)_mm_extract_epi16(m_xv[ 1], 6), (UINT16)_mm_extract_epi16(m_xv[ 1], 5), (UINT16)_mm_extract_epi16(m_xv[ 1], 4), (UINT16)_mm_extract_epi16(m_xv[ 1], 3), (UINT16)_mm_extract_epi16(m_xv[ 1], 2), (UINT16)_mm_extract_epi16(m_xv[ 1], 1), (UINT16)_mm_extract_epi16(m_xv[ 1], 0)); |
| 559 | | break; |
| 560 | | case RSP_V2: |
| 561 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 2], 7), (UINT16)_mm_extract_epi16(m_xv[ 2], 6), (UINT16)_mm_extract_epi16(m_xv[ 2], 5), (UINT16)_mm_extract_epi16(m_xv[ 2], 4), (UINT16)_mm_extract_epi16(m_xv[ 2], 3), (UINT16)_mm_extract_epi16(m_xv[ 2], 2), (UINT16)_mm_extract_epi16(m_xv[ 2], 1), (UINT16)_mm_extract_epi16(m_xv[ 2], 0)); |
| 562 | | break; |
| 563 | | case RSP_V3: |
| 564 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 3], 7), (UINT16)_mm_extract_epi16(m_xv[ 3], 6), (UINT16)_mm_extract_epi16(m_xv[ 3], 5), (UINT16)_mm_extract_epi16(m_xv[ 3], 4), (UINT16)_mm_extract_epi16(m_xv[ 3], 3), (UINT16)_mm_extract_epi16(m_xv[ 3], 2), (UINT16)_mm_extract_epi16(m_xv[ 3], 1), (UINT16)_mm_extract_epi16(m_xv[ 3], 0)); |
| 565 | | break; |
| 566 | | case RSP_V4: |
| 567 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 4], 7), (UINT16)_mm_extract_epi16(m_xv[ 4], 6), (UINT16)_mm_extract_epi16(m_xv[ 4], 5), (UINT16)_mm_extract_epi16(m_xv[ 4], 4), (UINT16)_mm_extract_epi16(m_xv[ 4], 3), (UINT16)_mm_extract_epi16(m_xv[ 4], 2), (UINT16)_mm_extract_epi16(m_xv[ 4], 1), (UINT16)_mm_extract_epi16(m_xv[ 4], 0)); |
| 568 | | break; |
| 569 | | case RSP_V5: |
| 570 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 5], 7), (UINT16)_mm_extract_epi16(m_xv[ 5], 6), (UINT16)_mm_extract_epi16(m_xv[ 5], 5), (UINT16)_mm_extract_epi16(m_xv[ 5], 4), (UINT16)_mm_extract_epi16(m_xv[ 5], 3), (UINT16)_mm_extract_epi16(m_xv[ 5], 2), (UINT16)_mm_extract_epi16(m_xv[ 5], 1), (UINT16)_mm_extract_epi16(m_xv[ 5], 0)); |
| 571 | | break; |
| 572 | | case RSP_V6: |
| 573 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 6], 7), (UINT16)_mm_extract_epi16(m_xv[ 6], 6), (UINT16)_mm_extract_epi16(m_xv[ 6], 5), (UINT16)_mm_extract_epi16(m_xv[ 6], 4), (UINT16)_mm_extract_epi16(m_xv[ 6], 3), (UINT16)_mm_extract_epi16(m_xv[ 6], 2), (UINT16)_mm_extract_epi16(m_xv[ 6], 1), (UINT16)_mm_extract_epi16(m_xv[ 6], 0)); |
| 574 | | break; |
| 575 | | case RSP_V7: |
| 576 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 7], 7), (UINT16)_mm_extract_epi16(m_xv[ 7], 6), (UINT16)_mm_extract_epi16(m_xv[ 7], 5), (UINT16)_mm_extract_epi16(m_xv[ 7], 4), (UINT16)_mm_extract_epi16(m_xv[ 7], 3), (UINT16)_mm_extract_epi16(m_xv[ 7], 2), (UINT16)_mm_extract_epi16(m_xv[ 7], 1), (UINT16)_mm_extract_epi16(m_xv[ 7], 0)); |
| 577 | | break; |
| 578 | | case RSP_V8: |
| 579 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 8], 7), (UINT16)_mm_extract_epi16(m_xv[ 8], 6), (UINT16)_mm_extract_epi16(m_xv[ 8], 5), (UINT16)_mm_extract_epi16(m_xv[ 8], 4), (UINT16)_mm_extract_epi16(m_xv[ 8], 3), (UINT16)_mm_extract_epi16(m_xv[ 8], 2), (UINT16)_mm_extract_epi16(m_xv[ 8], 1), (UINT16)_mm_extract_epi16(m_xv[ 8], 0)); |
| 580 | | break; |
| 581 | | case RSP_V9: |
| 582 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 9], 7), (UINT16)_mm_extract_epi16(m_xv[ 9], 6), (UINT16)_mm_extract_epi16(m_xv[ 9], 5), (UINT16)_mm_extract_epi16(m_xv[ 9], 4), (UINT16)_mm_extract_epi16(m_xv[ 9], 3), (UINT16)_mm_extract_epi16(m_xv[ 9], 2), (UINT16)_mm_extract_epi16(m_xv[ 9], 1), (UINT16)_mm_extract_epi16(m_xv[ 9], 0)); |
| 583 | | break; |
| 584 | | case RSP_V10: |
| 585 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[10], 7), (UINT16)_mm_extract_epi16(m_xv[10], 6), (UINT16)_mm_extract_epi16(m_xv[10], 5), (UINT16)_mm_extract_epi16(m_xv[10], 4), (UINT16)_mm_extract_epi16(m_xv[10], 3), (UINT16)_mm_extract_epi16(m_xv[10], 2), (UINT16)_mm_extract_epi16(m_xv[10], 1), (UINT16)_mm_extract_epi16(m_xv[10], 0)); |
| 586 | | break; |
| 587 | | case RSP_V11: |
| 588 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[11], 7), (UINT16)_mm_extract_epi16(m_xv[11], 6), (UINT16)_mm_extract_epi16(m_xv[11], 5), (UINT16)_mm_extract_epi16(m_xv[11], 4), (UINT16)_mm_extract_epi16(m_xv[11], 3), (UINT16)_mm_extract_epi16(m_xv[11], 2), (UINT16)_mm_extract_epi16(m_xv[11], 1), (UINT16)_mm_extract_epi16(m_xv[11], 0)); |
| 589 | | break; |
| 590 | | case RSP_V12: |
| 591 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[12], 7), (UINT16)_mm_extract_epi16(m_xv[12], 6), (UINT16)_mm_extract_epi16(m_xv[12], 5), (UINT16)_mm_extract_epi16(m_xv[12], 4), (UINT16)_mm_extract_epi16(m_xv[12], 3), (UINT16)_mm_extract_epi16(m_xv[12], 2), (UINT16)_mm_extract_epi16(m_xv[12], 1), (UINT16)_mm_extract_epi16(m_xv[12], 0)); |
| 592 | | break; |
| 593 | | case RSP_V13: |
| 594 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[13], 7), (UINT16)_mm_extract_epi16(m_xv[13], 6), (UINT16)_mm_extract_epi16(m_xv[13], 5), (UINT16)_mm_extract_epi16(m_xv[13], 4), (UINT16)_mm_extract_epi16(m_xv[13], 3), (UINT16)_mm_extract_epi16(m_xv[13], 2), (UINT16)_mm_extract_epi16(m_xv[13], 1), (UINT16)_mm_extract_epi16(m_xv[13], 0)); |
| 595 | | break; |
| 596 | | case RSP_V14: |
| 597 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[14], 7), (UINT16)_mm_extract_epi16(m_xv[14], 6), (UINT16)_mm_extract_epi16(m_xv[14], 5), (UINT16)_mm_extract_epi16(m_xv[14], 4), (UINT16)_mm_extract_epi16(m_xv[14], 3), (UINT16)_mm_extract_epi16(m_xv[14], 2), (UINT16)_mm_extract_epi16(m_xv[14], 1), (UINT16)_mm_extract_epi16(m_xv[14], 0)); |
| 598 | | break; |
| 599 | | case RSP_V15: |
| 600 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[15], 7), (UINT16)_mm_extract_epi16(m_xv[15], 6), (UINT16)_mm_extract_epi16(m_xv[15], 5), (UINT16)_mm_extract_epi16(m_xv[15], 4), (UINT16)_mm_extract_epi16(m_xv[15], 3), (UINT16)_mm_extract_epi16(m_xv[15], 2), (UINT16)_mm_extract_epi16(m_xv[15], 1), (UINT16)_mm_extract_epi16(m_xv[15], 0)); |
| 601 | | break; |
| 602 | | case RSP_V16: |
| 603 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[16], 7), (UINT16)_mm_extract_epi16(m_xv[16], 6), (UINT16)_mm_extract_epi16(m_xv[16], 5), (UINT16)_mm_extract_epi16(m_xv[16], 4), (UINT16)_mm_extract_epi16(m_xv[16], 3), (UINT16)_mm_extract_epi16(m_xv[16], 2), (UINT16)_mm_extract_epi16(m_xv[16], 1), (UINT16)_mm_extract_epi16(m_xv[16], 0)); |
| 604 | | break; |
| 605 | | case RSP_V17: |
| 606 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[17], 7), (UINT16)_mm_extract_epi16(m_xv[17], 6), (UINT16)_mm_extract_epi16(m_xv[17], 5), (UINT16)_mm_extract_epi16(m_xv[17], 4), (UINT16)_mm_extract_epi16(m_xv[17], 3), (UINT16)_mm_extract_epi16(m_xv[17], 2), (UINT16)_mm_extract_epi16(m_xv[17], 1), (UINT16)_mm_extract_epi16(m_xv[17], 0)); |
| 607 | | break; |
| 608 | | case RSP_V18: |
| 609 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[18], 7), (UINT16)_mm_extract_epi16(m_xv[18], 6), (UINT16)_mm_extract_epi16(m_xv[18], 5), (UINT16)_mm_extract_epi16(m_xv[18], 4), (UINT16)_mm_extract_epi16(m_xv[18], 3), (UINT16)_mm_extract_epi16(m_xv[18], 2), (UINT16)_mm_extract_epi16(m_xv[18], 1), (UINT16)_mm_extract_epi16(m_xv[18], 0)); |
| 610 | | break; |
| 611 | | case RSP_V19: |
| 612 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[19], 7), (UINT16)_mm_extract_epi16(m_xv[19], 6), (UINT16)_mm_extract_epi16(m_xv[19], 5), (UINT16)_mm_extract_epi16(m_xv[19], 4), (UINT16)_mm_extract_epi16(m_xv[19], 3), (UINT16)_mm_extract_epi16(m_xv[19], 2), (UINT16)_mm_extract_epi16(m_xv[19], 1), (UINT16)_mm_extract_epi16(m_xv[19], 0)); |
| 613 | | break; |
| 614 | | case RSP_V20: |
| 615 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[20], 7), (UINT16)_mm_extract_epi16(m_xv[20], 6), (UINT16)_mm_extract_epi16(m_xv[20], 5), (UINT16)_mm_extract_epi16(m_xv[20], 4), (UINT16)_mm_extract_epi16(m_xv[20], 3), (UINT16)_mm_extract_epi16(m_xv[20], 2), (UINT16)_mm_extract_epi16(m_xv[20], 1), (UINT16)_mm_extract_epi16(m_xv[20], 0)); |
| 616 | | break; |
| 617 | | case RSP_V21: |
| 618 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[21], 7), (UINT16)_mm_extract_epi16(m_xv[21], 6), (UINT16)_mm_extract_epi16(m_xv[21], 5), (UINT16)_mm_extract_epi16(m_xv[21], 4), (UINT16)_mm_extract_epi16(m_xv[21], 3), (UINT16)_mm_extract_epi16(m_xv[21], 2), (UINT16)_mm_extract_epi16(m_xv[21], 1), (UINT16)_mm_extract_epi16(m_xv[21], 0)); |
| 619 | | break; |
| 620 | | case RSP_V22: |
| 621 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[22], 7), (UINT16)_mm_extract_epi16(m_xv[22], 6), (UINT16)_mm_extract_epi16(m_xv[22], 5), (UINT16)_mm_extract_epi16(m_xv[22], 4), (UINT16)_mm_extract_epi16(m_xv[22], 3), (UINT16)_mm_extract_epi16(m_xv[22], 2), (UINT16)_mm_extract_epi16(m_xv[22], 1), (UINT16)_mm_extract_epi16(m_xv[22], 0)); |
| 622 | | break; |
| 623 | | case RSP_V23: |
| 624 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[23], 7), (UINT16)_mm_extract_epi16(m_xv[23], 6), (UINT16)_mm_extract_epi16(m_xv[23], 5), (UINT16)_mm_extract_epi16(m_xv[23], 4), (UINT16)_mm_extract_epi16(m_xv[23], 3), (UINT16)_mm_extract_epi16(m_xv[23], 2), (UINT16)_mm_extract_epi16(m_xv[23], 1), (UINT16)_mm_extract_epi16(m_xv[23], 0)); |
| 625 | | break; |
| 626 | | case RSP_V24: |
| 627 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[24], 7), (UINT16)_mm_extract_epi16(m_xv[24], 6), (UINT16)_mm_extract_epi16(m_xv[24], 5), (UINT16)_mm_extract_epi16(m_xv[24], 4), (UINT16)_mm_extract_epi16(m_xv[24], 3), (UINT16)_mm_extract_epi16(m_xv[24], 2), (UINT16)_mm_extract_epi16(m_xv[24], 1), (UINT16)_mm_extract_epi16(m_xv[24], 0)); |
| 628 | | break; |
| 629 | | case RSP_V25: |
| 630 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[25], 7), (UINT16)_mm_extract_epi16(m_xv[25], 6), (UINT16)_mm_extract_epi16(m_xv[25], 5), (UINT16)_mm_extract_epi16(m_xv[25], 4), (UINT16)_mm_extract_epi16(m_xv[25], 3), (UINT16)_mm_extract_epi16(m_xv[25], 2), (UINT16)_mm_extract_epi16(m_xv[25], 1), (UINT16)_mm_extract_epi16(m_xv[25], 0)); |
| 631 | | break; |
| 632 | | case RSP_V26: |
| 633 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[26], 7), (UINT16)_mm_extract_epi16(m_xv[26], 6), (UINT16)_mm_extract_epi16(m_xv[26], 5), (UINT16)_mm_extract_epi16(m_xv[26], 4), (UINT16)_mm_extract_epi16(m_xv[26], 3), (UINT16)_mm_extract_epi16(m_xv[26], 2), (UINT16)_mm_extract_epi16(m_xv[26], 1), (UINT16)_mm_extract_epi16(m_xv[26], 0)); |
| 634 | | break; |
| 635 | | case RSP_V27: |
| 636 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[27], 7), (UINT16)_mm_extract_epi16(m_xv[27], 6), (UINT16)_mm_extract_epi16(m_xv[27], 5), (UINT16)_mm_extract_epi16(m_xv[27], 4), (UINT16)_mm_extract_epi16(m_xv[27], 3), (UINT16)_mm_extract_epi16(m_xv[27], 2), (UINT16)_mm_extract_epi16(m_xv[27], 1), (UINT16)_mm_extract_epi16(m_xv[27], 0)); |
| 637 | | break; |
| 638 | | case RSP_V28: |
| 639 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[28], 7), (UINT16)_mm_extract_epi16(m_xv[28], 6), (UINT16)_mm_extract_epi16(m_xv[28], 5), (UINT16)_mm_extract_epi16(m_xv[28], 4), (UINT16)_mm_extract_epi16(m_xv[28], 3), (UINT16)_mm_extract_epi16(m_xv[28], 2), (UINT16)_mm_extract_epi16(m_xv[28], 1), (UINT16)_mm_extract_epi16(m_xv[28], 0)); |
| 640 | | break; |
| 641 | | case RSP_V29: |
| 642 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[29], 7), (UINT16)_mm_extract_epi16(m_xv[29], 6), (UINT16)_mm_extract_epi16(m_xv[29], 5), (UINT16)_mm_extract_epi16(m_xv[29], 4), (UINT16)_mm_extract_epi16(m_xv[29], 3), (UINT16)_mm_extract_epi16(m_xv[29], 2), (UINT16)_mm_extract_epi16(m_xv[29], 1), (UINT16)_mm_extract_epi16(m_xv[29], 0)); |
| 643 | | break; |
| 644 | | case RSP_V30: |
| 645 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[30], 7), (UINT16)_mm_extract_epi16(m_xv[30], 6), (UINT16)_mm_extract_epi16(m_xv[30], 5), (UINT16)_mm_extract_epi16(m_xv[30], 4), (UINT16)_mm_extract_epi16(m_xv[30], 3), (UINT16)_mm_extract_epi16(m_xv[30], 2), (UINT16)_mm_extract_epi16(m_xv[30], 1), (UINT16)_mm_extract_epi16(m_xv[30], 0)); |
| 646 | | break; |
| 647 | | case RSP_V31: |
| 648 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[31], 7), (UINT16)_mm_extract_epi16(m_xv[31], 6), (UINT16)_mm_extract_epi16(m_xv[31], 5), (UINT16)_mm_extract_epi16(m_xv[31], 4), (UINT16)_mm_extract_epi16(m_xv[31], 3), (UINT16)_mm_extract_epi16(m_xv[31], 2), (UINT16)_mm_extract_epi16(m_xv[31], 1), (UINT16)_mm_extract_epi16(m_xv[31], 0)); |
| 649 | | break; |
| 650 | | #else |
| 651 | | case RSP_V0: |
| 652 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); |
| 653 | | break; |
| 654 | | case RSP_V1: |
| 655 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); |
| 656 | | break; |
| 657 | | case RSP_V2: |
| 658 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); |
| 659 | | break; |
| 660 | | case RSP_V3: |
| 661 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); |
| 662 | | break; |
| 663 | | case RSP_V4: |
| 664 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); |
| 665 | | break; |
| 666 | | case RSP_V5: |
| 667 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); |
| 668 | | break; |
| 669 | | case RSP_V6: |
| 670 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); |
| 671 | | break; |
| 672 | | case RSP_V7: |
| 673 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); |
| 674 | | break; |
| 675 | | case RSP_V8: |
| 676 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); |
| 677 | | break; |
| 678 | | case RSP_V9: |
| 679 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); |
| 680 | | break; |
| 681 | | case RSP_V10: |
| 682 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); |
| 683 | | break; |
| 684 | | case RSP_V11: |
| 685 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); |
| 686 | | break; |
| 687 | | case RSP_V12: |
| 688 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); |
| 689 | | break; |
| 690 | | case RSP_V13: |
| 691 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); |
| 692 | | break; |
| 693 | | case RSP_V14: |
| 694 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); |
| 695 | | break; |
| 696 | | case RSP_V15: |
| 697 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); |
| 698 | | break; |
| 699 | | case RSP_V16: |
| 700 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); |
| 701 | | break; |
| 702 | | case RSP_V17: |
| 703 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); |
| 704 | | break; |
| 705 | | case RSP_V18: |
| 706 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); |
| 707 | | break; |
| 708 | | case RSP_V19: |
| 709 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); |
| 710 | | break; |
| 711 | | case RSP_V20: |
| 712 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); |
| 713 | | break; |
| 714 | | case RSP_V21: |
| 715 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); |
| 716 | | break; |
| 717 | | case RSP_V22: |
| 718 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); |
| 719 | | break; |
| 720 | | case RSP_V23: |
| 721 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); |
| 722 | | break; |
| 723 | | case RSP_V24: |
| 724 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); |
| 725 | | break; |
| 726 | | case RSP_V25: |
| 727 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); |
| 728 | | break; |
| 729 | | case RSP_V26: |
| 730 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); |
| 731 | | break; |
| 732 | | case RSP_V27: |
| 733 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); |
| 734 | | break; |
| 735 | | case RSP_V28: |
| 736 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); |
| 737 | | break; |
| 738 | | case RSP_V29: |
| 739 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); |
| 740 | | break; |
| 741 | | case RSP_V30: |
| 742 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); |
| 743 | | break; |
| 744 | | case RSP_V31: |
| 745 | | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); |
| 746 | | break; |
| 747 | | #endif |
| 748 | | |
| 497 | m_cop2->state_string_export(index, string); |
| 749 | 498 | } |
| 499 | else if (index == STATE_GENFLAGS) |
| 500 | { |
| 501 | string.printf("%s",""); |
| 502 | } |
| 750 | 503 | } |
| 751 | 504 | |
| 752 | 505 | void rsp_device::device_stop() |
| r241957 | r241958 | |
| 795 | 548 | m_exec_output = NULL; |
| 796 | 549 | |
| 797 | 550 | /* clean up the DRC */ |
| 798 | | if ( m_drcuml ) |
| 551 | if (m_drcuml) |
| 799 | 552 | { |
| 800 | 553 | auto_free(machine(), m_drcuml); |
| 801 | 554 | } |
| 802 | | if (m_drcfe ) |
| 555 | if (m_drcfe) |
| 803 | 556 | { |
| 804 | 557 | auto_free(machine(), m_drcfe); |
| 805 | 558 | } |
| 806 | | } |
| 807 | 559 | |
| 808 | | void rsp_device::device_reset() |
| 809 | | { |
| 810 | | m_nextpc = ~0; |
| 811 | | } |
| 812 | | |
| 813 | | void rsp_device::handle_lwc2(UINT32 op) |
| 814 | | { |
| 815 | | int i, end; |
| 816 | | UINT32 ea; |
| 817 | | int dest = (op >> 16) & 0x1f; |
| 818 | | int base = (op >> 21) & 0x1f; |
| 819 | | int index = (op >> 7) & 0xf; |
| 820 | | int offset = (op & 0x7f); |
| 821 | | if (offset & 0x40) |
| 822 | | offset |= 0xffffffc0; |
| 823 | | |
| 824 | | switch ((op >> 11) & 0x1f) |
| 560 | if (m_cop2) |
| 825 | 561 | { |
| 826 | | case 0x00: /* LBV */ |
| 827 | | { |
| 828 | | // 31 25 20 15 10 6 0 |
| 829 | | // -------------------------------------------------- |
| 830 | | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 831 | | // -------------------------------------------------- |
| 832 | | // |
| 833 | | // Load 1 byte to vector byte index |
| 834 | | |
| 835 | | ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 836 | | VREG_B(dest, index) = READ8(ea); |
| 837 | | break; |
| 838 | | } |
| 839 | | case 0x01: /* LSV */ |
| 840 | | { |
| 841 | | // 31 25 20 15 10 6 0 |
| 842 | | // -------------------------------------------------- |
| 843 | | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 844 | | // -------------------------------------------------- |
| 845 | | // |
| 846 | | // Loads 2 bytes starting from vector byte index |
| 847 | | |
| 848 | | ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 849 | | |
| 850 | | end = index + 2; |
| 851 | | |
| 852 | | for (i=index; i < end; i++) |
| 853 | | { |
| 854 | | VREG_B(dest, i) = READ8(ea); |
| 855 | | ea++; |
| 856 | | } |
| 857 | | break; |
| 858 | | } |
| 859 | | case 0x02: /* LLV */ |
| 860 | | { |
| 861 | | // 31 25 20 15 10 6 0 |
| 862 | | // -------------------------------------------------- |
| 863 | | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 864 | | // -------------------------------------------------- |
| 865 | | // |
| 866 | | // Loads 4 bytes starting from vector byte index |
| 867 | | |
| 868 | | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 869 | | |
| 870 | | end = index + 4; |
| 871 | | |
| 872 | | for (i=index; i < end; i++) |
| 873 | | { |
| 874 | | VREG_B(dest, i) = READ8(ea); |
| 875 | | ea++; |
| 876 | | } |
| 877 | | break; |
| 878 | | } |
| 879 | | case 0x03: /* LDV */ |
| 880 | | { |
| 881 | | // 31 25 20 15 10 6 0 |
| 882 | | // -------------------------------------------------- |
| 883 | | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 884 | | // -------------------------------------------------- |
| 885 | | // |
| 886 | | // Loads 8 bytes starting from vector byte index |
| 887 | | |
| 888 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 889 | | |
| 890 | | end = index + 8; |
| 891 | | |
| 892 | | for (i=index; i < end; i++) |
| 893 | | { |
| 894 | | VREG_B(dest, i) = READ8(ea); |
| 895 | | ea++; |
| 896 | | } |
| 897 | | break; |
| 898 | | } |
| 899 | | case 0x04: /* LQV */ |
| 900 | | { |
| 901 | | // 31 25 20 15 10 6 0 |
| 902 | | // -------------------------------------------------- |
| 903 | | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 904 | | // -------------------------------------------------- |
| 905 | | // |
| 906 | | // Loads up to 16 bytes starting from vector byte index |
| 907 | | |
| 908 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 909 | | |
| 910 | | end = index + (16 - (ea & 0xf)); |
| 911 | | if (end > 16) end = 16; |
| 912 | | |
| 913 | | for (i=index; i < end; i++) |
| 914 | | { |
| 915 | | VREG_B(dest, i) = READ8(ea); |
| 916 | | ea++; |
| 917 | | } |
| 918 | | break; |
| 919 | | } |
| 920 | | case 0x05: /* LRV */ |
| 921 | | { |
| 922 | | // 31 25 20 15 10 6 0 |
| 923 | | // -------------------------------------------------- |
| 924 | | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 925 | | // -------------------------------------------------- |
| 926 | | // |
| 927 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 928 | | |
| 929 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 930 | | |
| 931 | | index = 16 - ((ea & 0xf) - index); |
| 932 | | end = 16; |
| 933 | | ea &= ~0xf; |
| 934 | | |
| 935 | | for (i=index; i < end; i++) |
| 936 | | { |
| 937 | | VREG_B(dest, i) = READ8(ea); |
| 938 | | ea++; |
| 939 | | } |
| 940 | | break; |
| 941 | | } |
| 942 | | case 0x06: /* LPV */ |
| 943 | | { |
| 944 | | // 31 25 20 15 10 6 0 |
| 945 | | // -------------------------------------------------- |
| 946 | | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 947 | | // -------------------------------------------------- |
| 948 | | // |
| 949 | | // Loads a byte as the upper 8 bits of each element |
| 950 | | |
| 951 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 952 | | |
| 953 | | for (i=0; i < 8; i++) |
| 954 | | { |
| 955 | | VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 956 | | } |
| 957 | | break; |
| 958 | | } |
| 959 | | case 0x07: /* LUV */ |
| 960 | | { |
| 961 | | // 31 25 20 15 10 6 0 |
| 962 | | // -------------------------------------------------- |
| 963 | | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 964 | | // -------------------------------------------------- |
| 965 | | // |
| 966 | | // Loads a byte as the bits 14-7 of each element |
| 967 | | |
| 968 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 969 | | |
| 970 | | for (i=0; i < 8; i++) |
| 971 | | { |
| 972 | | VREG_S(dest, i) = READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 973 | | } |
| 974 | | break; |
| 975 | | } |
| 976 | | case 0x08: /* LHV */ |
| 977 | | { |
| 978 | | // 31 25 20 15 10 6 0 |
| 979 | | // -------------------------------------------------- |
| 980 | | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 981 | | // -------------------------------------------------- |
| 982 | | // |
| 983 | | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 984 | | |
| 985 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 986 | | |
| 987 | | for (i=0; i < 8; i++) |
| 988 | | { |
| 989 | | VREG_S(dest, i) = READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 990 | | } |
| 991 | | break; |
| 992 | | } |
| 993 | | case 0x09: /* LFV */ |
| 994 | | { |
| 995 | | // 31 25 20 15 10 6 0 |
| 996 | | // -------------------------------------------------- |
| 997 | | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 998 | | // -------------------------------------------------- |
| 999 | | // |
| 1000 | | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 1001 | | |
| 1002 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1003 | | |
| 1004 | | // not sure what happens if 16-byte boundary is crossed... |
| 1005 | | |
| 1006 | | end = (index >> 1) + 4; |
| 1007 | | |
| 1008 | | for (i=index >> 1; i < end; i++) |
| 1009 | | { |
| 1010 | | VREG_S(dest, i) = READ8(ea) << 7; |
| 1011 | | ea += 4; |
| 1012 | | } |
| 1013 | | break; |
| 1014 | | } |
| 1015 | | case 0x0a: /* LWV */ |
| 1016 | | { |
| 1017 | | // 31 25 20 15 10 6 0 |
| 1018 | | // -------------------------------------------------- |
| 1019 | | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1020 | | // -------------------------------------------------- |
| 1021 | | // |
| 1022 | | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1023 | | // after byte index 15 |
| 1024 | | |
| 1025 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1026 | | |
| 1027 | | // not sure what happens if 16-byte boundary is crossed... |
| 1028 | | if ((ea & 0xf) > 0) fatalerror("RSP: LWV: 16-byte boundary crossing at %08X, recheck this!\n", m_ppc); |
| 1029 | | |
| 1030 | | end = (16 - index) + 16; |
| 1031 | | |
| 1032 | | for (i=(16 - index); i < end; i++) |
| 1033 | | { |
| 1034 | | VREG_B(dest, i & 0xf) = READ8(ea); |
| 1035 | | ea += 4; |
| 1036 | | } |
| 1037 | | break; |
| 1038 | | } |
| 1039 | | case 0x0b: /* LTV */ |
| 1040 | | { |
| 1041 | | // 31 25 20 15 10 6 0 |
| 1042 | | // -------------------------------------------------- |
| 1043 | | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1044 | | // -------------------------------------------------- |
| 1045 | | // |
| 1046 | | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 1047 | | |
| 1048 | | // FIXME: has a small problem with odd indices |
| 1049 | | |
| 1050 | | int element; |
| 1051 | | int vs = dest; |
| 1052 | | int ve = dest + 8; |
| 1053 | | if (ve > 32) |
| 1054 | | ve = 32; |
| 1055 | | |
| 1056 | | element = 7 - (index >> 1); |
| 1057 | | |
| 1058 | | if (index & 1) fatalerror("RSP: LTV: index = %d\n", index); |
| 1059 | | |
| 1060 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1061 | | |
| 1062 | | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 1063 | | for (i=vs; i < ve; i++) |
| 1064 | | { |
| 1065 | | element = ((8 - (index >> 1) + (i-vs)) << 1); |
| 1066 | | VREG_B(i, (element & 0xf)) = READ8(ea); |
| 1067 | | VREG_B(i, ((element + 1) & 0xf)) = READ8(ea + 1); |
| 1068 | | |
| 1069 | | ea += 2; |
| 1070 | | } |
| 1071 | | break; |
| 1072 | | } |
| 1073 | | |
| 1074 | | default: |
| 1075 | | { |
| 1076 | | unimplemented_opcode(op); |
| 1077 | | break; |
| 1078 | | } |
| 562 | auto_free(machine(), m_cop2); |
| 1079 | 563 | } |
| 1080 | 564 | } |
| 1081 | 565 | |
| 1082 | | void rsp_device::handle_swc2(UINT32 op) |
| 566 | void rsp_device::device_reset() |
| 1083 | 567 | { |
| 1084 | | int i, end; |
| 1085 | | int eaoffset; |
| 1086 | | UINT32 ea; |
| 1087 | | int dest = (op >> 16) & 0x1f; |
| 1088 | | int base = (op >> 21) & 0x1f; |
| 1089 | | int index = (op >> 7) & 0xf; |
| 1090 | | int offset = (op & 0x7f); |
| 1091 | | if (offset & 0x40) |
| 1092 | | offset |= 0xffffffc0; |
| 1093 | | |
| 1094 | | switch ((op >> 11) & 0x1f) |
| 1095 | | { |
| 1096 | | case 0x00: /* SBV */ |
| 1097 | | { |
| 1098 | | // 31 25 20 15 10 6 0 |
| 1099 | | // -------------------------------------------------- |
| 1100 | | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 1101 | | // -------------------------------------------------- |
| 1102 | | // |
| 1103 | | // Stores 1 byte from vector byte index |
| 1104 | | |
| 1105 | | ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 1106 | | WRITE8(ea, VREG_B(dest, index)); |
| 1107 | | break; |
| 1108 | | } |
| 1109 | | case 0x01: /* SSV */ |
| 1110 | | { |
| 1111 | | // 31 25 20 15 10 6 0 |
| 1112 | | // -------------------------------------------------- |
| 1113 | | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 1114 | | // -------------------------------------------------- |
| 1115 | | // |
| 1116 | | // Stores 2 bytes starting from vector byte index |
| 1117 | | |
| 1118 | | ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 1119 | | |
| 1120 | | end = index + 2; |
| 1121 | | |
| 1122 | | for (i=index; i < end; i++) |
| 1123 | | { |
| 1124 | | WRITE8(ea, VREG_B(dest, i)); |
| 1125 | | ea++; |
| 1126 | | } |
| 1127 | | break; |
| 1128 | | } |
| 1129 | | case 0x02: /* SLV */ |
| 1130 | | { |
| 1131 | | // 31 25 20 15 10 6 0 |
| 1132 | | // -------------------------------------------------- |
| 1133 | | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 1134 | | // -------------------------------------------------- |
| 1135 | | // |
| 1136 | | // Stores 4 bytes starting from vector byte index |
| 1137 | | |
| 1138 | | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 1139 | | |
| 1140 | | end = index + 4; |
| 1141 | | |
| 1142 | | for (i=index; i < end; i++) |
| 1143 | | { |
| 1144 | | WRITE8(ea, VREG_B(dest, i)); |
| 1145 | | ea++; |
| 1146 | | } |
| 1147 | | break; |
| 1148 | | } |
| 1149 | | case 0x03: /* SDV */ |
| 1150 | | { |
| 1151 | | // 31 25 20 15 10 6 0 |
| 1152 | | // -------------------------------------------------- |
| 1153 | | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 1154 | | // -------------------------------------------------- |
| 1155 | | // |
| 1156 | | // Stores 8 bytes starting from vector byte index |
| 1157 | | |
| 1158 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1159 | | |
| 1160 | | end = index + 8; |
| 1161 | | |
| 1162 | | for (i=index; i < end; i++) |
| 1163 | | { |
| 1164 | | WRITE8(ea, VREG_B(dest, i)); |
| 1165 | | ea++; |
| 1166 | | } |
| 1167 | | break; |
| 1168 | | } |
| 1169 | | case 0x04: /* SQV */ |
| 1170 | | { |
| 1171 | | // 31 25 20 15 10 6 0 |
| 1172 | | // -------------------------------------------------- |
| 1173 | | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 1174 | | // -------------------------------------------------- |
| 1175 | | // |
| 1176 | | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 1177 | | |
| 1178 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1179 | | |
| 1180 | | end = index + (16 - (ea & 0xf)); |
| 1181 | | |
| 1182 | | for (i=index; i < end; i++) |
| 1183 | | { |
| 1184 | | WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 1185 | | ea++; |
| 1186 | | } |
| 1187 | | break; |
| 1188 | | } |
| 1189 | | case 0x05: /* SRV */ |
| 1190 | | { |
| 1191 | | // 31 25 20 15 10 6 0 |
| 1192 | | // -------------------------------------------------- |
| 1193 | | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 1194 | | // -------------------------------------------------- |
| 1195 | | // |
| 1196 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 1197 | | |
| 1198 | | int o; |
| 1199 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1200 | | |
| 1201 | | end = index + (ea & 0xf); |
| 1202 | | o = (16 - (ea & 0xf)) & 0xf; |
| 1203 | | ea &= ~0xf; |
| 1204 | | |
| 1205 | | for (i=index; i < end; i++) |
| 1206 | | { |
| 1207 | | WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 1208 | | ea++; |
| 1209 | | } |
| 1210 | | break; |
| 1211 | | } |
| 1212 | | case 0x06: /* SPV */ |
| 1213 | | { |
| 1214 | | // 31 25 20 15 10 6 0 |
| 1215 | | // -------------------------------------------------- |
| 1216 | | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1217 | | // -------------------------------------------------- |
| 1218 | | // |
| 1219 | | // Stores upper 8 bits of each element |
| 1220 | | |
| 1221 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1222 | | end = index + 8; |
| 1223 | | |
| 1224 | | for (i=index; i < end; i++) |
| 1225 | | { |
| 1226 | | if ((i & 0xf) < 8) |
| 1227 | | { |
| 1228 | | WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1))); |
| 1229 | | } |
| 1230 | | else |
| 1231 | | { |
| 1232 | | WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1233 | | } |
| 1234 | | ea++; |
| 1235 | | } |
| 1236 | | break; |
| 1237 | | } |
| 1238 | | case 0x07: /* SUV */ |
| 1239 | | { |
| 1240 | | // 31 25 20 15 10 6 0 |
| 1241 | | // -------------------------------------------------- |
| 1242 | | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1243 | | // -------------------------------------------------- |
| 1244 | | // |
| 1245 | | // Stores bits 14-7 of each element |
| 1246 | | |
| 1247 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1248 | | end = index + 8; |
| 1249 | | |
| 1250 | | for (i=index; i < end; i++) |
| 1251 | | { |
| 1252 | | if ((i & 0xf) < 8) |
| 1253 | | { |
| 1254 | | WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1255 | | } |
| 1256 | | else |
| 1257 | | { |
| 1258 | | WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 1259 | | } |
| 1260 | | ea++; |
| 1261 | | } |
| 1262 | | break; |
| 1263 | | } |
| 1264 | | case 0x08: /* SHV */ |
| 1265 | | { |
| 1266 | | // 31 25 20 15 10 6 0 |
| 1267 | | // -------------------------------------------------- |
| 1268 | | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1269 | | // -------------------------------------------------- |
| 1270 | | // |
| 1271 | | // Stores bits 14-7 of each element, with 2-byte stride |
| 1272 | | |
| 1273 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1274 | | |
| 1275 | | for (i=0; i < 8; i++) |
| 1276 | | { |
| 1277 | | UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) | |
| 1278 | | ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7); |
| 1279 | | |
| 1280 | | WRITE8(ea, d); |
| 1281 | | ea += 2; |
| 1282 | | } |
| 1283 | | break; |
| 1284 | | } |
| 1285 | | case 0x09: /* SFV */ |
| 1286 | | { |
| 1287 | | // 31 25 20 15 10 6 0 |
| 1288 | | // -------------------------------------------------- |
| 1289 | | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1290 | | // -------------------------------------------------- |
| 1291 | | // |
| 1292 | | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 1293 | | |
| 1294 | | // FIXME: only works for index 0 and index 8 |
| 1295 | | |
| 1296 | | if (index & 0x7) osd_printf_debug("RSP: SFV: index = %d at %08X\n", index, m_ppc); |
| 1297 | | |
| 1298 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1299 | | |
| 1300 | | eaoffset = ea & 0xf; |
| 1301 | | ea &= ~0xf; |
| 1302 | | |
| 1303 | | end = (index >> 1) + 4; |
| 1304 | | |
| 1305 | | for (i=index >> 1; i < end; i++) |
| 1306 | | { |
| 1307 | | WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 1308 | | eaoffset += 4; |
| 1309 | | } |
| 1310 | | break; |
| 1311 | | } |
| 1312 | | case 0x0a: /* SWV */ |
| 1313 | | { |
| 1314 | | // 31 25 20 15 10 6 0 |
| 1315 | | // -------------------------------------------------- |
| 1316 | | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1317 | | // -------------------------------------------------- |
| 1318 | | // |
| 1319 | | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1320 | | // after byte index 15 |
| 1321 | | |
| 1322 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1323 | | |
| 1324 | | eaoffset = ea & 0xf; |
| 1325 | | ea &= ~0xf; |
| 1326 | | |
| 1327 | | end = index + 16; |
| 1328 | | |
| 1329 | | for (i=index; i < end; i++) |
| 1330 | | { |
| 1331 | | WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); |
| 1332 | | eaoffset++; |
| 1333 | | } |
| 1334 | | break; |
| 1335 | | } |
| 1336 | | case 0x0b: /* STV */ |
| 1337 | | { |
| 1338 | | // 31 25 20 15 10 6 0 |
| 1339 | | // -------------------------------------------------- |
| 1340 | | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1341 | | // -------------------------------------------------- |
| 1342 | | // |
| 1343 | | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 1344 | | |
| 1345 | | int element; |
| 1346 | | int vs = dest; |
| 1347 | | int ve = dest + 8; |
| 1348 | | if (ve > 32) |
| 1349 | | ve = 32; |
| 1350 | | |
| 1351 | | element = 8 - (index >> 1); |
| 1352 | | |
| 1353 | | ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1354 | | |
| 1355 | | eaoffset = (ea & 0xf) + (element * 2); |
| 1356 | | ea &= ~0xf; |
| 1357 | | |
| 1358 | | for (i=vs; i < ve; i++) |
| 1359 | | { |
| 1360 | | WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); |
| 1361 | | eaoffset += 2; |
| 1362 | | element++; |
| 1363 | | } |
| 1364 | | break; |
| 1365 | | } |
| 1366 | | |
| 1367 | | default: |
| 1368 | | { |
| 1369 | | unimplemented_opcode(op); |
| 1370 | | break; |
| 1371 | | } |
| 1372 | | } |
| 568 | m_nextpc = ~0; |
| 1373 | 569 | } |
| 1374 | 570 | |
| 1375 | | inline UINT16 rsp_device::SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 1376 | | { |
| 1377 | | if ((INT16)ACCUM_H(accum) < 0) |
| 1378 | | { |
| 1379 | | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 1380 | | { |
| 1381 | | return negative; |
| 1382 | | } |
| 1383 | | else |
| 1384 | | { |
| 1385 | | if ((INT16)ACCUM_M(accum) >= 0) |
| 1386 | | { |
| 1387 | | return negative; |
| 1388 | | } |
| 1389 | | else |
| 1390 | | { |
| 1391 | | if (slice == 0) |
| 1392 | | { |
| 1393 | | return ACCUM_L(accum); |
| 1394 | | } |
| 1395 | | else if (slice == 1) |
| 1396 | | { |
| 1397 | | return ACCUM_M(accum); |
| 1398 | | } |
| 1399 | | } |
| 1400 | | } |
| 1401 | | } |
| 1402 | | else |
| 1403 | | { |
| 1404 | | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 1405 | | { |
| 1406 | | return positive; |
| 1407 | | } |
| 1408 | | else |
| 1409 | | { |
| 1410 | | if ((INT16)ACCUM_M(accum) < 0) |
| 1411 | | { |
| 1412 | | return positive; |
| 1413 | | } |
| 1414 | | else |
| 1415 | | { |
| 1416 | | if (slice == 0) |
| 1417 | | { |
| 1418 | | return ACCUM_L(accum); |
| 1419 | | } |
| 1420 | | else |
| 1421 | | { |
| 1422 | | return ACCUM_M(accum); |
| 1423 | | } |
| 1424 | | } |
| 1425 | | } |
| 1426 | | } |
| 1427 | | |
| 1428 | | return 0; |
| 1429 | | } |
| 1430 | | |
| 1431 | | inline UINT16 rsp_device::SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive) |
| 1432 | | { |
| 1433 | | if ((INT16)ACCUM_H(accum) < 0) |
| 1434 | | { |
| 1435 | | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 1436 | | { |
| 1437 | | return negative; |
| 1438 | | } |
| 1439 | | else |
| 1440 | | { |
| 1441 | | if ((INT16)ACCUM_M(accum) >= 0) |
| 1442 | | { |
| 1443 | | return negative; |
| 1444 | | } |
| 1445 | | else |
| 1446 | | { |
| 1447 | | return ACCUM_M(accum); |
| 1448 | | } |
| 1449 | | } |
| 1450 | | } |
| 1451 | | else |
| 1452 | | { |
| 1453 | | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 1454 | | { |
| 1455 | | return positive; |
| 1456 | | } |
| 1457 | | else |
| 1458 | | { |
| 1459 | | if ((INT16)ACCUM_M(accum) < 0) |
| 1460 | | { |
| 1461 | | return positive; |
| 1462 | | } |
| 1463 | | else |
| 1464 | | { |
| 1465 | | return ACCUM_M(accum); |
| 1466 | | } |
| 1467 | | } |
| 1468 | | } |
| 1469 | | } |
| 1470 | | |
| 1471 | | #define WRITEBACK_RESULT() {memcpy(&m_v[VDREG].s[0], &vres[0], 16);} |
| 1472 | | |
| 1473 | | void rsp_device::handle_vector_ops(UINT32 op) |
| 1474 | | { |
| 1475 | | int i; |
| 1476 | | UINT32 VS1REG = (op >> 11) & 0x1f; |
| 1477 | | UINT32 VS2REG = (op >> 16) & 0x1f; |
| 1478 | | UINT32 VDREG = (op >> 6) & 0x1f; |
| 1479 | | UINT32 EL = (op >> 21) & 0xf; |
| 1480 | | INT16 vres[8]; |
| 1481 | | |
| 1482 | | // Opcode legend: |
| 1483 | | // E = VS2 element type |
| 1484 | | // S = VS1, Source vector 1 |
| 1485 | | // T = VS2, Source vector 2 |
| 1486 | | // D = Destination vector |
| 1487 | | |
| 1488 | | switch (op & 0x3f) |
| 1489 | | { |
| 1490 | | case 0x00: /* VMULF */ |
| 1491 | | { |
| 1492 | | // 31 25 24 20 15 10 5 0 |
| 1493 | | // ------------------------------------------------------ |
| 1494 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 1495 | | // ------------------------------------------------------ |
| 1496 | | // |
| 1497 | | // Multiplies signed integer by signed integer * 2 |
| 1498 | | |
| 1499 | | for (i=0; i < 8; i++) |
| 1500 | | { |
| 1501 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1502 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1503 | | |
| 1504 | | if (s1 == -32768 && s2 == -32768) |
| 1505 | | { |
| 1506 | | // overflow |
| 1507 | | ACCUM_H(i) = 0; |
| 1508 | | ACCUM_M(i) = -32768; |
| 1509 | | ACCUM_L(i) = -32768; |
| 1510 | | vres[i] = 0x7fff; |
| 1511 | | } |
| 1512 | | else |
| 1513 | | { |
| 1514 | | INT64 r = s1 * s2 * 2; |
| 1515 | | r += 0x8000; // rounding ? |
| 1516 | | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1517 | | ACCUM_M(i) = (INT16)(r >> 16); |
| 1518 | | ACCUM_L(i) = (UINT16)(r); |
| 1519 | | vres[i] = ACCUM_M(i); |
| 1520 | | } |
| 1521 | | } |
| 1522 | | WRITEBACK_RESULT(); |
| 1523 | | |
| 1524 | | break; |
| 1525 | | } |
| 1526 | | |
| 1527 | | case 0x01: /* VMULU */ |
| 1528 | | { |
| 1529 | | // 31 25 24 20 15 10 5 0 |
| 1530 | | // ------------------------------------------------------ |
| 1531 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 1532 | | // ------------------------------------------------------ |
| 1533 | | // |
| 1534 | | |
| 1535 | | for (i=0; i < 8; i++) |
| 1536 | | { |
| 1537 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1538 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1539 | | |
| 1540 | | INT64 r = s1 * s2 * 2; |
| 1541 | | r += 0x8000; // rounding ? |
| 1542 | | |
| 1543 | | ACCUM_H(i) = (UINT16)(r >> 32); |
| 1544 | | ACCUM_M(i) = (UINT16)(r >> 16); |
| 1545 | | ACCUM_L(i) = (UINT16)(r); |
| 1546 | | |
| 1547 | | if (r < 0) |
| 1548 | | { |
| 1549 | | vres[i] = 0; |
| 1550 | | } |
| 1551 | | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 1552 | | { |
| 1553 | | vres[i] = -1; |
| 1554 | | } |
| 1555 | | else |
| 1556 | | { |
| 1557 | | vres[i] = ACCUM_M(i); |
| 1558 | | } |
| 1559 | | } |
| 1560 | | WRITEBACK_RESULT(); |
| 1561 | | break; |
| 1562 | | } |
| 1563 | | |
| 1564 | | case 0x04: /* VMUDL */ |
| 1565 | | { |
| 1566 | | // 31 25 24 20 15 10 5 0 |
| 1567 | | // ------------------------------------------------------ |
| 1568 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | |
| 1569 | | // ------------------------------------------------------ |
| 1570 | | // |
| 1571 | | // Multiplies unsigned fraction by unsigned fraction |
| 1572 | | // Stores the higher 16 bits of the 32-bit result to accumulator |
| 1573 | | // The low slice of accumulator is stored into destination element |
| 1574 | | |
| 1575 | | for (i=0; i < 8; i++) |
| 1576 | | { |
| 1577 | | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1578 | | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1579 | | UINT32 r = s1 * s2; |
| 1580 | | |
| 1581 | | ACCUM_H(i) = 0; |
| 1582 | | ACCUM_M(i) = 0; |
| 1583 | | ACCUM_L(i) = (UINT16)(r >> 16); |
| 1584 | | |
| 1585 | | vres[i] = ACCUM_L(i); |
| 1586 | | } |
| 1587 | | WRITEBACK_RESULT(); |
| 1588 | | break; |
| 1589 | | } |
| 1590 | | |
| 1591 | | case 0x05: /* VMUDM */ |
| 1592 | | { |
| 1593 | | // 31 25 24 20 15 10 5 0 |
| 1594 | | // ------------------------------------------------------ |
| 1595 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1596 | | // ------------------------------------------------------ |
| 1597 | | // |
| 1598 | | // Multiplies signed integer by unsigned fraction |
| 1599 | | // The result is stored into accumulator |
| 1600 | | // The middle slice of accumulator is stored into destination element |
| 1601 | | |
| 1602 | | for (i=0; i < 8; i++) |
| 1603 | | { |
| 1604 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1605 | | INT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1606 | | INT32 r = s1 * s2; |
| 1607 | | |
| 1608 | | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1609 | | ACCUM_M(i) = (INT16)(r >> 16); |
| 1610 | | ACCUM_L(i) = (UINT16)(r); |
| 1611 | | |
| 1612 | | vres[i] = ACCUM_M(i); |
| 1613 | | } |
| 1614 | | WRITEBACK_RESULT(); |
| 1615 | | break; |
| 1616 | | |
| 1617 | | } |
| 1618 | | |
| 1619 | | case 0x06: /* VMUDN */ |
| 1620 | | { |
| 1621 | | // 31 25 24 20 15 10 5 0 |
| 1622 | | // ------------------------------------------------------ |
| 1623 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1624 | | // ------------------------------------------------------ |
| 1625 | | // |
| 1626 | | // Multiplies unsigned fraction by signed integer |
| 1627 | | // The result is stored into accumulator |
| 1628 | | // The low slice of accumulator is stored into destination element |
| 1629 | | |
| 1630 | | for (i=0; i < 8; i++) |
| 1631 | | { |
| 1632 | | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1633 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1634 | | INT32 r = s1 * s2; |
| 1635 | | |
| 1636 | | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1637 | | ACCUM_M(i) = (INT16)(r >> 16); |
| 1638 | | ACCUM_L(i) = (UINT16)(r); |
| 1639 | | |
| 1640 | | vres[i] = ACCUM_L(i); |
| 1641 | | } |
| 1642 | | WRITEBACK_RESULT(); |
| 1643 | | break; |
| 1644 | | } |
| 1645 | | |
| 1646 | | case 0x07: /* VMUDH */ |
| 1647 | | { |
| 1648 | | // 31 25 24 20 15 10 5 0 |
| 1649 | | // ------------------------------------------------------ |
| 1650 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1651 | | // ------------------------------------------------------ |
| 1652 | | // |
| 1653 | | // Multiplies signed integer by signed integer |
| 1654 | | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1655 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1656 | | |
| 1657 | | for (i=0; i < 8; i++) |
| 1658 | | { |
| 1659 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1660 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1661 | | INT32 r = s1 * s2; |
| 1662 | | |
| 1663 | | ACCUM_H(i) = (INT16)(r >> 16); |
| 1664 | | ACCUM_M(i) = (UINT16)(r); |
| 1665 | | ACCUM_L(i) = 0; |
| 1666 | | |
| 1667 | | if (r < -32768) r = -32768; |
| 1668 | | if (r > 32767) r = 32767; |
| 1669 | | vres[i] = (INT16)(r); |
| 1670 | | } |
| 1671 | | WRITEBACK_RESULT(); |
| 1672 | | break; |
| 1673 | | } |
| 1674 | | |
| 1675 | | case 0x08: /* VMACF */ |
| 1676 | | { |
| 1677 | | // 31 25 24 20 15 10 5 0 |
| 1678 | | // ------------------------------------------------------ |
| 1679 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1680 | | // ------------------------------------------------------ |
| 1681 | | // |
| 1682 | | // Multiplies signed integer by signed integer * 2 |
| 1683 | | // The result is added to accumulator |
| 1684 | | |
| 1685 | | for (i=0; i < 8; i++) |
| 1686 | | { |
| 1687 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1688 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1689 | | INT32 r = s1 * s2; |
| 1690 | | |
| 1691 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1692 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1693 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1694 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1695 | | |
| 1696 | | q += (INT64)(r) << 17; |
| 1697 | | |
| 1698 | | ACCUM_LL(i) = (UINT16)q; |
| 1699 | | ACCUM_L(i) = (UINT16)(q >> 16); |
| 1700 | | ACCUM_M(i) = (UINT16)(q >> 32); |
| 1701 | | ACCUM_H(i) = (UINT16)(q >> 48); |
| 1702 | | |
| 1703 | | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1704 | | } |
| 1705 | | WRITEBACK_RESULT(); |
| 1706 | | break; |
| 1707 | | } |
| 1708 | | |
| 1709 | | case 0x09: /* VMACU */ |
| 1710 | | { |
| 1711 | | // 31 25 24 20 15 10 5 0 |
| 1712 | | // ------------------------------------------------------ |
| 1713 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1714 | | // ------------------------------------------------------ |
| 1715 | | // |
| 1716 | | |
| 1717 | | for (i = 0; i < 8; i++) |
| 1718 | | { |
| 1719 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1720 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1721 | | INT32 r1 = s1 * s2; |
| 1722 | | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1723 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1724 | | |
| 1725 | | ACCUM_L(i) = (UINT16)(r2); |
| 1726 | | ACCUM_M(i) = (UINT16)(r3); |
| 1727 | | ACCUM_H(i) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31); |
| 1728 | | |
| 1729 | | if ((INT16)ACCUM_H(i) < 0) |
| 1730 | | { |
| 1731 | | vres[i] = 0; |
| 1732 | | } |
| 1733 | | else |
| 1734 | | { |
| 1735 | | if (ACCUM_H(i) != 0) |
| 1736 | | { |
| 1737 | | vres[i] = 0xffffu; |
| 1738 | | } |
| 1739 | | else |
| 1740 | | { |
| 1741 | | if ((INT16)ACCUM_M(i) < 0) |
| 1742 | | { |
| 1743 | | vres[i] = 0xffffu; |
| 1744 | | } |
| 1745 | | else |
| 1746 | | { |
| 1747 | | vres[i] = ACCUM_M(i); |
| 1748 | | } |
| 1749 | | } |
| 1750 | | } |
| 1751 | | } |
| 1752 | | WRITEBACK_RESULT(); |
| 1753 | | break; |
| 1754 | | } |
| 1755 | | |
| 1756 | | case 0x0c: /* VMADL */ |
| 1757 | | { |
| 1758 | | // 31 25 24 20 15 10 5 0 |
| 1759 | | // ------------------------------------------------------ |
| 1760 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1761 | | // ------------------------------------------------------ |
| 1762 | | // |
| 1763 | | // Multiplies unsigned fraction by unsigned fraction |
| 1764 | | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1765 | | // The low slice of accumulator is stored into destination element |
| 1766 | | |
| 1767 | | for (i = 0; i < 8; i++) |
| 1768 | | { |
| 1769 | | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1770 | | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1771 | | UINT32 r1 = s1 * s2; |
| 1772 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1773 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1774 | | |
| 1775 | | ACCUM_L(i) = (UINT16)(r2); |
| 1776 | | ACCUM_M(i) = (UINT16)(r3); |
| 1777 | | ACCUM_H(i) += (INT16)(r3 >> 16); |
| 1778 | | |
| 1779 | | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1780 | | } |
| 1781 | | WRITEBACK_RESULT(); |
| 1782 | | break; |
| 1783 | | } |
| 1784 | | |
| 1785 | | case 0x0d: /* VMADM */ |
| 1786 | | { |
| 1787 | | // 31 25 24 20 15 10 5 0 |
| 1788 | | // ------------------------------------------------------ |
| 1789 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1790 | | // ------------------------------------------------------ |
| 1791 | | // |
| 1792 | | // Multiplies signed integer by unsigned fraction |
| 1793 | | // The result is added into accumulator |
| 1794 | | // The middle slice of accumulator is stored into destination element |
| 1795 | | |
| 1796 | | for (i=0; i < 8; i++) |
| 1797 | | { |
| 1798 | | UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1799 | | UINT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1800 | | UINT32 r1 = s1 * s2; |
| 1801 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 1802 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 1803 | | |
| 1804 | | ACCUM_L(i) = (UINT16)(r2); |
| 1805 | | ACCUM_M(i) = (UINT16)(r3); |
| 1806 | | ACCUM_H(i) += (UINT16)(r3 >> 16); |
| 1807 | | if ((INT32)(r1) < 0) |
| 1808 | | ACCUM_H(i) -= 1; |
| 1809 | | |
| 1810 | | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1811 | | } |
| 1812 | | WRITEBACK_RESULT(); |
| 1813 | | break; |
| 1814 | | } |
| 1815 | | |
| 1816 | | case 0x0e: /* VMADN */ |
| 1817 | | { |
| 1818 | | // 31 25 24 20 15 10 5 0 |
| 1819 | | // ------------------------------------------------------ |
| 1820 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | |
| 1821 | | // ------------------------------------------------------ |
| 1822 | | // |
| 1823 | | // Multiplies unsigned fraction by signed integer |
| 1824 | | // The result is added into accumulator |
| 1825 | | // The low slice of accumulator is stored into destination element |
| 1826 | | |
| 1827 | | for (i=0; i < 8; i++) |
| 1828 | | { |
| 1829 | | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1830 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1831 | | |
| 1832 | | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1833 | | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1834 | | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1835 | | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1836 | | q += (INT64)(s1*s2) << 16; |
| 1837 | | |
| 1838 | | ACCUM_LL(i) = (UINT16)q; |
| 1839 | | ACCUM_L(i) = (UINT16)(q >> 16); |
| 1840 | | ACCUM_M(i) = (UINT16)(q >> 32); |
| 1841 | | ACCUM_H(i) = (UINT16)(q >> 48); |
| 1842 | | |
| 1843 | | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1844 | | } |
| 1845 | | WRITEBACK_RESULT(); |
| 1846 | | |
| 1847 | | break; |
| 1848 | | } |
| 1849 | | |
| 1850 | | case 0x0f: /* VMADH */ |
| 1851 | | { |
| 1852 | | // 31 25 24 20 15 10 5 0 |
| 1853 | | // ------------------------------------------------------ |
| 1854 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 1855 | | // ------------------------------------------------------ |
| 1856 | | // |
| 1857 | | // Multiplies signed integer by signed integer |
| 1858 | | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 1859 | | // The highest 32 bits of accumulator is saturated into destination element |
| 1860 | | |
| 1861 | | for (i = 0; i < 8; i++) |
| 1862 | | { |
| 1863 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1864 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1865 | | |
| 1866 | | INT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1867 | | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1868 | | accum += s1 * s2; |
| 1869 | | |
| 1870 | | ACCUM_H(i) = (UINT16)(accum >> 16); |
| 1871 | | ACCUM_M(i) = (UINT16)accum; |
| 1872 | | |
| 1873 | | vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 1874 | | } |
| 1875 | | WRITEBACK_RESULT(); |
| 1876 | | |
| 1877 | | break; |
| 1878 | | } |
| 1879 | | |
| 1880 | | case 0x10: /* VADD */ |
| 1881 | | { |
| 1882 | | // 31 25 24 20 15 10 5 0 |
| 1883 | | // ------------------------------------------------------ |
| 1884 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 1885 | | // ------------------------------------------------------ |
| 1886 | | // |
| 1887 | | // Adds two vector registers and the carry flag; the result is saturated to [-32768, 32767] |
| 1888 | | |
| 1889 | | // TODO: check VS2REG == VDREG |
| 1890 | | |
| 1891 | | for (i=0; i < 8; i++) |
| 1892 | | { |
| 1893 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1894 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1895 | | INT32 r = s1 + s2 + (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1896 | | |
| 1897 | | ACCUM_L(i) = (INT16)(r); |
| 1898 | | |
| 1899 | | if (r > 32767) r = 32767; |
| 1900 | | if (r < -32768) r = -32768; |
| 1901 | | vres[i] = (INT16)(r); |
| 1902 | | } |
| 1903 | | CLEAR_ZERO_FLAGS(); |
| 1904 | | CLEAR_CARRY_FLAGS(); |
| 1905 | | WRITEBACK_RESULT(); |
| 1906 | | break; |
| 1907 | | } |
| 1908 | | |
| 1909 | | case 0x11: /* VSUB */ |
| 1910 | | { |
| 1911 | | // 31 25 24 20 15 10 5 0 |
| 1912 | | // ------------------------------------------------------ |
| 1913 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 1914 | | // ------------------------------------------------------ |
| 1915 | | // |
| 1916 | | // Subtracts VS2 and the carry flag from VS1; the result is saturated to [-32768, 32767] |
| 1917 | | |
| 1918 | | // TODO: check VS2REG == VDREG |
| 1919 | | |
| 1920 | | for (i = 0; i < 8; i++) |
| 1921 | | { |
| 1922 | | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1923 | | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1924 | | INT32 r = s1 - s2 - (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1925 | | |
| 1926 | | ACCUM_L(i) = (INT16)(r); |
| 1927 | | |
| 1928 | | if (r > 32767) r = 32767; |
| 1929 | | if (r < -32768) r = -32768; |
| 1930 | | |
| 1931 | | vres[i] = (INT16)(r); |
| 1932 | | } |
| 1933 | | CLEAR_ZERO_FLAGS(); |
| 1934 | | CLEAR_CARRY_FLAGS(); |
| 1935 | | WRITEBACK_RESULT(); |
| 1936 | | break; |
| 1937 | | } |
| 1938 | | |
| 1939 | | case 0x13: /* VABS */ |
| 1940 | | { |
| 1941 | | // 31 25 24 20 15 10 5 0 |
| 1942 | | // ------------------------------------------------------ |
| 1943 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 1944 | | // ------------------------------------------------------ |
| 1945 | | // |
| 1946 | | // Negates the source register 2 element if the source register 1 element is negative (-32768 becomes 32767), |
| 1947 | | // copies it unchanged if positive, and yields 0 if zero; the result is stored to the destination register |
| 1948 | | |
| 1949 | | for (i=0; i < 8; i++) |
| 1950 | | { |
| 1951 | | INT16 s1 = (INT16)VREG_S(VS1REG, i); |
| 1952 | | INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1953 | | |
| 1954 | | if (s1 < 0) |
| 1955 | | { |
| 1956 | | if (s2 == -32768) |
| 1957 | | { |
| 1958 | | vres[i] = 32767; |
| 1959 | | } |
| 1960 | | else |
| 1961 | | { |
| 1962 | | vres[i] = -s2; |
| 1963 | | } |
| 1964 | | } |
| 1965 | | else if (s1 > 0) |
| 1966 | | { |
| 1967 | | vres[i] = s2; |
| 1968 | | } |
| 1969 | | else |
| 1970 | | { |
| 1971 | | vres[i] = 0; |
| 1972 | | } |
| 1973 | | |
| 1974 | | ACCUM_L(i) = vres[i]; |
| 1975 | | } |
| 1976 | | WRITEBACK_RESULT(); |
| 1977 | | break; |
| 1978 | | } |
| 1979 | | |
| 1980 | | case 0x14: /* VADDC */ |
| 1981 | | { |
| 1982 | | // 31 25 24 20 15 10 5 0 |
| 1983 | | // ------------------------------------------------------ |
| 1984 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 1985 | | // ------------------------------------------------------ |
| 1986 | | // |
| 1987 | | // Adds two vector registers, the carry out is stored into carry register |
| 1988 | | |
| 1989 | | // TODO: check VS2REG = VDREG |
| 1990 | | |
| 1991 | | CLEAR_ZERO_FLAGS(); |
| 1992 | | CLEAR_CARRY_FLAGS(); |
| 1993 | | |
| 1994 | | for (i=0; i < 8; i++) |
| 1995 | | { |
| 1996 | | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1997 | | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1998 | | INT32 r = s1 + s2; |
| 1999 | | |
| 2000 | | vres[i] = (INT16)(r); |
| 2001 | | ACCUM_L(i) = (INT16)(r); |
| 2002 | | |
| 2003 | | if (r & 0xffff0000) |
| 2004 | | { |
| 2005 | | SET_CARRY_FLAG(i); |
| 2006 | | } |
| 2007 | | } |
| 2008 | | WRITEBACK_RESULT(); |
| 2009 | | break; |
| 2010 | | } |
| 2011 | | |
| 2012 | | case 0x15: /* VSUBC */ |
| 2013 | | { |
| 2014 | | // 31 25 24 20 15 10 5 0 |
| 2015 | | // ------------------------------------------------------ |
| 2016 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 2017 | | // ------------------------------------------------------ |
| 2018 | | // |
| 2019 | | // Subtracts two vector registers, the carry out is stored into carry register |
| 2020 | | |
| 2021 | | // TODO: check VS2REG = VDREG |
| 2022 | | |
| 2023 | | CLEAR_ZERO_FLAGS(); |
| 2024 | | CLEAR_CARRY_FLAGS(); |
| 2025 | | |
| 2026 | | for (i=0; i < 8; i++) |
| 2027 | | { |
| 2028 | | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 2029 | | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2030 | | INT32 r = s1 - s2; |
| 2031 | | |
| 2032 | | vres[i] = (INT16)(r); |
| 2033 | | ACCUM_L(i) = (UINT16)(r); |
| 2034 | | |
| 2035 | | if ((UINT16)(r) != 0) |
| 2036 | | { |
| 2037 | | SET_ZERO_FLAG(i); |
| 2038 | | } |
| 2039 | | if (r & 0xffff0000) |
| 2040 | | { |
| 2041 | | SET_CARRY_FLAG(i); |
| 2042 | | } |
| 2043 | | } |
| 2044 | | WRITEBACK_RESULT(); |
| 2045 | | break; |
| 2046 | | } |
| 2047 | | |
| 2048 | | case 0x1d: /* VSAW */ |
| 2049 | | { |
| 2050 | | // 31 25 24 20 15 10 5 0 |
| 2051 | | // ------------------------------------------------------ |
| 2052 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 2053 | | // ------------------------------------------------------ |
| 2054 | | // |
| 2055 | | // Stores high, middle or low slice of accumulator to destination vector |
| 2056 | | |
| 2057 | | switch (EL) |
| 2058 | | { |
| 2059 | | case 0x08: // VSAWH |
| 2060 | | { |
| 2061 | | for (i=0; i < 8; i++) |
| 2062 | | { |
| 2063 | | VREG_S(VDREG, i) = ACCUM_H(i); |
| 2064 | | } |
| 2065 | | break; |
| 2066 | | } |
| 2067 | | case 0x09: // VSAWM |
| 2068 | | { |
| 2069 | | for (i=0; i < 8; i++) |
| 2070 | | { |
| 2071 | | VREG_S(VDREG, i) = ACCUM_M(i); |
| 2072 | | } |
| 2073 | | break; |
| 2074 | | } |
| 2075 | | case 0x0a: // VSAWL |
| 2076 | | { |
| 2077 | | for (i=0; i < 8; i++) |
| 2078 | | { |
| 2079 | | VREG_S(VDREG, i) = ACCUM_L(i); |
| 2080 | | } |
| 2081 | | break; |
| 2082 | | } |
| 2083 | | default: //fatalerror("RSP: VSAW: el = %d\n", EL); // EL selects none of VSAWH/VSAWM/VSAWL |
| 2084 | | printf("RSP: VSAW: el = %d\n", EL); // report the unhandled EL value before exiting |
| 2085 | | exit(0); |
| 2086 | | } |
| 2087 | | break; |
| 2088 | | } |
| 2089 | | |
| 2090 | | case 0x20: /* VLT */ |
| 2091 | | { |
| 2092 | | // 31 25 24 20 15 10 5 0 |
| 2093 | | // ------------------------------------------------------ |
| 2094 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 2095 | | // ------------------------------------------------------ |
| 2096 | | // |
| 2097 | | // Sets compare flags if elements in VS1 are less than VS2 (or equal, with both zero and carry flags set) |
| 2098 | | // The result element is taken from VS1 when the compare flag is set, otherwise from VS2 |
| 2099 | | |
| 2100 | | CLEAR_COMPARE_FLAGS(); |
| 2101 | | CLEAR_CLIP2_FLAGS(); |
| 2102 | | |
| 2103 | | for (i=0; i < 8; i++) |
| 2104 | | { |
| 2105 | | INT16 s1, s2; |
| 2106 | | s1 = VREG_S(VS1REG, i); |
| 2107 | | s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2108 | | if (s1 < s2) |
| 2109 | | { |
| 2110 | | SET_COMPARE_FLAG(i); |
| 2111 | | } |
| 2112 | | else if (s1 == s2) |
| 2113 | | { |
| 2114 | | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 2115 | | { |
| 2116 | | SET_COMPARE_FLAG(i); |
| 2117 | | } |
| 2118 | | } |
| 2119 | | |
| 2120 | | if (COMPARE_FLAG(i) != 0) |
| 2121 | | { |
| 2122 | | vres[i] = s1; |
| 2123 | | } |
| 2124 | | else |
| 2125 | | { |
| 2126 | | vres[i] = s2; |
| 2127 | | } |
| 2128 | | |
| 2129 | | ACCUM_L(i) = vres[i]; |
| 2130 | | } |
| 2131 | | |
| 2132 | | CLEAR_CARRY_FLAGS(); |
| 2133 | | CLEAR_ZERO_FLAGS(); |
| 2134 | | WRITEBACK_RESULT(); |
| 2135 | | break; |
| 2136 | | } |
| 2137 | | |
| 2138 | | case 0x21: /* VEQ */ |
| 2139 | | { |
| 2140 | | // 31 25 24 20 15 10 5 0 |
| 2141 | | // ------------------------------------------------------ |
| 2142 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 2143 | | // ------------------------------------------------------ |
| 2144 | | // |
| 2145 | | // Sets compare flags if elements in VS1 are equal with VS2 and the zero flag is clear |
| 2146 | | // The result element is taken from VS1 when the compare flag is set, otherwise from VS2 |
| 2147 | | |
| 2148 | | CLEAR_COMPARE_FLAGS(); |
| 2149 | | CLEAR_CLIP2_FLAGS(); |
| 2150 | | |
| 2151 | | for (i = 0; i < 8; i++) |
| 2152 | | { |
| 2153 | | INT16 s1 = VREG_S(VS1REG, i); |
| 2154 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2155 | | |
| 2156 | | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 2157 | | { |
| 2158 | | SET_COMPARE_FLAG(i); |
| 2159 | | vres[i] = s1; |
| 2160 | | } |
| 2161 | | else |
| 2162 | | { |
| 2163 | | vres[i] = s2; |
| 2164 | | } |
| 2165 | | ACCUM_L(i) = vres[i]; |
| 2166 | | } |
| 2167 | | |
| 2168 | | CLEAR_ZERO_FLAGS(); |
| 2169 | | CLEAR_CARRY_FLAGS(); |
| 2170 | | WRITEBACK_RESULT(); |
| 2171 | | break; |
| 2172 | | } |
| 2173 | | |
| 2174 | | case 0x22: /* VNE */ |
| 2175 | | { |
| 2176 | | // 31 25 24 20 15 10 5 0 |
| 2177 | | // ------------------------------------------------------ |
| 2178 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 2179 | | // ------------------------------------------------------ |
| 2180 | | // |
| 2181 | | // Sets compare flags if elements in VS1 are not equal with VS2, or if the zero flag is set |
| 2182 | | // The result element is taken from VS1 when the compare flag is set, otherwise from VS2 |
| 2183 | | |
| 2184 | | CLEAR_COMPARE_FLAGS(); |
| 2185 | | CLEAR_CLIP2_FLAGS(); |
| 2186 | | |
| 2187 | | for (i = 0; i < 8; i++) |
| 2188 | | { |
| 2189 | | INT16 s1 = VREG_S(VS1REG, i); |
| 2190 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2191 | | |
| 2192 | | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 2193 | | { |
| 2194 | | SET_COMPARE_FLAG(i); |
| 2195 | | vres[i] = s1; |
| 2196 | | } |
| 2197 | | else |
| 2198 | | { |
| 2199 | | vres[i] = s2; |
| 2200 | | } |
| 2201 | | ACCUM_L(i) = vres[i]; |
| 2202 | | } |
| 2203 | | |
| 2204 | | CLEAR_CARRY_FLAGS(); |
| 2205 | | CLEAR_ZERO_FLAGS(); |
| 2206 | | WRITEBACK_RESULT(); |
| 2207 | | break; |
| 2208 | | } |
| 2209 | | |
| 2210 | | case 0x23: /* VGE */ |
| 2211 | | { |
| 2212 | | // 31 25 24 20 15 10 5 0 |
| 2213 | | // ------------------------------------------------------ |
| 2214 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 2215 | | // ------------------------------------------------------ |
| 2216 | | // |
| 2217 | | // Sets compare flags if elements in VS1 are greater than VS2, or equal with the zero or carry flag clear |
| 2218 | | // The result element is taken from VS1 when the compare flag is set, otherwise from VS2 |
| 2219 | | |
| 2220 | | CLEAR_COMPARE_FLAGS(); |
| 2221 | | CLEAR_CLIP2_FLAGS(); |
| 2222 | | |
| 2223 | | for (i=0; i < 8; i++) |
| 2224 | | { |
| 2225 | | INT16 s1 = VREG_S(VS1REG, i); |
| 2226 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2227 | | |
| 2228 | | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 2229 | | { |
| 2230 | | SET_COMPARE_FLAG(i); |
| 2231 | | vres[i] = s1; |
| 2232 | | } |
| 2233 | | else |
| 2234 | | { |
| 2235 | | vres[i] = s2; |
| 2236 | | } |
| 2237 | | |
| 2238 | | ACCUM_L(i) = vres[i]; |
| 2239 | | } |
| 2240 | | |
| 2241 | | CLEAR_CARRY_FLAGS(); |
| 2242 | | CLEAR_ZERO_FLAGS(); |
| 2243 | | WRITEBACK_RESULT(); |
| 2244 | | break; |
| 2245 | | } |
| 2246 | | |
| 2247 | | case 0x24: /* VCL */ |
| 2248 | | { |
| 2249 | | // 31 25 24 20 15 10 5 0 |
| 2250 | | // ------------------------------------------------------ |
| 2251 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 2252 | | // ------------------------------------------------------ |
| 2253 | | // |
| 2254 | | // Vector clip low |
| 2255 | | |
| 2256 | | for (i = 0; i < 8; i++) |
| 2257 | | { |
| 2258 | | INT16 s1 = VREG_S(VS1REG, i); |
| 2259 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2260 | | |
| 2261 | | if (CARRY_FLAG(i) != 0) |
| 2262 | | { |
| 2263 | | if (ZERO_FLAG(i) != 0) |
| 2264 | | { |
| 2265 | | if (COMPARE_FLAG(i) != 0) |
| 2266 | | { |
| 2267 | | ACCUM_L(i) = -(UINT16)s2; |
| 2268 | | } |
| 2269 | | else |
| 2270 | | { |
| 2271 | | ACCUM_L(i) = s1; |
| 2272 | | } |
| 2273 | | } |
| 2274 | | else |
| 2275 | | { |
| 2276 | | if (CLIP1_FLAG(i) != 0) |
| 2277 | | { |
| 2278 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 2279 | | { |
| 2280 | | |
| 2281 | | ACCUM_L(i) = s1; |
| 2282 | | CLEAR_COMPARE_FLAG(i); |
| 2283 | | } |
| 2284 | | else |
| 2285 | | { |
| 2286 | | ACCUM_L(i) = -((UINT16)s2); |
| 2287 | | SET_COMPARE_FLAG(i); |
| 2288 | | } |
| 2289 | | } |
| 2290 | | else |
| 2291 | | { |
| 2292 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 2293 | | { |
| 2294 | | ACCUM_L(i) = s1; |
| 2295 | | CLEAR_COMPARE_FLAG(i); |
| 2296 | | } |
| 2297 | | else |
| 2298 | | { |
| 2299 | | ACCUM_L(i) = -((UINT16)s2); |
| 2300 | | SET_COMPARE_FLAG(i); |
| 2301 | | } |
| 2302 | | } |
| 2303 | | } |
| 2304 | | } |
| 2305 | | else |
| 2306 | | { |
| 2307 | | if (ZERO_FLAG(i) != 0) |
| 2308 | | { |
| 2309 | | if (CLIP2_FLAG(i) != 0) |
| 2310 | | { |
| 2311 | | ACCUM_L(i) = s2; |
| 2312 | | } |
| 2313 | | else |
| 2314 | | { |
| 2315 | | ACCUM_L(i) = s1; |
| 2316 | | } |
| 2317 | | } |
| 2318 | | else |
| 2319 | | { |
| 2320 | | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 2321 | | { |
| 2322 | | ACCUM_L(i) = s2; |
| 2323 | | SET_CLIP2_FLAG(i); |
| 2324 | | } |
| 2325 | | else |
| 2326 | | { |
| 2327 | | ACCUM_L(i) = s1; |
| 2328 | | CLEAR_CLIP2_FLAG(i); |
| 2329 | | } |
| 2330 | | } |
| 2331 | | } |
| 2332 | | |
| 2333 | | vres[i] = ACCUM_L(i); |
| 2334 | | } |
| 2335 | | CLEAR_CARRY_FLAGS(); |
| 2336 | | CLEAR_ZERO_FLAGS(); |
| 2337 | | CLEAR_CLIP1_FLAGS(); |
| 2338 | | WRITEBACK_RESULT(); |
| 2339 | | break; |
| 2340 | | } |
| 2341 | | |
| 2342 | | case 0x25: /* VCH */ |
| 2343 | | { |
| 2344 | | // 31 25 24 20 15 10 5 0 |
| 2345 | | // ------------------------------------------------------ |
| 2346 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 2347 | | // ------------------------------------------------------ |
| 2348 | | // |
| 2349 | | // Vector clip high |
| 2350 | | |
| 2351 | | CLEAR_CARRY_FLAGS(); |
| 2352 | | CLEAR_COMPARE_FLAGS(); |
| 2353 | | CLEAR_CLIP1_FLAGS(); |
| 2354 | | CLEAR_ZERO_FLAGS(); |
| 2355 | | CLEAR_CLIP2_FLAGS(); |
| 2356 | | UINT32 vce = 0; |
| 2357 | | |
| 2358 | | for (i=0; i < 8; i++) |
| 2359 | | { |
| 2360 | | INT16 s1 = VREG_S(VS1REG, i); |
| 2361 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2362 | | |
| 2363 | | if ((s1 ^ s2) < 0) |
| 2364 | | { |
| 2365 | | vce = (s1 + s2 == -1); |
| 2366 | | SET_CARRY_FLAG(i); |
| 2367 | | if (s2 < 0) |
| 2368 | | { |
| 2369 | | SET_CLIP2_FLAG(i); |
| 2370 | | } |
| 2371 | | |
| 2372 | | if (s1 + s2 <= 0) |
| 2373 | | { |
| 2374 | | SET_COMPARE_FLAG(i); |
| 2375 | | vres[i] = -((UINT16)s2); |
| 2376 | | } |
| 2377 | | else |
| 2378 | | { |
| 2379 | | vres[i] = s1; |
| 2380 | | } |
| 2381 | | |
| 2382 | | if (s1 + s2 != 0) |
| 2383 | | { |
| 2384 | | if (s1 != ~s2) |
| 2385 | | { |
| 2386 | | SET_ZERO_FLAG(i); |
| 2387 | | } |
| 2388 | | } |
| 2389 | | } |
| 2390 | | else |
| 2391 | | { |
| 2392 | | vce = 0; |
| 2393 | | if (s2 < 0) |
| 2394 | | { |
| 2395 | | SET_COMPARE_FLAG(i); |
| 2396 | | } |
| 2397 | | if (s1 - s2 >= 0) |
| 2398 | | { |
| 2399 | | SET_CLIP2_FLAG(i); |
| 2400 | | vres[i] = s2; |
| 2401 | | } |
| 2402 | | else |
| 2403 | | { |
| 2404 | | vres[i] = s1; |
| 2405 | | } |
| 2406 | | |
| 2407 | | if ((s1 - s2) != 0) |
| 2408 | | { |
| 2409 | | if (s1 != ~s2) |
| 2410 | | { |
| 2411 | | SET_ZERO_FLAG(i); |
| 2412 | | } |
| 2413 | | } |
| 2414 | | } |
| 2415 | | if (vce != 0) |
| 2416 | | { |
| 2417 | | SET_CLIP1_FLAG(i); |
| 2418 | | } |
| 2419 | | ACCUM_L(i) = vres[i]; |
| 2420 | | } |
| 2421 | | WRITEBACK_RESULT(); |
| 2422 | | break; |
| 2423 | | } |
| 2424 | | |
| 2425 | | case 0x26: /* VCR */ |
| 2426 | | { |
| 2427 | | // 31 25 24 20 15 10 5 0 |
| 2428 | | // ------------------------------------------------------ |
| 2429 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 2430 | | // ------------------------------------------------------ |
| 2431 | | // |
| 2432 | | // Vector clip reverse |
| 2433 | | |
| 2434 | | CLEAR_CARRY_FLAGS(); |
| 2435 | | CLEAR_COMPARE_FLAGS(); |
| 2436 | | CLEAR_CLIP1_FLAGS(); |
| 2437 | | CLEAR_ZERO_FLAGS(); |
| 2438 | | CLEAR_CLIP2_FLAGS(); |
| 2439 | | |
| 2440 | | for (i=0; i < 8; i++) |
| 2441 | | { |
| 2442 | | INT16 s1 = VREG_S(VS1REG, i); |
| 2443 | | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2444 | | |
| 2445 | | if ((INT16)(s1 ^ s2) < 0) |
| 2446 | | { |
| 2447 | | if (s2 < 0) |
| 2448 | | { |
| 2449 | | SET_CLIP2_FLAG(i); |
| 2450 | | } |
| 2451 | | if ((s1 + s2) <= 0) |
| 2452 | | { |
| 2453 | | ACCUM_L(i) = ~((UINT16)s2); |
| 2454 | | SET_COMPARE_FLAG(i); |
| 2455 | | } |
| 2456 | | else |
| 2457 | | { |
| 2458 | | ACCUM_L(i) = s1; |
| 2459 | | } |
| 2460 | | } |
| 2461 | | else |
| 2462 | | { |
| 2463 | | if (s2 < 0) |
| 2464 | | { |
| 2465 | | SET_COMPARE_FLAG(i); |
| 2466 | | } |
| 2467 | | if ((s1 - s2) >= 0) |
| 2468 | | { |
| 2469 | | ACCUM_L(i) = s2; |
| 2470 | | SET_CLIP2_FLAG(i); |
| 2471 | | } |
| 2472 | | else |
| 2473 | | { |
| 2474 | | ACCUM_L(i) = s1; |
| 2475 | | } |
| 2476 | | } |
| 2477 | | |
| 2478 | | vres[i] = ACCUM_L(i); |
| 2479 | | } |
| 2480 | | WRITEBACK_RESULT(); |
| 2481 | | break; |
| 2482 | | } |
| 2483 | | |
| 2484 | | case 0x27: /* VMRG */ |
| 2485 | | { |
| 2486 | | // 31 25 24 20 15 10 5 0 |
| 2487 | | // ------------------------------------------------------ |
| 2488 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 2489 | | // ------------------------------------------------------ |
| 2490 | | // |
| 2491 | | // Merges two vectors according to compare flags |
| 2492 | | |
| 2493 | | for (i = 0; i < 8; i++) |
| 2494 | | { |
| 2495 | | if (COMPARE_FLAG(i) != 0) |
| 2496 | | { |
| 2497 | | vres[i] = VREG_S(VS1REG, i); |
| 2498 | | } |
| 2499 | | else |
| 2500 | | { |
| 2501 | | vres[i] = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2502 | | } |
| 2503 | | |
| 2504 | | ACCUM_L(i) = vres[i]; |
| 2505 | | } |
| 2506 | | WRITEBACK_RESULT(); |
| 2507 | | break; |
| 2508 | | } |
| 2509 | | case 0x28: /* VAND */ |
| 2510 | | { |
| 2511 | | // 31 25 24 20 15 10 5 0 |
| 2512 | | // ------------------------------------------------------ |
| 2513 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 2514 | | // ------------------------------------------------------ |
| 2515 | | // |
| 2516 | | // Bitwise AND of two vector registers |
| 2517 | | |
| 2518 | | for (i = 0; i < 8; i++) |
| 2519 | | { |
| 2520 | | vres[i] = VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2521 | | ACCUM_L(i) = vres[i]; |
| 2522 | | } |
| 2523 | | WRITEBACK_RESULT(); |
| 2524 | | break; |
| 2525 | | } |
| 2526 | | case 0x29: /* VNAND */ |
| 2527 | | { |
| 2528 | | // 31 25 24 20 15 10 5 0 |
| 2529 | | // ------------------------------------------------------ |
| 2530 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 2531 | | // ------------------------------------------------------ |
| 2532 | | // |
| 2533 | | // Bitwise NOT AND of two vector registers |
| 2534 | | |
| 2535 | | for (i = 0; i < 8; i++) |
| 2536 | | { |
| 2537 | | vres[i] = ~((VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2538 | | ACCUM_L(i) = vres[i]; |
| 2539 | | } |
| 2540 | | WRITEBACK_RESULT(); |
| 2541 | | break; |
| 2542 | | } |
| 2543 | | case 0x2a: /* VOR */ |
| 2544 | | { |
| 2545 | | // 31 25 24 20 15 10 5 0 |
| 2546 | | // ------------------------------------------------------ |
| 2547 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2548 | | // ------------------------------------------------------ |
| 2549 | | // |
| 2550 | | // Bitwise OR of two vector registers |
| 2551 | | |
| 2552 | | for (i = 0; i < 8; i++) |
| 2553 | | { |
| 2554 | | vres[i] = VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2555 | | ACCUM_L(i) = vres[i]; |
| 2556 | | } |
| 2557 | | WRITEBACK_RESULT(); |
| 2558 | | break; |
| 2559 | | } |
| 2560 | | case 0x2b: /* VNOR */ |
| 2561 | | { |
| 2562 | | // 31 25 24 20 15 10 5 0 |
| 2563 | | // ------------------------------------------------------ |
| 2564 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2565 | | // ------------------------------------------------------ |
| 2566 | | // |
| 2567 | | // Bitwise NOT OR of two vector registers |
| 2568 | | |
| 2569 | | for (i=0; i < 8; i++) |
| 2570 | | { |
| 2571 | | vres[i] = ~((VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2572 | | ACCUM_L(i) = vres[i]; |
| 2573 | | } |
| 2574 | | WRITEBACK_RESULT(); |
| 2575 | | break; |
| 2576 | | } |
| 2577 | | case 0x2c: /* VXOR */ |
| 2578 | | { |
| 2579 | | // 31 25 24 20 15 10 5 0 |
| 2580 | | // ------------------------------------------------------ |
| 2581 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2582 | | // ------------------------------------------------------ |
| 2583 | | // |
| 2584 | | // Bitwise XOR of two vector registers |
| 2585 | | |
| 2586 | | for (i=0; i < 8; i++) |
| 2587 | | { |
| 2588 | | vres[i] = VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2589 | | ACCUM_L(i) = vres[i]; |
| 2590 | | } |
| 2591 | | WRITEBACK_RESULT(); |
| 2592 | | break; |
| 2593 | | } |
| 2594 | | case 0x2d: /* VNXOR */ |
| 2595 | | { |
| 2596 | | // 31 25 24 20 15 10 5 0 |
| 2597 | | // ------------------------------------------------------ |
| 2598 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2599 | | // ------------------------------------------------------ |
| 2600 | | // |
| 2601 | | // Bitwise NOT XOR of two vector registers |
| 2602 | | |
| 2603 | | for (i=0; i < 8; i++) |
| 2604 | | { |
| 2605 | | vres[i] = ~((VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2606 | | ACCUM_L(i) = vres[i]; |
| 2607 | | } |
| 2608 | | WRITEBACK_RESULT(); |
| 2609 | | break; |
| 2610 | | } |
| 2611 | | |
| 2612 | | case 0x30: /* VRCP */ |
| 2613 | | { |
| 2614 | | // 31 25 24 20 15 10 5 0 |
| 2615 | | // ------------------------------------------------------ |
| 2616 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2617 | | // ------------------------------------------------------ |
| 2618 | | // |
| 2619 | | // Calculates reciprocal |
| 2620 | | INT32 shifter = 0; |
| 2621 | | |
| 2622 | | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2623 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2624 | | if (datainput) |
| 2625 | | { |
| 2626 | | for (i = 0; i < 32; i++) |
| 2627 | | { |
| 2628 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2629 | | { |
| 2630 | | shifter = i; |
| 2631 | | break; |
| 2632 | | } |
| 2633 | | } |
| 2634 | | } |
| 2635 | | else |
| 2636 | | { |
| 2637 | | shifter = 0x10; |
| 2638 | | } |
| 2639 | | |
| 2640 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2641 | | INT32 fetchval = rsp_divtable[address]; |
| 2642 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2643 | | if (rec < 0) |
| 2644 | | { |
| 2645 | | temp = ~temp; |
| 2646 | | } |
| 2647 | | if (!rec) |
| 2648 | | { |
| 2649 | | temp = 0x7fffffff; |
| 2650 | | } |
| 2651 | | else if (rec == 0xffff8000) |
| 2652 | | { |
| 2653 | | temp = 0xffff0000; |
| 2654 | | } |
| 2655 | | rec = temp; |
| 2656 | | |
| 2657 | | m_reciprocal_res = rec; |
| 2658 | | m_dp_allowed = 0; |
| 2659 | | |
| 2660 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2661 | | |
| 2662 | | for (i = 0; i < 8; i++) |
| 2663 | | { |
| 2664 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2665 | | } |
| 2666 | | |
| 2667 | | |
| 2668 | | break; |
| 2669 | | } |
| 2670 | | |
| 2671 | | case 0x31: /* VRCPL */ |
| 2672 | | { |
| 2673 | | // 31 25 24 20 15 10 5 0 |
| 2674 | | // ------------------------------------------------------ |
| 2675 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2676 | | // ------------------------------------------------------ |
| 2677 | | // |
| 2678 | | // Calculates reciprocal low part |
| 2679 | | |
| 2680 | | INT32 shifter = 0; |
| 2681 | | |
| 2682 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2683 | | INT32 datainput = rec; |
| 2684 | | |
| 2685 | | if (m_dp_allowed) |
| 2686 | | { |
| 2687 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2688 | | datainput = rec; |
| 2689 | | |
| 2690 | | if (rec < 0) |
| 2691 | | { |
| 2692 | | if (rec < -32768) |
| 2693 | | { |
| 2694 | | datainput = ~datainput; |
| 2695 | | } |
| 2696 | | else |
| 2697 | | { |
| 2698 | | datainput = -datainput; |
| 2699 | | } |
| 2700 | | } |
| 2701 | | } |
| 2702 | | else if (datainput < 0) |
| 2703 | | { |
| 2704 | | datainput = -datainput; |
| 2705 | | |
| 2706 | | shifter = 0x10; |
| 2707 | | } |
| 2708 | | |
| 2709 | | |
| 2710 | | for (i = 0; i < 32; i++) |
| 2711 | | { |
| 2712 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2713 | | { |
| 2714 | | shifter = i; |
| 2715 | | break; |
| 2716 | | } |
| 2717 | | } |
| 2718 | | |
| 2719 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2720 | | INT32 fetchval = rsp_divtable[address]; |
| 2721 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2722 | | temp ^= rec >> 31; |
| 2723 | | |
| 2724 | | if (!rec) |
| 2725 | | { |
| 2726 | | temp = 0x7fffffff; |
| 2727 | | } |
| 2728 | | else if (rec == 0xffff8000) |
| 2729 | | { |
| 2730 | | temp = 0xffff0000; |
| 2731 | | } |
| 2732 | | rec = temp; |
| 2733 | | |
| 2734 | | m_reciprocal_res = rec; |
| 2735 | | m_dp_allowed = 0; |
| 2736 | | |
| 2737 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2738 | | |
| 2739 | | for (i = 0; i < 8; i++) |
| 2740 | | { |
| 2741 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2742 | | } |
| 2743 | | |
| 2744 | | break; |
| 2745 | | } |
| 2746 | | |
| 2747 | | case 0x32: /* VRCPH */ |
| 2748 | | { |
| 2749 | | // 31 25 24 20 15 10 5 0 |
| 2750 | | // ------------------------------------------------------ |
| 2751 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 2752 | | // ------------------------------------------------------ |
| 2753 | | // |
| 2754 | | // Calculates reciprocal high part |
| 2755 | | |
| 2756 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2757 | | m_dp_allowed = 1; |
| 2758 | | |
| 2759 | | for (i = 0; i < 8; i++) |
| 2760 | | { |
| 2761 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2762 | | } |
| 2763 | | |
| 2764 | | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 2765 | | |
| 2766 | | break; |
| 2767 | | } |
| 2768 | | |
| 2769 | | case 0x33: /* VMOV */ |
| 2770 | | { |
| 2771 | | // 31 25 24 20 15 10 5 0 |
| 2772 | | // ------------------------------------------------------ |
| 2773 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 2774 | | // ------------------------------------------------------ |
| 2775 | | // |
| 2776 | | // Moves element from vector to destination vector |
| 2777 | | |
| 2778 | | VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 2779 | | for (i = 0; i < 8; i++) |
| 2780 | | { |
| 2781 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2782 | | } |
| 2783 | | break; |
| 2784 | | } |
| 2785 | | |
| 2786 | | case 0x34: /* VRSQ */ |
| 2787 | | { |
| 2788 | | // 31 25 24 20 15 10 5 0 |
| 2789 | | // ------------------------------------------------------ |
| 2790 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 2791 | | // ------------------------------------------------------ |
| 2792 | | // |
| 2793 | | // Calculates reciprocal square-root |
| 2794 | | |
| 2795 | | INT32 shifter = 0; |
| 2796 | | |
| 2797 | | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2798 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2799 | | if (datainput) |
| 2800 | | { |
| 2801 | | for (i = 0; i < 32; i++) |
| 2802 | | { |
| 2803 | | if (datainput & (1 << ((~i) & 0x1f)))//?.?.??? 31 - i |
| 2804 | | { |
| 2805 | | shifter = i; |
| 2806 | | break; |
| 2807 | | } |
| 2808 | | } |
| 2809 | | } |
| 2810 | | else |
| 2811 | | { |
| 2812 | | shifter = 0x10; |
| 2813 | | } |
| 2814 | | |
| 2815 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2816 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2817 | | |
| 2818 | | INT32 fetchval = rsp_divtable[address]; |
| 2819 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2820 | | if (rec < 0) |
| 2821 | | { |
| 2822 | | temp = ~temp; |
| 2823 | | } |
| 2824 | | if (!rec) |
| 2825 | | { |
| 2826 | | temp = 0x7fffffff; |
| 2827 | | } |
| 2828 | | else if (rec == 0xffff8000) |
| 2829 | | { |
| 2830 | | temp = 0xffff0000; |
| 2831 | | } |
| 2832 | | rec = temp; |
| 2833 | | |
| 2834 | | m_reciprocal_res = rec; |
| 2835 | | m_dp_allowed = 0; |
| 2836 | | |
| 2837 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2838 | | |
| 2839 | | for (i = 0; i < 8; i++) |
| 2840 | | { |
| 2841 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2842 | | } |
| 2843 | | |
| 2844 | | break; |
| 2845 | | } |
| 2846 | | |
| 2847 | | case 0x35: /* VRSQL */ |
| 2848 | | { |
| 2849 | | // 31 25 24 20 15 10 5 0 |
| 2850 | | // ------------------------------------------------------ |
| 2851 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 2852 | | // ------------------------------------------------------ |
| 2853 | | // |
| 2854 | | // Calculates reciprocal square-root low part |
| 2855 | | |
| 2856 | | INT32 shifter = 0; |
| 2857 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2858 | | INT32 datainput = rec; |
| 2859 | | |
| 2860 | | if (m_dp_allowed) |
| 2861 | | { |
| 2862 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2863 | | datainput = rec; |
| 2864 | | |
| 2865 | | if (rec < 0) |
| 2866 | | { |
| 2867 | | if (rec < -32768) |
| 2868 | | { |
| 2869 | | datainput = ~datainput; |
| 2870 | | } |
| 2871 | | else |
| 2872 | | { |
| 2873 | | datainput = -datainput; |
| 2874 | | } |
| 2875 | | } |
| 2876 | | } |
| 2877 | | else if (datainput < 0) |
| 2878 | | { |
| 2879 | | datainput = -datainput; |
| 2880 | | |
| 2881 | | shifter = 0x10; |
| 2882 | | } |
| 2883 | | |
| 2884 | | if (datainput) |
| 2885 | | { |
| 2886 | | for (i = 0; i < 32; i++) |
| 2887 | | { |
| 2888 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 2889 | | { |
| 2890 | | shifter = i; |
| 2891 | | break; |
| 2892 | | } |
| 2893 | | } |
| 2894 | | } |
| 2895 | | |
| 2896 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2897 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2898 | | |
| 2899 | | INT32 fetchval = rsp_divtable[address]; |
| 2900 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2901 | | temp ^= rec >> 31; |
| 2902 | | |
| 2903 | | if (!rec) |
| 2904 | | { |
| 2905 | | temp = 0x7fffffff; |
| 2906 | | } |
| 2907 | | else if (rec == 0xffff8000) |
| 2908 | | { |
| 2909 | | temp = 0xffff0000; |
| 2910 | | } |
| 2911 | | rec = temp; |
| 2912 | | |
| 2913 | | m_reciprocal_res = rec; |
| 2914 | | m_dp_allowed = 0; |
| 2915 | | |
| 2916 | | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2917 | | |
| 2918 | | for (i = 0; i < 8; i++) |
| 2919 | | { |
| 2920 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2921 | | } |
| 2922 | | |
| 2923 | | break; |
| 2924 | | } |
| 2925 | | |
| 2926 | | case 0x36: /* VRSQH */ |
| 2927 | | { |
| 2928 | | // 31 25 24 20 15 10 5 0 |
| 2929 | | // ------------------------------------------------------ |
| 2930 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 2931 | | // ------------------------------------------------------ |
| 2932 | | // |
| 2933 | | // Calculates reciprocal square-root high part |
| 2934 | | |
| 2935 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2936 | | m_dp_allowed = 1; |
| 2937 | | |
| 2938 | | for (i=0; i < 8; i++) |
| 2939 | | { |
| 2940 | | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2941 | | } |
| 2942 | | |
| 2943 | | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 2944 | | break; |
| 2945 | | } |
| 2946 | | |
| 2947 | | case 0x37: /* VNOP */ |
| 2948 | | { |
| 2949 | | // 31 25 24 20 15 10 5 0 |
| 2950 | | // ------------------------------------------------------ |
| 2951 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110111 | |
| 2952 | | // ------------------------------------------------------ |
| 2953 | | // |
| 2954 | | // Vector null instruction |
| 2955 | | |
| 2956 | | break; |
| 2957 | | } |
| 2958 | | |
| 2959 | | default: unimplemented_opcode(op); break; |
| 2960 | | } |
| 2961 | | } |
| 2962 | | |
| 2963 | 571 | void rsp_device::execute_run() |
| 2964 | 572 | { |
| 2965 | 573 | if (m_isdrc) |
| r241957 | r241958 | |
| 3009 | 617 | { |
| 3010 | 618 | m_sp_set_status_func(0, 0x3, 0xffffffff); |
| 3011 | 619 | m_rsp_state->icount = MIN(m_rsp_state->icount, 1); |
| 3012 | | |
| 3013 | | if (LOG_INSTRUCTION_EXECUTION) fprintf(m_exec_output, "\n---------- break ----------\n\n"); |
| 3014 | | |
| 3015 | 620 | break; |
| 3016 | 621 | } |
| 3017 | 622 | case 0x20: /* ADD */ if (RDREG) RDVAL = (INT32)(RSVAL + RTVAL); break; |
| r241957 | r241958 | |
| 3070 | 675 | |
| 3071 | 676 | case 0x12: /* COP2 */ |
| 3072 | 677 | { |
| 3073 | | switch ((op >> 21) & 0x1f) |
| 3074 | | { |
| 3075 | | case 0x00: /* MFC2 */ |
| 3076 | | { |
| 3077 | | // 31 25 20 15 10 6 0 |
| 3078 | | // --------------------------------------------------- |
| 3079 | | // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | |
| 3080 | | // --------------------------------------------------- |
| 3081 | | // |
| 3082 | | |
| 3083 | | int el = (op >> 7) & 0xf; |
| 3084 | | UINT16 b1 = VREG_B(RDREG, (el+0) & 0xf); |
| 3085 | | UINT16 b2 = VREG_B(RDREG, (el+1) & 0xf); |
| 3086 | | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); |
| 3087 | | break; |
| 3088 | | } |
| 3089 | | case 0x02: /* CFC2 */ |
| 3090 | | { |
| 3091 | | // 31 25 20 15 10 0 |
| 3092 | | // ------------------------------------------------ |
| 3093 | | // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | |
| 3094 | | // ------------------------------------------------ |
| 3095 | | // |
| 3096 | | |
| 3097 | | if (RTREG) |
| 3098 | | { |
| 3099 | | switch(RDREG) |
| 3100 | | { |
| 3101 | | case 0: |
| 3102 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3103 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 3104 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 3105 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 3106 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 3107 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 3108 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 3109 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 3110 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 3111 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 3112 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 3113 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 3114 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 3115 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 3116 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 3117 | | ((ZERO_FLAG(7) & 1) << 15); |
| 3118 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3119 | | break; |
| 3120 | | case 1: |
| 3121 | | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 3122 | | ((COMPARE_FLAG(1) & 1) << 1) | |
| 3123 | | ((COMPARE_FLAG(2) & 1) << 2) | |
| 3124 | | ((COMPARE_FLAG(3) & 1) << 3) | |
| 3125 | | ((COMPARE_FLAG(4) & 1) << 4) | |
| 3126 | | ((COMPARE_FLAG(5) & 1) << 5) | |
| 3127 | | ((COMPARE_FLAG(6) & 1) << 6) | |
| 3128 | | ((COMPARE_FLAG(7) & 1) << 7) | |
| 3129 | | ((CLIP2_FLAG(0) & 1) << 8) | |
| 3130 | | ((CLIP2_FLAG(1) & 1) << 9) | |
| 3131 | | ((CLIP2_FLAG(2) & 1) << 10) | |
| 3132 | | ((CLIP2_FLAG(3) & 1) << 11) | |
| 3133 | | ((CLIP2_FLAG(4) & 1) << 12) | |
| 3134 | | ((CLIP2_FLAG(5) & 1) << 13) | |
| 3135 | | ((CLIP2_FLAG(6) & 1) << 14) | |
| 3136 | | ((CLIP2_FLAG(7) & 1) << 15); |
| 3137 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3138 | | break; |
| 3139 | | case 2: |
| 3140 | | // Anciliary clipping flags |
| 3141 | | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3142 | | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3143 | | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3144 | | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3145 | | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3146 | | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3147 | | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3148 | | ((CLIP1_FLAG(7) & 1) << 7); |
| 3149 | | } |
| 3150 | | } |
| 3151 | | break; |
| 3152 | | } |
| 3153 | | case 0x04: /* MTC2 */ |
| 3154 | | { |
| 3155 | | // 31 25 20 15 10 6 0 |
| 3156 | | // --------------------------------------------------- |
| 3157 | | // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | |
| 3158 | | // --------------------------------------------------- |
| 3159 | | // |
| 3160 | | |
| 3161 | | int el = (op >> 7) & 0xf; |
| 3162 | | W_VREG_B(RDREG, (el+0) & 0xf, (RTVAL >> 8) & 0xff); |
| 3163 | | W_VREG_B(RDREG, (el+1) & 0xf, (RTVAL >> 0) & 0xff); |
| 3164 | | break; |
| 3165 | | } |
| 3166 | | case 0x06: /* CTC2 */ |
| 3167 | | { |
| 3168 | | // 31 25 20 15 10 0 |
| 3169 | | // ------------------------------------------------ |
| 3170 | | // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | |
| 3171 | | // ------------------------------------------------ |
| 3172 | | // |
| 3173 | | |
| 3174 | | switch(RDREG) |
| 3175 | | { |
| 3176 | | case 0: |
| 3177 | | CLEAR_CARRY_FLAGS(); |
| 3178 | | CLEAR_ZERO_FLAGS(); |
| 3179 | | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 3180 | | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 3181 | | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 3182 | | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 3183 | | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 3184 | | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 3185 | | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 3186 | | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 3187 | | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 3188 | | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 3189 | | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 3190 | | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 3191 | | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 3192 | | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 3193 | | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 3194 | | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 3195 | | break; |
| 3196 | | case 1: |
| 3197 | | CLEAR_COMPARE_FLAGS(); |
| 3198 | | CLEAR_CLIP2_FLAGS(); |
| 3199 | | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 3200 | | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 3201 | | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 3202 | | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 3203 | | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 3204 | | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 3205 | | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 3206 | | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 3207 | | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 3208 | | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 3209 | | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 3210 | | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 3211 | | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 3212 | | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 3213 | | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 3214 | | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 3215 | | break; |
| 3216 | | case 2: |
| 3217 | | CLEAR_CLIP1_FLAGS(); |
| 3218 | | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 3219 | | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 3220 | | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 3221 | | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 3222 | | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 3223 | | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 3224 | | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 3225 | | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 3226 | | break; |
| 3227 | | } |
| 3228 | | break; |
| 3229 | | } |
| 3230 | | |
| 3231 | | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 3232 | | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 3233 | | { |
| 3234 | | handle_vector_ops(op); |
| 3235 | | break; |
| 3236 | | } |
| 3237 | | |
| 3238 | | default: unimplemented_opcode(op); break; |
| 3239 | | } |
| 678 | m_cop2->handle_cop2(op); |
| 3240 | 679 | break; |
| 3241 | 680 | } |
| 3242 | 681 | |
| r241957 | r241958 | |
| 3248 | 687 | case 0x28: /* SB */ WRITE8(RSVAL + SIMM16, RTVAL); break; |
| 3249 | 688 | case 0x29: /* SH */ WRITE16(RSVAL + SIMM16, RTVAL); break; |
| 3250 | 689 | case 0x2b: /* SW */ WRITE32(RSVAL + SIMM16, RTVAL); break; |
| 3251 | | case 0x32: /* LWC2 */ handle_lwc2(op); break; |
| 3252 | | case 0x3a: /* SWC2 */ handle_swc2(op); break; |
| 690 | case 0x32: /* LWC2 */ m_cop2->handle_lwc2(op); break; |
| 691 | case 0x3a: /* SWC2 */ m_cop2->handle_swc2(op); break; |
| 3253 | 692 | |
| 3254 | 693 | default: |
| 3255 | 694 | { |
| r241957 | r241958 | |
| 3262 | 701 | { |
| 3263 | 702 | int i, l; |
| 3264 | 703 | static UINT32 prev_regs[32]; |
| 3265 | | static VECTOR_REG prev_vecs[32]; |
| 3266 | 704 | char string[200]; |
| 3267 | 705 | rsp_dasm_one(string, m_ppc, op); |
| 3268 | 706 | |
| r241957 | r241958 | |
| 3288 | 726 | prev_regs[i] = m_rsp_state->r[i]; |
| 3289 | 727 | } |
| 3290 | 728 | |
| 3291 | | for (i=0; i < 32; i++) |
| 3292 | | { |
| 3293 | | if (m_v[i].d[0] != prev_vecs[i].d[0] || m_v[i].d[1] != prev_vecs[i].d[1]) |
| 3294 | | { |
| 3295 | | fprintf(m_exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", i, |
| 3296 | | (UINT16)VREG_S(i,0), (UINT16)VREG_S(i,1), (UINT16)VREG_S(i,2), (UINT16)VREG_S(i,3), (UINT16)VREG_S(i,4), (UINT16)VREG_S(i,5), (UINT16)VREG_S(i,6), (UINT16)VREG_S(i,7)); |
| 3297 | | } |
| 3298 | | prev_vecs[i].d[0] = m_v[i].d[0]; |
| 3299 | | prev_vecs[i].d[1] = m_v[i].d[1]; |
| 3300 | | } |
| 729 | m_cop2->log_instruction_execution(); |
| 3301 | 730 | |
| 3302 | 731 | fprintf(m_exec_output, "\n"); |
| 3303 | 732 | |
trunk/src/emu/cpu/rsp/rspcp2.c
| r0 | r241958 | |
| 1 | /*************************************************************************** |
| 2 | |
| 3 | rspcp2.c |
| 4 | |
| 5 | Universal machine language-based Nintendo/SGI RSP COP2 emulator. |
| 6 | Written by Harmony of the MESS team. |
| 7 | |
| 8 | Copyright the MESS team. |
| 9 | Released for general non-commercial use under the MAME license |
| 10 | Visit http://mamedev.org for licensing and usage restrictions. |
| 11 | |
| 12 | ***************************************************************************/ |
| 13 | |
| 14 | #include "emu.h" |
| 15 | #include "rsp.h" |
| 16 | #include "rspdiv.h" |
| 17 | #include "rspcp2.h" |
| 18 | #include "cpu/drcfe.h" |
| 19 | #include "cpu/drcuml.h" |
| 20 | #include "cpu/drcumlsh.h" |
| 21 | |
| 22 | using namespace uml; |
| 23 | |
| 24 | extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); |
| 25 | |
| 26 | /*************************************************************************** |
| 27 | Helpful Defines |
| 28 | ***************************************************************************/ |
| 29 | |
// ---------------------------------------------------------------------------
// Instruction-field and register-file helpers for the RSP vector unit (COP2).
//
// Every macro parameter is parenthesized at its point of use so that passing
// an expression containing a low-precedence operator (e.g. `a | b`) cannot
// re-associate with the macro's own operators. This matters most for the
// flag macros, where `x & 7` would otherwise parse as `a | (b & 7)`.
// ---------------------------------------------------------------------------

// Fields of a vector instruction; these expand against a variable named `op`
// that must be in scope at the point of use.
#define VDREG       ((op >> 6) & 0x1f)
#define VS1REG      ((op >> 11) & 0x1f)
#define VS2REG      ((op >> 16) & 0x1f)
#define EL          ((op >> 21) & 0xf)

// Scalar register operands, reached through the owning rsp_device (m_rsp).
// RSREG / RTREG / RDREG are not defined in this section.
#define RSVAL       (m_rsp.m_rsp_state->r[RSREG])
#define RTVAL       (m_rsp.m_rsp_state->r[RTREG])
#define RDVAL       (m_rsp.m_rsp_state->r[RDREG])

// Vector register element access (usable as lvalues). Byte accesses flip the
// low bit of the byte offset.
#define VREG_B(reg, offset)     m_v[(reg)].b[(offset)^1]
#define VREG_S(reg, offset)     m_v[(reg)].s[(offset)]
#define VREG_L(reg, offset)     m_v[(reg)].l[(offset)]

// Read-only forms; the 16-bit read is cast to a signed value.
#define R_VREG_B(reg, offset)   m_v[(reg)].b[(offset)^1]
#define R_VREG_S(reg, offset)   ((INT16)m_v[(reg)].s[(offset)])
#define R_VREG_L(reg, offset)   m_v[(reg)].l[(offset)]

// Write forms; each evaluates to the assignment expression.
#define W_VREG_B(reg, offset, val)  (m_v[(reg)].b[(offset)^1] = (val))
#define W_VREG_S(reg, offset, val)  (m_v[(reg)].s[(offset)] = (val))
#define W_VREG_L(reg, offset, val)  (m_v[(reg)].l[(offset)] = (val))

// Source lane selected for output lane z under element specifier x.
#define VEC_EL_2(x,z)           (vector_elements_2[(x)][(z)])

// Row indices into m_vflag.
#define CARRY       0
#define COMPARE     1
#define CLIP1       2
#define ZERO        3
#define CLIP2       4

// Accumulator access: the whole value (q) or one of its four 16-bit words,
// w[3] (H) down to w[0] (LL). The word readers are rvalues because of the
// cast; use the SET_ACCUM_* forms to store.
#define ACCUM(x)        m_accum[(x)].q
#define ACCUM_H(x)      ((UINT16)m_accum[(x)].w[3])
#define ACCUM_M(x)      ((UINT16)m_accum[(x)].w[2])
#define ACCUM_L(x)      ((UINT16)m_accum[(x)].w[1])
#define ACCUM_LL(x)     ((UINT16)m_accum[(x)].w[0])

// Note the argument order (value, lane) and that each expansion already ends
// with a semicolon.
#define SET_ACCUM_H(v, x)       m_accum[(x)].w[3] = (v);
#define SET_ACCUM_M(v, x)       m_accum[(x)].w[2] = (v);
#define SET_ACCUM_L(v, x)       m_accum[(x)].w[1] = (v);
#define SET_ACCUM_LL(v, x)      m_accum[(x)].w[0] = (v);

// Per-lane flag readers: 0xffff when the stored flag is non-zero, else 0.
// The lane index is reduced modulo 8.
#define CARRY_FLAG(x)           (m_vflag[CARRY][(x) & 7] != 0 ? 0xffff : 0)
#define COMPARE_FLAG(x)         (m_vflag[COMPARE][(x) & 7] != 0 ? 0xffff : 0)
#define CLIP1_FLAG(x)           (m_vflag[CLIP1][(x) & 7] != 0 ? 0xffff : 0)
#define ZERO_FLAG(x)            (m_vflag[ZERO][(x) & 7] != 0 ? 0xffff : 0)
#define CLIP2_FLAG(x)           (m_vflag[CLIP2][(x) & 7] != 0 ? 0xffff : 0)

// Clear one whole flag row (16 bytes are zeroed per row).
#define CLEAR_CARRY_FLAGS()     { memset(m_vflag[CARRY], 0, 16); }
#define CLEAR_COMPARE_FLAGS()   { memset(m_vflag[COMPARE], 0, 16); }
#define CLEAR_CLIP1_FLAGS()     { memset(m_vflag[CLIP1], 0, 16); }
#define CLEAR_ZERO_FLAGS()      { memset(m_vflag[ZERO], 0, 16); }
#define CLEAR_CLIP2_FLAGS()     { memset(m_vflag[CLIP2], 0, 16); }

// Set a single lane's flag (stored as 0xffff).
#define SET_CARRY_FLAG(x)       { m_vflag[CARRY][(x) & 7] = 0xffff; }
#define SET_COMPARE_FLAG(x)     { m_vflag[COMPARE][(x) & 7] = 0xffff; }
#define SET_CLIP1_FLAG(x)       { m_vflag[CLIP1][(x) & 7] = 0xffff; }
#define SET_ZERO_FLAG(x)        { m_vflag[ZERO][(x) & 7] = 0xffff; }
#define SET_CLIP2_FLAG(x)       { m_vflag[CLIP2][(x) & 7] = 0xffff; }

// Clear a single lane's flag.
#define CLEAR_CARRY_FLAG(x)     { m_vflag[CARRY][(x) & 7] = 0; }
#define CLEAR_COMPARE_FLAG(x)   { m_vflag[COMPARE][(x) & 7] = 0; }
#define CLEAR_CLIP1_FLAG(x)     { m_vflag[CLIP1][(x) & 7] = 0; }
#define CLEAR_ZERO_FLAG(x)      { m_vflag[ZERO][(x) & 7] = 0; }
#define CLEAR_CLIP2_FLAG(x)     { m_vflag[CLIP2][(x) & 7] = 0; }

// Copy the eight staged results in m_vres into the destination vector
// register named by the current instruction (VDREG, so `op` must be in scope).
#define WRITEBACK_RESULT() { \
		VREG_S(VDREG, 0) = m_vres[0];   \
		VREG_S(VDREG, 1) = m_vres[1];   \
		VREG_S(VDREG, 2) = m_vres[2];   \
		VREG_S(VDREG, 3) = m_vres[3];   \
		VREG_S(VDREG, 4) = m_vres[4];   \
		VREG_S(VDREG, 5) = m_vres[5];   \
		VREG_S(VDREG, 6) = m_vres[6];   \
		VREG_S(VDREG, 7) = m_vres[7];   \
}
| 104 | |
// Lane-selection table used through VEC_EL_2(x, z): the row is the 4-bit
// element specifier taken from the instruction (EL), the column is the output
// lane (0-7), and the entry is the lane of the source register to read.
// Rows 0-1 are the identity mapping, rows 2-3 repeat one lane out of each
// pair, rows 4-7 repeat one lane out of each group of four, and rows 8-15
// repeat a single lane across all eight positions.
static const int vector_elements_2[16][8] =
{
	{ 0, 1, 2, 3, 4, 5, 6, 7 },     // none
	{ 0, 1, 2, 3, 4, 5, 6, 7 },     // ??? (same mapping as row 0)
	{ 0, 0, 2, 2, 4, 4, 6, 6 },     // 0q
	{ 1, 1, 3, 3, 5, 5, 7, 7 },     // 1q
	{ 0, 0, 0, 0, 4, 4, 4, 4 },     // 0h
	{ 1, 1, 1, 1, 5, 5, 5, 5 },     // 1h
	{ 2, 2, 2, 2, 6, 6, 6, 6 },     // 2h
	{ 3, 3, 3, 3, 7, 7, 7, 7 },     // 3h
	{ 0, 0, 0, 0, 0, 0, 0, 0 },     // 0
	{ 1, 1, 1, 1, 1, 1, 1, 1 },     // 1
	{ 2, 2, 2, 2, 2, 2, 2, 2 },     // 2
	{ 3, 3, 3, 3, 3, 3, 3, 3 },     // 3
	{ 4, 4, 4, 4, 4, 4, 4, 4 },     // 4
	{ 5, 5, 5, 5, 5, 5, 5, 5 },     // 5
	{ 6, 6, 6, 6, 6, 6, 6, 6 },     // 6
	{ 7, 7, 7, 7, 7, 7, 7, 7 },     // 7
};
| 124 | |
| 125 | rsp_cop2::rsp_cop2(rsp_device &rsp, running_machine &machine) |
| 126 | : m_rsp(rsp) |
| 127 | , m_machine(machine) |
| 128 | , m_reciprocal_res(0) |
| 129 | , m_reciprocal_high(0) |
| 130 | , m_dp_allowed(0) |
| 131 | { |
| 132 | memset(m_vres, 0, sizeof(m_vres)); |
| 133 | memset(m_v, 0, sizeof(m_v)); |
| 134 | memset(m_vflag, 0, sizeof(m_vflag)); |
| 135 | memset(m_accum, 0, sizeof(m_accum)); |
| 136 | } |
| 137 | |
| 138 | void rsp_cop2::init() |
| 139 | { |
| 140 | CLEAR_CARRY_FLAGS(); |
| 141 | CLEAR_COMPARE_FLAGS(); |
| 142 | CLEAR_CLIP1_FLAGS(); |
| 143 | CLEAR_ZERO_FLAGS(); |
| 144 | CLEAR_CLIP2_FLAGS(); |
| 145 | } |
| 146 | |
| 147 | void rsp_cop2::start() |
| 148 | { |
| 149 | for(int regIdx = 0; regIdx < 32; regIdx++ ) |
| 150 | { |
| 151 | m_v[regIdx].d[0] = 0; |
| 152 | m_v[regIdx].d[1] = 0; |
| 153 | } |
| 154 | |
| 155 | CLEAR_CARRY_FLAGS(); |
| 156 | CLEAR_COMPARE_FLAGS(); |
| 157 | CLEAR_CLIP1_FLAGS(); |
| 158 | CLEAR_ZERO_FLAGS(); |
| 159 | CLEAR_CLIP2_FLAGS(); |
| 160 | m_reciprocal_res = 0; |
| 161 | m_reciprocal_high = 0; |
| 162 | |
| 163 | // Accumulators do not power on to a random state |
| 164 | for(int accumIdx = 0; accumIdx < 8; accumIdx++ ) |
| 165 | { |
| 166 | m_accum[accumIdx].q = 0; |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | void rsp_cop2::state_string_export(const int index, astring &string) |
| 171 | { |
| 172 | switch (index) |
| 173 | { |
| 174 | case RSP_V0: |
| 175 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); |
| 176 | break; |
| 177 | case RSP_V1: |
| 178 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); |
| 179 | break; |
| 180 | case RSP_V2: |
| 181 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); |
| 182 | break; |
| 183 | case RSP_V3: |
| 184 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); |
| 185 | break; |
| 186 | case RSP_V4: |
| 187 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); |
| 188 | break; |
| 189 | case RSP_V5: |
| 190 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); |
| 191 | break; |
| 192 | case RSP_V6: |
| 193 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); |
| 194 | break; |
| 195 | case RSP_V7: |
| 196 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); |
| 197 | break; |
| 198 | case RSP_V8: |
| 199 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); |
| 200 | break; |
| 201 | case RSP_V9: |
| 202 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); |
| 203 | break; |
| 204 | case RSP_V10: |
| 205 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); |
| 206 | break; |
| 207 | case RSP_V11: |
| 208 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); |
| 209 | break; |
| 210 | case RSP_V12: |
| 211 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); |
| 212 | break; |
| 213 | case RSP_V13: |
| 214 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); |
| 215 | break; |
| 216 | case RSP_V14: |
| 217 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); |
| 218 | break; |
| 219 | case RSP_V15: |
| 220 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); |
| 221 | break; |
| 222 | case RSP_V16: |
| 223 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); |
| 224 | break; |
| 225 | case RSP_V17: |
| 226 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); |
| 227 | break; |
| 228 | case RSP_V18: |
| 229 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); |
| 230 | break; |
| 231 | case RSP_V19: |
| 232 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); |
| 233 | break; |
| 234 | case RSP_V20: |
| 235 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); |
| 236 | break; |
| 237 | case RSP_V21: |
| 238 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); |
| 239 | break; |
| 240 | case RSP_V22: |
| 241 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); |
| 242 | break; |
| 243 | case RSP_V23: |
| 244 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); |
| 245 | break; |
| 246 | case RSP_V24: |
| 247 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); |
| 248 | break; |
| 249 | case RSP_V25: |
| 250 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); |
| 251 | break; |
| 252 | case RSP_V26: |
| 253 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); |
| 254 | break; |
| 255 | case RSP_V27: |
| 256 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); |
| 257 | break; |
| 258 | case RSP_V28: |
| 259 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); |
| 260 | break; |
| 261 | case RSP_V29: |
| 262 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); |
| 263 | break; |
| 264 | case RSP_V30: |
| 265 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); |
| 266 | break; |
| 267 | case RSP_V31: |
| 268 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); |
| 269 | break; |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | /*************************************************************************** |
| 274 | Vector Load Instructions |
| 275 | ***************************************************************************/ |
| 276 | |
| 277 | void rsp_cop2::handle_lwc2(UINT32 op) |
| 278 | { |
| 279 | int i, end; |
| 280 | UINT32 ea; |
| 281 | int dest = (op >> 16) & 0x1f; |
| 282 | int base = (op >> 21) & 0x1f; |
| 283 | int index = (op >> 7) & 0xf; |
| 284 | int offset = (op & 0x7f); |
| 285 | if (offset & 0x40) |
| 286 | offset |= 0xffffffc0; |
| 287 | |
| 288 | switch ((op >> 11) & 0x1f) |
| 289 | { |
| 290 | case 0x00: /* LBV */ |
| 291 | { |
| 292 | // 31 25 20 15 10 6 0 |
| 293 | // -------------------------------------------------- |
| 294 | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 295 | // -------------------------------------------------- |
| 296 | // |
| 297 | // Load 1 byte to vector byte index |
| 298 | |
| 299 | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 300 | VREG_B(dest, index) = m_rsp.READ8(ea); |
| 301 | break; |
| 302 | } |
| 303 | case 0x01: /* LSV */ |
| 304 | { |
| 305 | // 31 25 20 15 10 6 0 |
| 306 | // -------------------------------------------------- |
| 307 | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 308 | // -------------------------------------------------- |
| 309 | // |
| 310 | // Loads 2 bytes starting from vector byte index |
| 311 | |
| 312 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 313 | |
| 314 | end = index + 2; |
| 315 | |
| 316 | for (i=index; i < end; i++) |
| 317 | { |
| 318 | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 319 | ea++; |
| 320 | } |
| 321 | break; |
| 322 | } |
| 323 | case 0x02: /* LLV */ |
| 324 | { |
| 325 | // 31 25 20 15 10 6 0 |
| 326 | // -------------------------------------------------- |
| 327 | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 328 | // -------------------------------------------------- |
| 329 | // |
| 330 | // Loads 4 bytes starting from vector byte index |
| 331 | |
| 332 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 333 | |
| 334 | end = index + 4; |
| 335 | |
| 336 | for (i=index; i < end; i++) |
| 337 | { |
| 338 | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 339 | ea++; |
| 340 | } |
| 341 | break; |
| 342 | } |
| 343 | case 0x03: /* LDV */ |
| 344 | { |
| 345 | // 31 25 20 15 10 6 0 |
| 346 | // -------------------------------------------------- |
| 347 | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 348 | // -------------------------------------------------- |
| 349 | // |
| 350 | // Loads 8 bytes starting from vector byte index |
| 351 | |
| 352 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 353 | |
| 354 | end = index + 8; |
| 355 | |
| 356 | for (i=index; i < end; i++) |
| 357 | { |
| 358 | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 359 | ea++; |
| 360 | } |
| 361 | break; |
| 362 | } |
| 363 | case 0x04: /* LQV */ |
| 364 | { |
| 365 | // 31 25 20 15 10 6 0 |
| 366 | // -------------------------------------------------- |
| 367 | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 368 | // -------------------------------------------------- |
| 369 | // |
| 370 | // Loads up to 16 bytes starting from vector byte index |
| 371 | |
| 372 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 373 | |
| 374 | end = index + (16 - (ea & 0xf)); |
| 375 | if (end > 16) end = 16; |
| 376 | |
| 377 | for (i=index; i < end; i++) |
| 378 | { |
| 379 | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 380 | ea++; |
| 381 | } |
| 382 | break; |
| 383 | } |
| 384 | case 0x05: /* LRV */ |
| 385 | { |
| 386 | // 31 25 20 15 10 6 0 |
| 387 | // -------------------------------------------------- |
| 388 | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 389 | // -------------------------------------------------- |
| 390 | // |
| 391 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 392 | |
| 393 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 394 | |
| 395 | index = 16 - ((ea & 0xf) - index); |
| 396 | end = 16; |
| 397 | ea &= ~0xf; |
| 398 | |
| 399 | for (i=index; i < end; i++) |
| 400 | { |
| 401 | VREG_B(dest, i) = m_rsp.READ8(ea); |
| 402 | ea++; |
| 403 | } |
| 404 | break; |
| 405 | } |
| 406 | case 0x06: /* LPV */ |
| 407 | { |
| 408 | // 31 25 20 15 10 6 0 |
| 409 | // -------------------------------------------------- |
| 410 | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 411 | // -------------------------------------------------- |
| 412 | // |
| 413 | // Loads a byte as the upper 8 bits of each element |
| 414 | |
| 415 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 416 | |
| 417 | for (i=0; i < 8; i++) |
| 418 | { |
| 419 | VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 420 | } |
| 421 | break; |
| 422 | } |
| 423 | case 0x07: /* LUV */ |
| 424 | { |
| 425 | // 31 25 20 15 10 6 0 |
| 426 | // -------------------------------------------------- |
| 427 | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 428 | // -------------------------------------------------- |
| 429 | // |
| 430 | // Loads a byte as the bits 14-7 of each element |
| 431 | |
| 432 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 433 | |
| 434 | for (i=0; i < 8; i++) |
| 435 | { |
| 436 | VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 437 | } |
| 438 | break; |
| 439 | } |
| 440 | case 0x08: /* LHV */ |
| 441 | { |
| 442 | // 31 25 20 15 10 6 0 |
| 443 | // -------------------------------------------------- |
| 444 | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 445 | // -------------------------------------------------- |
| 446 | // |
| 447 | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 448 | |
| 449 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 450 | |
| 451 | for (i=0; i < 8; i++) |
| 452 | { |
| 453 | VREG_S(dest, i) = m_rsp.READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 454 | } |
| 455 | break; |
| 456 | } |
| 457 | case 0x09: /* LFV */ |
| 458 | { |
| 459 | // 31 25 20 15 10 6 0 |
| 460 | // -------------------------------------------------- |
| 461 | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 462 | // -------------------------------------------------- |
| 463 | // |
| 464 | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 465 | |
| 466 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 467 | |
| 468 | // not sure what happens if 16-byte boundary is crossed... |
| 469 | |
| 470 | end = (index >> 1) + 4; |
| 471 | |
| 472 | for (i=index >> 1; i < end; i++) |
| 473 | { |
| 474 | VREG_S(dest, i) = m_rsp.READ8(ea) << 7; |
| 475 | ea += 4; |
| 476 | } |
| 477 | break; |
| 478 | } |
| 479 | case 0x0a: /* LWV */ |
| 480 | { |
| 481 | // 31 25 20 15 10 6 0 |
| 482 | // -------------------------------------------------- |
| 483 | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 484 | // -------------------------------------------------- |
| 485 | // |
| 486 | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 487 | // after byte index 15 |
| 488 | |
| 489 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 490 | |
| 491 | end = (16 - index) + 16; |
| 492 | |
| 493 | for (i=(16 - index); i < end; i++) |
| 494 | { |
| 495 | VREG_B(dest, i & 0xf) = m_rsp.READ8(ea); |
| 496 | ea += 4; |
| 497 | } |
| 498 | break; |
| 499 | } |
| 500 | case 0x0b: /* LTV */ |
| 501 | { |
| 502 | // 31 25 20 15 10 6 0 |
| 503 | // -------------------------------------------------- |
| 504 | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 505 | // -------------------------------------------------- |
| 506 | // |
| 507 | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 508 | |
| 509 | // FIXME: has a small problem with odd indices |
| 510 | |
| 511 | int element; |
| 512 | int vs = dest; |
| 513 | int ve = dest + 8; |
| 514 | if (ve > 32) |
| 515 | ve = 32; |
| 516 | |
| 517 | element = 7 - (index >> 1); |
| 518 | |
| 519 | if (index & 1) fatalerror("RSP: LTV: index = %d\n", index); |
| 520 | |
| 521 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 522 | |
| 523 | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 524 | for (i=vs; i < ve; i++) |
| 525 | { |
| 526 | element = ((8 - (index >> 1) + (i-vs)) << 1); |
| 527 | VREG_B(i, (element & 0xf)) = m_rsp.READ8(ea); |
| 528 | VREG_B(i, ((element + 1) & 0xf)) = m_rsp.READ8(ea + 1); |
| 529 | |
| 530 | ea += 2; |
| 531 | } |
| 532 | break; |
| 533 | } |
| 534 | |
| 535 | default: |
| 536 | { |
| 537 | m_rsp.unimplemented_opcode(op); |
| 538 | break; |
| 539 | } |
| 540 | } |
| 541 | } |
| 542 | |
| 543 | |
| 544 | /*************************************************************************** |
| 545 | Vector Store Instructions |
| 546 | ***************************************************************************/ |
| 547 | |
| 548 | void rsp_cop2::handle_swc2(UINT32 op) |
| 549 | { |
| 550 | int i, end; |
| 551 | int eaoffset; |
| 552 | UINT32 ea; |
| 553 | int dest = (op >> 16) & 0x1f; |
| 554 | int base = (op >> 21) & 0x1f; |
| 555 | int index = (op >> 7) & 0xf; |
| 556 | int offset = (op & 0x7f); |
| 557 | if (offset & 0x40) |
| 558 | offset |= 0xffffffc0; |
| 559 | |
| 560 | switch ((op >> 11) & 0x1f) |
| 561 | { |
| 562 | case 0x00: /* SBV */ |
| 563 | { |
| 564 | // 31 25 20 15 10 6 0 |
| 565 | // -------------------------------------------------- |
| 566 | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 567 | // -------------------------------------------------- |
| 568 | // |
| 569 | // Stores 1 byte from vector byte index |
| 570 | |
| 571 | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 572 | m_rsp.WRITE8(ea, VREG_B(dest, index)); |
| 573 | break; |
| 574 | } |
| 575 | case 0x01: /* SSV */ |
| 576 | { |
| 577 | // 31 25 20 15 10 6 0 |
| 578 | // -------------------------------------------------- |
| 579 | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 580 | // -------------------------------------------------- |
| 581 | // |
| 582 | // Stores 2 bytes starting from vector byte index |
| 583 | |
| 584 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 585 | |
| 586 | end = index + 2; |
| 587 | |
| 588 | for (i=index; i < end; i++) |
| 589 | { |
| 590 | m_rsp.WRITE8(ea, VREG_B(dest, i)); |
| 591 | ea++; |
| 592 | } |
| 593 | break; |
| 594 | } |
| 595 | case 0x02: /* SLV */ |
| 596 | { |
| 597 | // 31 25 20 15 10 6 0 |
| 598 | // -------------------------------------------------- |
| 599 | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 600 | // -------------------------------------------------- |
| 601 | // |
| 602 | // Stores 4 bytes starting from vector byte index |
| 603 | |
| 604 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 605 | |
| 606 | end = index + 4; |
| 607 | |
| 608 | for (i=index; i < end; i++) |
| 609 | { |
| 610 | m_rsp.WRITE8(ea, VREG_B(dest, i)); |
| 611 | ea++; |
| 612 | } |
| 613 | break; |
| 614 | } |
| 615 | case 0x03: /* SDV */ |
| 616 | { |
| 617 | // 31 25 20 15 10 6 0 |
| 618 | // -------------------------------------------------- |
| 619 | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 620 | // -------------------------------------------------- |
| 621 | // |
| 622 | // Stores 8 bytes starting from vector byte index |
| 623 | |
| 624 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 625 | |
| 626 | end = index + 8; |
| 627 | |
| 628 | for (i=index; i < end; i++) |
| 629 | { |
| 630 | m_rsp.WRITE8(ea, VREG_B(dest, i)); |
| 631 | ea++; |
| 632 | } |
| 633 | break; |
| 634 | } |
| 635 | case 0x04: /* SQV */ |
| 636 | { |
| 637 | // 31 25 20 15 10 6 0 |
| 638 | // -------------------------------------------------- |
| 639 | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 640 | // -------------------------------------------------- |
| 641 | // |
| 642 | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 643 | |
| 644 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 645 | |
| 646 | end = index + (16 - (ea & 0xf)); |
| 647 | |
| 648 | for (i=index; i < end; i++) |
| 649 | { |
| 650 | m_rsp.WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 651 | ea++; |
| 652 | } |
| 653 | break; |
| 654 | } |
| 655 | case 0x05: /* SRV */ |
| 656 | { |
| 657 | // 31 25 20 15 10 6 0 |
| 658 | // -------------------------------------------------- |
| 659 | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 660 | // -------------------------------------------------- |
| 661 | // |
| 662 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 663 | |
| 664 | int o; |
| 665 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 666 | |
| 667 | end = index + (ea & 0xf); |
| 668 | o = (16 - (ea & 0xf)) & 0xf; |
| 669 | ea &= ~0xf; |
| 670 | |
| 671 | for (i=index; i < end; i++) |
| 672 | { |
| 673 | m_rsp.WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 674 | ea++; |
| 675 | } |
| 676 | break; |
| 677 | } |
| 678 | case 0x06: /* SPV */ |
| 679 | { |
| 680 | // 31 25 20 15 10 6 0 |
| 681 | // -------------------------------------------------- |
| 682 | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 683 | // -------------------------------------------------- |
| 684 | // |
| 685 | // Stores upper 8 bits of each element |
| 686 | |
| 687 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 688 | end = index + 8; |
| 689 | |
| 690 | for (i=index; i < end; i++) |
| 691 | { |
| 692 | if ((i & 0xf) < 8) |
| 693 | { |
| 694 | m_rsp.WRITE8(ea, VREG_B(dest, ((i & 0xf) << 1))); |
| 695 | } |
| 696 | else |
| 697 | { |
| 698 | m_rsp.WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 699 | } |
| 700 | ea++; |
| 701 | } |
| 702 | break; |
| 703 | } |
| 704 | case 0x07: /* SUV */ |
| 705 | { |
| 706 | // 31 25 20 15 10 6 0 |
| 707 | // -------------------------------------------------- |
| 708 | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 709 | // -------------------------------------------------- |
| 710 | // |
| 711 | // Stores bits 14-7 of each element |
| 712 | |
| 713 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 714 | end = index + 8; |
| 715 | |
| 716 | for (i=index; i < end; i++) |
| 717 | { |
| 718 | if ((i & 0xf) < 8) |
| 719 | { |
| 720 | m_rsp.WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 721 | } |
| 722 | else |
| 723 | { |
| 724 | m_rsp.WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 725 | } |
| 726 | ea++; |
| 727 | } |
| 728 | break; |
| 729 | } |
| 730 | case 0x08: /* SHV */ |
| 731 | { |
| 732 | // 31 25 20 15 10 6 0 |
| 733 | // -------------------------------------------------- |
| 734 | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 735 | // -------------------------------------------------- |
| 736 | // |
| 737 | // Stores bits 14-7 of each element, with 2-byte stride |
| 738 | |
| 739 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 740 | |
| 741 | for (i=0; i < 8; i++) |
| 742 | { |
| 743 | UINT8 d = ((VREG_B(dest, ((index + (i << 1) + 0) & 0xf))) << 1) | |
| 744 | ((VREG_B(dest, ((index + (i << 1) + 1) & 0xf))) >> 7); |
| 745 | |
| 746 | m_rsp.WRITE8(ea, d); |
| 747 | ea += 2; |
| 748 | } |
| 749 | break; |
| 750 | } |
| 751 | case 0x09: /* SFV */ |
| 752 | { |
| 753 | // 31 25 20 15 10 6 0 |
| 754 | // -------------------------------------------------- |
| 755 | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 756 | // -------------------------------------------------- |
| 757 | // |
| 758 | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 759 | |
| 760 | // FIXME: only works for index 0 and index 8 |
| 761 | |
| 762 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 763 | |
| 764 | eaoffset = ea & 0xf; |
| 765 | ea &= ~0xf; |
| 766 | |
| 767 | end = (index >> 1) + 4; |
| 768 | |
| 769 | for (i=index >> 1; i < end; i++) |
| 770 | { |
| 771 | m_rsp.WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 772 | eaoffset += 4; |
| 773 | } |
| 774 | break; |
| 775 | } |
| 776 | case 0x0a: /* SWV */ |
| 777 | { |
| 778 | // 31 25 20 15 10 6 0 |
| 779 | // -------------------------------------------------- |
| 780 | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 781 | // -------------------------------------------------- |
| 782 | // |
| 783 | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 784 | // after byte index 15 |
| 785 | |
| 786 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 787 | |
| 788 | eaoffset = ea & 0xf; |
| 789 | ea &= ~0xf; |
| 790 | |
| 791 | end = index + 16; |
| 792 | |
| 793 | for (i=index; i < end; i++) |
| 794 | { |
| 795 | m_rsp.WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); |
| 796 | eaoffset++; |
| 797 | } |
| 798 | break; |
| 799 | } |
| 800 | case 0x0b: /* STV */ |
| 801 | { |
| 802 | // 31 25 20 15 10 6 0 |
| 803 | // -------------------------------------------------- |
| 804 | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 805 | // -------------------------------------------------- |
| 806 | // |
| 807 | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 808 | |
| 809 | int element; |
| 810 | int vs = dest; |
| 811 | int ve = dest + 8; |
| 812 | if (ve > 32) |
| 813 | ve = 32; |
| 814 | |
| 815 | element = 8 - (index >> 1); |
| 816 | |
| 817 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 818 | |
| 819 | eaoffset = (ea & 0xf) + (element * 2); |
| 820 | ea &= ~0xf; |
| 821 | |
| 822 | for (i=vs; i < ve; i++) |
| 823 | { |
| 824 | m_rsp.WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); |
| 825 | eaoffset += 2; |
| 826 | element++; |
| 827 | } |
| 828 | break; |
| 829 | } |
| 830 | |
| 831 | default: |
| 832 | { |
| 833 | m_rsp.unimplemented_opcode(op); |
| 834 | break; |
| 835 | } |
| 836 | } |
| 837 | } |
| 838 | |
| 839 | /*************************************************************************** |
| 840 | Vector Accumulator Helpers |
| 841 | ***************************************************************************/ |
| 842 | |
| 843 | inline UINT16 rsp_cop2::SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive) |
| 844 | { |
| 845 | if ((INT16)ACCUM_H(accum) < 0) |
| 846 | { |
| 847 | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 848 | { |
| 849 | return negative; |
| 850 | } |
| 851 | else |
| 852 | { |
| 853 | if ((INT16)ACCUM_M(accum) >= 0) |
| 854 | { |
| 855 | return negative; |
| 856 | } |
| 857 | else |
| 858 | { |
| 859 | return ACCUM_M(accum); |
| 860 | } |
| 861 | } |
| 862 | } |
| 863 | else |
| 864 | { |
| 865 | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 866 | { |
| 867 | return positive; |
| 868 | } |
| 869 | else |
| 870 | { |
| 871 | if ((INT16)ACCUM_M(accum) < 0) |
| 872 | { |
| 873 | return positive; |
| 874 | } |
| 875 | else |
| 876 | { |
| 877 | return ACCUM_M(accum); |
| 878 | } |
| 879 | } |
| 880 | } |
| 881 | } |
| 882 | |
| 883 | UINT16 rsp_cop2::SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 884 | { |
| 885 | if ((INT16)ACCUM_H(accum) < 0) |
| 886 | { |
| 887 | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 888 | { |
| 889 | return negative; |
| 890 | } |
| 891 | else |
| 892 | { |
| 893 | if ((INT16)ACCUM_M(accum) >= 0) |
| 894 | { |
| 895 | return negative; |
| 896 | } |
| 897 | else |
| 898 | { |
| 899 | if (slice == 0) |
| 900 | { |
| 901 | return ACCUM_L(accum); |
| 902 | } |
| 903 | else if (slice == 1) |
| 904 | { |
| 905 | return ACCUM_M(accum); |
| 906 | } |
| 907 | } |
| 908 | } |
| 909 | } |
| 910 | else |
| 911 | { |
| 912 | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 913 | { |
| 914 | return positive; |
| 915 | } |
| 916 | else |
| 917 | { |
| 918 | if ((INT16)ACCUM_M(accum) < 0) |
| 919 | { |
| 920 | return positive; |
| 921 | } |
| 922 | else |
| 923 | { |
| 924 | if (slice == 0) |
| 925 | { |
| 926 | return ACCUM_L(accum); |
| 927 | } |
| 928 | else |
| 929 | { |
| 930 | return ACCUM_M(accum); |
| 931 | } |
| 932 | } |
| 933 | } |
| 934 | } |
| 935 | return 0; |
| 936 | } |
| 937 | |
| 938 | |
| 939 | /*************************************************************************** |
| 940 | Vector Opcodes |
| 941 | ***************************************************************************/ |
| 942 | |
| 943 | void rsp_cop2::handle_vector_ops(UINT32 op) |
| 944 | { |
| 945 | int i; |
| 946 | |
| 947 | // Opcode legend: |
| 948 | // E = VS2 element type |
| 949 | // S = VS1, Source vector 1 |
| 950 | // T = VS2, Source vector 2 |
| 951 | // D = Destination vector |
| 952 | |
| 953 | switch (op & 0x3f) |
| 954 | { |
| 955 | case 0x00: /* VMULF */ |
| 956 | { |
| 957 | // 31 25 24 20 15 10 5 0 |
| 958 | // ------------------------------------------------------ |
| 959 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 960 | // ------------------------------------------------------ |
| 961 | // |
| 962 | // Multiplies signed integer by signed integer * 2 |
| 963 | |
| 964 | for (i=0; i < 8; i++) |
| 965 | { |
| 966 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 967 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 968 | |
| 969 | if (s1 == -32768 && s2 == -32768) |
| 970 | { |
| 971 | // overflow |
| 972 | SET_ACCUM_H(0, i); |
| 973 | SET_ACCUM_M(-32768, i); |
| 974 | SET_ACCUM_L(-32768, i); |
| 975 | m_vres[i] = 0x7fff; |
| 976 | } |
| 977 | else |
| 978 | { |
| 979 | INT64 r = s1 * s2 * 2; |
| 980 | r += 0x8000; // rounding ? |
| 981 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 982 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 983 | SET_ACCUM_L((UINT16)(r), i); |
| 984 | m_vres[i] = ACCUM_M(i); |
| 985 | } |
| 986 | } |
| 987 | WRITEBACK_RESULT(); |
| 988 | |
| 989 | break; |
| 990 | } |
| 991 | |
| 992 | case 0x01: /* VMULU */ |
| 993 | { |
| 994 | // 31 25 24 20 15 10 5 0 |
| 995 | // ------------------------------------------------------ |
| 996 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 997 | // ------------------------------------------------------ |
| 998 | // |
| 999 | |
| 1000 | for (i=0; i < 8; i++) |
| 1001 | { |
| 1002 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1003 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1004 | |
| 1005 | INT64 r = s1 * s2 * 2; |
| 1006 | r += 0x8000; // rounding ? |
| 1007 | |
| 1008 | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 1009 | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 1010 | SET_ACCUM_L((UINT16)(r), i); |
| 1011 | |
| 1012 | if (r < 0) |
| 1013 | { |
| 1014 | m_vres[i] = 0; |
| 1015 | } |
| 1016 | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 1017 | { |
| 1018 | m_vres[i] = -1; |
| 1019 | } |
| 1020 | else |
| 1021 | { |
| 1022 | m_vres[i] = ACCUM_M(i); |
| 1023 | } |
| 1024 | } |
| 1025 | WRITEBACK_RESULT(); |
| 1026 | break; |
| 1027 | } |
| 1028 | |
| 1029 | case 0x04: /* VMUDL */ |
| 1030 | { |
| 1031 | // 31 25 24 20 15 10 5 0 |
| 1032 | // ------------------------------------------------------ |
| 1033 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | |
| 1034 | // ------------------------------------------------------ |
| 1035 | // |
| 1036 | // Multiplies unsigned fraction by unsigned fraction |
| 1037 | // Stores the higher 16 bits of the 32-bit result to accumulator |
| 1038 | // The low slice of accumulator is stored into destination element |
| 1039 | |
| 1040 | for (i=0; i < 8; i++) |
| 1041 | { |
| 1042 | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1043 | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1044 | UINT32 r = s1 * s2; |
| 1045 | |
| 1046 | SET_ACCUM_H(0, i); |
| 1047 | SET_ACCUM_M(0, i); |
| 1048 | SET_ACCUM_L((UINT16)(r >> 16), i); |
| 1049 | |
| 1050 | m_vres[i] = ACCUM_L(i); |
| 1051 | } |
| 1052 | WRITEBACK_RESULT(); |
| 1053 | break; |
| 1054 | } |
| 1055 | |
| 1056 | case 0x05: /* VMUDM */ |
| 1057 | { |
| 1058 | // 31 25 24 20 15 10 5 0 |
| 1059 | // ------------------------------------------------------ |
| 1060 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1061 | // ------------------------------------------------------ |
| 1062 | // |
| 1063 | // Multiplies signed integer by unsigned fraction |
| 1064 | // The result is stored into accumulator |
| 1065 | // The middle slice of accumulator is stored into destination element |
| 1066 | |
| 1067 | for (i=0; i < 8; i++) |
| 1068 | { |
| 1069 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1070 | INT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1071 | INT32 r = s1 * s2; |
| 1072 | |
| 1073 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 1074 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1075 | SET_ACCUM_L((UINT16)(r), i); |
| 1076 | |
| 1077 | m_vres[i] = ACCUM_M(i); |
| 1078 | } |
| 1079 | WRITEBACK_RESULT(); |
| 1080 | break; |
| 1081 | |
| 1082 | } |
| 1083 | |
| 1084 | case 0x06: /* VMUDN */ |
| 1085 | { |
| 1086 | // 31 25 24 20 15 10 5 0 |
| 1087 | // ------------------------------------------------------ |
| 1088 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1089 | // ------------------------------------------------------ |
| 1090 | // |
| 1091 | // Multiplies unsigned fraction by signed integer |
| 1092 | // The result is stored into accumulator |
| 1093 | // The low slice of accumulator is stored into destination element |
| 1094 | |
| 1095 | for (i=0; i < 8; i++) |
| 1096 | { |
| 1097 | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1098 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1099 | INT32 r = s1 * s2; |
| 1100 | |
| 1101 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 1102 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1103 | SET_ACCUM_L((UINT16)(r), i); |
| 1104 | |
| 1105 | m_vres[i] = ACCUM_L(i); |
| 1106 | } |
| 1107 | WRITEBACK_RESULT(); |
| 1108 | break; |
| 1109 | } |
| 1110 | |
| 1111 | case 0x07: /* VMUDH */ |
| 1112 | { |
| 1113 | // 31 25 24 20 15 10 5 0 |
| 1114 | // ------------------------------------------------------ |
| 1115 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1116 | // ------------------------------------------------------ |
| 1117 | // |
| 1118 | // Multiplies signed integer by signed integer |
| 1119 | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1120 | // The highest 32 bits of accumulator is saturated into destination element |
| 1121 | |
| 1122 | for (i=0; i < 8; i++) |
| 1123 | { |
| 1124 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1125 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1126 | INT32 r = s1 * s2; |
| 1127 | |
| 1128 | SET_ACCUM_H((INT16)(r >> 16), i); |
| 1129 | SET_ACCUM_M((UINT16)(r), i); |
| 1130 | SET_ACCUM_L(0, i); |
| 1131 | |
| 1132 | if (r < -32768) r = -32768; |
| 1133 | if (r > 32767) r = 32767; |
| 1134 | m_vres[i] = (INT16)(r); |
| 1135 | } |
| 1136 | WRITEBACK_RESULT(); |
| 1137 | break; |
| 1138 | } |
| 1139 | |
| 1140 | case 0x08: /* VMACF */ |
| 1141 | { |
| 1142 | // 31 25 24 20 15 10 5 0 |
| 1143 | // ------------------------------------------------------ |
| 1144 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1145 | // ------------------------------------------------------ |
| 1146 | // |
| 1147 | // Multiplies signed integer by signed integer * 2 |
| 1148 | // The result is added to accumulator |
| 1149 | |
| 1150 | for (i=0; i < 8; i++) |
| 1151 | { |
| 1152 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1153 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1154 | INT32 r = s1 * s2; |
| 1155 | |
| 1156 | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1157 | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1158 | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1159 | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1160 | |
| 1161 | q += (INT64)(r) << 17; |
| 1162 | |
| 1163 | SET_ACCUM_LL((UINT16)q, i); |
| 1164 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1165 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1166 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1167 | |
| 1168 | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1169 | } |
| 1170 | WRITEBACK_RESULT(); |
| 1171 | break; |
| 1172 | } |
| 1173 | |
| 1174 | case 0x09: /* VMACU */ |
| 1175 | { |
| 1176 | // 31 25 24 20 15 10 5 0 |
| 1177 | // ------------------------------------------------------ |
| 1178 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1179 | // ------------------------------------------------------ |
| 1180 | // |
| 1181 | |
| 1182 | for (i = 0; i < 8; i++) |
| 1183 | { |
| 1184 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1185 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1186 | INT32 r1 = s1 * s2; |
| 1187 | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1188 | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1189 | |
| 1190 | SET_ACCUM_L((UINT16)(r2), i); |
| 1191 | SET_ACCUM_M((UINT16)(r3), i); |
| 1192 | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31), i); |
| 1193 | |
| 1194 | if ((INT16)ACCUM_H(i) < 0) |
| 1195 | { |
| 1196 | m_vres[i] = 0; |
| 1197 | } |
| 1198 | else |
| 1199 | { |
| 1200 | if (ACCUM_H(i) != 0) |
| 1201 | { |
| 1202 | m_vres[i] = 0xffff; |
| 1203 | } |
| 1204 | else |
| 1205 | { |
| 1206 | if ((INT16)ACCUM_M(i) < 0) |
| 1207 | { |
| 1208 | m_vres[i] = 0xffff; |
| 1209 | } |
| 1210 | else |
| 1211 | { |
| 1212 | m_vres[i] = ACCUM_M(i); |
| 1213 | } |
| 1214 | } |
| 1215 | } |
| 1216 | } |
| 1217 | WRITEBACK_RESULT(); |
| 1218 | break; |
| 1219 | } |
| 1220 | |
| 1221 | case 0x0c: /* VMADL */ |
| 1222 | { |
| 1223 | // 31 25 24 20 15 10 5 0 |
| 1224 | // ------------------------------------------------------ |
| 1225 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1226 | // ------------------------------------------------------ |
| 1227 | // |
| 1228 | // Multiplies unsigned fraction by unsigned fraction |
| 1229 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1230 | // The low slice of accumulator is stored into destination element |
| 1231 | |
| 1232 | for (i = 0; i < 8; i++) |
| 1233 | { |
| 1234 | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1235 | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1236 | UINT32 r1 = s1 * s2; |
| 1237 | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1238 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1239 | |
| 1240 | SET_ACCUM_L((UINT16)(r2), i); |
| 1241 | SET_ACCUM_M((UINT16)(r3), i); |
| 1242 | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 1243 | |
| 1244 | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1245 | } |
| 1246 | WRITEBACK_RESULT(); |
| 1247 | break; |
| 1248 | } |
| 1249 | |
| 1250 | case 0x0d: /* VMADM */ |
| 1251 | { |
| 1252 | // 31 25 24 20 15 10 5 0 |
| 1253 | // ------------------------------------------------------ |
| 1254 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1255 | // ------------------------------------------------------ |
| 1256 | // |
| 1257 | // Multiplies signed integer by unsigned fraction |
| 1258 | // The result is added into accumulator |
| 1259 | // The middle slice of accumulator is stored into destination element |
| 1260 | |
| 1261 | for (i=0; i < 8; i++) |
| 1262 | { |
| 1263 | UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); /* sign-extended to 32 bits */ |
| 1264 | UINT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1265 | UINT32 r1 = s1 * s2; /* 32-bit product, wraps modulo 2^32 */ |
| 1266 | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); /* low slice plus low half of the product */ |
| 1267 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); /* middle slice plus high half of the product plus carry out of the low slice */ |
| 1268 | |
| 1269 | SET_ACCUM_L((UINT16)(r2), i); |
| 1270 | SET_ACCUM_M((UINT16)(r3), i); |
| 1271 | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16), i); /* carry out of the middle slice */ |
| 1272 | if ((INT32)(r1) < 0) /* product is negative when read as signed: take one off the high slice */ |
| 1273 | SET_ACCUM_H(ACCUM_H(i) - 1, i); /* arguments are (value, element index), matching the call two lines up */ |
| 1274 | |
| 1275 | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1276 | } |
| 1277 | WRITEBACK_RESULT(); |
| 1278 | break; |
| 1279 | } |
| 1280 | |
| 1281 | case 0x0e: /* VMADN */ |
| 1282 | { |
| 1283 | // 31 25 24 20 15 10 5 0 |
| 1284 | // ------------------------------------------------------ |
| 1285 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | |
| 1286 | // ------------------------------------------------------ |
| 1287 | // |
| 1288 | // Multiplies unsigned fraction by signed integer |
| 1289 | // The result is added into accumulator |
| 1290 | // The low slice of accumulator is stored into destination element |
| 1291 | |
| 1292 | for (i=0; i < 8; i++) |
| 1293 | { |
| 1294 | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1295 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1296 | |
| 1297 | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1298 | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1299 | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1300 | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1301 | q += (INT64)(s1*s2) << 16; |
| 1302 | |
| 1303 | SET_ACCUM_LL((UINT16)q, i); |
| 1304 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1305 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1306 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1307 | |
| 1308 | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1309 | } |
| 1310 | WRITEBACK_RESULT(); |
| 1311 | |
| 1312 | break; |
| 1313 | } |
| 1314 | |
| 1315 | case 0x0f: /* VMADH */ |
| 1316 | { |
| 1317 | // 31 25 24 20 15 10 5 0 |
| 1318 | // ------------------------------------------------------ |
| 1319 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 1320 | // ------------------------------------------------------ |
| 1321 | // |
| 1322 | // Multiplies signed integer by signed integer |
| 1323 | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 1324 | // The highest 32 bits of accumulator is saturated into destination element |
| 1325 | |
| 1326 | for (i = 0; i < 8; i++) |
| 1327 | { |
| 1328 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1329 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1330 | |
| 1331 | INT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1332 | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1333 | accum += s1 * s2; |
| 1334 | |
| 1335 | SET_ACCUM_H((UINT16)(accum >> 16), i); |
| 1336 | SET_ACCUM_M((UINT16)accum, i); |
| 1337 | |
| 1338 | m_vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 1339 | } |
| 1340 | WRITEBACK_RESULT(); |
| 1341 | |
| 1342 | break; |
| 1343 | } |
| 1344 | |
| 1345 | case 0x10: /* VADD */ |
| 1346 | { |
| 1347 | // 31 25 24 20 15 10 5 0 |
| 1348 | // ------------------------------------------------------ |
| 1349 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 1350 | // ------------------------------------------------------ |
| 1351 | // |
| 1352 | // Adds two vector registers and carry flag, the result is saturated to 32767 |
| 1353 | |
| 1354 | // TODO: check VS2REG == VDREG |
| 1355 | |
| 1356 | for (i=0; i < 8; i++) |
| 1357 | { |
| 1358 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1359 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1360 | INT32 r = s1 + s2 + (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1361 | |
| 1362 | SET_ACCUM_L((INT16)(r), i); |
| 1363 | |
| 1364 | if (r > 32767) r = 32767; |
| 1365 | if (r < -32768) r = -32768; |
| 1366 | m_vres[i] = (INT16)(r); |
| 1367 | } |
| 1368 | CLEAR_ZERO_FLAGS(); |
| 1369 | CLEAR_CARRY_FLAGS(); |
| 1370 | WRITEBACK_RESULT(); |
| 1371 | break; |
| 1372 | } |
| 1373 | |
| 1374 | case 0x11: /* VSUB */ |
| 1375 | { |
| 1376 | // 31 25 24 20 15 10 5 0 |
| 1377 | // ------------------------------------------------------ |
| 1378 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 1379 | // ------------------------------------------------------ |
| 1380 | // |
| 1381 | // Subtracts two vector registers and carry flag, the result is saturated to -32768 |
| 1382 | |
| 1383 | // TODO: check VS2REG == VDREG |
| 1384 | |
| 1385 | for (i = 0; i < 8; i++) |
| 1386 | { |
| 1387 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1388 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1389 | INT32 r = s1 - s2 - (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 1390 | |
| 1391 | SET_ACCUM_L((INT16)(r), i); |
| 1392 | |
| 1393 | if (r > 32767) r = 32767; |
| 1394 | if (r < -32768) r = -32768; |
| 1395 | |
| 1396 | m_vres[i] = (INT16)(r); |
| 1397 | } |
| 1398 | CLEAR_ZERO_FLAGS(); |
| 1399 | CLEAR_CARRY_FLAGS(); |
| 1400 | WRITEBACK_RESULT(); |
| 1401 | break; |
| 1402 | } |
| 1403 | |
| 1404 | case 0x13: /* VABS */ |
| 1405 | { |
| 1406 | // 31 25 24 20 15 10 5 0 |
| 1407 | // ------------------------------------------------------ |
| 1408 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 1409 | // ------------------------------------------------------ |
| 1410 | // |
| 1411 | // Changes the sign of source register 2 if source register 1 is negative and stores |
| 1412 | // the result to destination register |
| 1413 | |
| 1414 | for (i=0; i < 8; i++) |
| 1415 | { |
| 1416 | INT16 s1 = (INT16)VREG_S(VS1REG, i); |
| 1417 | INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1418 | |
| 1419 | if (s1 < 0) |
| 1420 | { |
| 1421 | if (s2 == -32768) |
| 1422 | { |
| 1423 | m_vres[i] = 32767; |
| 1424 | } |
| 1425 | else |
| 1426 | { |
| 1427 | m_vres[i] = -s2; |
| 1428 | } |
| 1429 | } |
| 1430 | else if (s1 > 0) |
| 1431 | { |
| 1432 | m_vres[i] = s2; |
| 1433 | } |
| 1434 | else |
| 1435 | { |
| 1436 | m_vres[i] = 0; |
| 1437 | } |
| 1438 | |
| 1439 | SET_ACCUM_L(m_vres[i], i); |
| 1440 | } |
| 1441 | WRITEBACK_RESULT(); |
| 1442 | break; |
| 1443 | } |
| 1444 | |
| 1445 | case 0x14: /* VADDC */ |
| 1446 | { |
| 1447 | // 31 25 24 20 15 10 5 0 |
| 1448 | // ------------------------------------------------------ |
| 1449 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 1450 | // ------------------------------------------------------ |
| 1451 | // |
| 1452 | // Adds two vector registers, the carry out is stored into carry register |
| 1453 | |
| 1454 | // TODO: check VS2REG = VDREG |
| 1455 | |
| 1456 | CLEAR_ZERO_FLAGS(); |
| 1457 | CLEAR_CARRY_FLAGS(); |
| 1458 | |
| 1459 | for (i=0; i < 8; i++) |
| 1460 | { |
| 1461 | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1462 | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1463 | INT32 r = s1 + s2; |
| 1464 | |
| 1465 | m_vres[i] = (INT16)(r); |
| 1466 | SET_ACCUM_L((INT16)(r), i); |
| 1467 | |
| 1468 | if (r & 0xffff0000) |
| 1469 | { |
| 1470 | SET_CARRY_FLAG(i); |
| 1471 | } |
| 1472 | } |
| 1473 | WRITEBACK_RESULT(); |
| 1474 | break; |
| 1475 | } |
| 1476 | |
| 1477 | case 0x15: /* VSUBC */ |
| 1478 | { |
| 1479 | // 31 25 24 20 15 10 5 0 |
| 1480 | // ------------------------------------------------------ |
| 1481 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 1482 | // ------------------------------------------------------ |
| 1483 | // |
| 1484 | // Subtracts two vector registers, the carry out is stored into carry register |
| 1485 | |
| 1486 | // TODO: check VS2REG = VDREG |
| 1487 | |
| 1488 | CLEAR_ZERO_FLAGS(); |
| 1489 | CLEAR_CARRY_FLAGS(); |
| 1490 | |
| 1491 | for (i=0; i < 8; i++) |
| 1492 | { |
| 1493 | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1494 | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1495 | INT32 r = s1 - s2; |
| 1496 | |
| 1497 | m_vres[i] = (INT16)(r); |
| 1498 | SET_ACCUM_L((UINT16)(r), i); |
| 1499 | |
| 1500 | if ((UINT16)(r) != 0) |
| 1501 | { |
| 1502 | SET_ZERO_FLAG(i); |
| 1503 | } |
| 1504 | if (r & 0xffff0000) |
| 1505 | { |
| 1506 | SET_CARRY_FLAG(i); |
| 1507 | } |
| 1508 | } |
| 1509 | WRITEBACK_RESULT(); |
| 1510 | break; |
| 1511 | } |
| 1512 | |
| 1513 | case 0x1d: /* VSAW */ |
| 1514 | { |
| 1515 | // 31 25 24 20 15 10 5 0 |
| 1516 | // ------------------------------------------------------ |
| 1517 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 1518 | // ------------------------------------------------------ |
| 1519 | // |
| 1520 | // Stores high, middle or low slice of accumulator to destination vector |
| 1521 | // The slice is chosen by the element field: 0x08 = high, 0x09 = middle, 0x0a = low |
| 1522 | switch (EL) |
| 1523 | { |
| 1524 | case 0x08: // VSAWH: copy the high slice of all eight accumulator elements |
| 1525 | { |
| 1526 | for (i=0; i < 8; i++) |
| 1527 | { |
| 1528 | VREG_S(VDREG, i) = ACCUM_H(i); |
| 1529 | } |
| 1530 | break; |
| 1531 | } |
| 1532 | case 0x09: // VSAWM: copy the middle slice of all eight accumulator elements |
| 1533 | { |
| 1534 | for (i=0; i < 8; i++) |
| 1535 | { |
| 1536 | VREG_S(VDREG, i) = ACCUM_M(i); |
| 1537 | } |
| 1538 | break; |
| 1539 | } |
| 1540 | case 0x0a: // VSAWL: copy the low slice of all eight accumulator elements |
| 1541 | { |
| 1542 | for (i=0; i < 8; i++) |
| 1543 | { |
| 1544 | VREG_S(VDREG, i) = ACCUM_L(i); |
| 1545 | } |
| 1546 | break; |
| 1547 | } |
| 1548 | default: //fatalerror("RSP: VSAW: el = %d\n", EL); -- disabled; the printf and exit below are used instead |
| 1549 | printf("RSP: VSAW: el = %d\n", EL);// report the unhandled element selector |
| 1550 | exit(0); /* NOTE(review): ends the whole process with a success status on an unhandled selector - confirm this is intended */ |
| 1551 | } |
| 1552 | break; |
| 1553 | } |
| 1554 | |
| 1555 | case 0x20: /* VLT */ |
| 1556 | { |
| 1557 | // 31 25 24 20 15 10 5 0 |
| 1558 | // ------------------------------------------------------ |
| 1559 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 1560 | // ------------------------------------------------------ |
| 1561 | // |
| 1562 | // Sets compare flags if elements in VS1 are less than VS2 |
| 1563 | // Moves the element in VS2 to destination vector |
| 1564 | |
| 1565 | CLEAR_COMPARE_FLAGS(); |
| 1566 | CLEAR_CLIP2_FLAGS(); |
| 1567 | |
| 1568 | for (i=0; i < 8; i++) |
| 1569 | { |
| 1570 | INT16 s1, s2; |
| 1571 | s1 = VREG_S(VS1REG, i); |
| 1572 | s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1573 | if (s1 < s2) |
| 1574 | { |
| 1575 | SET_COMPARE_FLAG(i); |
| 1576 | } |
| 1577 | else if (s1 == s2) |
| 1578 | { |
| 1579 | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 1580 | { |
| 1581 | SET_COMPARE_FLAG(i); |
| 1582 | } |
| 1583 | } |
| 1584 | |
| 1585 | if (COMPARE_FLAG(i) != 0) |
| 1586 | { |
| 1587 | m_vres[i] = s1; |
| 1588 | } |
| 1589 | else |
| 1590 | { |
| 1591 | m_vres[i] = s2; |
| 1592 | } |
| 1593 | |
| 1594 | SET_ACCUM_L(m_vres[i], i); |
| 1595 | } |
| 1596 | |
| 1597 | CLEAR_CARRY_FLAGS(); |
| 1598 | CLEAR_ZERO_FLAGS(); |
| 1599 | WRITEBACK_RESULT(); |
| 1600 | break; |
| 1601 | } |
| 1602 | |
| 1603 | case 0x21: /* VEQ */ |
| 1604 | { |
| 1605 | // 31 25 24 20 15 10 5 0 |
| 1606 | // ------------------------------------------------------ |
| 1607 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 1608 | // ------------------------------------------------------ |
| 1609 | // |
| 1610 | // Sets compare flags if elements in VS1 are equal with VS2 |
| 1611 | // Moves the element in VS2 to destination vector |
| 1612 | |
| 1613 | CLEAR_COMPARE_FLAGS(); |
| 1614 | CLEAR_CLIP2_FLAGS(); |
| 1615 | |
| 1616 | for (i = 0; i < 8; i++) |
| 1617 | { |
| 1618 | INT16 s1 = VREG_S(VS1REG, i); |
| 1619 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1620 | |
| 1621 | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 1622 | { |
| 1623 | SET_COMPARE_FLAG(i); |
| 1624 | m_vres[i] = s1; |
| 1625 | } |
| 1626 | else |
| 1627 | { |
| 1628 | m_vres[i] = s2; |
| 1629 | } |
| 1630 | SET_ACCUM_L(m_vres[i], i); |
| 1631 | } |
| 1632 | |
| 1633 | CLEAR_ZERO_FLAGS(); |
| 1634 | CLEAR_CARRY_FLAGS(); |
| 1635 | WRITEBACK_RESULT(); |
| 1636 | break; |
| 1637 | } |
| 1638 | |
| 1639 | case 0x22: /* VNE */ |
| 1640 | { |
| 1641 | // 31 25 24 20 15 10 5 0 |
| 1642 | // ------------------------------------------------------ |
| 1643 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 1644 | // ------------------------------------------------------ |
| 1645 | // |
| 1646 | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 1647 | // Moves the element in VS2 to destination vector |
| 1648 | |
| 1649 | CLEAR_COMPARE_FLAGS(); |
| 1650 | CLEAR_CLIP2_FLAGS(); |
| 1651 | |
| 1652 | for (i = 0; i < 8; i++) |
| 1653 | { |
| 1654 | INT16 s1 = VREG_S(VS1REG, i); |
| 1655 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1656 | |
| 1657 | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 1658 | { |
| 1659 | SET_COMPARE_FLAG(i); |
| 1660 | m_vres[i] = s1; |
| 1661 | } |
| 1662 | else |
| 1663 | { |
| 1664 | m_vres[i] = s2; |
| 1665 | } |
| 1666 | |
| 1667 | SET_ACCUM_L(m_vres[i], i); |
| 1668 | } |
| 1669 | |
| 1670 | CLEAR_CARRY_FLAGS(); |
| 1671 | CLEAR_ZERO_FLAGS(); |
| 1672 | WRITEBACK_RESULT(); |
| 1673 | break; |
| 1674 | } |
| 1675 | |
| 1676 | case 0x23: /* VGE */ |
| 1677 | { |
| 1678 | // 31 25 24 20 15 10 5 0 |
| 1679 | // ------------------------------------------------------ |
| 1680 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 1681 | // ------------------------------------------------------ |
| 1682 | // |
| 1683 | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 1684 | // Moves the element in VS2 to destination vector |
| 1685 | |
| 1686 | CLEAR_COMPARE_FLAGS(); |
| 1687 | CLEAR_CLIP2_FLAGS(); |
| 1688 | |
| 1689 | for (i=0; i < 8; i++) |
| 1690 | { |
| 1691 | INT16 s1 = VREG_S(VS1REG, i); |
| 1692 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1693 | |
| 1694 | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 1695 | { |
| 1696 | SET_COMPARE_FLAG(i); |
| 1697 | m_vres[i] = s1; |
| 1698 | } |
| 1699 | else |
| 1700 | { |
| 1701 | m_vres[i] = s2; |
| 1702 | } |
| 1703 | |
| 1704 | SET_ACCUM_L(m_vres[i], i); |
| 1705 | } |
| 1706 | |
| 1707 | CLEAR_CARRY_FLAGS(); |
| 1708 | CLEAR_ZERO_FLAGS(); |
| 1709 | WRITEBACK_RESULT(); |
| 1710 | break; |
| 1711 | } |
| 1712 | |
| 1713 | case 0x24: /* VCL */ |
| 1714 | { |
| 1715 | // 31 25 24 20 15 10 5 0 |
| 1716 | // ------------------------------------------------------ |
| 1717 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 1718 | // ------------------------------------------------------ |
| 1719 | // |
| 1720 | // Vector clip low |
| 1721 | |
| 1722 | for (i = 0; i < 8; i++) |
| 1723 | { |
| 1724 | INT16 s1 = VREG_S(VS1REG, i); |
| 1725 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1726 | |
| 1727 | if (CARRY_FLAG(i) != 0) |
| 1728 | { |
| 1729 | if (ZERO_FLAG(i) != 0) |
| 1730 | { |
| 1731 | if (COMPARE_FLAG(i) != 0) |
| 1732 | { |
| 1733 | SET_ACCUM_L(-(UINT16)s2, i); |
| 1734 | } |
| 1735 | else |
| 1736 | { |
| 1737 | SET_ACCUM_L(s1, i); |
| 1738 | } |
| 1739 | } |
| 1740 | else |
| 1741 | { |
| 1742 | if (CLIP1_FLAG(i) != 0) |
| 1743 | { |
| 1744 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 1745 | { |
| 1746 | SET_ACCUM_L(s1, i); |
| 1747 | CLEAR_COMPARE_FLAG(i); |
| 1748 | } |
| 1749 | else |
| 1750 | { |
| 1751 | SET_ACCUM_L(-((UINT16)s2), i); |
| 1752 | SET_COMPARE_FLAG(i); |
| 1753 | } |
| 1754 | } |
| 1755 | else |
| 1756 | { |
| 1757 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 1758 | { |
| 1759 | SET_ACCUM_L(s1, i); |
| 1760 | CLEAR_COMPARE_FLAG(i); |
| 1761 | } |
| 1762 | else |
| 1763 | { |
| 1764 | SET_ACCUM_L(-((UINT16)s2), i); |
| 1765 | SET_COMPARE_FLAG(i); |
| 1766 | } |
| 1767 | } |
| 1768 | } |
| 1769 | } |
| 1770 | else |
| 1771 | { |
| 1772 | if (ZERO_FLAG(i) != 0) |
| 1773 | { |
| 1774 | if (CLIP2_FLAG(i) != 0) |
| 1775 | { |
| 1776 | SET_ACCUM_L(s2, i); |
| 1777 | } |
| 1778 | else |
| 1779 | { |
| 1780 | SET_ACCUM_L(s1, i); |
| 1781 | } |
| 1782 | } |
| 1783 | else |
| 1784 | { |
| 1785 | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 1786 | { |
| 1787 | SET_ACCUM_L(s2, i); |
| 1788 | SET_CLIP2_FLAG(i); |
| 1789 | } |
| 1790 | else |
| 1791 | { |
| 1792 | SET_ACCUM_L(s1, i); |
| 1793 | CLEAR_CLIP2_FLAG(i); |
| 1794 | } |
| 1795 | } |
| 1796 | } |
| 1797 | |
| 1798 | m_vres[i] = ACCUM_L(i); |
| 1799 | } |
| 1800 | CLEAR_CARRY_FLAGS(); |
| 1801 | CLEAR_ZERO_FLAGS(); |
| 1802 | CLEAR_CLIP1_FLAGS(); |
| 1803 | WRITEBACK_RESULT(); |
| 1804 | break; |
| 1805 | } |
| 1806 | |
| 1807 | case 0x25: /* VCH */ |
| 1808 | { |
| 1809 | // 31 25 24 20 15 10 5 0 |
| 1810 | // ------------------------------------------------------ |
| 1811 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 1812 | // ------------------------------------------------------ |
| 1813 | // |
| 1814 | // Vector clip high |
| 1815 | |
| 1816 | CLEAR_CARRY_FLAGS(); |
| 1817 | CLEAR_COMPARE_FLAGS(); |
| 1818 | CLEAR_CLIP1_FLAGS(); |
| 1819 | CLEAR_ZERO_FLAGS(); |
| 1820 | CLEAR_CLIP2_FLAGS(); |
| 1821 | UINT32 vce = 0; |
| 1822 | |
| 1823 | for (i=0; i < 8; i++) |
| 1824 | { |
| 1825 | INT16 s1 = VREG_S(VS1REG, i); |
| 1826 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1827 | |
| 1828 | if ((s1 ^ s2) < 0) |
| 1829 | { |
| 1830 | vce = (s1 + s2 == -1); |
| 1831 | SET_CARRY_FLAG(i); |
| 1832 | if (s2 < 0) |
| 1833 | { |
| 1834 | SET_CLIP2_FLAG(i); |
| 1835 | } |
| 1836 | |
| 1837 | if (s1 + s2 <= 0) |
| 1838 | { |
| 1839 | SET_COMPARE_FLAG(i); |
| 1840 | m_vres[i] = -((UINT16)s2); |
| 1841 | } |
| 1842 | else |
| 1843 | { |
| 1844 | m_vres[i] = s1; |
| 1845 | } |
| 1846 | |
| 1847 | if (s1 + s2 != 0) |
| 1848 | { |
| 1849 | if (s1 != ~s2) |
| 1850 | { |
| 1851 | SET_ZERO_FLAG(i); |
| 1852 | } |
| 1853 | } |
| 1854 | } |
| 1855 | else |
| 1856 | { |
| 1857 | vce = 0; |
| 1858 | if (s2 < 0) |
| 1859 | { |
| 1860 | SET_COMPARE_FLAG(i); |
| 1861 | } |
| 1862 | if (s1 - s2 >= 0) |
| 1863 | { |
| 1864 | SET_CLIP2_FLAG(i); |
| 1865 | m_vres[i] = s2; |
| 1866 | } |
| 1867 | else |
| 1868 | { |
| 1869 | m_vres[i] = s1; |
| 1870 | } |
| 1871 | |
| 1872 | if ((s1 - s2) != 0) |
| 1873 | { |
| 1874 | if (s1 != ~s2) |
| 1875 | { |
| 1876 | SET_ZERO_FLAG(i); |
| 1877 | } |
| 1878 | } |
| 1879 | } |
| 1880 | if (vce != 0) |
| 1881 | { |
| 1882 | SET_CLIP1_FLAG(i); |
| 1883 | } |
| 1884 | |
| 1885 | SET_ACCUM_L(m_vres[i], i); |
| 1886 | } |
| 1887 | WRITEBACK_RESULT(); |
| 1888 | break; |
| 1889 | } |
| 1890 | |
| 1891 | case 0x26: /* VCR */ |
| 1892 | { |
| 1893 | // 31 25 24 20 15 10 5 0 |
| 1894 | // ------------------------------------------------------ |
| 1895 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 1896 | // ------------------------------------------------------ |
| 1897 | // |
| 1898 | // Vector clip reverse |
| 1899 | |
| 1900 | CLEAR_CARRY_FLAGS(); |
| 1901 | CLEAR_COMPARE_FLAGS(); |
| 1902 | CLEAR_CLIP1_FLAGS(); |
| 1903 | CLEAR_ZERO_FLAGS(); |
| 1904 | CLEAR_CLIP2_FLAGS(); |
| 1905 | |
| 1906 | for (i=0; i < 8; i++) |
| 1907 | { |
| 1908 | INT16 s1 = VREG_S(VS1REG, i); |
| 1909 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1910 | |
| 1911 | if ((INT16)(s1 ^ s2) < 0) |
| 1912 | { |
| 1913 | if (s2 < 0) |
| 1914 | { |
| 1915 | SET_CLIP2_FLAG(i); |
| 1916 | } |
| 1917 | if ((s1 + s2) <= 0) |
| 1918 | { |
| 1919 | SET_ACCUM_L(~((UINT16)s2), i); |
| 1920 | SET_COMPARE_FLAG(i); |
| 1921 | } |
| 1922 | else |
| 1923 | { |
| 1924 | SET_ACCUM_L(s1, i); |
| 1925 | } |
| 1926 | } |
| 1927 | else |
| 1928 | { |
| 1929 | if (s2 < 0) |
| 1930 | { |
| 1931 | SET_COMPARE_FLAG(i); |
| 1932 | } |
| 1933 | if ((s1 - s2) >= 0) |
| 1934 | { |
| 1935 | SET_ACCUM_L(s2, i); |
| 1936 | SET_CLIP2_FLAG(i); |
| 1937 | } |
| 1938 | else |
| 1939 | { |
| 1940 | SET_ACCUM_L(s1, i); |
| 1941 | } |
| 1942 | } |
| 1943 | |
| 1944 | m_vres[i] = ACCUM_L(i); |
| 1945 | } |
| 1946 | WRITEBACK_RESULT(); |
| 1947 | break; |
| 1948 | } |
| 1949 | |
| 1950 | case 0x27: /* VMRG */ |
| 1951 | { |
| 1952 | // 31 25 24 20 15 10 5 0 |
| 1953 | // ------------------------------------------------------ |
| 1954 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 1955 | // ------------------------------------------------------ |
| 1956 | // |
| 1957 | // Merges two vectors according to compare flags |
| 1958 | |
| 1959 | for (i = 0; i < 8; i++) |
| 1960 | { |
| 1961 | if (COMPARE_FLAG(i) != 0) |
| 1962 | { |
| 1963 | m_vres[i] = VREG_S(VS1REG, i); |
| 1964 | } |
| 1965 | else |
| 1966 | { |
| 1967 | m_vres[i] = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1968 | } |
| 1969 | |
| 1970 | SET_ACCUM_L(m_vres[i], i); |
| 1971 | } |
| 1972 | WRITEBACK_RESULT(); |
| 1973 | break; |
| 1974 | } |
| 1975 | case 0x28: /* VAND */ |
| 1976 | { |
| 1977 | // 31 25 24 20 15 10 5 0 |
| 1978 | // ------------------------------------------------------ |
| 1979 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 1980 | // ------------------------------------------------------ |
| 1981 | // |
| 1982 | // Bitwise AND of two vector registers |
| 1983 | |
| 1984 | for (i = 0; i < 8; i++) |
| 1985 | { |
| 1986 | m_vres[i] = VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1987 | SET_ACCUM_L(m_vres[i], i); |
| 1988 | } |
| 1989 | WRITEBACK_RESULT(); |
| 1990 | break; |
| 1991 | } |
| 1992 | case 0x29: /* VNAND */ |
| 1993 | { |
| 1994 | // 31 25 24 20 15 10 5 0 |
| 1995 | // ------------------------------------------------------ |
| 1996 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 1997 | // ------------------------------------------------------ |
| 1998 | // |
| 1999 | // Bitwise NOT AND of two vector registers |
| 2000 | |
| 2001 | for (i = 0; i < 8; i++) |
| 2002 | { |
| 2003 | m_vres[i] = ~((VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2004 | SET_ACCUM_L(m_vres[i], i); |
| 2005 | } |
| 2006 | WRITEBACK_RESULT(); |
| 2007 | break; |
| 2008 | } |
| 2009 | case 0x2a: /* VOR */ |
| 2010 | { |
| 2011 | // 31 25 24 20 15 10 5 0 |
| 2012 | // ------------------------------------------------------ |
| 2013 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2014 | // ------------------------------------------------------ |
| 2015 | // |
| 2016 | // Bitwise OR of two vector registers |
| 2017 | |
| 2018 | for (i = 0; i < 8; i++) |
| 2019 | { |
| 2020 | m_vres[i] = VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2021 | SET_ACCUM_L(m_vres[i], i); |
| 2022 | } |
| 2023 | WRITEBACK_RESULT(); |
| 2024 | break; |
| 2025 | } |
| 2026 | case 0x2b: /* VNOR */ |
| 2027 | { |
| 2028 | // 31 25 24 20 15 10 5 0 |
| 2029 | // ------------------------------------------------------ |
| 2030 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2031 | // ------------------------------------------------------ |
| 2032 | // |
| 2033 | // Bitwise NOT OR of two vector registers |
| 2034 | |
| 2035 | for (i=0; i < 8; i++) |
| 2036 | { |
| 2037 | m_vres[i] = ~((VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2038 | SET_ACCUM_L(m_vres[i], i); |
| 2039 | } |
| 2040 | WRITEBACK_RESULT(); |
| 2041 | break; |
| 2042 | } |
| 2043 | case 0x2c: /* VXOR */ |
| 2044 | { |
| 2045 | // 31 25 24 20 15 10 5 0 |
| 2046 | // ------------------------------------------------------ |
| 2047 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2048 | // ------------------------------------------------------ |
| 2049 | // |
| 2050 | // Bitwise XOR of two vector registers |
| 2051 | |
| 2052 | for (i=0; i < 8; i++) |
| 2053 | { |
| 2054 | m_vres[i] = VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2055 | SET_ACCUM_L(m_vres[i], i); |
| 2056 | } |
| 2057 | WRITEBACK_RESULT(); |
| 2058 | break; |
| 2059 | } |
| 2060 | case 0x2d: /* VNXOR */ |
| 2061 | { |
| 2062 | // 31 25 24 20 15 10 5 0 |
| 2063 | // ------------------------------------------------------ |
| 2064 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2065 | // ------------------------------------------------------ |
| 2066 | // |
| 2067 | // Bitwise NOT XOR of two vector registers |
| 2068 | |
| 2069 | for (i=0; i < 8; i++) |
| 2070 | { |
| 2071 | m_vres[i] = ~((VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2072 | SET_ACCUM_L(m_vres[i], i); |
| 2073 | } |
| 2074 | WRITEBACK_RESULT(); |
| 2075 | break; |
| 2076 | } |
| 2077 | |
| 2078 | case 0x30: /* VRCP */ |
| 2079 | { |
| 2080 | // 31 25 24 20 15 10 5 0 |
| 2081 | // ------------------------------------------------------ |
| 2082 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2083 | // ------------------------------------------------------ |
| 2084 | // |
| 2085 | // Calculates reciprocal |
| 2086 | INT32 shifter = 0; |
| 2087 | |
| 2088 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2089 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2090 | if (datainput) |
| 2091 | { |
| 2092 | for (i = 0; i < 32; i++) |
| 2093 | { |
| 2094 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2095 | { |
| 2096 | shifter = i; |
| 2097 | break; |
| 2098 | } |
| 2099 | } |
| 2100 | } |
| 2101 | else |
| 2102 | { |
| 2103 | shifter = 0x10; |
| 2104 | } |
| 2105 | |
| 2106 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2107 | INT32 fetchval = rsp_divtable[address]; |
| 2108 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2109 | if (rec < 0) |
| 2110 | { |
| 2111 | temp = ~temp; |
| 2112 | } |
| 2113 | if (!rec) |
| 2114 | { |
| 2115 | temp = 0x7fffffff; |
| 2116 | } |
| 2117 | else if (rec == 0xffff8000) |
| 2118 | { |
| 2119 | temp = 0xffff0000; |
| 2120 | } |
| 2121 | rec = temp; |
| 2122 | |
| 2123 | m_reciprocal_res = rec; |
| 2124 | m_dp_allowed = 0; |
| 2125 | |
| 2126 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2127 | |
| 2128 | for (i = 0; i < 8; i++) |
| 2129 | { |
| 2130 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2131 | } |
| 2132 | |
| 2133 | |
| 2134 | break; |
| 2135 | } |
| 2136 | |
| 2137 | case 0x31: /* VRCPL */ |
| 2138 | { |
| 2139 | // 31 25 24 20 15 10 5 0 |
| 2140 | // ------------------------------------------------------ |
| 2141 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2142 | // ------------------------------------------------------ |
| 2143 | // |
| 2144 | // Calculates reciprocal low part |
| 2145 | |
| 2146 | INT32 shifter = 0; |
| 2147 | |
| 2148 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2149 | INT32 datainput = rec; |
| 2150 | |
| 2151 | if (m_dp_allowed) |
| 2152 | { |
| 2153 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2154 | datainput = rec; |
| 2155 | |
| 2156 | if (rec < 0) |
| 2157 | { |
| 2158 | if (rec < -32768) |
| 2159 | { |
| 2160 | datainput = ~datainput; |
| 2161 | } |
| 2162 | else |
| 2163 | { |
| 2164 | datainput = -datainput; |
| 2165 | } |
| 2166 | } |
| 2167 | } |
| 2168 | else if (datainput < 0) |
| 2169 | { |
| 2170 | datainput = -datainput; |
| 2171 | |
| 2172 | shifter = 0x10; |
| 2173 | } |
| 2174 | |
| 2175 | |
| 2176 | for (i = 0; i < 32; i++) |
| 2177 | { |
| 2178 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2179 | { |
| 2180 | shifter = i; |
| 2181 | break; |
| 2182 | } |
| 2183 | } |
| 2184 | |
| 2185 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2186 | INT32 fetchval = rsp_divtable[address]; |
| 2187 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2188 | temp ^= rec >> 31; |
| 2189 | |
| 2190 | if (!rec) |
| 2191 | { |
| 2192 | temp = 0x7fffffff; |
| 2193 | } |
| 2194 | else if (rec == 0xffff8000) |
| 2195 | { |
| 2196 | temp = 0xffff0000; |
| 2197 | } |
| 2198 | rec = temp; |
| 2199 | |
| 2200 | m_reciprocal_res = rec; |
| 2201 | m_dp_allowed = 0; |
| 2202 | |
| 2203 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2204 | |
| 2205 | for (i = 0; i < 8; i++) |
| 2206 | { |
| 2207 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2208 | } |
| 2209 | |
| 2210 | break; |
| 2211 | } |
| 2212 | |
| 2213 | case 0x32: /* VRCPH */ |
| 2214 | { |
| 2215 | // 31 25 24 20 15 10 5 0 |
| 2216 | // ------------------------------------------------------ |
| 2217 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 2218 | // ------------------------------------------------------ |
| 2219 | // |
| 2220 | // Calculates reciprocal high part |
| 2221 | |
| 2222 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2223 | m_dp_allowed = 1; |
| 2224 | |
| 2225 | for (i = 0; i < 8; i++) |
| 2226 | { |
| 2227 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2228 | } |
| 2229 | |
| 2230 | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 2231 | |
| 2232 | break; |
| 2233 | } |
| 2234 | |
| 2235 | case 0x33: /* VMOV */ |
| 2236 | { |
| 2237 | // 31 25 24 20 15 10 5 0 |
| 2238 | // ------------------------------------------------------ |
| 2239 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 2240 | // ------------------------------------------------------ |
| 2241 | // |
| 2242 | // Moves element from vector to destination vector |
| 2243 | |
| 2244 | VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 2245 | for (i = 0; i < 8; i++) |
| 2246 | { |
| 2247 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2248 | } |
| 2249 | break; |
| 2250 | } |
| 2251 | |
| 2252 | case 0x34: /* VRSQ */ |
| 2253 | { |
| 2254 | // 31 25 24 20 15 10 5 0 |
| 2255 | // ------------------------------------------------------ |
| 2256 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 2257 | // ------------------------------------------------------ |
| 2258 | // |
| 2259 | // Calculates reciprocal square-root |
| 2260 | // Reads one source element, writes one destination element, and copies the source vector into the low accumulator slice |
| 2261 | INT32 shifter = 0; |
| 2262 | |
| 2263 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); /* selected source element, sign-extended */ |
| 2264 | INT32 datainput = (rec < 0) ? (-rec) : rec; /* magnitude of the input */ |
| 2265 | if (datainput) |
| 2266 | { |
| 2267 | for (i = 0; i < 32; i++) |
| 2268 | { |
| 2269 | if (datainput & (1 << ((~i) & 0x1f)))// (~i) & 0x1f equals 31 - i, so bits are tested from bit 31 downwards |
| 2270 | { |
| 2271 | shifter = i; /* number of zero bits above the highest set bit */ |
| 2272 | break; |
| 2273 | } |
| 2274 | } |
| 2275 | } |
| 2276 | else |
| 2277 | { |
| 2278 | shifter = 0x10; /* zero input: fixed shift of 16 */ |
| 2279 | } |
| 2280 | |
| 2281 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* the nine bits just below the leading one after normalising */ |
| 2282 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* force bit 9 on and replace bit 0 with the low bit of the shift count */ |
| 2283 | |
| 2284 | INT32 fetchval = rsp_divtable[address]; /* lookup table is defined outside this block */ |
| 2285 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* shift right by half of (31 - shifter) */ |
| 2286 | if (rec < 0) |
| 2287 | { |
| 2288 | temp = ~temp; /* negative input: complement the result */ |
| 2289 | } |
| 2290 | if (!rec) |
| 2291 | { |
| 2292 | temp = 0x7fffffff; /* zero input: largest positive value */ |
| 2293 | } |
| 2294 | else if (rec == 0xffff8000) /* input was -32768 */ |
| 2295 | { |
| 2296 | temp = 0xffff0000; |
| 2297 | } |
| 2298 | rec = temp; |
| 2299 | |
| 2300 | m_reciprocal_res = rec; /* keep the full 32-bit result */ |
| 2301 | m_dp_allowed = 0; |
| 2302 | |
| 2303 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); /* only the low 16 bits go to the destination element */ |
| 2304 | |
| 2305 | for (i = 0; i < 8; i++) |
| 2306 | { |
| 2307 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* low accumulator slice receives the element-selected source vector */ |
| 2308 | } |
| 2309 | |
| 2310 | break; |
| 2311 | } |
| 2312 | |
| 2313 | case 0x35: /* VRSQL */ |
| 2314 | { |
| 2315 | // 31 25 24 20 15 10 5 0 |
| 2316 | // ------------------------------------------------------ |
| 2317 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 2318 | // ------------------------------------------------------ |
| 2319 | // |
| 2320 | // Calculates reciprocal square-root low part |
| 2321 | |
| 2322 | INT32 shifter = 0; |
| 2323 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2324 | INT32 datainput = rec; |
| 2325 | |
| 2326 | if (m_dp_allowed) |
| 2327 | { |
| 2328 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2329 | datainput = rec; |
| 2330 | |
| 2331 | if (rec < 0) |
| 2332 | { |
| 2333 | if (rec < -32768) |
| 2334 | { |
| 2335 | datainput = ~datainput; |
| 2336 | } |
| 2337 | else |
| 2338 | { |
| 2339 | datainput = -datainput; |
| 2340 | } |
| 2341 | } |
| 2342 | } |
| 2343 | else if (datainput < 0) |
| 2344 | { |
| 2345 | datainput = -datainput; |
| 2346 | |
| 2347 | shifter = 0x10; |
| 2348 | } |
| 2349 | |
| 2350 | if (datainput) |
| 2351 | { |
| 2352 | for (i = 0; i < 32; i++) |
| 2353 | { |
| 2354 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2355 | { |
| 2356 | shifter = i; |
| 2357 | break; |
| 2358 | } |
| 2359 | } |
| 2360 | } |
| 2361 | |
| 2362 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2363 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 2364 | |
| 2365 | INT32 fetchval = rsp_divtable[address]; |
| 2366 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 2367 | temp ^= rec >> 31; |
| 2368 | |
| 2369 | if (!rec) |
| 2370 | { |
| 2371 | temp = 0x7fffffff; |
| 2372 | } |
| 2373 | else if (rec == 0xffff8000) |
| 2374 | { |
| 2375 | temp = 0xffff0000; |
| 2376 | } |
| 2377 | rec = temp; |
| 2378 | |
| 2379 | m_reciprocal_res = rec; |
| 2380 | m_dp_allowed = 0; |
| 2381 | |
| 2382 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2383 | |
| 2384 | for (i = 0; i < 8; i++) |
| 2385 | { |
| 2386 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2387 | } |
| 2388 | |
| 2389 | break; |
| 2390 | } |
| 2391 | |
| 2392 | case 0x36: /* VRSQH */ |
| 2393 | { |
| 2394 | // 31 25 24 20 15 10 5 0 |
| 2395 | // ------------------------------------------------------ |
| 2396 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 2397 | // ------------------------------------------------------ |
| 2398 | // |
| 2399 | // Calculates reciprocal square-root high part |
| 2400 | |
| 2401 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2402 | m_dp_allowed = 1; |
| 2403 | |
| 2404 | for (i=0; i < 8; i++) |
| 2405 | { |
| 2406 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2407 | } |
| 2408 | |
| 2409 | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 2410 | break; |
| 2411 | } |
| 2412 | |
| 2413 | case 0x37: /* VNOP */ |
| 2414 | { |
| 2415 | // 31 25 24 20 15 10 5 0 |
| 2416 | // ------------------------------------------------------ |
| 2417 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110111 | |
| 2418 | // ------------------------------------------------------ |
| 2419 | // |
| 2420 | // Vector null instruction |
| 2421 | |
| 2422 | break; |
| 2423 | } |
| 2424 | |
| 2425 | default: m_rsp.unimplemented_opcode(op); break; |
| 2426 | } |
| 2427 | } |
| 2428 | |
| 2429 | /*************************************************************************** |
| 2430 | Vector Flag Reading/Writing |
| 2431 | ***************************************************************************/ |
| 2432 | |
| 2433 | void rsp_cop2::handle_cop2(UINT32 op) |
| 2434 | { /* Executes one COP2 instruction word: bits 25..21 of 'op' select MFC2, CFC2, MTC2, CTC2 or a vector operation. */ |
| 2435 | switch ((op >> 21) & 0x1f) |
| 2436 | { |
| 2437 | case 0x00: /* MFC2 */ |
| 2438 | { |
| 2439 | // 31 25 20 15 10 6 0 |
| 2440 | // --------------------------------------------------- |
| 2441 | // | 010010 | 00000 | TTTTT | DDDDD | IIII | 0000000 | |
| 2442 | // --------------------------------------------------- |
| 2443 | // Reads bytes el and (el+1)&0xf of vector register RDREG, joins them (first byte high) and stores the sign-extended 16-bit value in RT unless RTREG is 0 |
| 2444 | int el = (op >> 7) & 0xf; |
| 2445 | UINT16 b1 = VREG_B(RDREG, (el+0) & 0xf); |
| 2446 | UINT16 b2 = VREG_B(RDREG, (el+1) & 0xf); |
| 2447 | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); |
| 2448 | break; |
| 2449 | } |
| 2450 | |
| 2451 | case 0x02: /* CFC2 */ |
| 2452 | { |
| 2453 | // 31 25 20 15 10 0 |
| 2454 | // ------------------------------------------------ |
| 2455 | // | 010010 | 00010 | TTTTT | DDDDD | 00000000000 | |
| 2456 | // ------------------------------------------------ |
| 2457 | // Packs the flag set selected by RDREG (0, 1 or 2) into RT, one bit per lane; nothing is written when RTREG is 0 or RDREG is any other value |
| 2458 | if (RTREG) |
| 2459 | { |
| 2460 | switch(RDREG) |
| 2461 | { |
| 2462 | case 0: /* bits 0-7 = CARRY_FLAG(0..7), bits 8-15 = ZERO_FLAG(0..7) */ |
| 2463 | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 2464 | ((CARRY_FLAG(1) & 1) << 1) | |
| 2465 | ((CARRY_FLAG(2) & 1) << 2) | |
| 2466 | ((CARRY_FLAG(3) & 1) << 3) | |
| 2467 | ((CARRY_FLAG(4) & 1) << 4) | |
| 2468 | ((CARRY_FLAG(5) & 1) << 5) | |
| 2469 | ((CARRY_FLAG(6) & 1) << 6) | |
| 2470 | ((CARRY_FLAG(7) & 1) << 7) | |
| 2471 | ((ZERO_FLAG(0) & 1) << 8) | |
| 2472 | ((ZERO_FLAG(1) & 1) << 9) | |
| 2473 | ((ZERO_FLAG(2) & 1) << 10) | |
| 2474 | ((ZERO_FLAG(3) & 1) << 11) | |
| 2475 | ((ZERO_FLAG(4) & 1) << 12) | |
| 2476 | ((ZERO_FLAG(5) & 1) << 13) | |
| 2477 | ((ZERO_FLAG(6) & 1) << 14) | |
| 2478 | ((ZERO_FLAG(7) & 1) << 15); |
| 2479 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit flag word to 32 bits */ |
| 2480 | break; |
| 2481 | case 1: /* bits 0-7 = COMPARE_FLAG(0..7), bits 8-15 = CLIP2_FLAG(0..7) */ |
| 2482 | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 2483 | ((COMPARE_FLAG(1) & 1) << 1) | |
| 2484 | ((COMPARE_FLAG(2) & 1) << 2) | |
| 2485 | ((COMPARE_FLAG(3) & 1) << 3) | |
| 2486 | ((COMPARE_FLAG(4) & 1) << 4) | |
| 2487 | ((COMPARE_FLAG(5) & 1) << 5) | |
| 2488 | ((COMPARE_FLAG(6) & 1) << 6) | |
| 2489 | ((COMPARE_FLAG(7) & 1) << 7) | |
| 2490 | ((CLIP2_FLAG(0) & 1) << 8) | |
| 2491 | ((CLIP2_FLAG(1) & 1) << 9) | |
| 2492 | ((CLIP2_FLAG(2) & 1) << 10) | |
| 2493 | ((CLIP2_FLAG(3) & 1) << 11) | |
| 2494 | ((CLIP2_FLAG(4) & 1) << 12) | |
| 2495 | ((CLIP2_FLAG(5) & 1) << 13) | |
| 2496 | ((CLIP2_FLAG(6) & 1) << 14) | |
| 2497 | ((CLIP2_FLAG(7) & 1) << 15); |
| 2498 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit flag word to 32 bits */ |
| 2499 | break; |
| 2500 | case 2: |
| 2501 | // Ancillary clipping flags: bits 0-7 = CLIP1_FLAG(0..7), no sign extension |
| 2502 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 2503 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 2504 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 2505 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 2506 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 2507 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 2508 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 2509 | ((CLIP1_FLAG(7) & 1) << 7); |
| 2510 | } /* case 2 has no break of its own; it is the last case, so control simply leaves the switch here */ |
| 2511 | } |
| 2512 | break; |
| 2513 | } |
| 2514 | |
| 2515 | case 0x04: /* MTC2 */ |
| 2516 | { |
| 2517 | // 31 25 20 15 10 6 0 |
| 2518 | // --------------------------------------------------- |
| 2519 | // | 010010 | 00100 | TTTTT | DDDDD | IIII | 0000000 | |
| 2520 | // --------------------------------------------------- |
| 2521 | // Writes bits 15..8 of RT to byte el and bits 7..0 to byte (el+1)&0xf of vector register RDREG |
| 2522 | int el = (op >> 7) & 0xf; |
| 2523 | W_VREG_B(RDREG, (el+0) & 0xf, (RTVAL >> 8) & 0xff); |
| 2524 | W_VREG_B(RDREG, (el+1) & 0xf, (RTVAL >> 0) & 0xff); |
| 2525 | break; |
| 2526 | } |
| 2527 | |
| 2528 | case 0x06: /* CTC2 */ |
| 2529 | { |
| 2530 | // 31 25 20 15 10 0 |
| 2531 | // ------------------------------------------------ |
| 2532 | // | 010010 | 00110 | TTTTT | DDDDD | 00000000000 | |
| 2533 | // ------------------------------------------------ |
| 2534 | // Clears the flag set selected by RDREG (0, 1 or 2), then sets one flag per 1-bit of RT; the bit layout mirrors CFC2 above |
| 2535 | switch(RDREG) |
| 2536 | { |
| 2537 | case 0: /* bits 0-7 -> carry flags, bits 8-15 -> zero flags */ |
| 2538 | CLEAR_CARRY_FLAGS(); |
| 2539 | CLEAR_ZERO_FLAGS(); |
| 2540 | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 2541 | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 2542 | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 2543 | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 2544 | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 2545 | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 2546 | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 2547 | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 2548 | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 2549 | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 2550 | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 2551 | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 2552 | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 2553 | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 2554 | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 2555 | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 2556 | break; |
| 2557 | |
| 2558 | case 1: /* bits 0-7 -> compare flags, bits 8-15 -> clip2 flags */ |
| 2559 | CLEAR_COMPARE_FLAGS(); |
| 2560 | CLEAR_CLIP2_FLAGS(); |
| 2561 | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 2562 | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 2563 | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 2564 | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 2565 | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 2566 | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 2567 | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 2568 | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 2569 | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 2570 | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 2571 | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 2572 | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 2573 | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 2574 | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 2575 | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 2576 | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 2577 | break; |
| 2578 | |
| 2579 | case 2: /* bits 0-7 -> clip1 flags; higher bits of RT are ignored */ |
| 2580 | CLEAR_CLIP1_FLAGS(); |
| 2581 | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 2582 | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 2583 | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 2584 | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 2585 | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 2586 | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 2587 | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 2588 | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 2589 | break; |
| 2590 | } |
| 2591 | break; |
| 2592 | } |
| 2593 | |
| 2594 | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 2595 | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 2596 | { /* every sub-opcode with bit 4 set is forwarded to the vector-operation handler */ |
| 2597 | handle_vector_ops(op); |
| 2598 | break; |
| 2599 | } |
| 2600 | |
| 2601 | default: |
| 2602 | m_rsp.unimplemented_opcode(op); |
| 2603 | break; |
| 2604 | } |
| 2605 | } |
| 2606 | |
| 2607 | inline void rsp_cop2::mfc2() |
| 2608 | { // MFC2 for the instruction in m_op: copy a 16-bit value out of vector register VS1REG into RT, sign-extended |
| 2609 | const UINT32 op = m_op; |
| 2610 | const int el = (op >> 7) & 0xf; // byte index of the high byte, 0..15 |
| 2611 | |
| 2612 | const UINT16 hi = VREG_B(VS1REG, el); |
| 2613 | const UINT16 lo = VREG_B(VS1REG, (el + 1) & 0xf); // index wraps from byte 15 back to byte 0 |
| 2614 | if (RTREG != 0) { RTVAL = (INT32)(INT16)((hi << 8) | lo); } |
| 2615 | } |
| 2616 | |
| 2617 | inline void rsp_cop2::cfc2() |
| 2618 | { /* CFC2 for the instruction in m_op: packs the flag set selected by RDREG (0, 1 or 2) into RT, one bit per lane; does nothing when RTREG is 0 or RDREG is any other value. */ |
| 2619 | UINT32 op = m_op; /* NOTE(review): RTREG/RDREG presumably decode fields of this local 'op'; their definitions are outside this view */ |
| 2620 | if (RTREG) |
| 2621 | { |
| 2622 | switch(RDREG) |
| 2623 | { |
| 2624 | case 0: /* bits 0-7 = CARRY_FLAG(0..7), bits 8-15 = ZERO_FLAG(0..7) */ |
| 2625 | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 2626 | ((CARRY_FLAG(1) & 1) << 1) | |
| 2627 | ((CARRY_FLAG(2) & 1) << 2) | |
| 2628 | ((CARRY_FLAG(3) & 1) << 3) | |
| 2629 | ((CARRY_FLAG(4) & 1) << 4) | |
| 2630 | ((CARRY_FLAG(5) & 1) << 5) | |
| 2631 | ((CARRY_FLAG(6) & 1) << 6) | |
| 2632 | ((CARRY_FLAG(7) & 1) << 7) | |
| 2633 | ((ZERO_FLAG(0) & 1) << 8) | |
| 2634 | ((ZERO_FLAG(1) & 1) << 9) | |
| 2635 | ((ZERO_FLAG(2) & 1) << 10) | |
| 2636 | ((ZERO_FLAG(3) & 1) << 11) | |
| 2637 | ((ZERO_FLAG(4) & 1) << 12) | |
| 2638 | ((ZERO_FLAG(5) & 1) << 13) | |
| 2639 | ((ZERO_FLAG(6) & 1) << 14) | |
| 2640 | ((ZERO_FLAG(7) & 1) << 15); |
| 2641 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit flag word to 32 bits */ |
| 2642 | break; |
| 2643 | case 1: /* bits 0-7 = COMPARE_FLAG(0..7), bits 8-15 = CLIP2_FLAG(0..7) */ |
| 2644 | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 2645 | ((COMPARE_FLAG(1) & 1) << 1) | |
| 2646 | ((COMPARE_FLAG(2) & 1) << 2) | |
| 2647 | ((COMPARE_FLAG(3) & 1) << 3) | |
| 2648 | ((COMPARE_FLAG(4) & 1) << 4) | |
| 2649 | ((COMPARE_FLAG(5) & 1) << 5) | |
| 2650 | ((COMPARE_FLAG(6) & 1) << 6) | |
| 2651 | ((COMPARE_FLAG(7) & 1) << 7) | |
| 2652 | ((CLIP2_FLAG(0) & 1) << 8) | |
| 2653 | ((CLIP2_FLAG(1) & 1) << 9) | |
| 2654 | ((CLIP2_FLAG(2) & 1) << 10) | |
| 2655 | ((CLIP2_FLAG(3) & 1) << 11) | |
| 2656 | ((CLIP2_FLAG(4) & 1) << 12) | |
| 2657 | ((CLIP2_FLAG(5) & 1) << 13) | |
| 2658 | ((CLIP2_FLAG(6) & 1) << 14) | |
| 2659 | ((CLIP2_FLAG(7) & 1) << 15); |
| 2660 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit flag word to 32 bits */ |
| 2661 | break; |
| 2662 | case 2: /* bits 0-7 = CLIP1_FLAG(0..7); only eight bits, so no sign extension */ |
| 2663 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 2664 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 2665 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 2666 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 2667 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 2668 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 2669 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 2670 | ((CLIP1_FLAG(7) & 1) << 7); |
| 2671 | break; |
| 2672 | } |
| 2673 | } |
| 2674 | } |
| 2675 | |
| 2676 | inline void rsp_cop2::mtc2() |
| 2677 | { // MTC2 for the instruction in m_op: store the low 16 bits of RT into two consecutive bytes of vector register VS1REG |
| 2678 | const UINT32 op = m_op; |
| 2679 | const int first = (op >> 7) & 0xf; // byte index that receives bits 15..8 |
| 2680 | VREG_B(VS1REG, first) = (RTVAL >> 8) & 0xff; |
| 2681 | VREG_B(VS1REG, (first + 1) & 0xf) = RTVAL & 0xff; // index wraps from byte 15 back to byte 0 |
| 2682 | } |
| 2683 | |
| 2684 | inline void rsp_cop2::ctc2() |
| 2685 | { /* CTC2 for the instruction in m_op: clears the flag set selected by RDREG (0, 1 or 2) and reloads it from the bits of RT; other RDREG values do nothing. */ |
| 2686 | UINT32 op = m_op; /* NOTE(review): each flag below is written twice, directly into m_vflag[..] and through SET_*_FLAG; whether both reach the same storage is not visible here, confirm before simplifying */ |
| 2687 | switch(RDREG) |
| 2688 | { |
| 2689 | case 0: /* bits 0-7 -> m_vflag[0][lane] and carry flags, bits 8-15 -> m_vflag[3][lane] and zero flags */ |
| 2690 | CLEAR_CARRY_FLAGS(); |
| 2691 | CLEAR_ZERO_FLAGS(); |
| 2692 | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 2693 | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 2694 | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 2695 | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 2696 | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 2697 | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 2698 | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 2699 | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 2700 | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 2701 | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 2702 | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 2703 | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 2704 | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 2705 | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 2706 | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 2707 | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 2708 | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 2709 | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 2710 | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 2711 | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 2712 | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 2713 | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 2714 | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 2715 | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 2716 | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 2717 | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 2718 | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 2719 | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 2720 | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 2721 | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 2722 | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 2723 | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 2724 | break; |
| 2725 | case 1: /* bits 0-7 -> m_vflag[1][lane] and compare flags, bits 8-15 -> m_vflag[4][lane] and clip2 flags */ |
| 2726 | CLEAR_COMPARE_FLAGS(); |
| 2727 | CLEAR_CLIP2_FLAGS(); |
| 2728 | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 2729 | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 2730 | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 2731 | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 2732 | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 2733 | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 2734 | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 2735 | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 2736 | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 2737 | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 2738 | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 2739 | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 2740 | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 2741 | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 2742 | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 2743 | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 2744 | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 2745 | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 2746 | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 2747 | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 2748 | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 2749 | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 2750 | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 2751 | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 2752 | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 2753 | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 2754 | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 2755 | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 2756 | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 2757 | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 2758 | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 2759 | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 2760 | break; |
| 2761 | case 2: /* bits 0-7 -> m_vflag[2][lane] and clip1 flags; higher bits of RT are ignored */ |
| 2762 | CLEAR_CLIP1_FLAGS(); |
| 2763 | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 2764 | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 2765 | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 2766 | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 2767 | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 2768 | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 2769 | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 2770 | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 2771 | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 2772 | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 2773 | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 2774 | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 2775 | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 2776 | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 2777 | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 2778 | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 2779 | break; |
| 2780 | } |
| 2781 | } |
| 2782 | |
| 2783 | void rsp_cop2::log_instruction_execution() |
| 2784 | { // Writes every vector register whose contents differ from the previous call to m_rsp.m_exec_output |
| 2785 | static VECTOR_REG last_seen[32]; // function-local static snapshot, kept between calls |
| 2786 | |
| 2787 | for (int reg = 0; reg < 32; reg++) |
| 2788 | { |
| 2789 | if (m_v[reg].d[0] == last_seen[reg].d[0] && m_v[reg].d[1] == last_seen[reg].d[1]) |
| 2790 | continue; // unchanged: nothing to print and the snapshot already matches |
| 2791 | fprintf(m_rsp.m_exec_output, "V%d: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X ", reg, |
| 2792 | (UINT16)VREG_S(reg,0), (UINT16)VREG_S(reg,1), (UINT16)VREG_S(reg,2), (UINT16)VREG_S(reg,3), (UINT16)VREG_S(reg,4), (UINT16)VREG_S(reg,5), (UINT16)VREG_S(reg,6), (UINT16)VREG_S(reg,7)); |
| 2793 | |
| 2794 | last_seen[reg].d[0] = m_v[reg].d[0]; |
| 2795 | last_seen[reg].d[1] = m_v[reg].d[1]; |
| 2796 | } |
| 2797 | } |
trunk/src/emu/cpu/rsp/rspcp2d.c
| r0 | r241958 | |
| 1 | /*************************************************************************** |
| 2 | |
| 3 | rspcp2d.c |
| 4 | |
| 5 | Universal machine language-based Nintendo/SGI RSP COP2 emulator. |
| 6 | Written by Harmony of the MESS team. |
| 7 | |
| 8 | Copyright the MESS team. |
| 9 | Released for general non-commercial use under the MAME license |
| 10 | Visit http://mamedev.org for licensing and usage restrictions. |
| 11 | |
| 12 | ***************************************************************************/ |
| 13 | |
| 14 | #include "emu.h" |
| 15 | #include "rsp.h" |
| 16 | #include "rspdiv.h" |
| 17 | #include "rspcp2.h" |
| 18 | #include "rspcp2d.h" |
| 19 | #include "cpu/drcfe.h" |
| 20 | #include "cpu/drcuml.h" |
| 21 | #include "cpu/drcumlsh.h" |
| 22 | |
| 23 | using namespace uml; |
| 24 | |
| 25 | extern offs_t rsp_dasm_one(char *buffer, offs_t pc, UINT32 op); |
| 26 | |
| 27 | /*************************************************************************** |
| 28 | Helpful Defines |
| 29 | ***************************************************************************/ |
| 30 | |
| 31 | #define VDREG ((op >> 6) & 0x1f) /* 5-bit field at bits 10..6 of the local variable 'op' */ |
| 32 | #define VS1REG ((op >> 11) & 0x1f) /* 5-bit field at bits 15..11 of 'op' */ |
| 33 | #define VS2REG ((op >> 16) & 0x1f) /* 5-bit field at bits 20..16 of 'op' */ |
| 34 | #define EL ((op >> 21) & 0xf) /* 4-bit field at bits 24..21 of 'op'; used as the row index of VEC_EL_2 */ |
| 35 | /* Scalar register values; RSREG/RTREG/RDREG are defined outside this part of the file. */ |
| 36 | #define RSVAL (m_rsp.m_rsp_state->r[RSREG]) |
| 37 | #define RTVAL (m_rsp.m_rsp_state->r[RTREG]) |
| 38 | #define RDVAL (m_rsp.m_rsp_state->r[RDREG]) |
| 39 | /* Vector register access: VREG_B flips the lowest bit of the byte index; W_VREG_S is assignable, VREG_S yields the lane cast to INT16. */ |
| 40 | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 41 | #define W_VREG_S(reg, offset) m_v[(reg)].s[(offset)] |
| 42 | #define VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] |
| 43 | /* VEC_EL_2(x,z): source lane that element-select pattern x assigns to output lane z. */ |
| 44 | #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) |
| 45 | /* ACCUM(x): the whole accumulator entry for lane x (its .q member). */ |
| 46 | #define ACCUM(x) m_accum[x].q |
| 47 | /* First-dimension indices into m_vflag, one per flag set. */ |
| 48 | #define CARRY 0 |
| 49 | #define COMPARE 1 |
| 50 | #define CLIP1 2 |
| 51 | #define ZERO 3 |
| 52 | #define CLIP2 4 |
| 53 | /* Forward declarations; the definitions are not in this part of the file. */ |
| 54 | static void cfunc_mfc2(void *param); |
| 55 | static void cfunc_cfc2(void *param); |
| 56 | static void cfunc_mtc2(void *param); |
| 57 | static void cfunc_ctc2(void *param); |
| 58 | /* 16-bit slices of accumulator lane x: w[3] is named H, w[2] M, w[1] L and w[0] LL. */ |
| 59 | #define ACCUM_H(x) (UINT16)m_accum[x].w[3] |
| 60 | #define ACCUM_M(x) (UINT16)m_accum[x].w[2] |
| 61 | #define ACCUM_L(x) (UINT16)m_accum[x].w[1] |
| 62 | #define ACCUM_LL(x) (UINT16)m_accum[x].w[0] |
| 63 | /* Setters for the same slices; note that each expansion already ends in a semicolon. */ |
| 64 | #define SET_ACCUM_H(v, x) m_accum[x].w[3] = v; |
| 65 | #define SET_ACCUM_M(v, x) m_accum[x].w[2] = v; |
| 66 | #define SET_ACCUM_L(v, x) m_accum[x].w[1] = v; |
| 67 | #define SET_ACCUM_LL(v, x) m_accum[x].w[0] = v; |
| 68 | /* Operand fetch: GET_VS1 reads lane i of VS1REG directly, GET_VS2 reads VS2REG through the element-select pattern EL. */ |
| 69 | #define GET_VS1(out, i) out = VREG_S(VS1REG, i) |
| 70 | #define GET_VS2(out, i) out = VREG_S(VS2REG, VEC_EL_2(EL, i)) |
| 71 | |
| 72 | #define CARRY_FLAG(x) (m_vflag[CARRY][(x) & 7] != 0 ? 0xffff : 0) // queries: 0xffff if the lane's flag is non-zero, else 0; the lane index is taken modulo 8 |
| 73 | #define COMPARE_FLAG(x) (m_vflag[COMPARE][(x) & 7] != 0 ? 0xffff : 0) |
| 74 | #define CLIP1_FLAG(x) (m_vflag[CLIP1][(x) & 7] != 0 ? 0xffff : 0) |
| 75 | #define ZERO_FLAG(x) (m_vflag[ZERO][(x) & 7] != 0 ? 0xffff : 0) |
| 76 | #define CLIP2_FLAG(x) (m_vflag[CLIP2][(x) & 7] != 0 ? 0xffff : 0) |
| 77 | // Clear a whole flag set: zeroes 16 bytes, i.e. eight entries if m_vflag holds 16-bit values (TODO confirm against the declaration) |
| 78 | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[CARRY], 0, 16); } |
| 79 | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[COMPARE], 0, 16); } |
| 80 | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[CLIP1], 0, 16); } |
| 81 | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[ZERO], 0, 16); } |
| 82 | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[CLIP2], 0, 16); } |
| 83 | // Set one lane's flag to 0xffff; the argument is parenthesized so that any expression can be passed as the lane index |
| 84 | #define SET_CARRY_FLAG(x) { m_vflag[CARRY][(x) & 7] = 0xffff; } |
| 85 | #define SET_COMPARE_FLAG(x) { m_vflag[COMPARE][(x) & 7] = 0xffff; } |
| 86 | #define SET_CLIP1_FLAG(x) { m_vflag[CLIP1][(x) & 7] = 0xffff; } |
| 87 | #define SET_ZERO_FLAG(x) { m_vflag[ZERO][(x) & 7] = 0xffff; } |
| 88 | #define SET_CLIP2_FLAG(x) { m_vflag[CLIP2][(x) & 7] = 0xffff; } |
| 89 | // Clear one lane's flag |
| 90 | #define CLEAR_CARRY_FLAG(x) { m_vflag[CARRY][(x) & 7] = 0; } |
| 91 | #define CLEAR_COMPARE_FLAG(x) { m_vflag[COMPARE][(x) & 7] = 0; } |
| 92 | #define CLEAR_CLIP1_FLAG(x) { m_vflag[CLIP1][(x) & 7] = 0; } |
| 93 | #define CLEAR_ZERO_FLAG(x) { m_vflag[ZERO][(x) & 7] = 0; } |
| 94 | #define CLEAR_CLIP2_FLAG(x) { m_vflag[CLIP2][(x) & 7] = 0; } |
| 95 | |
| 96 | #define WRITEBACK_RESULT() { /* copies m_vres[0..7] into the eight 16-bit lanes of vector register VDREG */ \ |
| 97 | W_VREG_S(VDREG, 0) = m_vres[0]; \ |
| 98 | W_VREG_S(VDREG, 1) = m_vres[1]; \ |
| 99 | W_VREG_S(VDREG, 2) = m_vres[2]; \ |
| 100 | W_VREG_S(VDREG, 3) = m_vres[3]; \ |
| 101 | W_VREG_S(VDREG, 4) = m_vres[4]; \ |
| 102 | W_VREG_S(VDREG, 5) = m_vres[5]; \ |
| 103 | W_VREG_S(VDREG, 6) = m_vres[6]; \ |
| 104 | W_VREG_S(VDREG, 7) = m_vres[7]; \ |
| 105 | } |
| 106 | |
| 107 | static const int vector_elements_2[16][8] = |
| 108 | { /* Element-select patterns, read through VEC_EL_2(x,z): row x is chosen by the 4-bit EL field, column z is the output lane, the entry is the source lane. */ |
| 109 | { 0, 1, 2, 3, 4, 5, 6, 7 }, // none |
| 110 | { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? |
| 111 | { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q |
| 112 | { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q |
| 113 | { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h |
| 114 | { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h |
| 115 | { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h |
| 116 | { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h |
| 117 | { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 |
| 118 | { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 |
| 119 | { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 |
| 120 | { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 |
| 121 | { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 |
| 122 | { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 |
| 123 | { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 |
| 124 | { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 |
| 125 | }; |
| 126 | |
| 127 | void rsp_cop2_drc::cfunc_unimplemented_opcode() |
| 128 | { // Reports the unrecognised instruction held in m_op: optional debug disassembly first, then fatalerror() |
| 129 | const UINT32 fault_pc = m_rsp.m_ppc; |
| 130 | if (m_machine.debug_flags & DEBUG_FLAG_ENABLED) |
| 131 | { |
| 132 | char disasm[200]; |
| 133 | rsp_dasm_one(disasm, fault_pc, m_op); |
| 134 | osd_printf_debug("%08X: %s\n", fault_pc, disasm); |
| 135 | } |
| 136 | fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", m_op >> 26, m_op, fault_pc); |
| 137 | } |
| 138 | |
| 139 | static void unimplemented_opcode(void *param) |
| 140 | { // Plain-function thunk: treats 'param' as an rsp_cop2 pointer and forwards to its cfunc_unimplemented_opcode() |
| 141 | static_cast<rsp_cop2 *>(param)->cfunc_unimplemented_opcode(); |
| 142 | } |
| 143 | |
| 144 | void rsp_cop2_drc::state_string_export(const int index, astring &string) |
| 145 | { /* For index RSP_Vn (n = 0..31), formats vector register n into 'string' as eight 4-digit hex lanes separated by '|'; any other index leaves 'string' untouched. */ |
| 146 | switch (index) /* NOTE(review): the 32 cases differ only in the register number; they could share one printf if RSP_V0..RSP_V31 are consecutive values (not visible here) */ |
| 147 | { |
| 148 | case RSP_V0: |
| 149 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); |
| 150 | break; |
| 151 | case RSP_V1: |
| 152 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); |
| 153 | break; |
| 154 | case RSP_V2: |
| 155 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); |
| 156 | break; |
| 157 | case RSP_V3: |
| 158 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); |
| 159 | break; |
| 160 | case RSP_V4: |
| 161 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); |
| 162 | break; |
| 163 | case RSP_V5: |
| 164 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); |
| 165 | break; |
| 166 | case RSP_V6: |
| 167 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); |
| 168 | break; |
| 169 | case RSP_V7: |
| 170 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); |
| 171 | break; |
| 172 | case RSP_V8: |
| 173 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); |
| 174 | break; |
| 175 | case RSP_V9: |
| 176 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); |
| 177 | break; |
| 178 | case RSP_V10: |
| 179 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); |
| 180 | break; |
| 181 | case RSP_V11: |
| 182 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); |
| 183 | break; |
| 184 | case RSP_V12: |
| 185 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); |
| 186 | break; |
| 187 | case RSP_V13: |
| 188 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); |
| 189 | break; |
| 190 | case RSP_V14: |
| 191 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); |
| 192 | break; |
| 193 | case RSP_V15: |
| 194 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); |
| 195 | break; |
| 196 | case RSP_V16: |
| 197 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); |
| 198 | break; |
| 199 | case RSP_V17: |
| 200 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); |
| 201 | break; |
| 202 | case RSP_V18: |
| 203 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); |
| 204 | break; |
| 205 | case RSP_V19: |
| 206 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); |
| 207 | break; |
| 208 | case RSP_V20: |
| 209 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); |
| 210 | break; |
| 211 | case RSP_V21: |
| 212 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); |
| 213 | break; |
| 214 | case RSP_V22: |
| 215 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); |
| 216 | break; |
| 217 | case RSP_V23: |
| 218 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); |
| 219 | break; |
| 220 | case RSP_V24: |
| 221 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); |
| 222 | break; |
| 223 | case RSP_V25: |
| 224 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); |
| 225 | break; |
| 226 | case RSP_V26: |
| 227 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); |
| 228 | break; |
| 229 | case RSP_V27: |
| 230 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); |
| 231 | break; |
| 232 | case RSP_V28: |
| 233 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); |
| 234 | break; |
| 235 | case RSP_V29: |
| 236 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); |
| 237 | break; |
| 238 | case RSP_V30: |
| 239 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); |
| 240 | break; |
| 241 | case RSP_V31: |
| 242 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); |
| 243 | break; |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | |
| 248 | /*************************************************************************** |
| 249 | Vector Load Instructions |
| 250 | ***************************************************************************/ |
| 251 | |
| 252 | // LBV |
| 253 | // |
| 254 | // 31 25 20 15 10 6 0 |
| 255 | // -------------------------------------------------- |
| 256 | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 257 | // -------------------------------------------------- |
| 258 | // |
| 259 | // Load 1 byte to vector byte index |
| 260 | |
| 261 | inline void rsp_cop2_drc::lbv() |
| 262 | { |
| 263 | UINT32 op = m_op; |
| 264 | |
| 265 | UINT32 ea = 0; |
| 266 | int dest = (op >> 16) & 0x1f; |
| 267 | int base = (op >> 21) & 0x1f; |
| 268 | int index = (op >> 7) & 0xf; |
| 269 | int offset = (op & 0x7f); |
| 270 | if (offset & 0x40) |
| 271 | { |
| 272 | offset |= 0xffffffc0; |
| 273 | } |
| 274 | |
| 275 | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 276 | VREG_B(dest, index) = m_rsp.DM_READ8(ea); |
| 277 | } |
| 278 | |
| 279 | static void cfunc_lbv(void *param) |
| 280 | { |
| 281 | ((rsp_cop2 *)param)->lbv(); |
| 282 | } |
| 283 | |
| 284 | |
| 285 | // LSV |
| 286 | // |
| 287 | // 31 25 20 15 10 6 0 |
| 288 | // -------------------------------------------------- |
| 289 | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 290 | // -------------------------------------------------- |
| 291 | // |
| 292 | // Loads 2 bytes starting from vector byte index |
| 293 | |
| 294 | inline void rsp_cop2_drc::lsv() |
| 295 | { |
| 296 | UINT32 op = m_op; |
| 297 | int dest = (op >> 16) & 0x1f; |
| 298 | int base = (op >> 21) & 0x1f; |
| 299 | int index = (op >> 7) & 0xe; |
| 300 | int offset = (op & 0x7f); |
| 301 | if (offset & 0x40) |
| 302 | { |
| 303 | offset |= 0xffffffc0; |
| 304 | } |
| 305 | |
| 306 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 307 | int end = index + 2; |
| 308 | for (int i = index; i < end; i++) |
| 309 | { |
| 310 | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 311 | ea++; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | static void cfunc_lsv(void *param) |
| 316 | { |
| 317 | ((rsp_cop2 *)param)->lsv(); |
| 318 | } |
| 319 | |
| 320 | |
| 321 | // LLV |
| 322 | // |
| 323 | // 31 25 20 15 10 6 0 |
| 324 | // -------------------------------------------------- |
| 325 | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 326 | // -------------------------------------------------- |
| 327 | // |
| 328 | // Loads 4 bytes starting from vector byte index |
| 329 | |
| 330 | inline void rsp_cop2_drc::llv() |
| 331 | { |
| 332 | UINT32 op = m_op; |
| 333 | UINT32 ea = 0; |
| 334 | int dest = (op >> 16) & 0x1f; |
| 335 | int base = (op >> 21) & 0x1f; |
| 336 | int index = (op >> 7) & 0xc; |
| 337 | int offset = (op & 0x7f); |
| 338 | if (offset & 0x40) |
| 339 | { |
| 340 | offset |= 0xffffffc0; |
| 341 | } |
| 342 | |
| 343 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 344 | |
| 345 | int end = index + 4; |
| 346 | |
| 347 | for (int i = index; i < end; i++) |
| 348 | { |
| 349 | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 350 | ea++; |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | static void cfunc_llv(void *param) |
| 355 | { |
| 356 | ((rsp_cop2 *)param)->llv(); |
| 357 | } |
| 358 | |
| 359 | |
| 360 | // LDV |
| 361 | // |
| 362 | // 31 25 20 15 10 6 0 |
| 363 | // -------------------------------------------------- |
| 364 | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 365 | // -------------------------------------------------- |
| 366 | // |
| 367 | // Loads 8 bytes starting from vector byte index |
| 368 | |
| 369 | inline void rsp_cop2_drc::ldv() |
| 370 | { |
| 371 | UINT32 op = m_op; |
| 372 | UINT32 ea = 0; |
| 373 | int dest = (op >> 16) & 0x1f; |
| 374 | int base = (op >> 21) & 0x1f; |
| 375 | int index = (op >> 7) & 0x8; |
| 376 | int offset = (op & 0x7f); |
| 377 | if (offset & 0x40) |
| 378 | { |
| 379 | offset |= 0xffffffc0; |
| 380 | } |
| 381 | |
| 382 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 383 | |
| 384 | int end = index + 8; |
| 385 | |
| 386 | for (int i = index; i < end; i++) |
| 387 | { |
| 388 | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 389 | ea++; |
| 390 | } |
| 391 | } |
| 392 | |
| 393 | static void cfunc_ldv(void *param) |
| 394 | { |
| 395 | ((rsp_cop2 *)param)->ldv(); |
| 396 | } |
| 397 | |
| 398 | |
| 399 | // LQV |
| 400 | // |
| 401 | // 31 25 20 15 10 6 0 |
| 402 | // -------------------------------------------------- |
| 403 | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 404 | // -------------------------------------------------- |
| 405 | // |
| 406 | // Loads up to 16 bytes starting from vector byte index |
| 407 | |
| 408 | inline void rsp_cop2_drc::lqv() |
| 409 | { |
| 410 | UINT32 op = m_op; |
| 411 | int dest = (op >> 16) & 0x1f; |
| 412 | int base = (op >> 21) & 0x1f; |
| 413 | int offset = (op & 0x7f); |
| 414 | if (offset & 0x40) |
| 415 | { |
| 416 | offset |= 0xffffffc0; |
| 417 | } |
| 418 | |
| 419 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 420 | |
| 421 | int end = 16 - (ea & 0xf); |
| 422 | if (end > 16) end = 16; |
| 423 | |
| 424 | for (int i = 0; i < end; i++) |
| 425 | { |
| 426 | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 427 | ea++; |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | static void cfunc_lqv(void *param) |
| 432 | { |
| 433 | ((rsp_cop2 *)param)->lqv(); |
| 434 | } |
| 435 | |
| 436 | |
| 437 | // LRV |
| 438 | // |
| 439 | // 31 25 20 15 10 6 0 |
| 440 | // -------------------------------------------------- |
| 441 | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 442 | // -------------------------------------------------- |
| 443 | // |
| 444 | // Loads up to 16 bytes starting from right side until 16-byte boundary |
| 445 | |
| 446 | inline void rsp_cop2_drc::lrv() |
| 447 | { |
| 448 | UINT32 op = m_op; |
| 449 | int dest = (op >> 16) & 0x1f; |
| 450 | int base = (op >> 21) & 0x1f; |
| 451 | int index = (op >> 7) & 0xf; |
| 452 | int offset = (op & 0x7f); |
| 453 | if (offset & 0x40) |
| 454 | { |
| 455 | offset |= 0xffffffc0; |
| 456 | } |
| 457 | |
| 458 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 459 | |
| 460 | index = 16 - ((ea & 0xf) - index); |
| 461 | ea &= ~0xf; |
| 462 | |
| 463 | for (int i = index; i < 16; i++) |
| 464 | { |
| 465 | VREG_B(dest, i) = m_rsp.DM_READ8(ea); |
| 466 | ea++; |
| 467 | } |
| 468 | } |
| 469 | |
| 470 | static void cfunc_lrv(void *param) |
| 471 | { |
| 472 | ((rsp_cop2 *)param)->lrv(); |
| 473 | } |
| 474 | |
| 475 | |
| 476 | // LPV |
| 477 | // |
| 478 | // 31 25 20 15 10 6 0 |
| 479 | // -------------------------------------------------- |
| 480 | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 481 | // -------------------------------------------------- |
| 482 | // |
| 483 | // Loads a byte as the upper 8 bits of each element |
| 484 | |
| 485 | inline void rsp_cop2_drc::lpv() |
| 486 | { |
| 487 | UINT32 op = m_op; |
| 488 | int dest = (op >> 16) & 0x1f; |
| 489 | int base = (op >> 21) & 0x1f; |
| 490 | int index = (op >> 7) & 0xf; |
| 491 | int offset = (op & 0x7f); |
| 492 | if (offset & 0x40) |
| 493 | { |
| 494 | offset |= 0xffffffc0; |
| 495 | } |
| 496 | |
| 497 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 498 | |
| 499 | for (int i = 0; i < 8; i++) |
| 500 | { |
| 501 | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 502 | } |
| 503 | } |
| 504 | |
| 505 | static void cfunc_lpv(void *param) |
| 506 | { |
| 507 | ((rsp_cop2 *)param)->lpv(); |
| 508 | } |
| 509 | |
| 510 | |
| 511 | // LUV |
| 512 | // |
| 513 | // 31 25 20 15 10 6 0 |
| 514 | // -------------------------------------------------- |
| 515 | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 516 | // -------------------------------------------------- |
| 517 | // |
| 518 | // Loads a byte as the bits 14-7 of each element |
| 519 | |
| 520 | inline void rsp_cop2_drc::luv() |
| 521 | { |
| 522 | UINT32 op = m_op; |
| 523 | int dest = (op >> 16) & 0x1f; |
| 524 | int base = (op >> 21) & 0x1f; |
| 525 | int index = (op >> 7) & 0xf; |
| 526 | int offset = (op & 0x7f); |
| 527 | if (offset & 0x40) |
| 528 | { |
| 529 | offset |= 0xffffffc0; |
| 530 | } |
| 531 | |
| 532 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 533 | |
| 534 | for (int i = 0; i < 8; i++) |
| 535 | { |
| 536 | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 7; |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | static void cfunc_luv(void *param) |
| 541 | { |
| 542 | ((rsp_cop2 *)param)->luv(); |
| 543 | } |
| 544 | |
| 545 | |
| 546 | // LHV |
| 547 | // |
| 548 | // 31 25 20 15 10 6 0 |
| 549 | // -------------------------------------------------- |
| 550 | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 551 | // -------------------------------------------------- |
| 552 | // |
| 553 | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 554 | |
| 555 | inline void rsp_cop2_drc::lhv() |
| 556 | { |
| 557 | UINT32 op = m_op; |
| 558 | int dest = (op >> 16) & 0x1f; |
| 559 | int base = (op >> 21) & 0x1f; |
| 560 | int index = (op >> 7) & 0xf; |
| 561 | int offset = (op & 0x7f); |
| 562 | if (offset & 0x40) |
| 563 | { |
| 564 | offset |= 0xffffffc0; |
| 565 | } |
| 566 | |
| 567 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 568 | |
| 569 | for (int i = 0; i < 8; i++) |
| 570 | { |
| 571 | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 572 | } |
| 573 | } |
| 574 | |
| 575 | static void cfunc_lhv(void *param) |
| 576 | { |
| 577 | ((rsp_cop2 *)param)->lhv(); |
| 578 | } |
| 579 | |
| 580 | |
| 581 | // LFV |
| 582 | // 31 25 20 15 10 6 0 |
| 583 | // -------------------------------------------------- |
| 584 | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 585 | // -------------------------------------------------- |
| 586 | // |
| 587 | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 588 | |
| 589 | inline void rsp_cop2_drc::lfv() |
| 590 | { |
| 591 | UINT32 op = m_op; |
| 592 | int dest = (op >> 16) & 0x1f; |
| 593 | int base = (op >> 21) & 0x1f; |
| 594 | int index = (op >> 7) & 0xf; |
| 595 | int offset = (op & 0x7f); |
| 596 | if (offset & 0x40) |
| 597 | { |
| 598 | offset |= 0xffffffc0; |
| 599 | } |
| 600 | |
| 601 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 602 | |
| 603 | // not sure what happens if 16-byte boundary is crossed... |
| 604 | |
| 605 | int end = (index >> 1) + 4; |
| 606 | |
| 607 | for (int i = index >> 1; i < end; i++) |
| 608 | { |
| 609 | W_VREG_S(dest, i) = m_rsp.DM_READ8(ea) << 7; |
| 610 | ea += 4; |
| 611 | } |
| 612 | } |
| 613 | |
| 614 | static void cfunc_lfv(void *param) |
| 615 | { |
| 616 | ((rsp_cop2 *)param)->lfv(); |
| 617 | } |
| 618 | |
| 619 | |
| 620 | // LWV |
| 621 | // |
| 622 | // 31 25 20 15 10 6 0 |
| 623 | // -------------------------------------------------- |
| 624 | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 625 | // -------------------------------------------------- |
| 626 | // |
| 627 | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 628 | // after byte index 15 |
| 629 | |
| 630 | inline void rsp_cop2_drc::lwv() |
| 631 | { |
| 632 | UINT32 op = m_op; |
| 633 | int dest = (op >> 16) & 0x1f; |
| 634 | int base = (op >> 21) & 0x1f; |
| 635 | int index = (op >> 7) & 0xf; |
| 636 | int offset = (op & 0x7f); |
| 637 | if (offset & 0x40) |
| 638 | { |
| 639 | offset |= 0xffffffc0; |
| 640 | } |
| 641 | |
| 642 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 643 | int end = (16 - index) + 16; |
| 644 | |
| 645 | for (int i = (16 - index); i < end; i++) |
| 646 | { |
| 647 | VREG_B(dest, i & 0xf) = m_rsp.DM_READ8(ea); |
| 648 | ea += 4; |
| 649 | } |
| 650 | } |
| 651 | |
| 652 | static void cfunc_lwv(void *param) |
| 653 | { |
| 654 | ((rsp_cop2 *)param)->lwv(); |
| 655 | } |
| 656 | |
| 657 | |
| 658 | // LTV |
| 659 | // |
| 660 | // 31 25 20 15 10 6 0 |
| 661 | // -------------------------------------------------- |
| 662 | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 663 | // -------------------------------------------------- |
| 664 | // |
| 665 | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 666 | |
| 667 | inline void rsp_cop2_drc::ltv() |
| 668 | { |
| 669 | UINT32 op = m_op; |
| 670 | int dest = (op >> 16) & 0x1f; |
| 671 | int base = (op >> 21) & 0x1f; |
| 672 | int index = (op >> 7) & 0xf; |
| 673 | int offset = (op & 0x7f); |
| 674 | |
| 675 | // FIXME: has a small problem with odd indices |
| 676 | |
| 677 | int vs = dest; |
| 678 | int ve = dest + 8; |
| 679 | if (ve > 32) |
| 680 | { |
| 681 | ve = 32; |
| 682 | } |
| 683 | |
| 684 | int element = 7 - (index >> 1); |
| 685 | |
| 686 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 687 | |
| 688 | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 689 | for (int i = vs; i < ve; i++) |
| 690 | { |
| 691 | element = (8 - (index >> 1) + (i - vs)) << 1; |
| 692 | VREG_B(i, (element & 0xf)) = m_rsp.DM_READ8(ea); |
| 693 | VREG_B(i, ((element + 1) & 0xf)) = m_rsp.DM_READ8(ea + 1); |
| 694 | ea += 2; |
| 695 | } |
| 696 | } |
| 697 | |
| 698 | static void cfunc_ltv(void *param) |
| 699 | { |
| 700 | ((rsp_cop2 *)param)->ltv(); |
| 701 | } |
| 702 | |
| 703 | |
| 704 | int rsp_cop2_drc::generate_lwc2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 705 | { |
| 706 | UINT32 op = desc->opptr.l[0]; |
| 707 | int offset = (op & 0x7f); |
| 708 | if (offset & 0x40) |
| 709 | { |
| 710 | offset |= 0xffffffc0; |
| 711 | } |
| 712 | |
| 713 | switch ((op >> 11) & 0x1f) |
| 714 | { |
| 715 | case 0x00: /* LBV */ |
| 716 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 717 | UML_CALLC(block, cfunc_lbv, this); |
| 718 | return TRUE; |
| 719 | |
| 720 | case 0x01: /* LSV */ |
| 721 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 722 | UML_CALLC(block, cfunc_lsv, this); |
| 723 | return TRUE; |
| 724 | |
| 725 | case 0x02: /* LLV */ |
| 726 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 727 | UML_CALLC(block, cfunc_llv, this); |
| 728 | return TRUE; |
| 729 | |
| 730 | case 0x03: /* LDV */ |
| 731 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 732 | UML_CALLC(block, cfunc_ldv, this); |
| 733 | return TRUE; |
| 734 | |
| 735 | case 0x04: /* LQV */ |
| 736 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 737 | UML_CALLC(block, cfunc_lqv, this); |
| 738 | return TRUE; |
| 739 | |
| 740 | case 0x05: /* LRV */ |
| 741 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 742 | UML_CALLC(block, cfunc_lrv, this); |
| 743 | return TRUE; |
| 744 | |
| 745 | case 0x06: /* LPV */ |
| 746 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 747 | UML_CALLC(block, cfunc_lpv, this); |
| 748 | return TRUE; |
| 749 | |
| 750 | case 0x07: /* LUV */ |
| 751 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 752 | UML_CALLC(block, cfunc_luv, this); |
| 753 | return TRUE; |
| 754 | |
| 755 | case 0x08: /* LHV */ |
| 756 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 757 | UML_CALLC(block, cfunc_lhv, this); |
| 758 | return TRUE; |
| 759 | |
| 760 | case 0x09: /* LFV */ |
| 761 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 762 | UML_CALLC(block, cfunc_lfv, this); |
| 763 | return TRUE; |
| 764 | |
| 765 | case 0x0a: /* LWV */ |
| 766 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 767 | UML_CALLC(block, cfunc_lwv, this); |
| 768 | return TRUE; |
| 769 | |
| 770 | case 0x0b: /* LTV */ |
| 771 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l |
| 772 | UML_CALLC(block, cfunc_ltv, this); |
| 773 | return TRUE; |
| 774 | |
| 775 | default: |
| 776 | return FALSE; |
| 777 | } |
| 778 | } |
| 779 | |
| 780 | |
| 781 | /*************************************************************************** |
| 782 | Vector Store Instructions |
| 783 | ***************************************************************************/ |
| 784 | |
| 785 | // SBV |
| 786 | // |
| 787 | // 31 25 20 15 10 6 0 |
| 788 | // -------------------------------------------------- |
| 789 | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 790 | // -------------------------------------------------- |
| 791 | // |
| 792 | // Stores 1 byte from vector byte index |
| 793 | |
| 794 | inline void rsp_cop2_drc::sbv() |
| 795 | { |
| 796 | UINT32 op = m_op; |
| 797 | int dest = (op >> 16) & 0x1f; |
| 798 | int base = (op >> 21) & 0x1f; |
| 799 | int index = (op >> 7) & 0xf; |
| 800 | int offset = (op & 0x7f); |
| 801 | if (offset & 0x40) |
| 802 | { |
| 803 | offset |= 0xffffffc0; |
| 804 | } |
| 805 | |
| 806 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 807 | m_rsp.DM_WRITE8(ea, VREG_B(dest, index)); |
| 808 | } |
| 809 | |
| 810 | static void cfunc_sbv(void *param) |
| 811 | { |
| 812 | ((rsp_cop2 *)param)->sbv(); |
| 813 | } |
| 814 | |
| 815 | |
| 816 | // SSV |
| 817 | // |
| 818 | // 31 25 20 15 10 6 0 |
| 819 | // -------------------------------------------------- |
| 820 | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 821 | // -------------------------------------------------- |
| 822 | // |
| 823 | // Stores 2 bytes starting from vector byte index |
| 824 | |
| 825 | inline void rsp_cop2_drc::ssv() |
| 826 | { |
| 827 | UINT32 op = m_op; |
| 828 | int dest = (op >> 16) & 0x1f; |
| 829 | int base = (op >> 21) & 0x1f; |
| 830 | int index = (op >> 7) & 0xf; |
| 831 | int offset = (op & 0x7f); |
| 832 | if (offset & 0x40) |
| 833 | { |
| 834 | offset |= 0xffffffc0; |
| 835 | } |
| 836 | |
| 837 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 838 | |
| 839 | int end = index + 2; |
| 840 | for (int i = index; i < end; i++) |
| 841 | { |
| 842 | m_rsp.DM_WRITE8(ea, VREG_B(dest, i)); |
| 843 | ea++; |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | static void cfunc_ssv(void *param) |
| 848 | { |
| 849 | ((rsp_cop2 *)param)->ssv(); |
| 850 | } |
| 851 | |
| 852 | |
| 853 | // SLV |
| 854 | // |
| 855 | // 31 25 20 15 10 6 0 |
| 856 | // -------------------------------------------------- |
| 857 | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 858 | // -------------------------------------------------- |
| 859 | // |
| 860 | // Stores 4 bytes starting from vector byte index |
| 861 | |
| 862 | inline void rsp_cop2_drc::slv() |
| 863 | { |
| 864 | UINT32 op = m_op; |
| 865 | int dest = (op >> 16) & 0x1f; |
| 866 | int base = (op >> 21) & 0x1f; |
| 867 | int index = (op >> 7) & 0xf; |
| 868 | int offset = (op & 0x7f); |
| 869 | if (offset & 0x40) |
| 870 | { |
| 871 | offset |= 0xffffffc0; |
| 872 | } |
| 873 | |
| 874 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 875 | |
| 876 | int end = index + 4; |
| 877 | for (int i = index; i < end; i++) |
| 878 | { |
| 879 | m_rsp.DM_WRITE8(ea, VREG_B(dest, i)); |
| 880 | ea++; |
| 881 | } |
| 882 | } |
| 883 | |
| 884 | static void cfunc_slv(void *param) |
| 885 | { |
| 886 | ((rsp_cop2 *)param)->slv(); |
| 887 | } |
| 888 | |
| 889 | |
| 890 | // SDV |
| 891 | // |
| 892 | // 31 25 20 15 10 6 0 |
| 893 | // -------------------------------------------------- |
| 894 | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 895 | // -------------------------------------------------- |
| 896 | // |
| 897 | // Stores 8 bytes starting from vector byte index |
| 898 | |
| 899 | inline void rsp_cop2_drc::sdv() |
| 900 | { |
| 901 | UINT32 op = m_op; |
| 902 | int dest = (op >> 16) & 0x1f; |
| 903 | int base = (op >> 21) & 0x1f; |
| 904 | int index = (op >> 7) & 0x8; |
| 905 | int offset = (op & 0x7f); |
| 906 | if (offset & 0x40) |
| 907 | { |
| 908 | offset |= 0xffffffc0; |
| 909 | } |
| 910 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 911 | |
| 912 | int end = index + 8; |
| 913 | for (int i = index; i < end; i++) |
| 914 | { |
| 915 | m_rsp.DM_WRITE8(ea, VREG_B(dest, i)); |
| 916 | ea++; |
| 917 | } |
| 918 | } |
| 919 | |
| 920 | static void cfunc_sdv(void *param) |
| 921 | { |
| 922 | ((rsp_cop2 *)param)->sdv(); |
| 923 | } |
| 924 | |
| 925 | |
| 926 | // SQV |
| 927 | // |
| 928 | // 31 25 20 15 10 6 0 |
| 929 | // -------------------------------------------------- |
| 930 | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 931 | // -------------------------------------------------- |
| 932 | // |
| 933 | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 934 | |
| 935 | inline void rsp_cop2_drc::sqv() |
| 936 | { |
| 937 | UINT32 op = m_op; |
| 938 | int dest = (op >> 16) & 0x1f; |
| 939 | int base = (op >> 21) & 0x1f; |
| 940 | int index = (op >> 7) & 0xf; |
| 941 | int offset = (op & 0x7f); |
| 942 | if (offset & 0x40) |
| 943 | { |
| 944 | offset |= 0xffffffc0; |
| 945 | } |
| 946 | |
| 947 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 948 | int end = index + (16 - (ea & 0xf)); |
| 949 | for (int i=index; i < end; i++) |
| 950 | { |
| 951 | m_rsp.DM_WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 952 | ea++; |
| 953 | } |
| 954 | } |
| 955 | |
| 956 | static void cfunc_sqv(void *param) |
| 957 | { |
| 958 | ((rsp_cop2 *)param)->sqv(); |
| 959 | } |
| 960 | |
| 961 | |
| 962 | // SRV |
| 963 | // |
| 964 | // 31 25 20 15 10 6 0 |
| 965 | // -------------------------------------------------- |
| 966 | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 967 | // -------------------------------------------------- |
| 968 | // |
| 969 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 970 | |
| 971 | inline void rsp_cop2_drc::srv() |
| 972 | { |
| 973 | UINT32 op = m_op; |
| 974 | int dest = (op >> 16) & 0x1f; |
| 975 | int base = (op >> 21) & 0x1f; |
| 976 | int index = (op >> 7) & 0xf; |
| 977 | int offset = (op & 0x7f); |
| 978 | if (offset & 0x40) |
| 979 | { |
| 980 | offset |= 0xffffffc0; |
| 981 | } |
| 982 | |
| 983 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 984 | |
| 985 | int end = index + (ea & 0xf); |
| 986 | int o = (16 - (ea & 0xf)) & 0xf; |
| 987 | ea &= ~0xf; |
| 988 | |
| 989 | for (int i = index; i < end; i++) |
| 990 | { |
| 991 | m_rsp.DM_WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 992 | ea++; |
| 993 | } |
| 994 | } |
| 995 | |
| 996 | static void cfunc_srv(void *param) |
| 997 | { |
| 998 | ((rsp_cop2 *)param)->srv(); |
| 999 | } |
| 1000 | |
| 1001 | |
| 1002 | // SPV |
| 1003 | // |
| 1004 | // 31 25 20 15 10 6 0 |
| 1005 | // -------------------------------------------------- |
| 1006 | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1007 | // -------------------------------------------------- |
| 1008 | // |
| 1009 | // Stores upper 8 bits of each element |
| 1010 | |
| 1011 | inline void rsp_cop2_drc::spv() |
| 1012 | { |
| 1013 | UINT32 op = m_op; |
| 1014 | int dest = (op >> 16) & 0x1f; |
| 1015 | int base = (op >> 21) & 0x1f; |
| 1016 | int index = (op >> 7) & 0xf; |
| 1017 | int offset = (op & 0x7f); |
| 1018 | if (offset & 0x40) |
| 1019 | { |
| 1020 | offset |= 0xffffffc0; |
| 1021 | } |
| 1022 | |
| 1023 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1024 | int end = index + 8; |
| 1025 | for (int i=index; i < end; i++) |
| 1026 | { |
| 1027 | if ((i & 0xf) < 8) |
| 1028 | { |
| 1029 | m_rsp.DM_WRITE8(ea, VREG_B(dest, (i & 0xf) << 1)); |
| 1030 | } |
| 1031 | else |
| 1032 | { |
| 1033 | m_rsp.DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1034 | } |
| 1035 | ea++; |
| 1036 | } |
| 1037 | } |
| 1038 | |
| 1039 | static void cfunc_spv(void *param) |
| 1040 | { |
| 1041 | ((rsp_cop2 *)param)->spv(); |
| 1042 | } |
| 1043 | |
| 1044 | |
| 1045 | // SUV |
| 1046 | // |
| 1047 | // 31 25 20 15 10 6 0 |
| 1048 | // -------------------------------------------------- |
| 1049 | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1050 | // -------------------------------------------------- |
| 1051 | // |
| 1052 | // Stores bits 14-7 of each element |
| 1053 | |
| 1054 | inline void rsp_cop2_drc::suv() |
| 1055 | { |
| 1056 | UINT32 op = m_op; |
| 1057 | int dest = (op >> 16) & 0x1f; |
| 1058 | int base = (op >> 21) & 0x1f; |
| 1059 | int index = (op >> 7) & 0xf; |
| 1060 | int offset = (op & 0x7f); |
| 1061 | if (offset & 0x40) |
| 1062 | { |
| 1063 | offset |= 0xffffffc0; |
| 1064 | } |
| 1065 | |
| 1066 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1067 | int end = index + 8; |
| 1068 | for (int i=index; i < end; i++) |
| 1069 | { |
| 1070 | if ((i & 0xf) < 8) |
| 1071 | { |
| 1072 | m_rsp.DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 1073 | } |
| 1074 | else |
| 1075 | { |
| 1076 | m_rsp.DM_WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 1077 | } |
| 1078 | ea++; |
| 1079 | } |
| 1080 | } |
| 1081 | |
| 1082 | static void cfunc_suv(void *param) |
| 1083 | { |
| 1084 | ((rsp_cop2 *)param)->suv(); |
| 1085 | } |
| 1086 | |
| 1087 | |
| 1088 | // SHV |
| 1089 | // |
| 1090 | // 31 25 20 15 10 6 0 |
| 1091 | // -------------------------------------------------- |
| 1092 | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1093 | // -------------------------------------------------- |
| 1094 | // |
| 1095 | // Stores bits 14-7 of each element, with 2-byte stride |
| 1096 | |
| 1097 | inline void rsp_cop2_drc::shv() |
| 1098 | { |
| 1099 | UINT32 op = m_op; |
| 1100 | int dest = (op >> 16) & 0x1f; |
| 1101 | int base = (op >> 21) & 0x1f; |
| 1102 | int index = (op >> 7) & 0xf; |
| 1103 | int offset = (op & 0x7f); |
| 1104 | if (offset & 0x40) |
| 1105 | { |
| 1106 | offset |= 0xffffffc0; |
| 1107 | } |
| 1108 | |
| 1109 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1110 | for (int i=0; i < 8; i++) |
| 1111 | { |
| 1112 | int element = index + (i << 1); |
| 1113 | UINT8 d = (VREG_B(dest, (element & 0xf)) << 1) | |
| 1114 | (VREG_B(dest, ((element + 1) & 0xf)) >> 7); |
| 1115 | m_rsp.DM_WRITE8(ea, d); |
| 1116 | ea += 2; |
| 1117 | } |
| 1118 | } |
| 1119 | |
| 1120 | static void cfunc_shv(void *param) |
| 1121 | { |
| 1122 | ((rsp_cop2 *)param)->shv(); |
| 1123 | } |
| 1124 | |
| 1125 | |
| 1126 | // SFV |
| 1127 | // |
| 1128 | // 31 25 20 15 10 6 0 |
| 1129 | // -------------------------------------------------- |
| 1130 | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1131 | // -------------------------------------------------- |
| 1132 | // |
| 1133 | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 1134 | |
| 1135 | inline void rsp_cop2_drc::sfv() |
| 1136 | { |
| 1137 | UINT32 op = m_op; |
| 1138 | int dest = (op >> 16) & 0x1f; |
| 1139 | int base = (op >> 21) & 0x1f; |
| 1140 | int index = (op >> 7) & 0xf; |
| 1141 | int offset = (op & 0x7f); |
| 1142 | if (offset & 0x40) |
| 1143 | { |
| 1144 | offset |= 0xffffffc0; |
| 1145 | } |
| 1146 | |
| 1147 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1148 | int eaoffset = ea & 0xf; |
| 1149 | ea &= ~0xf; |
| 1150 | |
| 1151 | int end = (index >> 1) + 4; |
| 1152 | |
| 1153 | for (int i = index>>1; i < end; i++) |
| 1154 | { |
| 1155 | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 1156 | eaoffset += 4; |
| 1157 | } |
| 1158 | } |
| 1159 | |
| 1160 | static void cfunc_sfv(void *param) |
| 1161 | { |
| 1162 | ((rsp_cop2 *)param)->sfv(); |
| 1163 | } |
| 1164 | |
| 1165 | |
| 1166 | // SWV |
| 1167 | // |
| 1168 | // 31 25 20 15 10 6 0 |
| 1169 | // -------------------------------------------------- |
| 1170 | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1171 | // -------------------------------------------------- |
| 1172 | // |
| 1173 | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1174 | // after byte index 15 |
| 1175 | |
| 1176 | inline void rsp_cop2_drc::swv() |
| 1177 | { |
| 1178 | UINT32 op = m_op; |
| 1179 | int dest = (op >> 16) & 0x1f; |
| 1180 | int base = (op >> 21) & 0x1f; |
| 1181 | int index = (op >> 7) & 0xf; |
| 1182 | int offset = (op & 0x7f); |
| 1183 | if (offset & 0x40) |
| 1184 | { |
| 1185 | offset |= 0xffffffc0; |
| 1186 | } |
| 1187 | |
| 1188 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1189 | int eaoffset = ea & 0xf; |
| 1190 | ea &= ~0xf; |
| 1191 | |
| 1192 | int end = index + 16; |
| 1193 | for (int i = index; i < end; i++) |
| 1194 | { |
| 1195 | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); |
| 1196 | eaoffset++; |
| 1197 | } |
| 1198 | } |
| 1199 | |
| 1200 | static void cfunc_swv(void *param) |
| 1201 | { |
| 1202 | ((rsp_cop2 *)param)->swv(); |
| 1203 | } |
| 1204 | |
| 1205 | |
| 1206 | // STV |
| 1207 | // |
| 1208 | // 31 25 20 15 10 6 0 |
| 1209 | // -------------------------------------------------- |
| 1210 | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1211 | // -------------------------------------------------- |
| 1212 | // |
| 1213 | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 1214 | |
| 1215 | inline void rsp_cop2_drc::stv() |
| 1216 | { |
| 1217 | UINT32 op = m_op; |
| 1218 | int dest = (op >> 16) & 0x1f; |
| 1219 | int base = (op >> 21) & 0x1f; |
| 1220 | int index = (op >> 7) & 0xf; |
| 1221 | int offset = (op & 0x7f); |
| 1222 | |
| 1223 | if (offset & 0x40) |
| 1224 | { |
| 1225 | offset |= 0xffffffc0; |
| 1226 | } |
| 1227 | |
| 1228 | int vs = dest; |
| 1229 | int ve = dest + 8; |
| 1230 | if (ve > 32) |
| 1231 | { |
| 1232 | ve = 32; |
| 1233 | } |
| 1234 | |
| 1235 | int element = 8 - (index >> 1); |
| 1236 | |
| 1237 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1238 | int eaoffset = (ea & 0xf) + (element * 2); |
| 1239 | ea &= ~0xf; |
| 1240 | |
| 1241 | for (int i = vs; i < ve; i++) |
| 1242 | { |
| 1243 | m_rsp.DM_WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); |
| 1244 | eaoffset += 2; |
| 1245 | element++; |
| 1246 | } |
| 1247 | } |
| 1248 | |
| 1249 | static void cfunc_stv(void *param) |
| 1250 | { |
| 1251 | ((rsp_cop2 *)param)->stv(); |
| 1252 | } |
| 1253 | |
| 1254 | int rsp_cop2_drc::generate_swc2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 1255 | { |
| 1256 | UINT32 op = desc->opptr.l[0]; |
| 1257 | int offset = (op & 0x7f); |
| 1258 | if (offset & 0x40) |
| 1259 | { |
| 1260 | offset |= 0xffffffc0; |
| 1261 | } |
| 1262 | |
| 1263 | switch ((op >> 11) & 0x1f) |
| 1264 | { |
| 1265 | case 0x00: /* SBV */ |
| 1266 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1267 | UML_CALLC(block, cfunc_sbv, this); |
| 1268 | return TRUE; |
| 1269 | |
| 1270 | case 0x01: /* SSV */ |
| 1271 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1272 | UML_CALLC(block, cfunc_ssv, this); |
| 1273 | return TRUE; |
| 1274 | |
| 1275 | case 0x02: /* SLV */ |
| 1276 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1277 | UML_CALLC(block, cfunc_slv, this); |
| 1278 | return TRUE; |
| 1279 | |
| 1280 | case 0x03: /* SDV */ |
| 1281 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1282 | UML_CALLC(block, cfunc_sdv, this); |
| 1283 | return TRUE; |
| 1284 | |
| 1285 | case 0x04: /* SQV */ |
| 1286 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1287 | UML_CALLC(block, cfunc_sqv, this); |
| 1288 | return TRUE; |
| 1289 | |
| 1290 | case 0x05: /* SRV */ |
| 1291 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1292 | UML_CALLC(block, cfunc_srv, this); |
| 1293 | return TRUE; |
| 1294 | |
| 1295 | case 0x06: /* SPV */ |
| 1296 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1297 | UML_CALLC(block, cfunc_spv, this); |
| 1298 | return TRUE; |
| 1299 | |
| 1300 | case 0x07: /* SUV */ |
| 1301 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1302 | UML_CALLC(block, cfunc_suv, this); |
| 1303 | return TRUE; |
| 1304 | |
| 1305 | case 0x08: /* SHV */ |
| 1306 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1307 | UML_CALLC(block, cfunc_shv, this); |
| 1308 | return TRUE; |
| 1309 | |
| 1310 | case 0x09: /* SFV */ |
| 1311 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1312 | UML_CALLC(block, cfunc_sfv, this); |
| 1313 | return TRUE; |
| 1314 | |
| 1315 | case 0x0a: /* SWV */ |
| 1316 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1317 | UML_CALLC(block, cfunc_swv, this); |
| 1318 | return TRUE; |
| 1319 | |
| 1320 | case 0x0b: /* STV */ |
| 1321 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1322 | UML_CALLC(block, cfunc_stv, this); |
| 1323 | return TRUE; |
| 1324 | |
| 1325 | default: |
| 1326 | m_rsp.unimplemented_opcode(op); |
| 1327 | return FALSE; |
| 1328 | } |
| 1329 | |
| 1330 | return TRUE; |
| 1331 | } |
| 1332 | |
| 1333 | |
| 1334 | /*************************************************************************** |
| 1335 | Vector Opcodes |
| 1336 | ***************************************************************************/ |
| 1337 | |
| 1338 | // VMULF |
| 1339 | // |
| 1340 | // 31 25 24 20 15 10 5 0 |
| 1341 | // ------------------------------------------------------ |
| 1342 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 1343 | // ------------------------------------------------------ |
| 1344 | // |
| 1345 | // Multiplies signed integer by signed integer * 2 |
| 1346 | |
| 1347 | inline void rsp_cop2_drc::vmulf() |
| 1348 | { |
| 1349 | int op = m_op; |
| 1350 | |
| 1351 | for (int i = 0; i < 8; i++) |
| 1352 | { |
| 1353 | UINT16 w1, w2; |
| 1354 | GET_VS1(w1, i); |
| 1355 | GET_VS2(w2, i); |
| 1356 | INT32 s1 = (INT32)(INT16)w1; |
| 1357 | INT32 s2 = (INT32)(INT16)w2; |
| 1358 | |
| 1359 | if (s1 == -32768 && s2 == -32768) |
| 1360 | { |
| 1361 | // overflow |
| 1362 | SET_ACCUM_H(0, i); |
| 1363 | SET_ACCUM_M(-32768, i); |
| 1364 | SET_ACCUM_L(-32768, i); |
| 1365 | m_vres[i] = 0x7fff; |
| 1366 | } |
| 1367 | else |
| 1368 | { |
| 1369 | INT64 r = s1 * s2 * 2; |
| 1370 | r += 0x8000; // rounding ? |
| 1371 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); |
| 1372 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1373 | SET_ACCUM_L((UINT16)(r), i); |
| 1374 | m_vres[i] = ACCUM_M(i); |
| 1375 | } |
| 1376 | } |
| 1377 | WRITEBACK_RESULT(); |
| 1378 | } |
| 1379 | |
| 1380 | static void cfunc_vmulf(void *param) |
| 1381 | { |
| 1382 | ((rsp_cop2 *)param)->vmulf(); |
| 1383 | } |
| 1384 | |
| 1385 | |
| 1386 | // VMULU |
| 1387 | // |
| 1388 | // 31 25 24 20 15 10 5 0 |
| 1389 | // ------------------------------------------------------ |
| 1390 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 1391 | // ------------------------------------------------------ |
| 1392 | // |
| 1393 | |
| 1394 | inline void rsp_cop2_drc::vmulu() |
| 1395 | { |
| 1396 | int op = m_op; |
| 1397 | |
| 1398 | for (int i = 0; i < 8; i++) |
| 1399 | { |
| 1400 | UINT16 w1, w2; |
| 1401 | GET_VS1(w1, i); |
| 1402 | GET_VS2(w2, i); |
| 1403 | INT32 s1 = (INT32)(INT16)w1; |
| 1404 | INT32 s2 = (INT32)(INT16)w2; |
| 1405 | |
| 1406 | INT64 r = s1 * s2 * 2; |
| 1407 | r += 0x8000; // rounding ? |
| 1408 | |
| 1409 | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 1410 | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 1411 | SET_ACCUM_L((UINT16)(r), i); |
| 1412 | |
| 1413 | if (r < 0) |
| 1414 | { |
| 1415 | m_vres[i] = 0; |
| 1416 | } |
| 1417 | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 1418 | { |
| 1419 | m_vres[i] = -1; |
| 1420 | } |
| 1421 | else |
| 1422 | { |
| 1423 | m_vres[i] = ACCUM_M(i); |
| 1424 | } |
| 1425 | } |
| 1426 | WRITEBACK_RESULT(); |
| 1427 | } |
| 1428 | |
| 1429 | static void cfunc_vmulu(void *param) |
| 1430 | { |
| 1431 | ((rsp_cop2 *)param)->vmulu(); |
| 1432 | } |
| 1433 | |
| 1434 | |
| 1435 | // VMUDL |
| 1436 | // |
| 1437 | // 31 25 24 20 15 10 5 0 |
| 1438 | // ------------------------------------------------------ |
| 1439 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1440 | // ------------------------------------------------------ |
| 1441 | // |
| 1442 | // Multiplies unsigned fraction by unsigned fraction |
| 1443 | // The upper 16 bits of the product are stored into the low slice of accumulator; the high and middle slices are cleared |
| 1444 | // The low slice of accumulator is stored into destination element |
| 1445 | |
| 1446 | inline void rsp_cop2_drc::vmudl() |
| 1447 | { |
| 1448 | int op = m_op; |
| 1449 | |
| 1450 | for (int i = 0; i < 8; i++) |
| 1451 | { |
| 1452 | UINT16 w1, w2; |
| 1453 | GET_VS1(w1, i); |
| 1454 | GET_VS2(w2, i); |
| 1455 | UINT32 s1 = (UINT32)(UINT16)w1; |
| 1456 | UINT32 s2 = (UINT32)(UINT16)w2; |
| 1457 | |
| 1458 | UINT32 r = s1 * s2; |
| 1459 | |
| 1460 | SET_ACCUM_H(0, i); |
| 1461 | SET_ACCUM_M(0, i); |
| 1462 | SET_ACCUM_L((UINT16)(r >> 16), i); |
| 1463 | |
| 1464 | m_vres[i] = ACCUM_L(i); |
| 1465 | } |
| 1466 | WRITEBACK_RESULT(); |
| 1467 | } |
| 1468 | |
| 1469 | static void cfunc_vmudl(void *param) |
| 1470 | { |
| 1471 | ((rsp_cop2 *)param)->vmudl(); |
| 1472 | } |
| 1473 | |
| 1474 | |
| 1475 | // VMUDM |
| 1476 | // |
| 1477 | // 31 25 24 20 15 10 5 0 |
| 1478 | // ------------------------------------------------------ |
| 1479 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1480 | // ------------------------------------------------------ |
| 1481 | // |
| 1482 | // Multiplies signed integer by unsigned fraction |
| 1483 | // The result is stored into accumulator |
| 1484 | // The middle slice of accumulator is stored into destination element |
| 1485 | |
| 1486 | inline void rsp_cop2_drc::vmudm() |
| 1487 | { |
| 1488 | int op = m_op; |
| 1489 | |
| 1490 | for (int i = 0; i < 8; i++) |
| 1491 | { |
| 1492 | UINT16 w1, w2; |
| 1493 | GET_VS1(w1, i); |
| 1494 | GET_VS2(w2, i); |
| 1495 | INT32 s1 = (INT32)(INT16)w1; |
| 1496 | INT32 s2 = (UINT16)w2; |
| 1497 | |
| 1498 | INT32 r = s1 * s2; |
| 1499 | |
| 1500 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 1501 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1502 | SET_ACCUM_L((UINT16)r, i); |
| 1503 | |
| 1504 | m_vres[i] = ACCUM_M(i); |
| 1505 | } |
| 1506 | WRITEBACK_RESULT(); |
| 1507 | } |
| 1508 | |
| 1509 | static void cfunc_vmudm(void *param) |
| 1510 | { |
| 1511 | ((rsp_cop2 *)param)->vmudm(); |
| 1512 | } |
| 1513 | |
| 1514 | |
| 1515 | // VMUDN |
| 1516 | // |
| 1517 | // 31 25 24 20 15 10 5 0 |
| 1518 | // ------------------------------------------------------ |
| 1519 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1520 | // ------------------------------------------------------ |
| 1521 | // |
| 1522 | // Multiplies unsigned fraction by signed integer |
| 1523 | // The result is stored into accumulator |
| 1524 | // The low slice of accumulator is stored into destination element |
| 1525 | |
| 1526 | inline void rsp_cop2_drc::vmudn() |
| 1527 | { |
| 1528 | int op = m_op; |
| 1529 | |
| 1530 | for (int i = 0; i < 8; i++) |
| 1531 | { |
| 1532 | UINT16 w1, w2; |
| 1533 | GET_VS1(w1, i); |
| 1534 | GET_VS2(w2, i); |
| 1535 | INT32 s1 = (UINT16)w1; |
| 1536 | INT32 s2 = (INT32)(INT16)w2; |
| 1537 | |
| 1538 | INT32 r = s1 * s2; |
| 1539 | |
| 1540 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 1541 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1542 | SET_ACCUM_L((UINT16)(r), i); |
| 1543 | |
| 1544 | m_vres[i] = (UINT16)(r); |
| 1545 | } |
| 1546 | WRITEBACK_RESULT(); |
| 1547 | } |
| 1548 | |
| 1549 | static void cfunc_vmudn(void *param) |
| 1550 | { |
| 1551 | ((rsp_cop2 *)param)->vmudn(); |
| 1552 | } |
| 1553 | |
| 1554 | |
| 1555 | // VMUDH |
| 1556 | // |
| 1557 | // 31 25 24 20 15 10 5 0 |
| 1558 | // ------------------------------------------------------ |
| 1559 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1560 | // ------------------------------------------------------ |
| 1561 | // |
| 1562 | // Multiplies signed integer by signed integer |
| 1563 | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1564 | // The highest 32 bits of accumulator is saturated into destination element |
| 1565 | |
| 1566 | inline void rsp_cop2_drc::vmudh() |
| 1567 | { |
| 1568 | int op = m_op; |
| 1569 | |
| 1570 | for (int i = 0; i < 8; i++) |
| 1571 | { |
| 1572 | UINT16 w1, w2; |
| 1573 | GET_VS1(w1, i); |
| 1574 | GET_VS2(w2, i); |
| 1575 | INT32 s1 = (INT32)(INT16)w1; |
| 1576 | INT32 s2 = (INT32)(INT16)w2; |
| 1577 | |
| 1578 | INT32 r = s1 * s2; |
| 1579 | |
| 1580 | SET_ACCUM_H((INT16)(r >> 16), i); |
| 1581 | SET_ACCUM_M((UINT16)(r), i); |
| 1582 | SET_ACCUM_L(0, i); |
| 1583 | |
| 1584 | if (r < -32768) r = -32768; |
| 1585 | if (r > 32767) r = 32767; |
| 1586 | m_vres[i] = (INT16)(r); |
| 1587 | } |
| 1588 | WRITEBACK_RESULT(); |
| 1589 | } |
| 1590 | |
| 1591 | static void cfunc_vmudh(void *param) |
| 1592 | { |
| 1593 | ((rsp_cop2 *)param)->vmudh(); |
| 1594 | } |
| 1595 | |
| 1596 | |
| 1597 | // VMACF |
| 1598 | // |
| 1599 | // 31 25 24 20 15 10 5 0 |
| 1600 | // ------------------------------------------------------ |
| 1601 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1602 | // ------------------------------------------------------ |
| 1603 | // |
| 1604 | |
| 1605 | inline void rsp_cop2_drc::vmacf() |
| 1606 | { |
| 1607 | int op = m_op; |
| 1608 | |
| 1609 | for (int i = 0; i < 8; i++) |
| 1610 | { |
| 1611 | UINT16 w1, w2; |
| 1612 | GET_VS1(w1, i); |
| 1613 | GET_VS2(w2, i); |
| 1614 | INT32 s1 = (INT32)(INT16)w1; |
| 1615 | INT32 s2 = (INT32)(INT16)w2; |
| 1616 | |
| 1617 | INT32 r = s1 * s2; |
| 1618 | |
| 1619 | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1620 | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1621 | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1622 | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1623 | |
| 1624 | q += (INT64)(r) << 17; |
| 1625 | SET_ACCUM_LL((UINT16)q, i); |
| 1626 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1627 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1628 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1629 | |
| 1630 | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1631 | } |
| 1632 | WRITEBACK_RESULT(); |
| 1633 | } |
| 1634 | |
| 1635 | static void cfunc_vmacf(void *param) |
| 1636 | { |
| 1637 | ((rsp_cop2 *)param)->vmacf(); |
| 1638 | } |
| 1639 | |
| 1640 | |
| 1641 | // VMACU |
| 1642 | // |
| 1643 | // 31 25 24 20 15 10 5 0 |
| 1644 | // ------------------------------------------------------ |
| 1645 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1646 | // ------------------------------------------------------ |
| 1647 | // |
| 1648 | |
| 1649 | inline void rsp_cop2_drc::vmacu() |
| 1650 | { |
| 1651 | int op = m_op; |
| 1652 | |
| 1653 | for (int i = 0; i < 8; i++) |
| 1654 | { |
| 1655 | UINT16 w1, w2; |
| 1656 | GET_VS1(w1, i); |
| 1657 | GET_VS2(w2, i); |
| 1658 | INT32 s1 = (INT32)(INT16)w1; |
| 1659 | INT32 s2 = (INT32)(INT16)w2; |
| 1660 | |
| 1661 | INT32 r1 = s1 * s2; |
| 1662 | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1663 | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1664 | |
| 1665 | SET_ACCUM_L((UINT16)(r2), i); |
| 1666 | SET_ACCUM_M((UINT16)(r3), i); |
| 1667 | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31), i); |
| 1668 | |
| 1669 | if ((INT16)ACCUM_H(i) < 0) |
| 1670 | { |
| 1671 | m_vres[i] = 0; |
| 1672 | } |
| 1673 | else |
| 1674 | { |
| 1675 | if (ACCUM_H(i) != 0) |
| 1676 | { |
| 1677 | m_vres[i] = (INT16)0xffff; |
| 1678 | } |
| 1679 | else |
| 1680 | { |
| 1681 | if ((INT16)ACCUM_M(i) < 0) |
| 1682 | { |
| 1683 | m_vres[i] = (INT16)0xffff; |
| 1684 | } |
| 1685 | else |
| 1686 | { |
| 1687 | m_vres[i] = ACCUM_M(i); |
| 1688 | } |
| 1689 | } |
| 1690 | } |
| 1691 | } |
| 1692 | WRITEBACK_RESULT(); |
| 1693 | } |
| 1694 | |
| 1695 | static void cfunc_vmacu(void *param) |
| 1696 | { |
| 1697 | ((rsp_cop2 *)param)->vmacu(); |
| 1698 | } |
| 1699 | |
| 1700 | |
| 1701 | // VMADL |
| 1702 | // |
| 1703 | // 31 25 24 20 15 10 5 0 |
| 1704 | // ------------------------------------------------------ |
| 1705 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1706 | // ------------------------------------------------------ |
| 1707 | // |
| 1708 | // Multiplies unsigned fraction by unsigned fraction |
| 1709 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1710 | // The low slice of accumulator is stored into destination element |
| 1711 | |
| 1712 | inline void rsp_cop2_drc::vmadl() |
| 1713 | { |
| 1714 | int op = m_op; |
| 1715 | |
| 1716 | for (int i = 0; i < 8; i++) |
| 1717 | { |
| 1718 | UINT16 w1, w2; |
| 1719 | GET_VS1(w1, i); |
| 1720 | GET_VS2(w2, i); |
| 1721 | UINT32 s1 = w1; |
| 1722 | UINT32 s2 = w2; |
| 1723 | |
| 1724 | UINT32 r1 = s1 * s2; |
| 1725 | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1726 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1727 | |
| 1728 | SET_ACCUM_L((UINT16)r2, i); |
| 1729 | SET_ACCUM_M((UINT16)r3, i); |
| 1730 | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 1731 | |
| 1732 | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1733 | } |
| 1734 | WRITEBACK_RESULT(); |
| 1735 | } |
| 1736 | |
| 1737 | static void cfunc_vmadl(void *param) |
| 1738 | { |
| 1739 | ((rsp_cop2 *)param)->vmadl(); |
| 1740 | } |
| 1741 | |
| 1742 | |
| 1743 | // VMADM |
| 1744 | // |
| 1745 | |
| 1746 | inline void rsp_cop2_drc::vmadm() |
| 1747 | { |
| 1748 | int op = m_op; |
| 1749 | |
| 1750 | for (int i = 0; i < 8; i++) |
| 1751 | { |
| 1752 | UINT16 w1, w2; |
| 1753 | GET_VS1(w1, i); |
| 1754 | GET_VS2(w2, i); |
| 1755 | UINT32 s1 = (INT32)(INT16)w1; |
| 1756 | UINT32 s2 = (UINT16)w2; |
| 1757 | |
| 1758 | UINT32 r1 = s1 * s2; |
| 1759 | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 1760 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 1761 | |
| 1762 | SET_ACCUM_L((UINT16)r2, i); |
| 1763 | SET_ACCUM_M((UINT16)r3, i); |
| 1764 | SET_ACCUM_H((UINT16)ACCUM_H(i) + (UINT16)(r3 >> 16), i); |
| 1765 | if ((INT32)(r1) < 0) |
| 1766 | { |
| 1767 | SET_ACCUM_H((UINT16)ACCUM_H(i) - 1, i); |
| 1768 | } |
| 1769 | |
| 1770 | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1771 | } |
| 1772 | WRITEBACK_RESULT(); |
| 1773 | } |
| 1774 | |
| 1775 | static void cfunc_vmadm(void *param) |
| 1776 | { |
| 1777 | ((rsp_cop2 *)param)->vmadm(); |
| 1778 | } |
| 1779 | |
| 1780 | |
| 1781 | // VMADN |
| 1782 | // |
| 1783 | |
| 1784 | inline void rsp_cop2_drc::vmadn() |
| 1785 | { |
| 1786 | int op = m_op; |
| 1787 | |
| 1788 | for (int i = 0; i < 8; i++) |
| 1789 | { |
| 1790 | UINT16 w1, w2; |
| 1791 | GET_VS1(w1, i); |
| 1792 | GET_VS2(w2, i); |
| 1793 | INT32 s1 = (UINT16)w1; |
| 1794 | INT32 s2 = (INT32)(INT16)w2; |
| 1795 | |
| 1796 | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1797 | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1798 | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1799 | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1800 | q += (INT64)(s1*s2) << 16; |
| 1801 | |
| 1802 | SET_ACCUM_LL((UINT16)q, i); |
| 1803 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1804 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1805 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1806 | |
| 1807 | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1808 | } |
| 1809 | WRITEBACK_RESULT(); |
| 1810 | } |
| 1811 | |
| 1812 | static void cfunc_vmadn(void *param) |
| 1813 | { |
| 1814 | ((rsp_cop2 *)param)->vmadn(); |
| 1815 | } |
| 1816 | |
| 1817 | |
| 1818 | // VMADH |
| 1819 | // |
| 1820 | // 31 25 24 20 15 10 5 0 |
| 1821 | // ------------------------------------------------------ |
| 1822 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 1823 | // ------------------------------------------------------ |
| 1824 | // |
| 1825 | // Multiplies signed integer by signed integer |
| 1826 | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 1827 | // The highest 32 bits of accumulator is saturated into destination element |
| 1828 | |
| 1829 | inline void rsp_cop2_drc::vmadh() |
| 1830 | { |
| 1831 | int op = m_op; |
| 1832 | |
| 1833 | for (int i = 0; i < 8; i++) |
| 1834 | { |
| 1835 | INT16 w1, w2; |
| 1836 | GET_VS1(w1, i); |
| 1837 | GET_VS2(w2, i); |
| 1838 | INT32 s1 = (INT32)(INT16)w1; |
| 1839 | INT32 s2 = (INT32)(INT16)w2; |
| 1840 | |
| 1841 | INT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1842 | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1843 | accum += s1 * s2; |
| 1844 | |
| 1845 | SET_ACCUM_H((UINT16)(accum >> 16), i); |
| 1846 | SET_ACCUM_M((UINT16)accum, i); |
| 1847 | |
| 1848 | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1849 | } |
| 1850 | WRITEBACK_RESULT(); |
| 1851 | } |
| 1852 | |
| 1853 | static void cfunc_vmadh(void *param) |
| 1854 | { |
| 1855 | ((rsp_cop2 *)param)->vmadh(); |
| 1856 | } |
| 1857 | |
| 1858 | |
| 1859 | // VADD |
| 1860 | // 31 25 24 20 15 10 5 0 |
| 1861 | // ------------------------------------------------------ |
| 1862 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 1863 | // ------------------------------------------------------ |
| 1864 | // |
| 1865 | // Adds two vector registers and carry flag, the result is saturated to 32767 |
| 1866 | |
| 1867 | inline void rsp_cop2_drc::vadd() |
| 1868 | { |
| 1869 | int op = m_op; |
| 1870 | |
| 1871 | for (int i = 0; i < 8; i++) |
| 1872 | { |
| 1873 | INT16 w1, w2; |
| 1874 | GET_VS1(w1, i); |
| 1875 | GET_VS2(w2, i); |
| 1876 | INT32 s1 = (INT32)(INT16)w1; |
| 1877 | INT32 s2 = (INT32)(INT16)w2; |
| 1878 | INT32 r = s1 + s2 + (((CARRY_FLAG(i)) != 0) ? 1 : 0); |
| 1879 | |
| 1880 | SET_ACCUM_L((INT16)(r), i); |
| 1881 | |
| 1882 | if (r > 32767) r = 32767; |
| 1883 | if (r < -32768) r = -32768; |
| 1884 | m_vres[i] = (INT16)(r); |
| 1885 | } |
| 1886 | CLEAR_ZERO_FLAGS(); |
| 1887 | CLEAR_CARRY_FLAGS(); |
| 1888 | WRITEBACK_RESULT(); |
| 1889 | } |
| 1890 | |
| 1891 | static void cfunc_vadd(void *param) |
| 1892 | { |
| 1893 | ((rsp_cop2 *)param)->vadd(); |
| 1894 | } |
| 1895 | |
| 1896 | |
| 1897 | // VSUB |
| 1898 | // |
| 1899 | // 31 25 24 20 15 10 5 0 |
| 1900 | // ------------------------------------------------------ |
| 1901 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 1902 | // ------------------------------------------------------ |
| 1903 | // |
| 1904 | // Subtracts two vector registers and carry flag, the result is saturated to -32768 |
| 1905 | // TODO: check VS2REG == VDREG |
| 1906 | |
| 1907 | inline void rsp_cop2_drc::vsub() |
| 1908 | { |
| 1909 | int op = m_op; |
| 1910 | |
| 1911 | for (int i = 0; i < 8; i++) |
| 1912 | { |
| 1913 | INT16 w1, w2; |
| 1914 | GET_VS1(w1, i); |
| 1915 | GET_VS2(w2, i); |
| 1916 | INT32 s1 = (INT32)(INT16)w1; |
| 1917 | INT32 s2 = (INT32)(INT16)w2; |
| 1918 | INT32 r = s1 - s2 - (((CARRY_FLAG(i)) != 0) ? 1 : 0); |
| 1919 | |
| 1920 | SET_ACCUM_L((INT16)(r), i); |
| 1921 | |
| 1922 | if (r > 32767) r = 32767; |
| 1923 | if (r < -32768) r = -32768; |
| 1924 | |
| 1925 | m_vres[i] = (INT16)(r); |
| 1926 | } |
| 1927 | CLEAR_ZERO_FLAGS(); |
| 1928 | CLEAR_CARRY_FLAGS(); |
| 1929 | WRITEBACK_RESULT(); |
| 1930 | } |
| 1931 | |
| 1932 | static void cfunc_vsub(void *param) |
| 1933 | { |
| 1934 | ((rsp_cop2 *)param)->vsub(); |
| 1935 | } |
| 1936 | |
| 1937 | |
| 1938 | // VABS |
| 1939 | // |
| 1940 | // 31 25 24 20 15 10 5 0 |
| 1941 | // ------------------------------------------------------ |
| 1942 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 1943 | // ------------------------------------------------------ |
| 1944 | // |
| 1945 | // Changes the sign of source register 2 if source register 1 is negative and stores the result to destination register |
| 1946 | |
| 1947 | inline void rsp_cop2_drc::vabs() |
| 1948 | { |
| 1949 | int op = m_op; |
| 1950 | |
| 1951 | for (int i = 0; i < 8; i++) |
| 1952 | { |
| 1953 | INT16 s1, s2; |
| 1954 | GET_VS1(s1, i); |
| 1955 | GET_VS2(s2, i); |
| 1956 | |
| 1957 | if (s1 < 0) |
| 1958 | { |
| 1959 | if (s2 == -32768) |
| 1960 | { |
| 1961 | m_vres[i] = 32767; |
| 1962 | } |
| 1963 | else |
| 1964 | { |
| 1965 | m_vres[i] = -s2; |
| 1966 | } |
| 1967 | } |
| 1968 | else if (s1 > 0) |
| 1969 | { |
| 1970 | m_vres[i] = s2; |
| 1971 | } |
| 1972 | else |
| 1973 | { |
| 1974 | m_vres[i] = 0; |
| 1975 | } |
| 1976 | |
| 1977 | SET_ACCUM_L(m_vres[i], i); |
| 1978 | } |
| 1979 | WRITEBACK_RESULT(); |
| 1980 | } |
| 1981 | |
| 1982 | static void cfunc_vabs(void *param) |
| 1983 | { |
| 1984 | ((rsp_cop2 *)param)->vabs(); |
| 1985 | } |
| 1986 | |
| 1987 | |
| 1988 | // VADDC |
| 1989 | // |
| 1990 | // 31 25 24 20 15 10 5 0 |
| 1991 | // ------------------------------------------------------ |
| 1992 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 1993 | // ------------------------------------------------------ |
| 1994 | // |
| 1995 | // Adds two vector registers, the carry out is stored into carry register |
| 1996 | // TODO: check VS2REG = VDREG |
| 1997 | |
| 1998 | inline void rsp_cop2_drc::vaddc() |
| 1999 | { |
| 2000 | int op = m_op; |
| 2001 | |
| 2002 | CLEAR_ZERO_FLAGS(); |
| 2003 | CLEAR_CARRY_FLAGS(); |
| 2004 | |
| 2005 | for (int i = 0; i < 8; i++) |
| 2006 | { |
| 2007 | INT16 w1, w2; |
| 2008 | GET_VS1(w1, i); |
| 2009 | GET_VS2(w2, i); |
| 2010 | INT32 s1 = (UINT32)(UINT16)w1; |
| 2011 | INT32 s2 = (UINT32)(UINT16)w2; |
| 2012 | INT32 r = s1 + s2; |
| 2013 | |
| 2014 | m_vres[i] = (INT16)(r); |
| 2015 | SET_ACCUM_L((INT16)r, i); |
| 2016 | |
| 2017 | if (r & 0xffff0000) |
| 2018 | { |
| 2019 | SET_CARRY_FLAG(i); |
| 2020 | } |
| 2021 | } |
| 2022 | WRITEBACK_RESULT(); |
| 2023 | } |
| 2024 | |
| 2025 | static void cfunc_vaddc(void *param) |
| 2026 | { |
| 2027 | ((rsp_cop2 *)param)->vaddc(); |
| 2028 | } |
| 2029 | |
| 2030 | |
| 2031 | // VSUBC |
| 2032 | // |
| 2033 | // 31 25 24 20 15 10 5 0 |
| 2034 | // ------------------------------------------------------ |
| 2035 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 2036 | // ------------------------------------------------------ |
| 2037 | // |
| 2038 | // Subtracts two vector registers, the carry out is stored into carry register |
| 2039 | // TODO: check VS2REG = VDREG |
| 2040 | |
| 2041 | inline void rsp_cop2_drc::vsubc() |
| 2042 | { |
| 2043 | int op = m_op; |
| 2044 | |
| 2045 | CLEAR_ZERO_FLAGS(); |
| 2046 | CLEAR_CARRY_FLAGS(); |
| 2047 | |
| 2048 | for (int i = 0; i < 8; i++) |
| 2049 | { |
| 2050 | INT16 w1, w2; |
| 2051 | GET_VS1(w1, i); |
| 2052 | GET_VS2(w2, i); |
| 2053 | INT32 s1 = (UINT32)(UINT16)w1; |
| 2054 | INT32 s2 = (UINT32)(UINT16)w2; |
| 2055 | INT32 r = s1 - s2; |
| 2056 | |
| 2057 | m_vres[i] = (INT16)(r); |
| 2058 | SET_ACCUM_L((UINT16)r, i); |
| 2059 | |
| 2060 | if ((UINT16)(r) != 0) |
| 2061 | { |
| 2062 | SET_ZERO_FLAG(i); |
| 2063 | } |
| 2064 | if (r & 0xffff0000) |
| 2065 | { |
| 2066 | SET_CARRY_FLAG(i); |
| 2067 | } |
| 2068 | } |
| 2069 | WRITEBACK_RESULT(); |
| 2070 | } |
| 2071 | |
| 2072 | static void cfunc_vsubc(void *param) |
| 2073 | { |
| 2074 | ((rsp_cop2 *)param)->vsubc(); |
| 2075 | } |
| 2076 | |
| 2077 | |
| 2078 | // VADDB |
| 2079 | // |
| 2080 | // 31 25 24 20 15 10 5 0 |
| 2081 | // ------------------------------------------------------ |
| 2082 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010110 | |
| 2083 | // ------------------------------------------------------ |
| 2084 | // |
| 2085 | // Adds two vector registers bytewise with rounding |
| 2086 | |
| 2087 | inline void rsp_cop2_drc::vaddb() |
| 2088 | { |
| 2089 | const int op = m_op; |
| 2090 | const int round = (EL == 0) ? 0 : (1 << (EL - 1)); |
| 2091 | |
| 2092 | for (int i = 0; i < 8; i++) |
| 2093 | { |
| 2094 | UINT16 w1, w2; |
| 2095 | GET_VS1(w1, i); |
| 2096 | GET_VS2(w2, i); |
| 2097 | |
| 2098 | UINT8 hb1 = w1 >> 8; |
| 2099 | UINT8 lb1 = w1 & 0xff; |
| 2100 | UINT8 hb2 = w2 >> 8; |
| 2101 | UINT8 lb2 = w2 & 0xff; |
| 2102 | |
| 2103 | UINT16 hs = hb1 + hb2 + round; |
| 2104 | UINT16 ls = lb1 + lb2 + round; |
| 2105 | |
| 2106 | SET_ACCUM_L((hs << 8) | ls, i); |
| 2107 | |
| 2108 | hs >>= EL; |
| 2109 | if (hs > 255) |
| 2110 | { |
| 2111 | hs = 255; |
| 2112 | } |
| 2113 | |
| 2114 | ls >>= EL; |
| 2115 | if (ls > 255) |
| 2116 | { |
| 2117 | ls = 255; |
| 2118 | } |
| 2119 | |
| 2120 | m_vres[i] = 0; // VD writeback disabled on production hardware |
| 2121 | // m_vres[i] = (hs << 8) | ls; |
| 2122 | } |
| 2123 | WRITEBACK_RESULT(); |
| 2124 | } |
| 2125 | |
| 2126 | static void cfunc_vaddb(void *param) |
| 2127 | { |
| 2128 | ((rsp_cop2 *)param)->vaddb(); |
| 2129 | } |
| 2130 | |
| 2131 | |
| 2132 | // VSAW |
| 2133 | // |
| 2134 | // 31 25 24 20 15 10 5 0 |
| 2135 | // ------------------------------------------------------ |
| 2136 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 2137 | // ------------------------------------------------------ |
| 2138 | // |
| 2139 | // Stores high, middle or low slice of accumulator to destination vector |
| 2140 | |
| 2141 | inline void rsp_cop2_drc::vsaw() |
| 2142 | { |
| 2143 | int op = m_op; |
| 2144 | |
| 2145 | switch (EL) |
| 2146 | { |
| 2147 | case 0x08: // VSAWH |
| 2148 | for (int i = 0; i < 8; i++) |
| 2149 | { |
| 2150 | W_VREG_S(VDREG, i) = ACCUM_H(i); |
| 2151 | } |
| 2152 | break; |
| 2153 | case 0x09: // VSAWM |
| 2154 | for (int i = 0; i < 8; i++) |
| 2155 | { |
| 2156 | W_VREG_S(VDREG, i) = ACCUM_M(i); |
| 2157 | } |
| 2158 | break; |
| 2159 | case 0x0a: // VSAWL |
| 2160 | for (int i = 0; i < 8; i++) |
| 2161 | { |
| 2162 | W_VREG_S(VDREG, i) = ACCUM_L(i); |
| 2163 | } |
| 2164 | break; |
| 2165 | default: // Unsupported |
| 2166 | { |
| 2167 | for (int i = 0; i < 8; i++) |
| 2168 | { |
| 2169 | W_VREG_S(VDREG, i) = 0; |
| 2170 | } |
| 2171 | } |
| 2172 | } |
| 2173 | } |
| 2174 | |
| 2175 | static void cfunc_vsaw(void *param) |
| 2176 | { |
| 2177 | ((rsp_cop2 *)param)->vsaw(); |
| 2178 | } |
| 2179 | |
| 2180 | |
| 2181 | // VLT |
| 2182 | // |
| 2183 | // 31 25 24 20 15 10 5 0 |
| 2184 | // ------------------------------------------------------ |
| 2185 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 2186 | // ------------------------------------------------------ |
| 2187 | // |
| 2188 | // Sets compare flags if elements in VS1 are less than VS2 |
| 2189 | // Moves the element in VS2 to destination vector |
| 2190 | |
| 2191 | inline void rsp_cop2_drc::vlt() |
| 2192 | { |
| 2193 | int op = m_op; |
| 2194 | |
| 2195 | CLEAR_COMPARE_FLAGS(); |
| 2196 | CLEAR_CLIP2_FLAGS(); |
| 2197 | |
| 2198 | for (int i = 0; i < 8; i++) |
| 2199 | { |
| 2200 | INT16 s1, s2; |
| 2201 | GET_VS1(s1, i); |
| 2202 | GET_VS2(s2, i); |
| 2203 | |
| 2204 | if (s1 < s2) |
| 2205 | { |
| 2206 | SET_COMPARE_FLAG(i); |
| 2207 | } |
| 2208 | else if (s1 == s2) |
| 2209 | { |
| 2210 | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 2211 | { |
| 2212 | SET_COMPARE_FLAG(i); |
| 2213 | } |
| 2214 | } |
| 2215 | |
| 2216 | if (COMPARE_FLAG(i) != 0) |
| 2217 | { |
| 2218 | m_vres[i] = s1; |
| 2219 | } |
| 2220 | else |
| 2221 | { |
| 2222 | m_vres[i] = s2; |
| 2223 | } |
| 2224 | |
| 2225 | SET_ACCUM_L(m_vres[i], i); |
| 2226 | } |
| 2227 | |
| 2228 | CLEAR_ZERO_FLAGS(); |
| 2229 | CLEAR_CARRY_FLAGS(); |
| 2230 | WRITEBACK_RESULT(); |
| 2231 | } |
| 2232 | |
| 2233 | static void cfunc_vlt(void *param) |
| 2234 | { |
| 2235 | ((rsp_cop2 *)param)->vlt(); |
| 2236 | } |
| 2237 | |
| 2238 | |
| 2239 | // VEQ |
| 2240 | // |
| 2241 | // 31 25 24 20 15 10 5 0 |
| 2242 | // ------------------------------------------------------ |
| 2243 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 2244 | // ------------------------------------------------------ |
| 2245 | // |
| 2246 | // Sets compare flags if elements in VS1 are equal with VS2 |
| 2247 | // Moves the element in VS2 to destination vector |
| 2248 | |
| 2249 | inline void rsp_cop2_drc::veq() |
| 2250 | { |
| 2251 | int op = m_op; |
| 2252 | |
| 2253 | CLEAR_COMPARE_FLAGS(); |
| 2254 | CLEAR_CLIP2_FLAGS(); |
| 2255 | |
| 2256 | for (int i = 0; i < 8; i++) |
| 2257 | { |
| 2258 | INT16 s1, s2; |
| 2259 | GET_VS1(s1, i); |
| 2260 | GET_VS2(s2, i); |
| 2261 | |
| 2262 | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 2263 | { |
| 2264 | SET_COMPARE_FLAG(i); |
| 2265 | m_vres[i] = s1; |
| 2266 | } |
| 2267 | else |
| 2268 | { |
| 2269 | m_vres[i] = s2; |
| 2270 | } |
| 2271 | |
| 2272 | SET_ACCUM_L(m_vres[i], i); |
| 2273 | } |
| 2274 | |
| 2275 | CLEAR_ZERO_FLAGS(); |
| 2276 | CLEAR_CARRY_FLAGS(); |
| 2277 | WRITEBACK_RESULT(); |
| 2278 | } |
| 2279 | |
| 2280 | static void cfunc_veq(void *param) |
| 2281 | { |
| 2282 | ((rsp_cop2 *)param)->veq(); |
| 2283 | } |
| 2284 | |
| 2285 | |
| 2286 | // VNE |
| 2287 | // |
| 2288 | // 31 25 24 20 15 10 5 0 |
| 2289 | // ------------------------------------------------------ |
| 2290 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 2291 | // ------------------------------------------------------ |
| 2292 | // |
| 2293 | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 2294 | // Moves the element in VS2 to destination vector |
| 2295 | |
| 2296 | inline void rsp_cop2_drc::vne() |
| 2297 | { |
| 2298 | int op = m_op; |
| 2299 | |
| 2300 | CLEAR_COMPARE_FLAGS(); |
| 2301 | CLEAR_CLIP2_FLAGS(); |
| 2302 | |
| 2303 | for (int i = 0; i < 8; i++) |
| 2304 | { |
| 2305 | INT16 s1, s2; |
| 2306 | GET_VS1(s1, i); |
| 2307 | GET_VS2(s2, i); |
| 2308 | |
| 2309 | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 2310 | { |
| 2311 | SET_COMPARE_FLAG(i); |
| 2312 | m_vres[i] = s1; |
| 2313 | } |
| 2314 | else |
| 2315 | { |
| 2316 | m_vres[i] = s2; |
| 2317 | } |
| 2318 | |
| 2319 | SET_ACCUM_L(m_vres[i], i); |
| 2320 | } |
| 2321 | |
| 2322 | CLEAR_ZERO_FLAGS(); |
| 2323 | CLEAR_CARRY_FLAGS(); |
| 2324 | WRITEBACK_RESULT(); |
| 2325 | } |
| 2326 | |
| 2327 | static void cfunc_vne(void *param) |
| 2328 | { |
| 2329 | ((rsp_cop2 *)param)->vne(); |
| 2330 | } |
| 2331 | |
| 2332 | |
| 2333 | // VGE |
| 2334 | // |
| 2335 | // 31 25 24 20 15 10 5 0 |
| 2336 | // ------------------------------------------------------ |
| 2337 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 2338 | // ------------------------------------------------------ |
| 2339 | // |
| 2340 | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 2341 | // Moves the element in VS2 to destination vector |
| 2342 | |
| 2343 | inline void rsp_cop2_drc::vge() |
| 2344 | { |
| 2345 | int op = m_op; |
| 2346 | |
| 2347 | CLEAR_COMPARE_FLAGS(); |
| 2348 | CLEAR_CLIP2_FLAGS(); |
| 2349 | |
| 2350 | for (int i = 0; i < 8; i++) |
| 2351 | { |
| 2352 | INT16 s1, s2; |
| 2353 | GET_VS1(s1, i); |
| 2354 | GET_VS2(s2, i); |
| 2355 | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 2356 | { |
| 2357 | SET_COMPARE_FLAG(i); |
| 2358 | m_vres[i] = s1; |
| 2359 | } |
| 2360 | else |
| 2361 | { |
| 2362 | m_vres[i] = s2; |
| 2363 | } |
| 2364 | |
| 2365 | SET_ACCUM_L(m_vres[i], i); |
| 2366 | } |
| 2367 | |
| 2368 | CLEAR_ZERO_FLAGS(); |
| 2369 | CLEAR_CARRY_FLAGS(); |
| 2370 | WRITEBACK_RESULT(); |
| 2371 | } |
| 2372 | |
| 2373 | static void cfunc_vge(void *param) |
| 2374 | { |
| 2375 | ((rsp_cop2 *)param)->vge(); |
| 2376 | } |
| 2377 | |
| 2378 | |
| 2379 | // VCL |
| 2380 | // |
| 2381 | // 31 25 24 20 15 10 5 0 |
| 2382 | // ------------------------------------------------------ |
| 2383 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 2384 | // ------------------------------------------------------ |
| 2385 | // |
| 2386 | // Vector clip low |
| 2387 | |
| 2388 | inline void rsp_cop2_drc::vcl() |
| 2389 | { |
| 2390 | int op = m_op; |
| 2391 | |
| 2392 | for (int i = 0; i < 8; i++) |
| 2393 | { |
| 2394 | INT16 s1, s2; |
| 2395 | GET_VS1(s1, i); |
| 2396 | GET_VS2(s2, i); |
| 2397 | |
| 2398 | if (CARRY_FLAG(i) != 0) |
| 2399 | { |
| 2400 | if (ZERO_FLAG(i) != 0) |
| 2401 | { |
| 2402 | if (COMPARE_FLAG(i) != 0) |
| 2403 | { |
| 2404 | SET_ACCUM_L(-(UINT16)s2, i); |
| 2405 | } |
| 2406 | else |
| 2407 | { |
| 2408 | SET_ACCUM_L(s1, i); |
| 2409 | } |
| 2410 | } |
| 2411 | else |
| 2412 | { |
| 2413 | if (CLIP1_FLAG(i) != 0) |
| 2414 | { |
| 2415 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 2416 | { |
| 2417 | SET_ACCUM_L(s1, i); |
| 2418 | CLEAR_COMPARE_FLAG(i); |
| 2419 | } |
| 2420 | else |
| 2421 | { |
| 2422 | SET_ACCUM_L(-((UINT16)s2), i); |
| 2423 | SET_COMPARE_FLAG(i); |
| 2424 | } |
| 2425 | } |
| 2426 | else |
| 2427 | { |
| 2428 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 2429 | { |
| 2430 | SET_ACCUM_L(s1, i); |
| 2431 | CLEAR_COMPARE_FLAG(i); |
| 2432 | } |
| 2433 | else |
| 2434 | { |
| 2435 | SET_ACCUM_L(-((UINT16)s2), i); |
| 2436 | SET_COMPARE_FLAG(i); |
| 2437 | } |
| 2438 | } |
| 2439 | } |
| 2440 | } |
| 2441 | else |
| 2442 | { |
| 2443 | if (ZERO_FLAG(i) != 0) |
| 2444 | { |
| 2445 | if (CLIP2_FLAG(i) != 0) |
| 2446 | { |
| 2447 | SET_ACCUM_L(s2, i); |
| 2448 | } |
| 2449 | else |
| 2450 | { |
| 2451 | SET_ACCUM_L(s1, i); |
| 2452 | } |
| 2453 | } |
| 2454 | else |
| 2455 | { |
| 2456 | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 2457 | { |
| 2458 | SET_ACCUM_L(s2, i); |
| 2459 | SET_CLIP2_FLAG(i); |
| 2460 | } |
| 2461 | else |
| 2462 | { |
| 2463 | SET_ACCUM_L(s1, i); |
| 2464 | CLEAR_CLIP2_FLAG(i); |
| 2465 | } |
| 2466 | } |
| 2467 | } |
| 2468 | m_vres[i] = ACCUM_L(i); |
| 2469 | } |
| 2470 | CLEAR_ZERO_FLAGS(); |
| 2471 | CLEAR_CARRY_FLAGS(); |
| 2472 | CLEAR_CLIP1_FLAGS(); |
| 2473 | WRITEBACK_RESULT(); |
| 2474 | } |
| 2475 | |
| 2476 | static void cfunc_vcl(void *param) |
| 2477 | { |
| 2478 | ((rsp_cop2 *)param)->vcl(); |
| 2479 | } |
| 2480 | |
| 2481 | |
| 2482 | // VCH |
| 2483 | // |
| 2484 | // 31 25 24 20 15 10 5 0 |
| 2485 | // ------------------------------------------------------ |
| 2486 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 2487 | // ------------------------------------------------------ |
| 2488 | // |
| 2489 | // Vector clip high |
| 2490 | |
| 2491 | inline void rsp_cop2_drc::vch() |
| 2492 | { |
| 2493 | int op = m_op; |
| 2494 | |
| 2495 | CLEAR_CARRY_FLAGS(); |
| 2496 | CLEAR_COMPARE_FLAGS(); |
| 2497 | CLEAR_CLIP1_FLAGS(); |
| 2498 | CLEAR_ZERO_FLAGS(); |
| 2499 | CLEAR_CLIP2_FLAGS(); |
| 2500 | |
| 2501 | UINT32 vce = 0; |
| 2502 | for (int i = 0; i < 8; i++) |
| 2503 | { |
| 2504 | INT16 s1, s2; |
| 2505 | GET_VS1(s1, i); |
| 2506 | GET_VS2(s2, i); |
| 2507 | |
| 2508 | if ((s1 ^ s2) < 0) |
| 2509 | { |
| 2510 | vce = (s1 + s2 == -1); |
| 2511 | SET_CARRY_FLAG(i); |
| 2512 | if (s2 < 0) |
| 2513 | { |
| 2514 | SET_CLIP2_FLAG(i); |
| 2515 | } |
| 2516 | |
| 2517 | if ((s1 + s2) <= 0) |
| 2518 | { |
| 2519 | SET_COMPARE_FLAG(i); |
| 2520 | m_vres[i] = -((UINT16)s2); |
| 2521 | } |
| 2522 | else |
| 2523 | { |
| 2524 | m_vres[i] = s1; |
| 2525 | } |
| 2526 | |
| 2527 | if ((s1 + s2) != 0 && s1 != ~s2) |
| 2528 | { |
| 2529 | SET_ZERO_FLAG(i); |
| 2530 | } |
| 2531 | }//sign |
| 2532 | else |
| 2533 | { |
| 2534 | vce = 0; |
| 2535 | if (s2 < 0) |
| 2536 | { |
| 2537 | SET_COMPARE_FLAG(i); |
| 2538 | } |
| 2539 | if ((s1 - s2) >= 0) |
| 2540 | { |
| 2541 | SET_CLIP2_FLAG(i); |
| 2542 | m_vres[i] = s2; |
| 2543 | } |
| 2544 | else |
| 2545 | { |
| 2546 | m_vres[i] = s1; |
| 2547 | } |
| 2548 | |
| 2549 | if ((s1 - s2) != 0 && s1 != ~s2) |
| 2550 | { |
| 2551 | SET_ZERO_FLAG(i); |
| 2552 | } |
| 2553 | } |
| 2554 | if (vce) |
| 2555 | { |
| 2556 | SET_CLIP1_FLAG(i); |
| 2557 | } |
| 2558 | SET_ACCUM_L(m_vres[i], i); |
| 2559 | } |
| 2560 | WRITEBACK_RESULT(); |
| 2561 | } |
| 2562 | |
| 2563 | static void cfunc_vch(void *param) |
| 2564 | { |
| 2565 | ((rsp_cop2 *)param)->vch(); |
| 2566 | } |
| 2567 | |
| 2568 | |
| 2569 | // VCR |
| 2570 | // |
| 2571 | // 31 25 24 20 15 10 5 0 |
| 2572 | // ------------------------------------------------------ |
| 2573 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 2574 | // ------------------------------------------------------ |
| 2575 | // |
| 2576 | // Vector clip reverse |
| 2577 | |
| 2578 | inline void rsp_cop2_drc::vcr() |
| 2579 | { |
| 2580 | int op = m_op; |
| 2581 | |
| 2582 | CLEAR_CARRY_FLAGS(); |
| 2583 | CLEAR_COMPARE_FLAGS(); |
| 2584 | CLEAR_CLIP1_FLAGS(); |
| 2585 | CLEAR_ZERO_FLAGS(); |
| 2586 | CLEAR_CLIP2_FLAGS(); |
| 2587 | |
| 2588 | for (int i = 0; i < 8; i++) |
| 2589 | { |
| 2590 | INT16 s1, s2; |
| 2591 | GET_VS1(s1, i); |
| 2592 | GET_VS2(s2, i); |
| 2593 | |
| 2594 | if ((INT16)(s1 ^ s2) < 0) |
| 2595 | { |
| 2596 | if (s2 < 0) |
| 2597 | { |
| 2598 | SET_CLIP2_FLAG(i); |
| 2599 | } |
| 2600 | if ((s1 + s2) <= 0) |
| 2601 | { |
| 2602 | SET_ACCUM_L(~((UINT16)s2), i); |
| 2603 | SET_COMPARE_FLAG(i); |
| 2604 | } |
| 2605 | else |
| 2606 | { |
| 2607 | SET_ACCUM_L(s1, i); |
| 2608 | } |
| 2609 | } |
| 2610 | else |
| 2611 | { |
| 2612 | if (s2 < 0) |
| 2613 | { |
| 2614 | SET_COMPARE_FLAG(i); |
| 2615 | } |
| 2616 | if ((s1 - s2) >= 0) |
| 2617 | { |
| 2618 | SET_ACCUM_L(s2, i); |
| 2619 | SET_CLIP2_FLAG(i); |
| 2620 | } |
| 2621 | else |
| 2622 | { |
| 2623 | SET_ACCUM_L(s1, i); |
| 2624 | } |
| 2625 | } |
| 2626 | |
| 2627 | m_vres[i] = ACCUM_L(i); |
| 2628 | } |
| 2629 | WRITEBACK_RESULT(); |
| 2630 | } |
| 2631 | |
| 2632 | static void cfunc_vcr(void *param) |
| 2633 | { |
| 2634 | ((rsp_cop2 *)param)->vcr(); |
| 2635 | } |
| 2636 | |
| 2637 | |
| 2638 | // VMRG |
| 2639 | // |
| 2640 | // 31 25 24 20 15 10 5 0 |
| 2641 | // ------------------------------------------------------ |
| 2642 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 2643 | // ------------------------------------------------------ |
| 2644 | // |
| 2645 | // Merges two vectors according to compare flags |
| 2646 | |
| 2647 | inline void rsp_cop2_drc::vmrg() |
| 2648 | { |
| 2649 | int op = m_op; |
| 2650 | |
| 2651 | for (int i = 0; i < 8; i++) |
| 2652 | { |
| 2653 | INT16 s1, s2; |
| 2654 | GET_VS1(s1, i); |
| 2655 | GET_VS2(s2, i); |
| 2656 | if (COMPARE_FLAG(i) != 0) |
| 2657 | { |
| 2658 | m_vres[i] = s1; |
| 2659 | } |
| 2660 | else |
| 2661 | { |
| 2662 | m_vres[i] = s2; |
| 2663 | } |
| 2664 | |
| 2665 | SET_ACCUM_L(m_vres[i], i); |
| 2666 | } |
| 2667 | WRITEBACK_RESULT(); |
| 2668 | } |
| 2669 | |
| 2670 | static void cfunc_vmrg(void *param) |
| 2671 | { |
| 2672 | ((rsp_cop2 *)param)->vmrg(); |
| 2673 | } |
| 2674 | |
| 2675 | |
| 2676 | // VAND |
| 2677 | // |
| 2678 | // 31 25 24 20 15 10 5 0 |
| 2679 | // ------------------------------------------------------ |
| 2680 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 2681 | // ------------------------------------------------------ |
| 2682 | // |
| 2683 | // Bitwise AND of two vector registers |
| 2684 | |
| 2685 | inline void rsp_cop2_drc::vand() |
| 2686 | { |
| 2687 | int op = m_op; |
| 2688 | |
| 2689 | for (int i = 0; i < 8; i++) |
| 2690 | { |
| 2691 | UINT16 s1, s2; |
| 2692 | GET_VS1(s1, i); |
| 2693 | GET_VS2(s2, i); |
| 2694 | m_vres[i] = s1 & s2; |
| 2695 | SET_ACCUM_L(m_vres[i], i); |
| 2696 | } |
| 2697 | WRITEBACK_RESULT(); |
| 2698 | } |
| 2699 | |
| 2700 | static void cfunc_vand(void *param) |
| 2701 | { |
| 2702 | ((rsp_cop2 *)param)->vand(); |
| 2703 | } |
| 2704 | |
| 2705 | |
| 2706 | // VNAND |
| 2707 | // |
| 2708 | // 31 25 24 20 15 10 5 0 |
| 2709 | // ------------------------------------------------------ |
| 2710 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 2711 | // ------------------------------------------------------ |
| 2712 | // |
| 2713 | // Bitwise NOT AND of two vector registers |
| 2714 | |
| 2715 | inline void rsp_cop2_drc::vnand() |
| 2716 | { |
| 2717 | int op = m_op; |
| 2718 | |
| 2719 | for (int i = 0; i < 8; i++) |
| 2720 | { |
| 2721 | UINT16 s1, s2; |
| 2722 | GET_VS1(s1, i); |
| 2723 | GET_VS2(s2, i); |
| 2724 | m_vres[i] = ~((s1 & s2)); |
| 2725 | SET_ACCUM_L(m_vres[i], i); |
| 2726 | } |
| 2727 | WRITEBACK_RESULT(); |
| 2728 | } |
| 2729 | |
| 2730 | static void cfunc_vnand(void *param) |
| 2731 | { |
| 2732 | ((rsp_cop2 *)param)->vnand(); |
| 2733 | } |
| 2734 | |
| 2735 | |
| 2736 | // VOR |
| 2737 | // |
| 2738 | // 31 25 24 20 15 10 5 0 |
| 2739 | // ------------------------------------------------------ |
| 2740 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2741 | // ------------------------------------------------------ |
| 2742 | // |
| 2743 | // Bitwise OR of two vector registers |
| 2744 | |
| 2745 | inline void rsp_cop2_drc::vor() |
| 2746 | { |
| 2747 | int op = m_op; |
| 2748 | |
| 2749 | for (int i = 0; i < 8; i++) |
| 2750 | { |
| 2751 | UINT16 s1, s2; |
| 2752 | GET_VS1(s1, i); |
| 2753 | GET_VS2(s2, i); |
| 2754 | m_vres[i] = s1 | s2; |
| 2755 | SET_ACCUM_L(m_vres[i], i); |
| 2756 | } |
| 2757 | WRITEBACK_RESULT(); |
| 2758 | } |
| 2759 | |
| 2760 | static void cfunc_vor(void *param) |
| 2761 | { |
| 2762 | ((rsp_cop2 *)param)->vor(); |
| 2763 | } |
| 2764 | |
| 2765 | |
| 2766 | // VNOR |
| 2767 | // |
| 2768 | // 31 25 24 20 15 10 5 0 |
| 2769 | // ------------------------------------------------------ |
| 2770 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2771 | // ------------------------------------------------------ |
| 2772 | // |
| 2773 | // Bitwise NOT OR of two vector registers |
| 2774 | |
| 2775 | inline void rsp_cop2_drc::vnor() |
| 2776 | { |
| 2777 | int op = m_op; |
| 2778 | |
| 2779 | for (int i = 0; i < 8; i++) |
| 2780 | { |
| 2781 | UINT16 s1, s2; |
| 2782 | GET_VS1(s1, i); |
| 2783 | GET_VS2(s2, i); |
| 2784 | m_vres[i] = ~(s1 | s2); |
| 2785 | SET_ACCUM_L(m_vres[i], i); |
| 2786 | } |
| 2787 | WRITEBACK_RESULT(); |
| 2788 | } |
| 2789 | |
| 2790 | static void cfunc_vnor(void *param) |
| 2791 | { |
| 2792 | ((rsp_cop2 *)param)->vnor(); |
| 2793 | } |
| 2794 | |
| 2795 | |
| 2796 | // VXOR |
| 2797 | // |
| 2798 | // 31 25 24 20 15 10 5 0 |
| 2799 | // ------------------------------------------------------ |
| 2800 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2801 | // ------------------------------------------------------ |
| 2802 | // |
| 2803 | // Bitwise XOR of two vector registers |
| 2804 | |
| 2805 | inline void rsp_cop2_drc::vxor() |
| 2806 | { |
| 2807 | int op = m_op; |
| 2808 | |
| 2809 | for (int i = 0; i < 8; i++) |
| 2810 | { |
| 2811 | UINT16 s1, s2; |
| 2812 | GET_VS1(s1, i); |
| 2813 | GET_VS2(s2, i); |
| 2814 | m_vres[i] = s1 ^ s2; |
| 2815 | SET_ACCUM_L(m_vres[i], i); |
| 2816 | } |
| 2817 | WRITEBACK_RESULT(); |
| 2818 | } |
| 2819 | |
| 2820 | static void cfunc_vxor(void *param) |
| 2821 | { |
| 2822 | ((rsp_cop2 *)param)->vxor(); |
| 2823 | } |
| 2824 | |
| 2825 | |
| 2826 | // VNXOR |
| 2827 | // |
| 2828 | // 31 25 24 20 15 10 5 0 |
| 2829 | // ------------------------------------------------------ |
| 2830 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2831 | // ------------------------------------------------------ |
| 2832 | // |
| 2833 | // Bitwise NOT XOR of two vector registers |
| 2834 | |
| 2835 | inline void rsp_cop2_drc::vnxor() |
| 2836 | { |
| 2837 | int op = m_op; |
| 2838 | |
| 2839 | for (int i = 0; i < 8; i++) |
| 2840 | { |
| 2841 | UINT16 s1, s2; |
| 2842 | GET_VS1(s1, i); |
| 2843 | GET_VS2(s2, i); |
| 2844 | m_vres[i] = ~(s1 ^ s2); |
| 2845 | SET_ACCUM_L(m_vres[i], i); |
| 2846 | } |
| 2847 | WRITEBACK_RESULT(); |
| 2848 | } |
| 2849 | |
| 2850 | static void cfunc_vnxor(void *param) |
| 2851 | { |
| 2852 | ((rsp_cop2 *)param)->vnxor(); |
| 2853 | } |
| 2854 | |
| 2855 | |
| 2856 | // VRCP |
| 2857 | // |
| 2858 | // 31 25 24 20 15 10 5 0 |
| 2859 | // ------------------------------------------------------ |
| 2860 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2861 | // ------------------------------------------------------ |
| 2862 | // |
| 2863 | // Calculates reciprocal |
| 2864 | |
| 2865 | inline void rsp_cop2_drc::vrcp() |
| 2866 | { |
| 2867 | int op = m_op; |
| 2868 | |
| 2869 | INT32 shifter = 0; |
| 2870 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2871 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2872 | if (datainput) |
| 2873 | { |
| 2874 | for (int i = 0; i < 32; i++) |
| 2875 | { |
| 2876 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2877 | { |
| 2878 | shifter = i; |
| 2879 | break; |
| 2880 | } |
| 2881 | } |
| 2882 | } |
| 2883 | else |
| 2884 | { |
| 2885 | shifter = 0x10; |
| 2886 | } |
| 2887 | |
| 2888 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2889 | INT32 fetchval = rsp_divtable[address]; |
| 2890 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2891 | if (rec < 0) |
| 2892 | { |
| 2893 | temp = ~temp; |
| 2894 | } |
| 2895 | if (!rec) |
| 2896 | { |
| 2897 | temp = 0x7fffffff; |
| 2898 | } |
| 2899 | else if (rec == 0xffff8000) |
| 2900 | { |
| 2901 | temp = 0xffff0000; |
| 2902 | } |
| 2903 | rec = temp; |
| 2904 | |
| 2905 | m_reciprocal_res = rec; |
| 2906 | m_dp_allowed = 0; |
| 2907 | |
| 2908 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 2909 | for (int i = 0; i < 8; i++) |
| 2910 | { |
| 2911 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2912 | } |
| 2913 | } |
| 2914 | |
| 2915 | static void cfunc_vrcp(void *param) |
| 2916 | { |
| 2917 | ((rsp_cop2 *)param)->vrcp(); |
| 2918 | } |
| 2919 | |
| 2920 | |
| 2921 | // VRCPL |
| 2922 | // |
| 2923 | // 31 25 24 20 15 10 5 0 |
| 2924 | // ------------------------------------------------------ |
| 2925 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2926 | // ------------------------------------------------------ |
| 2927 | // |
| 2928 | // Calculates reciprocal low part |
| 2929 | |
| 2930 | inline void rsp_cop2_drc::vrcpl() |
| 2931 | { |
| 2932 | int op = m_op; |
| 2933 | |
| 2934 | INT32 shifter = 0; |
| 2935 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2936 | INT32 datainput = rec; |
| 2937 | |
| 2938 | if (m_dp_allowed) |
| 2939 | { |
| 2940 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2941 | datainput = rec; |
| 2942 | |
| 2943 | if (rec < 0) |
| 2944 | { |
| 2945 | if (rec < -32768) |
| 2946 | { |
| 2947 | datainput = ~datainput; |
| 2948 | } |
| 2949 | else |
| 2950 | { |
| 2951 | datainput = -datainput; |
| 2952 | } |
| 2953 | } |
| 2954 | } |
| 2955 | else if (datainput < 0) |
| 2956 | { |
| 2957 | datainput = -datainput; |
| 2958 | |
| 2959 | shifter = 0x10; |
| 2960 | } |
| 2961 | |
| 2962 | if (datainput) |
| 2963 | { |
| 2964 | for (int i = 0; i < 32; i++) |
| 2965 | { |
| 2966 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2967 | { |
| 2968 | shifter = i; |
| 2969 | break; |
| 2970 | } |
| 2971 | } |
| 2972 | } |
| 2973 | |
| 2974 | UINT32 address = (datainput << shifter) >> 22; |
| 2975 | INT32 fetchval = rsp_divtable[address & 0x1ff]; |
| 2976 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2977 | temp ^= rec >> 31; |
| 2978 | |
| 2979 | if (!rec) |
| 2980 | { |
| 2981 | temp = 0x7fffffff; |
| 2982 | } |
| 2983 | else if (rec == 0xffff8000) |
| 2984 | { |
| 2985 | temp = 0xffff0000; |
| 2986 | } |
| 2987 | rec = temp; |
| 2988 | |
| 2989 | m_reciprocal_res = rec; |
| 2990 | m_dp_allowed = 0; |
| 2991 | |
| 2992 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 2993 | |
| 2994 | for (int i = 0; i < 8; i++) |
| 2995 | { |
| 2996 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 2997 | } |
| 2998 | } |
| 2999 | |
| 3000 | static void cfunc_vrcpl(void *param) |
| 3001 | { |
| 3002 | ((rsp_cop2 *)param)->vrcpl(); |
| 3003 | } |
| 3004 | |
| 3005 | |
| 3006 | // VRCPH |
| 3007 | // |
| 3008 | // 31 25 24 20 15 10 5 0 |
| 3009 | // ------------------------------------------------------ |
| 3010 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 3011 | // ------------------------------------------------------ |
| 3012 | // |
| 3013 | // Calculates reciprocal high part |
| 3014 | |
| 3015 | inline void rsp_cop2_drc::vrcph() |
| 3016 | { |
| 3017 | int op = m_op; |
| 3018 | |
| 3019 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 3020 | m_dp_allowed = 1; |
| 3021 | |
| 3022 | for (int i = 0; i < 8; i++) |
| 3023 | { |
| 3024 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3025 | } |
| 3026 | |
| 3027 | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 3028 | } |
| 3029 | |
| 3030 | static void cfunc_vrcph(void *param) |
| 3031 | { |
| 3032 | ((rsp_cop2 *)param)->vrcph(); |
| 3033 | } |
| 3034 | |
| 3035 | |
| 3036 | // VMOV |
| 3037 | // |
| 3038 | // 31 25 24 20 15 10 5 0 |
| 3039 | // ------------------------------------------------------ |
| 3040 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 3041 | // ------------------------------------------------------ |
| 3042 | // |
| 3043 | // Moves element from vector to destination vector |
| 3044 | |
| 3045 | inline void rsp_cop2_drc::vmov() |
| 3046 | { |
| 3047 | int op = m_op; |
| 3048 | |
| 3049 | W_VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 3050 | for (int i = 0; i < 8; i++) |
| 3051 | { |
| 3052 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3053 | } |
| 3054 | } |
| 3055 | |
| 3056 | static void cfunc_vmov(void *param) |
| 3057 | { |
| 3058 | ((rsp_cop2 *)param)->vmov(); |
| 3059 | } |
| 3060 | |
| 3061 | |
| 3062 | // VRSQ |
| 3063 | // |
| 3064 | // 31 25 24 20 15 10 5 0 |
| 3065 | // ------------------------------------------------------ |
| 3066 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 3067 | // ------------------------------------------------------ |
| 3068 | // |
| 3069 | // Calculates reciprocal square-root |
| 3070 | |
| 3071 | inline void rsp_cop2_drc::vrsq() |
| 3072 | { |
| 3073 | int op = m_op; |
| 3074 | |
| 3075 | INT32 shifter = 0; |
| 3076 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 3077 | INT32 datainput = (rec < 0) ? (-rec) : (rec); |
| 3078 | |
| 3079 | if (rec < 0) |
| 3080 | { |
| 3081 | if (rec < -32768) |
| 3082 | { |
| 3083 | datainput = ~datainput; |
| 3084 | } |
| 3085 | else |
| 3086 | { |
| 3087 | datainput = -datainput; |
| 3088 | } |
| 3089 | } |
| 3090 | |
| 3091 | if (datainput) |
| 3092 | { |
| 3093 | for (int i = 0; i < 32; i++) |
| 3094 | { |
| 3095 | if (datainput & (1 << ((~i) & 0x1f))) |
| 3096 | { |
| 3097 | shifter = i; |
| 3098 | break; |
| 3099 | } |
| 3100 | } |
| 3101 | } |
| 3102 | else |
| 3103 | { |
| 3104 | shifter = 0; |
| 3105 | } |
| 3106 | |
| 3107 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3108 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3109 | |
| 3110 | INT32 fetchval = rsp_divtable[address]; |
| 3111 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3112 | if (rec < 0) |
| 3113 | { |
| 3114 | temp = ~temp; |
| 3115 | } |
| 3116 | if (!rec) |
| 3117 | { |
| 3118 | temp = 0x7fffffff; |
| 3119 | } |
| 3120 | else if (rec == 0xffff8000) |
| 3121 | { |
| 3122 | temp = 0xffff0000; |
| 3123 | } |
| 3124 | rec = temp; |
| 3125 | |
| 3126 | if (rec < 0) |
| 3127 | { |
| 3128 | if (m_dp_allowed) |
| 3129 | { |
| 3130 | if (rec < -32768) |
| 3131 | { |
| 3132 | datainput = ~datainput; |
| 3133 | } |
| 3134 | else |
| 3135 | { |
| 3136 | datainput = -datainput; |
| 3137 | } |
| 3138 | } |
| 3139 | else |
| 3140 | { |
| 3141 | datainput = -datainput; |
| 3142 | } |
| 3143 | } |
| 3144 | |
| 3145 | if (datainput) |
| 3146 | { |
| 3147 | for (int i = 0; i < 32; i++) |
| 3148 | { |
| 3149 | if (datainput & (1 << ((~i) & 0x1f))) |
| 3150 | { |
| 3151 | shifter = i; |
| 3152 | break; |
| 3153 | } |
| 3154 | } |
| 3155 | } |
| 3156 | else |
| 3157 | { |
| 3158 | shifter = 0; |
| 3159 | } |
| 3160 | |
| 3161 | address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3162 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3163 | |
| 3164 | fetchval = rsp_divtable[address]; |
| 3165 | temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3166 | if (rec < 0) |
| 3167 | { |
| 3168 | temp = ~temp; |
| 3169 | } |
| 3170 | if (!rec) |
| 3171 | { |
| 3172 | temp = 0x7fff; |
| 3173 | } |
| 3174 | else if (rec == 0xffff8000) |
| 3175 | { |
| 3176 | temp = 0x0000; |
| 3177 | } |
| 3178 | rec = temp; |
| 3179 | |
| 3180 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 3181 | for (int i = 0; i < 8; i++) |
| 3182 | { |
| 3183 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3184 | } |
| 3185 | } |
| 3186 | |
| 3187 | static void cfunc_vrsq(void *param) |
| 3188 | { |
| 3189 | ((rsp_cop2 *)param)->vrsq(); |
| 3190 | } |
| 3191 | |
| 3192 | |
| 3193 | // VRSQL |
| 3194 | // |
| 3195 | // 31 25 24 20 15 10 5 0 |
| 3196 | // ------------------------------------------------------ |
| 3197 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 3198 | // ------------------------------------------------------ |
| 3199 | // |
| 3200 | // Calculates reciprocal square-root low part |
| 3201 | |
| 3202 | inline void rsp_cop2_drc::vrsql() |
| 3203 | { |
| 3204 | int op = m_op; |
| 3205 | |
| 3206 | INT32 shifter = 0; |
| 3207 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 3208 | INT32 datainput = rec; |
| 3209 | |
| 3210 | if (m_dp_allowed) |
| 3211 | { |
| 3212 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 3213 | datainput = rec; |
| 3214 | |
| 3215 | if (rec < 0) |
| 3216 | { |
| 3217 | if (rec < -32768) |
| 3218 | { |
| 3219 | datainput = ~datainput; |
| 3220 | } |
| 3221 | else |
| 3222 | { |
| 3223 | datainput = -datainput; |
| 3224 | } |
| 3225 | } |
| 3226 | } |
| 3227 | else if (datainput < 0) |
| 3228 | { |
| 3229 | datainput = -datainput; |
| 3230 | |
| 3231 | shifter = 0x10; |
| 3232 | } |
| 3233 | |
| 3234 | if (datainput) |
| 3235 | { |
| 3236 | for (int i = 0; i < 32; i++) |
| 3237 | { |
| 3238 | if (datainput & (1 << ((~i) & 0x1f))) |
| 3239 | { |
| 3240 | shifter = i; |
| 3241 | break; |
| 3242 | } |
| 3243 | } |
| 3244 | } |
| 3245 | |
| 3246 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3247 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3248 | |
| 3249 | INT32 fetchval = rsp_divtable[address]; |
| 3250 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3251 | temp ^= rec >> 31; |
| 3252 | |
| 3253 | if (!rec) |
| 3254 | { |
| 3255 | temp = 0x7fffffff; |
| 3256 | } |
| 3257 | else if (rec == 0xffff8000) |
| 3258 | { |
| 3259 | temp = 0xffff0000; |
| 3260 | } |
| 3261 | rec = temp; |
| 3262 | |
| 3263 | m_reciprocal_res = rec; |
| 3264 | m_dp_allowed = 0; |
| 3265 | |
| 3266 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 3267 | for (int i = 0; i < 8; i++) |
| 3268 | { |
| 3269 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3270 | } |
| 3271 | } |
| 3272 | |
| 3273 | static void cfunc_vrsql(void *param) |
| 3274 | { |
| 3275 | ((rsp_cop2 *)param)->vrsql(); |
| 3276 | } |
| 3277 | |
| 3278 | |
| 3279 | // VRSQH |
| 3280 | // |
| 3281 | // 31 25 24 20 15 10 5 0 |
| 3282 | // ------------------------------------------------------ |
| 3283 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 3284 | // ------------------------------------------------------ |
| 3285 | // |
| 3286 | // Calculates reciprocal square-root high part |
| 3287 | |
| 3288 | inline void rsp_cop2_drc::vrsqh() |
| 3289 | { |
| 3290 | int op = m_op; |
| 3291 | |
| 3292 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 3293 | m_dp_allowed = 1; |
| 3294 | |
| 3295 | for (int i = 0; i < 8; i++) |
| 3296 | { |
| 3297 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3298 | } |
| 3299 | |
| 3300 | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 3301 | } |
| 3302 | |
| 3303 | static void cfunc_vrsqh(void *param) |
| 3304 | { |
| 3305 | ((rsp_cop2 *)param)->vrsqh(); |
| 3306 | } |
| 3307 | |
| 3308 | |
| 3309 | /*------------------------------------------------- |
| 3310 | generate_vector_opcode - generate code for a |
| 3311 | vector opcode |
| 3312 | -------------------------------------------------*/ |
| 3313 | |
| 3314 | int rsp_cop2_drc::generate_vector_opcode(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 3315 | { |
| 3316 | UINT32 op = desc->opptr.l[0]; |
| 3317 | // Opcode legend: |
| 3318 | // E = VS2 element type |
| 3319 | // S = VS1, Source vector 1 |
| 3320 | // T = VS2, Source vector 2 |
| 3321 | // D = Destination vector |
| 3322 | |
| 3323 | switch (op & 0x3f) |
| 3324 | { |
| 3325 | case 0x00: /* VMULF */ |
| 3326 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3327 | UML_CALLC(block, cfunc_vmulf, this); |
| 3328 | return TRUE; |
| 3329 | |
| 3330 | case 0x01: /* VMULU */ |
| 3331 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3332 | UML_CALLC(block, cfunc_vmulu, this); |
| 3333 | return TRUE; |
| 3334 | |
| 3335 | case 0x04: /* VMUDL */ |
| 3336 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3337 | UML_CALLC(block, cfunc_vmudl, this); |
| 3338 | return TRUE; |
| 3339 | |
| 3340 | case 0x05: /* VMUDM */ |
| 3341 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3342 | UML_CALLC(block, cfunc_vmudm, this); |
| 3343 | return TRUE; |
| 3344 | |
| 3345 | case 0x06: /* VMUDN */ |
| 3346 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3347 | UML_CALLC(block, cfunc_vmudn, this); |
| 3348 | return TRUE; |
| 3349 | |
| 3350 | case 0x07: /* VMUDH */ |
| 3351 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3352 | UML_CALLC(block, cfunc_vmudh, this); |
| 3353 | return TRUE; |
| 3354 | |
| 3355 | case 0x08: /* VMACF */ |
| 3356 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3357 | UML_CALLC(block, cfunc_vmacf, this); |
| 3358 | return TRUE; |
| 3359 | |
| 3360 | case 0x09: /* VMACU */ |
| 3361 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3362 | UML_CALLC(block, cfunc_vmacu, this); |
| 3363 | return TRUE; |
| 3364 | |
| 3365 | case 0x0c: /* VMADL */ |
| 3366 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3367 | UML_CALLC(block, cfunc_vmadl, this); |
| 3368 | return TRUE; |
| 3369 | |
| 3370 | case 0x0d: /* VMADM */ |
| 3371 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3372 | UML_CALLC(block, cfunc_vmadm, this); |
| 3373 | return TRUE; |
| 3374 | |
| 3375 | case 0x0e: /* VMADN */ |
| 3376 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3377 | UML_CALLC(block, cfunc_vmadn, this); |
| 3378 | return TRUE; |
| 3379 | |
| 3380 | case 0x0f: /* VMADH */ |
| 3381 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3382 | UML_CALLC(block, cfunc_vmadh, this); |
| 3383 | return TRUE; |
| 3384 | |
| 3385 | case 0x10: /* VADD */ |
| 3386 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3387 | UML_CALLC(block, cfunc_vadd, this); |
| 3388 | return TRUE; |
| 3389 | |
| 3390 | case 0x11: /* VSUB */ |
| 3391 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3392 | UML_CALLC(block, cfunc_vsub, this); |
| 3393 | return TRUE; |
| 3394 | |
| 3395 | case 0x13: /* VABS */ |
| 3396 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3397 | UML_CALLC(block, cfunc_vabs, this); |
| 3398 | return TRUE; |
| 3399 | |
| 3400 | case 0x14: /* VADDC */ |
| 3401 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3402 | UML_CALLC(block, cfunc_vaddc, this); |
| 3403 | return TRUE; |
| 3404 | |
| 3405 | case 0x15: /* VSUBC */ |
| 3406 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3407 | UML_CALLC(block, cfunc_vsubc, this); |
| 3408 | return TRUE; |
| 3409 | |
| 3410 | case 0x16: /* VADDB */ |
| 3411 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3412 | UML_CALLC(block, cfunc_vaddb, this); |
| 3413 | return TRUE; |
| 3414 | |
| 3415 | case 0x17: /* VSUBB (reserved, functionally identical to VADDB) */ |
| 3416 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3417 | UML_CALLC(block, cfunc_vaddb, this); |
| 3418 | return TRUE; |
| 3419 | |
| 3420 | case 0x18: /* VACCB (reserved, functionally identical to VADDB) */ |
| 3421 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3422 | UML_CALLC(block, cfunc_vaddb, this); |
| 3423 | return TRUE; |
| 3424 | |
| 3425 | case 0x19: /* VSUCB (reserved, functionally identical to VADDB) */ |
| 3426 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3427 | UML_CALLC(block, cfunc_vaddb, this); |
| 3428 | return TRUE; |
| 3429 | |
| 3430 | case 0x1d: /* VSAW */ |
| 3431 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3432 | UML_CALLC(block, cfunc_vsaw, this); |
| 3433 | return TRUE; |
| 3434 | |
| 3435 | case 0x20: /* VLT */ |
| 3436 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3437 | UML_CALLC(block, cfunc_vlt, this); |
| 3438 | return TRUE; |
| 3439 | |
| 3440 | case 0x21: /* VEQ */ |
| 3441 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3442 | UML_CALLC(block, cfunc_veq, this); |
| 3443 | return TRUE; |
| 3444 | |
| 3445 | case 0x22: /* VNE */ |
| 3446 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3447 | UML_CALLC(block, cfunc_vne, this); |
| 3448 | return TRUE; |
| 3449 | |
| 3450 | case 0x23: /* VGE */ |
| 3451 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3452 | UML_CALLC(block, cfunc_vge, this); |
| 3453 | return TRUE; |
| 3454 | |
| 3455 | case 0x24: /* VCL */ |
| 3456 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3457 | UML_CALLC(block, cfunc_vcl, this); |
| 3458 | return TRUE; |
| 3459 | |
| 3460 | case 0x25: /* VCH */ |
| 3461 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3462 | UML_CALLC(block, cfunc_vch, this); |
| 3463 | return TRUE; |
| 3464 | |
| 3465 | case 0x26: /* VCR */ |
| 3466 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3467 | UML_CALLC(block, cfunc_vcr, this); |
| 3468 | return TRUE; |
| 3469 | |
| 3470 | case 0x27: /* VMRG */ |
| 3471 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3472 | UML_CALLC(block, cfunc_vmrg, this); |
| 3473 | return TRUE; |
| 3474 | |
| 3475 | case 0x28: /* VAND */ |
| 3476 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3477 | UML_CALLC(block, cfunc_vand, this); |
| 3478 | return TRUE; |
| 3479 | |
| 3480 | case 0x29: /* VNAND */ |
| 3481 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3482 | UML_CALLC(block, cfunc_vnand, this); |
| 3483 | return TRUE; |
| 3484 | |
| 3485 | case 0x2a: /* VOR */ |
| 3486 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3487 | UML_CALLC(block, cfunc_vor, this); |
| 3488 | return TRUE; |
| 3489 | |
| 3490 | case 0x2b: /* VNOR */ |
| 3491 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3492 | UML_CALLC(block, cfunc_vnor, this); |
| 3493 | return TRUE; |
| 3494 | |
| 3495 | case 0x2c: /* VXOR */ |
| 3496 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3497 | UML_CALLC(block, cfunc_vxor, this); |
| 3498 | return TRUE; |
| 3499 | |
| 3500 | case 0x2d: /* VNXOR */ |
| 3501 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3502 | UML_CALLC(block, cfunc_vnxor, this); |
| 3503 | return TRUE; |
| 3504 | |
| 3505 | case 0x30: /* VRCP */ |
| 3506 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3507 | UML_CALLC(block, cfunc_vrcp, this); |
| 3508 | return TRUE; |
| 3509 | |
| 3510 | case 0x31: /* VRCPL */ |
| 3511 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3512 | UML_CALLC(block, cfunc_vrcpl, this); |
| 3513 | return TRUE; |
| 3514 | |
| 3515 | case 0x32: /* VRCPH */ |
| 3516 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3517 | UML_CALLC(block, cfunc_vrcph, this); |
| 3518 | return TRUE; |
| 3519 | |
| 3520 | case 0x33: /* VMOV */ |
| 3521 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3522 | UML_CALLC(block, cfunc_vmov, this); |
| 3523 | return TRUE; |
| 3524 | |
| 3525 | case 0x34: /* VRSQ */ |
| 3526 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3527 | UML_CALLC(block, cfunc_vrsq, this); |
| 3528 | return TRUE; |
| 3529 | |
| 3530 | case 0x35: /* VRSQL */ |
| 3531 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3532 | UML_CALLC(block, cfunc_vrsql, this); |
| 3533 | return TRUE; |
| 3534 | |
| 3535 | case 0x36: /* VRSQH */ |
| 3536 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3537 | UML_CALLC(block, cfunc_vrsqh, this); |
| 3538 | return TRUE; |
| 3539 | |
| 3540 | case 0x37: /* VNOP */ |
| 3541 | case 0x3F: /* VNULL */ |
| 3542 | return TRUE; |
| 3543 | |
| 3544 | default: |
| 3545 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 3546 | UML_CALLC(block, unimplemented_opcode, &m_rsp); |
| 3547 | return FALSE; |
| 3548 | } |
| 3549 | } |
| 3550 | |
| 3551 | |
| 3552 | /*************************************************************************** |
| 3553 | Vector Flag Reading/Writing |
| 3554 | ***************************************************************************/ |
| 3555 | |
| 3556 | inline void rsp_cop2_drc::mfc2() |
| 3557 | { |
| 3558 | /* MFC2: read two adjacent bytes of vector register VS1REG and store them in GPR RTREG as a sign-extended 16-bit value. */ |
| 3559 | const UINT32 op = m_op; /* keep this name: the register-field macros presumably expand against a local called op */ |
| 3560 | const int first = (op >> 7) & 0xf; /* starting byte offset, taken from opcode bits 10..7 */ |
| 3561 | const UINT16 hi = VREG_B(VS1REG, (first + 0) & 0xf); |
| 3562 | const UINT16 lo = VREG_B(VS1REG, (first + 1) & 0xf); /* the second offset wraps modulo 16 */ |
| 3563 | if (RTREG != 0) { RTVAL = (INT32)(INT16)((hi << 8) | lo); } /* register number 0 is never written */ |
| 3564 | } |
| 3565 | |
| 3566 | static void cfunc_mfc2(void *param) |
| 3567 | { |
| 3568 | static_cast<rsp_cop2 *>(param)->mfc2(); /* C-callable shim: param is the COP2 object handed to UML_CALLC */ |
| 3569 | } |
| 3570 | |
| 3571 | inline void rsp_cop2_drc::cfc2() /* CFC2: pack one group of per-lane vector flags into GPR RTREG */ |
| 3572 | { |
| 3573 | UINT32 op = m_op; /* never referenced by name below; presumably consumed by the RTREG/RDREG/RTVAL macros */ |
| 3574 | if (RTREG) /* nothing happens when the destination register number is 0 */ |
| 3575 | { |
| 3576 | switch(RDREG) /* selects the flag group; any value other than 0, 1 or 2 leaves RTVAL untouched */ |
| 3577 | { |
| 3578 | case 0: /* bits 0..7 = carry flag of lanes 0..7, bits 8..15 = zero flag of lanes 0..7 */ |
| 3579 | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3580 | ((CARRY_FLAG(1) & 1) << 1) | |
| 3581 | ((CARRY_FLAG(2) & 1) << 2) | |
| 3582 | ((CARRY_FLAG(3) & 1) << 3) | |
| 3583 | ((CARRY_FLAG(4) & 1) << 4) | |
| 3584 | ((CARRY_FLAG(5) & 1) << 5) | |
| 3585 | ((CARRY_FLAG(6) & 1) << 6) | |
| 3586 | ((CARRY_FLAG(7) & 1) << 7) | |
| 3587 | ((ZERO_FLAG(0) & 1) << 8) | |
| 3588 | ((ZERO_FLAG(1) & 1) << 9) | |
| 3589 | ((ZERO_FLAG(2) & 1) << 10) | |
| 3590 | ((ZERO_FLAG(3) & 1) << 11) | |
| 3591 | ((ZERO_FLAG(4) & 1) << 12) | |
| 3592 | ((ZERO_FLAG(5) & 1) << 13) | |
| 3593 | ((ZERO_FLAG(6) & 1) << 14) | |
| 3594 | ((ZERO_FLAG(7) & 1) << 15); |
| 3595 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit result into the upper half */ |
| 3596 | break; |
| 3597 | case 1: /* bits 0..7 = compare flag of lanes 0..7, bits 8..15 = clip2 flag of lanes 0..7 */ |
| 3598 | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 3599 | ((COMPARE_FLAG(1) & 1) << 1) | |
| 3600 | ((COMPARE_FLAG(2) & 1) << 2) | |
| 3601 | ((COMPARE_FLAG(3) & 1) << 3) | |
| 3602 | ((COMPARE_FLAG(4) & 1) << 4) | |
| 3603 | ((COMPARE_FLAG(5) & 1) << 5) | |
| 3604 | ((COMPARE_FLAG(6) & 1) << 6) | |
| 3605 | ((COMPARE_FLAG(7) & 1) << 7) | |
| 3606 | ((CLIP2_FLAG(0) & 1) << 8) | |
| 3607 | ((CLIP2_FLAG(1) & 1) << 9) | |
| 3608 | ((CLIP2_FLAG(2) & 1) << 10) | |
| 3609 | ((CLIP2_FLAG(3) & 1) << 11) | |
| 3610 | ((CLIP2_FLAG(4) & 1) << 12) | |
| 3611 | ((CLIP2_FLAG(5) & 1) << 13) | |
| 3612 | ((CLIP2_FLAG(6) & 1) << 14) | |
| 3613 | ((CLIP2_FLAG(7) & 1) << 15); |
| 3614 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit result into the upper half */ |
| 3615 | break; |
| 3616 | case 2: /* bits 0..7 = clip1 flag of lanes 0..7; only 8 bits are produced, so no sign extension */ |
| 3617 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3618 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3619 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3620 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3621 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3622 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3623 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3624 | ((CLIP1_FLAG(7) & 1) << 7); |
| 3625 | break; |
| 3626 | } |
| 3627 | } |
| 3628 | } |
| 3629 | |
| 3630 | static void cfunc_cfc2(void *param) |
| 3631 | { |
| 3632 | static_cast<rsp_cop2 *>(param)->cfc2(); /* C-callable shim: param is the COP2 object handed to UML_CALLC */ |
| 3633 | } |
| 3634 | |
| 3635 | |
| 3636 | inline void rsp_cop2_drc::mtc2() |
| 3637 | { |
| 3638 | const UINT32 op = m_op; /* MTC2: store the low 16 bits of GPR RTREG into two adjacent bytes of vector register VS1REG */ |
| 3639 | const int first = (op >> 7) & 0xf; /* starting byte offset, taken from opcode bits 10..7 */ |
| 3640 | VREG_B(VS1REG, (first + 0) & 0xf) = (RTVAL >> 8) & 0xff; /* bits 15..8 go to the first byte */ |
| 3641 | VREG_B(VS1REG, (first + 1) & 0xf) = RTVAL & 0xff; /* bits 7..0 go to the next byte; the offset wraps modulo 16 */ |
| 3642 | } |
| 3643 | |
| 3644 | static void cfunc_mtc2(void *param) |
| 3645 | { |
| 3646 | static_cast<rsp_cop2 *>(param)->mtc2(); /* C-callable shim: param is the COP2 object handed to UML_CALLC */ |
| 3647 | } |
| 3648 | |
| 3649 | |
| 3650 | inline void rsp_cop2_drc::ctc2() /* CTC2: unpack bits of GPR RTREG into one group of per-lane vector flags */ |
| 3651 | { |
| 3652 | UINT32 op = m_op; /* never referenced by name below; presumably consumed by the RDREG/RTVAL macros */ |
| 3653 | switch(RDREG) /* selects the flag group; any value other than 0, 1 or 2 changes nothing */ |
| 3654 | { |
| 3655 | case 0: /* bits 0..7 -> carry flags (m_vflag[0]), bits 8..15 -> zero flags (m_vflag[3]) */ |
| 3656 | CLEAR_CARRY_FLAGS(); |
| 3657 | CLEAR_ZERO_FLAGS(); |
| 3658 | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* a set bit is stored as the all-ones word 0xffff */ |
| 3659 | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3660 | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3661 | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3662 | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3663 | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3664 | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3665 | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3666 | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } /* NOTE(review): may duplicate the direct m_vflag[0] writes above - macro definition not in view */ |
| 3667 | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 3668 | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 3669 | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 3670 | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 3671 | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 3672 | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 3673 | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 3674 | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 3675 | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3676 | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3677 | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3678 | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3679 | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3680 | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3681 | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3682 | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 3683 | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 3684 | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 3685 | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 3686 | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 3687 | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 3688 | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 3689 | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 3690 | break; |
| 3691 | case 1: /* bits 0..7 -> compare flags (m_vflag[1]), bits 8..15 -> clip2 flags (m_vflag[4]) */ |
| 3692 | CLEAR_COMPARE_FLAGS(); |
| 3693 | CLEAR_CLIP2_FLAGS(); |
| 3694 | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3695 | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3696 | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3697 | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3698 | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3699 | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3700 | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3701 | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3702 | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 3703 | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 3704 | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 3705 | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 3706 | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 3707 | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 3708 | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 3709 | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 3710 | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 3711 | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3712 | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3713 | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3714 | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3715 | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3716 | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3717 | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3718 | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 3719 | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 3720 | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 3721 | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 3722 | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 3723 | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 3724 | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 3725 | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 3726 | break; |
| 3727 | case 2: /* bits 0..7 -> clip1 flags (m_vflag[2]); bits 8 and above are ignored */ |
| 3728 | CLEAR_CLIP1_FLAGS(); |
| 3729 | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 3730 | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3731 | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3732 | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3733 | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3734 | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3735 | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3736 | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3737 | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 3738 | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 3739 | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 3740 | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 3741 | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 3742 | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 3743 | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 3744 | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 3745 | break; |
| 3746 | } |
| 3747 | } |
| 3748 | |
| 3749 | static void cfunc_ctc2(void *param) |
| 3750 | { |
| 3751 | static_cast<rsp_cop2 *>(param)->ctc2(); /* C-callable shim: param is the COP2 object handed to UML_CALLC */ |
| 3752 | } |
| 3753 | |
| 3754 | /*************************************************************************** |
| 3755 | COP2 Opcode Compilation |
| 3756 | ***************************************************************************/ |
| 3757 | |
| 3758 | int rsp_cop2_drc::generate_cop2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) |
| 3759 | { |
| 3760 | const UINT32 op = desc->opptr.l[0]; /* opcode word; the RSREG/RTREG field macros presumably expand against op */ |
| 3761 | /* Emit UML for one COP2 instruction. Returns TRUE when the rs field was handled here (possibly emitting nothing), FALSE otherwise. */ |
| 3762 | |
| 3763 | switch ((UINT8)RSREG) |
| 3764 | { |
| 3765 | case 0x00: /* MFCz */ |
| 3766 | if (RTREG == 0) |
| 3767 | return TRUE; /* destination is register 0: no code is emitted */ |
| 3768 | UML_MOV(block, mem(&m_op), op); /* store the opcode in m_op for the helper to decode */ |
| 3769 | UML_CALLC(block, cfunc_mfc2, this); /* callc mfc2 */ |
| 3770 | return TRUE; |
| 3771 | |
| 3772 | /* CFCz uses the same "skip when rt is 0" rule as MFCz. */ |
| 3773 | case 0x02: /* CFCz */ |
| 3774 | if (RTREG == 0) |
| 3775 | return TRUE; |
| 3776 | UML_MOV(block, mem(&m_op), op); |
| 3777 | UML_CALLC(block, cfunc_cfc2, this); /* callc cfc2 */ |
| 3778 | return TRUE; |
| 3779 | |
| 3780 | /* The two "move to" forms are always emitted. */ |
| 3781 | case 0x04: /* MTCz */ |
| 3782 | UML_MOV(block, mem(&m_op), op); |
| 3783 | UML_CALLC(block, cfunc_mtc2, this); /* callc mtc2 */ |
| 3784 | return TRUE; |
| 3785 | |
| 3786 | case 0x06: /* CTCz */ |
| 3787 | UML_MOV(block, mem(&m_op), op); |
| 3788 | UML_CALLC(block, cfunc_ctc2, this); /* callc ctc2 */ |
| 3789 | return TRUE; |
| 3790 | |
| 3791 | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 3792 | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 3793 | return generate_vector_opcode(block, compiler, desc); /* rs values 0x10..0x1f: vector unit instructions */ |
| 3794 | } |
| 3795 | return FALSE; /* every other rs value is not translated here */ |
| 3796 | } |
| | No newline at end of file |
trunk/src/emu/cpu/rsp/rspcp2s.c
| r0 | r241958 | |
| 1 | /*************************************************************************** |
| 2 | |
| 3 | rspcp2s.c |
| 4 | |
| 5 | Universal machine language-based Nintendo/SGI RSP COP2 emulator, with |
| 6 | SSSE3 SIMD optimizations. |
| 7 | Written by Harmony of the MESS team. |
| 8 | |
| 9 | Copyright the MESS team. |
| 10 | Released for general non-commercial use under the MAME license |
| 11 | Visit http://mamedev.org for licensing and usage restrictions. |
| 12 | |
| 13 | ***************************************************************************/ |
| 14 | |
| 15 | #include "emu.h" |
| 16 | #include "rsp.h" |
| 17 | #include "rspdiv.h" |
| 18 | #include "rspcp2.h" |
| 19 | #include "cpu/drcfe.h" |
| 20 | #include "cpu/drcuml.h" |
| 21 | #include "cpu/drcumlsh.h" |
| 22 | |
| 23 | using namespace uml; |
| 24 | |
| 25 | /*************************************************************************** |
| 26 | Helpful Defines |
| 27 | ***************************************************************************/ |
| 28 | |
| 29 | #define VDREG ((op >> 6) & 0x1f) /* 5-bit field at opcode bits 10..6 */ |
| 30 | #define VS1REG ((op >> 11) & 0x1f) /* 5-bit field at opcode bits 15..11 */ |
| 31 | #define VS2REG ((op >> 16) & 0x1f) /* 5-bit field at opcode bits 20..16 */ |
| 32 | #define EL ((op >> 21) & 0xf) /* 4-bit field at opcode bits 24..21; indexes the element-select tables */ |
| 33 | /* All of the above, and the three below via RSREG/RTREG/RDREG, require a variable named op in scope at the point of use. */ |
| 34 | #define RSVAL (m_rsp.m_rsp_state->r[RSREG]) |
| 35 | #define RTVAL (m_rsp.m_rsp_state->r[RTREG]) |
| 36 | #define RDVAL (m_rsp.m_rsp_state->r[RDREG]) |
| 37 | |
| 38 | #define EXTRACT16(reg, value, element) /* value = 16-bit lane (element & 7) of reg; the switch exists because the intrinsic needs a literal lane index */ \ |
| 39 | switch((element) & 7) \ |
| 40 | { \ |
| 41 | case 0: value = _mm_extract_epi16(reg, 0); break; \ |
| 42 | case 1: value = _mm_extract_epi16(reg, 1); break; \ |
| 43 | case 2: value = _mm_extract_epi16(reg, 2); break; \ |
| 44 | case 3: value = _mm_extract_epi16(reg, 3); break; \ |
| 45 | case 4: value = _mm_extract_epi16(reg, 4); break; \ |
| 46 | case 5: value = _mm_extract_epi16(reg, 5); break; \ |
| 47 | case 6: value = _mm_extract_epi16(reg, 6); break; \ |
| 48 | case 7: value = _mm_extract_epi16(reg, 7); break; \ |
| 49 | } |
| 50 | /* Both macros expand to a bare switch statement, so they are statements, not expressions. */ |
| 51 | /* INSERT16 evaluates reg more than once (it is both read and assigned). */ |
| 52 | #define INSERT16(reg, value, element) /* reg = reg with 16-bit lane (element & 7) replaced by value */ \ |
| 53 | switch((element) & 7) \ |
| 54 | { \ |
| 55 | case 0: reg = _mm_insert_epi16(reg, value, 0); break; \ |
| 56 | case 1: reg = _mm_insert_epi16(reg, value, 1); break; \ |
| 57 | case 2: reg = _mm_insert_epi16(reg, value, 2); break; \ |
| 58 | case 3: reg = _mm_insert_epi16(reg, value, 3); break; \ |
| 59 | case 4: reg = _mm_insert_epi16(reg, value, 4); break; \ |
| 60 | case 5: reg = _mm_insert_epi16(reg, value, 5); break; \ |
| 61 | case 6: reg = _mm_insert_epi16(reg, value, 6); break; \ |
| 62 | case 7: reg = _mm_insert_epi16(reg, value, 7); break; \ |
| 63 | } |
| 64 | |
| 65 | |
| 66 | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] /* byte access; the index has its low bit flipped (presumably a byte-order adjustment - confirm) */ |
| 67 | #define W_VREG_S(reg, offset) m_v[(reg)].s[(offset)] /* 16-bit element as an lvalue, for writing */ |
| 68 | #define VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] /* 16-bit element read as a signed value; not assignable because of the cast */ |
| 69 | |
| 70 | #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) /* source lane chosen for lane z under element selector x */ |
| 71 | |
| 72 | #define ACCUM(x) m_accum[x].q |
| 73 | /* Indices into m_xvflag[], one per flag group. */ |
| 74 | #define CARRY 0 |
| 75 | #define COMPARE 1 |
| 76 | #define CLIP1 2 |
| 77 | #define ZERO 3 |
| 78 | #define CLIP2 4 |
| 79 | |
| 80 | static void cfunc_mfc2(void *param); /* forward declarations of file-local callbacks; their definitions are not in this part of the file */ |
| 81 | static void cfunc_cfc2(void *param); |
| 82 | static void cfunc_mtc2(void *param); |
| 83 | static void cfunc_ctc2(void *param); |
| 84 | |
| 85 | inline UINT16 rsp_cop2_simd::ACCUM_H(int x) |
| 86 | { |
| 87 | UINT16 lane = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 88 | EXTRACT16(m_accum_h, lane, x); /* fetch 16-bit lane (x & 7) of m_accum_h */ |
| 89 | return lane; |
| 90 | } |
| 91 | |
| 92 | inline UINT16 rsp_cop2_simd::ACCUM_M(int x) |
| 93 | { |
| 94 | UINT16 lane = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 95 | EXTRACT16(m_accum_m, lane, x); /* fetch 16-bit lane (x & 7) of m_accum_m */ |
| 96 | return lane; |
| 97 | } |
| 98 | |
| 99 | inline UINT16 rsp_cop2_simd::ACCUM_L(int x) |
| 100 | { |
| 101 | UINT16 lane = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 102 | EXTRACT16(m_accum_l, lane, x); /* fetch 16-bit lane (x & 7) of m_accum_l */ |
| 103 | return lane; |
| 104 | } |
| 105 | |
| 106 | inline UINT16 rsp_cop2_simd::ACCUM_LL(int x) |
| 107 | { |
| 108 | UINT16 lane = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 109 | EXTRACT16(m_accum_ll, lane, x); /* fetch 16-bit lane (x & 7) of m_accum_ll */ |
| 110 | return lane; |
| 111 | } |
| 112 | |
| 113 | #define SET_ACCUM_H(v, x) INSERT16(m_accum_h, v, x); /* write v into 16-bit lane (x & 7) of the named accumulator register */ |
| 114 | #define SET_ACCUM_M(v, x) INSERT16(m_accum_m, v, x); |
| 115 | #define SET_ACCUM_L(v, x) INSERT16(m_accum_l, v, x); |
| 116 | #define SET_ACCUM_LL(v, x) INSERT16(m_accum_ll, v, x); |
| 117 | /* GET_VS1 reads lane i of the VS1REG vector; GET_VS2 reads the lane of VS2REG that selector EL maps lane i to. Both need op in scope. */ |
| 118 | #define GET_VS1(out, i) EXTRACT16(m_xv[VS1REG], out, i); |
| 119 | #define GET_VS2(out, i) EXTRACT16(m_xv[VS2REG], out, VEC_EL_2(EL, i)); |
| 120 | |
| 121 | inline UINT16 rsp_cop2_simd::CARRY_FLAG(const int x) |
| 122 | { |
| 123 | UINT16 flag = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 124 | EXTRACT16(m_xvflag[CARRY], flag, x); /* raw 16-bit lane (x & 7) of the carry flag register */ |
| 125 | return flag; |
| 126 | } |
| 127 | |
| 128 | inline UINT16 rsp_cop2_simd::COMPARE_FLAG(const int x) |
| 129 | { |
| 130 | UINT16 flag = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 131 | EXTRACT16(m_xvflag[COMPARE], flag, x); /* raw 16-bit lane (x & 7) of the compare flag register */ |
| 132 | return flag; |
| 133 | } |
| 134 | |
| 135 | inline UINT16 rsp_cop2_simd::CLIP1_FLAG(const int x) |
| 136 | { |
| 137 | UINT16 flag = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 138 | EXTRACT16(m_xvflag[CLIP1], flag, x); /* raw 16-bit lane (x & 7) of the clip1 flag register */ |
| 139 | return flag; |
| 140 | } |
| 141 | |
| 142 | inline UINT16 rsp_cop2_simd::ZERO_FLAG(const int x) |
| 143 | { |
| 144 | UINT16 flag = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 145 | EXTRACT16(m_xvflag[ZERO], flag, x); /* raw 16-bit lane (x & 7) of the zero flag register */ |
| 146 | return flag; |
| 147 | } |
| 148 | |
| 149 | inline UINT16 rsp_cop2_simd::CLIP2_FLAG(const int x) |
| 150 | { |
| 151 | UINT16 flag = 0; /* always overwritten: EXTRACT16 covers every value of (x & 7) */ |
| 152 | EXTRACT16(m_xvflag[CLIP2], flag, x); /* raw 16-bit lane (x & 7) of the clip2 flag register */ |
| 153 | return flag; |
| 154 | } |
| 155 | |
| 156 | #define CLEAR_CARRY_FLAGS() { m_xvflag[CARRY] = _mm_setzero_si128(); } /* CLEAR_*_FLAGS: zero all eight lanes of one flag register */ |
| 157 | #define CLEAR_COMPARE_FLAGS() { m_xvflag[COMPARE] = _mm_setzero_si128(); } |
| 158 | #define CLEAR_CLIP1_FLAGS() { m_xvflag[CLIP1] = _mm_setzero_si128(); } |
| 159 | #define CLEAR_ZERO_FLAGS() { m_xvflag[ZERO] = _mm_setzero_si128(); } |
| 160 | #define CLEAR_CLIP2_FLAGS() { m_xvflag[CLIP2] = _mm_setzero_si128(); } |
| 161 | /* SET_*_FLAG(x): store 0xffff in lane (x & 7), i.e. a set flag is an all-ones lane. */ |
| 162 | #define SET_CARRY_FLAG(x) { INSERT16(m_xvflag[CARRY], 0xffff, x); } |
| 163 | #define SET_COMPARE_FLAG(x) { INSERT16(m_xvflag[COMPARE], 0xffff, x); } |
| 164 | #define SET_CLIP1_FLAG(x) { INSERT16(m_xvflag[CLIP1], 0xffff, x); } |
| 165 | #define SET_ZERO_FLAG(x) { INSERT16(m_xvflag[ZERO], 0xffff, x); } |
| 166 | #define SET_CLIP2_FLAG(x) { INSERT16(m_xvflag[CLIP2], 0xffff, x); } |
| 167 | /* CLEAR_*_FLAG(x): store 0 in lane (x & 7), leaving the other lanes unchanged. */ |
| 168 | #define CLEAR_CARRY_FLAG(x) { INSERT16(m_xvflag[CARRY], 0, x); } |
| 169 | #define CLEAR_COMPARE_FLAG(x) { INSERT16(m_xvflag[COMPARE], 0, x); } |
| 170 | #define CLEAR_CLIP1_FLAG(x) { INSERT16(m_xvflag[CLIP1], 0, x); } |
| 171 | #define CLEAR_ZERO_FLAG(x) { INSERT16(m_xvflag[ZERO], 0, x); } |
| 172 | #define CLEAR_CLIP2_FLAG(x) { INSERT16(m_xvflag[CLIP2], 0, x); } |
| 173 | |
| 174 | #define WRITEBACK_RESULT() /* copy the eight staged results m_vres[0..7] into lanes 0..7 of vector register VDREG; needs op in scope */ { \ |
| 175 | INSERT16(m_xv[VDREG], m_vres[0], 0); \ |
| 176 | INSERT16(m_xv[VDREG], m_vres[1], 1); \ |
| 177 | INSERT16(m_xv[VDREG], m_vres[2], 2); \ |
| 178 | INSERT16(m_xv[VDREG], m_vres[3], 3); \ |
| 179 | INSERT16(m_xv[VDREG], m_vres[4], 4); \ |
| 180 | INSERT16(m_xv[VDREG], m_vres[5], 5); \ |
| 181 | INSERT16(m_xv[VDREG], m_vres[6], 6); \ |
| 182 | INSERT16(m_xv[VDREG], m_vres[7], 7); \ |
| 183 | } |
| 185 | |
| 186 | static const int vector_elements_2[16][8] = /* [selector][lane] -> source lane read for that lane; used through VEC_EL_2 */ |
| 187 | { |
| 188 | { 0, 1, 2, 3, 4, 5, 6, 7 }, // none |
| 189 | { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? (same selection as "none") |
| 190 | { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q |
| 191 | { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q |
| 192 | { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h |
| 193 | { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h |
| 194 | { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h |
| 195 | { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h |
| 196 | { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 |
| 197 | { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 |
| 198 | { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 |
| 199 | { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 |
| 200 | { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 |
| 201 | { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 |
| 202 | { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 |
| 203 | { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 |
| 204 | }; |
| 205 | |
| 206 | static __m128i vec_himask; /* file-scope constant vectors; every one is assigned in the rsp_cop2_simd constructor */ |
| 207 | static __m128i vec_lomask; |
| 208 | static __m128i vec_hibit; |
| 209 | static __m128i vec_lobit; |
| 210 | static __m128i vec_n32768; |
| 211 | static __m128i vec_32767; |
| 212 | static __m128i vec_flagmask; |
| 213 | static __m128i vec_shiftmask2; |
| 214 | static __m128i vec_shiftmask4; |
| 215 | static __m128i vec_flag_reverse; |
| 216 | static __m128i vec_neg1; |
| 217 | static __m128i vec_zero; |
| 218 | static __m128i vec_shuf[16]; /* one byte-index pattern per 4-bit element selector */ |
| 219 | static __m128i vec_shuf_inverse[16]; /* same selectors, lanes in the opposite order to vec_shuf */ |
| 220 | |
| 221 | rsp_cop2_simd::rsp_cop2_simd(rsp_device &rsp, running_machine &machine) : rsp_cop2(rsp, machine) |
| 222 | /* Builds the SIMD COP2 state. The four __m128i accumulator slices are deliberately */ |
| 223 | /* absent from this initializer list: a vector register cannot portably be */ |
| 224 | /* constructed from the literal 0, so they are cleared with _mm_setzero_si128() */ |
| 225 | /* in the body instead. Every further initializer continues the list with a comma. */ |
| 226 | #if SIMUL_SIMD |
| 227 | , m_old_reciprocal_res(0) |
| 228 | , m_old_reciprocal_high(0) |
| 229 | , m_old_dp_allowed(0) |
| 230 | , m_scalar_reciprocal_res(0) |
| 231 | , m_scalar_reciprocal_high(0) |
| 232 | , m_scalar_dp_allowed(0) |
| 233 | , m_simd_reciprocal_res(0) |
| 234 | , m_simd_reciprocal_high(0) |
| 235 | , m_simd_dp_allowed(0) |
| 236 | #endif |
| 237 | { |
| 238 | #if SIMUL_SIMD |
| 239 | memset(m_old_r, 0, sizeof(m_old_r)); |
| 240 | memset(m_old_dmem, 0, sizeof(m_old_dmem)); |
| 241 | memset(m_scalar_r, 0, sizeof(m_scalar_r)); |
| 242 | memset(m_scalar_dmem, 0, sizeof(m_scalar_dmem)); |
| 243 | #endif |
| 244 | memset(m_xv, 0, sizeof(m_xv)); |
| 245 | memset(m_xvflag, 0, sizeof(m_xvflag)); |
| 246 | /* vec_shuf_inverse[e]: 16-bit lane i holds the byte-index pair of source lane vector_elements_2[e][i] (_mm_set_epi16 lists lane 7 first). */ |
| 247 | vec_shuf_inverse[ 0] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // none |
| 248 | vec_shuf_inverse[ 1] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // ??? |
| 249 | vec_shuf_inverse[ 2] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0908, 0x0908, 0x0504, 0x0504, 0x0100, 0x0100); // 0q |
| 250 | vec_shuf_inverse[ 3] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0b0a, 0x0b0a, 0x0706, 0x0706, 0x0302, 0x0302); // 1q |
| 251 | vec_shuf_inverse[ 4] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0100, 0x0100, 0x0100, 0x0100); // 0h |
| 252 | vec_shuf_inverse[ 5] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0302, 0x0302, 0x0302, 0x0302); // 1h |
| 253 | vec_shuf_inverse[ 6] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0504, 0x0504, 0x0504, 0x0504); // 2h |
| 254 | vec_shuf_inverse[ 7] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0706, 0x0706, 0x0706, 0x0706); // 3h |
| 255 | vec_shuf_inverse[ 8] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 0 |
| 256 | vec_shuf_inverse[ 9] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 1 |
| 257 | vec_shuf_inverse[10] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 2 |
| 258 | vec_shuf_inverse[11] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 3 |
| 259 | vec_shuf_inverse[12] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 4 |
| 260 | vec_shuf_inverse[13] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 5 |
| 261 | vec_shuf_inverse[14] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 6 |
| 262 | vec_shuf_inverse[15] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 7 |
| 263 | /* vec_shuf[e]: 16-bit lane i holds the byte-index pair of source lane 7 - vector_elements_2[e][i]. */ |
| 264 | vec_shuf[ 0] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // none |
| 265 | vec_shuf[ 1] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // ??? |
| 266 | vec_shuf[ 2] = _mm_set_epi16(0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e); // 0q |
| 267 | vec_shuf[ 3] = _mm_set_epi16(0x0100, 0x0100, 0x0504, 0x0504, 0x0908, 0x0908, 0x0d0c, 0x0d0c); // 1q |
| 268 | vec_shuf[ 4] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0h |
| 269 | vec_shuf[ 5] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1h |
| 270 | vec_shuf[ 6] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2h |
| 271 | vec_shuf[ 7] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908); // 3h |
| 272 | vec_shuf[ 8] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0 |
| 273 | vec_shuf[ 9] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1 |
| 274 | vec_shuf[10] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2 |
| 275 | vec_shuf[11] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 3 |
| 276 | vec_shuf[12] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 4 |
| 277 | vec_shuf[13] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 5 |
| 278 | vec_shuf[14] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 6 |
| 279 | vec_shuf[15] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 7 |
| 280 | m_accum_h = _mm_setzero_si128(); /* accumulator slices start out cleared */ |
| 281 | m_accum_m = _mm_setzero_si128(); |
| 282 | m_accum_l = _mm_setzero_si128(); |
| 283 | m_accum_ll = _mm_setzero_si128(); |
| 284 | vec_neg1 = _mm_set_epi64x(0xffffffffffffffffL, 0xffffffffffffffffL); /* the file-scope constants below are rewritten on every construction */ |
| 285 | vec_zero = _mm_setzero_si128(); |
| 286 | vec_himask = _mm_set_epi64x(0xffff0000ffff0000L, 0xffff0000ffff0000L); |
| 287 | vec_lomask = _mm_set_epi64x(0x0000ffff0000ffffL, 0x0000ffff0000ffffL); |
| 288 | vec_hibit = _mm_set_epi64x(0x0001000000010000L, 0x0001000000010000L); |
| 289 | vec_lobit = _mm_set_epi64x(0x0000000100000001L, 0x0000000100000001L); |
| 290 | vec_32767 = _mm_set_epi64x(0x7fff7fff7fff7fffL, 0x7fff7fff7fff7fffL); |
| 291 | vec_n32768 = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L); |
| 292 | vec_flagmask = _mm_set_epi64x(0x0001000100010001L, 0x0001000100010001L); |
| 293 | vec_shiftmask2 = _mm_set_epi64x(0x0000000300000003L, 0x0000000300000003L); |
| 294 | vec_shiftmask4 = _mm_set_epi64x(0x000000000000000fL, 0x000000000000000fL); |
| 295 | vec_flag_reverse = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); /* same pattern as vec_shuf[0]: full lane reversal */ |
| 296 | } |
| 297 | |
| 298 | void rsp_cop2_simd::state_string_export(const int index, astring &string) |
| 299 | { |
| 300 | switch (index) |
| 301 | { |
| 302 | case RSP_V0: |
| 303 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 0], 7), (UINT16)_mm_extract_epi16(m_xv[ 0], 6), (UINT16)_mm_extract_epi16(m_xv[ 0], 5), (UINT16)_mm_extract_epi16(m_xv[ 0], 4), (UINT16)_mm_extract_epi16(m_xv[ 0], 3), (UINT16)_mm_extract_epi16(m_xv[ 0], 2), (UINT16)_mm_extract_epi16(m_xv[ 0], 1), (UINT16)_mm_extract_epi16(m_xv[ 0], 0)); |
| 304 | break; |
| 305 | case RSP_V1: |
| 306 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 1], 7), (UINT16)_mm_extract_epi16(m_xv[ 1], 6), (UINT16)_mm_extract_epi16(m_xv[ 1], 5), (UINT16)_mm_extract_epi16(m_xv[ 1], 4), (UINT16)_mm_extract_epi16(m_xv[ 1], 3), (UINT16)_mm_extract_epi16(m_xv[ 1], 2), (UINT16)_mm_extract_epi16(m_xv[ 1], 1), (UINT16)_mm_extract_epi16(m_xv[ 1], 0)); |
| 307 | break; |
| 308 | case RSP_V2: |
| 309 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 2], 7), (UINT16)_mm_extract_epi16(m_xv[ 2], 6), (UINT16)_mm_extract_epi16(m_xv[ 2], 5), (UINT16)_mm_extract_epi16(m_xv[ 2], 4), (UINT16)_mm_extract_epi16(m_xv[ 2], 3), (UINT16)_mm_extract_epi16(m_xv[ 2], 2), (UINT16)_mm_extract_epi16(m_xv[ 2], 1), (UINT16)_mm_extract_epi16(m_xv[ 2], 0)); |
| 310 | break; |
| 311 | case RSP_V3: |
| 312 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 3], 7), (UINT16)_mm_extract_epi16(m_xv[ 3], 6), (UINT16)_mm_extract_epi16(m_xv[ 3], 5), (UINT16)_mm_extract_epi16(m_xv[ 3], 4), (UINT16)_mm_extract_epi16(m_xv[ 3], 3), (UINT16)_mm_extract_epi16(m_xv[ 3], 2), (UINT16)_mm_extract_epi16(m_xv[ 3], 1), (UINT16)_mm_extract_epi16(m_xv[ 3], 0)); |
| 313 | break; |
| 314 | case RSP_V4: |
| 315 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 4], 7), (UINT16)_mm_extract_epi16(m_xv[ 4], 6), (UINT16)_mm_extract_epi16(m_xv[ 4], 5), (UINT16)_mm_extract_epi16(m_xv[ 4], 4), (UINT16)_mm_extract_epi16(m_xv[ 4], 3), (UINT16)_mm_extract_epi16(m_xv[ 4], 2), (UINT16)_mm_extract_epi16(m_xv[ 4], 1), (UINT16)_mm_extract_epi16(m_xv[ 4], 0)); |
| 316 | break; |
| 317 | case RSP_V5: |
| 318 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 5], 7), (UINT16)_mm_extract_epi16(m_xv[ 5], 6), (UINT16)_mm_extract_epi16(m_xv[ 5], 5), (UINT16)_mm_extract_epi16(m_xv[ 5], 4), (UINT16)_mm_extract_epi16(m_xv[ 5], 3), (UINT16)_mm_extract_epi16(m_xv[ 5], 2), (UINT16)_mm_extract_epi16(m_xv[ 5], 1), (UINT16)_mm_extract_epi16(m_xv[ 5], 0)); |
| 319 | break; |
| 320 | case RSP_V6: |
| 321 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 6], 7), (UINT16)_mm_extract_epi16(m_xv[ 6], 6), (UINT16)_mm_extract_epi16(m_xv[ 6], 5), (UINT16)_mm_extract_epi16(m_xv[ 6], 4), (UINT16)_mm_extract_epi16(m_xv[ 6], 3), (UINT16)_mm_extract_epi16(m_xv[ 6], 2), (UINT16)_mm_extract_epi16(m_xv[ 6], 1), (UINT16)_mm_extract_epi16(m_xv[ 6], 0)); |
| 322 | break; |
| 323 | case RSP_V7: |
| 324 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 7], 7), (UINT16)_mm_extract_epi16(m_xv[ 7], 6), (UINT16)_mm_extract_epi16(m_xv[ 7], 5), (UINT16)_mm_extract_epi16(m_xv[ 7], 4), (UINT16)_mm_extract_epi16(m_xv[ 7], 3), (UINT16)_mm_extract_epi16(m_xv[ 7], 2), (UINT16)_mm_extract_epi16(m_xv[ 7], 1), (UINT16)_mm_extract_epi16(m_xv[ 7], 0)); |
| 325 | break; |
| 326 | case RSP_V8: |
| 327 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 8], 7), (UINT16)_mm_extract_epi16(m_xv[ 8], 6), (UINT16)_mm_extract_epi16(m_xv[ 8], 5), (UINT16)_mm_extract_epi16(m_xv[ 8], 4), (UINT16)_mm_extract_epi16(m_xv[ 8], 3), (UINT16)_mm_extract_epi16(m_xv[ 8], 2), (UINT16)_mm_extract_epi16(m_xv[ 8], 1), (UINT16)_mm_extract_epi16(m_xv[ 8], 0)); |
| 328 | break; |
| 329 | case RSP_V9: |
| 330 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[ 9], 7), (UINT16)_mm_extract_epi16(m_xv[ 9], 6), (UINT16)_mm_extract_epi16(m_xv[ 9], 5), (UINT16)_mm_extract_epi16(m_xv[ 9], 4), (UINT16)_mm_extract_epi16(m_xv[ 9], 3), (UINT16)_mm_extract_epi16(m_xv[ 9], 2), (UINT16)_mm_extract_epi16(m_xv[ 9], 1), (UINT16)_mm_extract_epi16(m_xv[ 9], 0)); |
| 331 | break; |
| 332 | case RSP_V10: |
| 333 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[10], 7), (UINT16)_mm_extract_epi16(m_xv[10], 6), (UINT16)_mm_extract_epi16(m_xv[10], 5), (UINT16)_mm_extract_epi16(m_xv[10], 4), (UINT16)_mm_extract_epi16(m_xv[10], 3), (UINT16)_mm_extract_epi16(m_xv[10], 2), (UINT16)_mm_extract_epi16(m_xv[10], 1), (UINT16)_mm_extract_epi16(m_xv[10], 0)); |
| 334 | break; |
| 335 | case RSP_V11: |
| 336 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[11], 7), (UINT16)_mm_extract_epi16(m_xv[11], 6), (UINT16)_mm_extract_epi16(m_xv[11], 5), (UINT16)_mm_extract_epi16(m_xv[11], 4), (UINT16)_mm_extract_epi16(m_xv[11], 3), (UINT16)_mm_extract_epi16(m_xv[11], 2), (UINT16)_mm_extract_epi16(m_xv[11], 1), (UINT16)_mm_extract_epi16(m_xv[11], 0)); |
| 337 | break; |
| 338 | case RSP_V12: |
| 339 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[12], 7), (UINT16)_mm_extract_epi16(m_xv[12], 6), (UINT16)_mm_extract_epi16(m_xv[12], 5), (UINT16)_mm_extract_epi16(m_xv[12], 4), (UINT16)_mm_extract_epi16(m_xv[12], 3), (UINT16)_mm_extract_epi16(m_xv[12], 2), (UINT16)_mm_extract_epi16(m_xv[12], 1), (UINT16)_mm_extract_epi16(m_xv[12], 0)); |
| 340 | break; |
| 341 | case RSP_V13: |
| 342 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[13], 7), (UINT16)_mm_extract_epi16(m_xv[13], 6), (UINT16)_mm_extract_epi16(m_xv[13], 5), (UINT16)_mm_extract_epi16(m_xv[13], 4), (UINT16)_mm_extract_epi16(m_xv[13], 3), (UINT16)_mm_extract_epi16(m_xv[13], 2), (UINT16)_mm_extract_epi16(m_xv[13], 1), (UINT16)_mm_extract_epi16(m_xv[13], 0)); |
| 343 | break; |
| 344 | case RSP_V14: |
| 345 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[14], 7), (UINT16)_mm_extract_epi16(m_xv[14], 6), (UINT16)_mm_extract_epi16(m_xv[14], 5), (UINT16)_mm_extract_epi16(m_xv[14], 4), (UINT16)_mm_extract_epi16(m_xv[14], 3), (UINT16)_mm_extract_epi16(m_xv[14], 2), (UINT16)_mm_extract_epi16(m_xv[14], 1), (UINT16)_mm_extract_epi16(m_xv[14], 0)); |
| 346 | break; |
| 347 | case RSP_V15: |
| 348 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[15], 7), (UINT16)_mm_extract_epi16(m_xv[15], 6), (UINT16)_mm_extract_epi16(m_xv[15], 5), (UINT16)_mm_extract_epi16(m_xv[15], 4), (UINT16)_mm_extract_epi16(m_xv[15], 3), (UINT16)_mm_extract_epi16(m_xv[15], 2), (UINT16)_mm_extract_epi16(m_xv[15], 1), (UINT16)_mm_extract_epi16(m_xv[15], 0)); |
| 349 | break; |
| 350 | case RSP_V16: |
| 351 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[16], 7), (UINT16)_mm_extract_epi16(m_xv[16], 6), (UINT16)_mm_extract_epi16(m_xv[16], 5), (UINT16)_mm_extract_epi16(m_xv[16], 4), (UINT16)_mm_extract_epi16(m_xv[16], 3), (UINT16)_mm_extract_epi16(m_xv[16], 2), (UINT16)_mm_extract_epi16(m_xv[16], 1), (UINT16)_mm_extract_epi16(m_xv[16], 0)); |
| 352 | break; |
| 353 | case RSP_V17: |
| 354 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[17], 7), (UINT16)_mm_extract_epi16(m_xv[17], 6), (UINT16)_mm_extract_epi16(m_xv[17], 5), (UINT16)_mm_extract_epi16(m_xv[17], 4), (UINT16)_mm_extract_epi16(m_xv[17], 3), (UINT16)_mm_extract_epi16(m_xv[17], 2), (UINT16)_mm_extract_epi16(m_xv[17], 1), (UINT16)_mm_extract_epi16(m_xv[17], 0)); |
| 355 | break; |
| 356 | case RSP_V18: |
| 357 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[18], 7), (UINT16)_mm_extract_epi16(m_xv[18], 6), (UINT16)_mm_extract_epi16(m_xv[18], 5), (UINT16)_mm_extract_epi16(m_xv[18], 4), (UINT16)_mm_extract_epi16(m_xv[18], 3), (UINT16)_mm_extract_epi16(m_xv[18], 2), (UINT16)_mm_extract_epi16(m_xv[18], 1), (UINT16)_mm_extract_epi16(m_xv[18], 0)); |
| 358 | break; |
| 359 | case RSP_V19: |
| 360 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[19], 7), (UINT16)_mm_extract_epi16(m_xv[19], 6), (UINT16)_mm_extract_epi16(m_xv[19], 5), (UINT16)_mm_extract_epi16(m_xv[19], 4), (UINT16)_mm_extract_epi16(m_xv[19], 3), (UINT16)_mm_extract_epi16(m_xv[19], 2), (UINT16)_mm_extract_epi16(m_xv[19], 1), (UINT16)_mm_extract_epi16(m_xv[19], 0)); |
| 361 | break; |
| 362 | case RSP_V20: |
| 363 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[20], 7), (UINT16)_mm_extract_epi16(m_xv[20], 6), (UINT16)_mm_extract_epi16(m_xv[20], 5), (UINT16)_mm_extract_epi16(m_xv[20], 4), (UINT16)_mm_extract_epi16(m_xv[20], 3), (UINT16)_mm_extract_epi16(m_xv[20], 2), (UINT16)_mm_extract_epi16(m_xv[20], 1), (UINT16)_mm_extract_epi16(m_xv[20], 0)); |
| 364 | break; |
| 365 | case RSP_V21: |
| 366 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[21], 7), (UINT16)_mm_extract_epi16(m_xv[21], 6), (UINT16)_mm_extract_epi16(m_xv[21], 5), (UINT16)_mm_extract_epi16(m_xv[21], 4), (UINT16)_mm_extract_epi16(m_xv[21], 3), (UINT16)_mm_extract_epi16(m_xv[21], 2), (UINT16)_mm_extract_epi16(m_xv[21], 1), (UINT16)_mm_extract_epi16(m_xv[21], 0)); |
| 367 | break; |
| 368 | case RSP_V22: |
| 369 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[22], 7), (UINT16)_mm_extract_epi16(m_xv[22], 6), (UINT16)_mm_extract_epi16(m_xv[22], 5), (UINT16)_mm_extract_epi16(m_xv[22], 4), (UINT16)_mm_extract_epi16(m_xv[22], 3), (UINT16)_mm_extract_epi16(m_xv[22], 2), (UINT16)_mm_extract_epi16(m_xv[22], 1), (UINT16)_mm_extract_epi16(m_xv[22], 0)); |
| 370 | break; |
| 371 | case RSP_V23: |
| 372 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[23], 7), (UINT16)_mm_extract_epi16(m_xv[23], 6), (UINT16)_mm_extract_epi16(m_xv[23], 5), (UINT16)_mm_extract_epi16(m_xv[23], 4), (UINT16)_mm_extract_epi16(m_xv[23], 3), (UINT16)_mm_extract_epi16(m_xv[23], 2), (UINT16)_mm_extract_epi16(m_xv[23], 1), (UINT16)_mm_extract_epi16(m_xv[23], 0)); |
| 373 | break; |
| 374 | case RSP_V24: |
| 375 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[24], 7), (UINT16)_mm_extract_epi16(m_xv[24], 6), (UINT16)_mm_extract_epi16(m_xv[24], 5), (UINT16)_mm_extract_epi16(m_xv[24], 4), (UINT16)_mm_extract_epi16(m_xv[24], 3), (UINT16)_mm_extract_epi16(m_xv[24], 2), (UINT16)_mm_extract_epi16(m_xv[24], 1), (UINT16)_mm_extract_epi16(m_xv[24], 0)); |
| 376 | break; |
| 377 | case RSP_V25: |
| 378 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[25], 7), (UINT16)_mm_extract_epi16(m_xv[25], 6), (UINT16)_mm_extract_epi16(m_xv[25], 5), (UINT16)_mm_extract_epi16(m_xv[25], 4), (UINT16)_mm_extract_epi16(m_xv[25], 3), (UINT16)_mm_extract_epi16(m_xv[25], 2), (UINT16)_mm_extract_epi16(m_xv[25], 1), (UINT16)_mm_extract_epi16(m_xv[25], 0)); |
| 379 | break; |
| 380 | case RSP_V26: |
| 381 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[26], 7), (UINT16)_mm_extract_epi16(m_xv[26], 6), (UINT16)_mm_extract_epi16(m_xv[26], 5), (UINT16)_mm_extract_epi16(m_xv[26], 4), (UINT16)_mm_extract_epi16(m_xv[26], 3), (UINT16)_mm_extract_epi16(m_xv[26], 2), (UINT16)_mm_extract_epi16(m_xv[26], 1), (UINT16)_mm_extract_epi16(m_xv[26], 0)); |
| 382 | break; |
| 383 | case RSP_V27: |
| 384 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[27], 7), (UINT16)_mm_extract_epi16(m_xv[27], 6), (UINT16)_mm_extract_epi16(m_xv[27], 5), (UINT16)_mm_extract_epi16(m_xv[27], 4), (UINT16)_mm_extract_epi16(m_xv[27], 3), (UINT16)_mm_extract_epi16(m_xv[27], 2), (UINT16)_mm_extract_epi16(m_xv[27], 1), (UINT16)_mm_extract_epi16(m_xv[27], 0)); |
| 385 | break; |
| 386 | case RSP_V28: |
| 387 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[28], 7), (UINT16)_mm_extract_epi16(m_xv[28], 6), (UINT16)_mm_extract_epi16(m_xv[28], 5), (UINT16)_mm_extract_epi16(m_xv[28], 4), (UINT16)_mm_extract_epi16(m_xv[28], 3), (UINT16)_mm_extract_epi16(m_xv[28], 2), (UINT16)_mm_extract_epi16(m_xv[28], 1), (UINT16)_mm_extract_epi16(m_xv[28], 0)); |
| 388 | break; |
| 389 | case RSP_V29: |
| 390 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[29], 7), (UINT16)_mm_extract_epi16(m_xv[29], 6), (UINT16)_mm_extract_epi16(m_xv[29], 5), (UINT16)_mm_extract_epi16(m_xv[29], 4), (UINT16)_mm_extract_epi16(m_xv[29], 3), (UINT16)_mm_extract_epi16(m_xv[29], 2), (UINT16)_mm_extract_epi16(m_xv[29], 1), (UINT16)_mm_extract_epi16(m_xv[29], 0)); |
| 391 | break; |
| 392 | case RSP_V30: |
| 393 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[30], 7), (UINT16)_mm_extract_epi16(m_xv[30], 6), (UINT16)_mm_extract_epi16(m_xv[30], 5), (UINT16)_mm_extract_epi16(m_xv[30], 4), (UINT16)_mm_extract_epi16(m_xv[30], 3), (UINT16)_mm_extract_epi16(m_xv[30], 2), (UINT16)_mm_extract_epi16(m_xv[30], 1), (UINT16)_mm_extract_epi16(m_xv[30], 0)); |
| 394 | break; |
| 395 | case RSP_V31: |
| 396 | string.printf("%04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(m_xv[31], 7), (UINT16)_mm_extract_epi16(m_xv[31], 6), (UINT16)_mm_extract_epi16(m_xv[31], 5), (UINT16)_mm_extract_epi16(m_xv[31], 4), (UINT16)_mm_extract_epi16(m_xv[31], 3), (UINT16)_mm_extract_epi16(m_xv[31], 2), (UINT16)_mm_extract_epi16(m_xv[31], 1), (UINT16)_mm_extract_epi16(m_xv[31], 0)); |
| 397 | break; |
| 398 | } |
| 399 | } |
| 400 | |
| 401 | /*************************************************************************** |
| 402 | Vector Load Instructions |
| 403 | ***************************************************************************/ |
| 404 | |
| 405 | // LBV |
| 406 | // |
| 407 | // 31 25 20 15 10 6 0 |
| 408 | // -------------------------------------------------- |
| 409 | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 410 | // -------------------------------------------------- |
| 411 | // |
| 412 | // Load 1 byte to vector byte index |
| 413 | |
| 414 | inline void rsp_cop2_simd::lbv() |
| 415 | { |
| 416 | UINT32 op = m_op; |
| 417 | |
| 418 | UINT32 ea = 0; |
| 419 | int dest = (op >> 16) & 0x1f; |
| 420 | int base = (op >> 21) & 0x1f; |
| 421 | int index = (op >> 7) & 0xf; |
| 422 | int offset = (op & 0x7f); |
| 423 | if (offset & 0x40) |
| 424 | { |
| 425 | offset |= 0xffffffc0; |
| 426 | } |
| 427 | |
| 428 | ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 429 | |
| 430 | UINT16 element; |
| 431 | EXTRACT16(m_xv[dest], element, (index >> 1)); |
| 432 | element &= 0xff00 >> ((1-(index & 1)) * 8); |
| 433 | element |= m_rsp.DM_READ8(ea) << ((1-(index & 1)) * 8); |
| 434 | INSERT16(m_xv[dest], element, (index >> 1)); |
| 435 | } |
| 436 | |
| 437 | static void cfunc_lbv(void *param) |
| 438 | { |
| 439 | ((rsp_cop2 *)param)->lbv(); |
| 440 | } |
| 441 | |
| 442 | |
| 443 | // LSV |
| 444 | // |
| 445 | // 31 25 20 15 10 6 0 |
| 446 | // -------------------------------------------------- |
| 447 | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 448 | // -------------------------------------------------- |
| 449 | // |
| 450 | // Loads 2 bytes starting from vector byte index |
| 451 | |
| 452 | inline void rsp_cop2_simd::lsv() |
| 453 | { |
| 454 | UINT32 op = m_op; |
| 455 | int dest = (op >> 16) & 0x1f; |
| 456 | int base = (op >> 21) & 0x1f; |
| 457 | int index = (op >> 7) & 0xe; |
| 458 | int offset = (op & 0x7f); |
| 459 | if (offset & 0x40) |
| 460 | { |
| 461 | offset |= 0xffffffc0; |
| 462 | } |
| 463 | |
| 464 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 465 | int end = index + 2; |
| 466 | for (int i = index; i < end; i++) |
| 467 | { |
| 468 | UINT16 element; |
| 469 | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 470 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 471 | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 472 | INSERT16(m_xv[dest], element, (i >> 1)); |
| 473 | ea++; |
| 474 | } |
| 475 | } |
| 476 | |
| 477 | static void cfunc_lsv(void *param) |
| 478 | { |
| 479 | ((rsp_cop2 *)param)->lsv(); |
| 480 | } |
| 481 | |
| 482 | |
| 483 | // LLV |
| 484 | // |
| 485 | // 31 25 20 15 10 6 0 |
| 486 | // -------------------------------------------------- |
| 487 | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 488 | // -------------------------------------------------- |
| 489 | // |
| 490 | // Loads 4 bytes starting from vector byte index |
| 491 | |
| 492 | inline void rsp_cop2_simd::llv() |
| 493 | { |
| 494 | UINT32 op = m_op; |
| 495 | UINT32 ea = 0; |
| 496 | int dest = (op >> 16) & 0x1f; |
| 497 | int base = (op >> 21) & 0x1f; |
| 498 | int index = (op >> 7) & 0xc; |
| 499 | int offset = (op & 0x7f); |
| 500 | if (offset & 0x40) |
| 501 | { |
| 502 | offset |= 0xffffffc0; |
| 503 | } |
| 504 | |
| 505 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 506 | |
| 507 | int end = index + 4; |
| 508 | |
| 509 | for (int i = index; i < end; i++) |
| 510 | { |
| 511 | UINT16 element; |
| 512 | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 513 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 514 | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 515 | INSERT16(m_xv[dest], element, (i >> 1)); |
| 516 | ea++; |
| 517 | } |
| 518 | } |
| 519 | |
| 520 | static void cfunc_llv(void *param) |
| 521 | { |
| 522 | ((rsp_cop2 *)param)->llv(); |
| 523 | } |
| 524 | #endif |
| 525 | |
| 526 | |
| 527 | // LDV |
| 528 | // |
| 529 | // 31 25 20 15 10 6 0 |
| 530 | // -------------------------------------------------- |
| 531 | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 532 | // -------------------------------------------------- |
| 533 | // |
| 534 | // Loads 8 bytes starting from vector byte index |
| 535 | |
| 536 | inline void rsp_cop2_simd::ldv() |
| 537 | { |
| 538 | UINT32 op = m_op; |
| 539 | UINT32 ea = 0; |
| 540 | int dest = (op >> 16) & 0x1f; |
| 541 | int base = (op >> 21) & 0x1f; |
| 542 | int index = (op >> 7) & 0x8; |
| 543 | int offset = (op & 0x7f); |
| 544 | if (offset & 0x40) |
| 545 | { |
| 546 | offset |= 0xffffffc0; |
| 547 | } |
| 548 | |
| 549 | ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 550 | |
| 551 | int end = index + 8; |
| 552 | |
| 553 | for (int i = index; i < end; i++) |
| 554 | { |
| 555 | UINT16 element; |
| 556 | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 557 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 558 | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 559 | INSERT16(m_xv[dest], element, (i >> 1)); |
| 560 | ea++; |
| 561 | } |
| 562 | } |
| 563 | |
| 564 | static void cfunc_ldv(void *param) |
| 565 | { |
| 566 | ((rsp_cop2 *)param)->ldv(); |
| 567 | } |
| 568 | #endif |
| 569 | |
| 570 | |
| 571 | // LQV |
| 572 | // |
| 573 | // 31 25 20 15 10 6 0 |
| 574 | // -------------------------------------------------- |
| 575 | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 576 | // -------------------------------------------------- |
| 577 | // |
| 578 | // Loads up to 16 bytes starting from vector byte index |
| 579 | |
| 580 | inline void rsp_cop2_simd::lqv() |
| 581 | { |
| 582 | UINT32 op = m_op; |
| 583 | int dest = (op >> 16) & 0x1f; |
| 584 | int base = (op >> 21) & 0x1f; |
| 585 | int offset = (op & 0x7f); |
| 586 | if (offset & 0x40) |
| 587 | { |
| 588 | offset |= 0xffffffc0; |
| 589 | } |
| 590 | |
| 591 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 592 | |
| 593 | int end = 16 - (ea & 0xf); |
| 594 | if (end > 16) end = 16; |
| 595 | |
| 596 | for (int i = 0; i < end; i++) |
| 597 | { |
| 598 | UINT16 element; |
| 599 | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 600 | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 601 | element |= m_rsp.DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 602 | INSERT16(m_xv[dest], element, (i >> 1)); |
| 603 | ea++; |
| 604 | } |
| 605 | } |
| 606 | |
| 607 | static void cfunc_lqv(void *param) |
| 608 | { |
| 609 | ((rsp_cop2 *)param)->lqv(); |
| 610 | } |
| 611 | |
| 612 | |
| 613 | // LRV |
| 614 | // |
| 615 | // 31 25 20 15 10 6 0 |
| 616 | // -------------------------------------------------- |
| 617 | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 618 | // -------------------------------------------------- |
| 619 | // |
| 620 | // Loads up to 16 bytes starting from right side until 16-byte boundary |
| 621 | |
| 622 | inline void rsp_cop2_simd::lrv() |
| 623 | { |
| 624 | UINT32 op = m_op; |
| 625 | int dest = (op >> 16) & 0x1f; |
| 626 | int base = (op >> 21) & 0x1f; |
| 627 | int index = (op >> 7) & 0xf; |
| 628 | int offset = (op & 0x7f); |
| 629 | if (offset & 0x40) |
| 630 | { |
| 631 | offset |= 0xffffffc0; |
| 632 | } |
| 633 | |
| 634 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 635 | |
| 636 | index = 16 - ((ea & 0xf) - index); |
| 637 | ea &= ~0xf; |
| 638 | |
| 639 | for (int i = index; i < 16; i++) |
| 640 | { |
| 641 | UINT16 element; |
| 642 | EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 643 | element &= 0xff00 >> ((1-(i & 1)) * 8); |
| 644 | element |= m_rsp.DM_READ8(ea) << ((1-(i & 1)) * 8); |
| 645 | INSERT16(m_xv[dest], element, (i >> 1)); |
| 646 | ea++; |
| 647 | } |
| 648 | } |
| 649 | |
| 650 | static void cfunc_lrv(void *param) |
| 651 | { |
| 652 | ((rsp_cop2 *)param)->lrv(); |
| 653 | } |
| 654 | |
| 655 | |
| 656 | // LPV |
| 657 | // |
| 658 | // 31 25 20 15 10 6 0 |
| 659 | // -------------------------------------------------- |
| 660 | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 661 | // -------------------------------------------------- |
| 662 | // |
| 663 | // Loads a byte as the upper 8 bits of each element |
| 664 | |
| 665 | inline void rsp_cop2_simd::lpv() |
| 666 | { |
| 667 | UINT32 op = m_op; |
| 668 | int dest = (op >> 16) & 0x1f; |
| 669 | int base = (op >> 21) & 0x1f; |
| 670 | int index = (op >> 7) & 0xf; |
| 671 | int offset = (op & 0x7f); |
| 672 | if (offset & 0x40) |
| 673 | { |
| 674 | offset |= 0xffffffc0; |
| 675 | } |
| 676 | |
| 677 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 678 | |
| 679 | for (int i = 0; i < 8; i++) |
| 680 | { |
| 681 | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 8, i); |
| 682 | } |
| 683 | } |
| 684 | |
| 685 | static void cfunc_lpv(void *param) |
| 686 | { |
| 687 | ((rsp_cop2 *)param)->lpv(); |
| 688 | } |
| 689 | |
| 690 | |
| 691 | // LUV |
| 692 | // |
| 693 | // 31 25 20 15 10 6 0 |
| 694 | // -------------------------------------------------- |
| 695 | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 696 | // -------------------------------------------------- |
| 697 | // |
| 698 | // Loads a byte as the bits 14-7 of each element |
| 699 | |
| 700 | inline void rsp_cop2_simd::luv() |
| 701 | { |
| 702 | UINT32 op = m_op; |
| 703 | int dest = (op >> 16) & 0x1f; |
| 704 | int base = (op >> 21) & 0x1f; |
| 705 | int index = (op >> 7) & 0xf; |
| 706 | int offset = (op & 0x7f); |
| 707 | if (offset & 0x40) |
| 708 | { |
| 709 | offset |= 0xffffffc0; |
| 710 | } |
| 711 | |
| 712 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 713 | |
| 714 | for (int i = 0; i < 8; i++) |
| 715 | { |
| 716 | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea + (((16-index) + i) & 0xf)) << 7, i); |
| 717 | } |
| 718 | } |
| 719 | |
| 720 | static void cfunc_luv(void *param) |
| 721 | { |
| 722 | ((rsp_cop2 *)param)->luv(); |
| 723 | } |
| 724 | |
| 725 | |
| 726 | // LHV |
| 727 | // |
| 728 | // 31 25 20 15 10 6 0 |
| 729 | // -------------------------------------------------- |
| 730 | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 731 | // -------------------------------------------------- |
| 732 | // |
| 733 | // Loads a byte as the bits 14-7 of each element, with 2-byte stride |
| 734 | |
| 735 | inline void rsp_cop2_simd::lhv() |
| 736 | { |
| 737 | UINT32 op = m_op; |
| 738 | int dest = (op >> 16) & 0x1f; |
| 739 | int base = (op >> 21) & 0x1f; |
| 740 | int index = (op >> 7) & 0xf; |
| 741 | int offset = (op & 0x7f); |
| 742 | if (offset & 0x40) |
| 743 | { |
| 744 | offset |= 0xffffffc0; |
| 745 | } |
| 746 | |
| 747 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 748 | |
| 749 | for (int i = 0; i < 8; i++) |
| 750 | { |
| 751 | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7, i); |
| 752 | } |
| 753 | } |
| 754 | |
| 755 | static void cfunc_lhv(void *param) |
| 756 | { |
| 757 | ((rsp_cop2 *)param)->lhv(); |
| 758 | } |
| 759 | |
| 760 | |
| 761 | // LFV |
| 762 | // 31 25 20 15 10 6 0 |
| 763 | // -------------------------------------------------- |
| 764 | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 765 | // -------------------------------------------------- |
| 766 | // |
| 767 | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride |
| 768 | |
| 769 | inline void rsp_cop2_simd::lfv() |
| 770 | { |
| 771 | UINT32 op = m_op; |
| 772 | int dest = (op >> 16) & 0x1f; |
| 773 | int base = (op >> 21) & 0x1f; |
| 774 | int index = (op >> 7) & 0xf; |
| 775 | int offset = (op & 0x7f); |
| 776 | if (offset & 0x40) |
| 777 | { |
| 778 | offset |= 0xffffffc0; |
| 779 | } |
| 780 | |
| 781 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 782 | |
| 783 | // not sure what happens if 16-byte boundary is crossed... |
| 784 | |
| 785 | int end = (index >> 1) + 4; |
| 786 | |
| 787 | for (int i = index >> 1; i < end; i++) |
| 788 | { |
| 789 | INSERT16(m_xv[dest], m_rsp.DM_READ8(ea) << 7, i); |
| 790 | ea += 4; |
| 791 | } |
| 792 | } |
| 793 | |
| 794 | static void cfunc_lfv(void *param) |
| 795 | { |
| 796 | ((rsp_cop2 *)param)->lfv(); |
| 797 | } |
| 798 | |
| 799 | |
| 800 | // LWV |
| 801 | // |
| 802 | // 31 25 20 15 10 6 0 |
| 803 | // -------------------------------------------------- |
| 804 | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 805 | // -------------------------------------------------- |
| 806 | // |
| 807 | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 808 | // after byte index 15 |
| 809 | |
| 810 | inline void rsp_cop2_simd::lwv() |
| 811 | { |
| 812 | UINT32 op = m_op; |
| 813 | int dest = (op >> 16) & 0x1f; |
| 814 | int base = (op >> 21) & 0x1f; |
| 815 | int index = (op >> 7) & 0xf; |
| 816 | int offset = (op & 0x7f); |
| 817 | if (offset & 0x40) |
| 818 | { |
| 819 | offset |= 0xffffffc0; |
| 820 | } |
| 821 | |
| 822 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 823 | int end = (16 - index) + 16; |
| 824 | |
| 825 | UINT8 val[16]; |
| 826 | for (int i = (16 - index); i < end; i++) |
| 827 | { |
| 828 | val[i & 0xf] = m_rsp.DM_READ8(ea); |
| 829 | ea += 4; |
| 830 | } |
| 831 | |
| 832 | m_xv[dest] = _mm_set_epi8(val[15], val[14], val[13], val[12], val[11], val[10], val[ 9], val[ 8], |
| 833 | val[ 7], val[ 6], val[ 5], val[ 4], val[ 3], val[ 2], val[ 1], val[ 0]); |
| 834 | } |
| 835 | |
| 836 | static void cfunc_lwv(void *param) |
| 837 | { |
| 838 | ((rsp_cop2 *)param)->lwv(); |
| 839 | } |
| 840 | |
| 841 | |
| 842 | // LTV |
| 843 | // |
| 844 | // 31 25 20 15 10 6 0 |
| 845 | // -------------------------------------------------- |
| 846 | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 847 | // -------------------------------------------------- |
| 848 | // |
| 849 | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 850 | |
| 851 | inline void rsp_cop2_simd::ltv() |
| 852 | { |
| 853 | UINT32 op = m_op; |
| 854 | int dest = (op >> 16) & 0x1f; |
| 855 | int base = (op >> 21) & 0x1f; |
| 856 | int index = (op >> 7) & 0xf; |
| 857 | int offset = (op & 0x7f); |
| 858 | |
| 859 | // FIXME: has a small problem with odd indices |
| 860 | |
| 861 | int vs = dest; |
| 862 | int ve = dest + 8; |
| 863 | if (ve > 32) |
| 864 | { |
| 865 | ve = 32; |
| 866 | } |
| 867 | |
| 868 | int element = 7 - (index >> 1); |
| 869 | |
| 870 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 871 | |
| 872 | ea = ((ea + 8) & ~0xf) + (index & 1); |
| 873 | for (int i = vs; i < ve; i++) |
| 874 | { |
| 875 | element = (8 - (index >> 1) + (i - vs)) << 1; |
| 876 | UINT16 value = (m_rsp.DM_READ8(ea) << 8) | m_rsp.DM_READ8(ea + 1); |
| 877 | INSERT16(m_xv[i], value, (element >> 1)); |
| 878 | ea += 2; |
| 879 | } |
| 880 | } |
| 881 | |
| 882 | static void cfunc_ltv(void *param) |
| 883 | { |
| 884 | ((rsp_cop2 *)param)->ltv(); |
| 885 | } |
| 886 | |
| 887 | |
| 888 | /*************************************************************************** |
| 889 | Vector Store Instructions |
| 890 | ***************************************************************************/ |
| 891 | |
| 892 | // SBV |
| 893 | // |
| 894 | // 31 25 20 15 10 6 0 |
| 895 | // -------------------------------------------------- |
| 896 | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 897 | // -------------------------------------------------- |
| 898 | // |
| 899 | // Stores 1 byte from vector byte index |
| 900 | |
| 901 | inline void rsp_cop2_simd::sbv() |
| 902 | { |
| 903 | UINT32 op = m_op; |
| 904 | int dest = (op >> 16) & 0x1f; |
| 905 | int base = (op >> 21) & 0x1f; |
| 906 | int index = (op >> 7) & 0xf; |
| 907 | int offset = (op & 0x7f); |
| 908 | if (offset & 0x40) |
| 909 | { |
| 910 | offset |= 0xffffffc0; |
| 911 | } |
| 912 | |
| 913 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + offset : offset; |
| 914 | UINT16 value; |
| 915 | EXTRACT16(m_xv[dest], value, (index >> 1)); |
| 916 | value >>= (1-(index & 1)) * 8; |
| 917 | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 918 | } |
| 919 | |
| 920 | static void cfunc_sbv(void *param) |
| 921 | { |
| 922 | ((rsp_cop2 *)param)->sbv(); |
| 923 | } |
| 924 | |
| 925 | |
| 926 | // SSV |
| 927 | // |
| 928 | // 31 25 20 15 10 6 0 |
| 929 | // -------------------------------------------------- |
| 930 | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 931 | // -------------------------------------------------- |
| 932 | // |
| 933 | // Stores 2 bytes starting from vector byte index |
| 934 | |
| 935 | inline void rsp_cop2_simd::ssv() |
| 936 | { |
| 937 | UINT32 op = m_op; |
| 938 | int dest = (op >> 16) & 0x1f; |
| 939 | int base = (op >> 21) & 0x1f; |
| 940 | int index = (op >> 7) & 0xf; |
| 941 | int offset = (op & 0x7f); |
| 942 | if (offset & 0x40) |
| 943 | { |
| 944 | offset |= 0xffffffc0; |
| 945 | } |
| 946 | |
| 947 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 948 | |
| 949 | int end = index + 2; |
| 950 | for (int i = index; i < end; i++) |
| 951 | { |
| 952 | UINT16 value; |
| 953 | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 954 | value >>= (1 - (i & 1)) * 8; |
| 955 | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 956 | ea++; |
| 957 | } |
| 958 | } |
| 959 | |
| 960 | static void cfunc_ssv(void *param) |
| 961 | { |
| 962 | ((rsp_cop2 *)param)->ssv(); |
| 963 | } |
| 964 | |
| 965 | |
| 966 | // SLV |
| 967 | // |
| 968 | // 31 25 20 15 10 6 0 |
| 969 | // -------------------------------------------------- |
| 970 | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 971 | // -------------------------------------------------- |
| 972 | // |
| 973 | // Stores 4 bytes starting from vector byte index |
| 974 | |
| 975 | inline void rsp_cop2_simd::slv() |
| 976 | { |
| 977 | UINT32 op = m_op; |
| 978 | int dest = (op >> 16) & 0x1f; |
| 979 | int base = (op >> 21) & 0x1f; |
| 980 | int index = (op >> 7) & 0xf; |
| 981 | int offset = (op & 0x7f); |
| 982 | if (offset & 0x40) |
| 983 | { |
| 984 | offset |= 0xffffffc0; |
| 985 | } |
| 986 | |
| 987 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 988 | |
| 989 | int end = index + 4; |
| 990 | for (int i = index; i < end; i++) |
| 991 | { |
| 992 | UINT16 value; |
| 993 | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 994 | value >>= (1 - (i & 1)) * 8; |
| 995 | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 996 | ea++; |
| 997 | } |
| 998 | } |
| 999 | |
| 1000 | static void cfunc_slv(void *param) |
| 1001 | { |
| 1002 | ((rsp_cop2 *)param)->slv(); |
| 1003 | } |
| 1004 | |
| 1005 | |
| 1006 | // SDV |
| 1007 | // |
| 1008 | // 31 25 20 15 10 6 0 |
| 1009 | // -------------------------------------------------- |
| 1010 | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 1011 | // -------------------------------------------------- |
| 1012 | // |
| 1013 | // Stores 8 bytes starting from vector byte index |
| 1014 | |
| 1015 | inline void rsp_cop2_simd::sdv() |
| 1016 | { |
| 1017 | UINT32 op = m_op; |
| 1018 | int dest = (op >> 16) & 0x1f; |
| 1019 | int base = (op >> 21) & 0x1f; |
| 1020 | int index = (op >> 7) & 0x8; |
| 1021 | int offset = (op & 0x7f); |
| 1022 | if (offset & 0x40) |
| 1023 | { |
| 1024 | offset |= 0xffffffc0; |
| 1025 | } |
| 1026 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1027 | |
| 1028 | int end = index + 8; |
| 1029 | for (int i = index; i < end; i++) |
| 1030 | { |
| 1031 | UINT16 value; |
| 1032 | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 1033 | value >>= (1 - (i & 1)) * 8; |
| 1034 | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 1035 | ea++; |
| 1036 | } |
| 1037 | } |
| 1038 | |
| 1039 | static void cfunc_sdv(void *param) |
| 1040 | { |
| 1041 | ((rsp_cop2 *)param)->sdv(); |
| 1042 | } |
| 1043 | |
| 1044 | |
| 1045 | // SQV |
| 1046 | // |
| 1047 | // 31 25 20 15 10 6 0 |
| 1048 | // -------------------------------------------------- |
| 1049 | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 1050 | // -------------------------------------------------- |
| 1051 | // |
| 1052 | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 1053 | |
| 1054 | inline void rsp_cop2_simd::sqv() |
| 1055 | { |
| 1056 | UINT32 op = m_op; |
| 1057 | int dest = (op >> 16) & 0x1f; |
| 1058 | int base = (op >> 21) & 0x1f; |
| 1059 | int index = (op >> 7) & 0xf; |
| 1060 | int offset = (op & 0x7f); |
| 1061 | if (offset & 0x40) |
| 1062 | { |
| 1063 | offset |= 0xffffffc0; |
| 1064 | } |
| 1065 | |
| 1066 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1067 | int end = index + (16 - (ea & 0xf)); |
| 1068 | for (int i=index; i < end; i++) |
| 1069 | { |
| 1070 | UINT16 value; |
| 1071 | EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 1072 | value >>= (1-(i & 1)) * 8; |
| 1073 | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 1074 | ea++; |
| 1075 | } |
| 1076 | } |
| 1077 | |
| 1078 | static void cfunc_sqv(void *param) |
| 1079 | { |
| 1080 | ((rsp_cop2 *)param)->sqv(); |
| 1081 | } |
| 1082 | |
| 1083 | |
| 1084 | // SRV |
| 1085 | // |
| 1086 | // 31 25 20 15 10 6 0 |
| 1087 | // -------------------------------------------------- |
| 1088 | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 1089 | // -------------------------------------------------- |
| 1090 | // |
| 1091 | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 1092 | |
| 1093 | inline void rsp_cop2_simd::srv() |
| 1094 | { /* SRV: write (address & 15) bytes of vector register `dest` to data memory, starting at the 16-byte-aligned address below the effective address. */ |
| 1095 | UINT32 op = m_op; |
| 1096 | int dest = (op >> 16) & 0x1f; /* vector register number, opcode bits 20..16 */ |
| 1097 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1098 | int index = (op >> 7) & 0xf; /* starting byte index, opcode bits 10..7 */ |
| 1099 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1100 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1101 | { |
| 1102 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1103 | } |
| 1104 | /* Effective address: the offset is scaled by 16; base register 0 contributes nothing. */ |
| 1105 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1106 | /* The loop below runs (ea & 0xf) times, i.e. not at all when ea is already 16-byte aligned. */ |
| 1107 | int end = index + (ea & 0xf); |
| 1108 | int o = (16 - (ea & 0xf)) & 0xf; /* bias added to the register byte index */ |
| 1109 | ea &= ~0xf; /* writes start at the aligned base of the 16-byte line */ |
| 1110 | |
| 1111 | for (int i = index; i < end; i++) |
| 1112 | { |
| 1113 | UINT32 bi = (i + o) & 0xf; /* register byte index, wrapped to 0..15 */ |
| 1114 | UINT16 value; |
| 1115 | EXTRACT16(m_xv[dest], value, (bi >> 1)); |
| 1116 | value >>= (1-(bi & 1)) * 8; /* even byte index selects the upper byte of the element, odd selects the lower byte */ |
| 1117 | m_rsp.DM_WRITE8(ea, (UINT8)value); |
| 1118 | ea++; |
| 1119 | } |
| 1120 | } |
| 1121 | |
| 1122 | static void cfunc_srv(void *param) |
| 1123 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose srv() is run. */ |
| 1124 | static_cast<rsp_cop2 *>(param)->srv(); |
| 1125 | } |
| 1126 | |
| 1127 | |
| 1128 | // SPV |
| 1129 | // |
| 1130 | // 31 25 20 15 10 6 0 |
| 1131 | // -------------------------------------------------- |
| 1132 | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1133 | // -------------------------------------------------- |
| 1134 | // |
| 1135 | // Stores upper 8 bits of each element |
| 1136 | |
| 1137 | inline void rsp_cop2_simd::spv() |
| 1138 | { /* SPV: write eight bytes to data memory, one per step from `index`; steps whose (i & 15) is below 8 store bits 15..8 of the element, the others store bits 14..7. */ |
| 1139 | UINT32 op = m_op; |
| 1140 | int dest = (op >> 16) & 0x1f; /* vector register number, opcode bits 20..16 */ |
| 1141 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1142 | int index = (op >> 7) & 0xf; /* starting index, opcode bits 10..7 */ |
| 1143 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1144 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1145 | { |
| 1146 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1147 | } |
| 1148 | /* Effective address: the offset is scaled by 8; base register 0 contributes nothing. */ |
| 1149 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1150 | int end = index + 8; |
| 1151 | for (int i=index; i < end; i++) |
| 1152 | { |
| 1153 | if ((i & 0xf) < 8) |
| 1154 | { |
| 1155 | UINT16 value; |
| 1156 | EXTRACT16(m_xv[dest], value, i); |
| 1157 | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 8)); /* upper byte of the element */ |
| 1158 | } |
| 1159 | else |
| 1160 | { |
| 1161 | UINT16 value; |
| 1162 | EXTRACT16(m_xv[dest], value, i); /* NOTE(review): i is 8 or more on this path (up to 22); EXTRACT16 is defined elsewhere - confirm it wraps the element number */ |
| 1163 | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 7)); /* bits 14..7 of the element */ |
| 1164 | } |
| 1165 | ea++; |
| 1166 | } |
| 1167 | } |
| 1168 | |
| 1169 | static void cfunc_spv(void *param) |
| 1170 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose spv() is run. */ |
| 1171 | static_cast<rsp_cop2 *>(param)->spv(); |
| 1172 | } |
| 1173 | |
| 1174 | |
| 1175 | // SUV |
| 1176 | // |
| 1177 | // 31 25 20 15 10 6 0 |
| 1178 | // -------------------------------------------------- |
| 1179 | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1180 | // -------------------------------------------------- |
| 1181 | // |
| 1182 | // Stores bits 14-7 of each element |
| 1183 | |
| 1184 | inline void rsp_cop2_simd::suv() |
| 1185 | { /* SUV: write eight bytes to data memory, one per step from `index`; steps whose (i & 15) is below 8 store bits 14..7 of the element, the others store bits 15..8. */ |
| 1186 | UINT32 op = m_op; |
| 1187 | int dest = (op >> 16) & 0x1f; /* vector register number, opcode bits 20..16 */ |
| 1188 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1189 | int index = (op >> 7) & 0xf; /* starting index, opcode bits 10..7 */ |
| 1190 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1191 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1192 | { |
| 1193 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1194 | } |
| 1195 | /* Effective address: the offset is scaled by 8; base register 0 contributes nothing. */ |
| 1196 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1197 | int end = index + 8; |
| 1198 | for (int i=index; i < end; i++) |
| 1199 | { |
| 1200 | if ((i & 0xf) < 8) |
| 1201 | { |
| 1202 | UINT16 value; |
| 1203 | EXTRACT16(m_xv[dest], value, i); |
| 1204 | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 7)); /* bits 14..7 of the element */ |
| 1205 | } |
| 1206 | else |
| 1207 | { |
| 1208 | UINT16 value; |
| 1209 | EXTRACT16(m_xv[dest], value, i); /* NOTE(review): i is 8 or more on this path (up to 22); EXTRACT16 is defined elsewhere - confirm it wraps the element number */ |
| 1210 | m_rsp.DM_WRITE8(ea, (UINT8)(value >> 8)); /* upper byte of the element */ |
| 1211 | } |
| 1212 | ea++; |
| 1213 | } |
| 1214 | } |
| 1215 | |
| 1216 | static void cfunc_suv(void *param) |
| 1217 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose suv() is run. */ |
| 1218 | static_cast<rsp_cop2 *>(param)->suv(); |
| 1219 | } |
| 1220 | |
| 1221 | |
| 1222 | // SHV |
| 1223 | // |
| 1224 | // 31 25 20 15 10 6 0 |
| 1225 | // -------------------------------------------------- |
| 1226 | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1227 | // -------------------------------------------------- |
| 1228 | // |
| 1229 | // Stores bits 14-7 of each element, with 2-byte stride |
| 1230 | |
| 1231 | inline void rsp_cop2_simd::shv() |
| 1232 | { /* SHV: write bits 14..7 of eight elements of vector register `dest` to every second byte of data memory. */ |
| 1233 | UINT32 op = m_op; |
| 1234 | int dest = (op >> 16) & 0x1f; /* vector register number, opcode bits 20..16 */ |
| 1235 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1236 | int index = (op >> 7) & 0xf; /* starting byte index, opcode bits 10..7 */ |
| 1237 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1238 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1239 | { |
| 1240 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1241 | } |
| 1242 | /* Effective address: the offset is scaled by 16; base register 0 contributes nothing. */ |
| 1243 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1244 | for (int i=0; i < 8; i++) |
| 1245 | { |
| 1246 | int element = index + (i << 1); /* byte position within the register, advancing two bytes per step */ |
| 1247 | UINT16 value; |
| 1248 | EXTRACT16(m_xv[dest], value, element >> 1); /* NOTE(review): element >> 1 can exceed 7 when index is non-zero; EXTRACT16 is defined elsewhere - confirm it wraps */ |
| 1249 | m_rsp.DM_WRITE8(ea, (value >> 7) & 0x00ff); |
| 1250 | ea += 2; /* 2-byte stride in memory */ |
| 1251 | } |
| 1252 | } |
| 1253 | |
| 1254 | static void cfunc_shv(void *param) |
| 1255 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose shv() is run. */ |
| 1256 | static_cast<rsp_cop2 *>(param)->shv(); |
| 1257 | } |
| 1258 | |
| 1259 | |
| 1260 | // SFV |
| 1261 | // |
| 1262 | // 31 25 20 15 10 6 0 |
| 1263 | // -------------------------------------------------- |
| 1264 | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1265 | // -------------------------------------------------- |
| 1266 | // |
| 1267 | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 1268 | |
| 1269 | inline void rsp_cop2_simd::sfv() |
| 1270 | { /* SFV: write bits 14..7 of four consecutive elements of vector register `dest` to every fourth byte of one 16-byte line of data memory. */ |
| 1271 | UINT32 op = m_op; |
| 1272 | int dest = (op >> 16) & 0x1f; /* vector register number, opcode bits 20..16 */ |
| 1273 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1274 | int index = (op >> 7) & 0xf; /* starting byte index, opcode bits 10..7 */ |
| 1275 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1276 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1277 | { |
| 1278 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1279 | } |
| 1280 | /* Effective address: the offset is scaled by 16; base register 0 contributes nothing. */ |
| 1281 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1282 | int eaoffset = ea & 0xf; /* position inside the 16-byte line */ |
| 1283 | ea &= ~0xf; /* base of the 16-byte line */ |
| 1284 | |
| 1285 | int end = (index >> 1) + 4; /* four elements starting at element index >> 1 */ |
| 1286 | |
| 1287 | for (int i = index>>1; i < end; i++) |
| 1288 | { |
| 1289 | UINT16 value; |
| 1290 | EXTRACT16(m_xv[dest], value, i); /* NOTE(review): i can exceed 7 when index is above 8; EXTRACT16 is defined elsewhere - confirm it wraps */ |
| 1291 | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), (value >> 7) & 0x00ff); /* the write position wraps inside the 16-byte line */ |
| 1292 | eaoffset += 4; /* 4-byte stride in memory */ |
| 1293 | } |
| 1294 | } |
| 1295 | |
| 1296 | static void cfunc_sfv(void *param) |
| 1297 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose sfv() is run. */ |
| 1298 | static_cast<rsp_cop2 *>(param)->sfv(); |
| 1299 | } |
| 1300 | |
| 1301 | |
| 1302 | // SWV |
| 1303 | // |
| 1304 | // 31 25 20 15 10 6 0 |
| 1305 | // -------------------------------------------------- |
| 1306 | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1307 | // -------------------------------------------------- |
| 1308 | // |
| 1309 | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1310 | // after byte index 15 |
| 1311 | |
| 1312 | inline void rsp_cop2_simd::swv() |
| 1313 | { /* SWV: write sixteen bytes of vector register `dest`, starting at byte `index`, into one 16-byte line of data memory with the write position wrapping inside that line. */ |
| 1314 | UINT32 op = m_op; |
| 1315 | int dest = (op >> 16) & 0x1f; /* vector register number, opcode bits 20..16 */ |
| 1316 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1317 | int index = (op >> 7) & 0xf; /* starting byte index, opcode bits 10..7 */ |
| 1318 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1319 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1320 | { |
| 1321 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1322 | } |
| 1323 | /* Effective address: the offset is scaled by 16; base register 0 contributes nothing. */ |
| 1324 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1325 | int eaoffset = ea & 0xf; /* position inside the 16-byte line */ |
| 1326 | ea &= ~0xf; /* base of the 16-byte line */ |
| 1327 | |
| 1328 | int end = index + 16; |
| 1329 | for (int i = index; i < end; i++) |
| 1330 | { |
| 1331 | UINT16 value; |
| 1332 | EXTRACT16(m_xv[dest], value, i >> 1); /* NOTE(review): i >> 1 can exceed 7 when index is non-zero; the wrap to byte 0 depends on EXTRACT16, defined elsewhere - confirm */ |
| 1333 | m_rsp.DM_WRITE8(ea + (eaoffset & 0xf), (value >> ((1-(i & 1)) * 8)) & 0xff); /* even byte index selects the upper byte, odd the lower byte */ |
| 1334 | eaoffset++; |
| 1335 | } |
| 1336 | } |
| 1337 | |
| 1338 | static void cfunc_swv(void *param) |
| 1339 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose swv() is run. */ |
| 1340 | static_cast<rsp_cop2 *>(param)->swv(); |
| 1341 | } |
| 1342 | |
| 1343 | |
| 1344 | // STV |
| 1345 | // |
| 1346 | // 31 25 20 15 10 6 0 |
| 1347 | // -------------------------------------------------- |
| 1348 | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1349 | // -------------------------------------------------- |
| 1350 | // |
| 1351 | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 1352 | |
| 1353 | inline void rsp_cop2_simd::stv() |
| 1354 | { /* STV: write one 16-bit element from each of up to eight consecutive vector registers into one 16-byte line of data memory, stepping the element number per register. */ |
| 1355 | UINT32 op = m_op; |
| 1356 | int dest = (op >> 16) & 0x1f; /* first vector register number, opcode bits 20..16 */ |
| 1357 | int base = (op >> 21) & 0x1f; /* scalar base register number, opcode bits 25..21 */ |
| 1358 | int index = (op >> 7) & 0xf; /* byte index field, opcode bits 10..7 */ |
| 1359 | int offset = (op & 0x7f); /* 7-bit offset field, opcode bits 6..0 */ |
| 1360 | |
| 1361 | if (offset & 0x40) /* bit 6 set: the field is negative */ |
| 1362 | { |
| 1363 | offset |= 0xffffffc0; /* sign-extend to a full int */ |
| 1364 | } |
| 1365 | /* Registers dest .. dest+7 are used, cut off after register 31. */ |
| 1366 | int vs = dest; |
| 1367 | int ve = dest + 8; |
| 1368 | if (ve > 32) |
| 1369 | { |
| 1370 | ve = 32; |
| 1371 | } |
| 1372 | |
| 1373 | int element = 8 - (index >> 1); /* starting element number; this is 8 when index is 0 or 1 */ |
| 1374 | /* Effective address: the offset is scaled by 16; base register 0 contributes nothing. */ |
| 1375 | UINT32 ea = (base) ? m_rsp.m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1376 | int eaoffset = (ea & 0xf) + (element * 2); /* starting position inside the 16-byte line */ |
| 1377 | ea &= ~0xf; /* base of the 16-byte line */ |
| 1378 | |
| 1379 | for (int i = vs; i < ve; i++) |
| 1380 | { |
| 1381 | UINT16 value; |
| 1382 | EXTRACT16(m_xv[i], value, element); /* NOTE(review): element runs from 8 - (index >> 1) upward and so exceeds 7; EXTRACT16 is defined elsewhere - confirm it wraps */ |
| 1383 | m_rsp.DM_WRITE16(ea + (eaoffset & 0xf), value); /* the write position wraps inside the 16-byte line */ |
| 1384 | eaoffset += 2; |
| 1385 | element++; |
| 1386 | } |
| 1387 | } |
| 1388 | |
| 1389 | static void cfunc_stv(void *param) |
| 1390 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose stv() is run. */ |
| 1391 | static_cast<rsp_cop2 *>(param)->stv(); |
| 1392 | } |
| 1393 | |
| 1394 | |
| 1395 | /*************************************************************************** |
| 1396 | SIMD Accelerators |
| 1397 | ***************************************************************************/ |
| 1398 | |
| 1399 | /* ============================================================================ |
| 1400 | * RSPPackLo32to16: Pack LSBs of 32-bit vectors to 16-bits without saturation. |
| 1401 | * TODO: 5 SSE2 operations is kind of expensive just to truncate values? |
| 1402 | * ========================================================================= */ |
| 1403 | INLINE __m128i RSPPackLo32to16(__m128i vectorLow, __m128i vectorHigh) |
| 1404 | { |
| 1405 | /* Shift each 32-bit lane left by 16 and arithmetically back right by 16: the lane then holds */ |
| 1406 | /* its own low 16 bits sign-extended, so the saturating pack below cannot alter them. */ |
| 1407 | const __m128i lowLanes = _mm_srai_epi32(_mm_slli_epi32(vectorLow, 16), 16); |
| 1408 | const __m128i highLanes = _mm_srai_epi32(_mm_slli_epi32(vectorHigh, 16), 16); |
| 1409 | return _mm_packs_epi32(lowLanes, highLanes); |
| 1410 | } |
| 1411 | |
| 1412 | /* ============================================================================ |
| 1413 | * RSPPackHi32to16: Pack MSBs of 32-bit vectors to 16-bits without saturation. |
| 1414 | * ========================================================================= */ |
| 1415 | INLINE __m128i RSPPackHi32to16(__m128i vectorLow, __m128i vectorHigh) |
| 1416 | { |
| 1417 | /* An arithmetic shift right by 16 leaves the upper half of each 32-bit lane sign-extended, */ |
| 1418 | /* which always fits in 16 bits, so the saturating pack returns it unchanged. */ |
| 1419 | return _mm_packs_epi32(_mm_srai_epi32(vectorLow, 16), _mm_srai_epi32(vectorHigh, 16)); |
| 1420 | } |
| 1421 | |
| 1422 | /* ============================================================================ |
| 1423 | * RSPSignExtend16to32: Sign-extend 16-bit slices to 32-bit slices. |
| 1424 | * ========================================================================= */ |
| 1425 | INLINE void RSPSignExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 1426 | { |
| 1427 | const __m128i signFill = _mm_srai_epi16(source, 15); /* 0xffff for negative 16-bit lanes, 0 otherwise */ |
| 1428 | *vectorHigh = _mm_unpackhi_epi16(source, signFill); /* upper four lanes, widened to 32 bits */ |
| 1429 | *vectorLow = _mm_unpacklo_epi16(source, signFill); /* lower four lanes, widened to 32 bits */ |
| 1430 | } |
| 1431 | |
| 1432 | /* ============================================================================ |
| 1433 | * RSPZeroExtend16to32: Zero-extend 16-bit slices to 32-bit slices. |
| 1434 | * ========================================================================= */ |
| 1435 | INLINE void RSPZeroExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 1436 | { /* Interleaving each 16-bit lane with zero widens it to an unsigned 32-bit lane. */ |
| 1437 | *vectorHigh = _mm_unpackhi_epi16(source, _mm_setzero_si128()); /* upper four lanes */ |
| 1438 | *vectorLow = _mm_unpacklo_epi16(source, _mm_setzero_si128()); /* lower four lanes */ |
| 1439 | } |
| 1440 | |
| 1441 | /* ============================================================================ |
| 1442 | * _mm_mullo_epi32: SSE2 lacks _mm_mullo_epi32, define it manually. |
| 1443 | * TODO/WARNING/DISCLAIMER: Assumes one argument is positive. |
| 1444 | * ========================================================================= */ |
| 1445 | INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) |
| 1446 | { |
| 1447 | /* _mm_mul_epu32 yields 64-bit products of 32-bit lanes 0 and 2; this mask keeps their low 32 bits. */ |
| 1448 | const __m128i lowDwordMask = _mm_setr_epi32(~0, 0, ~0, 0); |
| 1449 | |
| 1450 | /* Products for lanes 0 and 2. */ |
| 1451 | const __m128i evenLanes = _mm_and_si128(_mm_mul_epu32(b, a), lowDwordMask); |
| 1452 | |
| 1453 | /* Lanes 1 and 3: shift the sources down one lane, multiply, mask, then shift the results back up. */ |
| 1454 | __m128i oddLanes = _mm_mul_epu32(_mm_srli_si128(b, 4), _mm_srli_si128(a, 4)); |
| 1455 | oddLanes = _mm_slli_si128(_mm_and_si128(oddLanes, lowDwordMask), 4); |
| 1456 | |
| 1457 | return _mm_or_si128(evenLanes, oddLanes); |
| 1458 | } |
| 1459 | |
| 1460 | /* ============================================================================ |
| 1461 | * RSPClampLowToVal: Clamps the low word of the accumulator. |
| 1462 | * ========================================================================= */ |
| 1463 | INLINE __m128i RSPClampLowToVal(__m128i vaccLow, __m128i vaccMid, __m128i vaccHigh) |
| 1464 | { /* Per 16-bit lane: returns vaccLow when the high and middle slices show the value fits, otherwise 0 (see the masks below). */ |
| 1465 | __m128i setMask = _mm_cmpeq_epi16(_mm_setzero_si128(), _mm_setzero_si128()); /* all bits set */ |
| 1466 | __m128i negCheck, useValMask, negVal, posVal; |
| 1467 | |
| 1468 | /* Compute some common values ahead of time. */ |
| 1469 | negCheck = _mm_cmplt_epi16(vaccHigh, _mm_setzero_si128()); /* lanes whose high slice is negative */ |
| 1470 | |
| 1471 | /* If accumulator < 0: keep vaccLow only where the high slice is 0xffff and the middle slice's sign bit is set. */ |
| 1472 | useValMask = _mm_and_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15)); |
| 1473 | useValMask = _mm_cmpeq_epi16(useValMask, setMask); |
| 1474 | negVal = _mm_and_si128(useValMask, vaccLow); |
| 1475 | |
| 1476 | /* Otherwise: keep vaccLow only where the high slice is 0 and the middle slice's sign bit is clear. NOTE(review): the original comment said "clamp to ~0 if any high bits are set", but the AND below yields 0 for those lanes - confirm intent. */ |
| 1477 | useValMask = _mm_or_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15)); |
| 1478 | useValMask = _mm_cmpeq_epi16(useValMask, _mm_setzero_si128()); |
| 1479 | posVal = _mm_and_si128(useValMask, vaccLow); |
| 1480 | /* Select negVal for lanes with a negative high slice, posVal for the rest. */ |
| 1481 | negVal = _mm_and_si128(negCheck, negVal); |
| 1482 | posVal = _mm_andnot_si128(negCheck, posVal); |
| 1483 | return _mm_or_si128(negVal, posVal); |
| 1484 | } |
| 1485 | |
| 1486 | |
| 1487 | /*************************************************************************** |
| 1488 | Vector Opcodes |
| 1489 | ***************************************************************************/ |
| 1490 | |
| 1491 | // VMULF |
| 1492 | // |
| 1493 | // 31 25 24 20 15 10 5 0 |
| 1494 | // ------------------------------------------------------ |
| 1495 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 1496 | // ------------------------------------------------------ |
| 1497 | // |
| 1498 | // Multiplies signed integer by signed integer * 2 |
| 1499 | |
| 1500 | inline void rsp_cop2_simd::vmulf() |
| 1501 | { /* VMULF: per lane, accumulator = 2 * s1 * s2 + 0x8000 (operands signed 16-bit); the result lane is the middle accumulator slice. */ |
| 1502 | int op = m_op; |
| 1503 | |
| 1504 | for (int i = 0; i < 8; i++) |
| 1505 | { |
| 1506 | UINT16 w1, w2; |
| 1507 | GET_VS1(w1, i); /* operand fetch macros are defined elsewhere */ |
| 1508 | GET_VS2(w2, i); |
| 1509 | INT32 s1 = (INT32)(INT16)w1; /* sign-extend both operands */ |
| 1510 | INT32 s2 = (INT32)(INT16)w2; |
| 1511 | |
| 1512 | if (s1 == -32768 && s2 == -32768) |
| 1513 | { |
| 1514 | // overflow |
| 1515 | SET_ACCUM_H(0, i); |
| 1516 | SET_ACCUM_M(-32768, i); |
| 1517 | SET_ACCUM_L(-32768, i); |
| 1518 | m_vres[i] = 0x7fff; /* result saturates to the largest positive value */ |
| 1519 | } |
| 1520 | else |
| 1521 | { |
| 1522 | INT64 r = s1 * s2 * 2; /* evaluated in 32-bit; it fits because the -32768 * -32768 case was handled above */ |
| 1523 | r += 0x8000; // rounding ? |
| 1524 | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); /* high slice is the sign extension of r */ |
| 1525 | SET_ACCUM_M((INT16)(r >> 16), i); |
| 1526 | SET_ACCUM_L((UINT16)(r), i); |
| 1527 | m_vres[i] = ACCUM_M(i); |
| 1528 | } |
| 1529 | } |
| 1530 | WRITEBACK_RESULT(); /* macro defined elsewhere; presumably copies m_vres into the destination register - confirm */ |
| 1531 | } |
| 1532 | |
| 1533 | static void cfunc_vmulf(void *param) |
| 1534 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmulf() is run. */ |
| 1535 | static_cast<rsp_cop2 *>(param)->vmulf(); |
| 1536 | } |
| 1537 | |
| 1538 | |
| 1539 | // VMULU |
| 1540 | // |
| 1541 | // 31 25 24 20 15 10 5 0 |
| 1542 | // ------------------------------------------------------ |
| 1543 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 1544 | // ------------------------------------------------------ |
| 1545 | // |
| 1546 | |
| 1547 | inline void rsp_cop2_simd::vmulu() |
| 1548 | { /* VMULU: per lane, accumulator = 2 * s1 * s2 + 0x8000 (operands signed 16-bit); the result lane is the middle slice clamped to 0 below and 0xffff above. */ |
| 1549 | int op = m_op; |
| 1550 | |
| 1551 | for (int i = 0; i < 8; i++) |
| 1552 | { |
| 1553 | UINT16 w1, w2; |
| 1554 | GET_VS1(w1, i); /* operand fetch macros are defined elsewhere */ |
| 1555 | GET_VS2(w2, i); |
| 1556 | INT32 s1 = (INT32)(INT16)w1; /* sign-extend both operands */ |
| 1557 | INT32 s2 = (INT32)(INT16)w2; |
| 1558 | /* Widen before multiplying: -32768 * -32768 * 2 is 2^31, which does not fit in a signed 32-bit int. */ |
| 1559 | INT64 r = (INT64)s1 * (INT64)s2 * 2; |
| 1560 | r += 0x8000; // rounding ? |
| 1561 | |
| 1562 | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 1563 | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 1564 | SET_ACCUM_L((UINT16)(r), i); |
| 1565 | |
| 1566 | if (r < 0) /* negative accumulator clamps to 0 */ |
| 1567 | { |
| 1568 | m_vres[i] = 0; |
| 1569 | } |
| 1570 | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) /* sign bits of the high and middle slices differ: clamp to all ones */ |
| 1571 | { |
| 1572 | m_vres[i] = -1; |
| 1573 | } |
| 1574 | else |
| 1575 | { |
| 1576 | m_vres[i] = ACCUM_M(i); |
| 1577 | } |
| 1578 | } |
| 1579 | WRITEBACK_RESULT(); /* macro defined elsewhere; presumably copies m_vres into the destination register - confirm */ |
| 1580 | } |
| 1581 | |
| 1582 | static void cfunc_vmulu(void *param) |
| 1583 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmulu() is run. */ |
| 1584 | static_cast<rsp_cop2 *>(param)->vmulu(); |
| 1585 | } |
| 1586 | |
| 1587 | |
| 1588 | // VMUDL |
| 1589 | // |
| 1590 | // 31 25 24 20 15 10 5 0 |
| 1591 | // ------------------------------------------------------ |
| 1592 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000100 | |
| 1593 | // ------------------------------------------------------ |
| 1594 | // |
| 1595 | // Multiplies unsigned fraction by unsigned fraction |
| 1596 | // The upper 16 bits of the product are stored into the low slice of the accumulator; the middle and high slices are cleared |
| 1597 | // The low slice of accumulator is stored into destination element |
| 1598 | |
| 1599 | inline void rsp_cop2_simd::vmudl() |
| 1600 | { /* VMUDL: unsigned 16x16 multiply per lane; the upper 16 bits of each product go to the destination and the low accumulator slice, the other slices are cleared. */ |
| 1601 | int op = m_op; |
| 1602 | |
| 1603 | __m128i vsReg = m_xv[VS1REG]; |
| 1604 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* shuffle table defined elsewhere; presumably applies the opcode's element selector - confirm */ |
| 1605 | |
| 1606 | /* Unpack to obtain for 32-bit precision. */ |
| 1607 | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); /* low 16 bits of each product */ |
| 1608 | __m128i unpackHi = _mm_mulhi_epu16(vsReg, vtReg); /* high 16 bits of each unsigned product */ |
| 1609 | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); /* 32-bit products, lanes 0..3 */ |
| 1610 | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); /* 32-bit products, lanes 4..7 */ |
| 1611 | |
| 1612 | m_xv[VDREG] = m_accum_l = RSPPackHi32to16(loProduct, hiProduct); |
| 1613 | |
| 1614 | m_accum_m = _mm_setzero_si128(); |
| 1615 | m_accum_h = _mm_setzero_si128(); |
| 1616 | } |
| 1617 | |
| 1618 | static void cfunc_vmudl(void *param) |
| 1619 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmudl() is run. */ |
| 1620 | static_cast<rsp_cop2 *>(param)->vmudl(); |
| 1621 | } |
| 1622 | |
| 1623 | |
| 1624 | // VMUDM |
| 1625 | // |
| 1626 | // 31 25 24 20 15 10 5 0 |
| 1627 | // ------------------------------------------------------ |
| 1628 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 1629 | // ------------------------------------------------------ |
| 1630 | // |
| 1631 | // Multiplies signed integer by unsigned fraction |
| 1632 | // The result is stored into accumulator |
| 1633 | // The middle slice of accumulator is stored into destination element |
| 1634 | |
| 1635 | inline void rsp_cop2_simd::vmudm() |
| 1636 | { /* VMUDM: signed VS lane times unsigned VT lane; the 32-bit product fills the low and middle accumulator slices, its sign fills the high slice, and the middle slice is the result. */ |
| 1637 | int op = m_op; |
| 1638 | |
| 1639 | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 1640 | |
| 1641 | __m128i vsReg = m_xv[VS1REG]; |
| 1642 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* shuffle table defined elsewhere; presumably applies the opcode's element selector - confirm */ |
| 1643 | |
| 1644 | /* Unpack to obtain for 32-bit precision. */ |
| 1645 | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 1646 | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 1647 | |
| 1648 | /* Begin accumulating the products. */ |
| 1649 | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 1650 | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 1651 | m_accum_l = RSPPackLo32to16(loProduct, hiProduct); /* low 16 bits of each product */ |
| 1652 | m_accum_m = m_xv[VDREG] = RSPPackHi32to16(loProduct, hiProduct); /* high 16 bits of each product */ |
| 1653 | /* High slice: the compare gives all ones for negative products, which packs to 0xffff; otherwise 0. */ |
| 1654 | loProduct = _mm_cmplt_epi32(loProduct, _mm_setzero_si128()); |
| 1655 | hiProduct = _mm_cmplt_epi32(hiProduct, _mm_setzero_si128()); |
| 1656 | m_accum_h = _mm_packs_epi32(loProduct, hiProduct); |
| 1657 | } |
| 1658 | |
| 1659 | static void cfunc_vmudm(void *param) |
| 1660 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmudm() is run. */ |
| 1661 | static_cast<rsp_cop2 *>(param)->vmudm(); |
| 1662 | } |
| 1663 | |
| 1664 | |
| 1665 | // VMUDN |
| 1666 | // |
| 1667 | // 31 25 24 20 15 10 5 0 |
| 1668 | // ------------------------------------------------------ |
| 1669 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 1670 | // ------------------------------------------------------ |
| 1671 | // |
| 1672 | // Multiplies unsigned fraction by signed integer |
| 1673 | // The result is stored into accumulator |
| 1674 | // The low slice of accumulator is stored into destination element |
| 1675 | |
| 1676 | inline void rsp_cop2_simd::vmudn() |
| 1677 | { /* VMUDN: unsigned VS lane times signed VT lane; the 32-bit product fills the low and middle accumulator slices, the middle slice's sign fills the high slice, and the low slice is the result. */ |
| 1678 | int op = m_op; |
| 1679 | |
| 1680 | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 1681 | |
| 1682 | __m128i vsReg = m_xv[VS1REG]; |
| 1683 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* shuffle table defined elsewhere; presumably applies the opcode's element selector - confirm */ |
| 1684 | |
| 1685 | /* Unpack to obtain for 32-bit precision. */ |
| 1686 | RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 1687 | RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 1688 | |
| 1689 | /* Begin accumulating the products. */ |
| 1690 | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 1691 | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 1692 | m_xv[VDREG] = m_accum_l = RSPPackLo32to16(loProduct, hiProduct); /* low 16 bits of each product */ |
| 1693 | m_accum_m = RSPPackHi32to16(loProduct, hiProduct); /* high 16 bits of each product */ |
| 1694 | m_accum_h = _mm_cmplt_epi16(m_accum_m, _mm_setzero_si128()); /* 0xffff where the middle slice is negative, else 0 */ |
| 1695 | } |
| 1696 | |
| 1697 | static void cfunc_vmudn(void *param) |
| 1698 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmudn() is run. */ |
| 1699 | static_cast<rsp_cop2 *>(param)->vmudn(); |
| 1700 | } |
| 1701 | |
| 1702 | |
| 1703 | // VMUDH |
| 1704 | // |
| 1705 | // 31 25 24 20 15 10 5 0 |
| 1706 | // ------------------------------------------------------ |
| 1707 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 1708 | // ------------------------------------------------------ |
| 1709 | // |
| 1710 | // Multiplies signed integer by signed integer |
| 1711 | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1712 | // The highest 32 bits of accumulator is saturated into destination element |
| 1713 | |
| 1714 | inline void rsp_cop2_simd::vmudh() |
| 1715 | { /* VMUDH: signed 16x16 multiply per lane; the 32-bit product fills the middle and high accumulator slices, the low slice is cleared, and the result is the product saturated to signed 16 bits. */ |
| 1716 | int op = m_op; |
| 1717 | |
| 1718 | __m128i vaccLow, vaccHigh; |
| 1719 | __m128i unpackLo, unpackHi; |
| 1720 | |
| 1721 | __m128i vsReg = m_xv[VS1REG]; |
| 1722 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* shuffle table defined elsewhere; presumably applies the opcode's element selector - confirm */ |
| 1723 | |
| 1724 | /* Multiply the sources, accumulate the product. */ |
| 1725 | unpackLo = _mm_mullo_epi16(vsReg, vtReg); /* low 16 bits of each product */ |
| 1726 | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); /* high 16 bits of each signed product */ |
| 1727 | vaccHigh = _mm_unpackhi_epi16(unpackLo, unpackHi); /* 32-bit products, lanes 4..7 */ |
| 1728 | vaccLow = _mm_unpacklo_epi16(unpackLo, unpackHi); /* 32-bit products, lanes 0..3 */ |
| 1729 | |
| 1730 | /* Pack the accumulator and result back up. */ |
| 1731 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); /* signed saturation to 16 bits */ |
| 1732 | m_accum_l = _mm_setzero_si128(); |
| 1733 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1734 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 1735 | } |
| 1736 | |
| 1737 | static void cfunc_vmudh(void *param) |
| 1738 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmudh() is run. */ |
| 1739 | static_cast<rsp_cop2 *>(param)->vmudh(); |
| 1740 | } |
| 1741 | |
| 1742 | |
| 1743 | // VMACF |
| 1744 | // |
| 1745 | // 31 25 24 20 15 10 5 0 |
| 1746 | // ------------------------------------------------------ |
| 1747 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 1748 | // ------------------------------------------------------ |
| 1749 | // |
| 1750 | |
| 1751 | inline void rsp_cop2_simd::vmacf() |
| 1752 | { /* VMACF: per lane, adds the signed 16x16 product shifted left by 17 to the 64-bit value built from the four accumulator slices, then stores a saturated result lane. */ |
| 1753 | int op = m_op; |
| 1754 | |
| 1755 | for (int i = 0; i < 8; i++) |
| 1756 | { |
| 1757 | UINT16 w1, w2; |
| 1758 | GET_VS1(w1, i); /* operand fetch macros are defined elsewhere */ |
| 1759 | GET_VS2(w2, i); |
| 1760 | INT32 s1 = (INT32)(INT16)w1; /* sign-extend both operands */ |
| 1761 | INT32 s2 = (INT32)(INT16)w2; |
| 1762 | |
| 1763 | INT32 r = s1 * s2; |
| 1764 | /* Assemble the lane's accumulator: LL in bits 15..0, L in 31..16, M in 47..32, H in 63..48. */ |
| 1765 | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1766 | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1767 | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1768 | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1769 | /* A shift by 17 places twice the product at the L slice (bit 16). */ |
| 1770 | q += (INT64)(r) << 17; |
| 1771 | SET_ACCUM_LL((UINT16)q, i); |
| 1772 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1773 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1774 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1775 | |
| 1776 | m_vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); /* macro defined elsewhere; presumably clamps the accumulator between the two given bounds - confirm */ |
| 1777 | } |
| 1778 | WRITEBACK_RESULT(); /* macro defined elsewhere; presumably copies m_vres into the destination register - confirm */ |
| 1779 | } |
| 1780 | |
| 1781 | static void cfunc_vmacf(void *param) |
| 1782 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmacf() is run. */ |
| 1783 | static_cast<rsp_cop2 *>(param)->vmacf(); |
| 1784 | } |
| 1785 | |
| 1786 | |
| 1787 | // VMACU |
| 1788 | // |
| 1789 | // 31 25 24 20 15 10 5 0 |
| 1790 | // ------------------------------------------------------ |
| 1791 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 1792 | // ------------------------------------------------------ |
| 1793 | // |
| 1794 | |
| 1795 | inline void rsp_cop2_simd::vmacu() |
| 1796 | { /* VMACU: adds twice the signed 16x16 product of each lane pair to the accumulator slices. NOTE(review): unlike the sibling multiply ops, this body never writes m_xv[VDREG]; vdReg is assigned once and not used - confirm whether the destination store and clamp are missing. */ |
| 1797 | int op = m_op; |
| 1798 | |
| 1799 | __m128i loProduct, hiProduct, unpackLo, unpackHi; |
| 1800 | __m128i vaccHigh; |
| 1801 | __m128i vdReg, vdRegLo, vdRegHi; |
| 1802 | |
| 1803 | __m128i vsReg = m_xv[VS1REG]; |
| 1804 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* shuffle table defined elsewhere; presumably applies the opcode's element selector - confirm */ |
| 1805 | |
| 1806 | __m128i vaccLow = m_accum_l; |
| 1807 | |
| 1808 | /* Unpack to obtain for 32-bit precision. */ |
| 1809 | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 1810 | |
| 1811 | /* Begin accumulating the products. */ |
| 1812 | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 1813 | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 1814 | loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 1815 | hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 1816 | loProduct = _mm_slli_epi32(loProduct, 1); /* double the product */ |
| 1817 | hiProduct = _mm_slli_epi32(hiProduct, 1); |
| 1818 | /* Isolate the low 16 bits of each doubled product: clear-by-shift, then XOR with the original. */ |
| 1819 | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 1820 | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 1821 | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 1822 | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 1823 | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 1824 | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 1825 | /* Add them to the zero-extended low slice; bit 16 of each sum is the carry into the middle slice. */ |
| 1826 | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 1827 | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 1828 | |
| 1829 | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1830 | |
| 1831 | /* Multiply the MSB of sources, accumulate the product. */ |
| 1832 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); /* 32-bit value with the middle slice low and the high slice high */ |
| 1833 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 1834 | /* Upper 16 bits of the doubled product (sign-extended) plus the carry out of the low-slice sum. */ |
| 1835 | loProduct = _mm_srai_epi32(loProduct, 16); |
| 1836 | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 1837 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 1838 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 1839 | |
| 1840 | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 1841 | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 1842 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 1843 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 1844 | |
| 1845 | /* Write the middle and high slices back. NOTE(review): the original comment said "clamp", but nothing is clamped here. */ |
| 1846 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1847 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 1848 | } |
| 1849 | |
| 1850 | static void cfunc_vmacu(void *param) |
| 1851 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmacu() is run. */ |
| 1852 | static_cast<rsp_cop2 *>(param)->vmacu(); |
| 1853 | } |
| 1854 | |
| 1855 | |
| 1856 | // VMADL |
| 1857 | // |
| 1858 | // 31 25 24 20 15 10 5 0 |
| 1859 | // ------------------------------------------------------ |
| 1860 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 1861 | // ------------------------------------------------------ |
| 1862 | // |
| 1863 | // Multiplies unsigned fraction by unsigned fraction |
| 1864 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1865 | // The low slice of accumulator is stored into destination element |
| 1866 | |
| 1867 | inline void rsp_cop2_simd::vmadl() |
| 1868 | { /* VMADL: per lane, adds the upper 16 bits of the unsigned 16x16 product to the low accumulator slice, rippling carries into the middle and high slices. */ |
| 1869 | int op = m_op; |
| 1870 | |
| 1871 | for (int i = 0; i < 8; i++) |
| 1872 | { |
| 1873 | UINT16 w1, w2; |
| 1874 | GET_VS1(w1, i); /* operand fetch macros are defined elsewhere */ |
| 1875 | GET_VS2(w2, i); |
| 1876 | UINT32 s1 = w1; /* both operands are used unsigned */ |
| 1877 | UINT32 s2 = w2; |
| 1878 | |
| 1879 | UINT32 r1 = s1 * s2; |
| 1880 | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); /* low slice plus upper half of the product */ |
| 1881 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); /* middle slice plus the carry out of the low slice */ |
| 1882 | |
| 1883 | SET_ACCUM_L((UINT16)r2, i); |
| 1884 | SET_ACCUM_M((UINT16)r3, i); |
| 1885 | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); /* high slice plus the carry out of the middle slice */ |
| 1886 | |
| 1887 | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); /* macro defined elsewhere; presumably clamps the accumulator between the two given bounds - confirm */ |
| 1888 | } |
| 1889 | WRITEBACK_RESULT(); /* macro defined elsewhere; presumably copies m_vres into the destination register - confirm */ |
| 1890 | } |
| 1891 | |
| 1892 | static void cfunc_vmadl(void *param) |
| 1893 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmadl() is run. */ |
| 1894 | static_cast<rsp_cop2 *>(param)->vmadl(); |
| 1895 | } |
| 1896 | |
| 1897 | |
| 1898 | // VMADM |
| 1899 | // |
| 1900 | // 31 25 24 20 15 10 5 0 |
| 1901 | // ------------------------------------------------------ |
| 1902 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 1903 | // ------------------------------------------------------ |
| 1904 | // |
| 1905 | // Multiplies signed fraction by unsigned fraction |
| 1906 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1907 | // The medium slice of accumulator is stored into destination element |
| 1908 | |
| 1909 | inline void rsp_cop2_simd::vmadm() |
| 1910 | { /* VMADM: adds the product of signed VS and unsigned VT lanes to the accumulator slices; the destination receives the middle/high slices saturated to signed 16 bits. */ |
| 1911 | int op = m_op; |
| 1912 | |
| 1913 | __m128i vaccLow, vaccHigh, loProduct, hiProduct; |
| 1914 | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi; |
| 1915 | |
| 1916 | __m128i vsReg = m_xv[VS1REG]; |
| 1917 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* shuffle table defined elsewhere; presumably applies the opcode's element selector - confirm */ |
| 1918 | |
| 1919 | /* Unpack to obtain for 32-bit precision. */ |
| 1920 | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 1921 | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 1922 | RSPZeroExtend16to32(m_accum_l, &vaccLow, &vaccHigh); |
| 1923 | |
| 1924 | /* Begin accumulating the products. */ |
| 1925 | loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 1926 | hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 1927 | /* Isolate the low 16 bits of each product (clear-by-shift, then XOR) and add them to the low slice. */ |
| 1928 | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 1929 | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 1930 | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 1931 | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 1932 | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 1933 | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 1934 | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 1935 | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 1936 | /* Only the accumulator is updated here; the destination register is written once, at the end. */ |
| 1937 | m_accum_l = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1938 | |
| 1939 | /* Multiply the MSB of sources, accumulate the product. */ |
| 1940 | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); /* 32-bit value with the middle slice low and the high slice high */ |
| 1941 | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 1942 | /* Upper 16 bits of the product (sign-extended) plus the carry out of the low-slice sum. */ |
| 1943 | loProduct = _mm_srai_epi32(loProduct, 16); |
| 1944 | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 1945 | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 1946 | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 1947 | |
| 1948 | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 1949 | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 1950 | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 1951 | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 1952 | |
| 1953 | /* Clamp the accumulator and write it all out. */ |
| 1954 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); /* signed saturation of the 32-bit middle/high value */ |
| 1955 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 1956 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 1957 | } |
| 1958 | |
| 1959 | static void cfunc_vmadm(void *param) |
| 1960 | { /* C-callable trampoline: param is the opaque pointer to the rsp_cop2 object whose vmadm() is run. */ |
| 1961 | static_cast<rsp_cop2 *>(param)->vmadm(); |
| 1962 | } |
| 1963 | |
| 1964 | |
| 1965 | // VMADN |
| 1966 | // |
| 1967 | // 31 25 24 20 15 10 5 0 |
| 1968 | // ------------------------------------------------------ |
| 1969 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001110 | |
| 1970 | // ------------------------------------------------------ |
| 1971 | // |
| 1972 | // Multiplies unsigned fraction by signed fraction |
| 1973 | // Adds the 32-bit result to the medium and high slices of the accumulator |
| 1974 | // The low slice of accumulator is saturated into destination element |
| 1975 | |
| 1976 | inline void rsp_cop2_simd::vmadn() |
| 1977 | { /* VMADN: per lane, adds the product of the unsigned VS operand and the signed VT operand, shifted left by 16, to the 64-bit value built from the four accumulator slices. */ |
| 1978 | int op = m_op; |
| 1979 | |
| 1980 | for (int i = 0; i < 8; i++) |
| 1981 | { |
| 1982 | UINT16 w1, w2; |
| 1983 | GET_VS1(w1, i); /* operand fetch macros are defined elsewhere */ |
| 1984 | GET_VS2(w2, i); |
| 1985 | INT32 s1 = (UINT16)w1; /* first operand zero-extended */ |
| 1986 | INT32 s2 = (INT32)(INT16)w2; /* second operand sign-extended */ |
| 1987 | /* Assemble the lane's accumulator: LL in bits 15..0, L in 31..16, M in 47..32, H in 63..48. NOTE(review): unlike vmacf, the slices are not cast to UINT16 first - confirm the ACCUM_* macros yield unsigned 16-bit values. */ |
| 1988 | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1989 | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1990 | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1991 | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1992 | q += (INT64)(s1*s2) << 16; /* product aligned with the L slice */ |
| 1993 | |
| 1994 | SET_ACCUM_LL((UINT16)q, i); |
| 1995 | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 1996 | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 1997 | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 1998 | |
| 1999 | m_vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); /* macro defined elsewhere; presumably clamps the accumulator between the two given bounds - confirm */ |
| 2000 | } |
| 2001 | WRITEBACK_RESULT(); /* macro defined elsewhere; presumably copies m_vres into the destination register - confirm */ |
| 2002 | } |
| 2003 | |
| 2004 | static void cfunc_vmadn(void *param) |
| 2005 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vmadn() on it. */ |
| 2006 | static_cast<rsp_cop2 *>(param)->vmadn(); |
| 2007 | } |
| 2008 | |
| 2009 | |
| 2010 | // VMADH |
| 2011 | // |
| 2012 | // 31 25 24 20 15 10 5 0 |
| 2013 | // ------------------------------------------------------ |
| 2014 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 2015 | // ------------------------------------------------------ |
| 2016 | // |
| 2017 | // Multiplies signed integer by signed integer |
| 2018 | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 2019 | // The highest 32 bits of accumulator is saturated into destination element |
| 2020 | |
| 2021 | inline void rsp_cop2_simd::vmadh() |
| 2022 | { /* VMADH: adds the signed 16x16 products of VS1 and the shuffled VS2 to the accumulator's M/H slices. */ |
| 2023 | int op = m_op; |
| 2024 | |
| 2025 | __m128i vsReg = m_xv[VS1REG]; |
| 2026 | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* VS2 byte-shuffled by the table entry for EL */ |
| 2027 | |
| 2028 | /* Interleave the M and H accumulator slices into 32-bit lanes (M in the low half, H in the high half). */ |
| 2029 | __m128i vaccLow = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 2030 | __m128i vaccHigh = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 2031 | |
| 2032 | /* Multiply the sources, accumulate the product. */ |
| 2033 | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); /* low 16 bits of each signed product */ |
| 2034 | __m128i unpackHi = _mm_mulhi_epi16(vsReg, vtReg); /* high 16 bits of each signed product */ |
| 2035 | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); /* full 32-bit products for lanes 0-3 */ |
| 2036 | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); /* full 32-bit products for lanes 4-7 */ |
| 2037 | vaccLow = _mm_add_epi32(vaccLow, loProduct); |
| 2038 | vaccHigh = _mm_add_epi32(vaccHigh, hiProduct); |
| 2039 | |
| 2040 | /* Pack the accumulator and result back up. */ |
| 2041 | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); /* destination: 32-bit sums packed with signed saturation */ |
| 2042 | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 2043 | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 2044 | } |
| 2045 | |
| 2046 | static void cfunc_vmadh(void *param) |
| 2047 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vmadh() on it. */ |
| 2048 | static_cast<rsp_cop2 *>(param)->vmadh(); |
| 2049 | } |
| 2050 | |
| 2051 | |
| 2052 | // VADD |
| 2053 | // 31 25 24 20 15 10 5 0 |
| 2054 | // ------------------------------------------------------ |
| 2055 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 2056 | // ------------------------------------------------------ |
| 2057 | // |
| 2058 | // Adds two vector registers and the carry flag; the result is saturated to the signed 16-bit range |
| 2059 | |
| 2060 | inline void rsp_cop2_simd::vadd() |
| 2061 | { |
| 2062 | int op = m_op; |
| 2063 | /* Operands: VS1 as stored, VS2 shuffled through the EL-indexed table, carry masked by vec_flagmask. */ |
| 2064 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2065 | const __m128i carry_in = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 2066 | m_accum_l = _mm_add_epi16(_mm_add_epi16(m_xv[VS1REG], vt), carry_in); |
| 2067 | /* Saturating sum; the carry is dropped wherever that sum already equals vec_32767 or vec_n32768. */ |
| 2068 | const __m128i sat_sum = _mm_adds_epi16(m_xv[VS1REG], vt); |
| 2069 | const __m128i not_at_max = _mm_xor_si128(_mm_cmpeq_epi16(sat_sum, vec_32767), vec_neg1); |
| 2070 | const __m128i not_at_min = _mm_xor_si128(_mm_cmpeq_epi16(sat_sum, vec_n32768), vec_neg1); |
| 2071 | const __m128i carry_used = _mm_and_si128(_mm_and_si128(carry_in, not_at_max), not_at_min); |
| 2072 | /* Destination receives the saturated sum plus whatever carry survived the masking. */ |
| 2073 | m_xv[VDREG] = _mm_add_epi16(sat_sum, carry_used); |
| 2074 | /* Both flag vectors are cleared on the way out. */ |
| 2075 | m_xvflag[ZERO] = vec_zero; |
| 2076 | m_xvflag[CARRY] = vec_zero; |
| 2077 | } |
| 2078 | |
| 2079 | static void cfunc_vadd(void *param) |
| 2080 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vadd() on it. */ |
| 2081 | static_cast<rsp_cop2 *>(param)->vadd(); |
| 2082 | } |
| 2083 | |
| 2084 | |
| 2085 | // VSUB |
| 2086 | // |
| 2087 | // 31 25 24 20 15 10 5 0 |
| 2088 | // ------------------------------------------------------ |
| 2089 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 2090 | // ------------------------------------------------------ |
| 2091 | // |
| 2092 | // Subtracts two vector registers and the carry flag; the result is saturated to the signed 16-bit range |
| 2093 | // TODO: check VS2REG == VDREG |
| 2094 | |
| 2095 | inline void rsp_cop2_simd::vsub() |
| 2096 | { /* VSUB: VS1 - shuffled VS2 - carry; wrapping result goes to the accumulator low slice, saturated result to VD. */ |
| 2097 | int op = m_op; |
| 2098 | /* Operands and the wrapping (unsaturated) difference. */ |
| 2099 | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2100 | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 2101 | __m128i unsat = _mm_sub_epi16(m_xv[VS1REG], shuffled); |
| 2102 | /* Lane masks: vs2neg where the shuffled VS2 element is below zero, vs2pos where it is not. */ |
| 2103 | __m128i vs2neg = _mm_cmplt_epi16(shuffled, vec_zero); |
| 2104 | __m128i vs2pos = _mm_cmpeq_epi16(vs2neg, vec_zero); |
| 2105 | /* carry_mask: VS2 is negative and the saturating subtract equals the wrapping one (nothing was clipped). */ |
| 2106 | __m128i saturated = _mm_subs_epi16(m_xv[VS1REG], shuffled); |
| 2107 | __m128i carry_mask = _mm_cmpeq_epi16(unsat, saturated); |
| 2108 | carry_mask = _mm_and_si128(vs2neg, carry_mask); |
| 2109 | /* vs2neg and vs2pos are reused from here on to hold the carry to apply in each of the two cases. */ |
| 2110 | vs2neg = _mm_and_si128(carry_mask, carry); |
| 2111 | vs2pos = _mm_and_si128(vs2pos, carry); |
| 2112 | __m128i dest_carry = _mm_or_si128(vs2neg, vs2pos); |
| 2113 | m_xv[VDREG] = _mm_subs_epi16(saturated, dest_carry); /* second saturating subtract removes the selected carry */ |
| 2114 | /* Accumulator low slice keeps the wrapping difference minus the (unselected) carry. */ |
| 2115 | m_accum_l = _mm_sub_epi16(unsat, carry); |
| 2116 | /* ZERO and CARRY flag vectors are cleared on the way out. */ |
| 2117 | m_xvflag[ZERO] = _mm_setzero_si128(); |
| 2118 | m_xvflag[CARRY] = _mm_setzero_si128(); |
| 2119 | } |
| 2120 | |
| 2121 | static void cfunc_vsub(void *param) |
| 2122 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vsub() on it. */ |
| 2123 | static_cast<rsp_cop2 *>(param)->vsub(); |
| 2124 | } |
| 2125 | |
| 2126 | |
| 2127 | // VABS |
| 2128 | // |
| 2129 | // 31 25 24 20 15 10 5 0 |
| 2130 | // ------------------------------------------------------ |
| 2131 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 2132 | // ------------------------------------------------------ |
| 2133 | // |
| 2134 | // Changes the sign of source register 2 if source register 1 is negative and stores the result to destination register |
| 2135 | |
| 2136 | inline void rsp_cop2_simd::vabs() |
| 2137 | { /* VABS, per lane: VS1 > 0 keeps VS2, VS1 < 0 negates VS2, VS1 == 0 yields 0 (no term below covers it). */ |
| 2138 | int op = m_op; |
| 2139 | /* shuf2 is the EL-shuffled VS2, negs2 its wrapping negation; the two masks flag VS2 == vec_n32768 and VS1 < 0. */ |
| 2140 | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2141 | __m128i negs2 = _mm_sub_epi16(_mm_setzero_si128(), shuf2); |
| 2142 | __m128i s2_n32768 = _mm_cmpeq_epi16(shuf2, vec_n32768); |
| 2143 | __m128i s1_lz = _mm_cmplt_epi16(m_xv[VS1REG], _mm_setzero_si128()); |
| 2144 | /* Three mutually exclusive contributions, OR-ed together at the end. */ |
| 2145 | __m128i result_gz = _mm_and_si128(shuf2, _mm_cmpgt_epi16(m_xv[VS1REG], _mm_setzero_si128())); |
| 2146 | __m128i result_n32768 = _mm_and_si128(s1_lz, _mm_and_si128(vec_32767, s2_n32768)); /* VS1 < 0 and VS2 == vec_n32768: vec_32767 replaces the wrapping negation */ |
| 2147 | __m128i result_negs2 = _mm_and_si128(s1_lz, _mm_and_si128(negs2, _mm_xor_si128(s2_n32768, vec_neg1))); /* VS1 < 0 otherwise: negated VS2 */ |
| 2148 | m_xv[VDREG] = m_accum_l = _mm_or_si128(result_gz, _mm_or_si128(result_n32768, result_negs2)); |
| 2149 | } |
| 2150 | |
| 2151 | static void cfunc_vabs(void *param) |
| 2152 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vabs() on it. */ |
| 2153 | static_cast<rsp_cop2 *>(param)->vabs(); |
| 2154 | } |
| 2155 | |
| 2156 | |
| 2157 | // VADDC |
| 2158 | // |
| 2159 | // 31 25 24 20 15 10 5 0 |
| 2160 | // ------------------------------------------------------ |
| 2161 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 2162 | // ------------------------------------------------------ |
| 2163 | // |
| 2164 | // Adds two vector registers, the carry out is stored into carry register |
| 2165 | // TODO: check VS2REG = VDREG |
| 2166 | |
| 2167 | inline void rsp_cop2_simd::vaddc() |
| 2168 | { /* VADDC: the 16-bit adds are carried out in 32-bit lanes so the carry out of bit 15 can be captured. */ |
| 2169 | int op = m_op; |
| 2170 | |
| 2171 | CLEAR_ZERO_FLAGS(); |
| 2172 | CLEAR_CARRY_FLAGS(); |
| 2173 | /* Split both operands into the low and high 16-bit halves of each 32-bit lane (vec_lomask presumably keeps the low half). */ |
| 2174 | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2175 | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 2176 | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 2177 | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 2178 | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 2179 | __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531); |
| 2180 | __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420); |
| 2181 | /* A non-zero upper half of a 32-bit sum means a carry; it becomes a mask there (vec_himask presumably keeps the upper half). */ |
| 2182 | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2183 | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2184 | |
| 2185 | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 2186 | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 2187 | /* Re-interleave the halves: carry masks into CARRY, truncated sums into VD and the accumulator low slice. */ |
| 2188 | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 2189 | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 2190 | } |
| 2191 | |
| 2192 | static void cfunc_vaddc(void *param) |
| 2193 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vaddc() on it. */ |
| 2194 | static_cast<rsp_cop2 *>(param)->vaddc(); |
| 2195 | } |
| 2196 | |
| 2197 | |
| 2198 | // VSUBC |
| 2199 | // |
| 2200 | // 31 25 24 20 15 10 5 0 |
| 2201 | // ------------------------------------------------------ |
| 2202 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 2203 | // ------------------------------------------------------ |
| 2204 | // |
| 2205 | // Subtracts two vector registers, the carry out is stored into carry register |
| 2206 | // TODO: check VS2REG = VDREG |
| 2207 | |
| 2208 | inline void rsp_cop2_simd::vsubc() |
| 2209 | { /* VSUBC: the 16-bit subtracts are carried out in 32-bit lanes so the borrow out of bit 15 can be captured. */ |
| 2210 | int op = m_op; |
| 2211 | |
| 2212 | CLEAR_ZERO_FLAGS(); |
| 2213 | CLEAR_CARRY_FLAGS(); |
| 2214 | /* Split both operands into the low and high 16-bit halves of each 32-bit lane (vec_lomask presumably keeps the low half). */ |
| 2215 | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2216 | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 2217 | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 2218 | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 2219 | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 2220 | __m128i sum7531 = _mm_sub_epi32(vec7531, shuf7531); |
| 2221 | __m128i sum6420 = _mm_sub_epi32(vec6420, shuf6420); |
| 2222 | /* over*: upper half of the 32-bit difference is non-zero (a borrow occurred); kept as a mask in the upper half. */ |
| 2223 | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2224 | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 2225 | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 2226 | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 2227 | __m128i zero7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_lomask); /* set where the truncated difference is NOT zero */ |
| 2228 | __m128i zero6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_lomask); /* same test for the other half of each lane */ |
| 2229 | /* Re-interleave the halves into the CARRY (borrow) and ZERO (result non-zero) flag vectors. */ |
| 2230 | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 2231 | m_xvflag[ZERO] = _mm_or_si128(_mm_slli_epi32(zero6420, 16), zero7531); |
| 2232 | /* The truncated differences go to both VD and the accumulator low slice. */ |
| 2233 | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 2234 | } |
| 2235 | |
| 2236 | static void cfunc_vsubc(void *param) |
| 2237 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vsubc() on it. */ |
| 2238 | static_cast<rsp_cop2 *>(param)->vsubc(); |
| 2239 | } |
| 2240 | |
| 2241 | |
| 2242 | // VADDB |
| 2243 | // |
| 2244 | // 31 25 24 20 15 10 5 0 |
| 2245 | // ------------------------------------------------------ |
| 2246 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010110 | |
| 2247 | // ------------------------------------------------------ |
| 2248 | // |
| 2249 | // Adds two vector registers bytewise with rounding |
| 2250 | inline void rsp_cop2_simd::vaddb() |
| 2251 | { /* VADDB: scalar loop adding the two bytes of every 16-bit element separately, with a rounding term derived from EL. */ |
| 2252 | const int op = m_op; |
| 2253 | const int round = (EL == 0) ? 0 : (1 << (EL - 1)); /* half of 2^EL, or 0 when EL is 0 */ |
| 2254 | |
| 2255 | for (int i = 0; i < 8; i++) |
| 2256 | { |
| 2257 | UINT16 w1, w2; |
| 2258 | GET_VS1(w1, i); |
| 2259 | GET_VS2(w2, i); |
| 2260 | /* Split each operand into its high and low byte. */ |
| 2261 | UINT8 hb1 = w1 >> 8; |
| 2262 | UINT8 lb1 = w1 & 0xff; |
| 2263 | UINT8 hb2 = w2 >> 8; |
| 2264 | UINT8 lb2 = w2 & 0xff; |
| 2265 | /* Byte sums plus rounding, held in 16 bits so they may exceed 255. */ |
| 2266 | UINT16 hs = hb1 + hb2 + round; |
| 2267 | UINT16 ls = lb1 + lb2 + round; |
| 2268 | /* The accumulator low slice gets the unshifted sums; an ls above 255 overlaps the hs bits in the OR. */ |
| 2269 | SET_ACCUM_L((hs << 8) | ls, i); |
| 2270 | /* The shift and clamp below only feed the commented-out writeback further down; their results are currently unused. */ |
| 2271 | hs >>= EL; |
| 2272 | if (hs > 255) |
| 2273 | { |
| 2274 | hs = 255; |
| 2275 | } |
| 2276 | |
| 2277 | ls >>= EL; |
| 2278 | if (ls > 255) |
| 2279 | { |
| 2280 | ls = 255; |
| 2281 | } |
| 2282 | |
| 2283 | m_vres[i] = 0; // VD writeback disabled on production hardware |
| 2284 | // m_vres[i] = (hs << 8) | ls; |
| 2285 | } |
| 2286 | WRITEBACK_RESULT(); |
| 2287 | } |
| 2288 | |
| 2289 | static void cfunc_vaddb(void *param) |
| 2290 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vaddb() on it. */ |
| 2291 | static_cast<rsp_cop2 *>(param)->vaddb(); |
| 2292 | } |
| 2293 | |
| 2294 | |
| 2295 | // VSAW |
| 2296 | // |
| 2297 | // 31 25 24 20 15 10 5 0 |
| 2298 | // ------------------------------------------------------ |
| 2299 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 2300 | // ------------------------------------------------------ |
| 2301 | // |
| 2302 | // Stores high, middle or low slice of accumulator to destination vector |
| 2303 | |
| 2304 | inline void rsp_cop2_simd::vsaw() |
| 2305 | { /* VSAW: copies the accumulator slice selected by the EL field into VD; any other EL value zeroes VD. */ |
| 2306 | int op = m_op; |
| 2307 | |
| 2308 | switch (EL) |
| 2309 | { |
| 2310 | case 0x08: // VSAWH |
| 2311 | { |
| 2312 | m_xv[VDREG] = m_accum_h; |
| 2313 | break; |
| 2314 | } |
| 2315 | case 0x09: // VSAWM |
| 2316 | { |
| 2317 | m_xv[VDREG] = m_accum_m; |
| 2318 | break; |
| 2319 | } |
| 2320 | case 0x0a: // VSAWL |
| 2321 | { |
| 2322 | m_xv[VDREG] = m_accum_l; |
| 2323 | break; |
| 2324 | } |
| 2325 | default: // Unsupported, writes 0 to VD |
| 2326 | { |
| 2327 | m_xv[VDREG] = _mm_setzero_si128(); /* clear the whole destination instead of leaving its old contents */ |
| 2328 | } |
| 2329 | } |
| 2330 | } |
| 2331 | |
| 2332 | static void cfunc_vsaw(void *param) |
| 2333 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vsaw() on it. */ |
| 2334 | static_cast<rsp_cop2 *>(param)->vsaw(); |
| 2335 | } |
| 2336 | |
| 2337 | |
| 2338 | // VLT |
| 2339 | // |
| 2340 | // 31 25 24 20 15 10 5 0 |
| 2341 | // ------------------------------------------------------ |
| 2342 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 2343 | // ------------------------------------------------------ |
| 2344 | // |
| 2345 | // Sets compare flags if elements in VS1 are less than VS2 |
| 2346 | // Destination receives the VS1 element where the compare flag is set, otherwise the VS2 element |
| 2347 | |
| 2348 | inline void rsp_cop2_simd::vlt() |
| 2349 | { |
| 2350 | int op = m_op; |
| 2351 | /* Reset the two flag vectors this instruction rewrites. */ |
| 2352 | m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2353 | m_xvflag[COMPARE] = _mm_setzero_si128(); |
| 2354 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2355 | const __m128i both_flags = _mm_and_si128(m_xvflag[ZERO], m_xvflag[CARRY]); |
| 2356 | const __m128i strictly_less = _mm_cmplt_epi16(m_xv[VS1REG], vt); |
| 2357 | const __m128i equal_and_flagged = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], vt), both_flags); |
| 2358 | /* COMPARE lane is set when VS1 < VS2, or when they are equal and both the ZERO and CARRY lanes are set. */ |
| 2359 | m_xvflag[COMPARE] = _mm_or_si128(strictly_less, equal_and_flagged); |
| 2360 | const __m128i from_vs1 = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2361 | const __m128i from_vt = _mm_and_si128(vt, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1)); |
| 2362 | m_xv[VDREG] = _mm_or_si128(from_vs1, from_vt); |
| 2363 | m_accum_l = m_xv[VDREG]; |
| 2364 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); /* ZERO and CARRY are cleared on the way out */ |
| 2365 | } |
| 2366 | |
| 2367 | static void cfunc_vlt(void *param) |
| 2368 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vlt() on it. */ |
| 2369 | ((rsp_cop2 *)param)->vlt(); |
| 2370 | } |
| 2371 | |
| 2372 | |
| 2373 | // VEQ |
| 2374 | // |
| 2375 | // 31 25 24 20 15 10 5 0 |
| 2376 | // ------------------------------------------------------ |
| 2377 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 2378 | // ------------------------------------------------------ |
| 2379 | // |
| 2380 | // Sets compare flags if elements in VS1 are equal with VS2 |
| 2381 | // Destination receives the VS1 element where the compare flag is set, otherwise the VS2 element |
| 2382 | |
| 2383 | inline void rsp_cop2_simd::veq() |
| 2384 | { |
| 2385 | int op = m_op; |
| 2386 | /* Reset the two flag vectors this instruction rewrites. */ |
| 2387 | m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2388 | m_xvflag[COMPARE] = _mm_setzero_si128(); |
| 2389 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2390 | const __m128i zero_clear = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 2391 | const __m128i same = _mm_cmpeq_epi16(m_xv[VS1REG], vt); |
| 2392 | /* COMPARE lane is set when the elements match and the ZERO flag lane is clear. */ |
| 2393 | m_xvflag[COMPARE] = _mm_and_si128(zero_clear, same); |
| 2394 | const __m128i from_vs1 = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2395 | const __m128i from_vt = _mm_and_si128(vt, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1)); |
| 2396 | m_xv[VDREG] = _mm_or_si128(from_vs1, from_vt); |
| 2397 | m_accum_l = m_xv[VDREG]; |
| 2398 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); /* ZERO and CARRY are cleared on the way out */ |
| 2399 | } |
| 2400 | |
| 2401 | static void cfunc_veq(void *param) |
| 2402 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes veq() on it. */ |
| 2403 | static_cast<rsp_cop2 *>(param)->veq(); |
| 2404 | } |
| 2405 | |
| 2406 | |
| 2407 | // VNE |
| 2408 | // |
| 2409 | // 31 25 24 20 15 10 5 0 |
| 2410 | // ------------------------------------------------------ |
| 2411 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 2412 | // ------------------------------------------------------ |
| 2413 | // |
| 2414 | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 2415 | // Destination receives the VS1 element where the compare flag is set, otherwise the VS2 element |
| 2416 | |
| 2417 | inline void rsp_cop2_simd::vne() |
| 2418 | { |
| 2419 | int op = m_op; |
| 2420 | /* Reset the two flag vectors this instruction rewrites. */ |
| 2421 | m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2422 | m_xvflag[COMPARE] = _mm_setzero_si128(); |
| 2423 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2424 | const __m128i differ = _mm_xor_si128(_mm_cmpeq_epi16(m_xv[VS1REG], vt), vec_neg1); |
| 2425 | /* COMPARE is the ZERO flag vector OR-ed with the lanes where the elements differ. */ |
| 2426 | m_xvflag[COMPARE] = _mm_or_si128(m_xvflag[ZERO], differ); |
| 2427 | const __m128i from_vs1 = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2428 | const __m128i from_vt = _mm_and_si128(vt, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1)); |
| 2429 | m_xv[VDREG] = _mm_or_si128(from_vs1, from_vt); |
| 2430 | m_accum_l = m_xv[VDREG]; |
| 2431 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); /* ZERO and CARRY are cleared on the way out */ |
| 2432 | } |
| 2433 | |
| 2434 | static void cfunc_vne(void *param) |
| 2435 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vne() on it. */ |
| 2436 | static_cast<rsp_cop2 *>(param)->vne(); |
| 2437 | } |
| 2438 | |
| 2439 | |
| 2440 | // VGE |
| 2441 | // |
| 2442 | // 31 25 24 20 15 10 5 0 |
| 2443 | // ------------------------------------------------------ |
| 2444 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 2445 | // ------------------------------------------------------ |
| 2446 | // |
| 2447 | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 2448 | // Destination receives the VS1 element where the compare flag is set, otherwise the VS2 element |
| 2449 | |
| 2450 | inline void rsp_cop2_simd::vge() |
| 2451 | { |
| 2452 | int op = m_op; |
| 2453 | /* Reset the two flag vectors this instruction rewrites. */ |
| 2454 | m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 2455 | m_xvflag[COMPARE] = _mm_setzero_si128(); |
| 2456 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2457 | const __m128i zero_clear = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 2458 | const __m128i carry_clear = _mm_cmpeq_epi16(m_xvflag[CARRY], _mm_setzero_si128()); |
| 2459 | const __m128i either_clear = _mm_or_si128(zero_clear, carry_clear); |
| 2460 | const __m128i equal_allowed = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], vt), either_clear); |
| 2461 | const __m128i strictly_greater = _mm_cmpgt_epi16(m_xv[VS1REG], vt); |
| 2462 | m_xvflag[COMPARE] = _mm_or_si128(equal_allowed, strictly_greater); /* VS1 > VS2, or equal with ZERO or CARRY clear */ |
| 2463 | const __m128i from_vs1 = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 2464 | const __m128i from_vt = _mm_and_si128(vt, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1)); |
| 2465 | m_xv[VDREG] = _mm_or_si128(from_vs1, from_vt); |
| 2466 | m_accum_l = m_xv[VDREG]; |
| 2467 | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); /* ZERO and CARRY are cleared on the way out */ |
| 2468 | } |
| 2469 | |
| 2470 | static void cfunc_vge(void *param) |
| 2471 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vge() on it. */ |
| 2472 | static_cast<rsp_cop2 *>(param)->vge(); |
| 2473 | } |
| 2474 | |
| 2475 | |
| 2476 | // VCL |
| 2477 | // |
| 2478 | // 31 25 24 20 15 10 5 0 |
| 2479 | // ------------------------------------------------------ |
| 2480 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 2481 | // ------------------------------------------------------ |
| 2482 | // |
| 2483 | // Vector clip low |
| 2484 | |
| 2485 | inline void rsp_cop2_simd::vcl() |
| 2486 | { /* VCL: scalar loop; each element picks s1, s2 or -s2 for the accumulator low slice based on the current flags. */ |
| 2487 | int op = m_op; |
| 2488 | |
| 2489 | for (int i = 0; i < 8; i++) |
| 2490 | { |
| 2491 | INT16 s1, s2; |
| 2492 | GET_VS1(s1, i); |
| 2493 | GET_VS2(s2, i); |
| 2494 | |
| 2495 | if (CARRY_FLAG(i) != 0) /* carry set: candidates are s1 and the negated s2 */ |
| 2496 | { |
| 2497 | if (ZERO_FLAG(i) != 0) /* zero set: reuse the existing COMPARE flag without updating it */ |
| 2498 | { |
| 2499 | if (COMPARE_FLAG(i) != 0) |
| 2500 | { |
| 2501 | SET_ACCUM_L(-(UINT16)s2, i); |
| 2502 | } |
| 2503 | else |
| 2504 | { |
| 2505 | SET_ACCUM_L(s1, i); |
| 2506 | } |
| 2507 | } |
| 2508 | else /* zero clear: decide from the unsigned sum s1 + s2 and rewrite COMPARE */ |
| 2509 | { |
| 2510 | if (CLIP1_FLAG(i) != 0) |
| 2511 | { |
| 2512 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) /* 17-bit sum compared against 0x10000 */ |
| 2513 | { |
| 2514 | SET_ACCUM_L(s1, i); |
| 2515 | CLEAR_COMPARE_FLAG(i); |
| 2516 | } |
| 2517 | else |
| 2518 | { |
| 2519 | SET_ACCUM_L(-((UINT16)s2), i); |
| 2520 | SET_COMPARE_FLAG(i); |
| 2521 | } |
| 2522 | } |
| 2523 | else |
| 2524 | { |
| 2525 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) /* NOTE(review): sum is not truncated to 16 bits, so it is 0 only when both operands are 0 - confirm intended */ |
| 2526 | { |
| 2527 | SET_ACCUM_L(s1, i); |
| 2528 | CLEAR_COMPARE_FLAG(i); |
| 2529 | } |
| 2530 | else |
| 2531 | { |
| 2532 | SET_ACCUM_L(-((UINT16)s2), i); |
| 2533 | SET_COMPARE_FLAG(i); |
| 2534 | } |
| 2535 | } |
| 2536 | } |
| 2537 | } |
| 2538 | else /* carry clear: candidates are s1 and s2 */ |
| 2539 | { |
| 2540 | if (ZERO_FLAG(i) != 0) /* zero set: reuse the existing CLIP2 flag without updating it */ |
| 2541 | { |
| 2542 | if (CLIP2_FLAG(i) != 0) |
| 2543 | { |
| 2544 | SET_ACCUM_L(s2, i); |
| 2545 | } |
| 2546 | else |
| 2547 | { |
| 2548 | SET_ACCUM_L(s1, i); |
| 2549 | } |
| 2550 | } |
| 2551 | else /* zero clear: unsigned comparison s1 >= s2 decides and rewrites CLIP2 */ |
| 2552 | { |
| 2553 | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 2554 | { |
| 2555 | SET_ACCUM_L(s2, i); |
| 2556 | SET_CLIP2_FLAG(i); |
| 2557 | } |
| 2558 | else |
| 2559 | { |
| 2560 | SET_ACCUM_L(s1, i); |
| 2561 | CLEAR_CLIP2_FLAG(i); |
| 2562 | } |
| 2563 | } |
| 2564 | } |
| 2565 | m_vres[i] = ACCUM_L(i); /* the destination element mirrors the accumulator low slice */ |
| 2566 | } |
| 2567 | CLEAR_ZERO_FLAGS(); |
| 2568 | CLEAR_CARRY_FLAGS(); |
| 2569 | CLEAR_CLIP1_FLAGS(); |
| 2570 | WRITEBACK_RESULT(); |
| 2571 | } |
| 2572 | |
| 2573 | static void cfunc_vcl(void *param) |
| 2574 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vcl() on it. */ |
| 2575 | static_cast<rsp_cop2 *>(param)->vcl(); |
| 2576 | } |
| 2577 | |
| 2578 | |
| 2579 | // VCH |
| 2580 | // |
| 2581 | // 31 25 24 20 15 10 5 0 |
| 2582 | // ------------------------------------------------------ |
| 2583 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 2584 | // ------------------------------------------------------ |
| 2585 | // |
| 2586 | // Vector clip high |
| 2587 | |
| 2588 | inline void rsp_cop2_simd::vch() |
| 2589 | { /* VCH: scalar loop that clears all five flag sets, then rebuilds them per element while picking s1, s2 or -s2. */ |
| 2590 | int op = m_op; |
| 2591 | |
| 2592 | CLEAR_CARRY_FLAGS(); |
| 2593 | CLEAR_COMPARE_FLAGS(); |
| 2594 | CLEAR_CLIP1_FLAGS(); |
| 2595 | CLEAR_ZERO_FLAGS(); |
| 2596 | CLEAR_CLIP2_FLAGS(); |
| 2597 | |
| 2598 | UINT32 vce = 0; /* reassigned in both branches for every element before it is read */ |
| 2599 | for (int i = 0; i < 8; i++) |
| 2600 | { |
| 2601 | INT16 s1, s2; |
| 2602 | GET_VS1(s1, i); |
| 2603 | GET_VS2(s2, i); |
| 2604 | |
| 2605 | if ((s1 ^ s2) < 0) /* operands have opposite signs */ |
| 2606 | { |
| 2607 | vce = (s1 + s2 == -1); |
| 2608 | SET_CARRY_FLAG(i); |
| 2609 | if (s2 < 0) |
| 2610 | { |
| 2611 | SET_CLIP2_FLAG(i); |
| 2612 | } |
| 2613 | |
| 2614 | if ((s1 + s2) <= 0) |
| 2615 | { |
| 2616 | SET_COMPARE_FLAG(i); |
| 2617 | m_vres[i] = -((UINT16)s2); |
| 2618 | } |
| 2619 | else |
| 2620 | { |
| 2621 | m_vres[i] = s1; |
| 2622 | } |
| 2623 | |
| 2624 | if ((s1 + s2) != 0 && s1 != ~s2) /* neither s1 == -s2 nor s1 == ~s2 */ |
| 2625 | { |
| 2626 | SET_ZERO_FLAG(i); |
| 2627 | } |
| 2628 | }//sign |
| 2629 | else /* operands have the same sign (or one of them is zero with the other non-negative) */ |
| 2630 | { |
| 2631 | vce = 0; |
| 2632 | if (s2 < 0) |
| 2633 | { |
| 2634 | SET_COMPARE_FLAG(i); |
| 2635 | } |
| 2636 | if ((s1 - s2) >= 0) |
| 2637 | { |
| 2638 | SET_CLIP2_FLAG(i); |
| 2639 | m_vres[i] = s2; |
| 2640 | } |
| 2641 | else |
| 2642 | { |
| 2643 | m_vres[i] = s1; |
| 2644 | } |
| 2645 | |
| 2646 | if ((s1 - s2) != 0 && s1 != ~s2) |
| 2647 | { |
| 2648 | SET_ZERO_FLAG(i); |
| 2649 | } |
| 2650 | } |
| 2651 | if (vce) |
| 2652 | { |
| 2653 | SET_CLIP1_FLAG(i); |
| 2654 | } |
| 2655 | SET_ACCUM_L(m_vres[i], i); /* the accumulator low slice mirrors the selected element */ |
| 2656 | } |
| 2657 | WRITEBACK_RESULT(); |
| 2658 | } |
| 2659 | |
| 2660 | static void cfunc_vch(void *param) |
| 2661 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vch() on it. */ |
| 2662 | static_cast<rsp_cop2 *>(param)->vch(); |
| 2663 | } |
| 2664 | |
| 2665 | |
| 2666 | // VCR |
| 2667 | // |
| 2668 | // 31 25 24 20 15 10 5 0 |
| 2669 | // ------------------------------------------------------ |
| 2670 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 2671 | // ------------------------------------------------------ |
| 2672 | // |
| 2673 | // Vector clip reverse |
| 2674 | |
| 2675 | inline void rsp_cop2_simd::vcr() |
| 2676 | { /* VCR: scalar loop; like the clip above it but the opposite-sign case uses the bitwise complement ~s2, and only COMPARE/CLIP2 are rebuilt. */ |
| 2677 | int op = m_op; |
| 2678 | |
| 2679 | CLEAR_CARRY_FLAGS(); |
| 2680 | CLEAR_COMPARE_FLAGS(); |
| 2681 | CLEAR_CLIP1_FLAGS(); |
| 2682 | CLEAR_ZERO_FLAGS(); |
| 2683 | CLEAR_CLIP2_FLAGS(); |
| 2684 | |
| 2685 | for (int i = 0; i < 8; i++) |
| 2686 | { |
| 2687 | INT16 s1, s2; |
| 2688 | GET_VS1(s1, i); |
| 2689 | GET_VS2(s2, i); |
| 2690 | |
| 2691 | if ((INT16)(s1 ^ s2) < 0) /* operands have opposite signs */ |
| 2692 | { |
| 2693 | if (s2 < 0) |
| 2694 | { |
| 2695 | SET_CLIP2_FLAG(i); |
| 2696 | } |
| 2697 | if ((s1 + s2) <= 0) |
| 2698 | { |
| 2699 | SET_ACCUM_L(~((UINT16)s2), i); |
| 2700 | SET_COMPARE_FLAG(i); |
| 2701 | } |
| 2702 | else |
| 2703 | { |
| 2704 | SET_ACCUM_L(s1, i); |
| 2705 | } |
| 2706 | } |
| 2707 | else /* sign bits agree */ |
| 2708 | { |
| 2709 | if (s2 < 0) |
| 2710 | { |
| 2711 | SET_COMPARE_FLAG(i); |
| 2712 | } |
| 2713 | if ((s1 - s2) >= 0) |
| 2714 | { |
| 2715 | SET_ACCUM_L(s2, i); |
| 2716 | SET_CLIP2_FLAG(i); |
| 2717 | } |
| 2718 | else |
| 2719 | { |
| 2720 | SET_ACCUM_L(s1, i); |
| 2721 | } |
| 2722 | } |
| 2723 | |
| 2724 | m_vres[i] = ACCUM_L(i); /* the destination element mirrors the accumulator low slice */ |
| 2725 | } |
| 2726 | WRITEBACK_RESULT(); |
| 2727 | } |
| 2728 | |
| 2729 | static void cfunc_vcr(void *param) |
| 2730 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vcr() on it. */ |
| 2731 | static_cast<rsp_cop2 *>(param)->vcr(); |
| 2732 | } |
| 2733 | |
| 2734 | |
| 2735 | // VMRG |
| 2736 | // |
| 2737 | // 31 25 24 20 15 10 5 0 |
| 2738 | // ------------------------------------------------------ |
| 2739 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 2740 | // ------------------------------------------------------ |
| 2741 | // |
| 2742 | // Merges two vectors according to compare flags |
| 2743 | |
| 2744 | inline void rsp_cop2_simd::vmrg() |
| 2745 | { |
| 2746 | int op = m_op; |
| 2747 | /* Lanes whose COMPARE flag is zero take the shuffled VS2 element; all other lanes take VS1. */ |
| 2748 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2749 | const __m128i pick_vt = _mm_cmpeq_epi16(m_xvflag[COMPARE], _mm_setzero_si128()); |
| 2750 | const __m128i pick_vs = _mm_xor_si128(pick_vt, vec_neg1); |
| 2751 | const __m128i merged = _mm_or_si128(_mm_and_si128(m_xv[VS1REG], pick_vs), _mm_and_si128(vt, pick_vt)); |
| 2752 | m_xv[VDREG] = merged; |
| 2753 | m_accum_l = merged; |
| 2754 | } |
| 2755 | |
| 2756 | static void cfunc_vmrg(void *param) |
| 2757 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vmrg() on it. */ |
| 2758 | static_cast<rsp_cop2 *>(param)->vmrg(); |
| 2759 | } |
| 2760 | |
| 2761 | |
| 2762 | // VAND |
| 2763 | // |
| 2764 | // 31 25 24 20 15 10 5 0 |
| 2765 | // ------------------------------------------------------ |
| 2766 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 2767 | // ------------------------------------------------------ |
| 2768 | // |
| 2769 | // Bitwise AND of two vector registers |
| 2770 | |
| 2771 | inline void rsp_cop2_simd::vand() |
| 2772 | { /* VD and the accumulator low slice both receive VS1 AND (VS2 shuffled through the EL table entry). */ |
| 2773 | int op = m_op; |
| 2774 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2775 | const __m128i conj = _mm_and_si128(m_xv[VS1REG], vt); |
| 2776 | m_accum_l = m_xv[VDREG] = conj; |
| 2777 | } |
| 2778 | |
| 2779 | static void cfunc_vand(void *param) |
| 2780 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vand() on it. */ |
| 2781 | static_cast<rsp_cop2 *>(param)->vand(); |
| 2782 | } |
| 2783 | |
| 2784 | |
| 2785 | // VNAND |
| 2786 | // |
| 2787 | // 31 25 24 20 15 10 5 0 |
| 2788 | // ------------------------------------------------------ |
| 2789 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 2790 | // ------------------------------------------------------ |
| 2791 | // |
| 2792 | // Bitwise NOT AND of two vector registers |
| 2793 | |
| 2794 | inline void rsp_cop2_simd::vnand() |
| 2795 | { /* VD and the accumulator low slice both receive (VS1 AND shuffled VS2) XOR vec_neg1. */ |
| 2796 | int op = m_op; |
| 2797 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2798 | const __m128i conj = _mm_and_si128(m_xv[VS1REG], vt); |
| 2799 | m_accum_l = m_xv[VDREG] = _mm_xor_si128(conj, vec_neg1); |
| 2800 | } |
| 2801 | |
| 2802 | static void cfunc_vnand(void *param) |
| 2803 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vnand() on it. */ |
| 2804 | static_cast<rsp_cop2 *>(param)->vnand(); |
| 2805 | } |
| 2806 | |
| 2807 | |
| 2808 | // VOR |
| 2809 | // |
| 2810 | // 31 25 24 20 15 10 5 0 |
| 2811 | // ------------------------------------------------------ |
| 2812 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 2813 | // ------------------------------------------------------ |
| 2814 | // |
| 2815 | // Bitwise OR of two vector registers |
| 2816 | |
| 2817 | inline void rsp_cop2_simd::vor() |
| 2818 | { /* VD and the accumulator low slice both receive VS1 OR (VS2 shuffled through the EL table entry). */ |
| 2819 | int op = m_op; |
| 2820 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2821 | const __m128i disj = _mm_or_si128(m_xv[VS1REG], vt); |
| 2822 | m_accum_l = m_xv[VDREG] = disj; |
| 2823 | } |
| 2824 | |
| 2825 | static void cfunc_vor_simd(void *param) |
| 2826 | { /* Static callback shim for vor(). NOTE(review): the other shims in this file are named cfunc_<op> without a _simd suffix - confirm which name callers expect. */ |
| 2827 | static_cast<rsp_cop2 *>(param)->vor(); |
| 2828 | } |
| 2829 | |
| 2830 | |
| 2831 | // VNOR |
| 2832 | // |
| 2833 | // 31 25 24 20 15 10 5 0 |
| 2834 | // ------------------------------------------------------ |
| 2835 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 2836 | // ------------------------------------------------------ |
| 2837 | // |
| 2838 | // Bitwise NOT OR of two vector registers |
| 2839 | |
| 2840 | inline void rsp_cop2_simd::vnor() |
| 2841 | { /* VD and the accumulator low slice both receive (VS1 OR shuffled VS2) XOR vec_neg1. */ |
| 2842 | int op = m_op; |
| 2843 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2844 | const __m128i disj = _mm_or_si128(m_xv[VS1REG], vt); |
| 2845 | m_accum_l = m_xv[VDREG] = _mm_xor_si128(disj, vec_neg1); |
| 2846 | } |
| 2847 | |
| 2848 | static void cfunc_vnor(void *param) |
| 2849 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vnor() on it. */ |
| 2850 | static_cast<rsp_cop2 *>(param)->vnor(); |
| 2851 | } |
| 2852 | |
| 2853 | |
| 2854 | // VXOR |
| 2855 | // |
| 2856 | // 31 25 24 20 15 10 5 0 |
| 2857 | // ------------------------------------------------------ |
| 2858 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 2859 | // ------------------------------------------------------ |
| 2860 | // |
| 2861 | // Bitwise XOR of two vector registers |
| 2862 | |
| 2863 | inline void rsp_cop2_simd::vxor() |
| 2864 | { /* VD and the accumulator low slice both receive VS1 XOR (VS2 shuffled through the EL table entry). */ |
| 2865 | int op = m_op; |
| 2866 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2867 | const __m128i diff_bits = _mm_xor_si128(m_xv[VS1REG], vt); |
| 2868 | m_accum_l = m_xv[VDREG] = diff_bits; |
| 2869 | } |
| 2870 | |
| 2871 | static void cfunc_vxor(void *param) |
| 2872 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vxor() on it. */ |
| 2873 | static_cast<rsp_cop2 *>(param)->vxor(); |
| 2874 | } |
| 2875 | |
| 2876 | |
| 2877 | // VNXOR |
| 2878 | // |
| 2879 | // 31 25 24 20 15 10 5 0 |
| 2880 | // ------------------------------------------------------ |
| 2881 | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 2882 | // ------------------------------------------------------ |
| 2883 | // |
| 2884 | // Bitwise NOT XOR of two vector registers |
| 2885 | |
| 2886 | inline void rsp_cop2_simd::vnxor() |
| 2887 | { /* VD and the accumulator low slice both receive (VS1 XOR shuffled VS2) XOR vec_neg1. */ |
| 2888 | int op = m_op; |
| 2889 | const __m128i vt = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 2890 | const __m128i diff_bits = _mm_xor_si128(m_xv[VS1REG], vt); |
| 2891 | m_accum_l = m_xv[VDREG] = _mm_xor_si128(diff_bits, vec_neg1); |
| 2892 | } |
| 2893 | |
| 2894 | static void cfunc_vnxor(void *param) |
| 2895 | { /* Static callback shim: treats the opaque param as an rsp_cop2 pointer and invokes vnxor() on it. */ |
| 2896 | static_cast<rsp_cop2 *>(param)->vnxor(); |
| 2897 | } |
| 2898 | |
| 2899 | |
| 2900 | // VRCP |
| 2901 | // |
| 2902 | // 31 25 24 20 15 10 5 0 |
| 2903 | // ------------------------------------------------------ |
| 2904 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 2905 | // ------------------------------------------------------ |
| 2906 | // |
| 2907 | // Calculates reciprocal |
| 2908 | |
| 2909 | inline void rsp_cop2_simd::vrcp() |
| 2910 | { /* VRCP: table-driven reciprocal of a single signed 16-bit element extracted from VS2. */ |
| 2911 | int op = m_op; |
| 2912 | |
| 2913 | INT32 shifter = 0; |
| 2914 | UINT16 urec; |
| 2915 | INT32 rec; |
| 2916 | EXTRACT16(m_xv[VS2REG], urec, EL); /* source element is selected by EL (EXTRACT16 is defined elsewhere) */ |
| 2917 | rec = (INT16)urec; |
| 2918 | INT32 datainput = (rec < 0) ? (-rec) : rec; /* magnitude of the input */ |
| 2919 | if (datainput) |
| 2920 | { |
| 2921 | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards; shifter ends up as the number of leading zero bits */ |
| 2922 | { |
| 2923 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2924 | { |
| 2925 | shifter = i; |
| 2926 | break; |
| 2927 | } |
| 2928 | } |
| 2929 | } |
| 2930 | else |
| 2931 | { |
| 2932 | shifter = 0x10; /* fixed shift for a zero input; the result is overridden further down anyway */ |
| 2933 | } |
| 2934 | /* Normalise so the top set bit sits at bit 31, then use bits 22..30 as the 9-bit index into rsp_divtable. */ |
| 2935 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2936 | INT32 fetchval = rsp_divtable[address]; |
| 2937 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); /* table value placed below a set bit 30, then shifted right by 31 - shifter */ |
| 2938 | if (rec < 0) |
| 2939 | { |
| 2940 | temp = ~temp; /* negative inputs get the bitwise complement, not the arithmetic negation */ |
| 2941 | } |
| 2942 | if (!rec) |
| 2943 | { |
| 2944 | temp = 0x7fffffff; /* zero input: largest positive value */ |
| 2945 | } |
| 2946 | else if (rec == 0xffff8000) /* input -32768; the comparison is done after conversion to unsigned */ |
| 2947 | { |
| 2948 | temp = 0xffff0000; |
| 2949 | } |
| 2950 | rec = temp; |
| 2951 | /* Keep the full 32-bit result in m_reciprocal_res and clear m_dp_allowed. */ |
| 2952 | m_reciprocal_res = rec; |
| 2953 | m_dp_allowed = 0; |
| 2954 | /* Only the low 16 bits are stored. NOTE(review): the raw VS1REG field is the element index here - confirm INSERT16 copes with values above 7. */ |
| 2955 | INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 2956 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* accumulator low slice receives the EL-shuffled VS2 */ |
| 2957 | } |
| 2958 | |
| 2959 | static void cfunc_vrcp(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vrcp() on it */ |
| 2960 | { |
| 2961 | ((rsp_cop2 *)param)->vrcp(); |
| 2962 | } |
| 2963 | |
| 2964 | |
| 2965 | // VRCPL |
| 2966 | // |
| 2967 | // 31 25 24 20 15 10 5 0 |
| 2968 | // ------------------------------------------------------ |
| 2969 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 2970 | // ------------------------------------------------------ |
| 2971 | // |
| 2972 | // Calculates reciprocal low part: when m_dp_allowed is set the element is combined with m_reciprocal_high into a 32-bit input, otherwise the sign-extended 16-bit element is used |
| 2973 | |
| 2974 | inline void rsp_cop2_simd::vrcpl() |
| 2975 | { |
| 2976 | int op = m_op; /* local named op: the operand-field macros appear to expand against it */ |
| 2977 | |
| 2978 | #if SIMUL_SIMD |
| 2979 | m_old_reciprocal_res = m_reciprocal_res; /* snapshot of the reciprocal state before this op changes it (SIMUL_SIMD builds only) */ |
| 2980 | m_old_reciprocal_high = m_reciprocal_high; |
| 2981 | m_old_dp_allowed = m_dp_allowed; |
| 2982 | #endif |
| 2983 | |
| 2984 | INT32 shifter = 0; |
| 2985 | |
| 2986 | UINT16 urec; |
| 2987 | EXTRACT16(m_xv[VS2REG], urec, EL); /* fetch the source element selected by EL */ |
| 2988 | INT32 rec = (INT16)urec; |
| 2989 | INT32 datainput = rec; |
| 2990 | |
| 2991 | if (m_dp_allowed) |
| 2992 | { |
| 2993 | rec = (rec & 0x0000ffff) | m_reciprocal_high; /* low half from this element, high half latched earlier by vrcph()/vrsqh() */ |
| 2994 | datainput = rec; |
| 2995 | |
| 2996 | if (rec < 0) |
| 2997 | { |
| 2998 | if (rec < -32768) |
| 2999 | { |
| 3000 | datainput = ~datainput; /* values below -32768 use the bitwise complement */ |
| 3001 | } |
| 3002 | else |
| 3003 | { |
| 3004 | datainput = -datainput; /* values in [-32768, -1] use the arithmetic negation */ |
| 3005 | } |
| 3006 | } |
| 3007 | } |
| 3008 | else if (datainput < 0) |
| 3009 | { |
| 3010 | datainput = -datainput; |
| 3011 | |
| 3012 | shifter = 0x10; /* overwritten by the bit scan below, since datainput is non-zero on this path */ |
| 3013 | } |
| 3014 | |
| 3015 | if (datainput) |
| 3016 | { |
| 3017 | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards for the first set bit */ |
| 3018 | { |
| 3019 | if (datainput & (1 << ((~i) & 0x1f))) /* (~i) & 0x1f equals 31 - i */ |
| 3020 | { |
| 3021 | shifter = i; /* number of leading zero bits in datainput */ |
| 3022 | break; |
| 3023 | } |
| 3024 | } |
| 3025 | } |
| 3026 | |
| 3027 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* normalise, then use bits 30..22 as a 9-bit table index */ |
| 3028 | INT32 fetchval = rsp_divtable[address]; |
| 3029 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); /* bit 30 set above the table value, then shifted right by 31 - shifter */ |
| 3030 | temp ^= rec >> 31; /* complements temp when rec is negative (assumes an arithmetic right shift) */ |
| 3031 | |
| 3032 | if (!rec) |
| 3033 | { |
| 3034 | temp = 0x7fffffff; /* fixed result for a zero input */ |
| 3035 | } |
| 3036 | else if (rec == 0xffff8000) |
| 3037 | { |
| 3038 | temp = 0xffff0000; /* fixed result for an input of -32768 */ |
| 3039 | } |
| 3040 | rec = temp; |
| 3041 | |
| 3042 | m_reciprocal_res = rec; /* kept so that vrcph()/vrsqh() can return its upper 16 bits */ |
| 3043 | m_dp_allowed = 0; |
| 3044 | |
| 3045 | INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); /* low 16 bits of the result into element VS1REG of VD */ |
| 3046 | |
| 3047 | for (int i = 0; i < 8; i++) /* NOTE(review): vrcp() fills the low accumulator with one shuffle; this loop goes element by element through SET_ACCUM_L -- confirm both reach the same storage */ |
| 3048 | { |
| 3049 | INT16 val; |
| 3050 | EXTRACT16(m_xv[VS2REG], val, VEC_EL_2(EL, i)); |
| 3051 | SET_ACCUM_L(val, i); |
| 3052 | } |
| 3053 | } |
| 3054 | |
| 3055 | static void cfunc_vrcpl(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vrcpl() on it */ |
| 3056 | { |
| 3057 | ((rsp_cop2 *)param)->vrcpl(); |
| 3058 | } |
| 3059 | |
| 3060 | |
| 3060 | // VRCPH |
| 3061 | // |
| 3062 | // 31 25 24 20 15 10 5 0 |
| 3063 | // ------------------------------------------------------ |
| 3064 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 3065 | // ------------------------------------------------------ |
| 3066 | // |
| 3067 | // Calculates reciprocal high part: latches the source element as the upper 16 bits of the next input and returns the upper 16 bits of the previous result |
| 3068 | |
| 3069 | inline void rsp_cop2_simd::vrcph() |
| 3070 | { |
| 3071 | int op = m_op; /* local named op: the operand-field macros appear to expand against it */ |
| 3072 | |
| 3073 | #if SIMUL_SIMD |
| 3074 | m_old_reciprocal_res = m_reciprocal_res; /* snapshot of the reciprocal state before this op changes it (SIMUL_SIMD builds only) */ |
| 3075 | m_old_reciprocal_high = m_reciprocal_high; |
| 3076 | m_old_dp_allowed = m_dp_allowed; |
| 3077 | #endif |
| 3078 | |
| 3079 | UINT16 rcph; |
| 3080 | EXTRACT16(m_xv[VS2REG], rcph, EL); /* fetch the source element selected by EL */ |
| 3081 | m_reciprocal_high = rcph << 16; /* becomes the upper half of the input in vrcpl()/vrsql() */ |
| 3082 | m_dp_allowed = 1; /* tells the following low-part op to use m_reciprocal_high */ |
| 3083 | |
| 3084 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* low accumulator receives the EL-shuffled source vector */ |
| 3085 | |
| 3086 | INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); /* upper 16 bits of the previously computed result into element VS1REG of VD */ |
| 3087 | } |
| 3089 | |
| 3089 | static void cfunc_vrcph(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vrcph() on it */ |
| 3090 | { |
| 3091 | ((rsp_cop2 *)param)->vrcph(); |
| 3092 | } |
| 3094 | |
| 3095 | |
| 3095 | // VMOV |
| 3096 | // |
| 3097 | // 31 25 24 20 15 10 5 0 |
| 3098 | // ------------------------------------------------------ |
| 3099 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 3100 | // ------------------------------------------------------ |
| 3101 | // |
| 3102 | // Moves one element (selected by EL) of the source vector into element VS1REG of the destination vector; the low accumulator gets the EL-shuffled source |
| 3103 | |
| 3104 | inline void rsp_cop2_simd::vmov() |
| 3105 | { |
| 3106 | int op = m_op; /* local named op: the operand-field macros appear to expand against it */ |
| 3107 | |
| 3108 | INT16 val; |
| 3109 | EXTRACT16(m_xv[VS2REG], val, EL); /* read the source element */ |
| 3110 | INSERT16(m_xv[VDREG], val, VS1REG); /* write it to the destination element; other VD elements are untouched */ |
| 3111 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3112 | } |
| 3114 | |
| 3114 | static void cfunc_vmov(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vmov() on it */ |
| 3115 | { |
| 3116 | ((rsp_cop2 *)param)->vmov(); |
| 3117 | } |
| 3119 | |
| 3120 | |
| 3120 | // VRSQ |
| 3121 | // |
| 3122 | // 31 25 24 20 15 10 5 0 |
| 3123 | // ------------------------------------------------------ |
| 3124 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 3125 | // ------------------------------------------------------ |
| 3126 | // |
| 3127 | // Calculates reciprocal square-root of one element using the upper half (index bit 9 set) of rsp_divtable; runs two lookup passes and does not update m_reciprocal_res or m_dp_allowed |
| 3128 | |
| 3129 | inline void rsp_cop2_simd::vrsq() |
| 3130 | { |
| 3131 | int op = m_op; /* local named op: the operand-field macros appear to expand against it */ |
| 3132 | |
| 3133 | INT32 shifter = 0; |
| 3134 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); /* NOTE(review): reads through VREG_S, whereas the sibling ops use EXTRACT16 on m_xv -- confirm both views hold the same data */ |
| 3135 | INT32 datainput = (rec < 0) ? (-rec) : (rec); /* magnitude of the input */ |
| 3136 | |
| 3137 | if (rec < 0) |
| 3138 | { |
| 3139 | if (rec < -32768) /* cannot hold here: rec is a sign-extended INT16 */ |
| 3140 | { |
| 3141 | datainput = ~datainput; |
| 3142 | } |
| 3143 | else |
| 3144 | { |
| 3145 | datainput = -datainput; /* NOTE(review): negates the magnitude again, so datainput is negative for negative inputs -- confirm this is intended */ |
| 3146 | } |
| 3147 | } |
| 3148 | |
| 3149 | if (datainput) |
| 3150 | { |
| 3151 | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards for the first set bit */ |
| 3152 | { |
| 3153 | if (datainput & (1 << ((~i) & 0x1f))) /* (~i) & 0x1f equals 31 - i */ |
| 3154 | { |
| 3155 | shifter = i; |
| 3156 | break; |
| 3157 | } |
| 3158 | } |
| 3159 | } |
| 3160 | else |
| 3161 | { |
| 3162 | shifter = 0; |
| 3163 | } |
| 3164 | |
| 3165 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* normalise, then take bits 30..22 as a 9-bit value */ |
| 3166 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* force bit 9 on and replace bit 0 with the parity of shifter */ |
| 3167 | |
| 3168 | INT32 fetchval = rsp_divtable[address]; |
| 3169 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* shift right by (31 - shifter) / 2 */ |
| 3170 | if (rec < 0) |
| 3171 | { |
| 3172 | temp = ~temp; |
| 3173 | } |
| 3174 | if (!rec) |
| 3175 | { |
| 3176 | temp = 0x7fffffff; |
| 3177 | } |
| 3178 | else if (rec == 0xffff8000) |
| 3179 | { |
| 3180 | temp = 0xffff0000; |
| 3181 | } |
| 3182 | rec = temp; /* from here on rec holds the first-pass result, not the original input */ |
| 3183 | |
| 3184 | if (rec < 0) /* second pass: keyed on the sign of the first-pass result */ |
| 3185 | { |
| 3186 | if (m_dp_allowed) |
| 3187 | { |
| 3188 | if (rec < -32768) |
| 3189 | { |
| 3190 | datainput = ~datainput; |
| 3191 | } |
| 3192 | else |
| 3193 | { |
| 3194 | datainput = -datainput; |
| 3195 | } |
| 3196 | } |
| 3197 | else |
| 3198 | { |
| 3199 | datainput = -datainput; |
| 3200 | } |
| 3201 | } |
| 3202 | |
| 3203 | if (datainput) |
| 3204 | { |
| 3205 | for (int i = 0; i < 32; i++) /* same leading-bit scan as above, on the adjusted datainput */ |
| 3206 | { |
| 3207 | if (datainput & (1 << ((~i) & 0x1f))) |
| 3208 | { |
| 3209 | shifter = i; |
| 3210 | break; |
| 3211 | } |
| 3212 | } |
| 3213 | } |
| 3214 | else |
| 3215 | { |
| 3216 | shifter = 0; |
| 3217 | } |
| 3218 | |
| 3219 | address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3220 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3221 | |
| 3222 | fetchval = rsp_divtable[address]; |
| 3223 | temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3224 | if (rec < 0) |
| 3225 | { |
| 3226 | temp = ~temp; |
| 3227 | } |
| 3228 | if (!rec) |
| 3229 | { |
| 3230 | temp = 0x7fff; /* second-pass special cases use 16-bit constants, unlike the first pass */ |
| 3231 | } |
| 3232 | else if (rec == 0xffff8000) |
| 3233 | { |
| 3234 | temp = 0x0000; |
| 3235 | } |
| 3236 | rec = temp; |
| 3237 | |
| 3238 | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; /* NOTE(review): result is stored through W_VREG_S, not INSERT16 on m_xv as in the sibling ops -- confirm */ |
| 3239 | for (int i = 0; i < 8; i++) /* copy the EL-selected source elements into the low accumulator, one element at a time */ |
| 3240 | { |
| 3241 | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 3242 | } |
| 3243 | } |
| 3245 | |
| 3245 | static void cfunc_vrsq(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vrsq() on it */ |
| 3246 | { |
| 3247 | ((rsp_cop2 *)param)->vrsq(); |
| 3248 | } |
| 3250 | |
| 3251 | |
| 3251 | // VRSQL |
| 3252 | // |
| 3253 | // 31 25 24 20 15 10 5 0 |
| 3254 | // ------------------------------------------------------ |
| 3255 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 3256 | // ------------------------------------------------------ |
| 3257 | // |
| 3258 | // Calculates reciprocal square-root low part: when m_dp_allowed is set the element is combined with m_reciprocal_high into a 32-bit input, otherwise the sign-extended 16-bit element is used |
| 3259 | |
| 3260 | inline void rsp_cop2_simd::vrsql() |
| 3261 | { |
| 3262 | int op = m_op; /* local named op: the operand-field macros appear to expand against it */ |
| 3263 | |
| 3264 | #if SIMUL_SIMD |
| 3265 | m_old_reciprocal_res = m_reciprocal_res; /* snapshot of the reciprocal state before this op changes it (SIMUL_SIMD builds only) */ |
| 3266 | m_old_reciprocal_high = m_reciprocal_high; |
| 3267 | m_old_dp_allowed = m_dp_allowed; |
| 3268 | #endif |
| 3269 | |
| 3270 | INT32 shifter = 0; |
| 3271 | UINT16 val; |
| 3272 | EXTRACT16(m_xv[VS2REG], val, EL); /* fetch the source element selected by EL */ |
| 3273 | INT32 rec = (INT16)val; |
| 3274 | INT32 datainput = rec; |
| 3275 | |
| 3276 | if (m_dp_allowed) |
| 3277 | { |
| 3278 | rec = (rec & 0x0000ffff) | m_reciprocal_high; /* low half from this element, high half latched earlier by vrcph()/vrsqh() */ |
| 3279 | datainput = rec; |
| 3280 | |
| 3281 | if (rec < 0) |
| 3282 | { |
| 3283 | if (rec < -32768) |
| 3284 | { |
| 3285 | datainput = ~datainput; /* values below -32768 use the bitwise complement */ |
| 3286 | } |
| 3287 | else |
| 3288 | { |
| 3289 | datainput = -datainput; /* values in [-32768, -1] use the arithmetic negation */ |
| 3290 | } |
| 3291 | } |
| 3292 | } |
| 3293 | else if (datainput < 0) |
| 3294 | { |
| 3295 | datainput = -datainput; |
| 3296 | |
| 3297 | shifter = 0x10; /* overwritten by the bit scan below, since datainput is non-zero on this path */ |
| 3298 | } |
| 3299 | |
| 3300 | if (datainput) |
| 3301 | { |
| 3302 | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards for the first set bit */ |
| 3303 | { |
| 3304 | if (datainput & (1 << ((~i) & 0x1f))) /* (~i) & 0x1f equals 31 - i */ |
| 3305 | { |
| 3306 | shifter = i; /* number of leading zero bits in datainput */ |
| 3307 | break; |
| 3308 | } |
| 3309 | } |
| 3310 | } |
| 3311 | |
| 3312 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* normalise, then take bits 30..22 as a 9-bit value */ |
| 3313 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* force bit 9 on and replace bit 0 with the parity of shifter */ |
| 3314 | |
| 3315 | INT32 fetchval = rsp_divtable[address]; |
| 3316 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* shift right by (31 - shifter) / 2 */ |
| 3317 | temp ^= rec >> 31; /* complements temp when rec is negative (assumes an arithmetic right shift) */ |
| 3318 | |
| 3319 | if (!rec) |
| 3320 | { |
| 3321 | temp = 0x7fffffff; /* fixed result for a zero input */ |
| 3322 | } |
| 3323 | else if (rec == 0xffff8000) |
| 3324 | { |
| 3325 | temp = 0xffff0000; /* fixed result for an input of -32768 */ |
| 3326 | } |
| 3327 | rec = temp; |
| 3328 | |
| 3329 | m_reciprocal_res = rec; /* kept so that vrcph()/vrsqh() can return its upper 16 bits */ |
| 3330 | m_dp_allowed = 0; |
| 3331 | |
| 3332 | INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); /* low 16 bits of the result into element VS1REG of VD */ |
| 3333 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* low accumulator receives the EL-shuffled source vector */ |
| 3334 | } |
| 3336 | |
| 3336 | static void cfunc_vrsql(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vrsql() on it */ |
| 3337 | { |
| 3338 | ((rsp_cop2 *)param)->vrsql(); |
| 3339 | } |
| 3341 | |
| 3342 | |
| 3342 | // VRSQH |
| 3343 | // |
| 3344 | // 31 25 24 20 15 10 5 0 |
| 3345 | // ------------------------------------------------------ |
| 3346 | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 3347 | // ------------------------------------------------------ |
| 3348 | // |
| 3349 | // Calculates reciprocal square-root high part: latches the source element as the upper 16 bits of the next input and returns the upper 16 bits of the previous result |
| 3350 | |
| 3351 | inline void rsp_cop2_simd::vrsqh() |
| 3352 | { |
| 3353 | int op = m_op; /* local named op: the operand-field macros appear to expand against it */ |
| 3354 | |
| 3355 | #if SIMUL_SIMD |
| 3356 | m_old_reciprocal_res = m_reciprocal_res; /* snapshot of the reciprocal state before this op changes it (SIMUL_SIMD builds only) */ |
| 3357 | m_old_reciprocal_high = m_reciprocal_high; |
| 3358 | m_old_dp_allowed = m_dp_allowed; |
| 3359 | #endif |
| 3360 | |
| 3361 | UINT16 val; |
| 3362 | EXTRACT16(m_xv[VS2REG], val, EL); /* fetch the source element selected by EL */ |
| 3363 | m_reciprocal_high = val << 16; /* becomes the upper half of the input in vrcpl()/vrsql() */ |
| 3364 | m_dp_allowed = 1; /* tells the following low-part op to use m_reciprocal_high */ |
| 3365 | |
| 3366 | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* low accumulator receives the EL-shuffled source vector */ |
| 3367 | |
| 3368 | INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); // store high part of the previously computed result into element VS1REG of VD |
| 3369 | } |
| 3371 | |
| 3371 | static void cfunc_vrsqh(void *param) /* void*-taking trampoline: casts param to rsp_cop2* and invokes vrsqh() on it */ |
| 3372 | { |
| 3373 | ((rsp_cop2 *)param)->vrsqh(); |
| 3374 | } |
| 3376 | |
| 3377 | |
| 3378 | /*************************************************************************** |
| 3379 | Vector Flag Reading/Writing |
| 3380 | ***************************************************************************/ |
| 3381 | |
| 3381 | inline void rsp_cop2_simd::mfc2() /* MFC2: reads two consecutive bytes of vector register VS1REG, starting at byte offset el, and stores them sign-extended in scalar register RTREG */ |
| 3382 | { |
| 3383 | UINT32 op = m_op; |
| 3384 | int el = (op >> 7) & 0xf; /* byte offset (0..15) taken from opcode bits 7..10 */ |
| 3385 | |
| 3386 | UINT16 out; |
| 3387 | EXTRACT16(m_xv[VS1REG], out, (el >> 1)); /* 16-bit element that contains byte el */ |
| 3388 | out >>= (1 - (el & 1)) * 8; /* even offset selects the upper byte of the element, odd offset the lower byte */ |
| 3389 | out &= 0x00ff; |
| 3390 | |
| 3391 | el++; /* move on to the following byte */ |
| 3392 | |
| 3393 | UINT16 temp; |
| 3394 | EXTRACT16(m_xv[VS1REG], temp, (el >> 1)); /* NOTE(review): for offset 15 the index becomes 8; this relies on EXTRACT16 masking the element index -- confirm */ |
| 3395 | temp >>= (1 - (el & 1)) * 8; |
| 3396 | temp &= 0x00ff; |
| 3397 | |
| 3398 | m_rsp.m_rsp_state->r[RTREG] = (INT32)(INT16)((out << 8) | temp); /* first byte is the high half; result is sign-extended from 16 bits */ |
| 3399 | } |
| 3401 | |
| 3401 | static void cfunc_mfc2(void *param) /* trampoline registered through UML_CALLC in generate_cop2(), which passes this as param */ |
| 3402 | { |
| 3403 | ((rsp_cop2 *)param)->mfc2(); |
| 3404 | } |
| 3406 | |
| 3407 | |
| 3407 | inline void rsp_cop2_simd::cfc2() /* CFC2: packs the vector flag set selected by RDREG into scalar register RT, one bit per element */ |
| 3408 | { |
| 3409 | UINT32 op = m_op; |
| 3410 | if (RTREG) /* nothing is written when the target is register 0 */ |
| 3411 | { |
| 3412 | switch(RDREG) /* values other than 0, 1 and 2 leave RT unchanged */ |
| 3413 | { |
| 3414 | case 0: /* carry flags in bits 0..7, zero flags in bits 8..15 */ |
| 3415 | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3416 | ((CARRY_FLAG(1) & 1) << 1) | |
| 3417 | ((CARRY_FLAG(2) & 1) << 2) | |
| 3418 | ((CARRY_FLAG(3) & 1) << 3) | |
| 3419 | ((CARRY_FLAG(4) & 1) << 4) | |
| 3420 | ((CARRY_FLAG(5) & 1) << 5) | |
| 3421 | ((CARRY_FLAG(6) & 1) << 6) | |
| 3422 | ((CARRY_FLAG(7) & 1) << 7) | |
| 3423 | ((ZERO_FLAG(0) & 1) << 8) | |
| 3424 | ((ZERO_FLAG(1) & 1) << 9) | |
| 3425 | ((ZERO_FLAG(2) & 1) << 10) | |
| 3426 | ((ZERO_FLAG(3) & 1) << 11) | |
| 3427 | ((ZERO_FLAG(4) & 1) << 12) | |
| 3428 | ((ZERO_FLAG(5) & 1) << 13) | |
| 3429 | ((ZERO_FLAG(6) & 1) << 14) | |
| 3430 | ((ZERO_FLAG(7) & 1) << 15); |
| 3431 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit flag word */ |
| 3432 | break; |
| 3433 | case 1: /* compare flags in bits 0..7, clip2 flags in bits 8..15 */ |
| 3434 | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 3435 | ((COMPARE_FLAG(1) & 1) << 1) | |
| 3436 | ((COMPARE_FLAG(2) & 1) << 2) | |
| 3437 | ((COMPARE_FLAG(3) & 1) << 3) | |
| 3438 | ((COMPARE_FLAG(4) & 1) << 4) | |
| 3439 | ((COMPARE_FLAG(5) & 1) << 5) | |
| 3440 | ((COMPARE_FLAG(6) & 1) << 6) | |
| 3441 | ((COMPARE_FLAG(7) & 1) << 7) | |
| 3442 | ((CLIP2_FLAG(0) & 1) << 8) | |
| 3443 | ((CLIP2_FLAG(1) & 1) << 9) | |
| 3444 | ((CLIP2_FLAG(2) & 1) << 10) | |
| 3445 | ((CLIP2_FLAG(3) & 1) << 11) | |
| 3446 | ((CLIP2_FLAG(4) & 1) << 12) | |
| 3447 | ((CLIP2_FLAG(5) & 1) << 13) | |
| 3448 | ((CLIP2_FLAG(6) & 1) << 14) | |
| 3449 | ((CLIP2_FLAG(7) & 1) << 15); |
| 3450 | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit flag word */ |
| 3451 | break; |
| 3452 | case 2: /* clip1 flags only, bits 0..7; no sign extension is applied */ |
| 3453 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3454 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3455 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3456 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3457 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3458 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3459 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3460 | ((CLIP1_FLAG(7) & 1) << 7); |
| 3461 | break; |
| 3462 | } |
| 3463 | } |
| 3464 | } |
| 3466 | |
| 3466 | static void cfunc_cfc2(void *param) /* trampoline registered through UML_CALLC in generate_cop2(), which passes this as param */ |
| 3467 | { |
| 3468 | ((rsp_cop2 *)param)->cfc2(); |
| 3469 | } |
| 3471 | |
| 3472 | |
| 3472 | inline void rsp_cop2_simd::mtc2() /* MTC2: writes the value of scalar register RT into one 16-bit element of vector register VS1REG */ |
| 3473 | { |
| 3474 | UINT32 op = m_op; |
| 3475 | int el = (op >> 7) & 0xf; /* byte offset (0..15) taken from opcode bits 7..10 */ |
| 3476 | INSERT16(m_xv[VS1REG], RTVAL, el >> 1); /* the low bit of el is dropped, so the write always covers a whole 16-bit element */ |
| 3477 | } |
| 3479 | |
| 3479 | static void cfunc_mtc2(void *param) /* trampoline registered through UML_CALLC in generate_cop2(), which passes this as param */ |
| 3480 | { |
| 3481 | ((rsp_cop2 *)param)->mtc2(); |
| 3482 | } |
| 3484 | |
| 3485 | |
| 3485 | inline void rsp_cop2_simd::ctc2() /* CTC2: unpacks the bits of scalar register RT into the vector flag set selected by RDREG, one flag per element */ |
| 3486 | { |
| 3487 | UINT32 op = m_op; |
| 3488 | switch(RDREG) /* values other than 0, 1 and 2 change nothing */ |
| 3489 | { |
| 3490 | case 0: /* bits 0..7 -> carry flags, bits 8..15 -> zero flags */ |
| 3491 | CLEAR_CARRY_FLAGS(); |
| 3492 | CLEAR_ZERO_FLAGS(); |
| 3493 | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* row 0 of m_vflag: each flag is stored as 0xffff or 0 */ |
| 3494 | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3495 | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3496 | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3497 | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3498 | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3499 | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3500 | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3501 | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } /* same bits applied again through the SET_* macros; presumably these reach the storage the SIMD ops read -- confirm */ |
| 3502 | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 3503 | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 3504 | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 3505 | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 3506 | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 3507 | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 3508 | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 3509 | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; /* row 3 of m_vflag takes RT bits 8..15 */ |
| 3510 | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3511 | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3512 | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3513 | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3514 | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3515 | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3516 | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3517 | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 3518 | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 3519 | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 3520 | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 3521 | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 3522 | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 3523 | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 3524 | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 3525 | break; |
| 3526 | case 1: /* bits 0..7 -> compare flags, bits 8..15 -> clip2 flags */ |
| 3527 | CLEAR_COMPARE_FLAGS(); |
| 3528 | CLEAR_CLIP2_FLAGS(); |
| 3529 | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* row 1 of m_vflag takes RT bits 0..7 */ |
| 3530 | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3531 | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3532 | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3533 | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3534 | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3535 | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3536 | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3537 | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 3538 | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 3539 | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 3540 | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 3541 | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 3542 | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 3543 | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 3544 | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 3545 | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; /* row 4 of m_vflag takes RT bits 8..15 */ |
| 3546 | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 3547 | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 3548 | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 3549 | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 3550 | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 3551 | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 3552 | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 3553 | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 3554 | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 3555 | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 3556 | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 3557 | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 3558 | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 3559 | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 3560 | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 3561 | break; |
| 3562 | case 2: /* bits 0..7 -> clip1 flags; bits 8..15 of RT are ignored */ |
| 3563 | CLEAR_CLIP1_FLAGS(); |
| 3564 | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; /* row 2 of m_vflag takes RT bits 0..7 */ |
| 3565 | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 3566 | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 3567 | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 3568 | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 3569 | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 3570 | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 3571 | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 3572 | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 3573 | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 3574 | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 3575 | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 3576 | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 3577 | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 3578 | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 3579 | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 3580 | break; |
| 3581 | } |
| 3582 | } |
| 3584 | |
| 3585 | static void cfunc_ctc2(void *param) /* trampoline registered through UML_CALLC in generate_cop2(), which passes this as param */ |
| 3586 | { |
| 3587 | ((rsp_cop2 *)param)->ctc2(); |
| 3588 | } |
| 3589 | |
| 3590 | |
| 3591 | /*************************************************************************** |
| 3592 | COP2 Opcode Compilation |
| 3593 | ***************************************************************************/ |
| 3594 | |
| 3595 | int rsp_cop2_simd::generate_cop2(drcuml_block *block, rsp_device::compiler_state *compiler, const opcode_desc *desc) /* emits UML for one COP2 opcode; returns TRUE when the opcode was handled here (or whatever generate_vector_opcode returns for vector ops), FALSE for an unrecognised sub-opcode */ |
| 3596 | { |
| 3597 | UINT32 op = desc->opptr.l[0]; |
| 3598 | UINT8 opswitch = RSREG; /* the RS field selects the COP2 sub-operation */ |
| 3599 | |
| 3600 | switch (opswitch) |
| 3601 | { |
| 3602 | case 0x00: /* MFCz */ |
| 3603 | if (RTREG != 0) /* a move into register 0 emits no code */ |
| 3604 | { |
| 3605 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] -- hands the opcode word to the C helper |
| 3606 | UML_CALLC(block, cfunc_mfc2, this); // callc mfc2 |
| 3607 | } |
| 3608 | return TRUE; |
| 3609 | |
| 3610 | case 0x02: /* CFCz */ |
| 3611 | if (RTREG != 0) /* a move into register 0 emits no code */ |
| 3612 | { |
| 3613 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] -- hands the opcode word to the C helper |
| 3614 | UML_CALLC(block, cfunc_cfc2, this); // callc cfc2 |
| 3615 | } |
| 3616 | return TRUE; |
| 3617 | |
| 3618 | case 0x04: /* MTCz */ |
| 3619 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] -- hands the opcode word to the C helper |
| 3620 | UML_CALLC(block, cfunc_mtc2, this); // callc mtc2 |
| 3621 | return TRUE; |
| 3622 | |
| 3623 | case 0x06: /* CTCz */ |
| 3624 | UML_MOV(block, mem(&m_op), desc->opptr.l[0]); // mov [m_op],desc->opptr.l[0] -- hands the opcode word to the C helper |
| 3625 | UML_CALLC(block, cfunc_ctc2, this); // callc ctc2 |
| 3626 | return TRUE; |
| 3627 | |
| 3628 | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 3629 | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: /* RS values 0x10..0x1f are vector operations */ |
| 3630 | return generate_vector_opcode(block, compiler, desc); |
| 3631 | } |
| 3632 | return FALSE; |
| 3633 | } |
trunk/src/emu/cpu/rsp/rspdrc.c
| r241957 | r241958 | |
| 25 | 25 | #include "rsp.h" |
| 26 | 26 | #include "rspdiv.h" |
| 27 | 27 | #include "rspfe.h" |
| 28 | #include "rspcp2.h" |
| 28 | 29 | #include "cpu/drcfe.h" |
| 29 | 30 | #include "cpu/drcuml.h" |
| 30 | 31 | #include "cpu/drcumlsh.h" |
| r241957 | r241958 | |
| 52 | 53 | |
| 53 | 54 | |
| 54 | 55 | /*************************************************************************** |
| 55 | | MACROS |
| 56 | Macros |
| 56 | 57 | ***************************************************************************/ |
| 57 | 58 | |
| 58 | 59 | #define R32(reg) m_regmap[reg] |
| 59 | 60 | |
| 60 | 61 | /*************************************************************************** |
| 61 | | HELPFUL DEFINES |
| 62 | Inline Functions |
| 62 | 63 | ***************************************************************************/ |
| 63 | 64 | |
| 64 | | #define VDREG ((op >> 6) & 0x1f) |
| 65 | | #define VS1REG ((op >> 11) & 0x1f) |
| 66 | | #define VS2REG ((op >> 16) & 0x1f) |
| 67 | | #define EL ((op >> 21) & 0xf) |
| 68 | | |
| 69 | | #define SIMD_EXTRACT16(reg, value, element) \ |
| 70 | | switch((element) & 7) \ |
| 71 | | { \ |
| 72 | | case 0: value = _mm_extract_epi16(reg, 0); break; \ |
| 73 | | case 1: value = _mm_extract_epi16(reg, 1); break; \ |
| 74 | | case 2: value = _mm_extract_epi16(reg, 2); break; \ |
| 75 | | case 3: value = _mm_extract_epi16(reg, 3); break; \ |
| 76 | | case 4: value = _mm_extract_epi16(reg, 4); break; \ |
| 77 | | case 5: value = _mm_extract_epi16(reg, 5); break; \ |
| 78 | | case 6: value = _mm_extract_epi16(reg, 6); break; \ |
| 79 | | case 7: value = _mm_extract_epi16(reg, 7); break; \ |
| 80 | | } |
| 81 | | |
| 82 | | |
| 83 | | #define SIMD_INSERT16(reg, value, element) \ |
| 84 | | switch((element) & 7) \ |
| 85 | | { \ |
| 86 | | case 0: reg = _mm_insert_epi16(reg, value, 0); break; \ |
| 87 | | case 1: reg = _mm_insert_epi16(reg, value, 1); break; \ |
| 88 | | case 2: reg = _mm_insert_epi16(reg, value, 2); break; \ |
| 89 | | case 3: reg = _mm_insert_epi16(reg, value, 3); break; \ |
| 90 | | case 4: reg = _mm_insert_epi16(reg, value, 4); break; \ |
| 91 | | case 5: reg = _mm_insert_epi16(reg, value, 5); break; \ |
| 92 | | case 6: reg = _mm_insert_epi16(reg, value, 6); break; \ |
| 93 | | case 7: reg = _mm_insert_epi16(reg, value, 7); break; \ |
| 94 | | } |
| 95 | | |
| 96 | | |
| 97 | | #define SIMD_EXTRACT16C(reg, value, element) value = _mm_extract_epi16(reg, element); |
| 98 | | #define SIMD_INSERT16C(reg, value, element) reg = _mm_insert_epi16(reg, value, element); |
| 99 | | |
| 100 | | #define VREG_B(reg, offset) m_v[(reg)].b[(offset)^1] |
| 101 | | #define W_VREG_S(reg, offset) m_v[(reg)].s[(offset)] |
| 102 | | #define VREG_S(reg, offset) (INT16)m_v[(reg)].s[(offset)] |
| 103 | | |
| 104 | | #define VEC_EL_2(x,z) (vector_elements_2[(x)][(z)]) |
| 105 | | |
| 106 | | #define ACCUM(x) m_accum[x].q |
| 107 | | |
| 108 | | #define CARRY 0 |
| 109 | | #define COMPARE 1 |
| 110 | | #define CLIP1 2 |
| 111 | | #define ZERO 3 |
| 112 | | #define CLIP2 4 |
| 113 | | |
| 114 | | |
| 115 | | #if USE_SIMD |
| 116 | | static void cfunc_mfc2_simd(void *param); |
| 117 | | static void cfunc_cfc2_simd(void *param); |
| 118 | | static void cfunc_mtc2_simd(void *param); |
| 119 | | static void cfunc_ctc2_simd(void *param); |
| 120 | | #endif |
| 121 | | |
| 122 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 123 | | static void cfunc_mfc2_scalar(void *param); |
| 124 | | static void cfunc_cfc2_scalar(void *param); |
| 125 | | static void cfunc_mtc2_scalar(void *param); |
| 126 | | static void cfunc_ctc2_scalar(void *param); |
| 127 | | #endif |
| 128 | | |
| 129 | | |
| 130 | | #if USE_SIMD |
| 131 | | inline UINT16 rsp_device::VEC_ACCUM_H(int x) |
| 132 | | { |
| 133 | | UINT16 out; |
| 134 | | SIMD_EXTRACT16(m_accum_h, out, x); |
| 135 | | return out; |
| 136 | | } |
| 137 | | |
| 138 | | inline UINT16 rsp_device::VEC_ACCUM_M(int x) |
| 139 | | { |
| 140 | | UINT16 out; |
| 141 | | SIMD_EXTRACT16(m_accum_m, out, x); |
| 142 | | return out; |
| 143 | | } |
| 144 | | |
| 145 | | inline UINT16 rsp_device::VEC_ACCUM_L(int x) |
| 146 | | { |
| 147 | | UINT16 out; |
| 148 | | SIMD_EXTRACT16(m_accum_l, out, x); |
| 149 | | return out; |
| 150 | | } |
| 151 | | |
| 152 | | inline UINT16 rsp_device::VEC_ACCUM_LL(int x) |
| 153 | | { |
| 154 | | UINT16 out; |
| 155 | | SIMD_EXTRACT16(m_accum_ll, out, x); |
| 156 | | return out; |
| 157 | | } |
| 158 | | |
| 159 | | #define VEC_SET_ACCUM_H(v, x) SIMD_INSERT16(m_accum_h, v, x); |
| 160 | | #define VEC_SET_ACCUM_M(v, x) SIMD_INSERT16(m_>accum_m, v, x); |
| 161 | | #define VEC_SET_ACCUM_L(v, x) SIMD_INSERT16(m_accum_l, v, x); |
| 162 | | #define VEC_SET_ACCUM_LL(v, x) SIMD_INSERT16(m_accum_ll, v, x); |
| 163 | | |
| 164 | | #define VEC_GET_SCALAR_VS1(out, i) SIMD_EXTRACT16(m_xv[VS1REG], out, i); |
| 165 | | #define VEC_GET_SCALAR_VS2(out, i) SIMD_EXTRACT16(m_xv[VS2REG], out, VEC_EL_2(EL, i)); |
| 166 | | |
| 167 | | inline UINT16 rsp_device::VEC_CARRY_FLAG(const int x) |
| 168 | | { |
| 169 | | UINT16 out; |
| 170 | | SIMD_EXTRACT16(m_xvflag[CARRY], out, x); |
| 171 | | return out; |
| 172 | | } |
| 173 | | |
| 174 | | inline UINT16 rsp_device::VEC_COMPARE_FLAG(const int x) |
| 175 | | { |
| 176 | | UINT16 out; |
| 177 | | SIMD_EXTRACT16(m_xvflag[COMPARE], out, x); |
| 178 | | return out; |
| 179 | | } |
| 180 | | |
| 181 | | inline UINT16 rsp_device::VEC_CLIP1_FLAG(const int x) |
| 182 | | { |
| 183 | | UINT16 out; |
| 184 | | SIMD_EXTRACT16(m_xvflag[CLIP1], out, x); |
| 185 | | return out; |
| 186 | | } |
| 187 | | |
| 188 | | inline UINT16 rsp_device::VEC_ZERO_FLAG(const int x) |
| 189 | | { |
| 190 | | UINT16 out; |
| 191 | | SIMD_EXTRACT16(m_xvflag[ZERO], out, x); |
| 192 | | return out; |
| 193 | | } |
| 194 | | |
| 195 | | inline UINT16 rsp_device::VEC_CLIP2_FLAG(const int x) |
| 196 | | { |
| 197 | | UINT16 out; |
| 198 | | SIMD_EXTRACT16(m_xvflag[CLIP2], out, x); |
| 199 | | return out; |
| 200 | | } |
| 201 | | |
| 202 | | #define VEC_CLEAR_CARRY_FLAGS() { m_xvflag[CARRY] = _mm_setzero_si128(); } |
| 203 | | #define VEC_CLEAR_COMPARE_FLAGS() { m_xvflag[COMPARE] = _mm_setzero_si128(); } |
| 204 | | #define VEC_CLEAR_CLIP1_FLAGS() { m_xvflag[CLIP1] = _mm_setzero_si128(); } |
| 205 | | #define VEC_CLEAR_ZERO_FLAGS() { m_xvflag[ZERO] = _mm_setzero_si128(); } |
| 206 | | #define VEC_CLEAR_CLIP2_FLAGS() { m_xvflag[CLIP2] = _mm_setzero_si128(); } |
| 207 | | |
| 208 | | #define VEC_SET_CARRY_FLAG(x) { SIMD_INSERT16(m_xvflag[CARRY], 0xffff, x); } |
| 209 | | #define VEC_SET_COMPARE_FLAG(x) { SIMD_INSERT16(m_xvflag[COMPARE], 0xffff, x); } |
| 210 | | #define VEC_SET_CLIP1_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP1], 0xffff, x); } |
| 211 | | #define VEC_SET_ZERO_FLAG(x) { SIMD_INSERT16(m_xvflag[ZERO], 0xffff, x); } |
| 212 | | #define VEC_SET_CLIP2_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP2], 0xffff, x); } |
| 213 | | |
| 214 | | #define VEC_CLEAR_CARRY_FLAG(x) { SIMD_INSERT16(m_xvflag[CARRY], 0, x); } |
| 215 | | #define VEC_CLEAR_COMPARE_FLAG(x) { SIMD_INSERT16(m_xvflag[COMPARE], 0, x); } |
| 216 | | #define VEC_CLEAR_CLIP1_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP1], 0, x); } |
| 217 | | #define VEC_CLEAR_ZERO_FLAG(x) { SIMD_INSERT16(m_xvflag[ZERO], 0, x); } |
| 218 | | #define VEC_CLEAR_CLIP2_FLAG(x) { SIMD_INSERT16(m_xvflag[CLIP2], 0, x); } |
| 219 | | |
| 220 | | #endif |
| 221 | | |
| 222 | | #define ACCUM_H(x) (UINT16)m_accum[x].w[3] |
| 223 | | #define ACCUM_M(x) (UINT16)m_accum[x].w[2] |
| 224 | | #define ACCUM_L(x) (UINT16)m_accum[x].w[1] |
| 225 | | #define ACCUM_LL(x) (UINT16)m_accum[x].w[0] |
| 226 | | |
| 227 | | #define SET_ACCUM_H(v, x) m_accum[x].w[3] = v; |
| 228 | | #define SET_ACCUM_M(v, x) m_accum[x].w[2] = v; |
| 229 | | #define SET_ACCUM_L(v, x) m_accum[x].w[1] = v; |
| 230 | | #define SET_ACCUM_LL(v, x) m_accum[x].w[0] = v; |
| 231 | | |
| 232 | | #define SCALAR_GET_VS1(out, i) out = VREG_S(VS1REG, i) |
| 233 | | #define SCALAR_GET_VS2(out, i) out = VREG_S(VS2REG, VEC_EL_2(EL, i)) |
| 234 | | |
| 235 | | #define CARRY_FLAG(x) (m_vflag[CARRY][x & 7] != 0 ? 0xffff : 0) |
| 236 | | #define COMPARE_FLAG(x) (m_vflag[COMPARE][x & 7] != 0 ? 0xffff : 0) |
| 237 | | #define CLIP1_FLAG(x) (m_vflag[CLIP1][x & 7] != 0 ? 0xffff : 0) |
| 238 | | #define ZERO_FLAG(x) (m_vflag[ZERO][x & 7] != 0 ? 0xffff : 0) |
| 239 | | #define CLIP2_FLAG(x) (m_vflag[CLIP2][x & 7] != 0 ? 0xffff : 0) |
| 240 | | |
| 241 | | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[CARRY], 0, 16); } |
| 242 | | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[COMPARE], 0, 16); } |
| 243 | | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[CLIP1], 0, 16); } |
| 244 | | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[ZERO], 0, 16); } |
| 245 | | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[CLIP2], 0, 16); } |
| 246 | | |
| 247 | | #define SET_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0xffff; } |
| 248 | | #define SET_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0xffff; } |
| 249 | | #define SET_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0xffff; } |
| 250 | | #define SET_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0xffff; } |
| 251 | | #define SET_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0xffff; } |
| 252 | | |
| 253 | | #define CLEAR_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0; } |
| 254 | | #define CLEAR_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0; } |
| 255 | | #define CLEAR_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0; } |
| 256 | | #define CLEAR_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0; } |
| 257 | | #define CLEAR_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0; } |
| 258 | | |
| 259 | | |
| 260 | | /*************************************************************************** |
| 261 | | INLINE FUNCTIONS |
| 262 | | ***************************************************************************/ |
| 263 | | |
| 264 | 65 | /*------------------------------------------------- |
| 265 | 66 | epc - compute the exception PC from a |
| 266 | 67 | descriptor |
| r241957 | r241958 | |
| 331 | 132 | m_dmem8 = (UINT8*)base; |
| 332 | 133 | } |
| 333 | 134 | |
| 334 | | inline UINT8 rsp_device::DM_READ8(UINT32 address) |
| 135 | UINT8 rsp_device::DM_READ8(UINT32 address) |
| 335 | 136 | { |
| 336 | 137 | UINT8 ret = m_dmem8[BYTE4_XOR_BE(address & 0xfff)]; |
| 138 | //printf("R8:%08x=%02x\n", address, ret); |
| 337 | 139 | return ret; |
| 338 | 140 | } |
| 339 | 141 | |
| r241957 | r241958 | |
| 347 | 149 | ((rsp_device *)param)->ccfunc_read8(); |
| 348 | 150 | } |
| 349 | 151 | |
| 350 | | inline UINT16 rsp_device::DM_READ16(UINT32 address) |
| 152 | UINT16 rsp_device::DM_READ16(UINT32 address) |
| 351 | 153 | { |
| 352 | 154 | UINT16 ret; |
| 353 | 155 | address &= 0xfff; |
| 354 | 156 | ret = m_dmem8[BYTE4_XOR_BE(address)] << 8; |
| 355 | 157 | ret |= m_dmem8[BYTE4_XOR_BE(address + 1)]; |
| 158 | //printf("R16:%08x=%04x\n", address, ret); |
| 356 | 159 | return ret; |
| 357 | 160 | } |
| 358 | 161 | |
| r241957 | r241958 | |
| 366 | 169 | ((rsp_device *)param)->ccfunc_read16(); |
| 367 | 170 | } |
| 368 | 171 | |
| 369 | | inline UINT32 rsp_device::DM_READ32(UINT32 address) |
| 172 | UINT32 rsp_device::DM_READ32(UINT32 address) |
| 370 | 173 | { |
| 371 | 174 | UINT32 ret; |
| 372 | 175 | address &= 0xfff; |
| r241957 | r241958 | |
| 374 | 177 | ret |= m_dmem8[BYTE4_XOR_BE(address + 1)] << 16; |
| 375 | 178 | ret |= m_dmem8[BYTE4_XOR_BE(address + 2)] << 8; |
| 376 | 179 | ret |= m_dmem8[BYTE4_XOR_BE(address + 3)]; |
| 180 | //printf("R32:%08x=%08x\n", address, ret); |
| 377 | 181 | return ret; |
| 378 | 182 | } |
| 379 | 183 | |
| r241957 | r241958 | |
| 387 | 191 | ((rsp_device *)param)->ccfunc_read32();; |
| 388 | 192 | } |
| 389 | 193 | |
| 390 | | inline void rsp_device::DM_WRITE8(UINT32 address, UINT8 data) |
| 194 | void rsp_device::DM_WRITE8(UINT32 address, UINT8 data) |
| 391 | 195 | { |
| 392 | 196 | address &= 0xfff; |
| 393 | 197 | m_dmem8[BYTE4_XOR_BE(address)] = data; |
| 198 | //printf("W8:%08x=%02x\n", address, data); |
| 394 | 199 | } |
| 395 | 200 | |
| 396 | 201 | inline void rsp_device::ccfunc_write8() |
| r241957 | r241958 | |
| 403 | 208 | ((rsp_device *)param)->ccfunc_write8();; |
| 404 | 209 | } |
| 405 | 210 | |
| 406 | | inline void rsp_device::DM_WRITE16(UINT32 address, UINT16 data) |
| 211 | void rsp_device::DM_WRITE16(UINT32 address, UINT16 data) |
| 407 | 212 | { |
| 408 | 213 | address &= 0xfff; |
| 409 | 214 | m_dmem8[BYTE4_XOR_BE(address)] = data >> 8; |
| 410 | 215 | m_dmem8[BYTE4_XOR_BE(address + 1)] = data & 0xff; |
| 216 | //printf("W16:%08x=%04x\n", address, data); |
| 411 | 217 | } |
| 412 | 218 | |
| 413 | 219 | inline void rsp_device::ccfunc_write16() |
| r241957 | r241958 | |
| 420 | 226 | ((rsp_device *)param)->ccfunc_write16();; |
| 421 | 227 | } |
| 422 | 228 | |
| 423 | | inline void rsp_device::DM_WRITE32(UINT32 address, UINT32 data) |
| 229 | void rsp_device::DM_WRITE32(UINT32 address, UINT32 data) |
| 424 | 230 | { |
| 425 | 231 | address &= 0xfff; |
| 426 | 232 | m_dmem8[BYTE4_XOR_BE(address)] = data >> 24; |
| 427 | 233 | m_dmem8[BYTE4_XOR_BE(address + 1)] = (data >> 16) & 0xff; |
| 428 | 234 | m_dmem8[BYTE4_XOR_BE(address + 2)] = (data >> 8) & 0xff; |
| 429 | 235 | m_dmem8[BYTE4_XOR_BE(address + 3)] = data & 0xff; |
| 236 | //printf("W32:%08x=%08x\n", address, data); |
| 430 | 237 | } |
| 431 | 238 | |
| 432 | 239 | inline void rsp_device::ccfunc_write32() |
| r241957 | r241958 | |
| 452 | 259 | } |
| 453 | 260 | |
| 454 | 261 | |
| 455 | | /*------------------------------------------------- |
| 456 | | cfunc_printf_debug - generic printf for |
| 457 | | debugging |
| 458 | | -------------------------------------------------*/ |
| 459 | | |
| 460 | | #ifdef UNUSED_CODE |
| 461 | | inline void rs_device::cfunc_printf_debug() |
| 462 | | { |
| 463 | | switch(m_arg2) |
| 464 | | { |
| 465 | | case 0: // WRITE8 |
| 466 | | printf("%04x:%02x\n", m_rsp_state->arg0 & 0xffff, (UINT8)m_rsp_state->arg1); |
| 467 | | break; |
| 468 | | case 1: // WRITE16 |
| 469 | | printf("%04x:%04x\n", m_rsp_state->arg0 & 0xffff, (UINT16)m_rsp_state->arg1); |
| 470 | | break; |
| 471 | | case 2: // WRITE32 |
| 472 | | printf("%04x:%08x\n", m_rsp_state->arg0 & 0xffff, m_rsp_state->arg1); |
| 473 | | break; |
| 474 | | case 3: // READ8 |
| 475 | | printf("%04xr%02x\n", m_rsp_state->arg0 & 0xffff, (UINT8)m_rsp_state->arg1); |
| 476 | | break; |
| 477 | | case 4: // READ16 |
| 478 | | printf("%04xr%04x\n", m_rsp_state->arg0 & 0xffff, (UINT16)m_rsp_state->arg1); |
| 479 | | break; |
| 480 | | case 5: // READ32 |
| 481 | | printf("%04xr%08x\n", m_rsp_state->arg0 & 0xffff, m_rsp_state->arg1); |
| 482 | | break; |
| 483 | | case 6: // Checksum |
| 484 | | printf("Sum: %08x\n", m_rsp_state->arg0); |
| 485 | | break; |
| 486 | | case 7: // Checksum |
| 487 | | printf("Correct Sum: %08x\n", m_rsp_state->arg0); |
| 488 | | break; |
| 489 | | default: // ??? |
| 490 | | printf("%08x %08x\n", m_rsp_state->arg0 & 0xffff, m_rsp_state->arg1); |
| 491 | | break; |
| 492 | | } |
| 493 | | } |
| 494 | | |
| 495 | | static void cfunc_printf_debug(void *param) |
| 496 | | { |
| 497 | | ((rsp_device *)param)->ccfunc_printf_debug(); |
| 498 | | } |
| 499 | | #endif |
| 500 | | |
| 501 | 262 | inline void rsp_device::ccfunc_get_cop0_reg() |
| 502 | 263 | { |
| 503 | 264 | int reg = m_rsp_state->arg0; |
| r241957 | r241958 | |
| 552 | 313 | ((rsp_device *)param)->ccfunc_set_cop0_reg(); |
| 553 | 314 | } |
| 554 | 315 | |
| 555 | | inline void rsp_device::ccfunc_unimplemented_opcode() |
| 556 | | { |
| 557 | | int op = m_rsp_state->arg0; |
| 558 | | if ((machine().debug_flags & DEBUG_FLAG_ENABLED) != 0) |
| 559 | | { |
| 560 | | char string[200]; |
| 561 | | rsp_dasm_one(string, m_ppc, op); |
| 562 | | osd_printf_debug("%08X: %s\n", m_ppc, string); |
| 563 | | } |
| 564 | | |
| 565 | | fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", op >> 26, op, m_ppc); |
| 566 | | } |
| 567 | | |
| 568 | | static void cfunc_unimplemented_opcode(void *param) |
| 569 | | { |
| 570 | | ((rsp_device *)param)->ccfunc_unimplemented_opcode(); |
| 571 | | } |
| 572 | | |
| 573 | 316 | /*****************************************************************************/ |
| 574 | 317 | |
| 575 | | /* Legacy. Going forward, this will be transitioned into unrolled opcode decodes. */ |
| 576 | | static const int vector_elements_2[16][8] = |
| 577 | | { |
| 578 | | { 0, 1, 2, 3, 4, 5, 6, 7 }, // none |
| 579 | | { 0, 1, 2, 3, 4, 5, 6, 7 }, // ??? |
| 580 | | { 0, 0, 2, 2, 4, 4, 6, 6 }, // 0q |
| 581 | | { 1, 1, 3, 3, 5, 5, 7, 7 }, // 1q |
| 582 | | { 0, 0, 0, 0, 4, 4, 4, 4 }, // 0h |
| 583 | | { 1, 1, 1, 1, 5, 5, 5, 5 }, // 1h |
| 584 | | { 2, 2, 2, 2, 6, 6, 6, 6 }, // 2h |
| 585 | | { 3, 3, 3, 3, 7, 7, 7, 7 }, // 3h |
| 586 | | { 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 |
| 587 | | { 1, 1, 1, 1, 1, 1, 1, 1 }, // 1 |
| 588 | | { 2, 2, 2, 2, 2, 2, 2, 2 }, // 2 |
| 589 | | { 3, 3, 3, 3, 3, 3, 3, 3 }, // 3 |
| 590 | | { 4, 4, 4, 4, 4, 4, 4, 4 }, // 4 |
| 591 | | { 5, 5, 5, 5, 5, 5, 5, 5 }, // 5 |
| 592 | | { 6, 6, 6, 6, 6, 6, 6, 6 }, // 6 |
| 593 | | { 7, 7, 7, 7, 7, 7, 7, 7 }, // 7 |
| 594 | | }; |
| 595 | | |
| 596 | | #if USE_SIMD |
| 597 | | static __m128i vec_himask; |
| 598 | | static __m128i vec_lomask; |
| 599 | | static __m128i vec_hibit; |
| 600 | | static __m128i vec_lobit; |
| 601 | | static __m128i vec_n32768; |
| 602 | | static __m128i vec_32767; |
| 603 | | static __m128i vec_flagmask; |
| 604 | | static __m128i vec_shiftmask2; |
| 605 | | static __m128i vec_shiftmask4; |
| 606 | | static __m128i vec_flag_reverse; |
| 607 | | static __m128i vec_neg1; |
| 608 | | static __m128i vec_zero; |
| 609 | | static __m128i vec_shuf[16]; |
| 610 | | static __m128i vec_shuf_inverse[16]; |
| 611 | | #endif |
| 612 | | |
| 613 | 318 | void rsp_device::rspcom_init() |
| 614 | 319 | { |
| 615 | | #if USE_SIMD |
| 616 | | VEC_CLEAR_CARRY_FLAGS(); |
| 617 | | VEC_CLEAR_COMPARE_FLAGS(); |
| 618 | | VEC_CLEAR_CLIP1_FLAGS(); |
| 619 | | VEC_CLEAR_ZERO_FLAGS(); |
| 620 | | VEC_CLEAR_CLIP2_FLAGS(); |
| 621 | | #endif |
| 622 | | |
| 623 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 624 | | CLEAR_CARRY_FLAGS(); |
| 625 | | CLEAR_COMPARE_FLAGS(); |
| 626 | | CLEAR_CLIP1_FLAGS(); |
| 627 | | CLEAR_ZERO_FLAGS(); |
| 628 | | CLEAR_CLIP2_FLAGS(); |
| 629 | | #endif |
| 630 | | |
| 631 | | #if USE_SIMD |
| 632 | | vec_shuf_inverse[ 0] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // none |
| 633 | | vec_shuf_inverse[ 1] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // ??? |
| 634 | | vec_shuf_inverse[ 2] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0908, 0x0908, 0x0504, 0x0504, 0x0100, 0x0100); // 0q |
| 635 | | vec_shuf_inverse[ 3] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0b0a, 0x0b0a, 0x0706, 0x0706, 0x0302, 0x0302); // 1q |
| 636 | | vec_shuf_inverse[ 4] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0100, 0x0100, 0x0100, 0x0100); // 0h |
| 637 | | vec_shuf_inverse[ 5] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0302, 0x0302, 0x0302, 0x0302); // 1h |
| 638 | | vec_shuf_inverse[ 6] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0504, 0x0504, 0x0504, 0x0504); // 2h |
| 639 | | vec_shuf_inverse[ 7] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0706, 0x0706, 0x0706, 0x0706); // 3h |
| 640 | | vec_shuf_inverse[ 8] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 0 |
| 641 | | vec_shuf_inverse[ 9] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 1 |
| 642 | | vec_shuf_inverse[10] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 2 |
| 643 | | vec_shuf_inverse[11] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 3 |
| 644 | | vec_shuf_inverse[12] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 4 |
| 645 | | vec_shuf_inverse[13] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 5 |
| 646 | | vec_shuf_inverse[14] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 6 |
| 647 | | vec_shuf_inverse[15] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 7 |
| 648 | | |
| 649 | | vec_shuf[ 0] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // none |
| 650 | | vec_shuf[ 1] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // ??? |
| 651 | | vec_shuf[ 2] = _mm_set_epi16(0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e); // 0q |
| 652 | | vec_shuf[ 3] = _mm_set_epi16(0x0100, 0x0100, 0x0504, 0x0706, 0x0908, 0x0908, 0x0d0c, 0x0d0c); // 1q |
| 653 | | vec_shuf[ 4] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0h |
| 654 | | vec_shuf[ 5] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1h |
| 655 | | vec_shuf[ 6] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2h |
| 656 | | vec_shuf[ 7] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908); // 3h |
| 657 | | vec_shuf[ 8] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0 |
| 658 | | vec_shuf[ 9] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1 |
| 659 | | vec_shuf[10] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2 |
| 660 | | vec_shuf[11] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 3 |
| 661 | | vec_shuf[12] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 4 |
| 662 | | vec_shuf[13] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 5 |
| 663 | | vec_shuf[14] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 6 |
| 664 | | vec_shuf[15] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 7 |
| 665 | | m_accum_h = _mm_setzero_si128(); |
| 666 | | m_accum_m = _mm_setzero_si128(); |
| 667 | | m_accum_l = _mm_setzero_si128(); |
| 668 | | m_accum_ll = _mm_setzero_si128(); |
| 669 | | vec_neg1 = _mm_set_epi64x(0xffffffffffffffffL, 0xffffffffffffffffL); |
| 670 | | vec_zero = _mm_setzero_si128(); |
| 671 | | vec_himask = _mm_set_epi64x(0xffff0000ffff0000L, 0xffff0000ffff0000L); |
| 672 | | vec_lomask = _mm_set_epi64x(0x0000ffff0000ffffL, 0x0000ffff0000ffffL); |
| 673 | | vec_hibit = _mm_set_epi64x(0x0001000000010000L, 0x0001000000010000L); |
| 674 | | vec_lobit = _mm_set_epi64x(0x0000000100000001L, 0x0000000100000001L); |
| 675 | | vec_32767 = _mm_set_epi64x(0x7fff7fff7fff7fffL, 0x7fff7fff7fff7fffL); |
| 676 | | vec_n32768 = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L); |
| 677 | | vec_flagmask = _mm_set_epi64x(0x0001000100010001L, 0x0001000100010001L); |
| 678 | | vec_shiftmask2 = _mm_set_epi64x(0x0000000300000003L, 0x0000000300000003L); |
| 679 | | vec_shiftmask4 = _mm_set_epi64x(0x000000000000000fL, 0x000000000000000fL); |
| 680 | | vec_flag_reverse = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); |
| 681 | | #endif |
| 682 | 320 | } |
| 683 | 321 | |
| 684 | | |
| 685 | | #if USE_SIMD |
| 686 | | // LBV |
| 687 | | // |
| 688 | | // 31 25 20 15 10 6 0 |
| 689 | | // -------------------------------------------------- |
| 690 | | // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 691 | | // -------------------------------------------------- |
| 692 | | // |
| 693 | | // Load 1 byte to vector byte index |
| 694 | | |
| 695 | | inline void rsp_device::ccfunc_rsp_lbv_simd() |
| 696 | | { |
| 697 | | UINT32 op = m_rsp_state->arg0; |
| 698 | | |
| 699 | | UINT32 ea = 0; |
| 700 | | int dest = (op >> 16) & 0x1f; |
| 701 | | int base = (op >> 21) & 0x1f; |
| 702 | | int index = (op >> 7) & 0xf; |
| 703 | | int offset = (op & 0x7f); |
| 704 | | if (offset & 0x40) |
| 705 | | { |
| 706 | | offset |= 0xffffffc0; |
| 707 | | } |
| 708 | | |
| 709 | | ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 710 | | |
| 711 | | UINT16 element; |
| 712 | | SIMD_EXTRACT16(m_xv[dest], element, (index >> 1)); |
| 713 | | element &= 0xff00 >> ((1-(index & 1)) * 8); |
| 714 | | element |= DM_READ8(ea) << ((1-(index & 1)) * 8); |
| 715 | | SIMD_INSERT16(m_xv[dest], element, (index >> 1)); |
| 716 | | } |
| 717 | | |
| 718 | | static void cfunc_rsp_lbv_simd(void *param) |
| 719 | | { |
| 720 | | ((rsp_device *)param)->ccfunc_rsp_lbv_simd(); |
| 721 | | } |
| 722 | | #endif |
| 723 | | |
| 724 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 725 | | inline void rsp_device::ccfunc_rsp_lbv_scalar() |
| 726 | | { |
| 727 | | UINT32 op = m_rsp_state->arg0; |
| 728 | | |
| 729 | | UINT32 ea = 0; |
| 730 | | int dest = (op >> 16) & 0x1f; |
| 731 | | int base = (op >> 21) & 0x1f; |
| 732 | | int index = (op >> 7) & 0xf; |
| 733 | | int offset = (op & 0x7f); |
| 734 | | if (offset & 0x40) |
| 735 | | { |
| 736 | | offset |= 0xffffffc0; |
| 737 | | } |
| 738 | | |
| 739 | | ea = (base) ? m_rsp_state->r[base] + offset : offset; |
| 740 | | VREG_B(dest, index) = DM_READ8(ea); |
| 741 | | } |
| 742 | | |
| 743 | | static void cfunc_rsp_lbv_scalar(void *param) |
| 744 | | { |
| 745 | | ((rsp_device *)param)->ccfunc_rsp_lbv_scalar(); |
| 746 | | } |
| 747 | | #endif |
| 748 | | |
| 749 | | #if USE_SIMD |
| 750 | | // LSV |
| 751 | | // |
| 752 | | // 31 25 20 15 10 6 0 |
| 753 | | // -------------------------------------------------- |
| 754 | | // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 755 | | // -------------------------------------------------- |
| 756 | | // |
| 757 | | // Loads 2 bytes starting from vector byte index |
| 758 | | |
| 759 | | inline void rsp_device::ccfunc_rsp_lsv_simd() |
| 760 | | { |
| 761 | | UINT32 op = m_rsp_state->arg0; |
| 762 | | int dest = (op >> 16) & 0x1f; |
| 763 | | int base = (op >> 21) & 0x1f; |
| 764 | | int index = (op >> 7) & 0xe; |
| 765 | | int offset = (op & 0x7f); |
| 766 | | if (offset & 0x40) |
| 767 | | { |
| 768 | | offset |= 0xffffffc0; |
| 769 | | } |
| 770 | | |
| 771 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 772 | | int end = index + 2; |
| 773 | | for (int i = index; i < end; i++) |
| 774 | | { |
| 775 | | UINT16 element; |
| 776 | | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 777 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 778 | | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 779 | | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 780 | | ea++; |
| 781 | | } |
| 782 | | } |
| 783 | | |
| 784 | | static void cfunc_rsp_lsv_simd(void *param) |
| 785 | | { |
| 786 | | ((rsp_device *)param)->ccfunc_rsp_lsv_simd(); |
| 787 | | } |
| 788 | | #endif |
| 789 | | |
| 790 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 791 | | inline void rsp_device::ccfunc_rsp_lsv_scalar() |
| 792 | | { |
| 793 | | UINT32 op = m_rsp_state->arg0; |
| 794 | | int dest = (op >> 16) & 0x1f; |
| 795 | | int base = (op >> 21) & 0x1f; |
| 796 | | int index = (op >> 7) & 0xe; |
| 797 | | int offset = (op & 0x7f); |
| 798 | | if (offset & 0x40) |
| 799 | | { |
| 800 | | offset |= 0xffffffc0; |
| 801 | | } |
| 802 | | |
| 803 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); |
| 804 | | int end = index + 2; |
| 805 | | for (int i = index; i < end; i++) |
| 806 | | { |
| 807 | | VREG_B(dest, i) = DM_READ8(ea); |
| 808 | | ea++; |
| 809 | | } |
| 810 | | } |
| 811 | | |
| 812 | | static void cfunc_rsp_lsv_scalar(void *param) |
| 813 | | { |
| 814 | | ((rsp_device *)param)->ccfunc_rsp_lsv_scalar(); |
| 815 | | } |
| 816 | | #endif |
| 817 | | |
| 818 | | #if USE_SIMD |
| 819 | | // LLV |
| 820 | | // |
| 821 | | // 31 25 20 15 10 6 0 |
| 822 | | // -------------------------------------------------- |
| 823 | | // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 824 | | // -------------------------------------------------- |
| 825 | | // |
| 826 | | // Loads 4 bytes starting from vector byte index |
| 827 | | |
| 828 | | inline void rsp_device::ccfunc_rsp_llv_simd() |
| 829 | | { |
| 830 | | UINT32 op = m_rsp_state->arg0; |
| 831 | | UINT32 ea = 0; |
| 832 | | int dest = (op >> 16) & 0x1f; |
| 833 | | int base = (op >> 21) & 0x1f; |
| 834 | | int index = (op >> 7) & 0xc; |
| 835 | | int offset = (op & 0x7f); |
| 836 | | if (offset & 0x40) |
| 837 | | { |
| 838 | | offset |= 0xffffffc0; |
| 839 | | } |
| 840 | | |
| 841 | | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 842 | | |
| 843 | | int end = index + 4; |
| 844 | | |
| 845 | | for (int i = index; i < end; i++) |
| 846 | | { |
| 847 | | UINT16 element; |
| 848 | | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 849 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 850 | | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 851 | | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 852 | | ea++; |
| 853 | | } |
| 854 | | } |
| 855 | | |
| 856 | | static void cfunc_rsp_llv_simd(void *param) |
| 857 | | { |
| 858 | | ((rsp_device *)param)->ccfunc_rsp_llv_simd(); |
| 859 | | } |
| 860 | | #endif |
| 861 | | |
| 862 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 863 | | |
| 864 | | inline void rsp_device::ccfunc_rsp_llv_scalar() |
| 865 | | { |
| 866 | | UINT32 op = m_rsp_state->arg0; |
| 867 | | UINT32 ea = 0; |
| 868 | | int dest = (op >> 16) & 0x1f; |
| 869 | | int base = (op >> 21) & 0x1f; |
| 870 | | int index = (op >> 7) & 0xc; |
| 871 | | int offset = (op & 0x7f); |
| 872 | | if (offset & 0x40) |
| 873 | | { |
| 874 | | offset |= 0xffffffc0; |
| 875 | | } |
| 876 | | |
| 877 | | ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 878 | | |
| 879 | | int end = index + 4; |
| 880 | | |
| 881 | | for (int i = index; i < end; i++) |
| 882 | | { |
| 883 | | VREG_B(dest, i) = DM_READ8(ea); |
| 884 | | ea++; |
| 885 | | } |
| 886 | | } |
| 887 | | |
| 888 | | static void cfunc_rsp_llv_scalar(void *param) |
| 889 | | { |
| 890 | | ((rsp_device *)param)->ccfunc_rsp_llv_scalar(); |
| 891 | | } |
| 892 | | #endif |
| 893 | | |
| 894 | | #if USE_SIMD |
| 895 | | // LDV |
| 896 | | // |
| 897 | | // 31 25 20 15 10 6 0 |
| 898 | | // -------------------------------------------------- |
| 899 | | // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 900 | | // -------------------------------------------------- |
| 901 | | // |
| 902 | | // Loads 8 bytes starting from vector byte index |
| 903 | | |
| 904 | | inline void rsp_device::ccfunc_rsp_ldv_simd() |
| 905 | | { |
| 906 | | UINT32 op = m_rsp_state->arg0; |
| 907 | | UINT32 ea = 0; |
| 908 | | int dest = (op >> 16) & 0x1f; |
| 909 | | int base = (op >> 21) & 0x1f; |
| 910 | | int index = (op >> 7) & 0x8; |
| 911 | | int offset = (op & 0x7f); |
| 912 | | if (offset & 0x40) |
| 913 | | { |
| 914 | | offset |= 0xffffffc0; |
| 915 | | } |
| 916 | | |
| 917 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 918 | | |
| 919 | | int end = index + 8; |
| 920 | | |
| 921 | | for (int i = index; i < end; i++) |
| 922 | | { |
| 923 | | UINT16 element; |
| 924 | | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 925 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 926 | | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 927 | | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 928 | | ea++; |
| 929 | | } |
| 930 | | } |
| 931 | | |
| 932 | | static void cfunc_rsp_ldv_simd(void *param) |
| 933 | | { |
| 934 | | ((rsp_device *)param)->ccfunc_rsp_ldv_simd(); |
| 935 | | } |
| 936 | | #endif |
| 937 | | |
| 938 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 939 | | |
| 940 | | inline void rsp_device::ccfunc_rsp_ldv_scalar() |
| 941 | | { |
| 942 | | UINT32 op = m_rsp_state->arg0; |
| 943 | | UINT32 ea = 0; |
| 944 | | int dest = (op >> 16) & 0x1f; |
| 945 | | int base = (op >> 21) & 0x1f; |
| 946 | | int index = (op >> 7) & 0x8; |
| 947 | | int offset = (op & 0x7f); |
| 948 | | if (offset & 0x40) |
| 949 | | { |
| 950 | | offset |= 0xffffffc0; |
| 951 | | } |
| 952 | | |
| 953 | | ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 954 | | |
| 955 | | int end = index + 8; |
| 956 | | |
| 957 | | for (int i = index; i < end; i++) |
| 958 | | { |
| 959 | | VREG_B(dest, i) = DM_READ8(ea); |
| 960 | | ea++; |
| 961 | | } |
| 962 | | } |
| 963 | | |
| 964 | | static void cfunc_rsp_ldv_scalar(void *param) |
| 965 | | { |
| 966 | | ((rsp_device *)param)->ccfunc_rsp_ldv_scalar(); |
| 967 | | } |
| 968 | | #endif |
| 969 | | |
| 970 | | #if USE_SIMD |
| 971 | | // LQV |
| 972 | | // |
| 973 | | // 31 25 20 15 10 6 0 |
| 974 | | // -------------------------------------------------- |
| 975 | | // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 976 | | // -------------------------------------------------- |
| 977 | | // |
| 978 | | // Loads up to 16 bytes starting from vector byte index |
| 979 | | |
| 980 | | inline void rsp_device::ccfunc_rsp_lqv_simd() |
| 981 | | { |
| 982 | | UINT32 op = m_rsp_state->arg0; |
| 983 | | int dest = (op >> 16) & 0x1f; |
| 984 | | int base = (op >> 21) & 0x1f; |
| 985 | | int offset = (op & 0x7f); |
| 986 | | if (offset & 0x40) |
| 987 | | { |
| 988 | | offset |= 0xffffffc0; |
| 989 | | } |
| 990 | | |
| 991 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 992 | | |
| 993 | | int end = 16 - (ea & 0xf); |
| 994 | | if (end > 16) end = 16; |
| 995 | | |
| 996 | | for (int i = 0; i < end; i++) |
| 997 | | { |
| 998 | | UINT16 element; |
| 999 | | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 1000 | | element &= 0xff00 >> ((1 - (i & 1)) * 8); |
| 1001 | | element |= DM_READ8(ea) << ((1 - (i & 1)) * 8); |
| 1002 | | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 1003 | | ea++; |
| 1004 | | } |
| 1005 | | } |
| 1006 | | |
| 1007 | | static void cfunc_rsp_lqv_simd(void *param) |
| 1008 | | { |
| 1009 | | ((rsp_device *)param)->ccfunc_rsp_lqv_simd(); |
| 1010 | | } |
| 1011 | | #endif |
| 1012 | | |
| 1013 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1014 | | |
| 1015 | | inline void rsp_device::ccfunc_rsp_lqv_scalar() |
| 1016 | | { |
| 1017 | | UINT32 op = m_rsp_state->arg0; |
| 1018 | | int dest = (op >> 16) & 0x1f; |
| 1019 | | int base = (op >> 21) & 0x1f; |
| 1020 | | int offset = (op & 0x7f); |
| 1021 | | if (offset & 0x40) |
| 1022 | | { |
| 1023 | | offset |= 0xffffffc0; |
| 1024 | | } |
| 1025 | | |
| 1026 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1027 | | |
| 1028 | | int end = 16 - (ea & 0xf); |
| 1029 | | if (end > 16) end = 16; |
| 1030 | | |
| 1031 | | for (int i = 0; i < end; i++) |
| 1032 | | { |
| 1033 | | VREG_B(dest, i) = DM_READ8(ea); |
| 1034 | | ea++; |
| 1035 | | } |
| 1036 | | } |
| 1037 | | |
| 1038 | | static void cfunc_rsp_lqv_scalar(void *param) |
| 1039 | | { |
| 1040 | | ((rsp_device *)param)->ccfunc_rsp_lqv_scalar(); |
| 1041 | | } |
| 1042 | | #endif |
| 1043 | | |
| 1044 | | #if USE_SIMD |
| 1045 | | // LRV |
| 1046 | | // |
| 1047 | | // 31 25 20 15 10 6 0 |
| 1048 | | // -------------------------------------------------- |
| 1049 | | // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 1050 | | // -------------------------------------------------- |
| 1051 | | // |
| 1052 | | // Loads up to 16 bytes starting from right side until 16-byte boundary |
| 1053 | | |
| 1054 | | inline void rsp_device::ccfunc_rsp_lrv_simd() |
| 1055 | | { |
| 1056 | | UINT32 op = m_rsp_state->arg0; |
| 1057 | | int dest = (op >> 16) & 0x1f; |
| 1058 | | int base = (op >> 21) & 0x1f; |
| 1059 | | int index = (op >> 7) & 0xf; |
| 1060 | | int offset = (op & 0x7f); |
| 1061 | | if (offset & 0x40) |
| 1062 | | { |
| 1063 | | offset |= 0xffffffc0; |
| 1064 | | } |
| 1065 | | |
| 1066 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1067 | | |
| 1068 | | index = 16 - ((ea & 0xf) - index); |
| 1069 | | ea &= ~0xf; |
| 1070 | | |
| 1071 | | for (int i = index; i < 16; i++) |
| 1072 | | { |
| 1073 | | UINT16 element; |
| 1074 | | SIMD_EXTRACT16(m_xv[dest], element, (i >> 1)); |
| 1075 | | element &= 0xff00 >> ((1-(i & 1)) * 8); |
| 1076 | | element |= DM_READ8(ea) << ((1-(i & 1)) * 8); |
| 1077 | | SIMD_INSERT16(m_xv[dest], element, (i >> 1)); |
| 1078 | | ea++; |
| 1079 | | } |
| 1080 | | } |
| 1081 | | |
| 1082 | | static void cfunc_rsp_lrv_simd(void *param) |
| 1083 | | { |
| 1084 | | ((rsp_device *)param)->ccfunc_rsp_lrv_simd(); |
| 1085 | | } |
| 1086 | | #endif |
| 1087 | | |
| 1088 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1089 | | |
| 1090 | | inline void rsp_device::ccfunc_rsp_lrv_scalar() |
| 1091 | | { |
| 1092 | | UINT32 op = m_rsp_state->arg0; |
| 1093 | | int dest = (op >> 16) & 0x1f; |
| 1094 | | int base = (op >> 21) & 0x1f; |
| 1095 | | int index = (op >> 7) & 0xf; |
| 1096 | | int offset = (op & 0x7f); |
| 1097 | | if (offset & 0x40) |
| 1098 | | { |
| 1099 | | offset |= 0xffffffc0; |
| 1100 | | } |
| 1101 | | |
| 1102 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 1103 | | |
| 1104 | | index = 16 - ((ea & 0xf) - index); |
| 1105 | | ea &= ~0xf; |
| 1106 | | |
| 1107 | | for (int i = index; i < 16; i++) |
| 1108 | | { |
| 1109 | | VREG_B(dest, i) = DM_READ8(ea); |
| 1110 | | ea++; |
| 1111 | | } |
| 1112 | | } |
| 1113 | | |
| 1114 | | static void cfunc_rsp_lrv_scalar(void *param) |
| 1115 | | { |
| 1116 | | ((rsp_device *)param)->ccfunc_rsp_lrv_scalar(); |
| 1117 | | } |
| 1118 | | #endif |
| 1119 | | |
| 1120 | | #if USE_SIMD |
| 1121 | | // LPV |
| 1122 | | // |
| 1123 | | // 31 25 20 15 10 6 0 |
| 1124 | | // -------------------------------------------------- |
| 1125 | | // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 1126 | | // -------------------------------------------------- |
| 1127 | | // |
| 1128 | | // Loads a byte as the upper 8 bits of each element |
| 1129 | | |
| 1130 | | inline void rsp_device::ccfunc_rsp_lpv_simd() |
| 1131 | | { |
| 1132 | | UINT32 op = m_rsp_state->arg0; |
| 1133 | | int dest = (op >> 16) & 0x1f; |
| 1134 | | int base = (op >> 21) & 0x1f; |
| 1135 | | int index = (op >> 7) & 0xf; |
| 1136 | | int offset = (op & 0x7f); |
| 1137 | | if (offset & 0x40) |
| 1138 | | { |
| 1139 | | offset |= 0xffffffc0; |
| 1140 | | } |
| 1141 | | |
| 1142 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1143 | | |
| 1144 | | for (int i = 0; i < 8; i++) |
| 1145 | | { |
| 1146 | | SIMD_INSERT16(m_xv[dest], DM_READ8(ea + (((16-index) + i) & 0xf)) << 8, i); |
| 1147 | | } |
| 1148 | | } |
| 1149 | | |
| 1150 | | static void cfunc_rsp_lpv_simd(void *param) |
| 1151 | | { |
| 1152 | | ((rsp_device *)param)->ccfunc_rsp_lpv_simd(); |
| 1153 | | } |
| 1154 | | #endif |
| 1155 | | |
| 1156 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1157 | | |
| 1158 | | inline void rsp_device::ccfunc_rsp_lpv_scalar() |
| 1159 | | { |
| 1160 | | UINT32 op = m_rsp_state->arg0; |
| 1161 | | int dest = (op >> 16) & 0x1f; |
| 1162 | | int base = (op >> 21) & 0x1f; |
| 1163 | | int index = (op >> 7) & 0xf; |
| 1164 | | int offset = (op & 0x7f); |
| 1165 | | if (offset & 0x40) |
| 1166 | | { |
| 1167 | | offset |= 0xffffffc0; |
| 1168 | | } |
| 1169 | | |
| 1170 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1171 | | |
| 1172 | | for (int i = 0; i < 8; i++) |
| 1173 | | { |
| 1174 | | W_VREG_S(dest, i) = DM_READ8(ea + (((16-index) + i) & 0xf)) << 8; |
| 1175 | | } |
| 1176 | | } |
| 1177 | | |
| 1178 | | static void cfunc_rsp_lpv_scalar(void *param) |
| 1179 | | { |
| 1180 | | ((rsp_device *)param)->ccfunc_rsp_lpv_scalar(); |
| 1181 | | } |
| 1182 | | #endif |
| 1183 | | |
| 1184 | | #if USE_SIMD |
| 1185 | | // LUV |
| 1186 | | // |
| 1187 | | // 31 25 20 15 10 6 0 |
| 1188 | | // -------------------------------------------------- |
| 1189 | | // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 1190 | | // -------------------------------------------------- |
| 1191 | | // |
| 1192 | | // Loads a byte as the bits 14-7 of each element |
| 1193 | | |
| 1194 | | inline void rsp_device::ccfunc_rsp_luv_simd() |
| 1195 | | { |
| 1196 | | UINT32 op = m_rsp_state->arg0; |
| 1197 | | int dest = (op >> 16) & 0x1f; |
| 1198 | | int base = (op >> 21) & 0x1f; |
| 1199 | | int index = (op >> 7) & 0xf; |
| 1200 | | int offset = (op & 0x7f); |
| 1201 | | if (offset & 0x40) |
| 1202 | | { |
| 1203 | | offset |= 0xffffffc0; |
| 1204 | | } |
| 1205 | | |
| 1206 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 1207 | | |
| 1208 | | for (int i = 0; i < 8; i++) |
| 1209 | | { |
| 1210 | | SIMD_INSERT16(m_xv[dest], DM_READ8(ea + (((16-index) + i) & 0xf)) << 7, i); |
| 1211 | | } |
| 1212 | | } |
| 1213 | | |
| 1214 | | static void cfunc_rsp_luv_simd(void *param) |
| 1215 | | { |
| 1216 | | ((rsp_device *)param)->ccfunc_rsp_luv_simd(); |
| 1217 | | } |
| 1218 | | #endif |
| 1219 | | |
| 1220 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1221 | | /* Scalar LUV handler: decodes dest/base/index/offset from the opcode in arg0, forms ea = r[base] + offset*8, and assigns (byte << 7) to each of the 8 elements through W_VREG_S. */ |
| 1222 | | inline void rsp_device::ccfunc_rsp_luv_scalar() |
| 1223 | | { |
| 1224 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1225 | | int dest = (op >> 16) & 0x1f; |
| 1226 | | int base = (op >> 21) & 0x1f; |
| 1227 | | int index = (op >> 7) & 0xf; |
| 1228 | | int offset = (op & 0x7f); |
| 1229 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1230 | | { |
| 1231 | | offset |= 0xffffffc0; |
| 1232 | | } |
| 1233 | | |
| 1234 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1235 | | |
| 1236 | | for (int i = 0; i < 8; i++) |
| 1237 | | { |
| 1238 | | W_VREG_S(dest, i) = DM_READ8(ea + (((16-index) + i) & 0xf)) << 7; /* element i reads the byte at offset (16 - index + i) mod 16 from ea */ |
| 1239 | | } |
| 1240 | | } |
| 1241 | | |
| 1242 | | static void cfunc_rsp_luv_scalar(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1243 | | { |
| 1244 | | ((rsp_device *)param)->ccfunc_rsp_luv_scalar(); |
| 1245 | | } |
| 1246 | | #endif |
| 1247 | | |
| 1248 | | #if USE_SIMD |
| 1249 | | // LHV |
| 1250 | | // |
| 1251 | | // 31 25 20 15 10 6 0 |
| 1252 | | // -------------------------------------------------- |
| 1253 | | // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 1254 | | // -------------------------------------------------- |
| 1255 | | // B = base register, T = destination vector, I = index, Offset = 7-bit offset (sign-extended from bit 6, scaled by 16) |
| 1256 | | // Loads a byte as the bits 14-7 of each element, with 2-byte stride: element i takes the byte at ea + ((16 - index + 2*i) & 0xf), shifted left by 7 |
| 1257 | | /* SIMD variant: each of the 8 results is placed into m_xv[dest] with SIMD_INSERT16. */ |
| 1258 | | inline void rsp_device::ccfunc_rsp_lhv_simd() |
| 1259 | | { |
| 1260 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1261 | | int dest = (op >> 16) & 0x1f; |
| 1262 | | int base = (op >> 21) & 0x1f; |
| 1263 | | int index = (op >> 7) & 0xf; |
| 1264 | | int offset = (op & 0x7f); |
| 1265 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1266 | | { |
| 1267 | | offset |= 0xffffffc0; |
| 1268 | | } |
| 1269 | | |
| 1270 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1271 | | |
| 1272 | | for (int i = 0; i < 8; i++) |
| 1273 | | { |
| 1274 | | SIMD_INSERT16(m_xv[dest], DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7, i); /* i<<1 gives the 2-byte stride; the byte offset wraps within 0..15 */ |
| 1275 | | } |
| 1276 | | } |
| 1277 | | |
| 1278 | | static void cfunc_rsp_lhv_simd(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1279 | | { |
| 1280 | | ((rsp_device *)param)->ccfunc_rsp_lhv_simd(); |
| 1281 | | } |
| 1282 | | #endif |
| 1283 | | |
| 1284 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1285 | | /* Scalar LHV handler: decodes dest/base/index/offset from the opcode in arg0, forms ea = r[base] + offset*16, and assigns (byte << 7) to each of the 8 elements through W_VREG_S, stepping 2 bytes per element. */ |
| 1286 | | inline void rsp_device::ccfunc_rsp_lhv_scalar() |
| 1287 | | { |
| 1288 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1289 | | int dest = (op >> 16) & 0x1f; |
| 1290 | | int base = (op >> 21) & 0x1f; |
| 1291 | | int index = (op >> 7) & 0xf; |
| 1292 | | int offset = (op & 0x7f); |
| 1293 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1294 | | { |
| 1295 | | offset |= 0xffffffc0; |
| 1296 | | } |
| 1297 | | |
| 1298 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1299 | | |
| 1300 | | for (int i = 0; i < 8; i++) |
| 1301 | | { |
| 1302 | | W_VREG_S(dest, i) = DM_READ8(ea + (((16-index) + (i<<1)) & 0xf)) << 7; /* element i reads the byte at offset (16 - index + 2*i) mod 16 from ea */ |
| 1303 | | } |
| 1304 | | } |
| 1305 | | |
| 1306 | | static void cfunc_rsp_lhv_scalar(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1307 | | { |
| 1308 | | ((rsp_device *)param)->ccfunc_rsp_lhv_scalar(); |
| 1309 | | } |
| 1310 | | #endif |
| 1311 | | |
| 1312 | | #if USE_SIMD |
| 1313 | | // LFV |
| 1314 | | // 31 25 20 15 10 6 0 |
| 1315 | | // -------------------------------------------------- |
| 1316 | | // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 1317 | | // -------------------------------------------------- |
| 1318 | | // B = base register, T = destination vector, I = index, Offset = 7-bit offset (sign-extended from bit 6, scaled by 16) |
| 1319 | | // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride: four elements starting at element (index >> 1) are written |
| 1320 | | /* SIMD variant: the four results are placed into m_xv[dest] with SIMD_INSERT16. */ |
| 1321 | | inline void rsp_device::ccfunc_rsp_lfv_simd() |
| 1322 | | { |
| 1323 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1324 | | int dest = (op >> 16) & 0x1f; |
| 1325 | | int base = (op >> 21) & 0x1f; |
| 1326 | | int index = (op >> 7) & 0xf; |
| 1327 | | int offset = (op & 0x7f); |
| 1328 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1329 | | { |
| 1330 | | offset |= 0xffffffc0; |
| 1331 | | } |
| 1332 | | |
| 1333 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1334 | | |
| 1335 | | // not sure what happens if 16-byte boundary is crossed... |
| 1336 | | |
| 1337 | | int end = (index >> 1) + 4; /* NOTE(review): for index >= 10 this exceeds 8, so the loop below names elements 8..10; nothing here wraps or clamps them - confirm how SIMD_INSERT16 treats such an element number */ |
| 1338 | | |
| 1339 | | for (int i = index >> 1; i < end; i++) |
| 1340 | | { |
| 1341 | | SIMD_INSERT16(m_xv[dest], DM_READ8(ea) << 7, i); |
| 1342 | | ea += 4; /* next source byte is 4 bytes further on */ |
| 1343 | | } |
| 1344 | | } |
| 1345 | | |
| 1346 | | static void cfunc_rsp_lfv_simd(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1347 | | { |
| 1348 | | ((rsp_device *)param)->ccfunc_rsp_lfv_simd(); |
| 1349 | | } |
| 1350 | | #endif |
| 1351 | | |
| 1352 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1353 | | /* Scalar LFV handler: forms ea = r[base] + offset*16 and assigns (byte << 7) through W_VREG_S to four consecutive elements starting at element (index >> 1), reading one byte every 4 bytes. */ |
| 1354 | | inline void rsp_device::ccfunc_rsp_lfv_scalar() |
| 1355 | | { |
| 1356 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1357 | | int dest = (op >> 16) & 0x1f; |
| 1358 | | int base = (op >> 21) & 0x1f; |
| 1359 | | int index = (op >> 7) & 0xf; |
| 1360 | | int offset = (op & 0x7f); |
| 1361 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1362 | | { |
| 1363 | | offset |= 0xffffffc0; |
| 1364 | | } |
| 1365 | | |
| 1366 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1367 | | |
| 1368 | | // not sure what happens if 16-byte boundary is crossed... |
| 1369 | | |
| 1370 | | int end = (index >> 1) + 4; /* NOTE(review): for index >= 10 this exceeds 8, so the loop below names elements 8..10; nothing here wraps or clamps them - confirm W_VREG_S cannot index past the register */ |
| 1371 | | |
| 1372 | | for (int i = index >> 1; i < end; i++) |
| 1373 | | { |
| 1374 | | W_VREG_S(dest, i) = DM_READ8(ea) << 7; |
| 1375 | | ea += 4; /* next source byte is 4 bytes further on */ |
| 1376 | | } |
| 1377 | | } |
| 1378 | | |
| 1379 | | static void cfunc_rsp_lfv_scalar(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1380 | | { |
| 1381 | | ((rsp_device *)param)->ccfunc_rsp_lfv_scalar(); |
| 1382 | | } |
| 1383 | | #endif |
| 1384 | | |
| 1385 | | #if USE_SIMD |
| 1386 | | // LWV |
| 1387 | | // |
| 1388 | | // 31 25 20 15 10 6 0 |
| 1389 | | // -------------------------------------------------- |
| 1390 | | // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 1391 | | // -------------------------------------------------- |
| 1392 | | // B = base register, T = destination vector, I = index, Offset = 7-bit offset (sign-extended from bit 6, scaled by 16) |
| 1393 | | // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 1394 | | // after byte index 15. As coded: 16 bytes are read, one every 4 bytes from ea, the first going to byte slot (16 - index) & 0xf |
| 1395 | | /* SIMD variant: bytes are gathered in val[] and the whole register is then built in one _mm_set_epi8 call. */ |
| 1396 | | inline void rsp_device::ccfunc_rsp_lwv_simd() |
| 1397 | | { |
| 1398 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1399 | | int dest = (op >> 16) & 0x1f; |
| 1400 | | int base = (op >> 21) & 0x1f; |
| 1401 | | int index = (op >> 7) & 0xf; |
| 1402 | | int offset = (op & 0x7f); |
| 1403 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1404 | | { |
| 1405 | | offset |= 0xffffffc0; |
| 1406 | | } |
| 1407 | | |
| 1408 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1409 | | int end = (16 - index) + 16; /* exactly 16 iterations, so every val[] slot is written once */ |
| 1410 | | |
| 1411 | | UINT8 val[16]; |
| 1412 | | for (int i = (16 - index); i < end; i++) |
| 1413 | | { |
| 1414 | | val[i & 0xf] = DM_READ8(ea); /* slot number wraps from 15 back to 0 */ |
| 1415 | | ea += 4; |
| 1416 | | } |
| 1417 | | |
| 1418 | | m_xv[dest] = _mm_set_epi8(val[15], val[14], val[13], val[12], val[11], val[10], val[ 9], val[ 8], /* NOTE(review): _mm_set_epi8 takes its arguments highest byte first, so val[0] lands in the lowest byte - confirm this matches the byte order the scalar path gets through VREG_B */ |
| 1419 | | val[ 7], val[ 6], val[ 5], val[ 4], val[ 3], val[ 2], val[ 1], val[ 0]); |
| 1420 | | } |
| 1421 | | |
| 1422 | | static void cfunc_rsp_lwv_simd(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1423 | | { |
| 1424 | | ((rsp_device *)param)->ccfunc_rsp_lwv_simd(); |
| 1425 | | } |
| 1426 | | #endif |
| 1427 | | |
| 1428 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1429 | | /* Scalar LWV handler: forms ea = r[base] + offset*16 and writes 16 bytes through VREG_B, reading one byte every 4 bytes; the first byte goes to byte slot (16 - index) & 0xf and the slot number wraps after 15. */ |
| 1430 | | inline void rsp_device::ccfunc_rsp_lwv_scalar() |
| 1431 | | { |
| 1432 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1433 | | int dest = (op >> 16) & 0x1f; |
| 1434 | | int base = (op >> 21) & 0x1f; |
| 1435 | | int index = (op >> 7) & 0xf; |
| 1436 | | int offset = (op & 0x7f); |
| 1437 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1438 | | { |
| 1439 | | offset |= 0xffffffc0; |
| 1440 | | } |
| 1441 | | |
| 1442 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1443 | | int end = (16 - index) + 16; /* exactly 16 iterations, so every byte slot is written once */ |
| 1444 | | |
| 1445 | | for (int i = (16 - index); i < end; i++) |
| 1446 | | { |
| 1447 | | VREG_B(dest, i & 0xf) = DM_READ8(ea); /* slot number wraps from 15 back to 0 */ |
| 1448 | | ea += 4; |
| 1449 | | } |
| 1450 | | } |
| 1451 | | |
| 1452 | | static void cfunc_rsp_lwv_scalar(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1453 | | { |
| 1454 | | ((rsp_device *)param)->ccfunc_rsp_lwv_scalar(); |
| 1455 | | } |
| 1456 | | #endif |
| 1457 | | |
| 1458 | | #if USE_SIMD |
| 1459 | | // LTV |
| 1460 | | // |
| 1461 | | // 31 25 20 15 10 6 0 |
| 1462 | | // -------------------------------------------------- |
| 1463 | | // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 1464 | | // -------------------------------------------------- |
| 1465 | | // B = base register, T = first destination vector, I = index, Offset = signed 7-bit offset (scaled by 16) |
| 1466 | | // Loads one element to maximum of 8 vectors, while incrementing element index |
| 1467 | | /* SIMD variant: one big-endian 16-bit value per vector register, placed with SIMD_INSERT16. */ |
| 1468 | | inline void rsp_device::ccfunc_rsp_ltv_simd() |
| 1469 | | { |
| 1470 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1471 | | int dest = (op >> 16) & 0x1f; |
| 1472 | | int base = (op >> 21) & 0x1f; |
| 1473 | | int index = (op >> 7) & 0xf; |
| 1474 | | int offset = (int)((op & 0x7f) ^ 0x40) - 0x40; /* sign-extend the 7-bit offset from bit 6, as every other vector load here does; it was previously left unsigned */ |
| 1475 | | |
| 1476 | | // FIXME: has a small problem with odd indices |
| 1477 | | |
| 1478 | | int vs = dest; |
| 1479 | | int ve = dest + 8; |
| 1480 | | if (ve > 32) /* never run past the last vector register */ |
| 1481 | | { |
| 1482 | | ve = 32; |
| 1483 | | } |
| 1484 | | |
| 1485 | | int element = 7 - (index >> 1); /* this initial value is overwritten in the loop before it is read */ |
| 1486 | | |
| 1487 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1488 | | |
| 1489 | | ea = ((ea + 8) & ~0xf) + (index & 1); /* snap to a 16-byte line (rounding from ea + 8), then add 1 for odd indices */ |
| 1490 | | for (int i = vs; i < ve; i++) |
| 1491 | | { |
| 1492 | | element = (8 - (index >> 1) + (i - vs)) << 1; /* byte position inside the register; advances by one element per vector */ |
| 1493 | | UINT16 value = (DM_READ8(ea) << 8) | DM_READ8(ea + 1); /* first byte is the high half */ |
| 1494 | | SIMD_INSERT16(m_xv[i], value, (element >> 1) & 7); /* wrap the element number into 0..7, mirroring the & 0xf byte wrap of the scalar path */ |
| 1495 | | ea += 2; |
| 1496 | | } |
| 1497 | | } |
| 1498 | | |
| 1499 | | static void cfunc_rsp_ltv_simd(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1500 | | { |
| 1501 | | ((rsp_device *)param)->ccfunc_rsp_ltv_simd(); |
| 1502 | | } |
| 1503 | | #endif |
| 1504 | | |
| 1505 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1506 | | /* Scalar LTV handler: for up to 8 consecutive vector registers starting at dest, stores two bytes read from memory through VREG_B, moving one element further along in each successive register. */ |
| 1507 | | inline void rsp_device::ccfunc_rsp_ltv_scalar() |
| 1508 | | { |
| 1509 | | UINT32 op = m_rsp_state->arg0; /* opcode word; generate_lwc2 emits a store of it into arg0 ahead of the call */ |
| 1510 | | int dest = (op >> 16) & 0x1f; |
| 1511 | | int base = (op >> 21) & 0x1f; |
| 1512 | | int index = (op >> 7) & 0xf; |
| 1513 | | int offset = (int)((op & 0x7f) ^ 0x40) - 0x40; /* sign-extend the 7-bit offset from bit 6, as every other vector load here does; it was previously left unsigned */ |
| 1514 | | |
| 1515 | | // FIXME: has a small problem with odd indices |
| 1516 | | |
| 1517 | | int vs = dest; |
| 1518 | | int ve = dest + 8; |
| 1519 | | if (ve > 32) /* never run past the last vector register */ |
| 1520 | | { |
| 1521 | | ve = 32; |
| 1522 | | } |
| 1523 | | |
| 1524 | | int element = 7 - (index >> 1); /* this initial value is overwritten in the loop before it is read */ |
| 1525 | | |
| 1526 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1527 | | |
| 1528 | | ea = ((ea + 8) & ~0xf) + (index & 1); /* snap to a 16-byte line (rounding from ea + 8), then add 1 for odd indices */ |
| 1529 | | for (int i = vs; i < ve; i++) |
| 1530 | | { |
| 1531 | | element = (8 - (index >> 1) + (i - vs)) << 1; /* byte position inside the register; advances by one element per vector */ |
| 1532 | | VREG_B(i, (element & 0xf)) = DM_READ8(ea); /* byte position wraps within 0..15 */ |
| 1533 | | VREG_B(i, ((element + 1) & 0xf)) = DM_READ8(ea + 1); |
| 1534 | | ea += 2; |
| 1535 | | } |
| 1536 | | } |
| 1537 | | |
| 1538 | | static void cfunc_rsp_ltv_scalar(void *param) /* plain-C thunk handed to UML_CALLC by generate_lwc2; param is the rsp_device */ |
| 1539 | | { |
| 1540 | | ((rsp_device *)param)->ccfunc_rsp_ltv_scalar(); |
| 1541 | | } |
| 1542 | | #endif |
| 1543 | | |
| 1544 | | #if USE_SIMD && SIMUL_SIMD |
| 1545 | | inline void rsp_device::ccfunc_backup_regs() /* SIMD/scalar cross-check helper: snapshots memory and registers as they stand now, parks the current reciprocal/dp state in m_simd_*, and reloads that state from m_old_* */ |
| 1546 | | { |
| 1547 | | memcpy(m_old_dmem, m_dmem8, sizeof(m_old_dmem)); /* snapshot of data memory */ |
| 1548 | | memcpy(m_old_r, m_r, sizeof(m_r)); /* snapshot of the scalar registers */ |
| 1549 | | |
| 1550 | | m_simd_reciprocal_res = m_reciprocal_res; /* keep the values the preceding (SIMD) handler left behind */ |
| 1551 | | m_simd_reciprocal_high = m_reciprocal_high; |
| 1552 | | m_simd_dp_allowed = m_dp_allowed; |
| 1553 | | |
| 1554 | | m_reciprocal_res = m_old_reciprocal_res; /* m_old_* are assigned elsewhere; presumably the state from before the SIMD handler ran - TODO confirm */ |
| 1555 | | m_reciprocal_high = m_old_reciprocal_high; |
| 1556 | | m_dp_allowed = m_old_dp_allowed; |
| 1557 | | } |
| 1558 | | |
| 1559 | | static void cfunc_backup_regs(void *param) /* plain-C thunk; generate_lwc2 emits a call to it right after the SIMD handler when SIMUL_SIMD is set */ |
| 1560 | | { |
| 1561 | | ((rsp_device *)param)->ccfunc_backup_regs(); |
| 1562 | | } |
| 1563 | | |
| 1564 | | inline void rsp_device::ccfunc_restore_regs() /* SIMD/scalar cross-check helper: sets the scalar pass's results aside in m_scalar_*, then reinstates what ccfunc_backup_regs saved */ |
| 1565 | | { |
| 1566 | | memcpy(m_scalar_r, m_r, sizeof(m_r)); /* order matters: save the scalar result before m_r is overwritten on the next line */ |
| 1567 | | memcpy(m_r, m_old_r, sizeof(m_r)); |
| 1568 | | memcpy(m_scalar_dmem, m_dmem8, sizeof(m_scalar_dmem)); /* likewise for data memory */ |
| 1569 | | memcpy(m_dmem8, m_old_dmem, sizeof(m_old_dmem)); |
| 1570 | | |
| 1571 | | m_scalar_reciprocal_res = m_reciprocal_res; /* reciprocal/dp state as left by the scalar handler */ |
| 1572 | | m_scalar_reciprocal_high = m_reciprocal_high; |
| 1573 | | m_scalar_dp_allowed = m_dp_allowed; |
| 1574 | | |
| 1575 | | m_reciprocal_res = m_simd_reciprocal_res; /* back to the values parked by ccfunc_backup_regs */ |
| 1576 | | m_reciprocal_high = m_simd_reciprocal_high; |
| 1577 | | m_dp_allowed = m_simd_dp_allowed; |
| 1578 | | } |
| 1579 | | |
| 1580 | | static void cfunc_restore_regs(void *param) /* plain-C thunk; generate_lwc2 emits a call to it right after the scalar handler when SIMUL_SIMD is set */ |
| 1581 | | { |
| 1582 | | ((rsp_device *)param)->ccfunc_restore_regs(); |
| 1583 | | } |
| 1584 | | |
| 1585 | | inline void rsp_device::ccfunc_verify_regs() /* SIMD/scalar cross-check: compares both sets of state and calls fatalerror on the first difference, reporting the opcode from arg0 */ |
| 1586 | | { |
| 1587 | | int op = m_rsp_state->arg0; /* only used in the error messages */ |
| 1588 | | if (VEC_ACCUM_H(0) != ACCUM_H(0)) fatalerror("ACCUM_H element 0 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(0), ACCUM_H(0), op); /* accumulator high slices, elements 0-7 */ |
| 1589 | | if (VEC_ACCUM_H(1) != ACCUM_H(1)) fatalerror("ACCUM_H element 1 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(1), ACCUM_H(1), op); |
| 1590 | | if (VEC_ACCUM_H(2) != ACCUM_H(2)) fatalerror("ACCUM_H element 2 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(2), ACCUM_H(2), op); |
| 1591 | | if (VEC_ACCUM_H(3) != ACCUM_H(3)) fatalerror("ACCUM_H element 3 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(3), ACCUM_H(3), op); |
| 1592 | | if (VEC_ACCUM_H(4) != ACCUM_H(4)) fatalerror("ACCUM_H element 4 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(4), ACCUM_H(4), op); |
| 1593 | | if (VEC_ACCUM_H(5) != ACCUM_H(5)) fatalerror("ACCUM_H element 5 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(5), ACCUM_H(5), op); |
| 1594 | | if (VEC_ACCUM_H(6) != ACCUM_H(6)) fatalerror("ACCUM_H element 6 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(6), ACCUM_H(6), op); |
| 1595 | | if (VEC_ACCUM_H(7) != ACCUM_H(7)) fatalerror("ACCUM_H element 7 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_H(7), ACCUM_H(7), op); |
| 1596 | | if (VEC_ACCUM_M(0) != ACCUM_M(0)) fatalerror("ACCUM_M element 0 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(0), ACCUM_M(0), op); /* accumulator middle slices, elements 0-7 */ |
| 1597 | | if (VEC_ACCUM_M(1) != ACCUM_M(1)) fatalerror("ACCUM_M element 1 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(1), ACCUM_M(1), op); |
| 1598 | | if (VEC_ACCUM_M(2) != ACCUM_M(2)) fatalerror("ACCUM_M element 2 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(2), ACCUM_M(2), op); |
| 1599 | | if (VEC_ACCUM_M(3) != ACCUM_M(3)) fatalerror("ACCUM_M element 3 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(3), ACCUM_M(3), op); |
| 1600 | | if (VEC_ACCUM_M(4) != ACCUM_M(4)) fatalerror("ACCUM_M element 4 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(4), ACCUM_M(4), op); |
| 1601 | | if (VEC_ACCUM_M(5) != ACCUM_M(5)) fatalerror("ACCUM_M element 5 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(5), ACCUM_M(5), op); |
| 1602 | | if (VEC_ACCUM_M(6) != ACCUM_M(6)) fatalerror("ACCUM_M element 6 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(6), ACCUM_M(6), op); |
| 1603 | | if (VEC_ACCUM_M(7) != ACCUM_M(7)) fatalerror("ACCUM_M element 7 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_M(7), ACCUM_M(7), op); |
| 1604 | | if (VEC_ACCUM_L(0) != ACCUM_L(0)) fatalerror("ACCUM_L element 0 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(0), ACCUM_L(0), op); /* accumulator low slices, elements 0-7 */ |
| 1605 | | if (VEC_ACCUM_L(1) != ACCUM_L(1)) fatalerror("ACCUM_L element 1 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(1), ACCUM_L(1), op); |
| 1606 | | if (VEC_ACCUM_L(2) != ACCUM_L(2)) fatalerror("ACCUM_L element 2 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(2), ACCUM_L(2), op); |
| 1607 | | if (VEC_ACCUM_L(3) != ACCUM_L(3)) fatalerror("ACCUM_L element 3 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(3), ACCUM_L(3), op); |
| 1608 | | if (VEC_ACCUM_L(4) != ACCUM_L(4)) fatalerror("ACCUM_L element 4 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(4), ACCUM_L(4), op); |
| 1609 | | if (VEC_ACCUM_L(5) != ACCUM_L(5)) fatalerror("ACCUM_L element 5 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(5), ACCUM_L(5), op); |
| 1610 | | if (VEC_ACCUM_L(6) != ACCUM_L(6)) fatalerror("ACCUM_L element 6 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(6), ACCUM_L(6), op); |
| 1611 | | if (VEC_ACCUM_L(7) != ACCUM_L(7)) fatalerror("ACCUM_L element 7 mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", VEC_ACCUM_L(7), ACCUM_L(7), op); |
| 1612 | | for (int i = 0; i < 32; i++) /* one pass covers scalar register i and all 8 elements of vector register i */ |
| 1613 | | { |
| 1614 | | if (m_rsp_state->r[i] != m_scalar_r[i]) fatalerror("r[%d] mismatch (SIMD %08x vs. Scalar %08x) after op: %08x\n", i, m_rsp_state->r[i], m_scalar_r[i], op); |
| 1615 | | for (int el = 0; el < 8; el++) |
| 1616 | | { |
| 1617 | | UINT16 out; |
| 1618 | | SIMD_EXTRACT16(m_xv[i], out, el); /* fills 'out' with element el of the SIMD register */ |
| 1619 | | if ((UINT16)VREG_S(i, el) != out) fatalerror("Vector %d element %d mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", i, el, out, (UINT16)VREG_S(i, el), op); |
| 1620 | | } |
| 1621 | | } |
| 1622 | | for (int i = 0; i < 4096; i++) /* byte-by-byte comparison of 4096 bytes of data memory */ |
| 1623 | | { |
| 1624 | | if (m_dmem8[i] != m_scalar_dmem[i]) fatalerror("dmem[%d] mismatch (SIMD %02x vs. Scalar %02x) after op: %08x\n", i, m_dmem8[i], m_scalar_dmem[i], op); |
| 1625 | | } |
| 1626 | | for (int i = 0; i < 5; i++) /* five flag vectors, 8 elements each */ |
| 1627 | | { |
| 1628 | | for (int el = 0; el < 8; el++) |
| 1629 | | { |
| 1630 | | UINT16 out; |
| 1631 | | SIMD_EXTRACT16(m_xvflag[i], out, el); |
| 1632 | | if (m_vflag[i][el] != out) fatalerror("flag[%d][%d] mismatch (SIMD %04x vs. Scalar %04x) after op: %08x\n", i, el, out, m_vflag[i][el], op); |
| 1633 | | } |
| 1634 | | } |
| 1635 | | } |
| 1636 | | |
| 1637 | | static void cfunc_verify_regs(void *param) /* plain-C thunk; generate_lwc2 emits a call to it as the last step of each SIMUL_SIMD sequence */ |
| 1638 | | { |
| 1639 | | ((rsp_device *)param)->ccfunc_verify_regs(); |
| 1640 | | } |
| 1641 | | #endif |
| 1642 | | |
| 1643 | | #if USE_SIMD |
| 1644 | | int rsp_device::generate_lwc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) /* USE_SIMD build: appends UML for one vector-load opcode to 'block'; returns TRUE for sub-opcodes 0x00-0x0b, FALSE for anything else. 'compiler' is not used in this body. */ |
| 1645 | | { |
| 1646 | | //int loopdest; |
| 1647 | | UINT32 op = desc->opptr.l[0]; |
| 1648 | | //int dest = (op >> 16) & 0x1f; |
| 1649 | | //int base = (op >> 21) & 0x1f; |
| 1650 | | //int index = (op >> 7) & 0xf; |
| 1651 | | int offset = (op & 0x7f); /* NOTE(review): computed and sign-extended below but never read - its only mention is the commented-out UML_ADD in the LBV case */ |
| 1652 | | //int skip; |
| 1653 | | if (offset & 0x40) |
| 1654 | | { |
| 1655 | | offset |= 0xffffffc0; |
| 1656 | | } |
| 1657 | | |
| 1658 | | switch ((op >> 11) & 0x1f) /* bits 15-11 pick the load variant; every case follows the same pattern */ |
| 1659 | | { |
| 1660 | | case 0x00: /* LBV */ |
| 1661 | | //UML_ADD(block, I0, R32(RSREG), offset); |
| 1662 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1663 | | UML_CALLC(block, cfunc_rsp_lbv_simd, this); /* the handler decodes the opcode itself from arg0 */ |
| 1664 | | #if SIMUL_SIMD |
| 1665 | | UML_CALLC(block, cfunc_backup_regs, this); /* cross-check sequence: snapshot, run the scalar twin, swap state back, compare */ |
| 1666 | | UML_CALLC(block, cfunc_rsp_lbv_scalar, this); |
| 1667 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1668 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1669 | | #endif |
| 1670 | | return TRUE; |
| 1671 | | case 0x01: /* LSV */ |
| 1672 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1673 | | UML_CALLC(block, cfunc_rsp_lsv_simd, this); |
| 1674 | | #if SIMUL_SIMD |
| 1675 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1676 | | UML_CALLC(block, cfunc_rsp_lsv_scalar, this); |
| 1677 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1678 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1679 | | #endif |
| 1680 | | return TRUE; |
| 1681 | | case 0x02: /* LLV */ |
| 1682 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1683 | | UML_CALLC(block, cfunc_rsp_llv_simd, this); |
| 1684 | | #if SIMUL_SIMD |
| 1685 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1686 | | UML_CALLC(block, cfunc_rsp_llv_scalar, this); |
| 1687 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1688 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1689 | | #endif |
| 1690 | | return TRUE; |
| 1691 | | case 0x03: /* LDV */ |
| 1692 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1693 | | UML_CALLC(block, cfunc_rsp_ldv_simd, this); |
| 1694 | | #if SIMUL_SIMD |
| 1695 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1696 | | UML_CALLC(block, cfunc_rsp_ldv_scalar, this); |
| 1697 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1698 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1699 | | #endif |
| 1700 | | return TRUE; |
| 1701 | | case 0x04: /* LQV */ |
| 1702 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1703 | | UML_CALLC(block, cfunc_rsp_lqv_simd, this); |
| 1704 | | #if SIMUL_SIMD |
| 1705 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1706 | | UML_CALLC(block, cfunc_rsp_lqv_scalar, this); |
| 1707 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1708 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1709 | | #endif |
| 1710 | | return TRUE; |
| 1711 | | case 0x05: /* LRV */ |
| 1712 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1713 | | UML_CALLC(block, cfunc_rsp_lrv_simd, this); |
| 1714 | | #if SIMUL_SIMD |
| 1715 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1716 | | UML_CALLC(block, cfunc_rsp_lrv_scalar, this); |
| 1717 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1718 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1719 | | #endif |
| 1720 | | return TRUE; |
| 1721 | | case 0x06: /* LPV */ |
| 1722 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1723 | | UML_CALLC(block, cfunc_rsp_lpv_simd, this); |
| 1724 | | #if SIMUL_SIMD |
| 1725 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1726 | | UML_CALLC(block, cfunc_rsp_lpv_scalar, this); |
| 1727 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1728 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1729 | | #endif |
| 1730 | | return TRUE; |
| 1731 | | case 0x07: /* LUV */ |
| 1732 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1733 | | UML_CALLC(block, cfunc_rsp_luv_simd, this); |
| 1734 | | #if SIMUL_SIMD |
| 1735 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1736 | | UML_CALLC(block, cfunc_rsp_luv_scalar, this); |
| 1737 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1738 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1739 | | #endif |
| 1740 | | return TRUE; |
| 1741 | | case 0x08: /* LHV */ |
| 1742 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1743 | | UML_CALLC(block, cfunc_rsp_lhv_simd, this); |
| 1744 | | #if SIMUL_SIMD |
| 1745 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1746 | | UML_CALLC(block, cfunc_rsp_lhv_scalar, this); |
| 1747 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1748 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1749 | | #endif |
| 1750 | | return TRUE; |
| 1751 | | case 0x09: /* LFV */ |
| 1752 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1753 | | UML_CALLC(block, cfunc_rsp_lfv_simd, this); |
| 1754 | | #if SIMUL_SIMD |
| 1755 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1756 | | UML_CALLC(block, cfunc_rsp_lfv_scalar, this); |
| 1757 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1758 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1759 | | #endif |
| 1760 | | return TRUE; |
| 1761 | | case 0x0a: /* LWV */ |
| 1762 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1763 | | UML_CALLC(block, cfunc_rsp_lwv_simd, this); |
| 1764 | | #if SIMUL_SIMD |
| 1765 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1766 | | UML_CALLC(block, cfunc_rsp_lwv_scalar, this); |
| 1767 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1768 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1769 | | #endif |
| 1770 | | return TRUE; |
| 1771 | | case 0x0b: /* LTV */ |
| 1772 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1773 | | UML_CALLC(block, cfunc_rsp_ltv_simd, this); |
| 1774 | | #if SIMUL_SIMD |
| 1775 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 1776 | | UML_CALLC(block, cfunc_rsp_ltv_scalar, this); |
| 1777 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 1778 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 1779 | | #endif |
| 1780 | | return TRUE; |
| 1781 | | |
| 1782 | | default: /* sub-opcode not handled here; nothing is appended to the block */ |
| 1783 | | return FALSE; |
| 1784 | | } |
| 1785 | | } |
| 1786 | | |
| 1787 | | #else |
| 1788 | | |
| 1789 | | int rsp_device::generate_lwc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) /* non-SIMD build: appends UML for one vector-load opcode to 'block'; returns TRUE for sub-opcodes 0x00-0x0b, FALSE for anything else. 'compiler' is not used in this body. */ |
| 1790 | | { |
| 1791 | | //int loopdest; |
| 1792 | | UINT32 op = desc->opptr.l[0]; |
| 1793 | | //int dest = (op >> 16) & 0x1f; |
| 1794 | | //int base = (op >> 21) & 0x1f; |
| 1795 | | //int index = (op >> 7) & 0xf; |
| 1796 | | int offset = (op & 0x7f); /* NOTE(review): computed and sign-extended below but never read - its only mention is the commented-out UML_ADD in the LBV case */ |
| 1797 | | //int skip; |
| 1798 | | if (offset & 0x40) |
| 1799 | | { |
| 1800 | | offset |= 0xffffffc0; |
| 1801 | | } |
| 1802 | | |
| 1803 | | switch ((op >> 11) & 0x1f) /* bits 15-11 pick the load variant; every case stores the opcode into arg0 and calls the matching scalar handler */ |
| 1804 | | { |
| 1805 | | case 0x00: /* LBV */ |
| 1806 | | //UML_ADD(block, I0, R32(RSREG), offset); |
| 1807 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1808 | | UML_CALLC(block, cfunc_rsp_lbv_scalar, this); /* the handler decodes the opcode itself from arg0 */ |
| 1809 | | return TRUE; |
| 1810 | | case 0x01: /* LSV */ |
| 1811 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1812 | | UML_CALLC(block, cfunc_rsp_lsv_scalar, this); |
| 1813 | | return TRUE; |
| 1814 | | case 0x02: /* LLV */ |
| 1815 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1816 | | UML_CALLC(block, cfunc_rsp_llv_scalar, this); |
| 1817 | | return TRUE; |
| 1818 | | case 0x03: /* LDV */ |
| 1819 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1820 | | UML_CALLC(block, cfunc_rsp_ldv_scalar, this); |
| 1821 | | return TRUE; |
| 1822 | | case 0x04: /* LQV */ |
| 1823 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1824 | | UML_CALLC(block, cfunc_rsp_lqv_scalar, this); |
| 1825 | | return TRUE; |
| 1826 | | case 0x05: /* LRV */ |
| 1827 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1828 | | UML_CALLC(block, cfunc_rsp_lrv_scalar, this); |
| 1829 | | return TRUE; |
| 1830 | | case 0x06: /* LPV */ |
| 1831 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1832 | | UML_CALLC(block, cfunc_rsp_lpv_scalar, this); |
| 1833 | | return TRUE; |
| 1834 | | case 0x07: /* LUV */ |
| 1835 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1836 | | UML_CALLC(block, cfunc_rsp_luv_scalar, this); |
| 1837 | | return TRUE; |
| 1838 | | case 0x08: /* LHV */ |
| 1839 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1840 | | UML_CALLC(block, cfunc_rsp_lhv_scalar, this); |
| 1841 | | return TRUE; |
| 1842 | | case 0x09: /* LFV */ |
| 1843 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1844 | | UML_CALLC(block, cfunc_rsp_lfv_scalar, this); |
| 1845 | | return TRUE; |
| 1846 | | case 0x0a: /* LWV */ |
| 1847 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1848 | | UML_CALLC(block, cfunc_rsp_lwv_scalar, this); |
| 1849 | | return TRUE; |
| 1850 | | case 0x0b: /* LTV */ |
| 1851 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 1852 | | UML_CALLC(block, cfunc_rsp_ltv_scalar, this); |
| 1853 | | return TRUE; |
| 1854 | | |
| 1855 | | default: /* sub-opcode not handled here; nothing is appended to the block */ |
| 1856 | | return FALSE; |
| 1857 | | } |
| 1858 | | } |
| 1859 | | #endif |
| 1860 | | |
| 1861 | | #if USE_SIMD |
| 1862 | | // SBV |
| 1863 | | // |
| 1864 | | // 31 25 20 15 10 6 0 |
| 1865 | | // -------------------------------------------------- |
| 1866 | | // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset | |
| 1867 | | // -------------------------------------------------- |
| 1868 | | // B = base register, T = source vector (held in the local named 'dest'), I = byte index, Offset = 7-bit offset (sign-extended from bit 6, unscaled) |
| 1869 | | // Stores 1 byte from vector byte index |
| 1870 | | /* SIMD variant: pulls the containing 16-bit element out of m_xv with SIMD_EXTRACT16 and picks the byte from it. */ |
| 1871 | | inline void rsp_device::ccfunc_rsp_sbv_simd() |
| 1872 | | { |
| 1873 | | UINT32 op = m_rsp_state->arg0; /* opcode word the handler decodes for itself */ |
| 1874 | | int dest = (op >> 16) & 0x1f; |
| 1875 | | int base = (op >> 21) & 0x1f; |
| 1876 | | int index = (op >> 7) & 0xf; |
| 1877 | | int offset = (op & 0x7f); |
| 1878 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1879 | | { |
| 1880 | | offset |= 0xffffffc0; |
| 1881 | | } |
| 1882 | | |
| 1883 | | UINT32 ea = (base) ? m_rsp_state->r[base] + offset : offset; /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1884 | | UINT16 value; |
| 1885 | | SIMD_EXTRACT16(m_xv[dest], value, (index >> 1)); /* two bytes per element, so byte index / 2 names the element */ |
| 1886 | | value >>= (1-(index & 1)) * 8; /* even byte index: bring the upper byte down; odd: keep the lower byte */ |
| 1887 | | DM_WRITE8(ea, (UINT8)value); |
| 1888 | | } |
| 1889 | | |
| 1890 | | static void cfunc_rsp_sbv_simd(void *param) /* plain-C thunk; param is the rsp_device to run the handler on */ |
| 1891 | | { |
| 1892 | | ((rsp_device *)param)->ccfunc_rsp_sbv_simd(); |
| 1893 | | } |
| 1894 | | #endif |
| 1895 | | |
| 1896 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1897 | | /* Scalar SBV handler: forms ea = r[base] + offset (unscaled) and writes the single byte VREG_B(dest, index) there with DM_WRITE8. */ |
| 1898 | | inline void rsp_device::ccfunc_rsp_sbv_scalar() |
| 1899 | | { |
| 1900 | | UINT32 op = m_rsp_state->arg0; /* opcode word the handler decodes for itself */ |
| 1901 | | int dest = (op >> 16) & 0x1f; /* despite the name, this is the register being stored from */ |
| 1902 | | int base = (op >> 21) & 0x1f; |
| 1903 | | int index = (op >> 7) & 0xf; |
| 1904 | | int offset = (op & 0x7f); |
| 1905 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1906 | | { |
| 1907 | | offset |= 0xffffffc0; |
| 1908 | | } |
| 1909 | | |
| 1910 | | UINT32 ea = (base) ? m_rsp_state->r[base] + offset : offset; /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1911 | | DM_WRITE8(ea, VREG_B(dest, index)); |
| 1912 | | } |
| 1913 | | |
| 1914 | | static void cfunc_rsp_sbv_scalar(void *param) /* plain-C thunk; param is the rsp_device to run the handler on */ |
| 1915 | | { |
| 1916 | | ((rsp_device *)param)->ccfunc_rsp_sbv_scalar(); |
| 1917 | | } |
| 1918 | | #endif |
| 1919 | | |
| 1920 | | #if USE_SIMD |
| 1921 | | // SSV |
| 1922 | | // |
| 1923 | | // 31 25 20 15 10 6 0 |
| 1924 | | // -------------------------------------------------- |
| 1925 | | // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset | |
| 1926 | | // -------------------------------------------------- |
| 1927 | | // B = base register, T = source vector (held in the local named 'dest'), I = byte index, Offset = 7-bit offset (sign-extended from bit 6, scaled by 2) |
| 1928 | | // Stores 2 bytes starting from vector byte index |
| 1929 | | /* SIMD variant: each byte is obtained by extracting its 16-bit element with SIMD_EXTRACT16 and selecting the upper or lower half. */ |
| 1930 | | inline void rsp_device::ccfunc_rsp_ssv_simd() |
| 1931 | | { |
| 1932 | | UINT32 op = m_rsp_state->arg0; /* opcode word the handler decodes for itself */ |
| 1933 | | int dest = (op >> 16) & 0x1f; |
| 1934 | | int base = (op >> 21) & 0x1f; |
| 1935 | | int index = (op >> 7) & 0xf; |
| 1936 | | int offset = (op & 0x7f); |
| 1937 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1938 | | { |
| 1939 | | offset |= 0xffffffc0; |
| 1940 | | } |
| 1941 | | |
| 1942 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1943 | | |
| 1944 | | int end = index + 2; /* NOTE(review): with index 15 the second byte is byte 16, i.e. element 8; no wrap is applied here - confirm what SIMD_EXTRACT16 does with it */ |
| 1945 | | for (int i = index; i < end; i++) |
| 1946 | | { |
| 1947 | | UINT16 value; |
| 1948 | | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); /* two bytes per element, so byte number / 2 names the element */ |
| 1949 | | value >>= (1 - (i & 1)) * 8; /* even byte number: bring the upper byte down; odd: keep the lower byte */ |
| 1950 | | DM_WRITE8(ea, (UINT8)value); |
| 1951 | | ea++; |
| 1952 | | } |
| 1953 | | } |
| 1954 | | |
| 1955 | | static void cfunc_rsp_ssv_simd(void *param) /* plain-C thunk; param is the rsp_device to run the handler on */ |
| 1956 | | { |
| 1957 | | ((rsp_device *)param)->ccfunc_rsp_ssv_simd(); |
| 1958 | | } |
| 1959 | | #endif |
| 1960 | | |
| 1961 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 1962 | | /* Scalar SSV handler: forms ea = r[base] + offset*2 and writes the two bytes VREG_B(dest, index) and VREG_B(dest, index + 1) to consecutive addresses. */ |
| 1963 | | inline void rsp_device::ccfunc_rsp_ssv_scalar() |
| 1964 | | { |
| 1965 | | UINT32 op = m_rsp_state->arg0; /* opcode word the handler decodes for itself */ |
| 1966 | | int dest = (op >> 16) & 0x1f; /* despite the name, this is the register being stored from */ |
| 1967 | | int base = (op >> 21) & 0x1f; |
| 1968 | | int index = (op >> 7) & 0xf; |
| 1969 | | int offset = (op & 0x7f); |
| 1970 | | if (offset & 0x40) /* bit 6 set: negative offset, extend the sign through the upper bits */ |
| 1971 | | { |
| 1972 | | offset |= 0xffffffc0; |
| 1973 | | } |
| 1974 | | |
| 1975 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 2) : (offset * 2); /* base field 0 contributes nothing, otherwise r[base] is added */ |
| 1976 | | |
| 1977 | | int end = index + 2; /* NOTE(review): with index 15 the second byte number is 16; no wrap is applied here - confirm VREG_B tolerates it */ |
| 1978 | | for (int i = index; i < end; i++) |
| 1979 | | { |
| 1980 | | DM_WRITE8(ea, VREG_B(dest, i)); |
| 1981 | | ea++; |
| 1982 | | } |
| 1983 | | } |
| 1984 | | |
| 1985 | | static void cfunc_rsp_ssv_scalar(void *param) /* plain-C thunk; param is the rsp_device to run the handler on */ |
| 1986 | | { |
| 1987 | | ((rsp_device *)param)->ccfunc_rsp_ssv_scalar(); |
| 1988 | | } |
| 1989 | | #endif |
| 1990 | | |
| 1991 | | #if USE_SIMD |
| 1992 | | // SLV |
| 1993 | | // |
| 1994 | | // 31 25 20 15 10 6 0 |
| 1995 | | // -------------------------------------------------- |
| 1996 | | // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset | |
| 1997 | | // -------------------------------------------------- |
| 1998 | | // |
| 1999 | | // Stores 4 bytes starting from vector byte index |
| 2000 | | |
| 2001 | | inline void rsp_device::ccfunc_rsp_slv_simd() |
| 2002 | | { |
| 2003 | | UINT32 op = m_rsp_state->arg0; |
| 2004 | | int dest = (op >> 16) & 0x1f; |
| 2005 | | int base = (op >> 21) & 0x1f; |
| 2006 | | int index = (op >> 7) & 0xf; |
| 2007 | | int offset = (op & 0x7f); |
| 2008 | | if (offset & 0x40) |
| 2009 | | { |
| 2010 | | offset |= 0xffffffc0; |
| 2011 | | } |
| 2012 | | |
| 2013 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 2014 | | |
| 2015 | | int end = index + 4; |
| 2016 | | for (int i = index; i < end; i++) |
| 2017 | | { |
| 2018 | | UINT16 value; |
| 2019 | | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 2020 | | value >>= (1 - (i & 1)) * 8; |
| 2021 | | DM_WRITE8(ea, (UINT8)value); |
| 2022 | | ea++; |
| 2023 | | } |
| 2024 | | } |
| 2025 | | |
| 2026 | | static void cfunc_rsp_slv_simd(void *param) |
| 2027 | | { |
| 2028 | | ((rsp_device *)param)->ccfunc_rsp_slv_simd(); |
| 2029 | | } |
| 2030 | | #endif |
| 2031 | | |
| 2032 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2033 | | |
| 2034 | | inline void rsp_device::ccfunc_rsp_slv_scalar() |
| 2035 | | { |
| 2036 | | UINT32 op = m_rsp_state->arg0; |
| 2037 | | int dest = (op >> 16) & 0x1f; |
| 2038 | | int base = (op >> 21) & 0x1f; |
| 2039 | | int index = (op >> 7) & 0xf; |
| 2040 | | int offset = (op & 0x7f); |
| 2041 | | if (offset & 0x40) |
| 2042 | | { |
| 2043 | | offset |= 0xffffffc0; |
| 2044 | | } |
| 2045 | | |
| 2046 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 4) : (offset * 4); |
| 2047 | | |
| 2048 | | int end = index + 4; |
| 2049 | | for (int i = index; i < end; i++) |
| 2050 | | { |
| 2051 | | DM_WRITE8(ea, VREG_B(dest, i)); |
| 2052 | | ea++; |
| 2053 | | } |
| 2054 | | } |
| 2055 | | |
| 2056 | | static void cfunc_rsp_slv_scalar(void *param) |
| 2057 | | { |
| 2058 | | ((rsp_device *)param)->ccfunc_rsp_slv_scalar(); |
| 2059 | | } |
| 2060 | | #endif |
| 2061 | | |
| 2062 | | #if USE_SIMD |
| 2063 | | // SDV |
| 2064 | | // |
| 2065 | | // 31 25 20 15 10 6 0 |
| 2066 | | // -------------------------------------------------- |
| 2067 | | // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset | |
| 2068 | | // -------------------------------------------------- |
| 2069 | | // |
| 2070 | | // Stores 8 bytes starting from vector byte index |
| 2071 | | |
| 2072 | | inline void rsp_device::ccfunc_rsp_sdv_simd() |
| 2073 | | { |
| 2074 | | UINT32 op = m_rsp_state->arg0; |
| 2075 | | int dest = (op >> 16) & 0x1f; |
| 2076 | | int base = (op >> 21) & 0x1f; |
| 2077 | | int index = (op >> 7) & 0x8; |
| 2078 | | int offset = (op & 0x7f); |
| 2079 | | if (offset & 0x40) |
| 2080 | | { |
| 2081 | | offset |= 0xffffffc0; |
| 2082 | | } |
| 2083 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2084 | | |
| 2085 | | int end = index + 8; |
| 2086 | | for (int i = index; i < end; i++) |
| 2087 | | { |
| 2088 | | UINT16 value; |
| 2089 | | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 2090 | | value >>= (1 - (i & 1)) * 8; |
| 2091 | | DM_WRITE8(ea, (UINT8)value); |
| 2092 | | ea++; |
| 2093 | | } |
| 2094 | | } |
| 2095 | | |
| 2096 | | static void cfunc_rsp_sdv_simd(void *param) |
| 2097 | | { |
| 2098 | | ((rsp_device *)param)->ccfunc_rsp_sdv_simd(); |
| 2099 | | } |
| 2100 | | #endif |
| 2101 | | |
| 2102 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2103 | | |
| 2104 | | inline void rsp_device::ccfunc_rsp_sdv_scalar() |
| 2105 | | { |
| 2106 | | UINT32 op = m_rsp_state->arg0; |
| 2107 | | int dest = (op >> 16) & 0x1f; |
| 2108 | | int base = (op >> 21) & 0x1f; |
| 2109 | | int index = (op >> 7) & 0x8; |
| 2110 | | int offset = (op & 0x7f); |
| 2111 | | if (offset & 0x40) |
| 2112 | | { |
| 2113 | | offset |= 0xffffffc0; |
| 2114 | | } |
| 2115 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2116 | | |
| 2117 | | int end = index + 8; |
| 2118 | | for (int i = index; i < end; i++) |
| 2119 | | { |
| 2120 | | DM_WRITE8(ea, VREG_B(dest, i)); |
| 2121 | | ea++; |
| 2122 | | } |
| 2123 | | } |
| 2124 | | |
| 2125 | | static void cfunc_rsp_sdv_scalar(void *param) |
| 2126 | | { |
| 2127 | | ((rsp_device *)param)->ccfunc_rsp_sdv_scalar(); |
| 2128 | | } |
| 2129 | | #endif |
| 2130 | | |
| 2131 | | #if USE_SIMD |
| 2132 | | // SQV |
| 2133 | | // |
| 2134 | | // 31 25 20 15 10 6 0 |
| 2135 | | // -------------------------------------------------- |
| 2136 | | // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset | |
| 2137 | | // -------------------------------------------------- |
| 2138 | | // |
| 2139 | | // Stores up to 16 bytes starting from vector byte index until 16-byte boundary |
| 2140 | | |
| 2141 | | inline void rsp_device::ccfunc_rsp_sqv_simd() |
| 2142 | | { |
| 2143 | | UINT32 op = m_rsp_state->arg0; |
| 2144 | | int dest = (op >> 16) & 0x1f; |
| 2145 | | int base = (op >> 21) & 0x1f; |
| 2146 | | int index = (op >> 7) & 0xf; |
| 2147 | | int offset = (op & 0x7f); |
| 2148 | | if (offset & 0x40) |
| 2149 | | { |
| 2150 | | offset |= 0xffffffc0; |
| 2151 | | } |
| 2152 | | |
| 2153 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2154 | | int end = index + (16 - (ea & 0xf)); |
| 2155 | | for (int i=index; i < end; i++) |
| 2156 | | { |
| 2157 | | UINT16 value; |
| 2158 | | SIMD_EXTRACT16(m_xv[dest], value, (i >> 1)); |
| 2159 | | value >>= (1-(i & 1)) * 8; |
| 2160 | | DM_WRITE8(ea, (UINT8)value); |
| 2161 | | ea++; |
| 2162 | | } |
| 2163 | | } |
| 2164 | | |
| 2165 | | static void cfunc_rsp_sqv_simd(void *param) |
| 2166 | | { |
| 2167 | | ((rsp_device *)param)->ccfunc_rsp_sqv_simd(); |
| 2168 | | } |
| 2169 | | #endif |
| 2170 | | |
| 2171 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2172 | | |
| 2173 | | inline void rsp_device::ccfunc_rsp_sqv_scalar() |
| 2174 | | { |
| 2175 | | UINT32 op = m_rsp_state->arg0; |
| 2176 | | int dest = (op >> 16) & 0x1f; |
| 2177 | | int base = (op >> 21) & 0x1f; |
| 2178 | | int index = (op >> 7) & 0xf; |
| 2179 | | int offset = (op & 0x7f); |
| 2180 | | if (offset & 0x40) |
| 2181 | | { |
| 2182 | | offset |= 0xffffffc0; |
| 2183 | | } |
| 2184 | | |
| 2185 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2186 | | int end = index + (16 - (ea & 0xf)); |
| 2187 | | for (int i=index; i < end; i++) |
| 2188 | | { |
| 2189 | | DM_WRITE8(ea, VREG_B(dest, i & 0xf)); |
| 2190 | | ea++; |
| 2191 | | } |
| 2192 | | } |
| 2193 | | |
| 2194 | | static void cfunc_rsp_sqv_scalar(void *param) |
| 2195 | | { |
| 2196 | | ((rsp_device *)param)->ccfunc_rsp_sqv_scalar(); |
| 2197 | | } |
| 2198 | | #endif |
| 2199 | | |
| 2200 | | #if USE_SIMD |
| 2201 | | // SRV |
| 2202 | | // |
| 2203 | | // 31 25 20 15 10 6 0 |
| 2204 | | // -------------------------------------------------- |
| 2205 | | // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset | |
| 2206 | | // -------------------------------------------------- |
| 2207 | | // |
| 2208 | | // Stores up to 16 bytes starting from right side until 16-byte boundary |
| 2209 | | |
| 2210 | | inline void rsp_device::ccfunc_rsp_srv_simd() |
| 2211 | | { |
| 2212 | | UINT32 op = m_rsp_state->arg0; |
| 2213 | | int dest = (op >> 16) & 0x1f; |
| 2214 | | int base = (op >> 21) & 0x1f; |
| 2215 | | int index = (op >> 7) & 0xf; |
| 2216 | | int offset = (op & 0x7f); |
| 2217 | | if (offset & 0x40) |
| 2218 | | { |
| 2219 | | offset |= 0xffffffc0; |
| 2220 | | } |
| 2221 | | |
| 2222 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2223 | | |
| 2224 | | int end = index + (ea & 0xf); |
| 2225 | | int o = (16 - (ea & 0xf)) & 0xf; |
| 2226 | | ea &= ~0xf; |
| 2227 | | |
| 2228 | | for (int i = index; i < end; i++) |
| 2229 | | { |
| 2230 | | UINT32 bi = (i + o) & 0xf; |
| 2231 | | UINT16 value; |
| 2232 | | SIMD_EXTRACT16(m_xv[dest], value, (bi >> 1)); |
| 2233 | | value >>= (1-(bi & 1)) * 8; |
| 2234 | | DM_WRITE8(ea, (UINT8)value); |
| 2235 | | ea++; |
| 2236 | | } |
| 2237 | | } |
| 2238 | | |
| 2239 | | static void cfunc_rsp_srv_simd(void *param) |
| 2240 | | { |
| 2241 | | ((rsp_device *)param)->ccfunc_rsp_srv_simd(); |
| 2242 | | } |
| 2243 | | #endif |
| 2244 | | |
| 2245 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2246 | | |
| 2247 | | inline void rsp_device::ccfunc_rsp_srv_scalar() |
| 2248 | | { |
| 2249 | | UINT32 op = m_rsp_state->arg0; |
| 2250 | | int dest = (op >> 16) & 0x1f; |
| 2251 | | int base = (op >> 21) & 0x1f; |
| 2252 | | int index = (op >> 7) & 0xf; |
| 2253 | | int offset = (op & 0x7f); |
| 2254 | | if (offset & 0x40) |
| 2255 | | { |
| 2256 | | offset |= 0xffffffc0; |
| 2257 | | } |
| 2258 | | |
| 2259 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2260 | | |
| 2261 | | int end = index + (ea & 0xf); |
| 2262 | | int o = (16 - (ea & 0xf)) & 0xf; |
| 2263 | | ea &= ~0xf; |
| 2264 | | |
| 2265 | | for (int i = index; i < end; i++) |
| 2266 | | { |
| 2267 | | DM_WRITE8(ea, VREG_B(dest, ((i + o) & 0xf))); |
| 2268 | | ea++; |
| 2269 | | } |
| 2270 | | } |
| 2271 | | |
| 2272 | | static void cfunc_rsp_srv_scalar(void *param) |
| 2273 | | { |
| 2274 | | ((rsp_device *)param)->ccfunc_rsp_srv_scalar(); |
| 2275 | | } |
| 2276 | | #endif |
| 2277 | | |
| 2278 | | #if USE_SIMD |
| 2279 | | // SPV |
| 2280 | | // |
| 2281 | | // 31 25 20 15 10 6 0 |
| 2282 | | // -------------------------------------------------- |
| 2283 | | // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset | |
| 2284 | | // -------------------------------------------------- |
| 2285 | | // |
| 2286 | | // Stores upper 8 bits of each element |
| 2287 | | |
| 2288 | | inline void rsp_device::ccfunc_rsp_spv_simd() |
| 2289 | | { |
| 2290 | | UINT32 op = m_rsp_state->arg0; |
| 2291 | | int dest = (op >> 16) & 0x1f; |
| 2292 | | int base = (op >> 21) & 0x1f; |
| 2293 | | int index = (op >> 7) & 0xf; |
| 2294 | | int offset = (op & 0x7f); |
| 2295 | | if (offset & 0x40) |
| 2296 | | { |
| 2297 | | offset |= 0xffffffc0; |
| 2298 | | } |
| 2299 | | |
| 2300 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2301 | | int end = index + 8; |
| 2302 | | for (int i=index; i < end; i++) |
| 2303 | | { |
| 2304 | | if ((i & 0xf) < 8) |
| 2305 | | { |
| 2306 | | UINT16 value; |
| 2307 | | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2308 | | DM_WRITE8(ea, (UINT8)(value >> 8)); |
| 2309 | | } |
| 2310 | | else |
| 2311 | | { |
| 2312 | | UINT16 value; |
| 2313 | | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2314 | | DM_WRITE8(ea, (UINT8)(value >> 7)); |
| 2315 | | } |
| 2316 | | ea++; |
| 2317 | | } |
| 2318 | | } |
| 2319 | | |
| 2320 | | static void cfunc_rsp_spv_simd(void *param) |
| 2321 | | { |
| 2322 | | ((rsp_device *)param)->ccfunc_rsp_spv_simd(); |
| 2323 | | } |
| 2324 | | #endif |
| 2325 | | |
| 2326 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2327 | | |
| 2328 | | inline void rsp_device::ccfunc_rsp_spv_scalar() |
| 2329 | | { |
| 2330 | | UINT32 op = m_rsp_state->arg0; |
| 2331 | | int dest = (op >> 16) & 0x1f; |
| 2332 | | int base = (op >> 21) & 0x1f; |
| 2333 | | int index = (op >> 7) & 0xf; |
| 2334 | | int offset = (op & 0x7f); |
| 2335 | | if (offset & 0x40) |
| 2336 | | { |
| 2337 | | offset |= 0xffffffc0; |
| 2338 | | } |
| 2339 | | |
| 2340 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2341 | | int end = index + 8; |
| 2342 | | for (int i=index; i < end; i++) |
| 2343 | | { |
| 2344 | | if ((i & 0xf) < 8) |
| 2345 | | { |
| 2346 | | DM_WRITE8(ea, VREG_B(dest, (i & 0xf) << 1)); |
| 2347 | | } |
| 2348 | | else |
| 2349 | | { |
| 2350 | | DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 2351 | | } |
| 2352 | | ea++; |
| 2353 | | } |
| 2354 | | } |
| 2355 | | |
| 2356 | | static void cfunc_rsp_spv_scalar(void *param) |
| 2357 | | { |
| 2358 | | ((rsp_device *)param)->ccfunc_rsp_spv_scalar(); |
| 2359 | | } |
| 2360 | | #endif |
| 2361 | | |
| 2362 | | #if USE_SIMD |
| 2363 | | // SUV |
| 2364 | | // |
| 2365 | | // 31 25 20 15 10 6 0 |
| 2366 | | // -------------------------------------------------- |
| 2367 | | // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset | |
| 2368 | | // -------------------------------------------------- |
| 2369 | | // |
| 2370 | | // Stores bits 14-7 of each element |
| 2371 | | |
| 2372 | | inline void rsp_device::ccfunc_rsp_suv_simd() |
| 2373 | | { |
| 2374 | | UINT32 op = m_rsp_state->arg0; |
| 2375 | | int dest = (op >> 16) & 0x1f; |
| 2376 | | int base = (op >> 21) & 0x1f; |
| 2377 | | int index = (op >> 7) & 0xf; |
| 2378 | | int offset = (op & 0x7f); |
| 2379 | | if (offset & 0x40) |
| 2380 | | { |
| 2381 | | offset |= 0xffffffc0; |
| 2382 | | } |
| 2383 | | |
| 2384 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2385 | | int end = index + 8; |
| 2386 | | for (int i=index; i < end; i++) |
| 2387 | | { |
| 2388 | | if ((i & 0xf) < 8) |
| 2389 | | { |
| 2390 | | UINT16 value; |
| 2391 | | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2392 | | DM_WRITE8(ea, (UINT8)(value >> 7)); |
| 2393 | | } |
| 2394 | | else |
| 2395 | | { |
| 2396 | | UINT16 value; |
| 2397 | | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2398 | | DM_WRITE8(ea, (UINT8)(value >> 8)); |
| 2399 | | } |
| 2400 | | ea++; |
| 2401 | | } |
| 2402 | | } |
| 2403 | | |
| 2404 | | static void cfunc_rsp_suv_simd(void *param) |
| 2405 | | { |
| 2406 | | ((rsp_device *)param)->ccfunc_rsp_suv_simd(); |
| 2407 | | } |
| 2408 | | #endif |
| 2409 | | |
| 2410 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2411 | | |
| 2412 | | inline void rsp_device::ccfunc_rsp_suv_scalar() |
| 2413 | | { |
| 2414 | | UINT32 op = m_rsp_state->arg0; |
| 2415 | | int dest = (op >> 16) & 0x1f; |
| 2416 | | int base = (op >> 21) & 0x1f; |
| 2417 | | int index = (op >> 7) & 0xf; |
| 2418 | | int offset = (op & 0x7f); |
| 2419 | | if (offset & 0x40) |
| 2420 | | { |
| 2421 | | offset |= 0xffffffc0; |
| 2422 | | } |
| 2423 | | |
| 2424 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 8) : (offset * 8); |
| 2425 | | int end = index + 8; |
| 2426 | | for (int i=index; i < end; i++) |
| 2427 | | { |
| 2428 | | if ((i & 0xf) < 8) |
| 2429 | | { |
| 2430 | | DM_WRITE8(ea, VREG_S(dest, (i & 0x7)) >> 7); |
| 2431 | | } |
| 2432 | | else |
| 2433 | | { |
| 2434 | | DM_WRITE8(ea, VREG_B(dest, ((i & 0x7) << 1))); |
| 2435 | | } |
| 2436 | | ea++; |
| 2437 | | } |
| 2438 | | } |
| 2439 | | |
| 2440 | | static void cfunc_rsp_suv_scalar(void *param) |
| 2441 | | { |
| 2442 | | ((rsp_device *)param)->ccfunc_rsp_suv_scalar(); |
| 2443 | | } |
| 2444 | | #endif |
| 2445 | | |
| 2446 | | #if USE_SIMD |
| 2447 | | // SHV |
| 2448 | | // |
| 2449 | | // 31 25 20 15 10 6 0 |
| 2450 | | // -------------------------------------------------- |
| 2451 | | // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset | |
| 2452 | | // -------------------------------------------------- |
| 2453 | | // |
| 2454 | | // Stores bits 14-7 of each element, with 2-byte stride |
| 2455 | | |
| 2456 | | inline void rsp_device::ccfunc_rsp_shv_simd() |
| 2457 | | { |
| 2458 | | UINT32 op = m_rsp_state->arg0; |
| 2459 | | int dest = (op >> 16) & 0x1f; |
| 2460 | | int base = (op >> 21) & 0x1f; |
| 2461 | | int index = (op >> 7) & 0xf; |
| 2462 | | int offset = (op & 0x7f); |
| 2463 | | if (offset & 0x40) |
| 2464 | | { |
| 2465 | | offset |= 0xffffffc0; |
| 2466 | | } |
| 2467 | | |
| 2468 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2469 | | for (int i=0; i < 8; i++) |
| 2470 | | { |
| 2471 | | int element = index + (i << 1); |
| 2472 | | UINT16 value; |
| 2473 | | SIMD_EXTRACT16(m_xv[dest], value, element >> 1); |
| 2474 | | DM_WRITE8(ea, (value >> 7) & 0x00ff); |
| 2475 | | ea += 2; |
| 2476 | | } |
| 2477 | | } |
| 2478 | | |
| 2479 | | static void cfunc_rsp_shv_simd(void *param) |
| 2480 | | { |
| 2481 | | ((rsp_device *)param)->ccfunc_rsp_shv_simd(); |
| 2482 | | } |
| 2483 | | #endif |
| 2484 | | |
| 2485 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2486 | | |
| 2487 | | inline void rsp_device::ccfunc_rsp_shv_scalar() |
| 2488 | | { |
| 2489 | | UINT32 op = m_rsp_state->arg0; |
| 2490 | | int dest = (op >> 16) & 0x1f; |
| 2491 | | int base = (op >> 21) & 0x1f; |
| 2492 | | int index = (op >> 7) & 0xf; |
| 2493 | | int offset = (op & 0x7f); |
| 2494 | | if (offset & 0x40) |
| 2495 | | { |
| 2496 | | offset |= 0xffffffc0; |
| 2497 | | } |
| 2498 | | |
| 2499 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2500 | | for (int i=0; i < 8; i++) |
| 2501 | | { |
| 2502 | | int element = index + (i << 1); |
| 2503 | | UINT8 d = (VREG_B(dest, (element & 0xf)) << 1) | |
| 2504 | | (VREG_B(dest, ((element + 1) & 0xf)) >> 7); |
| 2505 | | DM_WRITE8(ea, d); |
| 2506 | | ea += 2; |
| 2507 | | } |
| 2508 | | } |
| 2509 | | |
| 2510 | | static void cfunc_rsp_shv_scalar(void *param) |
| 2511 | | { |
| 2512 | | ((rsp_device *)param)->ccfunc_rsp_shv_scalar(); |
| 2513 | | } |
| 2514 | | #endif |
| 2515 | | |
| 2516 | | #if USE_SIMD |
| 2517 | | // SFV |
| 2518 | | // |
| 2519 | | // 31 25 20 15 10 6 0 |
| 2520 | | // -------------------------------------------------- |
| 2521 | | // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset | |
| 2522 | | // -------------------------------------------------- |
| 2523 | | // |
| 2524 | | // Stores bits 14-7 of upper or lower quad, with 4-byte stride |
| 2525 | | |
| 2526 | | inline void rsp_device::ccfunc_rsp_sfv_simd() |
| 2527 | | { |
| 2528 | | UINT32 op = m_rsp_state->arg0; |
| 2529 | | int dest = (op >> 16) & 0x1f; |
| 2530 | | int base = (op >> 21) & 0x1f; |
| 2531 | | int index = (op >> 7) & 0xf; |
| 2532 | | int offset = (op & 0x7f); |
| 2533 | | if (offset & 0x40) |
| 2534 | | { |
| 2535 | | offset |= 0xffffffc0; |
| 2536 | | } |
| 2537 | | |
| 2538 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2539 | | int eaoffset = ea & 0xf; |
| 2540 | | ea &= ~0xf; |
| 2541 | | |
| 2542 | | int end = (index >> 1) + 4; |
| 2543 | | |
| 2544 | | for (int i = index>>1; i < end; i++) |
| 2545 | | { |
| 2546 | | UINT16 value; |
| 2547 | | SIMD_EXTRACT16(m_xv[dest], value, i); |
| 2548 | | DM_WRITE8(ea + (eaoffset & 0xf), (value >> 7) & 0x00ff); |
| 2549 | | eaoffset += 4; |
| 2550 | | } |
| 2551 | | } |
| 2552 | | |
| 2553 | | static void cfunc_rsp_sfv_simd(void *param) |
| 2554 | | { |
| 2555 | | ((rsp_device *)param)->ccfunc_rsp_sfv_simd(); |
| 2556 | | } |
| 2557 | | #endif |
| 2558 | | |
| 2559 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2560 | | |
| 2561 | | inline void rsp_device::ccfunc_rsp_sfv_scalar() |
| 2562 | | { |
| 2563 | | UINT32 op = m_rsp_state->arg0; |
| 2564 | | int dest = (op >> 16) & 0x1f; |
| 2565 | | int base = (op >> 21) & 0x1f; |
| 2566 | | int index = (op >> 7) & 0xf; |
| 2567 | | int offset = (op & 0x7f); |
| 2568 | | if (offset & 0x40) |
| 2569 | | { |
| 2570 | | offset |= 0xffffffc0; |
| 2571 | | } |
| 2572 | | |
| 2573 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2574 | | int eaoffset = ea & 0xf; |
| 2575 | | ea &= ~0xf; |
| 2576 | | |
| 2577 | | int end = (index >> 1) + 4; |
| 2578 | | |
| 2579 | | for (int i = index>>1; i < end; i++) |
| 2580 | | { |
| 2581 | | DM_WRITE8(ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7); |
| 2582 | | eaoffset += 4; |
| 2583 | | } |
| 2584 | | } |
| 2585 | | |
| 2586 | | static void cfunc_rsp_sfv_scalar(void *param) |
| 2587 | | { |
| 2588 | | ((rsp_device *)param)->ccfunc_rsp_sfv_scalar(); |
| 2589 | | } |
| 2590 | | #endif |
| 2591 | | |
| 2592 | | #if USE_SIMD |
| 2593 | | // SWV |
| 2594 | | // |
| 2595 | | // 31 25 20 15 10 6 0 |
| 2596 | | // -------------------------------------------------- |
| 2597 | | // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset | |
| 2598 | | // -------------------------------------------------- |
| 2599 | | // |
| 2600 | | // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0 |
| 2601 | | // after byte index 15 |
| 2602 | | |
| 2603 | | inline void rsp_device::ccfunc_rsp_swv_simd() |
| 2604 | | { |
| 2605 | | UINT32 op = m_rsp_state->arg0; |
| 2606 | | int dest = (op >> 16) & 0x1f; |
| 2607 | | int base = (op >> 21) & 0x1f; |
| 2608 | | int index = (op >> 7) & 0xf; |
| 2609 | | int offset = (op & 0x7f); |
| 2610 | | if (offset & 0x40) |
| 2611 | | { |
| 2612 | | offset |= 0xffffffc0; |
| 2613 | | } |
| 2614 | | |
| 2615 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2616 | | int eaoffset = ea & 0xf; |
| 2617 | | ea &= ~0xf; |
| 2618 | | |
| 2619 | | int end = index + 16; |
| 2620 | | for (int i = index; i < end; i++) |
| 2621 | | { |
| 2622 | | UINT16 value; |
| 2623 | | SIMD_EXTRACT16(m_xv[dest], value, i >> 1); |
| 2624 | | DM_WRITE8(ea + (eaoffset & 0xf), (value >> ((1-(i & 1)) * 8)) & 0xff); |
| 2625 | | eaoffset++; |
| 2626 | | } |
| 2627 | | } |
| 2628 | | |
| 2629 | | static void cfunc_rsp_swv_simd(void *param) |
| 2630 | | { |
| 2631 | | ((rsp_device *)param)->ccfunc_rsp_swv_simd(); |
| 2632 | | } |
| 2633 | | #endif |
| 2634 | | |
| 2635 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2636 | | |
| 2637 | | inline void rsp_device::ccfunc_rsp_swv_scalar() |
| 2638 | | { |
| 2639 | | UINT32 op = m_rsp_state->arg0; |
| 2640 | | int dest = (op >> 16) & 0x1f; |
| 2641 | | int base = (op >> 21) & 0x1f; |
| 2642 | | int index = (op >> 7) & 0xf; |
| 2643 | | int offset = (op & 0x7f); |
| 2644 | | if (offset & 0x40) |
| 2645 | | { |
| 2646 | | offset |= 0xffffffc0; |
| 2647 | | } |
| 2648 | | |
| 2649 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2650 | | int eaoffset = ea & 0xf; |
| 2651 | | ea &= ~0xf; |
| 2652 | | |
| 2653 | | int end = index + 16; |
| 2654 | | for (int i = index; i < end; i++) |
| 2655 | | { |
| 2656 | | DM_WRITE8(ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf)); |
| 2657 | | eaoffset++; |
| 2658 | | } |
| 2659 | | } |
| 2660 | | |
| 2661 | | static void cfunc_rsp_swv_scalar(void *param) |
| 2662 | | { |
| 2663 | | ((rsp_device *)param)->ccfunc_rsp_swv_scalar(); |
| 2664 | | } |
| 2665 | | #endif |
| 2666 | | |
| 2667 | | #if USE_SIMD |
| 2668 | | // STV |
| 2669 | | // |
| 2670 | | // 31 25 20 15 10 6 0 |
| 2671 | | // -------------------------------------------------- |
| 2672 | | // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset | |
| 2673 | | // -------------------------------------------------- |
| 2674 | | // |
| 2675 | | // Stores one element from maximum of 8 vectors, while incrementing element index |
| 2676 | | |
| 2677 | | inline void rsp_device::ccfunc_rsp_stv_simd() |
| 2678 | | { |
| 2679 | | UINT32 op = m_rsp_state->arg0; |
| 2680 | | int dest = (op >> 16) & 0x1f; |
| 2681 | | int base = (op >> 21) & 0x1f; |
| 2682 | | int index = (op >> 7) & 0xf; |
| 2683 | | int offset = (op & 0x7f); |
| 2684 | | |
| 2685 | | if (offset & 0x40) |
| 2686 | | { |
| 2687 | | offset |= 0xffffffc0; |
| 2688 | | } |
| 2689 | | |
| 2690 | | int vs = dest; |
| 2691 | | int ve = dest + 8; |
| 2692 | | if (ve > 32) |
| 2693 | | { |
| 2694 | | ve = 32; |
| 2695 | | } |
| 2696 | | |
| 2697 | | int element = 8 - (index >> 1); |
| 2698 | | |
| 2699 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2700 | | int eaoffset = (ea & 0xf) + (element * 2); |
| 2701 | | ea &= ~0xf; |
| 2702 | | |
| 2703 | | for (int i = vs; i < ve; i++) |
| 2704 | | { |
| 2705 | | UINT16 value; |
| 2706 | | SIMD_EXTRACT16(m_xv[i], value, element); |
| 2707 | | DM_WRITE16(ea + (eaoffset & 0xf), value); |
| 2708 | | eaoffset += 2; |
| 2709 | | element++; |
| 2710 | | } |
| 2711 | | } |
| 2712 | | |
| 2713 | | static void cfunc_rsp_stv_simd(void *param) |
| 2714 | | { |
| 2715 | | ((rsp_device *)param)->ccfunc_rsp_stv_simd(); |
| 2716 | | } |
| 2717 | | #endif |
| 2718 | | |
| 2719 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 2720 | | |
| 2721 | | inline void rsp_device::ccfunc_rsp_stv_scalar() |
| 2722 | | { |
| 2723 | | UINT32 op = m_rsp_state->arg0; |
| 2724 | | int dest = (op >> 16) & 0x1f; |
| 2725 | | int base = (op >> 21) & 0x1f; |
| 2726 | | int index = (op >> 7) & 0xf; |
| 2727 | | int offset = (op & 0x7f); |
| 2728 | | |
| 2729 | | if (offset & 0x40) |
| 2730 | | { |
| 2731 | | offset |= 0xffffffc0; |
| 2732 | | } |
| 2733 | | |
| 2734 | | int vs = dest; |
| 2735 | | int ve = dest + 8; |
| 2736 | | if (ve > 32) |
| 2737 | | { |
| 2738 | | ve = 32; |
| 2739 | | } |
| 2740 | | |
| 2741 | | int element = 8 - (index >> 1); |
| 2742 | | |
| 2743 | | UINT32 ea = (base) ? m_rsp_state->r[base] + (offset * 16) : (offset * 16); |
| 2744 | | int eaoffset = (ea & 0xf) + (element * 2); |
| 2745 | | ea &= ~0xf; |
| 2746 | | |
| 2747 | | for (int i = vs; i < ve; i++) |
| 2748 | | { |
| 2749 | | DM_WRITE16(ea + (eaoffset & 0xf), VREG_S(i, element & 0x7)); |
| 2750 | | eaoffset += 2; |
| 2751 | | element++; |
| 2752 | | } |
| 2753 | | } |
| 2754 | | |
| 2755 | | static void cfunc_rsp_stv_scalar(void *param) |
| 2756 | | { |
| 2757 | | ((rsp_device *)param)->ccfunc_rsp_stv_scalar(); |
| 2758 | | } |
| 2759 | | #endif |
| 2760 | | |
| 2761 | | #if USE_SIMD |
| 2762 | | int rsp_device::generate_swc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 2763 | | { |
| 2764 | | // int loopdest; |
| 2765 | | UINT32 op = desc->opptr.l[0]; |
| 2766 | | //int dest = (op >> 16) & 0x1f; |
| 2767 | | //int base = (op >> 21) & 0x1f; |
| 2768 | | //int index = (op >> 7) & 0xf; |
| 2769 | | int offset = (op & 0x7f); |
| 2770 | | //int skip; |
| 2771 | | if (offset & 0x40) |
| 2772 | | { |
| 2773 | | offset |= 0xffffffc0; |
| 2774 | | } |
| 2775 | | |
| 2776 | | switch ((op >> 11) & 0x1f) |
| 2777 | | { |
| 2778 | | case 0x00: /* SBV */ |
| 2779 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2780 | | UML_CALLC(block, cfunc_rsp_sbv_simd, this); |
| 2781 | | #if SIMUL_SIMD |
| 2782 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2783 | | UML_CALLC(block, cfunc_rsp_sbv_scalar, this); |
| 2784 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2785 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2786 | | #endif |
| 2787 | | return TRUE; |
| 2788 | | case 0x01: /* SSV */ |
| 2789 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2790 | | UML_CALLC(block, cfunc_rsp_ssv_simd, this); |
| 2791 | | #if SIMUL_SIMD |
| 2792 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2793 | | UML_CALLC(block, cfunc_rsp_ssv_scalar, this); |
| 2794 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2795 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2796 | | #endif |
| 2797 | | return TRUE; |
| 2798 | | case 0x02: /* SLV */ |
| 2799 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2800 | | UML_CALLC(block, cfunc_rsp_slv_simd, this); |
| 2801 | | #if SIMUL_SIMD |
| 2802 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2803 | | UML_CALLC(block, cfunc_rsp_slv_scalar, this); |
| 2804 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2805 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2806 | | #endif |
| 2807 | | return TRUE; |
| 2808 | | case 0x03: /* SDV */ |
| 2809 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2810 | | UML_CALLC(block, cfunc_rsp_sdv_simd, this); |
| 2811 | | #if SIMUL_SIMD |
| 2812 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2813 | | UML_CALLC(block, cfunc_rsp_sdv_scalar, this); |
| 2814 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2815 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2816 | | #endif |
| 2817 | | return TRUE; |
| 2818 | | case 0x04: /* SQV */ |
| 2819 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2820 | | UML_CALLC(block, cfunc_rsp_sqv_simd, this); |
| 2821 | | #if SIMUL_SIMD |
| 2822 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2823 | | UML_CALLC(block, cfunc_rsp_sqv_scalar, this); |
| 2824 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2825 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2826 | | #endif |
| 2827 | | return TRUE; |
| 2828 | | case 0x05: /* SRV */ |
| 2829 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2830 | | UML_CALLC(block, cfunc_rsp_srv_simd, this); |
| 2831 | | #if SIMUL_SIMD |
| 2832 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2833 | | UML_CALLC(block, cfunc_rsp_srv_scalar, this); |
| 2834 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2835 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2836 | | #endif |
| 2837 | | return TRUE; |
| 2838 | | case 0x06: /* SPV */ |
| 2839 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2840 | | UML_CALLC(block, cfunc_rsp_spv_simd, this); |
| 2841 | | #if SIMUL_SIMD |
| 2842 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2843 | | UML_CALLC(block, cfunc_rsp_spv_scalar, this); |
| 2844 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2845 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2846 | | #endif |
| 2847 | | return TRUE; |
| 2848 | | case 0x07: /* SUV */ |
| 2849 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2850 | | UML_CALLC(block, cfunc_rsp_suv_simd, this); |
| 2851 | | #if SIMUL_SIMD |
| 2852 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2853 | | UML_CALLC(block, cfunc_rsp_suv_scalar, this); |
| 2854 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2855 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2856 | | #endif |
| 2857 | | return TRUE; |
| 2858 | | case 0x08: /* SHV */ |
| 2859 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2860 | | UML_CALLC(block, cfunc_rsp_shv_simd, this); |
| 2861 | | #if SIMUL_SIMD |
| 2862 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2863 | | UML_CALLC(block, cfunc_rsp_shv_scalar, this); |
| 2864 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2865 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2866 | | #endif |
| 2867 | | return TRUE; |
| 2868 | | case 0x09: /* SFV */ |
| 2869 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2870 | | UML_CALLC(block, cfunc_rsp_sfv_simd, this); |
| 2871 | | #if SIMUL_SIMD |
| 2872 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2873 | | UML_CALLC(block, cfunc_rsp_sfv_scalar, this); |
| 2874 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2875 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2876 | | #endif |
| 2877 | | return TRUE; |
| 2878 | | case 0x0a: /* SWV */ |
| 2879 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2880 | | UML_CALLC(block, cfunc_rsp_swv_simd, this); |
| 2881 | | #if SIMUL_SIMD |
| 2882 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2883 | | UML_CALLC(block, cfunc_rsp_swv_scalar, this); |
| 2884 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2885 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2886 | | #endif |
| 2887 | | return TRUE; |
| 2888 | | case 0x0b: /* STV */ |
| 2889 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2890 | | UML_CALLC(block, cfunc_rsp_stv_simd, this); |
| 2891 | | #if SIMUL_SIMD |
| 2892 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 2893 | | UML_CALLC(block, cfunc_rsp_stv_scalar, this); |
| 2894 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 2895 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 2896 | | #endif |
| 2897 | | return TRUE; |
| 2898 | | |
| 2899 | | default: |
| 2900 | | unimplemented_opcode(op); |
| 2901 | | return FALSE; |
| 2902 | | } |
| 2903 | | |
| 2904 | | return TRUE; |
| 2905 | | } |
| 2906 | | |
| 2907 | | #else |
| 2908 | | |
| 2909 | | int rsp_device::generate_swc2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 2910 | | { |
| 2911 | | // int loopdest; |
| 2912 | | UINT32 op = desc->opptr.l[0]; |
| 2913 | | //int dest = (op >> 16) & 0x1f; |
| 2914 | | //int base = (op >> 21) & 0x1f; |
| 2915 | | //int index = (op >> 7) & 0xf; |
| 2916 | | int offset = (op & 0x7f); |
| 2917 | | //int skip; |
| 2918 | | if (offset & 0x40) |
| 2919 | | { |
| 2920 | | offset |= 0xffffffc0; |
| 2921 | | } |
| 2922 | | |
| 2923 | | switch ((op >> 11) & 0x1f) |
| 2924 | | { |
| 2925 | | case 0x00: /* SBV */ |
| 2926 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2927 | | UML_CALLC(block, cfunc_rsp_sbv_scalar, this); |
| 2928 | | return TRUE; |
| 2929 | | case 0x01: /* SSV */ |
| 2930 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2931 | | UML_CALLC(block, cfunc_rsp_ssv_scalar, this); |
| 2932 | | return TRUE; |
| 2933 | | case 0x02: /* SLV */ |
| 2934 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2935 | | UML_CALLC(block, cfunc_rsp_slv_scalar, this); |
| 2936 | | return TRUE; |
| 2937 | | case 0x03: /* SDV */ |
| 2938 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2939 | | UML_CALLC(block, cfunc_rsp_sdv_scalar, this); |
| 2940 | | return TRUE; |
| 2941 | | case 0x04: /* SQV */ |
| 2942 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2943 | | UML_CALLC(block, cfunc_rsp_sqv_scalar, this); |
| 2944 | | return TRUE; |
| 2945 | | case 0x05: /* SRV */ |
| 2946 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2947 | | UML_CALLC(block, cfunc_rsp_srv_scalar, this); |
| 2948 | | return TRUE; |
| 2949 | | case 0x06: /* SPV */ |
| 2950 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2951 | | UML_CALLC(block, cfunc_rsp_spv_scalar, this); |
| 2952 | | return TRUE; |
| 2953 | | case 0x07: /* SUV */ |
| 2954 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2955 | | UML_CALLC(block, cfunc_rsp_suv_scalar, this); |
| 2956 | | return TRUE; |
| 2957 | | case 0x08: /* SHV */ |
| 2958 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2959 | | UML_CALLC(block, cfunc_rsp_shv_scalar, this); |
| 2960 | | return TRUE; |
| 2961 | | case 0x09: /* SFV */ |
| 2962 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2963 | | UML_CALLC(block, cfunc_rsp_sfv_scalar, this); |
| 2964 | | return TRUE; |
| 2965 | | case 0x0a: /* SWV */ |
| 2966 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2967 | | UML_CALLC(block, cfunc_rsp_swv_scalar, this); |
| 2968 | | return TRUE; |
| 2969 | | case 0x0b: /* STV */ |
| 2970 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 2971 | | UML_CALLC(block, cfunc_rsp_stv_scalar, this); |
| 2972 | | return TRUE; |
| 2973 | | |
| 2974 | | default: |
| 2975 | | unimplemented_opcode(op); |
| 2976 | | return FALSE; |
| 2977 | | } |
| 2978 | | |
| 2979 | | return TRUE; |
| 2980 | | } |
| 2981 | | #endif |
| 2982 | | |
| 2983 | | #if USE_SIMD |
| 2984 | | inline UINT16 rsp_device::VEC_SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 2985 | | { |
| 2986 | | if ((INT16)VEC_ACCUM_H(accum) < 0) |
| 2987 | | { |
| 2988 | | if ((UINT16)(VEC_ACCUM_H(accum)) != 0xffff) |
| 2989 | | { |
| 2990 | | return negative; |
| 2991 | | } |
| 2992 | | else |
| 2993 | | { |
| 2994 | | if ((INT16)VEC_ACCUM_M(accum) >= 0) |
| 2995 | | { |
| 2996 | | return negative; |
| 2997 | | } |
| 2998 | | else |
| 2999 | | { |
| 3000 | | if (slice == 0) |
| 3001 | | { |
| 3002 | | return VEC_ACCUM_L(accum); |
| 3003 | | } |
| 3004 | | else if (slice == 1) |
| 3005 | | { |
| 3006 | | return VEC_ACCUM_M(accum); |
| 3007 | | } |
| 3008 | | } |
| 3009 | | } |
| 3010 | | } |
| 3011 | | else |
| 3012 | | { |
| 3013 | | if ((UINT16)(VEC_ACCUM_H(accum)) != 0) |
| 3014 | | { |
| 3015 | | return positive; |
| 3016 | | } |
| 3017 | | else |
| 3018 | | { |
| 3019 | | if ((INT16)VEC_ACCUM_M(accum) < 0) |
| 3020 | | { |
| 3021 | | return positive; |
| 3022 | | } |
| 3023 | | else |
| 3024 | | { |
| 3025 | | if (slice == 0) |
| 3026 | | { |
| 3027 | | return VEC_ACCUM_L(accum); |
| 3028 | | } |
| 3029 | | else |
| 3030 | | { |
| 3031 | | return VEC_ACCUM_M(accum); |
| 3032 | | } |
| 3033 | | } |
| 3034 | | } |
| 3035 | | } |
| 3036 | | return 0; |
| 3037 | | } |
| 3038 | | #endif |
| 3039 | | |
| 3040 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3041 | | inline UINT16 rsp_device::SATURATE_ACCUM(int accum, int slice, UINT16 negative, UINT16 positive) |
| 3042 | | { |
| 3043 | | if ((INT16)ACCUM_H(accum) < 0) |
| 3044 | | { |
| 3045 | | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 3046 | | { |
| 3047 | | return negative; |
| 3048 | | } |
| 3049 | | else |
| 3050 | | { |
| 3051 | | if ((INT16)ACCUM_M(accum) >= 0) |
| 3052 | | { |
| 3053 | | return negative; |
| 3054 | | } |
| 3055 | | else |
| 3056 | | { |
| 3057 | | if (slice == 0) |
| 3058 | | { |
| 3059 | | return ACCUM_L(accum); |
| 3060 | | } |
| 3061 | | else if (slice == 1) |
| 3062 | | { |
| 3063 | | return ACCUM_M(accum); |
| 3064 | | } |
| 3065 | | } |
| 3066 | | } |
| 3067 | | } |
| 3068 | | else |
| 3069 | | { |
| 3070 | | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 3071 | | { |
| 3072 | | return positive; |
| 3073 | | } |
| 3074 | | else |
| 3075 | | { |
| 3076 | | if ((INT16)ACCUM_M(accum) < 0) |
| 3077 | | { |
| 3078 | | return positive; |
| 3079 | | } |
| 3080 | | else |
| 3081 | | { |
| 3082 | | if (slice == 0) |
| 3083 | | { |
| 3084 | | return ACCUM_L(accum); |
| 3085 | | } |
| 3086 | | else |
| 3087 | | { |
| 3088 | | return ACCUM_M(accum); |
| 3089 | | } |
| 3090 | | } |
| 3091 | | } |
| 3092 | | } |
| 3093 | | return 0; |
| 3094 | | } |
| 3095 | | #endif |
| 3096 | | |
| 3097 | | inline UINT16 rsp_device::SATURATE_ACCUM1(int accum, UINT16 negative, UINT16 positive) |
| 3098 | | { |
| 3099 | | // Return negative if H<0 && (H!=0xffff || M >= 0) |
| 3100 | | // Return positive if H>0 || (H==0 && M<0) |
| 3101 | | // Return medium slice if H==0xffff && M<0 |
| 3102 | | // Return medium slice if H==0 && M>=0 |
| 3103 | | if ((INT16)ACCUM_H(accum) < 0) |
| 3104 | | { |
| 3105 | | if ((UINT16)(ACCUM_H(accum)) != 0xffff) |
| 3106 | | { |
| 3107 | | return negative; |
| 3108 | | } |
| 3109 | | else |
| 3110 | | { |
| 3111 | | if ((INT16)ACCUM_M(accum) >= 0) |
| 3112 | | { |
| 3113 | | return negative; |
| 3114 | | } |
| 3115 | | else |
| 3116 | | { |
| 3117 | | return ACCUM_M(accum); |
| 3118 | | } |
| 3119 | | } |
| 3120 | | } |
| 3121 | | else |
| 3122 | | { |
| 3123 | | if ((UINT16)(ACCUM_H(accum)) != 0) |
| 3124 | | { |
| 3125 | | return positive; |
| 3126 | | } |
| 3127 | | else |
| 3128 | | { |
| 3129 | | if ((INT16)ACCUM_M(accum) < 0) |
| 3130 | | { |
| 3131 | | return positive; |
| 3132 | | } |
| 3133 | | else |
| 3134 | | { |
| 3135 | | return ACCUM_M(accum); |
| 3136 | | } |
| 3137 | | } |
| 3138 | | } |
| 3139 | | // never executed |
| 3140 | | //return 0; |
| 3141 | | } |
| 3142 | | |
| 3143 | | #if USE_SIMD |
| 3144 | | #define VEC_WRITEBACK_RESULT() { \ |
| 3145 | | SIMD_INSERT16(m_xv[VDREG], vres[0], 0); \ |
| 3146 | | SIMD_INSERT16(m_xv[VDREG], vres[1], 1); \ |
| 3147 | | SIMD_INSERT16(m_xv[VDREG], vres[2], 2); \ |
| 3148 | | SIMD_INSERT16(m_xv[VDREG], vres[3], 3); \ |
| 3149 | | SIMD_INSERT16(m_xv[VDREG], vres[4], 4); \ |
| 3150 | | SIMD_INSERT16(m_xv[VDREG], vres[5], 5); \ |
| 3151 | | SIMD_INSERT16(m_xv[VDREG], vres[6], 6); \ |
| 3152 | | SIMD_INSERT16(m_xv[VDREG], vres[7], 7); \ |
| 3153 | | } |
| 3154 | | #endif |
| 3155 | | |
| 3156 | | #define WRITEBACK_RESULT() { \ |
| 3157 | | W_VREG_S(VDREG, 0) = vres[0]; \ |
| 3158 | | W_VREG_S(VDREG, 1) = vres[1]; \ |
| 3159 | | W_VREG_S(VDREG, 2) = vres[2]; \ |
| 3160 | | W_VREG_S(VDREG, 3) = vres[3]; \ |
| 3161 | | W_VREG_S(VDREG, 4) = vres[4]; \ |
| 3162 | | W_VREG_S(VDREG, 5) = vres[5]; \ |
| 3163 | | W_VREG_S(VDREG, 6) = vres[6]; \ |
| 3164 | | W_VREG_S(VDREG, 7) = vres[7]; \ |
| 3165 | | } |
| 3166 | | |
| 3167 | | #if USE_SIMD |
| 3168 | | /* ============================================================================ |
| 3169 | | * RSPPackLo32to16: Pack LSBs of 32-bit vectors to 16-bits without saturation. |
| 3170 | | * TODO: 5 SSE2 operations is kind of expensive just to truncate values? |
| 3171 | | * ========================================================================= */ |
| 3172 | | INLINE __m128i RSPPackLo32to16(__m128i vectorLow, __m128i vectorHigh) |
| 3173 | | { |
| 3174 | | vectorLow = _mm_slli_epi32(vectorLow, 16); |
| 3175 | | vectorHigh = _mm_slli_epi32(vectorHigh, 16); |
| 3176 | | vectorLow = _mm_srai_epi32(vectorLow, 16); |
| 3177 | | vectorHigh = _mm_srai_epi32(vectorHigh, 16); |
| 3178 | | return _mm_packs_epi32(vectorLow, vectorHigh); |
| 3179 | | } |
| 3180 | | |
| 3181 | | /* ============================================================================ |
| 3182 | | * RSPPackHi32to16: Pack MSBs of 32-bit vectors to 16-bits without saturation. |
| 3183 | | * ========================================================================= */ |
| 3184 | | INLINE __m128i RSPPackHi32to16(__m128i vectorLow, __m128i vectorHigh) |
| 3185 | | { |
| 3186 | | vectorLow = _mm_srai_epi32(vectorLow, 16); |
| 3187 | | vectorHigh = _mm_srai_epi32(vectorHigh, 16); |
| 3188 | | return _mm_packs_epi32(vectorLow, vectorHigh); |
| 3189 | | } |
| 3190 | | |
| 3191 | | /* ============================================================================ |
| 3192 | | * RSPSignExtend16to32: Sign-extend 16-bit slices to 32-bit slices. |
| 3193 | | * ========================================================================= */ |
| 3194 | | INLINE void RSPSignExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 3195 | | { |
| 3196 | | __m128i vMask = _mm_srai_epi16(source, 15); |
| 3197 | | *vectorHigh = _mm_unpackhi_epi16(source, vMask); |
| 3198 | | *vectorLow = _mm_unpacklo_epi16(source, vMask); |
| 3199 | | } |
| 3200 | | |
| 3201 | | /* ============================================================================ |
| 3202 | | * RSPZeroExtend16to32: Zero-extend 16-bit slices to 32-bit slices. |
| 3203 | | * ========================================================================= */ |
| 3204 | | INLINE void RSPZeroExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh) |
| 3205 | | { |
| 3206 | | *vectorHigh = _mm_unpackhi_epi16(source, _mm_setzero_si128()); |
| 3207 | | *vectorLow = _mm_unpacklo_epi16(source, _mm_setzero_si128()); |
| 3208 | | } |
| 3209 | | |
| 3210 | | /* ============================================================================ |
| 3211 | | * _mm_mullo_epi32: SSE2 lacks _mm_mullo_epi32, define it manually. |
| 3212 | | * TODO/WARNING/DISCLAIMER: Assumes one argument is positive. |
| 3213 | | * ========================================================================= */ |
| 3214 | | INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) |
| 3215 | | { |
| 3216 | | __m128i a4 = _mm_srli_si128(a, 4); |
| 3217 | | __m128i b4 = _mm_srli_si128(b, 4); |
| 3218 | | __m128i ba = _mm_mul_epu32(b, a); |
| 3219 | | __m128i b4a4 = _mm_mul_epu32(b4, a4); |
| 3220 | | |
| 3221 | | __m128i mask = _mm_setr_epi32(~0, 0, ~0, 0); |
| 3222 | | __m128i baMask = _mm_and_si128(ba, mask); |
| 3223 | | __m128i b4a4Mask = _mm_and_si128(b4a4, mask); |
| 3224 | | __m128i b4a4MaskShift = _mm_slli_si128(b4a4Mask, 4); |
| 3225 | | |
| 3226 | | return _mm_or_si128(baMask, b4a4MaskShift); |
| 3227 | | } |
| 3228 | | |
| 3229 | | /* ============================================================================ |
| 3230 | | * RSPClampLowToVal: Clamps the low word of the accumulator. |
| 3231 | | * ========================================================================= */ |
| 3232 | | INLINE __m128i RSPClampLowToVal(__m128i vaccLow, __m128i vaccMid, __m128i vaccHigh) |
| 3233 | | { |
| 3234 | | __m128i setMask = _mm_cmpeq_epi16(_mm_setzero_si128(), _mm_setzero_si128()); |
| 3235 | | __m128i negCheck, useValMask, negVal, posVal; |
| 3236 | | |
| 3237 | | /* Compute some common values ahead of time. */ |
| 3238 | | negCheck = _mm_cmplt_epi16(vaccHigh, _mm_setzero_si128()); |
| 3239 | | |
| 3240 | | /* If accmulator < 0, clamp to val if val != TMin. */ |
| 3241 | | useValMask = _mm_and_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15)); |
| 3242 | | useValMask = _mm_cmpeq_epi16(useValMask, setMask); |
| 3243 | | negVal = _mm_and_si128(useValMask, vaccLow); |
| 3244 | | |
| 3245 | | /* Otherwise, clamp to ~0 if any high bits are set. */ |
| 3246 | | useValMask = _mm_or_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15)); |
| 3247 | | useValMask = _mm_cmpeq_epi16(useValMask, _mm_setzero_si128()); |
| 3248 | | posVal = _mm_and_si128(useValMask, vaccLow); |
| 3249 | | |
| 3250 | | negVal = _mm_and_si128(negCheck, negVal); |
| 3251 | | posVal = _mm_andnot_si128(negCheck, posVal); |
| 3252 | | return _mm_or_si128(negVal, posVal); |
| 3253 | | } |
| 3254 | | #endif |
| 3255 | | |
| 3256 | | #if USE_SIMD |
| 3257 | | // VMULF |
| 3258 | | // |
| 3259 | | // 31 25 24 20 15 10 5 0 |
| 3260 | | // ------------------------------------------------------ |
| 3261 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 | |
| 3262 | | // ------------------------------------------------------ |
| 3263 | | // |
| 3264 | | // Multiplies signed integer by signed integer * 2 |
| 3265 | | |
| 3266 | | inline void rsp_device::ccfunc_rsp_vmulf_simd() |
| 3267 | | { |
| 3268 | | int op = m_rsp_state->arg0; |
| 3269 | | |
| 3270 | | INT16 vres[8]; |
| 3271 | | for (int i = 0; i < 8; i++) |
| 3272 | | { |
| 3273 | | UINT16 w1, w2; |
| 3274 | | VEC_GET_SCALAR_VS1(w1, i); |
| 3275 | | VEC_GET_SCALAR_VS2(w2, i); |
| 3276 | | INT32 s1 = (INT32)(INT16)w1; |
| 3277 | | INT32 s2 = (INT32)(INT16)w2; |
| 3278 | | |
| 3279 | | if (s1 == -32768 && s2 == -32768) |
| 3280 | | { |
| 3281 | | // overflow |
| 3282 | | VEC_SET_ACCUM_H(0, i); |
| 3283 | | VEC_SET_ACCUM_M(-32768, i); |
| 3284 | | VEC_SET_ACCUM_L(-32768, i); |
| 3285 | | vres[i] = 0x7fff; |
| 3286 | | } |
| 3287 | | else |
| 3288 | | { |
| 3289 | | INT64 r = s1 * s2 * 2; |
| 3290 | | r += 0x8000; // rounding ? |
| 3291 | | VEC_SET_ACCUM_H((r < 0) ? 0xffff : 0, i); |
| 3292 | | VEC_SET_ACCUM_M((INT16)(r >> 16), i); |
| 3293 | | VEC_SET_ACCUM_L((UINT16)(r), i); |
| 3294 | | vres[i] = VEC_ACCUM_M(i); |
| 3295 | | } |
| 3296 | | } |
| 3297 | | VEC_WRITEBACK_RESULT(); |
| 3298 | | } |
| 3299 | | |
| 3300 | | static void cfunc_rsp_vmulf_simd(void *param) |
| 3301 | | { |
| 3302 | | ((rsp_device *)param)->ccfunc_rsp_vmulf_simd(); |
| 3303 | | } |
| 3304 | | #endif |
| 3305 | | |
| 3306 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3307 | | |
| 3308 | | inline void rsp_device::ccfunc_rsp_vmulf_scalar() |
| 3309 | | { |
| 3310 | | int op = m_rsp_state->arg0; |
| 3311 | | |
| 3312 | | INT16 vres[8]; |
| 3313 | | for (int i = 0; i < 8; i++) |
| 3314 | | { |
| 3315 | | UINT16 w1, w2; |
| 3316 | | SCALAR_GET_VS1(w1, i); |
| 3317 | | SCALAR_GET_VS2(w2, i); |
| 3318 | | INT32 s1 = (INT32)(INT16)w1; |
| 3319 | | INT32 s2 = (INT32)(INT16)w2; |
| 3320 | | |
| 3321 | | if (s1 == -32768 && s2 == -32768) |
| 3322 | | { |
| 3323 | | // overflow |
| 3324 | | SET_ACCUM_H(0, i); |
| 3325 | | SET_ACCUM_M(-32768, i); |
| 3326 | | SET_ACCUM_L(-32768, i); |
| 3327 | | vres[i] = 0x7fff; |
| 3328 | | } |
| 3329 | | else |
| 3330 | | { |
| 3331 | | INT64 r = s1 * s2 * 2; |
| 3332 | | r += 0x8000; // rounding ? |
| 3333 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); |
| 3334 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 3335 | | SET_ACCUM_L((UINT16)(r), i); |
| 3336 | | vres[i] = ACCUM_M(i); |
| 3337 | | } |
| 3338 | | } |
| 3339 | | WRITEBACK_RESULT(); |
| 3340 | | } |
| 3341 | | |
| 3342 | | static void cfunc_rsp_vmulf_scalar(void *param) |
| 3343 | | { |
| 3344 | | ((rsp_device *)param)->ccfunc_rsp_vmulf_scalar(); |
| 3345 | | } |
| 3346 | | #endif |
| 3347 | | |
| 3348 | | #if USE_SIMD |
| 3349 | | // VMULU |
| 3350 | | // |
| 3351 | | // 31 25 24 20 15 10 5 0 |
| 3352 | | // ------------------------------------------------------ |
| 3353 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 | |
| 3354 | | // ------------------------------------------------------ |
| 3355 | | // |
| 3356 | | |
| 3357 | | inline void rsp_device::ccfunc_rsp_vmulu_simd() |
| 3358 | | { |
| 3359 | | int op = m_rsp_state->arg0; |
| 3360 | | |
| 3361 | | INT16 vres[8]; |
| 3362 | | for (int i = 0; i < 8; i++) |
| 3363 | | { |
| 3364 | | UINT16 w1, w2; |
| 3365 | | VEC_GET_SCALAR_VS1(w1, i); |
| 3366 | | VEC_GET_SCALAR_VS2(w2, i); |
| 3367 | | INT32 s1 = (INT32)(INT16)w1; |
| 3368 | | INT32 s2 = (INT32)(INT16)w2; |
| 3369 | | |
| 3370 | | INT64 r = s1 * s2 * 2; |
| 3371 | | r += 0x8000; // rounding ? |
| 3372 | | |
| 3373 | | VEC_SET_ACCUM_H((UINT16)(r >> 32), i); |
| 3374 | | VEC_SET_ACCUM_M((UINT16)(r >> 16), i); |
| 3375 | | VEC_SET_ACCUM_L((UINT16)(r), i); |
| 3376 | | |
| 3377 | | if (r < 0) |
| 3378 | | { |
| 3379 | | vres[i] = 0; |
| 3380 | | } |
| 3381 | | else if (((INT16)(VEC_ACCUM_H(i)) ^ (INT16)(VEC_ACCUM_M(i))) < 0) |
| 3382 | | { |
| 3383 | | vres[i] = -1; |
| 3384 | | } |
| 3385 | | else |
| 3386 | | { |
| 3387 | | vres[i] = VEC_ACCUM_M(i); |
| 3388 | | } |
| 3389 | | } |
| 3390 | | VEC_WRITEBACK_RESULT(); |
| 3391 | | } |
| 3392 | | |
| 3393 | | static void cfunc_rsp_vmulu_simd(void *param) |
| 3394 | | { |
| 3395 | | ((rsp_device *)param)->ccfunc_rsp_vmulu_simd(); |
| 3396 | | } |
| 3397 | | #endif |
| 3398 | | |
| 3399 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3400 | | |
| 3401 | | inline void rsp_device::ccfunc_rsp_vmulu_scalar() |
| 3402 | | { |
| 3403 | | int op = m_rsp_state->arg0; |
| 3404 | | |
| 3405 | | INT16 vres[8]; |
| 3406 | | for (int i = 0; i < 8; i++) |
| 3407 | | { |
| 3408 | | UINT16 w1, w2; |
| 3409 | | SCALAR_GET_VS1(w1, i); |
| 3410 | | SCALAR_GET_VS2(w2, i); |
| 3411 | | INT32 s1 = (INT32)(INT16)w1; |
| 3412 | | INT32 s2 = (INT32)(INT16)w2; |
| 3413 | | |
| 3414 | | INT64 r = s1 * s2 * 2; |
| 3415 | | r += 0x8000; // rounding ? |
| 3416 | | |
| 3417 | | SET_ACCUM_H((UINT16)(r >> 32), i); |
| 3418 | | SET_ACCUM_M((UINT16)(r >> 16), i); |
| 3419 | | SET_ACCUM_L((UINT16)(r), i); |
| 3420 | | |
| 3421 | | if (r < 0) |
| 3422 | | { |
| 3423 | | vres[i] = 0; |
| 3424 | | } |
| 3425 | | else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0) |
| 3426 | | { |
| 3427 | | vres[i] = -1; |
| 3428 | | } |
| 3429 | | else |
| 3430 | | { |
| 3431 | | vres[i] = ACCUM_M(i); |
| 3432 | | } |
| 3433 | | } |
| 3434 | | WRITEBACK_RESULT(); |
| 3435 | | } |
| 3436 | | |
| 3437 | | static void cfunc_rsp_vmulu_scalar(void *param) |
| 3438 | | { |
| 3439 | | ((rsp_device *)param)->ccfunc_rsp_vmulu_scalar(); |
| 3440 | | } |
| 3441 | | #endif |
| 3442 | | |
| 3443 | | #if USE_SIMD |
| 3444 | | // VMUDL |
| 3445 | | // |
| 3446 | | // 31 25 24 20 15 10 5 0 |
| 3447 | | // ------------------------------------------------------ |
| 3448 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 | |
| 3449 | | // ------------------------------------------------------ |
| 3450 | | // |
| 3451 | | // Multiplies signed integer by unsigned fraction |
| 3452 | | // The result is added into accumulator |
| 3453 | | // The middle slice of accumulator is stored into destination element |
| 3454 | | |
| 3455 | | inline void rsp_device::ccfunc_rsp_vmudl_simd() |
| 3456 | | { |
| 3457 | | int op = m_rsp_state->arg0; |
| 3458 | | |
| 3459 | | __m128i vsReg = m_xv[VS1REG]; |
| 3460 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3461 | | |
| 3462 | | /* Unpack to obtain for 32-bit precision. */ |
| 3463 | | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3464 | | __m128i unpackHi = _mm_mulhi_epu16(vsReg, vtReg); |
| 3465 | | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3466 | | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3467 | | |
| 3468 | | m_xv[VDREG] = m_accum_l = RSPPackHi32to16(loProduct, hiProduct); |
| 3469 | | |
| 3470 | | m_accum_m = _mm_setzero_si128(); |
| 3471 | | m_accum_h = _mm_setzero_si128(); |
| 3472 | | } |
| 3473 | | |
| 3474 | | static void cfunc_rsp_vmudl_simd(void *param) |
| 3475 | | { |
| 3476 | | ((rsp_device *)param)->ccfunc_rsp_vmudl_simd(); |
| 3477 | | } |
| 3478 | | #endif |
| 3479 | | |
| 3480 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3481 | | |
| 3482 | | inline void rsp_device::ccfunc_rsp_vmudl_scalar() |
| 3483 | | { |
| 3484 | | int op = m_rsp_state->arg0; |
| 3485 | | |
| 3486 | | INT16 vres[8]; |
| 3487 | | for (int i = 0; i < 8; i++) |
| 3488 | | { |
| 3489 | | UINT16 w1, w2; |
| 3490 | | SCALAR_GET_VS1(w1, i); |
| 3491 | | SCALAR_GET_VS2(w2, i); |
| 3492 | | UINT32 s1 = (UINT32)(UINT16)w1; |
| 3493 | | UINT32 s2 = (UINT32)(UINT16)w2; |
| 3494 | | |
| 3495 | | UINT32 r = s1 * s2; |
| 3496 | | |
| 3497 | | SET_ACCUM_H(0, i); |
| 3498 | | SET_ACCUM_M(0, i); |
| 3499 | | SET_ACCUM_L((UINT16)(r >> 16), i); |
| 3500 | | |
| 3501 | | vres[i] = ACCUM_L(i); |
| 3502 | | } |
| 3503 | | WRITEBACK_RESULT(); |
| 3504 | | } |
| 3505 | | |
| 3506 | | static void cfunc_rsp_vmudl_scalar(void *param) |
| 3507 | | { |
| 3508 | | ((rsp_device *)param)->ccfunc_rsp_vmudl_scalar(); |
| 3509 | | } |
| 3510 | | #endif |
| 3511 | | |
| 3512 | | #if USE_SIMD |
| 3513 | | // VMUDM |
| 3514 | | // |
| 3515 | | // 31 25 24 20 15 10 5 0 |
| 3516 | | // ------------------------------------------------------ |
| 3517 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 | |
| 3518 | | // ------------------------------------------------------ |
| 3519 | | // |
| 3520 | | // Multiplies signed integer by unsigned fraction |
| 3521 | | // The result is stored into accumulator |
| 3522 | | // The middle slice of accumulator is stored into destination element |
| 3523 | | |
| 3524 | | inline void rsp_device::ccfunc_rsp_vmudm_simd() |
| 3525 | | { |
| 3526 | | int op = m_rsp_state->arg0; |
| 3527 | | |
| 3528 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 3529 | | |
| 3530 | | __m128i vsReg = m_xv[VS1REG]; |
| 3531 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3532 | | |
| 3533 | | /* Unpack to obtain for 32-bit precision. */ |
| 3534 | | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 3535 | | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 3536 | | |
| 3537 | | /* Begin accumulating the products. */ |
| 3538 | | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 3539 | | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 3540 | | m_accum_l = RSPPackLo32to16(loProduct, hiProduct); |
| 3541 | | m_accum_m = m_xv[VDREG] = RSPPackHi32to16(loProduct, hiProduct); |
| 3542 | | |
| 3543 | | loProduct = _mm_cmplt_epi32(loProduct, _mm_setzero_si128()); |
| 3544 | | hiProduct = _mm_cmplt_epi32(hiProduct, _mm_setzero_si128()); |
| 3545 | | m_accum_h = _mm_packs_epi32(loProduct, hiProduct); |
| 3546 | | } |
| 3547 | | |
| 3548 | | static void cfunc_rsp_vmudm_simd(void *param) |
| 3549 | | { |
| 3550 | | ((rsp_device *)param)->ccfunc_rsp_vmudm_simd(); |
| 3551 | | } |
| 3552 | | #endif |
| 3553 | | |
| 3554 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3555 | | |
| 3556 | | inline void rsp_device::ccfunc_rsp_vmudm_scalar() |
| 3557 | | { |
| 3558 | | int op = m_rsp_state->arg0; |
| 3559 | | |
| 3560 | | INT16 vres[8]; |
| 3561 | | for (int i = 0; i < 8; i++) |
| 3562 | | { |
| 3563 | | UINT16 w1, w2; |
| 3564 | | SCALAR_GET_VS1(w1, i); |
| 3565 | | SCALAR_GET_VS2(w2, i); |
| 3566 | | INT32 s1 = (INT32)(INT16)w1; |
| 3567 | | INT32 s2 = (UINT16)w2; |
| 3568 | | |
| 3569 | | INT32 r = s1 * s2; |
| 3570 | | |
| 3571 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 3572 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 3573 | | SET_ACCUM_L((UINT16)r, i); |
| 3574 | | |
| 3575 | | vres[i] = ACCUM_M(i); |
| 3576 | | } |
| 3577 | | WRITEBACK_RESULT(); |
| 3578 | | } |
| 3579 | | |
| 3580 | | static void cfunc_rsp_vmudm_scalar(void *param) |
| 3581 | | { |
| 3582 | | ((rsp_device *)param)->ccfunc_rsp_vmudm_scalar(); |
| 3583 | | } |
| 3584 | | #endif |
| 3585 | | |
| 3586 | | #if USE_SIMD |
| 3587 | | // VMUDN |
| 3588 | | // |
| 3589 | | // 31 25 24 20 15 10 5 0 |
| 3590 | | // ------------------------------------------------------ |
| 3591 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 | |
| 3592 | | // ------------------------------------------------------ |
| 3593 | | // |
| 3594 | | // Multiplies unsigned fraction by signed integer |
| 3595 | | // The result is stored into accumulator |
| 3596 | | // The low slice of accumulator is stored into destination element |
| 3597 | | |
| 3598 | | inline void rsp_device::ccfunc_rsp_vmudn_simd() |
| 3599 | | { |
| 3600 | | int op = m_rsp_state->arg0; |
| 3601 | | |
| 3602 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi; |
| 3603 | | |
| 3604 | | __m128i vsReg = m_xv[VS1REG]; |
| 3605 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3606 | | |
| 3607 | | /* Unpack to obtain for 32-bit precision. */ |
| 3608 | | RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 3609 | | RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 3610 | | |
| 3611 | | /* Begin accumulating the products. */ |
| 3612 | | __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 3613 | | __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 3614 | | m_xv[VDREG] = m_accum_l = RSPPackLo32to16(loProduct, hiProduct); |
| 3615 | | m_accum_m = RSPPackHi32to16(loProduct, hiProduct); |
| 3616 | | m_accum_h = _mm_cmplt_epi16(m_accum_m, _mm_setzero_si128()); |
| 3617 | | } |
| 3618 | | |
| 3619 | | static void cfunc_rsp_vmudn_simd(void *param) |
| 3620 | | { |
| 3621 | | ((rsp_device *)param)->ccfunc_rsp_vmudn_simd(); |
| 3622 | | } |
| 3623 | | #endif |
| 3624 | | |
| 3625 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3626 | | |
| 3627 | | inline void rsp_device::ccfunc_rsp_vmudn_scalar() |
| 3628 | | { |
| 3629 | | int op = m_rsp_state->arg0; |
| 3630 | | |
| 3631 | | INT16 vres[8] = { 0 }; |
| 3632 | | for (int i = 0; i < 8; i++) |
| 3633 | | { |
| 3634 | | UINT16 w1, w2; |
| 3635 | | SCALAR_GET_VS1(w1, i); |
| 3636 | | SCALAR_GET_VS2(w2, i); |
| 3637 | | INT32 s1 = (UINT16)w1; |
| 3638 | | INT32 s2 = (INT32)(INT16)w2; |
| 3639 | | |
| 3640 | | INT32 r = s1 * s2; |
| 3641 | | |
| 3642 | | SET_ACCUM_H((r < 0) ? 0xffff : 0, i); // sign-extend to 48-bit |
| 3643 | | SET_ACCUM_M((INT16)(r >> 16), i); |
| 3644 | | SET_ACCUM_L((UINT16)(r), i); |
| 3645 | | |
| 3646 | | vres[i] = (UINT16)(r); |
| 3647 | | } |
| 3648 | | WRITEBACK_RESULT(); |
| 3649 | | } |
| 3650 | | |
| 3651 | | static void cfunc_rsp_vmudn_scalar(void *param) |
| 3652 | | { |
| 3653 | | ((rsp_device *)param)->ccfunc_rsp_vmudn_scalar(); |
| 3654 | | } |
| 3655 | | #endif |
| 3656 | | |
| 3657 | | #if USE_SIMD |
| 3658 | | // VMUDH |
| 3659 | | // |
| 3660 | | // 31 25 24 20 15 10 5 0 |
| 3661 | | // ------------------------------------------------------ |
| 3662 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 | |
| 3663 | | // ------------------------------------------------------ |
| 3664 | | // |
| 3665 | | // Multiplies signed integer by signed integer |
| 3666 | | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 3667 | | // The highest 32 bits of accumulator is saturated into destination element |
| 3668 | | |
| 3669 | | inline void rsp_device::ccfunc_rsp_vmudh_simd() |
| 3670 | | { |
| 3671 | | int op = m_rsp_state->arg0; |
| 3672 | | |
| 3673 | | __m128i vaccLow, vaccHigh; |
| 3674 | | __m128i unpackLo, unpackHi; |
| 3675 | | |
| 3676 | | __m128i vsReg = m_xv[VS1REG]; |
| 3677 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3678 | | |
| 3679 | | /* Multiply the sources, accumulate the product. */ |
| 3680 | | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3681 | | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 3682 | | vaccHigh = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3683 | | vaccLow = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3684 | | |
| 3685 | | /* Pack the accumulator and result back up. */ |
| 3686 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 3687 | | m_accum_l = _mm_setzero_si128(); |
| 3688 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3689 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 3690 | | } |
| 3691 | | |
| 3692 | | static void cfunc_rsp_vmudh_simd(void *param) |
| 3693 | | { |
| 3694 | | ((rsp_device *)param)->ccfunc_rsp_vmudh_simd(); |
| 3695 | | } |
| 3696 | | #endif |
| 3697 | | |
| 3698 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3699 | | |
| 3700 | | inline void rsp_device::ccfunc_rsp_vmudh_scalar() |
| 3701 | | { |
| 3702 | | int op = m_rsp_state->arg0; |
| 3703 | | |
| 3704 | | INT16 vres[8]; |
| 3705 | | for (int i = 0; i < 8; i++) |
| 3706 | | { |
| 3707 | | UINT16 w1, w2; |
| 3708 | | SCALAR_GET_VS1(w1, i); |
| 3709 | | SCALAR_GET_VS2(w2, i); |
| 3710 | | INT32 s1 = (INT32)(INT16)w1; |
| 3711 | | INT32 s2 = (INT32)(INT16)w2; |
| 3712 | | |
| 3713 | | INT32 r = s1 * s2; |
| 3714 | | |
| 3715 | | SET_ACCUM_H((INT16)(r >> 16), i); |
| 3716 | | SET_ACCUM_M((UINT16)(r), i); |
| 3717 | | SET_ACCUM_L(0, i); |
| 3718 | | |
| 3719 | | if (r < -32768) r = -32768; |
| 3720 | | if (r > 32767) r = 32767; |
| 3721 | | vres[i] = (INT16)(r); |
| 3722 | | } |
| 3723 | | WRITEBACK_RESULT(); |
| 3724 | | } |
| 3725 | | |
| 3726 | | static void cfunc_rsp_vmudh_scalar(void *param) |
| 3727 | | { |
| 3728 | | ((rsp_device *)param)->ccfunc_rsp_vmudh_scalar(); |
| 3729 | | } |
| 3730 | | #endif |
| 3731 | | |
| 3732 | | #if USE_SIMD |
| 3733 | | // VMACF |
| 3734 | | // |
| 3735 | | // 31 25 24 20 15 10 5 0 |
| 3736 | | // ------------------------------------------------------ |
| 3737 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001000 | |
| 3738 | | // ------------------------------------------------------ |
| 3739 | | // |
| 3740 | | |
| 3741 | | inline void rsp_device::ccfunc_rsp_vmacf_simd() /* VMACF (USE_SIMD build): signed multiply-accumulate, currently done with a scalar loop over the 8 lanes. */ |
| 3742 | | { |
| 3743 | | int op = m_rsp_state->arg0; /* not named again in the live code; presumably consumed by the VEC_* macros - confirm */ |
| 3744 | | |
| 3745 | | INT16 vres[8]; |
| 3746 | | for (int i = 0; i < 8; i++) |
| 3747 | | { |
| 3748 | | UINT16 w1, w2; |
| 3749 | | VEC_GET_SCALAR_VS1(w1, i); |
| 3750 | | VEC_GET_SCALAR_VS2(w2, i); |
| 3751 | | INT32 s1 = (INT32)(INT16)w1; |
| 3752 | | INT32 s2 = (INT32)(INT16)w2; |
| 3753 | | |
| 3754 | | INT32 r = s1 * s2; |
| 3755 | | /* Rebuild the four 16-bit accumulator slices into one 64-bit value. */ |
| 3756 | | UINT64 q = (UINT64)(UINT16)VEC_ACCUM_LL(i); |
| 3757 | | q |= (((UINT64)(UINT16)VEC_ACCUM_L(i)) << 16); |
| 3758 | | q |= (((UINT64)(UINT16)VEC_ACCUM_M(i)) << 32); |
| 3759 | | q |= (((UINT64)(UINT16)VEC_ACCUM_H(i)) << 48); |
| 3760 | | /* Shift in the unsigned domain: r may be negative, and left-shifting a negative signed value is undefined. The resulting bits are the same. */ |
| 3761 | | q += ((UINT64)(INT64)r) << 17; |
| 3762 | | VEC_SET_ACCUM_LL((UINT16)q, i); |
| 3763 | | VEC_SET_ACCUM_L((UINT16)(q >> 16), i); |
| 3764 | | VEC_SET_ACCUM_M((UINT16)(q >> 32), i); |
| 3765 | | VEC_SET_ACCUM_H((UINT16)(q >> 48), i); |
| 3766 | | |
| 3767 | | vres[i] = VEC_SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 3768 | | } |
| 3769 | | VEC_WRITEBACK_RESULT(); |
| 3770 | | /* |
| 3771 | | __m128i loProduct, hiProduct, unpackLo, unpackHi; |
| 3772 | | __m128i vaccHigh; |
| 3773 | | __m128i vdReg, vdRegLo, vdRegHi; |
| 3774 | | |
| 3775 | | __m128i vsReg = m_xv[VS1REG]; |
| 3776 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3777 | | |
| 3778 | | __m128i vaccLow = m_accum_l; |
| 3779 | | |
| 3780 | | // Unpack to obtain for 32-bit precision. |
| 3781 | | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 3782 | | |
| 3783 | | // Begin accumulating the products. |
| 3784 | | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3785 | | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 3786 | | loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3787 | | hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3788 | | loProduct = _mm_slli_epi32(loProduct, 1); |
| 3789 | | hiProduct = _mm_slli_epi32(hiProduct, 1); |
| 3790 | | |
| 3791 | | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 3792 | | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 3793 | | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 3794 | | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 3795 | | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 3796 | | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 3797 | | |
| 3798 | | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 3799 | | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 3800 | | |
| 3801 | | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3802 | | |
| 3803 | | // Multiply the MSB of sources, accumulate the product. |
| 3804 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 3805 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 3806 | | |
| 3807 | | loProduct = _mm_srai_epi32(loProduct, 16); |
| 3808 | | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 3809 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 3810 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 3811 | | |
| 3812 | | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 3813 | | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 3814 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 3815 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 3816 | | |
| 3817 | | // Clamp the accumulator and write it all out. |
| 3818 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 3819 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3820 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 3821 | | */ |
| 3822 | | } |
| 3823 | | |
| 3824 | | static void cfunc_rsp_vmacf_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmacf_simd(). */ |
| 3825 | | { |
| 3826 | | ((rsp_device *)param)->ccfunc_rsp_vmacf_simd(); |
| 3827 | | } |
| 3828 | | #endif |
| 3829 | | |
| 3830 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3831 | | |
| 3832 | | inline void rsp_device::ccfunc_rsp_vmacf_scalar() /* VMACF (scalar build): per lane, adds (signed VS1 * signed VS2) << 17 to the 64-bit accumulator, then saturates into vres. */ |
| 3833 | | { |
| 3834 | | int op = m_rsp_state->arg0; /* not named again below; presumably consumed by the SCALAR_* / WRITEBACK macros - confirm */ |
| 3835 | | |
| 3836 | | INT16 vres[8]; |
| 3837 | | for (int i = 0; i < 8; i++) |
| 3838 | | { |
| 3839 | | UINT16 w1, w2; |
| 3840 | | SCALAR_GET_VS1(w1, i); |
| 3841 | | SCALAR_GET_VS2(w2, i); |
| 3842 | | INT32 s1 = (INT32)(INT16)w1; |
| 3843 | | INT32 s2 = (INT32)(INT16)w2; |
| 3844 | | |
| 3845 | | INT32 r = s1 * s2; |
| 3846 | | /* Rebuild the four 16-bit accumulator slices into one 64-bit value. */ |
| 3847 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 3848 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 3849 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 3850 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 3851 | | /* Shift in the unsigned domain: r may be negative, and left-shifting a negative signed value is undefined. The resulting bits are the same. */ |
| 3852 | | q += ((UINT64)(INT64)r) << 17; |
| 3853 | | SET_ACCUM_LL((UINT16)q, i); |
| 3854 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 3855 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 3856 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 3857 | | |
| 3858 | | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 3859 | | } |
| 3860 | | WRITEBACK_RESULT(); |
| 3861 | | } |
| 3862 | | |
| 3863 | | static void cfunc_rsp_vmacf_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmacf_scalar(). */ |
| 3864 | | { |
| 3865 | | ((rsp_device *)param)->ccfunc_rsp_vmacf_scalar(); |
| 3866 | | } |
| 3867 | | #endif |
| 3868 | | |
| 3869 | | #if USE_SIMD |
| 3870 | | // VMACU |
| 3871 | | // |
| 3872 | | // 31 25 24 20 15 10 5 0 |
| 3873 | | // ------------------------------------------------------ |
| 3874 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 | |
| 3875 | | // ------------------------------------------------------ |
| 3876 | | // |
| 3877 | | |
| 3878 | | inline void rsp_device::ccfunc_rsp_vmacu_simd() /* VMACU (SIMD): adds (VS1 * shuffled VS2) << 1 into m_accum_l / m_accum_m / m_accum_h using 32-bit lanes. */ |
| 3879 | | { |
| 3880 | | int op = m_rsp_state->arg0; |
| 3881 | | /* NOTE(review): vdReg is assigned below but never read, and m_xv[VDREG] is not written in this function - confirm the destination write was not lost. */ |
| 3882 | | __m128i loProduct, hiProduct, unpackLo, unpackHi; |
| 3883 | | __m128i vaccHigh; |
| 3884 | | __m128i vdReg, vdRegLo, vdRegHi; |
| 3885 | | |
| 3886 | | __m128i vsReg = m_xv[VS1REG]; |
| 3887 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 3888 | | |
| 3889 | | __m128i vaccLow = m_accum_l; |
| 3890 | | |
| 3891 | | /* Widen the low accumulator slice to 32 bits per lane so carries are not lost. */ |
| 3892 | | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 3893 | | |
| 3894 | | /* Form the full 32-bit signed products from the 16-bit low/high halves, then double them. */ |
| 3895 | | unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 3896 | | unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 3897 | | loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 3898 | | hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 3899 | | loProduct = _mm_slli_epi32(loProduct, 1); |
| 3900 | | hiProduct = _mm_slli_epi32(hiProduct, 1); |
| 3901 | | /* The shift-right / shift-left / xor sequence keeps only the low 16 bits of each doubled product. */ |
| 3902 | | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 3903 | | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 3904 | | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 3905 | | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 3906 | | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 3907 | | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 3908 | | |
| 3909 | | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 3910 | | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 3911 | | |
| 3912 | | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3913 | | |
| 3914 | | /* Combine the mid/high slices into 32-bit lanes, then add the upper product half and the carry out of the low slice. */ |
| 3915 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 3916 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 3917 | | |
| 3918 | | loProduct = _mm_srai_epi32(loProduct, 16); |
| 3919 | | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 3920 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 3921 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 3922 | | |
| 3923 | | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 3924 | | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 3925 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 3926 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 3927 | | |
| 3928 | | /* Store the updated mid and high accumulator slices (no clamp or destination write happens here). */ |
| 3929 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 3930 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 3931 | | } |
| 3932 | | |
| 3933 | | static void cfunc_rsp_vmacu_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmacu_simd(). */ |
| 3934 | | { |
| 3935 | | ((rsp_device *)param)->ccfunc_rsp_vmacu_simd(); |
| 3936 | | } |
| 3937 | | #endif |
| 3938 | | |
| 3939 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 3940 | | |
| 3941 | | inline void rsp_device::ccfunc_rsp_vmacu_scalar() /* VMACU (scalar): accumulates twice the signed product slice by slice, then clamps the result to an unsigned 16-bit value. */ |
| 3942 | | { |
| 3943 | | int op = m_rsp_state->arg0; |
| 3944 | | |
| 3945 | | INT16 vres[8]; |
| 3946 | | for (int i = 0; i < 8; i++) |
| 3947 | | { |
| 3948 | | UINT16 w1, w2; |
| 3949 | | SCALAR_GET_VS1(w1, i); |
| 3950 | | SCALAR_GET_VS2(w2, i); |
| 3951 | | INT32 s1 = (INT32)(INT16)w1; |
| 3952 | | INT32 s2 = (INT32)(INT16)w2; |
| 3953 | | /* r2 / r3 hold the new low / mid slices in bits 0-15, with the carry into the next slice above bit 15. */ |
| 3954 | | INT32 r1 = s1 * s2; |
| 3955 | | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 3956 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 3957 | | |
| 3958 | | SET_ACCUM_L((UINT16)(r2), i); |
| 3959 | | SET_ACCUM_M((UINT16)(r3), i); |
| 3960 | | SET_ACCUM_H(ACCUM_H(i) + (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31), i); |
| 3961 | | /* Clamp: negative high slice -> 0; non-zero high slice or negative mid slice -> 0xffff; otherwise the mid slice. */ |
| 3962 | | if ((INT16)ACCUM_H(i) < 0) |
| 3963 | | { |
| 3964 | | vres[i] = 0; |
| 3965 | | } |
| 3966 | | else |
| 3967 | | { |
| 3968 | | if (ACCUM_H(i) != 0) |
| 3969 | | { |
| 3970 | | vres[i] = (INT16)0xffff; |
| 3971 | | } |
| 3972 | | else |
| 3973 | | { |
| 3974 | | if ((INT16)ACCUM_M(i) < 0) |
| 3975 | | { |
| 3976 | | vres[i] = (INT16)0xffff; |
| 3977 | | } |
| 3978 | | else |
| 3979 | | { |
| 3980 | | vres[i] = ACCUM_M(i); |
| 3981 | | } |
| 3982 | | } |
| 3983 | | } |
| 3984 | | } |
| 3985 | | WRITEBACK_RESULT(); |
| 3986 | | } |
| 3987 | | |
| 3988 | | static void cfunc_rsp_vmacu_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmacu_scalar(). */ |
| 3989 | | { |
| 3990 | | ((rsp_device *)param)->ccfunc_rsp_vmacu_scalar(); |
| 3991 | | } |
| 3992 | | #endif |
| 3993 | | |
| 3994 | | #if USE_SIMD |
| 3995 | | // VMADL |
| 3996 | | // |
| 3997 | | // 31 25 24 20 15 10 5 0 |
| 3998 | | // ------------------------------------------------------ |
| 3999 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 | |
| 4000 | | // ------------------------------------------------------ |
| 4001 | | // |
| 4002 | | // Multiplies unsigned fraction by unsigned fraction |
| 4003 | | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 4004 | | // The low slice of accumulator is stored into destination element |
| 4005 | | |
| 4006 | | inline void rsp_device::ccfunc_rsp_vmadl_simd() /* VMADL (USE_SIMD build): implemented as a per-lane scalar loop; the intrinsic version is kept commented out at the end. */ |
| 4007 | | { |
| 4008 | | int op = m_rsp_state->arg0; |
| 4009 | | |
| 4010 | | INT16 vres[8]; |
| 4011 | | for (int i = 0; i < 8; i++) |
| 4012 | | { |
| 4013 | | UINT16 w1, w2; |
| 4014 | | VEC_GET_SCALAR_VS1(w1, i); |
| 4015 | | VEC_GET_SCALAR_VS2(w2, i); |
| 4016 | | UINT32 s1 = w1; |
| 4017 | | UINT32 s2 = w2; |
| 4018 | | /* Only the upper 16 bits of the unsigned product are added; r2 / r3 carry overflow into the next slice via their bits above 15. */ |
| 4019 | | UINT32 r1 = s1 * s2; |
| 4020 | | UINT32 r2 = (UINT16)VEC_ACCUM_L(i) + (r1 >> 16); |
| 4021 | | UINT32 r3 = (UINT16)VEC_ACCUM_M(i) + (r2 >> 16); |
| 4022 | | |
| 4023 | | VEC_SET_ACCUM_L((UINT16)r2, i); |
| 4024 | | VEC_SET_ACCUM_M((UINT16)r3, i); |
| 4025 | | VEC_SET_ACCUM_H(VEC_ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 4026 | | |
| 4027 | | vres[i] = VEC_SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4028 | | } |
| 4029 | | VEC_WRITEBACK_RESULT(); |
| 4030 | | |
| 4031 | | /*__m128i vaccHigh; |
| 4032 | | __m128i unpackHi, loProduct, hiProduct; |
| 4033 | | __m128i vdReg, vdRegLo, vdRegHi; |
| 4034 | | |
| 4035 | | __m128i vsReg = m_xv[VS1REG]; |
| 4036 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4037 | | |
| 4038 | | __m128i vaccLow = m_accum_l; |
| 4039 | | |
| 4040 | | // Unpack to obtain for 32-bit precision. |
| 4041 | | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 4042 | | |
| 4043 | | // Begin accumulating the products. |
| 4044 | | unpackHi = _mm_mulhi_epu16(vsReg, vtReg); |
| 4045 | | loProduct = _mm_unpacklo_epi16(unpackHi, _mm_setzero_si128()); |
| 4046 | | hiProduct = _mm_unpackhi_epi16(unpackHi, _mm_setzero_si128()); |
| 4047 | | |
| 4048 | | vaccLow = _mm_add_epi32(vaccLow, loProduct); |
| 4049 | | vaccHigh = _mm_add_epi32(vaccHigh, hiProduct); |
| 4050 | | m_accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4051 | | |
| 4052 | | // Finish accumulating whatever is left. |
| 4053 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4054 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4055 | | |
| 4056 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 4057 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 4058 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 4059 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 4060 | | |
| 4061 | | // Clamp the accumulator and write it all out. |
| 4062 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4063 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4064 | | m_xv[VDREG] = RSPClampLowToVal(vdReg, m_accum_m, m_accum_h);*/ |
| 4065 | | } |
| 4066 | | |
| 4067 | | static void cfunc_rsp_vmadl_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadl_simd(). */ |
| 4068 | | { |
| 4069 | | ((rsp_device *)param)->ccfunc_rsp_vmadl_simd(); |
| 4070 | | } |
| 4071 | | #endif |
| 4072 | | |
| 4073 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4074 | | |
| 4075 | | inline void rsp_device::ccfunc_rsp_vmadl_scalar() /* VMADL (scalar): adds the upper 16 bits of the unsigned 16x16 product to the accumulator, per lane. */ |
| 4076 | | { |
| 4077 | | int op = m_rsp_state->arg0; |
| 4078 | | |
| 4079 | | INT16 vres[8]; |
| 4080 | | for (int i = 0; i < 8; i++) |
| 4081 | | { |
| 4082 | | UINT16 w1, w2; |
| 4083 | | SCALAR_GET_VS1(w1, i); |
| 4084 | | SCALAR_GET_VS2(w2, i); |
| 4085 | | UINT32 s1 = w1; |
| 4086 | | UINT32 s2 = w2; |
| 4087 | | /* r2 / r3 hold the new low / mid slices in bits 0-15 and the carry into the next slice above bit 15. */ |
| 4088 | | UINT32 r1 = s1 * s2; |
| 4089 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 4090 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 4091 | | |
| 4092 | | SET_ACCUM_L((UINT16)r2, i); |
| 4093 | | SET_ACCUM_M((UINT16)r3, i); |
| 4094 | | SET_ACCUM_H(ACCUM_H(i) + (INT16)(r3 >> 16), i); |
| 4095 | | |
| 4096 | | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4097 | | } |
| 4098 | | WRITEBACK_RESULT(); |
| 4099 | | } |
| 4100 | | |
| 4101 | | static void cfunc_rsp_vmadl_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadl_scalar(). */ |
| 4102 | | { |
| 4103 | | ((rsp_device *)param)->ccfunc_rsp_vmadl_scalar(); |
| 4104 | | } |
| 4105 | | #endif |
| 4106 | | |
| 4107 | | #if USE_SIMD |
| 4108 | | // VMADM |
| 4109 | | // |
| 4110 | | |
| 4111 | | inline void rsp_device::ccfunc_rsp_vmadm_simd() /* VMADM (SIMD): accumulates sign-extended VS1 times zero-extended (shuffled) VS2 and stores the signed-saturated mid/high result in VDREG. */ |
| 4112 | | { |
| 4113 | | int op = m_rsp_state->arg0; |
| 4114 | | |
| 4115 | | __m128i vaccLow, vaccHigh, loProduct, hiProduct; |
| 4116 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi; |
| 4117 | | |
| 4118 | | __m128i vsReg = m_xv[VS1REG]; |
| 4119 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4120 | | |
| 4121 | | /* Widen the operands and the low accumulator slice to 32 bits per lane. */ |
| 4122 | | RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 4123 | | RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 4124 | | RSPZeroExtend16to32(m_accum_l, &vaccLow, &vaccHigh); |
| 4125 | | |
| 4126 | | /* Full 32-bit products; only their low 16 bits are added to the low slice here. */ |
| 4127 | | loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 4128 | | hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 4129 | | |
| 4130 | | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 4131 | | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 4132 | | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 4133 | | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 4134 | | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 4135 | | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 4136 | | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 4137 | | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 4138 | | /* NOTE(review): the m_xv[VDREG] store on the next line is overwritten by the _mm_packs_epi32 store further down; only m_accum_l keeps this value. */ |
| 4139 | | m_accum_l = m_xv[VDREG] = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4140 | | |
| 4141 | | /* Combine the mid/high slices into 32-bit lanes, then add the upper product half and the carry out of the low slice. */ |
| 4142 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4143 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4144 | | |
| 4145 | | loProduct = _mm_srai_epi32(loProduct, 16); |
| 4146 | | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 4147 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 4148 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 4149 | | |
| 4150 | | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 4151 | | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 4152 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 4153 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 4154 | | |
| 4155 | | /* _mm_packs_epi32 saturates the 32-bit mid/high value to signed 16 bits for the destination; the slices keep the unsaturated halves. */ |
| 4156 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 4157 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4158 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4159 | | } |
| 4160 | | |
| 4161 | | static void cfunc_rsp_vmadm_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadm_simd(). */ |
| 4162 | | { |
| 4163 | | ((rsp_device *)param)->ccfunc_rsp_vmadm_simd(); |
| 4164 | | } |
| 4165 | | #endif |
| 4166 | | |
| 4167 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4168 | | |
| 4169 | | inline void rsp_device::ccfunc_rsp_vmadm_scalar() /* VMADM (scalar): accumulates signed VS1 times unsigned VS2 per lane, then saturates into vres. */ |
| 4170 | | { |
| 4171 | | int op = m_rsp_state->arg0; |
| 4172 | | |
| 4173 | | INT16 vres[8]; |
| 4174 | | for (int i = 0; i < 8; i++) |
| 4175 | | { |
| 4176 | | UINT16 w1, w2; |
| 4177 | | SCALAR_GET_VS1(w1, i); |
| 4178 | | SCALAR_GET_VS2(w2, i); |
| 4179 | | UINT32 s1 = (INT32)(INT16)w1; |
| 4180 | | UINT32 s2 = (UINT16)w2; |
| 4181 | | /* r1 is the product modulo 2^32; r2 / r3 carry overflow into the next slice via their bits above 15. */ |
| 4182 | | UINT32 r1 = s1 * s2; |
| 4183 | | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 4184 | | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 4185 | | |
| 4186 | | SET_ACCUM_L((UINT16)r2, i); |
| 4187 | | SET_ACCUM_M((UINT16)r3, i); |
| 4188 | | SET_ACCUM_H((UINT16)ACCUM_H(i) + (UINT16)(r3 >> 16), i); |
| 4189 | | if ((INT32)(r1) < 0) /* r1 >> 16 above is an unsigned shift, so a negative product is sign-corrected by taking 1 off the high slice */ |
| 4190 | | { |
| 4191 | | SET_ACCUM_H((UINT16)ACCUM_H(i) - 1, i); |
| 4192 | | } |
| 4193 | | |
| 4194 | | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 4195 | | } |
| 4196 | | WRITEBACK_RESULT(); |
| 4197 | | } |
| 4198 | | |
| 4199 | | static void cfunc_rsp_vmadm_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadm_scalar(). */ |
| 4200 | | { |
| 4201 | | ((rsp_device *)param)->ccfunc_rsp_vmadm_scalar(); |
| 4202 | | } |
| 4203 | | #endif |
| 4204 | | |
| 4205 | | #if USE_SIMD |
| 4206 | | // VMADN |
| 4207 | | // |
| 4208 | | |
| 4209 | | inline void rsp_device::ccfunc_rsp_vmadn_simd() /* VMADN (USE_SIMD build): per lane, adds (unsigned VS1 * signed VS2) << 16 to the 64-bit accumulator using a scalar loop. */ |
| 4210 | | { |
| 4211 | | int op = m_rsp_state->arg0; /* not named again below; presumably consumed by the VEC_* macros - confirm */ |
| 4212 | | |
| 4213 | | INT16 vres[8]; |
| 4214 | | for (int i = 0; i < 8; i++) |
| 4215 | | { |
| 4216 | | UINT16 w1, w2; |
| 4217 | | VEC_GET_SCALAR_VS1(w1, i); |
| 4218 | | VEC_GET_SCALAR_VS2(w2, i); |
| 4219 | | INT32 s1 = (UINT16)w1; |
| 4220 | | INT32 s2 = (INT32)(INT16)w2; |
| 4221 | | /* Narrow each slice to UINT16 before widening, as VMACF does, so a signed slice cannot sign-extend into the slices above it. */ |
| 4222 | | UINT64 q = (UINT64)(UINT16)VEC_ACCUM_LL(i); |
| 4223 | | q |= (((UINT64)(UINT16)VEC_ACCUM_L(i)) << 16); |
| 4224 | | q |= (((UINT64)(UINT16)VEC_ACCUM_M(i)) << 32); |
| 4225 | | q |= (((UINT64)(UINT16)VEC_ACCUM_H(i)) << 48); |
| 4226 | | q += ((UINT64)(INT64)(s1*s2)) << 16; /* shift in the unsigned domain: the product may be negative and shifting a negative value left is undefined */ |
| 4227 | | |
| 4228 | | VEC_SET_ACCUM_LL((UINT16)q, i); |
| 4229 | | VEC_SET_ACCUM_L((UINT16)(q >> 16), i); |
| 4230 | | VEC_SET_ACCUM_M((UINT16)(q >> 32), i); |
| 4231 | | VEC_SET_ACCUM_H((UINT16)(q >> 48), i); |
| 4232 | | |
| 4233 | | vres[i] = VEC_SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4234 | | } |
| 4235 | | VEC_WRITEBACK_RESULT(); |
| 4236 | | } |
| 4237 | | /*INLINE void cfunc_rsp_vmadn_simd(void *param) |
| 4238 | | { |
| 4239 | | rsp_state *rsp = (rsp_state*)param; |
| 4240 | | int op = m_rsp_state->arg0; |
| 4241 | | |
| 4242 | | __m128i vaccLow, vaccHigh, loProduct, hiProduct; |
| 4243 | | __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi; |
| 4244 | | |
| 4245 | | __m128i vsReg = m_xv[VS1REG]; |
| 4246 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4247 | | |
| 4248 | | vaccLow = m_accum_l; |
| 4249 | | |
| 4250 | | RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi); |
| 4251 | | RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi); |
| 4252 | | RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh); |
| 4253 | | |
| 4254 | | // Begin accumulating the products. |
| 4255 | | loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo); |
| 4256 | | hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi); |
| 4257 | | |
| 4258 | | vdRegLo = _mm_srli_epi32(loProduct, 16); |
| 4259 | | vdRegHi = _mm_srli_epi32(hiProduct, 16); |
| 4260 | | vdRegLo = _mm_slli_epi32(vdRegLo, 16); |
| 4261 | | vdRegHi = _mm_slli_epi32(vdRegHi, 16); |
| 4262 | | vdRegLo = _mm_xor_si128(vdRegLo, loProduct); |
| 4263 | | vdRegHi = _mm_xor_si128(vdRegHi, hiProduct); |
| 4264 | | |
| 4265 | | vaccLow = _mm_add_epi32(vaccLow, vdRegLo); |
| 4266 | | vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi); |
| 4267 | | |
| 4268 | | m_accum_l = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4269 | | |
| 4270 | | // Multiply the MSB of sources, accumulate the product. |
| 4271 | | vdRegLo = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4272 | | vdRegHi = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4273 | | |
| 4274 | | loProduct = _mm_srai_epi32(loProduct, 16); |
| 4275 | | hiProduct = _mm_srai_epi32(hiProduct, 16); |
| 4276 | | vaccLow = _mm_srai_epi32(vaccLow, 16); |
| 4277 | | vaccHigh = _mm_srai_epi32(vaccHigh, 16); |
| 4278 | | |
| 4279 | | vaccLow = _mm_add_epi32(loProduct, vaccLow); |
| 4280 | | vaccHigh = _mm_add_epi32(hiProduct, vaccHigh); |
| 4281 | | vaccLow = _mm_add_epi32(vdRegLo, vaccLow); |
| 4282 | | vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh); |
| 4283 | | |
| 4284 | | // Clamp the accumulator and write it all out. |
| 4285 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4286 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4287 | | m_xv[VDREG] = RSPClampLowToVal(m_accum_l, m_accum_m, m_accum_h); |
| 4288 | | }*/ |
| 4289 | | |
| 4290 | | static void cfunc_rsp_vmadn_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadn_simd(). */ |
| 4291 | | { |
| 4292 | | ((rsp_device *)param)->ccfunc_rsp_vmadn_simd(); |
| 4293 | | } |
| 4294 | | #endif |
| 4295 | | |
| 4296 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4297 | | |
| 4298 | | inline void rsp_device::ccfunc_rsp_vmadn_scalar() /* VMADN (scalar): per lane, adds (unsigned VS1 * signed VS2) << 16 to the 64-bit accumulator, then saturates into vres. */ |
| 4299 | | { |
| 4300 | | int op = m_rsp_state->arg0; /* not named again below; presumably consumed by the SCALAR_* / WRITEBACK macros - confirm */ |
| 4301 | | |
| 4302 | | INT16 vres[8]; |
| 4303 | | for (int i = 0; i < 8; i++) |
| 4304 | | { |
| 4305 | | UINT16 w1, w2; |
| 4306 | | SCALAR_GET_VS1(w1, i); |
| 4307 | | SCALAR_GET_VS2(w2, i); |
| 4308 | | INT32 s1 = (UINT16)w1; |
| 4309 | | INT32 s2 = (INT32)(INT16)w2; |
| 4310 | | /* Narrow each slice to UINT16 before widening, as VMACF does, so a signed slice cannot sign-extend into the slices above it. */ |
| 4311 | | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 4312 | | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 4313 | | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 4314 | | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 4315 | | q += ((UINT64)(INT64)(s1*s2)) << 16; /* shift in the unsigned domain: the product may be negative and shifting a negative value left is undefined */ |
| 4316 | | |
| 4317 | | SET_ACCUM_LL((UINT16)q, i); |
| 4318 | | SET_ACCUM_L((UINT16)(q >> 16), i); |
| 4319 | | SET_ACCUM_M((UINT16)(q >> 32), i); |
| 4320 | | SET_ACCUM_H((UINT16)(q >> 48), i); |
| 4321 | | |
| 4322 | | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 4323 | | } |
| 4324 | | WRITEBACK_RESULT(); |
| 4325 | | } |
| 4326 | | |
| 4327 | | static void cfunc_rsp_vmadn_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadn_scalar(). */ |
| 4328 | | { |
| 4329 | | ((rsp_device *)param)->ccfunc_rsp_vmadn_scalar(); |
| 4330 | | } |
| 4331 | | #endif |
| 4332 | | |
| 4333 | | #if USE_SIMD |
| 4334 | | // VMADH |
| 4335 | | // |
| 4336 | | // 31 25 24 20 15 10 5 0 |
| 4337 | | // ------------------------------------------------------ |
| 4338 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 | |
| 4339 | | // ------------------------------------------------------ |
| 4340 | | // |
| 4341 | | // Multiplies signed integer by signed integer |
| 4342 | | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 4343 | | // The highest 32 bits of accumulator is saturated into destination element |
| 4344 | | |
| 4345 | | inline void rsp_device::ccfunc_rsp_vmadh_simd() /* VMADH (SIMD): adds the signed 16x16 product to the 32-bit mid/high accumulator pair and stores the saturated result in VDREG. */ |
| 4346 | | { |
| 4347 | | int op = m_rsp_state->arg0; |
| 4348 | | |
| 4349 | | __m128i vsReg = m_xv[VS1REG]; |
| 4350 | | __m128i vtReg = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4351 | | |
| 4352 | | /* Interleave the mid and high slices into 32-bit lanes (mid in the low half, high in the upper half). */ |
| 4353 | | __m128i vaccLow = _mm_unpacklo_epi16(m_accum_m, m_accum_h); |
| 4354 | | __m128i vaccHigh = _mm_unpackhi_epi16(m_accum_m, m_accum_h); |
| 4355 | | |
| 4356 | | /* Build full 32-bit signed products from the low/high 16-bit halves and accumulate them. */ |
| 4357 | | __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg); |
| 4358 | | __m128i unpackHi = _mm_mulhi_epi16(vsReg, vtReg); |
| 4359 | | __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi); |
| 4360 | | __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi); |
| 4361 | | vaccLow = _mm_add_epi32(vaccLow, loProduct); |
| 4362 | | vaccHigh = _mm_add_epi32(vaccHigh, hiProduct); |
| 4363 | | |
| 4364 | | /* _mm_packs_epi32 saturates to signed 16 bits for the destination; the accumulator slices keep the unsaturated halves. */ |
| 4365 | | m_xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh); |
| 4366 | | m_accum_m = RSPPackLo32to16(vaccLow, vaccHigh); |
| 4367 | | m_accum_h = RSPPackHi32to16(vaccLow, vaccHigh); |
| 4368 | | } |
| 4369 | | |
| 4370 | | static void cfunc_rsp_vmadh_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadh_simd(). */ |
| 4371 | | { |
| 4372 | | ((rsp_device *)param)->ccfunc_rsp_vmadh_simd(); |
| 4373 | | } |
| 4374 | | #endif |
| 4375 | | |
| 4376 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4377 | | |
| 4378 | | inline void rsp_device::ccfunc_rsp_vmadh_scalar() /* VMADH (scalar): per lane, adds the signed 16x16 product to the 32-bit mid/high accumulator pair, then saturates into vres. */ |
| 4379 | | { |
| 4380 | | int op = m_rsp_state->arg0; /* not named again below; presumably consumed by the SCALAR_* / WRITEBACK macros - confirm */ |
| 4381 | | |
| 4382 | | INT16 vres[8]; |
| 4383 | | for (int i = 0; i < 8; i++) |
| 4384 | | { |
| 4385 | | INT16 w1, w2; |
| 4386 | | SCALAR_GET_VS1(w1, i); |
| 4387 | | SCALAR_GET_VS2(w2, i); |
| 4388 | | INT32 s1 = (INT32)(INT16)w1; |
| 4389 | | INT32 s2 = (INT32)(INT16)w2; |
| 4390 | | /* Accumulate as UINT32: the sum wraps modulo 2^32 by definition, whereas a signed add could overflow (undefined). The stored bits are unchanged. */ |
| 4391 | | UINT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 4392 | | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 4393 | | accum += (UINT32)(s1 * s2); |
| 4394 | | |
| 4395 | | SET_ACCUM_H((UINT16)(accum >> 16), i); |
| 4396 | | SET_ACCUM_M((UINT16)accum, i); |
| 4397 | | |
| 4398 | | vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 4399 | | } |
| 4400 | | WRITEBACK_RESULT(); |
| 4401 | | } |
| 4402 | | |
| 4403 | | static void cfunc_rsp_vmadh_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vmadh_scalar(). */ |
| 4404 | | { |
| 4405 | | ((rsp_device *)param)->ccfunc_rsp_vmadh_scalar(); |
| 4406 | | } |
| 4407 | | #endif |
| 4408 | | |
| 4409 | | #if USE_SIMD |
| 4410 | | // VADD |
| 4411 | | // 31 25 24 20 15 10 5 0 |
| 4412 | | // ------------------------------------------------------ |
| 4413 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 | |
| 4414 | | // ------------------------------------------------------ |
| 4415 | | // |
| 4416 | | // Adds two vector registers plus the carry flag; the result is saturated to the signed 16-bit range (-32768..32767) |
| 4417 | | |
| 4418 | | inline void rsp_device::ccfunc_rsp_vadd_simd() /* VADD (SIMD): VDREG = saturating VS1 + shuffled VS2 (+ carry), m_accum_l = wrapping sum; clears the ZERO and CARRY flags. */ |
| 4419 | | { |
| 4420 | | int op = m_rsp_state->arg0; |
| 4421 | | /* carry is the CARRY flag vector masked with vec_flagmask - presumably leaving 0 or 1 per lane; confirm against vec_flagmask's definition. */ |
| 4422 | | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4423 | | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 4424 | | m_accum_l = _mm_add_epi16(_mm_add_epi16(m_xv[VS1REG], shuffled), carry); |
| 4425 | | |
| 4426 | | __m128i addvec = _mm_adds_epi16(m_xv[VS1REG], shuffled); |
| 4427 | | /* Drop the carry in lanes where the saturating sum equals vec_32767 or vec_n32768, so the wrapping add below cannot push them past the limit. */ |
| 4428 | | carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_32767), vec_neg1)); |
| 4429 | | carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_n32768), vec_neg1)); |
| 4430 | | |
| 4431 | | m_xv[VDREG] = _mm_add_epi16(addvec, carry); |
| 4432 | | |
| 4433 | | m_xvflag[ZERO] = vec_zero; |
| 4434 | | m_xvflag[CARRY] = vec_zero; |
| 4435 | | } |
| 4436 | | |
| 4437 | | static void cfunc_rsp_vadd_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vadd_simd(). */ |
| 4438 | | { |
| 4439 | | ((rsp_device *)param)->ccfunc_rsp_vadd_simd(); /* class name is rsp_device; the former rsp_Device spelling names no type */ |
| 4440 | | } |
| 4441 | | #endif |
| 4442 | | |
| 4443 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4444 | | |
| 4445 | | inline void rsp_device::ccfunc_rsp_vadd_scalar() /* VADD (scalar): per lane r = VS1 + VS2 + carry; low accumulator gets the wrapped 16-bit sum, vres the value clamped to [-32768, 32767]. */ |
| 4446 | | { |
| 4447 | | int op = m_rsp_state->arg0; |
| 4448 | | |
| 4449 | | INT16 vres[8] = { 0 }; |
| 4450 | | for (int i = 0; i < 8; i++) |
| 4451 | | { |
| 4452 | | INT16 w1, w2; |
| 4453 | | SCALAR_GET_VS1(w1, i); |
| 4454 | | SCALAR_GET_VS2(w2, i); |
| 4455 | | INT32 s1 = (INT32)(INT16)w1; |
| 4456 | | INT32 s2 = (INT32)(INT16)w2; |
| 4457 | | INT32 r = s1 + s2 + (((CARRY_FLAG(i)) != 0) ? 1 : 0); |
| 4458 | | /* The accumulator is written before clamping, so it keeps the unsaturated low 16 bits. */ |
| 4459 | | SET_ACCUM_L((INT16)(r), i); |
| 4460 | | |
| 4461 | | if (r > 32767) r = 32767; |
| 4462 | | if (r < -32768) r = -32768; |
| 4463 | | vres[i] = (INT16)(r); |
| 4464 | | } |
| 4465 | | CLEAR_ZERO_FLAGS(); |
| 4466 | | CLEAR_CARRY_FLAGS(); |
| 4467 | | WRITEBACK_RESULT(); |
| 4468 | | } |
| 4469 | | |
| 4470 | | static void cfunc_rsp_vadd_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vadd_scalar(). */ |
| 4471 | | { |
| 4472 | | ((rsp_device *)param)->ccfunc_rsp_vadd_scalar(); |
| 4473 | | } |
| 4474 | | #endif |
| 4475 | | |
| 4476 | | #if USE_SIMD |
| 4477 | | // VSUB |
| 4478 | | // |
| 4479 | | // 31 25 24 20 15 10 5 0 |
| 4480 | | // ------------------------------------------------------ |
| 4481 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 | |
| 4482 | | // ------------------------------------------------------ |
| 4483 | | // |
| 4484 | | // Subtracts source register 2 and the carry flag from source register 1; the result is saturated to the signed 16-bit range (-32768..32767) |
| 4485 | | // TODO: check VS2REG == VDREG |
| 4486 | | |
| 4487 | | inline void rsp_device::ccfunc_rsp_vsub_simd() /* VSUB (SIMD): VDREG = saturating VS1 - shuffled VS2 (- carry where allowed), m_accum_l = wrapping difference; clears the ZERO and CARRY flags. */ |
| 4488 | | { |
| 4489 | | int op = m_rsp_state->arg0; |
| 4490 | | /* unsat is the wrapping difference and saturated the clamped one; lanes where they are equal did not saturate. */ |
| 4491 | | __m128i shuffled = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4492 | | __m128i carry = _mm_and_si128(m_xvflag[CARRY], vec_flagmask); |
| 4493 | | __m128i unsat = _mm_sub_epi16(m_xv[VS1REG], shuffled); |
| 4494 | | /* vs2neg: lanes where the subtrahend is negative; vs2pos: the remaining lanes (zero or positive). */ |
| 4495 | | __m128i vs2neg = _mm_cmplt_epi16(shuffled, vec_zero); |
| 4496 | | __m128i vs2pos = _mm_cmpeq_epi16(vs2neg, vec_zero); |
| 4497 | | |
| 4498 | | __m128i saturated = _mm_subs_epi16(m_xv[VS1REG], shuffled); |
| 4499 | | __m128i carry_mask = _mm_cmpeq_epi16(unsat, saturated); |
| 4500 | | carry_mask = _mm_and_si128(vs2neg, carry_mask); |
| 4501 | | /* The carry is applied when the subtrahend is non-negative, or when it is negative and the subtraction did not saturate. */ |
| 4502 | | vs2neg = _mm_and_si128(carry_mask, carry); |
| 4503 | | vs2pos = _mm_and_si128(vs2pos, carry); |
| 4504 | | __m128i dest_carry = _mm_or_si128(vs2neg, vs2pos); |
| 4505 | | m_xv[VDREG] = _mm_subs_epi16(saturated, dest_carry); |
| 4506 | | |
| 4507 | | m_accum_l = _mm_sub_epi16(unsat, carry); |
| 4508 | | |
| 4509 | | m_xvflag[ZERO] = _mm_setzero_si128(); |
| 4510 | | m_xvflag[CARRY] = _mm_setzero_si128(); |
| 4511 | | } |
| 4512 | | |
| 4513 | | static void cfunc_rsp_vsub_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vsub_simd(). */ |
| 4514 | | { |
| 4515 | | ((rsp_device *)param)->ccfunc_rsp_vsub_simd(); |
| 4516 | | } |
| 4517 | | #endif |
| 4518 | | |
| 4519 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4520 | | |
| 4521 | | inline void rsp_device::ccfunc_rsp_vsub_scalar() /* VSUB (scalar): per lane r = VS1 - VS2 - carry; low accumulator gets the wrapped 16-bit value, vres the value clamped to [-32768, 32767]. */ |
| 4522 | | { |
| 4523 | | int op = m_rsp_state->arg0; |
| 4524 | | |
| 4525 | | INT16 vres[8]; |
| 4526 | | for (int i = 0; i < 8; i++) |
| 4527 | | { |
| 4528 | | INT16 w1, w2; |
| 4529 | | SCALAR_GET_VS1(w1, i); |
| 4530 | | SCALAR_GET_VS2(w2, i); |
| 4531 | | INT32 s1 = (INT32)(INT16)w1; |
| 4532 | | INT32 s2 = (INT32)(INT16)w2; |
| 4533 | | INT32 r = s1 - s2 - (((CARRY_FLAG(i)) != 0) ? 1 : 0); |
| 4534 | | /* The accumulator is written before clamping, so it keeps the unsaturated low 16 bits. */ |
| 4535 | | SET_ACCUM_L((INT16)(r), i); |
| 4536 | | |
| 4537 | | if (r > 32767) r = 32767; |
| 4538 | | if (r < -32768) r = -32768; |
| 4539 | | |
| 4540 | | vres[i] = (INT16)(r); |
| 4541 | | } |
| 4542 | | CLEAR_ZERO_FLAGS(); |
| 4543 | | CLEAR_CARRY_FLAGS(); |
| 4544 | | WRITEBACK_RESULT(); |
| 4545 | | } |
| 4546 | | |
| 4547 | | static void cfunc_rsp_vsub_scalar(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vsub_scalar(). */ |
| 4548 | | { |
| 4549 | | ((rsp_device *)param)->ccfunc_rsp_vsub_scalar(); |
| 4550 | | } |
| 4551 | | #endif |
| 4552 | | |
| 4553 | | #if USE_SIMD |
| 4554 | | // VABS |
| 4555 | | // |
| 4556 | | // 31 25 24 20 15 10 5 0 |
| 4557 | | // ------------------------------------------------------ |
| 4558 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 | |
| 4559 | | // ------------------------------------------------------ |
| 4560 | | // |
| 4561 | | // Changes the sign of source register 2 if source register 1 is negative and stores the result to destination register |
| 4562 | | |
| 4563 | | inline void rsp_device::ccfunc_rsp_vabs_simd() /* VABS (SIMD): per lane, VDREG and m_accum_l get shuffled VS2 if VS1 > 0, its negation if VS1 < 0, and 0 if VS1 == 0. */ |
| 4564 | | { |
| 4565 | | int op = m_rsp_state->arg0; |
| 4566 | | /* Three disjoint masked candidates are built and OR-ed together; lanes where VS1 == 0 match none of them and stay 0. */ |
| 4567 | | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4568 | | __m128i negs2 = _mm_sub_epi16(_mm_setzero_si128(), shuf2); |
| 4569 | | __m128i s2_n32768 = _mm_cmpeq_epi16(shuf2, vec_n32768); |
| 4570 | | __m128i s1_lz = _mm_cmplt_epi16(m_xv[VS1REG], _mm_setzero_si128()); |
| 4571 | | /* result_n32768 substitutes vec_32767 where VS1 < 0 and VS2 equals vec_n32768, since negating that value would wrap. */ |
| 4572 | | __m128i result_gz = _mm_and_si128(shuf2, _mm_cmpgt_epi16(m_xv[VS1REG], _mm_setzero_si128())); |
| 4573 | | __m128i result_n32768 = _mm_and_si128(s1_lz, _mm_and_si128(vec_32767, s2_n32768)); |
| 4574 | | __m128i result_negs2 = _mm_and_si128(s1_lz, _mm_and_si128(negs2, _mm_xor_si128(s2_n32768, vec_neg1))); |
| 4575 | | m_xv[VDREG] = m_accum_l = _mm_or_si128(result_gz, _mm_or_si128(result_n32768, result_negs2)); |
| 4576 | | } |
| 4577 | | |
| 4578 | | static void cfunc_rsp_vabs_simd(void *param) /* Plain-function trampoline: casts param to rsp_device* and forwards to ccfunc_rsp_vabs_simd(). */ |
| 4579 | | { |
| 4580 | | ((rsp_device *)param)->ccfunc_rsp_vabs_simd(); |
| 4581 | | } |
| 4582 | | #endif |
| 4583 | | |
| 4584 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4585 | | |
| 4586 | | inline void rsp_device::ccfunc_rsp_vabs_scalar() /* VABS (scalar): per lane, the result is s2 if s1 > 0, -s2 if s1 < 0 (32767 when s2 is -32768), and 0 if s1 == 0. */ |
| 4587 | | { |
| 4588 | | int op = m_rsp_state->arg0; |
| 4589 | | |
| 4590 | | INT16 vres[8]; |
| 4591 | | for (int i = 0; i < 8; i++) |
| 4592 | | { |
| 4593 | | INT16 s1, s2; |
| 4594 | | SCALAR_GET_VS1(s1, i); |
| 4595 | | SCALAR_GET_VS2(s2, i); |
| 4596 | | |
| 4597 | | if (s1 < 0) |
| 4598 | | { |
| 4599 | | if (s2 == -32768) /* -(-32768) does not fit in INT16, so the result is pinned to 32767 */ |
| 4600 | | { |
| 4601 | | vres[i] = 32767; |
| 4602 | | } |
| 4603 | | else |
| 4604 | | { |
| 4605 | | vres[i] = -s2; |
| 4606 | | } |
| 4607 | | } |
| 4608 | | else if (s1 > 0) |
| 4609 | | { |
| 4610 | | vres[i] = s2; |
| 4611 | | } |
| 4612 | | else |
| 4613 | | { |
| 4614 | | vres[i] = 0; |
| 4615 | | } |
| 4616 | | /* The low accumulator slice receives the same value as the result lane. */ |
| 4617 | | SET_ACCUM_L(vres[i], i); |
| 4618 | | } |
| 4619 | | WRITEBACK_RESULT(); |
| 4620 | | } |
| 4621 | | |
| 4622 | | static void cfunc_rsp_vabs_scalar(void *param) |
| 4623 | | { |
| 4624 | | ((rsp_device *)param)->ccfunc_rsp_vabs_scalar(); |
| 4625 | | } |
| 4626 | | #endif |
| 4627 | | |
| 4628 | | #if USE_SIMD |
| 4629 | | // VADDC |
| 4630 | | // |
| 4631 | | // 31 25 24 20 15 10 5 0 |
| 4632 | | // ------------------------------------------------------ |
| 4633 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 | |
| 4634 | | // ------------------------------------------------------ |
| 4635 | | // |
| 4636 | | // Adds two vector registers, the carry out is stored into carry register |
| 4637 | | // TODO: check VS2REG = VDREG |
| 4638 | | |
| 4639 | | inline void rsp_device::ccfunc_rsp_vaddc_simd() |
| 4640 | | { |
| 4641 | | int op = m_rsp_state->arg0; |
| 4642 | | |
| 4643 | | VEC_CLEAR_ZERO_FLAGS(); |
| 4644 | | VEC_CLEAR_CARRY_FLAGS(); |
| 4645 | | |
| 4646 | | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4647 | | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 4648 | | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 4649 | | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 4650 | | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 4651 | | __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531); |
| 4652 | | __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420); |
| 4653 | | |
| 4654 | | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4655 | | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4656 | | |
| 4657 | | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 4658 | | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 4659 | | |
| 4660 | | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 4661 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 4662 | | } |
| 4663 | | |
| 4664 | | static void cfunc_rsp_vaddc_simd(void *param) |
| 4665 | | { |
| 4666 | | ((rsp_device *)param)->ccfunc_rsp_vaddc_simd(); |
| 4667 | | } |
| 4668 | | #endif |
| 4669 | | |
| 4670 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4671 | | |
| 4672 | | inline void rsp_device::ccfunc_rsp_vaddc_scalar() |
| 4673 | | { |
| 4674 | | int op = m_rsp_state->arg0; |
| 4675 | | |
| 4676 | | CLEAR_ZERO_FLAGS(); |
| 4677 | | CLEAR_CARRY_FLAGS(); |
| 4678 | | |
| 4679 | | INT16 vres[8] = { 0 }; |
| 4680 | | for (int i = 0; i < 8; i++) |
| 4681 | | { |
| 4682 | | INT16 w1, w2; |
| 4683 | | SCALAR_GET_VS1(w1, i); |
| 4684 | | SCALAR_GET_VS2(w2, i); |
| 4685 | | INT32 s1 = (UINT32)(UINT16)w1; |
| 4686 | | INT32 s2 = (UINT32)(UINT16)w2; |
| 4687 | | INT32 r = s1 + s2; |
| 4688 | | |
| 4689 | | vres[i] = (INT16)(r); |
| 4690 | | SET_ACCUM_L((INT16)r, i); |
| 4691 | | |
| 4692 | | if (r & 0xffff0000) |
| 4693 | | { |
| 4694 | | SET_CARRY_FLAG(i); |
| 4695 | | } |
| 4696 | | } |
| 4697 | | WRITEBACK_RESULT(); |
| 4698 | | } |
| 4699 | | |
| 4700 | | static void cfunc_rsp_vaddc_scalar(void *param) |
| 4701 | | { |
| 4702 | | ((rsp_device *)param)->ccfunc_rsp_vaddc_scalar(); |
| 4703 | | } |
| 4704 | | #endif |
| 4705 | | |
| 4706 | | #if USE_SIMD |
| 4707 | | // VSUBC |
| 4708 | | // |
| 4709 | | // 31 25 24 20 15 10 5 0 |
| 4710 | | // ------------------------------------------------------ |
| 4711 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 | |
| 4712 | | // ------------------------------------------------------ |
| 4713 | | // |
| 4714 | | // Subtracts two vector registers, the carry out is stored into carry register |
| 4715 | | // TODO: check VS2REG = VDREG |
| 4716 | | |
| 4717 | | inline void rsp_device::ccfunc_rsp_vsubc_simd() |
| 4718 | | { |
| 4719 | | int op = m_rsp_state->arg0; |
| 4720 | | |
| 4721 | | VEC_CLEAR_ZERO_FLAGS(); |
| 4722 | | VEC_CLEAR_CARRY_FLAGS(); |
| 4723 | | |
| 4724 | | __m128i shuf2 = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4725 | | __m128i vec7531 = _mm_and_si128(m_xv[VS1REG], vec_lomask); |
| 4726 | | __m128i vec6420 = _mm_srli_epi32(m_xv[VS1REG], 16); |
| 4727 | | __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask); |
| 4728 | | __m128i shuf6420 = _mm_srli_epi32(shuf2, 16); |
| 4729 | | __m128i sum7531 = _mm_sub_epi32(vec7531, shuf7531); |
| 4730 | | __m128i sum6420 = _mm_sub_epi32(vec6420, shuf6420); |
| 4731 | | |
| 4732 | | __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4733 | | __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask); |
| 4734 | | sum7531 = _mm_and_si128(sum7531, vec_lomask); |
| 4735 | | sum6420 = _mm_and_si128(sum6420, vec_lomask); |
| 4736 | | __m128i zero7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_lomask); |
| 4737 | | __m128i zero6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_lomask); |
| 4738 | | |
| 4739 | | m_xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16)); |
| 4740 | | m_xvflag[ZERO] = _mm_or_si128(_mm_slli_epi32(zero6420, 16), zero7531); |
| 4741 | | |
| 4742 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531); |
| 4743 | | } |
| 4744 | | |
| 4745 | | static void cfunc_rsp_vsubc_simd(void *param) |
| 4746 | | { |
| 4747 | | ((rsp_device *)param)->ccfunc_rsp_vsubc_simd(); |
| 4748 | | } |
| 4749 | | #endif |
| 4750 | | |
| 4751 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4752 | | |
| 4753 | | inline void rsp_device::ccfunc_rsp_vsubc_scalar() |
| 4754 | | { |
| 4755 | | int op = m_rsp_state->arg0; |
| 4756 | | |
| 4757 | | CLEAR_ZERO_FLAGS(); |
| 4758 | | CLEAR_CARRY_FLAGS(); |
| 4759 | | |
| 4760 | | INT16 vres[8]; |
| 4761 | | for (int i = 0; i < 8; i++) |
| 4762 | | { |
| 4763 | | INT16 w1, w2; |
| 4764 | | SCALAR_GET_VS1(w1, i); |
| 4765 | | SCALAR_GET_VS2(w2, i); |
| 4766 | | INT32 s1 = (UINT32)(UINT16)w1; |
| 4767 | | INT32 s2 = (UINT32)(UINT16)w2; |
| 4768 | | INT32 r = s1 - s2; |
| 4769 | | |
| 4770 | | vres[i] = (INT16)(r); |
| 4771 | | SET_ACCUM_L((UINT16)r, i); |
| 4772 | | |
| 4773 | | if ((UINT16)(r) != 0) |
| 4774 | | { |
| 4775 | | SET_ZERO_FLAG(i); |
| 4776 | | } |
| 4777 | | if (r & 0xffff0000) |
| 4778 | | { |
| 4779 | | SET_CARRY_FLAG(i); |
| 4780 | | } |
| 4781 | | } |
| 4782 | | WRITEBACK_RESULT(); |
| 4783 | | } |
| 4784 | | |
| 4785 | | static void cfunc_rsp_vsubc_scalar(void *param) |
| 4786 | | { |
| 4787 | | ((rsp_device *)param)->ccfunc_rsp_vsubc_scalar(); |
| 4788 | | } |
| 4789 | | #endif |
| 4790 | | |
| 4791 | | // VADDB |
| 4792 | | // |
| 4793 | | // 31 25 24 20 15 10 5 0 |
| 4794 | | // ------------------------------------------------------ |
| 4795 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010110 | |
| 4796 | | // ------------------------------------------------------ |
| 4797 | | // |
| 4798 | | // Adds two vector registers bytewise with rounding |
| 4799 | | inline void rsp_device::ccfunc_rsp_vaddb_scalar() |
| 4800 | | { |
| 4801 | | const int op = m_rsp_state->arg0; |
| 4802 | | const int round = (EL == 0) ? 0 : (1 << (EL - 1)); |
| 4803 | | |
| 4804 | | INT16 vres[8]; |
| 4805 | | for (int i = 0; i < 8; i++) |
| 4806 | | { |
| 4807 | | UINT16 w1, w2; |
| 4808 | | SCALAR_GET_VS1(w1, i); |
| 4809 | | SCALAR_GET_VS2(w2, i); |
| 4810 | | |
| 4811 | | UINT8 hb1 = w1 >> 8; |
| 4812 | | UINT8 lb1 = w1 & 0xff; |
| 4813 | | UINT8 hb2 = w2 >> 8; |
| 4814 | | UINT8 lb2 = w2 & 0xff; |
| 4815 | | |
| 4816 | | UINT16 hs = hb1 + hb2 + round; |
| 4817 | | UINT16 ls = lb1 + lb2 + round; |
| 4818 | | |
| 4819 | | SET_ACCUM_L((hs << 8) | ls, i); |
| 4820 | | |
| 4821 | | hs >>= EL; |
| 4822 | | if (hs > 255) |
| 4823 | | { |
| 4824 | | hs = 255; |
| 4825 | | } |
| 4826 | | /*else if (hs < 0) |
| 4827 | | { |
| 4828 | | hs = 0; |
| 4829 | | }*/ |
| 4830 | | |
| 4831 | | ls >>= EL; |
| 4832 | | if (ls > 255) |
| 4833 | | { |
| 4834 | | ls = 255; |
| 4835 | | } |
| 4836 | | /*else if (ls < 0) |
| 4837 | | { |
| 4838 | | ls = 0; |
| 4839 | | }*/ |
| 4840 | | |
| 4841 | | vres[i] = 0; // VD writeback disabled on production hardware |
| 4842 | | // vres[i] = (hs << 8) | ls; |
| 4843 | | } |
| 4844 | | WRITEBACK_RESULT(); |
| 4845 | | } |
| 4846 | | |
| 4847 | | static void cfunc_rsp_vaddb_scalar(void *param) |
| 4848 | | { |
| 4849 | | ((rsp_device *)param)->ccfunc_rsp_vaddb_scalar(); |
| 4850 | | } |
| 4851 | | |
| 4852 | | #if USE_SIMD |
| 4853 | | // VSAW |
| 4854 | | // |
| 4855 | | // 31 25 24 20 15 10 5 0 |
| 4856 | | // ------------------------------------------------------ |
| 4857 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 | |
| 4858 | | // ------------------------------------------------------ |
| 4859 | | // |
| 4860 | | // Stores high, middle or low slice of accumulator to destination vector |
| 4861 | | |
| 4862 | | inline void rsp_device::ccfunc_rsp_vsaw_simd() |
| 4863 | | { |
| 4864 | | int op = m_rsp_state->arg0; |
| 4865 | | // Copy the accumulator slice selected by EL into VD; any other EL value zeroes VD, as the scalar path does |
| 4866 | | switch (EL) |
| 4867 | | { |
| 4868 | | case 0x08: // VSAWH |
| 4869 | | { |
| 4870 | | m_xv[VDREG] = m_accum_h; |
| 4871 | | break; |
| 4872 | | } |
| 4873 | | case 0x09: // VSAWM |
| 4874 | | { |
| 4875 | | m_xv[VDREG] = m_accum_m; |
| 4876 | | break; |
| 4877 | | } |
| 4878 | | case 0x0a: // VSAWL |
| 4879 | | { |
| 4880 | | m_xv[VDREG] = m_accum_l; |
| 4881 | | break; |
| 4882 | | } |
| 4883 | | default: // Unsupported, writes 0 to VD |
| 4884 | | { |
| 4885 | | m_xv[VDREG] = _mm_setzero_si128(); |
| 4886 | | } |
| 4887 | | } |
| 4888 | | } |
| 4889 | | |
| 4890 | | static void cfunc_rsp_vsaw_simd(void *param) |
| 4891 | | { |
| 4892 | | ((rsp_device *)param)->ccfunc_rsp_vsaw_simd(); |
| 4893 | | } |
| 4894 | | #endif |
| 4895 | | |
| 4896 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4897 | | |
| 4898 | | inline void rsp_device::ccfunc_rsp_vsaw_scalar() |
| 4899 | | { |
| 4900 | | int op = m_rsp_state->arg0; |
| 4901 | | |
| 4902 | | switch (EL) |
| 4903 | | { |
| 4904 | | case 0x08: // VSAWH |
| 4905 | | for (int i = 0; i < 8; i++) |
| 4906 | | { |
| 4907 | | W_VREG_S(VDREG, i) = ACCUM_H(i); |
| 4908 | | } |
| 4909 | | break; |
| 4910 | | case 0x09: // VSAWM |
| 4911 | | for (int i = 0; i < 8; i++) |
| 4912 | | { |
| 4913 | | W_VREG_S(VDREG, i) = ACCUM_M(i); |
| 4914 | | } |
| 4915 | | break; |
| 4916 | | case 0x0a: // VSAWL |
| 4917 | | for (int i = 0; i < 8; i++) |
| 4918 | | { |
| 4919 | | W_VREG_S(VDREG, i) = ACCUM_L(i); |
| 4920 | | } |
| 4921 | | break; |
| 4922 | | default: // Unsupported |
| 4923 | | { |
| 4924 | | for (int i = 0; i < 8; i++) |
| 4925 | | { |
| 4926 | | W_VREG_S(VDREG, i) = 0; |
| 4927 | | } |
| 4928 | | } |
| 4929 | | } |
| 4930 | | } |
| 4931 | | |
| 4932 | | static void cfunc_rsp_vsaw_scalar(void *param) |
| 4933 | | { |
| 4934 | | ((rsp_device *)param)->ccfunc_rsp_vsaw_scalar(); |
| 4935 | | } |
| 4936 | | #endif |
| 4937 | | |
| 4938 | | #if USE_SIMD |
| 4939 | | // VLT |
| 4940 | | // |
| 4941 | | // 31 25 24 20 15 10 5 0 |
| 4942 | | // ------------------------------------------------------ |
| 4943 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 | |
| 4944 | | // ------------------------------------------------------ |
| 4945 | | // |
| 4946 | | // Sets compare flags if elements in VS1 are less than VS2 |
| 4947 | | // Destination receives the VS1 element where the compare flag is set, the VS2 element otherwise |
| 4948 | | |
| 4949 | | inline void rsp_device::ccfunc_rsp_vlt_simd() |
| 4950 | | { |
| 4951 | | int op = m_rsp_state->arg0; |
| 4952 | | |
| 4953 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 4954 | | |
| 4955 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 4956 | | __m128i zc_mask = _mm_and_si128(m_xvflag[ZERO], m_xvflag[CARRY]); |
| 4957 | | __m128i lt_mask = _mm_cmplt_epi16(m_xv[VS1REG], shuf); |
| 4958 | | __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), zc_mask); |
| 4959 | | |
| 4960 | | m_xvflag[COMPARE] = _mm_or_si128(lt_mask, eq_mask); |
| 4961 | | |
| 4962 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 4963 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 4964 | | |
| 4965 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 4966 | | } |
| 4967 | | |
| 4968 | | static void cfunc_rsp_vlt_simd(void *param) // C-callable trampoline: param is the rsp_device instance |
| 4969 | | { |
| 4970 | | ((rsp_device *)param)->ccfunc_rsp_vlt_simd(); |
| 4971 | | } |
| 4972 | | #endif |
| 4973 | | |
| 4974 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 4975 | | |
| 4976 | | inline void rsp_device::ccfunc_rsp_vlt_scalar() |
| 4977 | | { |
| 4978 | | int op = m_rsp_state->arg0; |
| 4979 | | |
| 4980 | | CLEAR_COMPARE_FLAGS(); |
| 4981 | | CLEAR_CLIP2_FLAGS(); |
| 4982 | | |
| 4983 | | INT16 vres[8]; |
| 4984 | | for (int i = 0; i < 8; i++) |
| 4985 | | { |
| 4986 | | INT16 s1, s2; |
| 4987 | | SCALAR_GET_VS1(s1, i); |
| 4988 | | SCALAR_GET_VS2(s2, i); |
| 4989 | | |
| 4990 | | if (s1 < s2) |
| 4991 | | { |
| 4992 | | SET_COMPARE_FLAG(i); |
| 4993 | | } |
| 4994 | | else if (s1 == s2) |
| 4995 | | { |
| 4996 | | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 4997 | | { |
| 4998 | | SET_COMPARE_FLAG(i); |
| 4999 | | } |
| 5000 | | } |
| 5001 | | |
| 5002 | | if (COMPARE_FLAG(i) != 0) |
| 5003 | | { |
| 5004 | | vres[i] = s1; |
| 5005 | | } |
| 5006 | | else |
| 5007 | | { |
| 5008 | | vres[i] = s2; |
| 5009 | | } |
| 5010 | | |
| 5011 | | SET_ACCUM_L(vres[i], i); |
| 5012 | | } |
| 5013 | | |
| 5014 | | CLEAR_ZERO_FLAGS(); |
| 5015 | | CLEAR_CARRY_FLAGS(); |
| 5016 | | WRITEBACK_RESULT(); |
| 5017 | | } |
| 5018 | | |
| 5019 | | static void cfunc_rsp_vlt_scalar(void *param) |
| 5020 | | { |
| 5021 | | ((rsp_device *)param)->ccfunc_rsp_vlt_scalar(); |
| 5022 | | } |
| 5023 | | #endif |
| 5024 | | |
| 5025 | | #if USE_SIMD |
| 5026 | | // VEQ |
| 5027 | | // |
| 5028 | | // 31 25 24 20 15 10 5 0 |
| 5029 | | // ------------------------------------------------------ |
| 5030 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 | |
| 5031 | | // ------------------------------------------------------ |
| 5032 | | // |
| 5033 | | // Sets compare flags if elements in VS1 are equal with VS2 |
| 5034 | | // Moves the element in VS2 to destination vector |
| 5035 | | |
| 5036 | | inline void rsp_device::ccfunc_rsp_veq_simd() |
| 5037 | | { |
| 5038 | | int op = m_rsp_state->arg0; |
| 5039 | | |
| 5040 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 5041 | | |
| 5042 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5043 | | __m128i zero_mask = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 5044 | | __m128i eq_mask = _mm_cmpeq_epi16(m_xv[VS1REG], shuf); |
| 5045 | | |
| 5046 | | m_xvflag[COMPARE] = _mm_and_si128(zero_mask, eq_mask); |
| 5047 | | |
| 5048 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 5049 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 5050 | | |
| 5051 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 5052 | | } |
| 5053 | | |
| 5054 | | static void cfunc_rsp_veq_simd(void *param) |
| 5055 | | { |
| 5056 | | ((rsp_device *)param)->ccfunc_rsp_veq_simd(); |
| 5057 | | } |
| 5058 | | #endif |
| 5059 | | |
| 5060 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5061 | | |
| 5062 | | inline void rsp_device::ccfunc_rsp_veq_scalar() |
| 5063 | | { |
| 5064 | | int op = m_rsp_state->arg0; |
| 5065 | | |
| 5066 | | CLEAR_COMPARE_FLAGS(); |
| 5067 | | CLEAR_CLIP2_FLAGS(); |
| 5068 | | |
| 5069 | | INT16 vres[8]; |
| 5070 | | for (int i = 0; i < 8; i++) |
| 5071 | | { |
| 5072 | | INT16 s1, s2; |
| 5073 | | SCALAR_GET_VS1(s1, i); |
| 5074 | | SCALAR_GET_VS2(s2, i); |
| 5075 | | |
| 5076 | | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 5077 | | { |
| 5078 | | SET_COMPARE_FLAG(i); |
| 5079 | | vres[i] = s1; |
| 5080 | | } |
| 5081 | | else |
| 5082 | | { |
| 5083 | | vres[i] = s2; |
| 5084 | | } |
| 5085 | | |
| 5086 | | SET_ACCUM_L(vres[i], i); |
| 5087 | | } |
| 5088 | | |
| 5089 | | CLEAR_ZERO_FLAGS(); |
| 5090 | | CLEAR_CARRY_FLAGS(); |
| 5091 | | WRITEBACK_RESULT(); |
| 5092 | | } |
| 5093 | | |
| 5094 | | static void cfunc_rsp_veq_scalar(void *param) |
| 5095 | | { |
| 5096 | | ((rsp_device *)param)->ccfunc_rsp_veq_scalar(); |
| 5097 | | } |
| 5098 | | #endif |
| 5099 | | |
| 5100 | | #if USE_SIMD |
| 5101 | | // VNE |
| 5102 | | // |
| 5103 | | // 31 25 24 20 15 10 5 0 |
| 5104 | | // ------------------------------------------------------ |
| 5105 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 | |
| 5106 | | // ------------------------------------------------------ |
| 5107 | | // |
| 5108 | | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 5109 | | // Destination receives the VS1 element where the compare flag is set, the VS2 element otherwise |
| 5110 | | |
| 5111 | | inline void rsp_device::ccfunc_rsp_vne_simd() |
| 5112 | | { |
| 5113 | | int op = m_rsp_state->arg0; |
| 5114 | | |
| 5115 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 5116 | | |
| 5117 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5118 | | __m128i neq_mask = _mm_xor_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), vec_neg1); |
| 5119 | | |
| 5120 | | m_xvflag[COMPARE] = _mm_or_si128(m_xvflag[ZERO], neq_mask); |
| 5121 | | |
| 5122 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 5123 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 5124 | | |
| 5125 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 5126 | | } |
| 5127 | | |
| 5128 | | static void cfunc_rsp_vne_simd(void *param) |
| 5129 | | { |
| 5130 | | ((rsp_device *)param)->ccfunc_rsp_vne_simd(); |
| 5131 | | } |
| 5132 | | #endif |
| 5133 | | |
| 5134 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5135 | | |
| 5136 | | inline void rsp_device::ccfunc_rsp_vne_scalar() |
| 5137 | | { |
| 5138 | | int op = m_rsp_state->arg0; |
| 5139 | | |
| 5140 | | CLEAR_COMPARE_FLAGS(); |
| 5141 | | CLEAR_CLIP2_FLAGS(); |
| 5142 | | |
| 5143 | | INT16 vres[8]; |
| 5144 | | for (int i = 0; i < 8; i++) |
| 5145 | | { |
| 5146 | | INT16 s1, s2; |
| 5147 | | SCALAR_GET_VS1(s1, i); |
| 5148 | | SCALAR_GET_VS2(s2, i); |
| 5149 | | |
| 5150 | | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 5151 | | { |
| 5152 | | SET_COMPARE_FLAG(i); |
| 5153 | | vres[i] = s1; |
| 5154 | | } |
| 5155 | | else |
| 5156 | | { |
| 5157 | | vres[i] = s2; |
| 5158 | | } |
| 5159 | | |
| 5160 | | SET_ACCUM_L(vres[i], i); |
| 5161 | | } |
| 5162 | | |
| 5163 | | CLEAR_ZERO_FLAGS(); |
| 5164 | | CLEAR_CARRY_FLAGS(); |
| 5165 | | WRITEBACK_RESULT(); |
| 5166 | | } |
| 5167 | | |
| 5168 | | static void cfunc_rsp_vne_scalar(void *param) |
| 5169 | | { |
| 5170 | | ((rsp_device *)param)->ccfunc_rsp_vne_scalar(); |
| 5171 | | } |
| 5172 | | #endif |
| 5173 | | |
| 5174 | | #if USE_SIMD |
| 5175 | | // VGE |
| 5176 | | // |
| 5177 | | // 31 25 24 20 15 10 5 0 |
| 5178 | | // ------------------------------------------------------ |
| 5179 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 | |
| 5180 | | // ------------------------------------------------------ |
| 5181 | | // |
| 5182 | | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 5183 | | // Destination receives the VS1 element where the compare flag is set, the VS2 element otherwise |
| 5184 | | |
| 5185 | | inline void rsp_device::ccfunc_rsp_vge_simd() |
| 5186 | | { |
| 5187 | | int op = m_rsp_state->arg0; |
| 5188 | | |
| 5189 | | m_xvflag[COMPARE] = m_xvflag[CLIP2] = _mm_setzero_si128(); |
| 5190 | | |
| 5191 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5192 | | __m128i zero_mask = _mm_cmpeq_epi16(m_xvflag[ZERO], _mm_setzero_si128()); |
| 5193 | | __m128i carry_mask = _mm_cmpeq_epi16(m_xvflag[CARRY], _mm_setzero_si128()); |
| 5194 | | __m128i flag_mask = _mm_or_si128(zero_mask, carry_mask); |
| 5195 | | __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(m_xv[VS1REG], shuf), flag_mask); |
| 5196 | | __m128i gt_mask = _mm_cmpgt_epi16(m_xv[VS1REG], shuf); |
| 5197 | | m_xvflag[COMPARE] = _mm_or_si128(eq_mask, gt_mask); |
| 5198 | | |
| 5199 | | __m128i result = _mm_and_si128(m_xv[VS1REG], m_xvflag[COMPARE]); |
| 5200 | | m_accum_l = m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(m_xvflag[COMPARE], vec_neg1))); |
| 5201 | | |
| 5202 | | m_xvflag[ZERO] = m_xvflag[CARRY] = _mm_setzero_si128(); |
| 5203 | | } |
| 5204 | | |
| 5205 | | static void cfunc_rsp_vge_simd(void *param) |
| 5206 | | { |
| 5207 | | ((rsp_device *)param)->ccfunc_rsp_vge_simd(); |
| 5208 | | } |
| 5209 | | #endif |
| 5210 | | |
| 5211 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5212 | | |
| 5213 | | inline void rsp_device::ccfunc_rsp_vge_scalar() |
| 5214 | | { |
| 5215 | | int op = m_rsp_state->arg0; |
| 5216 | | |
| 5217 | | CLEAR_COMPARE_FLAGS(); |
| 5218 | | CLEAR_CLIP2_FLAGS(); |
| 5219 | | |
| 5220 | | INT16 vres[8]; |
| 5221 | | for (int i = 0; i < 8; i++) |
| 5222 | | { |
| 5223 | | INT16 s1, s2; |
| 5224 | | SCALAR_GET_VS1(s1, i); |
| 5225 | | SCALAR_GET_VS2(s2, i); |
| 5226 | | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 5227 | | { |
| 5228 | | SET_COMPARE_FLAG(i); |
| 5229 | | vres[i] = s1; |
| 5230 | | } |
| 5231 | | else |
| 5232 | | { |
| 5233 | | vres[i] = s2; |
| 5234 | | } |
| 5235 | | |
| 5236 | | SET_ACCUM_L(vres[i], i); |
| 5237 | | } |
| 5238 | | |
| 5239 | | CLEAR_ZERO_FLAGS(); |
| 5240 | | CLEAR_CARRY_FLAGS(); |
| 5241 | | WRITEBACK_RESULT(); |
| 5242 | | } |
| 5243 | | |
| 5244 | | static void cfunc_rsp_vge_scalar(void *param) |
| 5245 | | { |
| 5246 | | ((rsp_device *)param)->ccfunc_rsp_vge_scalar(); |
| 5247 | | } |
| 5248 | | #endif |
| 5249 | | |
| 5250 | | #if USE_SIMD |
| 5251 | | // VCL |
| 5252 | | // |
| 5253 | | // 31 25 24 20 15 10 5 0 |
| 5254 | | // ------------------------------------------------------ |
| 5255 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 | |
| 5256 | | // ------------------------------------------------------ |
| 5257 | | // |
| 5258 | | // Vector clip low |
| 5259 | | |
| 5260 | | inline void rsp_device::ccfunc_rsp_vcl_simd() |
| 5261 | | { |
| 5262 | | int op = m_rsp_state->arg0; |
| 5263 | | INT16 vres[8]; |
| 5264 | | |
| 5265 | | for (int i = 0; i < 8; i++) |
| 5266 | | { |
| 5267 | | INT16 s1, s2; |
| 5268 | | VEC_GET_SCALAR_VS1(s1, i); |
| 5269 | | VEC_GET_SCALAR_VS2(s2, i); |
| 5270 | | |
| 5271 | | if (VEC_CARRY_FLAG(i) != 0) |
| 5272 | | { |
| 5273 | | if (VEC_ZERO_FLAG(i) != 0) |
| 5274 | | { |
| 5275 | | if (VEC_COMPARE_FLAG(i) != 0) |
| 5276 | | { |
| 5277 | | VEC_SET_ACCUM_L(-(UINT16)s2, i); |
| 5278 | | } |
| 5279 | | else |
| 5280 | | { |
| 5281 | | VEC_SET_ACCUM_L(s1, i); |
| 5282 | | } |
| 5283 | | } |
| 5284 | | else |
| 5285 | | { |
| 5286 | | if (VEC_CLIP1_FLAG(i) != 0) |
| 5287 | | { |
| 5288 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 5289 | | { |
| 5290 | | VEC_SET_ACCUM_L(s1, i); |
| 5291 | | VEC_CLEAR_COMPARE_FLAG(i); |
| 5292 | | } |
| 5293 | | else |
| 5294 | | { |
| 5295 | | VEC_SET_ACCUM_L(-((UINT16)s2), i); |
| 5296 | | VEC_SET_COMPARE_FLAG(i); |
| 5297 | | } |
| 5298 | | } |
| 5299 | | else |
| 5300 | | { |
| 5301 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 5302 | | { |
| 5303 | | VEC_SET_ACCUM_L(s1, i); |
| 5304 | | VEC_CLEAR_COMPARE_FLAG(i); |
| 5305 | | } |
| 5306 | | else |
| 5307 | | { |
| 5308 | | VEC_SET_ACCUM_L(-((UINT16)s2), i); |
| 5309 | | VEC_SET_COMPARE_FLAG(i); |
| 5310 | | } |
| 5311 | | } |
| 5312 | | } |
| 5313 | | } |
| 5314 | | else |
| 5315 | | { |
| 5316 | | if (VEC_ZERO_FLAG(i) != 0) |
| 5317 | | { |
| 5318 | | if (VEC_CLIP2_FLAG(i) != 0) |
| 5319 | | { |
| 5320 | | VEC_SET_ACCUM_L(s2, i); |
| 5321 | | } |
| 5322 | | else |
| 5323 | | { |
| 5324 | | VEC_SET_ACCUM_L(s1, i); |
| 5325 | | } |
| 5326 | | } |
| 5327 | | else |
| 5328 | | { |
| 5329 | | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 5330 | | { |
| 5331 | | VEC_SET_ACCUM_L(s2, i); |
| 5332 | | VEC_SET_CLIP2_FLAG(i); |
| 5333 | | } |
| 5334 | | else |
| 5335 | | { |
| 5336 | | VEC_SET_ACCUM_L(s1, i); |
| 5337 | | VEC_CLEAR_CLIP2_FLAG(i); |
| 5338 | | } |
| 5339 | | } |
| 5340 | | } |
| 5341 | | vres[i] = VEC_ACCUM_L(i); |
| 5342 | | } |
| 5343 | | VEC_CLEAR_ZERO_FLAGS(); |
| 5344 | | VEC_CLEAR_CARRY_FLAGS(); |
| 5345 | | VEC_CLEAR_CLIP1_FLAGS(); |
| 5346 | | VEC_WRITEBACK_RESULT(); |
| 5347 | | } |
| 5348 | | |
| 5349 | | static void cfunc_rsp_vcl_simd(void *param) |
| 5350 | | { |
| 5351 | | ((rsp_device *)param)->ccfunc_rsp_vcl_simd(); |
| 5352 | | } |
| 5353 | | #endif |
| 5354 | | |
| 5355 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5356 | | |
| 5357 | | inline void rsp_device::ccfunc_rsp_vcl_scalar() |
| 5358 | | { |
| 5359 | | int op = m_rsp_state->arg0; |
| 5360 | | INT16 vres[8]; |
| 5361 | | |
| 5362 | | for (int i = 0; i < 8; i++) |
| 5363 | | { |
| 5364 | | INT16 s1, s2; |
| 5365 | | SCALAR_GET_VS1(s1, i); |
| 5366 | | SCALAR_GET_VS2(s2, i); |
| 5367 | | |
| 5368 | | if (CARRY_FLAG(i) != 0) |
| 5369 | | { |
| 5370 | | if (ZERO_FLAG(i) != 0) |
| 5371 | | { |
| 5372 | | if (COMPARE_FLAG(i) != 0) |
| 5373 | | { |
| 5374 | | SET_ACCUM_L(-(UINT16)s2, i); |
| 5375 | | } |
| 5376 | | else |
| 5377 | | { |
| 5378 | | SET_ACCUM_L(s1, i); |
| 5379 | | } |
| 5380 | | } |
| 5381 | | else |
| 5382 | | { |
| 5383 | | if (CLIP1_FLAG(i) != 0) |
| 5384 | | { |
| 5385 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 5386 | | { |
| 5387 | | SET_ACCUM_L(s1, i); |
| 5388 | | CLEAR_COMPARE_FLAG(i); |
| 5389 | | } |
| 5390 | | else |
| 5391 | | { |
| 5392 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 5393 | | SET_COMPARE_FLAG(i); |
| 5394 | | } |
| 5395 | | } |
| 5396 | | else |
| 5397 | | { |
| 5398 | | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0) |
| 5399 | | { |
| 5400 | | SET_ACCUM_L(s1, i); |
| 5401 | | CLEAR_COMPARE_FLAG(i); |
| 5402 | | } |
| 5403 | | else |
| 5404 | | { |
| 5405 | | SET_ACCUM_L(-((UINT16)s2), i); |
| 5406 | | SET_COMPARE_FLAG(i); |
| 5407 | | } |
| 5408 | | } |
| 5409 | | } |
| 5410 | | } |
| 5411 | | else |
| 5412 | | { |
| 5413 | | if (ZERO_FLAG(i) != 0) |
| 5414 | | { |
| 5415 | | if (CLIP2_FLAG(i) != 0) |
| 5416 | | { |
| 5417 | | SET_ACCUM_L(s2, i); |
| 5418 | | } |
| 5419 | | else |
| 5420 | | { |
| 5421 | | SET_ACCUM_L(s1, i); |
| 5422 | | } |
| 5423 | | } |
| 5424 | | else |
| 5425 | | { |
| 5426 | | if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0) |
| 5427 | | { |
| 5428 | | SET_ACCUM_L(s2, i); |
| 5429 | | SET_CLIP2_FLAG(i); |
| 5430 | | } |
| 5431 | | else |
| 5432 | | { |
| 5433 | | SET_ACCUM_L(s1, i); |
| 5434 | | CLEAR_CLIP2_FLAG(i); |
| 5435 | | } |
| 5436 | | } |
| 5437 | | } |
| 5438 | | vres[i] = ACCUM_L(i); |
| 5439 | | } |
| 5440 | | CLEAR_ZERO_FLAGS(); |
| 5441 | | CLEAR_CARRY_FLAGS(); |
| 5442 | | CLEAR_CLIP1_FLAGS(); |
| 5443 | | WRITEBACK_RESULT(); |
| 5444 | | } |
| 5445 | | |
| 5446 | | static void cfunc_rsp_vcl_scalar(void *param) |
| 5447 | | { |
| 5448 | | ((rsp_device *)param)->ccfunc_rsp_vcl_scalar(); |
| 5449 | | } |
| 5450 | | #endif |
| 5451 | | |
| 5452 | | #if USE_SIMD |
| 5453 | | // VCH |
| 5454 | | // |
| 5455 | | // 31 25 24 20 15 10 5 0 |
| 5456 | | // ------------------------------------------------------ |
| 5457 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 | |
| 5458 | | // ------------------------------------------------------ |
| 5459 | | // |
| 5460 | | // Vector clip high |
| 5461 | | |
| 5462 | | inline void rsp_device::ccfunc_rsp_vch_simd() |
| 5463 | | { |
| 5464 | | int op = m_rsp_state->arg0; |
| 5465 | | |
| 5466 | | VEC_CLEAR_CARRY_FLAGS(); |
| 5467 | | VEC_CLEAR_COMPARE_FLAGS(); |
| 5468 | | VEC_CLEAR_CLIP1_FLAGS(); |
| 5469 | | VEC_CLEAR_ZERO_FLAGS(); |
| 5470 | | VEC_CLEAR_CLIP2_FLAGS(); |
| 5471 | | |
| 5472 | | #if 0 |
| 5473 | | // Compare flag |
| 5474 | | // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5475 | | // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0) |
| 5476 | | |
| 5477 | | // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0) |
| 5478 | | // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5479 | | |
| 5480 | | // Carry flag |
| 5481 | | // flag[0] bit [0- 7] set if (s1 ^ s2) < 0 |
| 5482 | | |
| 5483 | | // Zero flag |
| 5484 | | // flag[0] bit [8-15] set if (s1 ^ s2) < 0 && (s1 + s2) != 0 && (s1 != ~s2) |
| 5485 | | // flag[0] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) != 0 && (s1 != ~s2) |
| 5486 | | |
| 5487 | | // flag[2] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) == -1 |
| 5488 | | |
| 5489 | | // accum set to -s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5490 | | // accum set to -s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5491 | | |
| 5492 | | // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0) |
| 5493 | | // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0 |
| 5494 | | |
| 5495 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5496 | | __m128i s1_xor_s2 = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 5497 | | __m128i s1_plus_s2 = _mm_add_epi16(m_xv[VS1REG], shuf); |
| 5498 | | __m128i s1_sub_s2 = _mm_sub_epi16(m_xv[VS1REG], shuf); |
| 5499 | | __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1); |
| 5500 | | |
| 5501 | | __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128()); |
| 5502 | | __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128()); |
| 5503 | | __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1); |
| 5504 | | __m128i s1s2_plus_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_plus_s2, _mm_setzero_si128()), vec_neg1); |
| 5505 | | __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128()); |
| 5506 | | __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1); |
| 5507 | | __m128i s1s2_plus_n1 = _mm_cmpeq_epi16(s1_plus_s2, vec_neg1); |
| 5508 | | __m128i s1s2_sub_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_sub_s2, _mm_setzero_si128()), vec_neg1); |
| 5509 | | __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128()); |
| 5510 | | __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1); |
| 5511 | | __m128i s1_nens2 = _mm_xor_si128(_mm_cmpeq_epi16(m_xv[VS1REG], s2_neg), vec_neg1); |
| 5512 | | |
| 5513 | | __m128i ext_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_n1), vec_flagmask); |
| 5514 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 0) << 0; |
| 5515 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 1) << 1; |
| 5516 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 2) << 2; |
| 5517 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 3) << 3; |
| 5518 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 4) << 4; |
| 5519 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 5) << 5; |
| 5520 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 6) << 6; |
| 5521 | | m_flag[2] |= _mm_extract_epi16(ext_mask, 7) << 7; |
| 5522 | | |
| 5523 | | __m128i carry_mask = _mm_and_si128(s1s2_xor_lz, vec_flagmask); |
| 5524 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 0) << 0; |
| 5525 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 1) << 1; |
| 5526 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 2) << 2; |
| 5527 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 3) << 3; |
| 5528 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 4) << 4; |
| 5529 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 5) << 5; |
| 5530 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 6) << 6; |
| 5531 | | m_flag[0] |= _mm_extract_epi16(carry_mask, 7) << 7; |
| 5532 | | |
| 5533 | | __m128i z0_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_nz), s1_nens2); |
| 5534 | | __m128i z1_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_nz), s1_nens2); |
| 5535 | | __m128i z_mask = _mm_and_si128(_mm_or_si128(z0_mask, z1_mask), vec_flagmask); |
| 5536 | | z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi32(z_mask, 15)), vec_shiftmask2); |
| 5537 | | z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi64(z_mask, 30)), vec_shiftmask4); |
| 5538 | | z_mask = _mm_or_si128(z_mask, _mm_srli_si128(z_mask, 7)); |
| 5539 | | z_mask = _mm_or_si128(z_mask, _mm_srli_epi16(z_mask, 4)); |
| 5540 | | m_flag[0] |= (_mm_extract_epi16(z_mask, 0) << 8) & 0x00ff00; |
| 5541 | | |
| 5542 | | __m128i f0_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz), _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)), vec_flagmask); |
| 5543 | | __m128i f8_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez), _mm_and_si128(s1s2_xor_lz, s2_lz)), vec_flagmask); |
| 5544 | | f0_mask = _mm_and_si128(f0_mask, vec_flagmask); |
| 5545 | | f8_mask = _mm_and_si128(f8_mask, vec_flagmask); |
| 5546 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 0) << 0; |
| 5547 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 1) << 1; |
| 5548 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 2) << 2; |
| 5549 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 3) << 3; |
| 5550 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 4) << 4; |
| 5551 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 5) << 5; |
| 5552 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 6) << 6; |
| 5553 | | m_flag[1] |= _mm_extract_epi16(f0_mask, 7) << 7; |
| 5554 | | |
| 5555 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 0) << 8; |
| 5556 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 1) << 9; |
| 5557 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 2) << 10; |
| 5558 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 3) << 11; |
| 5559 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 4) << 12; |
| 5560 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 5) << 13; |
| 5561 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 6) << 14; |
| 5562 | | m_flag[1] |= _mm_extract_epi16(f8_mask, 7) << 15; |
| 5563 | | #endif |
| 5564 | | INT16 vres[8]; |
| 5565 | | UINT32 vce = 0; |
| 5566 | | for (int i = 0; i < 8; i++) |
| 5567 | | { |
| 5568 | | INT16 s1, s2; |
| 5569 | | VEC_GET_SCALAR_VS1(s1, i); |
| 5570 | | VEC_GET_SCALAR_VS2(s2, i); |
| 5571 | | |
| 5572 | | if ((s1 ^ s2) < 0) |
| 5573 | | { |
| 5574 | | vce = (s1 + s2 == -1); |
| 5575 | | VEC_SET_CARRY_FLAG(i); |
| 5576 | | if (s2 < 0) |
| 5577 | | { |
| 5578 | | VEC_SET_CLIP2_FLAG(i); |
| 5579 | | } |
| 5580 | | |
| 5581 | | if ((s1 + s2) <= 0) |
| 5582 | | { |
| 5583 | | VEC_SET_COMPARE_FLAG(i); |
| 5584 | | vres[i] = -((UINT16)s2); |
| 5585 | | } |
| 5586 | | else |
| 5587 | | { |
| 5588 | | vres[i] = s1; |
| 5589 | | } |
| 5590 | | |
| 5591 | | if ((s1 + s2) != 0 && s1 != ~s2) |
| 5592 | | { |
| 5593 | | VEC_SET_ZERO_FLAG(i); |
| 5594 | | } |
| 5595 | | }//sign |
| 5596 | | else |
| 5597 | | { |
| 5598 | | vce = 0; |
| 5599 | | if (s2 < 0) |
| 5600 | | { |
| 5601 | | VEC_SET_COMPARE_FLAG(i); |
| 5602 | | } |
| 5603 | | if ((s1 - s2) >= 0) |
| 5604 | | { |
| 5605 | | VEC_SET_CLIP2_FLAG(i); |
| 5606 | | vres[i] = s2; |
| 5607 | | } |
| 5608 | | else |
| 5609 | | { |
| 5610 | | vres[i] = s1; |
| 5611 | | } |
| 5612 | | |
| 5613 | | if ((s1 - s2) != 0 && s1 != ~s2) |
| 5614 | | { |
| 5615 | | VEC_SET_ZERO_FLAG(i); |
| 5616 | | } |
| 5617 | | } |
| 5618 | | if (vce) |
| 5619 | | { |
| 5620 | | VEC_SET_CLIP1_FLAG(i); |
| 5621 | | } |
| 5622 | | VEC_SET_ACCUM_L(vres[i], i); |
| 5623 | | } |
| 5624 | | VEC_WRITEBACK_RESULT(); |
| 5625 | | } |
| 5626 | | |
| 5627 | | static void cfunc_rsp_vch_simd(void *param) |
| 5628 | | { |
| 5629 | | ((rsp_device *)param)->ccfunc_rsp_vch_simd(); |
| 5630 | | } |
| 5631 | | #endif |
| 5632 | | |
| 5633 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5634 | | |
| 5635 | | inline void rsp_device::ccfunc_rsp_vch_scalar() |
| 5636 | | { |
| 5637 | | int op = m_rsp_state->arg0; |
| 5638 | | |
| 5639 | | CLEAR_CARRY_FLAGS(); |
| 5640 | | CLEAR_COMPARE_FLAGS(); |
| 5641 | | CLEAR_CLIP1_FLAGS(); |
| 5642 | | CLEAR_ZERO_FLAGS(); |
| 5643 | | CLEAR_CLIP2_FLAGS(); |
| 5644 | | |
| 5645 | | INT16 vres[8]; |
| 5646 | | UINT32 vce = 0; |
| 5647 | | for (int i = 0; i < 8; i++) |
| 5648 | | { |
| 5649 | | INT16 s1, s2; |
| 5650 | | SCALAR_GET_VS1(s1, i); |
| 5651 | | SCALAR_GET_VS2(s2, i); |
| 5652 | | |
| 5653 | | if ((s1 ^ s2) < 0) |
| 5654 | | { |
| 5655 | | vce = (s1 + s2 == -1); |
| 5656 | | SET_CARRY_FLAG(i); |
| 5657 | | if (s2 < 0) |
| 5658 | | { |
| 5659 | | SET_CLIP2_FLAG(i); |
| 5660 | | } |
| 5661 | | |
| 5662 | | if ((s1 + s2) <= 0) |
| 5663 | | { |
| 5664 | | SET_COMPARE_FLAG(i); |
| 5665 | | vres[i] = -((UINT16)s2); |
| 5666 | | } |
| 5667 | | else |
| 5668 | | { |
| 5669 | | vres[i] = s1; |
| 5670 | | } |
| 5671 | | |
| 5672 | | if ((s1 + s2) != 0 && s1 != ~s2) |
| 5673 | | { |
| 5674 | | SET_ZERO_FLAG(i); |
| 5675 | | } |
| 5676 | | }//sign |
| 5677 | | else |
| 5678 | | { |
| 5679 | | vce = 0; |
| 5680 | | if (s2 < 0) |
| 5681 | | { |
| 5682 | | SET_COMPARE_FLAG(i); |
| 5683 | | } |
| 5684 | | if ((s1 - s2) >= 0) |
| 5685 | | { |
| 5686 | | SET_CLIP2_FLAG(i); |
| 5687 | | vres[i] = s2; |
| 5688 | | } |
| 5689 | | else |
| 5690 | | { |
| 5691 | | vres[i] = s1; |
| 5692 | | } |
| 5693 | | |
| 5694 | | if ((s1 - s2) != 0 && s1 != ~s2) |
| 5695 | | { |
| 5696 | | SET_ZERO_FLAG(i); |
| 5697 | | } |
| 5698 | | } |
| 5699 | | if (vce) |
| 5700 | | { |
| 5701 | | SET_CLIP1_FLAG(i); |
| 5702 | | } |
| 5703 | | SET_ACCUM_L(vres[i], i); |
| 5704 | | } |
| 5705 | | WRITEBACK_RESULT(); |
| 5706 | | } |
| 5707 | | |
| 5708 | | static void cfunc_rsp_vch_scalar(void *param) |
| 5709 | | { |
| 5710 | | ((rsp_device *)param)->ccfunc_rsp_vch_scalar(); |
| 5711 | | } |
| 5712 | | #endif |
| 5713 | | |
| 5714 | | #if USE_SIMD |
| 5715 | | // VCR |
| 5716 | | // |
| 5717 | | // 31 25 24 20 15 10 5 0 |
| 5718 | | // ------------------------------------------------------ |
| 5719 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 | |
| 5720 | | // ------------------------------------------------------ |
| 5721 | | // |
| 5722 | | // Vector clip reverse |
| 5723 | | |
| 5724 | | inline void rsp_device::ccfunc_rsp_vcr_simd() |
| 5725 | | { |
| 5726 | | int op = m_rsp_state->arg0; |
| 5727 | | |
| 5728 | | VEC_CLEAR_CARRY_FLAGS(); |
| 5729 | | VEC_CLEAR_COMPARE_FLAGS(); |
| 5730 | | VEC_CLEAR_CLIP1_FLAGS(); |
| 5731 | | VEC_CLEAR_ZERO_FLAGS(); |
| 5732 | | VEC_CLEAR_CLIP2_FLAGS(); |
| 5733 | | |
| 5734 | | #if 0 |
| 5735 | | // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5736 | | // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0) |
| 5737 | | |
| 5738 | | // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0) |
| 5739 | | // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5740 | | |
| 5741 | | // accum set to ~s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0) |
| 5742 | | // accum set to ~s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0 |
| 5743 | | |
| 5744 | | // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0) |
| 5745 | | // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0 |
| 5746 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5747 | | __m128i s1_xor_s2 = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 5748 | | __m128i s1_plus_s2 = _mm_add_epi16(m_xv[VS1REG], shuf); |
| 5749 | | __m128i s1_sub_s2 = _mm_sub_epi16(m_xv[VS1REG], shuf); |
| 5750 | | __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1); |
| 5751 | | |
| 5752 | | __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128()); |
| 5753 | | __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128()); |
| 5754 | | __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1); |
| 5755 | | __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128()); |
| 5756 | | __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1); |
| 5757 | | __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128()); |
| 5758 | | __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1); |
| 5759 | | |
| 5760 | | __m128i s1_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_lz), _mm_and_si128(s1s2_xor_lz, s1s2_plus_gz)); |
| 5761 | | __m128i s2_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez), _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)); |
| 5762 | | m_accum_l = _mm_or_si128(_mm_and_si128(m_xv[VS1REG], s1_mask), _mm_and_si128(s2_neg, s2_mask)); |
| 5763 | | m_xv[VDREG] = m_accum_l; |
| 5764 | | |
| 5765 | | m_xvflag[COMPARE] = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz), _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)); |
| 5766 | | m_xvflag[CLIP2] = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez), _mm_and_si128(s1s2_xor_lz, s2_lz)); |
| 5767 | | #endif |
| 5768 | | INT16 vres[8]; |
| 5769 | | for (int i = 0; i < 8; i++) |
| 5770 | | { |
| 5771 | | INT16 s1, s2; |
| 5772 | | VEC_GET_SCALAR_VS1(s1, i); |
| 5773 | | VEC_GET_SCALAR_VS2(s2, i); |
| 5774 | | |
| 5775 | | if ((INT16)(s1 ^ s2) < 0) |
| 5776 | | { |
| 5777 | | if (s2 < 0) |
| 5778 | | { |
| 5779 | | VEC_SET_CLIP2_FLAG(i); |
| 5780 | | } |
| 5781 | | if ((s1 + s2) <= 0) |
| 5782 | | { |
| 5783 | | VEC_SET_ACCUM_L(~((UINT16)s2), i); |
| 5784 | | VEC_SET_COMPARE_FLAG(i); |
| 5785 | | } |
| 5786 | | else |
| 5787 | | { |
| 5788 | | VEC_SET_ACCUM_L(s1, i); |
| 5789 | | } |
| 5790 | | } |
| 5791 | | else |
| 5792 | | { |
| 5793 | | if (s2 < 0) |
| 5794 | | { |
| 5795 | | VEC_SET_COMPARE_FLAG(i); |
| 5796 | | } |
| 5797 | | if ((s1 - s2) >= 0) |
| 5798 | | { |
| 5799 | | VEC_SET_ACCUM_L(s2, i); |
| 5800 | | VEC_SET_CLIP2_FLAG(i); |
| 5801 | | } |
| 5802 | | else |
| 5803 | | { |
| 5804 | | VEC_SET_ACCUM_L(s1, i); |
| 5805 | | } |
| 5806 | | } |
| 5807 | | |
| 5808 | | vres[i] = VEC_ACCUM_L(i); |
| 5809 | | } |
| 5810 | | VEC_WRITEBACK_RESULT(); |
| 5811 | | } |
| 5812 | | |
| 5813 | | static void cfunc_rsp_vcr_simd(void *param) |
| 5814 | | { |
| 5815 | | ((rsp_device *)param)->ccfunc_rsp_vcr_simd(); |
| 5816 | | } |
| 5817 | | #endif |
| 5818 | | |
| 5819 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5820 | | |
| 5821 | | inline void rsp_device::ccfunc_rsp_vcr_scalar() |
| 5822 | | { |
| 5823 | | int op = m_rsp_state->arg0; |
| 5824 | | |
| 5825 | | CLEAR_CARRY_FLAGS(); |
| 5826 | | CLEAR_COMPARE_FLAGS(); |
| 5827 | | CLEAR_CLIP1_FLAGS(); |
| 5828 | | CLEAR_ZERO_FLAGS(); |
| 5829 | | CLEAR_CLIP2_FLAGS(); |
| 5830 | | |
| 5831 | | INT16 vres[8]; |
| 5832 | | for (int i = 0; i < 8; i++) |
| 5833 | | { |
| 5834 | | INT16 s1, s2; |
| 5835 | | SCALAR_GET_VS1(s1, i); |
| 5836 | | SCALAR_GET_VS2(s2, i); |
| 5837 | | |
| 5838 | | if ((INT16)(s1 ^ s2) < 0) |
| 5839 | | { |
| 5840 | | if (s2 < 0) |
| 5841 | | { |
| 5842 | | SET_CLIP2_FLAG(i); |
| 5843 | | } |
| 5844 | | if ((s1 + s2) <= 0) |
| 5845 | | { |
| 5846 | | SET_ACCUM_L(~((UINT16)s2), i); |
| 5847 | | SET_COMPARE_FLAG(i); |
| 5848 | | } |
| 5849 | | else |
| 5850 | | { |
| 5851 | | SET_ACCUM_L(s1, i); |
| 5852 | | } |
| 5853 | | } |
| 5854 | | else |
| 5855 | | { |
| 5856 | | if (s2 < 0) |
| 5857 | | { |
| 5858 | | SET_COMPARE_FLAG(i); |
| 5859 | | } |
| 5860 | | if ((s1 - s2) >= 0) |
| 5861 | | { |
| 5862 | | SET_ACCUM_L(s2, i); |
| 5863 | | SET_CLIP2_FLAG(i); |
| 5864 | | } |
| 5865 | | else |
| 5866 | | { |
| 5867 | | SET_ACCUM_L(s1, i); |
| 5868 | | } |
| 5869 | | } |
| 5870 | | |
| 5871 | | vres[i] = ACCUM_L(i); |
| 5872 | | } |
| 5873 | | WRITEBACK_RESULT(); |
| 5874 | | } |
| 5875 | | |
| 5876 | | static void cfunc_rsp_vcr_scalar(void *param) |
| 5877 | | { |
| 5878 | | ((rsp_device *)param)->ccfunc_rsp_vcr_scalar(); |
| 5879 | | } |
| 5880 | | #endif |
| 5881 | | |
| 5882 | | #if USE_SIMD |
| 5883 | | // VMRG |
| 5884 | | // |
| 5885 | | // 31 25 24 20 15 10 5 0 |
| 5886 | | // ------------------------------------------------------ |
| 5887 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 | |
| 5888 | | // ------------------------------------------------------ |
| 5889 | | // |
| 5890 | | // Merges two vectors according to compare flags |
| 5891 | | |
| 5892 | | inline void rsp_device::ccfunc_rsp_vmrg_simd() |
| 5893 | | { |
| 5894 | | int op = m_rsp_state->arg0; |
| 5895 | | |
| 5896 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5897 | | __m128i s2mask = _mm_cmpeq_epi16(m_xvflag[COMPARE], _mm_setzero_si128()); |
| 5898 | | __m128i s1mask = _mm_xor_si128(s2mask, vec_neg1); |
| 5899 | | __m128i result = _mm_and_si128(m_xv[VS1REG], s1mask); |
| 5900 | | m_xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, s2mask)); |
| 5901 | | m_accum_l = m_xv[VDREG]; |
| 5902 | | } |
| 5903 | | |
| 5904 | | static void cfunc_rsp_vmrg_simd(void *param) |
| 5905 | | { |
| 5906 | | ((rsp_device *)param)->ccfunc_rsp_vmrg_simd(); |
| 5907 | | } |
| 5908 | | #endif |
| 5909 | | |
| 5910 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5911 | | |
| 5912 | | inline void rsp_device::ccfunc_rsp_vmrg_scalar() |
| 5913 | | { |
| 5914 | | int op = m_rsp_state->arg0; |
| 5915 | | |
| 5916 | | INT16 vres[8]; |
| 5917 | | for (int i = 0; i < 8; i++) |
| 5918 | | { |
| 5919 | | INT16 s1, s2; |
| 5920 | | SCALAR_GET_VS1(s1, i); |
| 5921 | | SCALAR_GET_VS2(s2, i); |
| 5922 | | if (COMPARE_FLAG(i) != 0) |
| 5923 | | { |
| 5924 | | vres[i] = s1; |
| 5925 | | } |
| 5926 | | else |
| 5927 | | { |
| 5928 | | vres[i] = s2; |
| 5929 | | } |
| 5930 | | |
| 5931 | | SET_ACCUM_L(vres[i], i); |
| 5932 | | } |
| 5933 | | WRITEBACK_RESULT(); |
| 5934 | | } |
| 5935 | | |
| 5936 | | static void cfunc_rsp_vmrg_scalar(void *param) |
| 5937 | | { |
| 5938 | | ((rsp_device *)param)->ccfunc_rsp_vmrg_scalar(); |
| 5939 | | } |
| 5940 | | #endif |
| 5941 | | |
| 5942 | | #if USE_SIMD |
| 5943 | | // VAND |
| 5944 | | // |
| 5945 | | // 31 25 24 20 15 10 5 0 |
| 5946 | | // ------------------------------------------------------ |
| 5947 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 | |
| 5948 | | // ------------------------------------------------------ |
| 5949 | | // |
| 5950 | | // Bitwise AND of two vector registers |
| 5951 | | |
| 5952 | | inline void rsp_device::ccfunc_rsp_vand_simd() |
| 5953 | | { |
| 5954 | | int op = m_rsp_state->arg0; |
| 5955 | | |
| 5956 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 5957 | | m_xv[VDREG] = _mm_and_si128(m_xv[VS1REG], shuf); |
| 5958 | | m_accum_l = m_xv[VDREG]; |
| 5959 | | } |
| 5960 | | |
| 5961 | | static void cfunc_rsp_vand_simd(void *param) |
| 5962 | | { |
| 5963 | | ((rsp_device *)param)->ccfunc_rsp_vand_simd(); |
| 5964 | | } |
| 5965 | | #endif |
| 5966 | | |
| 5967 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 5968 | | |
| 5969 | | inline void rsp_device::ccfunc_rsp_vand_scalar() |
| 5970 | | { |
| 5971 | | int op = m_rsp_state->arg0; |
| 5972 | | |
| 5973 | | INT16 vres[8]; |
| 5974 | | for (int i = 0; i < 8; i++) |
| 5975 | | { |
| 5976 | | UINT16 s1, s2; |
| 5977 | | SCALAR_GET_VS1(s1, i); |
| 5978 | | SCALAR_GET_VS2(s2, i); |
| 5979 | | vres[i] = s1 & s2; |
| 5980 | | SET_ACCUM_L(vres[i], i); |
| 5981 | | } |
| 5982 | | WRITEBACK_RESULT(); |
| 5983 | | } |
| 5984 | | |
| 5985 | | static void cfunc_rsp_vand_scalar(void *param) |
| 5986 | | { |
| 5987 | | ((rsp_device *)param)->ccfunc_rsp_vand_scalar(); |
| 5988 | | } |
| 5989 | | #endif |
| 5990 | | |
| 5991 | | #if USE_SIMD |
| 5992 | | // VNAND |
| 5993 | | // |
| 5994 | | // 31 25 24 20 15 10 5 0 |
| 5995 | | // ------------------------------------------------------ |
| 5996 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 | |
| 5997 | | // ------------------------------------------------------ |
| 5998 | | // |
| 5999 | | // Bitwise NOT AND of two vector registers |
| 6000 | | |
| 6001 | | inline void rsp_device::ccfunc_rsp_vnand_simd() |
| 6002 | | { |
| 6003 | | int op = m_rsp_state->arg0; |
| 6004 | | |
| 6005 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6006 | | m_xv[VDREG] = _mm_xor_si128(_mm_and_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 6007 | | m_accum_l = m_xv[VDREG]; |
| 6008 | | } |
| 6009 | | |
| 6010 | | static void cfunc_rsp_vnand_simd(void *param) |
| 6011 | | { |
| 6012 | | ((rsp_device *)param)->ccfunc_rsp_vnand_simd(); |
| 6013 | | } |
| 6014 | | #endif |
| 6015 | | |
| 6016 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6017 | | |
| 6018 | | inline void rsp_device::ccfunc_rsp_vnand_scalar() |
| 6019 | | { |
| 6020 | | int op = m_rsp_state->arg0; |
| 6021 | | |
| 6022 | | INT16 vres[8]; |
| 6023 | | for (int i = 0; i < 8; i++) |
| 6024 | | { |
| 6025 | | UINT16 s1, s2; |
| 6026 | | SCALAR_GET_VS1(s1, i); |
| 6027 | | SCALAR_GET_VS2(s2, i); |
| 6028 | | vres[i] = ~((s1 & s2)); |
| 6029 | | SET_ACCUM_L(vres[i], i); |
| 6030 | | } |
| 6031 | | WRITEBACK_RESULT(); |
| 6032 | | } |
| 6033 | | |
| 6034 | | static void cfunc_rsp_vnand_scalar(void *param) |
| 6035 | | { |
| 6036 | | ((rsp_device *)param)->ccfunc_rsp_vnand_scalar(); |
| 6037 | | } |
| 6038 | | #endif |
| 6039 | | |
| 6040 | | #if USE_SIMD |
| 6041 | | // VOR |
| 6042 | | // |
| 6043 | | // 31 25 24 20 15 10 5 0 |
| 6044 | | // ------------------------------------------------------ |
| 6045 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 | |
| 6046 | | // ------------------------------------------------------ |
| 6047 | | // |
| 6048 | | // Bitwise OR of two vector registers |
| 6049 | | |
| 6050 | | inline void rsp_device::ccfunc_rsp_vor_simd() |
| 6051 | | { |
| 6052 | | int op = m_rsp_state->arg0; |
| 6053 | | |
| 6054 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6055 | | m_xv[VDREG] = _mm_or_si128(m_xv[VS1REG], shuf); |
| 6056 | | m_accum_l = m_xv[VDREG]; |
| 6057 | | } |
| 6058 | | |
| 6059 | | static void cfunc_rsp_vor_simd(void *param) |
| 6060 | | { |
| 6061 | | ((rsp_device *)param)->ccfunc_rsp_vor_simd(); |
| 6062 | | } |
| 6063 | | #endif |
| 6064 | | |
| 6065 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6066 | | |
| 6067 | | inline void rsp_device::ccfunc_rsp_vor_scalar() |
| 6068 | | { |
| 6069 | | int op = m_rsp_state->arg0; |
| 6070 | | |
| 6071 | | INT16 vres[8]; |
| 6072 | | for (int i = 0; i < 8; i++) |
| 6073 | | { |
| 6074 | | UINT16 s1, s2; |
| 6075 | | SCALAR_GET_VS1(s1, i); |
| 6076 | | SCALAR_GET_VS2(s2, i); |
| 6077 | | vres[i] = s1 | s2; |
| 6078 | | SET_ACCUM_L(vres[i], i); |
| 6079 | | } |
| 6080 | | WRITEBACK_RESULT(); |
| 6081 | | } |
| 6082 | | |
| 6083 | | static void cfunc_rsp_vor_scalar(void *param) |
| 6084 | | { |
| 6085 | | ((rsp_device *)param)->ccfunc_rsp_vor_scalar(); |
| 6086 | | } |
| 6087 | | #endif |
| 6088 | | |
| 6089 | | #if USE_SIMD |
| 6090 | | // VNOR |
| 6091 | | // |
| 6092 | | // 31 25 24 20 15 10 5 0 |
| 6093 | | // ------------------------------------------------------ |
| 6094 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 | |
| 6095 | | // ------------------------------------------------------ |
| 6096 | | // |
| 6097 | | // Bitwise NOT OR of two vector registers |
| 6098 | | |
| 6099 | | inline void rsp_device::ccfunc_rsp_vnor_simd() |
| 6100 | | { |
| 6101 | | int op = m_rsp_state->arg0; |
| 6102 | | |
| 6103 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6104 | | m_xv[VDREG] = _mm_xor_si128(_mm_or_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 6105 | | m_accum_l = m_xv[VDREG]; |
| 6106 | | } |
| 6107 | | |
| 6108 | | static void cfunc_rsp_vnor_simd(void *param) |
| 6109 | | { |
| 6110 | | ((rsp_device *)param)->ccfunc_rsp_vnor_simd(); |
| 6111 | | } |
| 6112 | | #endif |
| 6113 | | |
| 6114 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6115 | | |
| 6116 | | inline void rsp_device::ccfunc_rsp_vnor_scalar() |
| 6117 | | { |
| 6118 | | int op = m_rsp_state->arg0; |
| 6119 | | |
| 6120 | | INT16 vres[8]; |
| 6121 | | for (int i = 0; i < 8; i++) |
| 6122 | | { |
| 6123 | | UINT16 s1, s2; |
| 6124 | | SCALAR_GET_VS1(s1, i); |
| 6125 | | SCALAR_GET_VS2(s2, i); |
| 6126 | | vres[i] = ~(s1 | s2); |
| 6127 | | SET_ACCUM_L(vres[i], i); |
| 6128 | | } |
| 6129 | | WRITEBACK_RESULT(); |
| 6130 | | } |
| 6131 | | |
| 6132 | | static void cfunc_rsp_vnor_scalar(void *param) |
| 6133 | | { |
| 6134 | | ((rsp_device *)param)->ccfunc_rsp_vnor_scalar(); |
| 6135 | | } |
| 6136 | | #endif |
| 6137 | | |
| 6138 | | #if USE_SIMD |
| 6139 | | // VXOR |
| 6140 | | // |
| 6141 | | // 31 25 24 20 15 10 5 0 |
| 6142 | | // ------------------------------------------------------ |
| 6143 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 | |
| 6144 | | // ------------------------------------------------------ |
| 6145 | | // |
| 6146 | | // Bitwise XOR of two vector registers |
| 6147 | | |
| 6148 | | inline void rsp_device::ccfunc_rsp_vxor_simd() |
| 6149 | | { |
| 6150 | | int op = m_rsp_state->arg0; |
| 6151 | | |
| 6152 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6153 | | m_xv[VDREG] = _mm_xor_si128(m_xv[VS1REG], shuf); |
| 6154 | | m_accum_l = m_xv[VDREG]; |
| 6155 | | } |
| 6156 | | |
| 6157 | | static void cfunc_rsp_vxor_simd(void *param) |
| 6158 | | { |
| 6159 | | ((rsp_device *)param)->ccfunc_rsp_vxor_simd(); |
| 6160 | | } |
| 6161 | | #endif |
| 6162 | | |
| 6163 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6164 | | |
| 6165 | | inline void rsp_device::ccfunc_rsp_vxor_scalar() |
| 6166 | | { |
| 6167 | | int op = m_rsp_state->arg0; |
| 6168 | | |
| 6169 | | INT16 vres[8]; |
| 6170 | | for (int i = 0; i < 8; i++) |
| 6171 | | { |
| 6172 | | UINT16 s1, s2; |
| 6173 | | SCALAR_GET_VS1(s1, i); |
| 6174 | | SCALAR_GET_VS2(s2, i); |
| 6175 | | vres[i] = s1 ^ s2; |
| 6176 | | SET_ACCUM_L(vres[i], i); |
| 6177 | | } |
| 6178 | | WRITEBACK_RESULT(); |
| 6179 | | } |
| 6180 | | |
| 6181 | | static void cfunc_rsp_vxor_scalar(void *param) |
| 6182 | | { |
| 6183 | | ((rsp_device *)param)->ccfunc_rsp_vxor_scalar(); |
| 6184 | | } |
| 6185 | | #endif |
| 6186 | | |
| 6187 | | #if USE_SIMD |
| 6188 | | // VNXOR |
| 6189 | | // |
| 6190 | | // 31 25 24 20 15 10 5 0 |
| 6191 | | // ------------------------------------------------------ |
| 6192 | | // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 | |
| 6193 | | // ------------------------------------------------------ |
| 6194 | | // |
| 6195 | | // Bitwise NOT XOR of two vector registers |
| 6196 | | |
| 6197 | | inline void rsp_device::ccfunc_rsp_vnxor_simd() |
| 6198 | | { |
| 6199 | | int op = m_rsp_state->arg0; |
| 6200 | | |
| 6201 | | __m128i shuf = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6202 | | m_xv[VDREG] = _mm_xor_si128(_mm_xor_si128(m_xv[VS1REG], shuf), vec_neg1); |
| 6203 | | m_accum_l = m_xv[VDREG]; |
| 6204 | | } |
| 6205 | | |
| 6206 | | static void cfunc_rsp_vnxor_simd(void *param) |
| 6207 | | { |
| 6208 | | ((rsp_device *)param)->ccfunc_rsp_vnxor_simd(); |
| 6209 | | } |
| 6210 | | #endif |
| 6211 | | |
| 6212 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6213 | | |
| 6214 | | inline void rsp_device::ccfunc_rsp_vnxor_scalar() |
| 6215 | | { |
| 6216 | | int op = m_rsp_state->arg0; |
| 6217 | | |
| 6218 | | INT16 vres[8]; |
| 6219 | | for (int i = 0; i < 8; i++) |
| 6220 | | { |
| 6221 | | UINT16 s1, s2; |
| 6222 | | SCALAR_GET_VS1(s1, i); |
| 6223 | | SCALAR_GET_VS2(s2, i); |
| 6224 | | vres[i] = ~(s1 ^ s2); |
| 6225 | | SET_ACCUM_L(vres[i], i); |
| 6226 | | } |
| 6227 | | WRITEBACK_RESULT(); |
| 6228 | | } |
| 6229 | | |
| 6230 | | static void cfunc_rsp_vnxor_scalar(void *param) |
| 6231 | | { |
| 6232 | | ((rsp_device *)param)->ccfunc_rsp_vnxor_scalar(); |
| 6233 | | } |
| 6234 | | #endif |
| 6235 | | |
| 6236 | | #if USE_SIMD |
| 6237 | | // VRCP |
| 6238 | | // |
| 6239 | | // 31 25 24 20 15 10 5 0 |
| 6240 | | // ------------------------------------------------------ |
| 6241 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 | |
| 6242 | | // ------------------------------------------------------ |
| 6243 | | // |
| 6244 | | // Calculates reciprocal |
| 6245 | | |
| 6246 | | inline void rsp_device::ccfunc_rsp_vrcp_simd() |
| 6247 | | { |
| 6248 | | int op = m_rsp_state->arg0; |
| 6249 | | |
| 6250 | | INT32 shifter = 0; |
| 6251 | | UINT16 urec; |
| 6252 | | INT32 rec; |
| 6253 | | SIMD_EXTRACT16(m_xv[VS2REG], urec, EL); |
| 6254 | | rec = (INT16)urec; |
| 6255 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 6256 | | if (datainput) |
| 6257 | | { |
| 6258 | | for (int i = 0; i < 32; i++) |
| 6259 | | { |
| 6260 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 6261 | | { |
| 6262 | | shifter = i; |
| 6263 | | break; |
| 6264 | | } |
| 6265 | | } |
| 6266 | | } |
| 6267 | | else |
| 6268 | | { |
| 6269 | | shifter = 0x10; |
| 6270 | | } |
| 6271 | | |
| 6272 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6273 | | INT32 fetchval = rsp_divtable[address]; |
| 6274 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6275 | | if (rec < 0) |
| 6276 | | { |
| 6277 | | temp = ~temp; |
| 6278 | | } |
| 6279 | | if (!rec) |
| 6280 | | { |
| 6281 | | temp = 0x7fffffff; |
| 6282 | | } |
| 6283 | | else if (rec == 0xffff8000) |
| 6284 | | { |
| 6285 | | temp = 0xffff0000; |
| 6286 | | } |
| 6287 | | rec = temp; |
| 6288 | | |
| 6289 | | m_reciprocal_res = rec; |
| 6290 | | m_dp_allowed = 0; |
| 6291 | | |
| 6292 | | SIMD_INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 6293 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); |
| 6294 | | } |
| 6295 | | |
| 6296 | | static void cfunc_rsp_vrcp_simd(void *param) |
| 6297 | | { |
| 6298 | | ((rsp_device *)param)->ccfunc_rsp_vrcp_simd(); |
| 6299 | | } |
| 6300 | | #endif |
| 6301 | | |
| 6302 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6303 | | |
| 6304 | | inline void rsp_device::ccfunc_rsp_vrcp_scalar() |
| 6305 | | { |
| 6306 | | int op = m_rsp_state->arg0; |
| 6307 | | |
| 6308 | | INT32 shifter = 0; |
| 6309 | | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 6310 | | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 6311 | | if (datainput) |
| 6312 | | { |
| 6313 | | for (int i = 0; i < 32; i++) |
| 6314 | | { |
| 6315 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 6316 | | { |
| 6317 | | shifter = i; |
| 6318 | | break; |
| 6319 | | } |
| 6320 | | } |
| 6321 | | } |
| 6322 | | else |
| 6323 | | { |
| 6324 | | shifter = 0x10; |
| 6325 | | } |
| 6326 | | |
| 6327 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6328 | | INT32 fetchval = rsp_divtable[address]; |
| 6329 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6330 | | if (rec < 0) |
| 6331 | | { |
| 6332 | | temp = ~temp; |
| 6333 | | } |
| 6334 | | if (!rec) |
| 6335 | | { |
| 6336 | | temp = 0x7fffffff; |
| 6337 | | } |
| 6338 | | else if (rec == 0xffff8000) |
| 6339 | | { |
| 6340 | | temp = 0xffff0000; |
| 6341 | | } |
| 6342 | | rec = temp; |
| 6343 | | |
| 6344 | | m_reciprocal_res = rec; |
| 6345 | | m_dp_allowed = 0; |
| 6346 | | |
| 6347 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 6348 | | for (int i = 0; i < 8; i++) |
| 6349 | | { |
| 6350 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6351 | | } |
| 6352 | | } |
| 6353 | | |
| 6354 | | static void cfunc_rsp_vrcp_scalar(void *param) |
| 6355 | | { |
| 6356 | | ((rsp_device *)param)->ccfunc_rsp_vrcp_scalar(); |
| 6357 | | } |
| 6358 | | #endif |
| 6359 | | |
| 6360 | | #if USE_SIMD |
| 6361 | | // VRCPL |
| 6362 | | // |
| 6363 | | // 31 25 24 20 15 10 5 0 |
| 6364 | | // ------------------------------------------------------ |
| 6365 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 | |
| 6366 | | // ------------------------------------------------------ |
| 6367 | | // |
| 6368 | | // Calculates reciprocal low part |
| 6369 | | |
| 6370 | | inline void rsp_device::ccfunc_rsp_vrcpl_simd() |
| 6371 | | { |
| 6372 | | int op = m_rsp_state->arg0; |
| 6373 | | |
| 6374 | | #if SIMUL_SIMD |
| 6375 | | m_old_reciprocal_res = m_reciprocal_res; |
| 6376 | | m_old_reciprocal_high = m_reciprocal_high; |
| 6377 | | m_old_dp_allowed = m_dp_allowed; |
| 6378 | | #endif |
| 6379 | | |
| 6380 | | INT32 shifter = 0; |
| 6381 | | |
| 6382 | | UINT16 urec; |
| 6383 | | SIMD_EXTRACT16(m_xv[VS2REG], urec, EL); |
| 6384 | | INT32 rec = (INT16)urec; |
| 6385 | | INT32 datainput = rec; |
| 6386 | | |
| 6387 | | if (m_dp_allowed) |
| 6388 | | { |
| 6389 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 6390 | | datainput = rec; |
| 6391 | | |
| 6392 | | if (rec < 0) |
| 6393 | | { |
| 6394 | | if (rec < -32768) |
| 6395 | | { |
| 6396 | | datainput = ~datainput; |
| 6397 | | } |
| 6398 | | else |
| 6399 | | { |
| 6400 | | datainput = -datainput; |
| 6401 | | } |
| 6402 | | } |
| 6403 | | } |
| 6404 | | else if (datainput < 0) |
| 6405 | | { |
| 6406 | | datainput = -datainput; |
| 6407 | | |
| 6408 | | shifter = 0x10; |
| 6409 | | } |
| 6410 | | |
| 6411 | | if (datainput) |
| 6412 | | { |
| 6413 | | for (int i = 0; i < 32; i++) |
| 6414 | | { |
| 6415 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 6416 | | { |
| 6417 | | shifter = i; |
| 6418 | | break; |
| 6419 | | } |
| 6420 | | } |
| 6421 | | } |
| 6422 | | |
| 6423 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 6424 | | INT32 fetchval = rsp_divtable[address]; |
| 6425 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6426 | | temp ^= rec >> 31; |
| 6427 | | |
| 6428 | | if (!rec) |
| 6429 | | { |
| 6430 | | temp = 0x7fffffff; |
| 6431 | | } |
| 6432 | | else if (rec == 0xffff8000) |
| 6433 | | { |
| 6434 | | temp = 0xffff0000; |
| 6435 | | } |
| 6436 | | rec = temp; |
| 6437 | | |
| 6438 | | m_reciprocal_res = rec; |
| 6439 | | m_dp_allowed = 0; |
| 6440 | | |
| 6441 | | SIMD_INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); |
| 6442 | | |
| 6443 | | for (int i = 0; i < 8; i++) |
| 6444 | | { |
| 6445 | | INT16 val; |
| 6446 | | SIMD_EXTRACT16(m_xv[VS2REG], val, VEC_EL_2(EL, i)); |
| 6447 | | VEC_SET_ACCUM_L(val, i); |
| 6448 | | } |
| 6449 | | } |
| 6450 | | |
| 6451 | | static void cfunc_rsp_vrcpl_simd(void *param) |
| 6452 | | { |
| 6453 | | ((rsp_device *)param)->ccfunc_rsp_vrcpl_simd(); |
| 6454 | | } |
| 6455 | | #endif |
| 6456 | | |
| 6457 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6458 | | |
| 6459 | | inline void rsp_device::ccfunc_rsp_vrcpl_scalar() |
| 6460 | | { |
| 6461 | | int op = m_rsp_state->arg0; |
| 6462 | | |
| 6463 | | INT32 shifter = 0; |
| 6464 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 6465 | | INT32 datainput = rec; |
| 6466 | | |
| 6467 | | if (m_dp_allowed) |
| 6468 | | { |
| 6469 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 6470 | | datainput = rec; |
| 6471 | | |
| 6472 | | if (rec < 0) |
| 6473 | | { |
| 6474 | | if (rec < -32768) |
| 6475 | | { |
| 6476 | | datainput = ~datainput; |
| 6477 | | } |
| 6478 | | else |
| 6479 | | { |
| 6480 | | datainput = -datainput; |
| 6481 | | } |
| 6482 | | } |
| 6483 | | } |
| 6484 | | else if (datainput < 0) |
| 6485 | | { |
| 6486 | | datainput = -datainput; |
| 6487 | | |
| 6488 | | shifter = 0x10; |
| 6489 | | } |
| 6490 | | |
| 6491 | | if (datainput) |
| 6492 | | { |
| 6493 | | for (int i = 0; i < 32; i++) |
| 6494 | | { |
| 6495 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 6496 | | { |
| 6497 | | shifter = i; |
| 6498 | | break; |
| 6499 | | } |
| 6500 | | } |
| 6501 | | } |
| 6502 | | |
| 6503 | | UINT32 address = (datainput << shifter) >> 22; |
| 6504 | | INT32 fetchval = rsp_divtable[address & 0x1ff]; |
| 6505 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 6506 | | temp ^= rec >> 31; |
| 6507 | | |
| 6508 | | if (!rec) |
| 6509 | | { |
| 6510 | | temp = 0x7fffffff; |
| 6511 | | } |
| 6512 | | else if (rec == 0xffff8000) |
| 6513 | | { |
| 6514 | | temp = 0xffff0000; |
| 6515 | | } |
| 6516 | | rec = temp; |
| 6517 | | |
| 6518 | | m_reciprocal_res = rec; |
| 6519 | | m_dp_allowed = 0; |
| 6520 | | |
| 6521 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; |
| 6522 | | |
| 6523 | | for (int i = 0; i < 8; i++) |
| 6524 | | { |
| 6525 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); |
| 6526 | | } |
| 6527 | | } |
| 6528 | | |
| 6529 | | static void cfunc_rsp_vrcpl_scalar(void *param) |
| 6530 | | { |
| 6531 | | ((rsp_device *)param)->ccfunc_rsp_vrcpl_scalar(); |
| 6532 | | } |
| 6533 | | #endif |
| 6534 | | |
| 6535 | | #if USE_SIMD |
| 6536 | | // VRCPH |
| 6537 | | // |
| 6538 | | // 31 25 24 20 15 10 5 0 |
| 6539 | | // ------------------------------------------------------ |
| 6540 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 | |
| 6541 | | // ------------------------------------------------------ |
| 6542 | | // |
| 6543 | | // Calculates reciprocal high part |
| 6544 | | |
| 6545 | | inline void rsp_device::ccfunc_rsp_vrcph_simd() |
| 6546 | | { /* VRCPH on the SIMD register file: latches one 16-bit element of VS2 into the upper half of m_reciprocal_high, sets m_dp_allowed, and stores bits 31..16 of m_reciprocal_res into the destination register. */ |
| 6547 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6548 | | |
| 6549 | | #if SIMUL_SIMD |
| 6550 | | m_old_reciprocal_res = m_reciprocal_res; /* snapshot the reciprocal state before this handler modifies it (SIMUL_SIMD builds only) */ |
| 6551 | | m_old_reciprocal_high = m_reciprocal_high; |
| 6552 | | m_old_dp_allowed = m_dp_allowed; |
| 6553 | | #endif |
| 6554 | | |
| 6555 | | UINT16 rcph; |
| 6556 | | SIMD_EXTRACT16(m_xv[VS2REG], rcph, EL); /* presumably copies 16-bit element EL of VS2 into rcph - macro not shown here */ |
| 6557 | | m_reciprocal_high = rcph << 16; |
| 6558 | | m_dp_allowed = 1; /* flag tested by low-part handlers (e.g. VRSQL) before they merge m_reciprocal_high into their input */ |
| 6559 | | |
| 6560 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* byte-shuffle of VS2 using the mask stored for EL; the scalar handler calls SET_ACCUM_L per lane instead */ |
| 6561 | | |
| 6562 | | SIMD_INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); /* m_reciprocal_res is not written in this function, so this is the value left by an earlier handler */ |
| 6563 | | } |
| 6564 | | |
| 6565 | | static void cfunc_rsp_vrcph_simd(void *param) |
| 6566 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its SIMD VRCPH handler. */ |
| 6567 | | ((rsp_device *)param)->ccfunc_rsp_vrcph_simd(); /* param is the device pointer handed to UML_CALLC together with this function */ |
| 6568 | | } |
| 6569 | | #endif |
| 6570 | | |
| 6571 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6572 | | |
| 6573 | | inline void rsp_device::ccfunc_rsp_vrcph_scalar() |
| 6574 | | { /* VRCPH on the scalar register file: latches one element of VS2 into the upper half of m_reciprocal_high, sets m_dp_allowed, calls SET_ACCUM_L for all eight lanes, then stores bits 31..16 of m_reciprocal_res into the destination element. */ |
| 6575 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6576 | | |
| 6577 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; /* source element index is EL masked to 0..7 */ |
| 6578 | | m_dp_allowed = 1; /* flag tested by low-part handlers (e.g. VRSQL) before they merge m_reciprocal_high into their input */ |
| 6579 | | |
| 6580 | | for (int i = 0; i < 8; i++) /* runs before the destination write below, so it reads VS2 unmodified */ |
| 6581 | | { |
| 6582 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* lane i takes the VS2 element that VEC_EL_2 selects for EL */ |
| 6583 | | } |
| 6584 | | |
| 6585 | | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); /* m_reciprocal_res is not written in this function, so this is the value left by an earlier handler */ |
| 6586 | | } |
| 6587 | | |
| 6588 | | static void cfunc_rsp_vrcph_scalar(void *param) |
| 6589 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its scalar VRCPH handler. */ |
| 6590 | | ((rsp_device *)param)->ccfunc_rsp_vrcph_scalar(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6591 | | } |
| 6592 | | #endif |
| 6593 | | |
| 6594 | | #if USE_SIMD |
| 6595 | | // VMOV |
| 6596 | | // |
| 6597 | | // 31 25 24 20 15 10 5 0 |
| 6598 | | // ------------------------------------------------------ |
| 6599 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 | |
| 6600 | | // ------------------------------------------------------ |
| 6601 | | // |
| 6602 | | // Moves element from vector to destination vector |
| 6603 | | |
| 6604 | | inline void rsp_device::ccfunc_rsp_vmov_simd() |
| 6605 | | { /* VMOV on the SIMD register file: moves one 16-bit element from VS2 into the destination register and refreshes m_accum_l from VS2. Touches none of the reciprocal state members. */ |
| 6606 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6607 | | |
| 6608 | | INT16 val; |
| 6609 | | SIMD_EXTRACT16(m_xv[VS2REG], val, EL); /* presumably copies 16-bit element EL of VS2 into val - macro not shown here */ |
| 6610 | | SIMD_INSERT16(m_xv[VDREG], val, VS1REG); /* presumably writes val into element VS1REG of the destination - macro not shown here */ |
| 6611 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* executed after the insert, so if VDREG equals VS2REG the shuffle sees the updated register */ |
| 6612 | | } |
| 6613 | | |
| 6614 | | static void cfunc_rsp_vmov_simd(void *param) |
| 6615 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its SIMD VMOV handler. */ |
| 6616 | | ((rsp_device *)param)->ccfunc_rsp_vmov_simd(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6617 | | } |
| 6618 | | #endif |
| 6619 | | |
| 6620 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6621 | | |
| 6622 | | inline void rsp_device::ccfunc_rsp_vmov_scalar() |
| 6623 | | { /* VMOV on the scalar register file: copies one element of VS2 into one element of the destination register, then calls SET_ACCUM_L for all eight lanes with elements of VS2. */ |
| 6624 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6625 | | |
| 6626 | | W_VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); /* both element indices are masked to 0..7 */ |
| 6627 | | for (int i = 0; i < 8; i++) /* runs after the destination write, so if VDREG equals VS2REG it reads the updated element */ |
| 6628 | | { |
| 6629 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* lane i takes the VS2 element that VEC_EL_2 selects for EL */ |
| 6630 | | } |
| 6631 | | } |
| 6632 | | |
| 6633 | | static void cfunc_rsp_vmov_scalar(void *param) |
| 6634 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its scalar VMOV handler. */ |
| 6635 | | ((rsp_device *)param)->ccfunc_rsp_vmov_scalar(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6636 | | } |
| 6637 | | #endif |
| 6638 | | |
| 6639 | | // VRSQ |
| 6640 | | // |
| 6641 | | // 31 25 24 20 15 10 5 0 |
| 6642 | | // ------------------------------------------------------ |
| 6643 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110100 | |
| 6644 | | // ------------------------------------------------------ |
| 6645 | | // |
| 6646 | | // Calculates reciprocal square-root |
| 6647 | | |
| 6648 | | inline void rsp_device::ccfunc_rsp_vrsq_scalar() |
| 6649 | | { /* VRSQ, scalar register file: runs two rsp_divtable lookups starting from one 16-bit element of VS2, stores the low 16 bits of the final value into the destination element and calls SET_ACCUM_L for all eight lanes. Reads m_dp_allowed; does not write m_dp_allowed, m_reciprocal_res or m_reciprocal_high. */ |
| 6650 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6651 | | |
| 6652 | | INT32 shifter = 0; |
| 6653 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); /* sign-extended source element */ |
| 6654 | | INT32 datainput = (rec < 0) ? (-rec) : (rec); /* absolute value of rec */ |
| 6655 | | |
| 6656 | | if (rec < 0) /* NOTE(review): rec came from an INT16, so the < -32768 test below can never be true; the else branch then negates datainput back to rec's negative value - confirm this is intended */ |
| 6657 | | { |
| 6658 | | if (rec < -32768) |
| 6659 | | { |
| 6660 | | datainput = ~datainput; |
| 6661 | | } |
| 6662 | | else |
| 6663 | | { |
| 6664 | | datainput = -datainput; |
| 6665 | | } |
| 6666 | | } |
| 6667 | | |
| 6668 | | if (datainput) |
| 6669 | | { |
| 6670 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards; shifter becomes the count of leading zero bits */ |
| 6671 | | { |
| 6672 | | if (datainput & (1 << ((~i) & 0x1f))) /* (~i) & 0x1f equals 31 - i for i in 0..31 */ |
| 6673 | | { |
| 6674 | | shifter = i; |
| 6675 | | break; |
| 6676 | | } |
| 6677 | | } |
| 6678 | | } |
| 6679 | | else |
| 6680 | | { |
| 6681 | | shifter = 0; |
| 6682 | | } |
| 6683 | | |
| 6684 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* shift the top set bit up to bit 31, then keep the nine bits 30..22 */ |
| 6685 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* force bit 9 on and replace bit 0 with the parity of shifter, giving an index in 0x200..0x3ff */ |
| 6686 | | |
| 6687 | | INT32 fetchval = rsp_divtable[address]; |
| 6688 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* table value placed below a set bit 30, then shifted right by half of (31 - shifter) */ |
| 6689 | | if (rec < 0) |
| 6690 | | { |
| 6691 | | temp = ~temp; /* bitwise complement for a negative source element */ |
| 6692 | | } |
| 6693 | | if (!rec) /* fixed results for a zero source and for 0xffff8000 (-32768) */ |
| 6694 | | { |
| 6695 | | temp = 0x7fffffff; |
| 6696 | | } |
| 6697 | | else if (rec == 0xffff8000) |
| 6698 | | { |
| 6699 | | temp = 0xffff0000; |
| 6700 | | } |
| 6701 | | rec = temp; /* from here on rec holds the first-pass result, not the source element */ |
| 6702 | | |
| 6703 | | if (rec < 0) /* NOTE(review): second pass - the sign tests now key off the first-pass result while datainput still holds its first-pass value; confirm this is intended */ |
| 6704 | | { |
| 6705 | | if (m_dp_allowed) |
| 6706 | | { |
| 6707 | | if (rec < -32768) |
| 6708 | | { |
| 6709 | | datainput = ~datainput; |
| 6710 | | } |
| 6711 | | else |
| 6712 | | { |
| 6713 | | datainput = -datainput; |
| 6714 | | } |
| 6715 | | } |
| 6716 | | else |
| 6717 | | { |
| 6718 | | datainput = -datainput; |
| 6719 | | } |
| 6720 | | } |
| 6721 | | |
| 6722 | | if (datainput) |
| 6723 | | { |
| 6724 | | for (int i = 0; i < 32; i++) /* same leading-zero scan as in the first pass */ |
| 6725 | | { |
| 6726 | | if (datainput & (1 << ((~i) & 0x1f))) |
| 6727 | | { |
| 6728 | | shifter = i; |
| 6729 | | break; |
| 6730 | | } |
| 6731 | | } |
| 6732 | | } |
| 6733 | | else |
| 6734 | | { |
| 6735 | | shifter = 0; |
| 6736 | | } |
| 6737 | | |
| 6738 | | address = ((datainput << shifter) & 0x7fc00000) >> 22; /* same index construction as in the first pass */ |
| 6739 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 6740 | | |
| 6741 | | fetchval = rsp_divtable[address]; |
| 6742 | | temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 6743 | | if (rec < 0) |
| 6744 | | { |
| 6745 | | temp = ~temp; |
| 6746 | | } |
| 6747 | | if (!rec) /* these overrides test the first-pass result and use 16-bit constants, unlike the first pass */ |
| 6748 | | { |
| 6749 | | temp = 0x7fff; |
| 6750 | | } |
| 6751 | | else if (rec == 0xffff8000) |
| 6752 | | { |
| 6753 | | temp = 0x0000; |
| 6754 | | } |
| 6755 | | rec = temp; |
| 6756 | | |
| 6757 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec; /* only the low 16 bits of the final value are stored */ |
| 6758 | | for (int i = 0; i < 8; i++) |
| 6759 | | { |
| 6760 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* lane i takes the VS2 element that VEC_EL_2 selects for EL */ |
| 6761 | | } |
| 6762 | | } |
| 6763 | | |
| 6764 | | static void cfunc_rsp_vrsq_scalar(void *param) |
| 6765 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its scalar VRSQ handler. */ |
| 6766 | | ((rsp_device *)param)->ccfunc_rsp_vrsq_scalar(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6767 | | } |
| 6768 | | |
| 6769 | | #if USE_SIMD |
| 6770 | | // VRSQL |
| 6771 | | // |
| 6772 | | // 31 25 24 20 15 10 5 0 |
| 6773 | | // ------------------------------------------------------ |
| 6774 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 | |
| 6775 | | // ------------------------------------------------------ |
| 6776 | | // |
| 6777 | | // Calculates reciprocal square-root low part |
| 6778 | | |
| 6779 | | inline void rsp_device::ccfunc_rsp_vrsql_simd() |
| 6780 | | { /* VRSQL on the SIMD register file: builds the input from one 16-bit element of VS2 (combined with m_reciprocal_high when m_dp_allowed is set), performs one rsp_divtable lookup, saves the 32-bit result in m_reciprocal_res, clears m_dp_allowed, and stores the low 16 bits into the destination register. */ |
| 6781 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6782 | | |
| 6783 | | #if SIMUL_SIMD |
| 6784 | | m_old_reciprocal_res = m_reciprocal_res; /* snapshot the reciprocal state before this handler modifies it (SIMUL_SIMD builds only) */ |
| 6785 | | m_old_reciprocal_high = m_reciprocal_high; |
| 6786 | | m_old_dp_allowed = m_dp_allowed; |
| 6787 | | #endif |
| 6788 | | |
| 6789 | | INT32 shifter = 0; |
| 6790 | | UINT16 val; |
| 6791 | | SIMD_EXTRACT16(m_xv[VS2REG], val, EL); /* presumably copies 16-bit element EL of VS2 into val - macro not shown here */ |
| 6792 | | INT32 rec = (INT16)val; /* sign-extended source element */ |
| 6793 | | INT32 datainput = rec; |
| 6794 | | |
| 6795 | | if (m_dp_allowed) |
| 6796 | | { |
| 6797 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; /* low half from this element, high half from the latched m_reciprocal_high */ |
| 6798 | | datainput = rec; |
| 6799 | | |
| 6800 | | if (rec < 0) /* negative 32-bit input: complement below -32768, otherwise negate */ |
| 6801 | | { |
| 6802 | | if (rec < -32768) |
| 6803 | | { |
| 6804 | | datainput = ~datainput; |
| 6805 | | } |
| 6806 | | else |
| 6807 | | { |
| 6808 | | datainput = -datainput; |
| 6809 | | } |
| 6810 | | } |
| 6811 | | } |
| 6812 | | else if (datainput < 0) |
| 6813 | | { |
| 6814 | | datainput = -datainput; |
| 6815 | | |
| 6816 | | shifter = 0x10; /* NOTE(review): datainput is non-zero on this path, so the scan below always overwrites this value - confirm whether it is still needed */ |
| 6817 | | } |
| 6818 | | |
| 6819 | | if (datainput) |
| 6820 | | { |
| 6821 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards; shifter becomes the count of leading zero bits */ |
| 6822 | | { |
| 6823 | | if (datainput & (1 << ((~i) & 0x1f))) /* (~i) & 0x1f equals 31 - i for i in 0..31 */ |
| 6824 | | { |
| 6825 | | shifter = i; |
| 6826 | | break; |
| 6827 | | } |
| 6828 | | } |
| 6829 | | } |
| 6830 | | |
| 6831 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* shift the top set bit up to bit 31, then keep the nine bits 30..22 */ |
| 6832 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* force bit 9 on and replace bit 0 with the parity of shifter, giving an index in 0x200..0x3ff */ |
| 6833 | | |
| 6834 | | INT32 fetchval = rsp_divtable[address]; |
| 6835 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* table value placed below a set bit 30, then shifted right by half of (31 - shifter) */ |
| 6836 | | temp ^= rec >> 31; /* complements temp when rec is negative; relies on the shift of a negative INT32 being arithmetic */ |
| 6837 | | |
| 6838 | | if (!rec) /* fixed results for a zero input and for 0xffff8000 (-32768) */ |
| 6839 | | { |
| 6840 | | temp = 0x7fffffff; |
| 6841 | | } |
| 6842 | | else if (rec == 0xffff8000) |
| 6843 | | { |
| 6844 | | temp = 0xffff0000; |
| 6845 | | } |
| 6846 | | rec = temp; |
| 6847 | | |
| 6848 | | m_reciprocal_res = rec; /* full 32-bit result; the VRCPH/VRSQH handlers later store its upper 16 bits */ |
| 6849 | | m_dp_allowed = 0; |
| 6850 | | |
| 6851 | | SIMD_INSERT16(m_xv[VDREG], (UINT16)rec, VS1REG); /* low 16 bits of the result go to the destination */ |
| 6852 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* executed after the insert, so if VDREG equals VS2REG the shuffle sees the updated register */ |
| 6853 | | } |
| 6854 | | |
| 6855 | | static void cfunc_rsp_vrsql_simd(void *param) |
| 6856 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its SIMD VRSQL handler. */ |
| 6857 | | ((rsp_device *)param)->ccfunc_rsp_vrsql_simd(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6858 | | } |
| 6859 | | #endif |
| 6860 | | |
| 6861 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6862 | | |
| 6863 | | inline void rsp_device::ccfunc_rsp_vrsql_scalar() |
| 6864 | | { /* VRSQL on the scalar register file: builds the input from one 16-bit element of VS2 (combined with m_reciprocal_high when m_dp_allowed is set), performs one rsp_divtable lookup, saves the 32-bit result in m_reciprocal_res, clears m_dp_allowed, stores the low 16 bits into the destination element and calls SET_ACCUM_L for all eight lanes. */ |
| 6865 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6866 | | |
| 6867 | | INT32 shifter = 0; |
| 6868 | | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); /* sign-extended source element */ |
| 6869 | | INT32 datainput = rec; |
| 6870 | | |
| 6871 | | if (m_dp_allowed) |
| 6872 | | { |
| 6873 | | rec = (rec & 0x0000ffff) | m_reciprocal_high; /* low half from this element, high half from the latched m_reciprocal_high */ |
| 6874 | | datainput = rec; |
| 6875 | | |
| 6876 | | if (rec < 0) /* negative 32-bit input: complement below -32768, otherwise negate */ |
| 6877 | | { |
| 6878 | | if (rec < -32768) |
| 6879 | | { |
| 6880 | | datainput = ~datainput; |
| 6881 | | } |
| 6882 | | else |
| 6883 | | { |
| 6884 | | datainput = -datainput; |
| 6885 | | } |
| 6886 | | } |
| 6887 | | } |
| 6888 | | else if (datainput < 0) |
| 6889 | | { |
| 6890 | | datainput = -datainput; |
| 6891 | | |
| 6892 | | shifter = 0x10; /* NOTE(review): datainput is non-zero on this path, so the scan below always overwrites this value - confirm whether it is still needed */ |
| 6893 | | } |
| 6894 | | |
| 6895 | | if (datainput) |
| 6896 | | { |
| 6897 | | for (int i = 0; i < 32; i++) /* scan from bit 31 downwards; shifter becomes the count of leading zero bits */ |
| 6898 | | { |
| 6899 | | if (datainput & (1 << ((~i) & 0x1f))) /* (~i) & 0x1f equals 31 - i for i in 0..31 */ |
| 6900 | | { |
| 6901 | | shifter = i; |
| 6902 | | break; |
| 6903 | | } |
| 6904 | | } |
| 6905 | | } |
| 6906 | | |
| 6907 | | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; /* shift the top set bit up to bit 31, then keep the nine bits 30..22 */ |
| 6908 | | address = ((address | 0x200) & 0x3fe) | (shifter & 1); /* force bit 9 on and replace bit 0 with the parity of shifter, giving an index in 0x200..0x3ff */ |
| 6909 | | |
| 6910 | | INT32 fetchval = rsp_divtable[address]; |
| 6911 | | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); /* table value placed below a set bit 30, then shifted right by half of (31 - shifter) */ |
| 6912 | | temp ^= rec >> 31; /* complements temp when rec is negative; relies on the shift of a negative INT32 being arithmetic */ |
| 6913 | | |
| 6914 | | if (!rec) /* fixed results for a zero input and for 0xffff8000 (-32768) */ |
| 6915 | | { |
| 6916 | | temp = 0x7fffffff; |
| 6917 | | } |
| 6918 | | else if (rec == 0xffff8000) |
| 6919 | | { |
| 6920 | | temp = 0xffff0000; |
| 6921 | | } |
| 6922 | | rec = temp; |
| 6923 | | |
| 6924 | | m_reciprocal_res = rec; /* full 32-bit result; the VRCPH/VRSQH handlers later store its upper 16 bits */ |
| 6925 | | m_dp_allowed = 0; |
| 6926 | | |
| 6927 | | W_VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); /* low 16 bits of the result go to the destination element */ |
| 6928 | | for (int i = 0; i < 8; i++) /* runs after the destination write, so if VDREG equals VS2REG it reads the updated element */ |
| 6929 | | { |
| 6930 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* lane i takes the VS2 element that VEC_EL_2 selects for EL */ |
| 6931 | | } |
| 6932 | | } |
| 6933 | | |
| 6934 | | static void cfunc_rsp_vrsql_scalar(void *param) |
| 6935 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its scalar VRSQL handler. */ |
| 6936 | | ((rsp_device *)param)->ccfunc_rsp_vrsql_scalar(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6937 | | } |
| 6938 | | #endif |
| 6939 | | |
| 6940 | | #if USE_SIMD |
| 6941 | | // VRSQH |
| 6942 | | // |
| 6943 | | // 31 25 24 20 15 10 5 0 |
| 6944 | | // ------------------------------------------------------ |
| 6945 | | // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 | |
| 6946 | | // ------------------------------------------------------ |
| 6947 | | // |
| 6948 | | // Calculates reciprocal square-root high part |
| 6949 | | |
| 6950 | | inline void rsp_device::ccfunc_rsp_vrsqh_simd() |
| 6951 | | { /* VRSQH on the SIMD register file: latches one 16-bit element of VS2 into the upper half of m_reciprocal_high, sets m_dp_allowed, and stores bits 31..16 of m_reciprocal_res into the destination register. The statements match the SIMD VRCPH handler apart from the local variable name. */ |
| 6952 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6953 | | |
| 6954 | | #if SIMUL_SIMD |
| 6955 | | m_old_reciprocal_res = m_reciprocal_res; /* snapshot the reciprocal state before this handler modifies it (SIMUL_SIMD builds only) */ |
| 6956 | | m_old_reciprocal_high = m_reciprocal_high; |
| 6957 | | m_old_dp_allowed = m_dp_allowed; |
| 6958 | | #endif |
| 6959 | | |
| 6960 | | UINT16 val; |
| 6961 | | SIMD_EXTRACT16(m_xv[VS2REG], val, EL); /* presumably copies 16-bit element EL of VS2 into val - macro not shown here */ |
| 6962 | | m_reciprocal_high = val << 16; |
| 6963 | | m_dp_allowed = 1; /* tested by the VRSQL handlers before they merge m_reciprocal_high into their input */ |
| 6964 | | |
| 6965 | | m_accum_l = _mm_shuffle_epi8(m_xv[VS2REG], vec_shuf_inverse[EL]); /* byte-shuffle of VS2 using the mask stored for EL; done before the destination write below */ |
| 6966 | | |
| 6967 | | SIMD_INSERT16(m_xv[VDREG], (INT16)(m_reciprocal_res >> 16), VS1REG); // store high part |
| 6968 | | } |
| 6969 | | |
| 6970 | | static void cfunc_rsp_vrsqh_simd(void *param) |
| 6971 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its SIMD VRSQH handler. */ |
| 6972 | | ((rsp_device *)param)->ccfunc_rsp_vrsqh_simd(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6973 | | } |
| 6974 | | #endif |
| 6975 | | |
| 6976 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 6977 | | |
| 6978 | | inline void rsp_device::ccfunc_rsp_vrsqh_scalar() |
| 6979 | | { /* VRSQH on the scalar register file: latches one element of VS2 into the upper half of m_reciprocal_high, sets m_dp_allowed, calls SET_ACCUM_L for all eight lanes, then stores bits 31..16 of m_reciprocal_res into the destination element. */ |
| 6980 | | int op = m_rsp_state->arg0; /* opcode word; presumably consumed by the VS1REG/VS2REG/VDREG/EL macros, whose definitions are outside this view */ |
| 6981 | | |
| 6982 | | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; /* source element index is EL masked to 0..7 */ |
| 6983 | | m_dp_allowed = 1; /* tested by the VRSQL handlers before they merge m_reciprocal_high into their input */ |
| 6984 | | |
| 6985 | | for (int i = 0; i < 8; i++) /* runs before the destination write below, so it reads VS2 unmodified */ |
| 6986 | | { |
| 6987 | | SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i); /* lane i takes the VS2 element that VEC_EL_2 selects for EL */ |
| 6988 | | } |
| 6989 | | |
| 6990 | | W_VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 6991 | | } |
| 6992 | | |
| 6993 | | static void cfunc_rsp_vrsqh_scalar(void *param) |
| 6994 | | { /* Free-function trampoline: casts the opaque pointer back to rsp_device and runs its scalar VRSQH handler. */ |
| 6995 | | ((rsp_device *)param)->ccfunc_rsp_vrsqh_scalar(); /* presumably param is the device pointer registered with UML_CALLC - call site not in view */ |
| 6996 | | } |
| 6997 | | #endif |
| 6998 | | |
| 6999 | | |
| 7000 | 322 | inline void rsp_device::ccfunc_sp_set_status_cb() |
| 7001 | 323 | { |
| 7002 | 324 | m_sp_set_status_func(0, m_rsp_state->arg0, 0xffffffff); |
| r241957 | r241958 | |
| 7355 | 677 | if (size == 1) |
| 7356 | 678 | { |
| 7357 | 679 | UML_MOV(block, mem(&m_rsp_state->arg0), I0); // mov [arg0],i0 ; address |
| 7358 | | UML_CALLC(block, cfunc_read8, this); // callc cfunc_printf_debug |
| 680 | UML_CALLC(block, cfunc_read8, this); // callc read8 |
| 7359 | 681 | UML_MOV(block, I0, mem(&m_rsp_state->arg0)); // mov i0,[arg0],i0 ; result |
| 7360 | 682 | } |
| 7361 | 683 | else if (size == 2) |
| r241957 | r241958 | |
| 7581 | 903 | UML_MAPVAR(block, MAPVAR_CYCLES, compiler->cycles); // mapvar CYCLES,compiler->cycles |
| 7582 | 904 | } |
| 7583 | 905 | |
| 7584 | | |
| 7585 | | /*------------------------------------------------- |
| 7586 | | generate_vector_opcode - generate code for a |
| 7587 | | vector opcode |
| 7588 | | -------------------------------------------------*/ |
| 7589 | | |
| 7590 | | #if USE_SIMD |
| 7591 | | |
| 7592 | | int rsp_device::generate_vector_opcode(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 7593 | | { |
| 7594 | | UINT32 op = desc->opptr.l[0]; |
| 7595 | | // Opcode legend: |
| 7596 | | // E = VS2 element type |
| 7597 | | // S = VS1, Source vector 1 |
| 7598 | | // T = VS2, Source vector 2 |
| 7599 | | // D = Destination vector |
| 7600 | | |
| 7601 | | switch (op & 0x3f) |
| 7602 | | { |
| 7603 | | case 0x00: /* VMULF */ |
| 7604 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7605 | | UML_CALLC(block, cfunc_rsp_vmulf_simd, this); |
| 7606 | | #if SIMUL_SIMD |
| 7607 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7608 | | UML_CALLC(block, cfunc_rsp_vmulf_scalar, this); |
| 7609 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7610 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7611 | | #endif |
| 7612 | | return TRUE; |
| 7613 | | |
| 7614 | | case 0x01: /* VMULU */ |
| 7615 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7616 | | UML_CALLC(block, cfunc_rsp_vmulu_simd, this); |
| 7617 | | #if SIMUL_SIMD |
| 7618 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7619 | | UML_CALLC(block, cfunc_rsp_vmulu_scalar, this); |
| 7620 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7621 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7622 | | #endif |
| 7623 | | return TRUE; |
| 7624 | | |
| 7625 | | case 0x04: /* VMUDL */ |
| 7626 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7627 | | UML_CALLC(block, cfunc_rsp_vmudl_simd, this); |
| 7628 | | #if SIMUL_SIMD |
| 7629 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7630 | | UML_CALLC(block, cfunc_rsp_vmudl_scalar, this); |
| 7631 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7632 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7633 | | #endif |
| 7634 | | return TRUE; |
| 7635 | | |
| 7636 | | case 0x05: /* VMUDM */ |
| 7637 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7638 | | UML_CALLC(block, cfunc_rsp_vmudm_simd, this); |
| 7639 | | #if SIMUL_SIMD |
| 7640 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7641 | | UML_CALLC(block, cfunc_rsp_vmudm_scalar, this); |
| 7642 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7643 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7644 | | #endif |
| 7645 | | return TRUE; |
| 7646 | | |
| 7647 | | case 0x06: /* VMUDN */ |
| 7648 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7649 | | UML_CALLC(block, cfunc_rsp_vmudn_simd, this); |
| 7650 | | #if SIMUL_SIMD |
| 7651 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7652 | | UML_CALLC(block, cfunc_rsp_vmudn_scalar, this); |
| 7653 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7654 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7655 | | #endif |
| 7656 | | return TRUE; |
| 7657 | | |
| 7658 | | case 0x07: /* VMUDH */ |
| 7659 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7660 | | UML_CALLC(block, cfunc_rsp_vmudh_simd, this); |
| 7661 | | #if SIMUL_SIMD |
| 7662 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7663 | | UML_CALLC(block, cfunc_rsp_vmudh_scalar, this); |
| 7664 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7665 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7666 | | #endif |
| 7667 | | return TRUE; |
| 7668 | | |
| 7669 | | case 0x08: /* VMACF */ |
| 7670 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7671 | | UML_CALLC(block, cfunc_rsp_vmacf_simd, this); |
| 7672 | | #if SIMUL_SIMD |
| 7673 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7674 | | UML_CALLC(block, cfunc_rsp_vmacf_scalar, this); |
| 7675 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7676 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7677 | | #endif |
| 7678 | | return TRUE; |
| 7679 | | |
| 7680 | | case 0x09: /* VMACU */ |
| 7681 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7682 | | UML_CALLC(block, cfunc_rsp_vmacu_simd, this); |
| 7683 | | #if SIMUL_SIMD |
| 7684 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7685 | | UML_CALLC(block, cfunc_rsp_vmacu_scalar, this); |
| 7686 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7687 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7688 | | #endif |
| 7689 | | return TRUE; |
| 7690 | | |
| 7691 | | case 0x0c: /* VMADL */ |
| 7692 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7693 | | UML_CALLC(block, cfunc_rsp_vmadl_simd, this); |
| 7694 | | #if SIMUL_SIMD |
| 7695 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7696 | | UML_CALLC(block, cfunc_rsp_vmadl_scalar, this); |
| 7697 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7698 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7699 | | #endif |
| 7700 | | return TRUE; |
| 7701 | | |
| 7702 | | case 0x0d: /* VMADM */ |
| 7703 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7704 | | UML_CALLC(block, cfunc_rsp_vmadm_simd, this); |
| 7705 | | #if SIMUL_SIMD |
| 7706 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7707 | | UML_CALLC(block, cfunc_rsp_vmadm_scalar, this); |
| 7708 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7709 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7710 | | #endif |
| 7711 | | return TRUE; |
| 7712 | | |
| 7713 | | case 0x0e: /* VMADN */ |
| 7714 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7715 | | UML_CALLC(block, cfunc_rsp_vmadn_simd, this); |
| 7716 | | #if SIMUL_SIMD |
| 7717 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7718 | | UML_CALLC(block, cfunc_rsp_vmadn_scalar, this); |
| 7719 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7720 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7721 | | #endif |
| 7722 | | return TRUE; |
| 7723 | | |
| 7724 | | case 0x0f: /* VMADH */ |
| 7725 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7726 | | UML_CALLC(block, cfunc_rsp_vmadh_simd, this); |
| 7727 | | #if SIMUL_SIMD |
| 7728 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7729 | | UML_CALLC(block, cfunc_rsp_vmadh_scalar, this); |
| 7730 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7731 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7732 | | #endif |
| 7733 | | return TRUE; |
| 7734 | | |
| 7735 | | case 0x10: /* VADD */ |
| 7736 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7737 | | UML_CALLC(block, cfunc_rsp_vadd_simd, this); |
| 7738 | | #if SIMUL_SIMD |
| 7739 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7740 | | UML_CALLC(block, cfunc_rsp_vadd_scalar, this); |
| 7741 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7742 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7743 | | #endif |
| 7744 | | return TRUE; |
| 7745 | | |
| 7746 | | case 0x11: /* VSUB */ |
| 7747 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7748 | | UML_CALLC(block, cfunc_rsp_vsub_simd, this); |
| 7749 | | #if SIMUL_SIMD |
| 7750 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7751 | | UML_CALLC(block, cfunc_rsp_vsub_scalar, this); |
| 7752 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7753 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7754 | | #endif |
| 7755 | | return TRUE; |
| 7756 | | |
| 7757 | | case 0x13: /* VABS */ |
| 7758 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7759 | | UML_CALLC(block, cfunc_rsp_vabs_simd, this); |
| 7760 | | #if SIMUL_SIMD |
| 7761 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7762 | | UML_CALLC(block, cfunc_rsp_vabs_scalar, this); |
| 7763 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7764 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7765 | | #endif |
| 7766 | | return TRUE; |
| 7767 | | |
| 7768 | | case 0x14: /* VADDC */ |
| 7769 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7770 | | UML_CALLC(block, cfunc_rsp_vaddc_simd, this); |
| 7771 | | #if SIMUL_SIMD |
| 7772 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7773 | | UML_CALLC(block, cfunc_rsp_vaddc_scalar, this); |
| 7774 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7775 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7776 | | #endif |
| 7777 | | return TRUE; |
| 7778 | | |
| 7779 | | case 0x15: /* VSUBC */ |
| 7780 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7781 | | UML_CALLC(block, cfunc_rsp_vsubc_simd, this); |
| 7782 | | #if SIMUL_SIMD |
| 7783 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7784 | | UML_CALLC(block, cfunc_rsp_vsubc_scalar, this); |
| 7785 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7786 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7787 | | #endif |
| 7788 | | return TRUE; |
| 7789 | | |
| 7790 | | case 0x16: /* VADDB */ |
| 7791 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7792 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7793 | | return TRUE; |
| 7794 | | |
| 7795 | | case 0x17: /* VSUBB (reserved, functionally identical to VADDB) */ |
| 7796 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7797 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7798 | | return TRUE; |
| 7799 | | |
| 7800 | | case 0x18: /* VACCB (reserved, functionally identical to VADDB) */ |
| 7801 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7802 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7803 | | return TRUE; |
| 7804 | | |
| 7805 | | case 0x19: /* VSUCB (reserved, functionally identical to VADDB) */ |
| 7806 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7807 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 7808 | | return TRUE; |
| 7809 | | |
| 7810 | | case 0x1d: /* VSAW */ |
| 7811 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7812 | | UML_CALLC(block, cfunc_rsp_vsaw_simd, this); |
| 7813 | | #if SIMUL_SIMD |
| 7814 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7815 | | UML_CALLC(block, cfunc_rsp_vsaw_scalar, this); |
| 7816 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7817 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7818 | | #endif |
| 7819 | | return TRUE; |
| 7820 | | |
| 7821 | | case 0x20: /* VLT */ |
| 7822 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7823 | | UML_CALLC(block, cfunc_rsp_vlt_simd, this); |
| 7824 | | #if SIMUL_SIMD |
| 7825 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7826 | | UML_CALLC(block, cfunc_rsp_vlt_scalar, this); |
| 7827 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7828 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7829 | | #endif |
| 7830 | | return TRUE; |
| 7831 | | |
| 7832 | | case 0x21: /* VEQ */ |
| 7833 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7834 | | UML_CALLC(block, cfunc_rsp_veq_simd, this); |
| 7835 | | #if SIMUL_SIMD |
| 7836 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7837 | | UML_CALLC(block, cfunc_rsp_veq_scalar, this); |
| 7838 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7839 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7840 | | #endif |
| 7841 | | return TRUE; |
| 7842 | | |
| 7843 | | case 0x22: /* VNE */ |
| 7844 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7845 | | UML_CALLC(block, cfunc_rsp_vne_simd, this); |
| 7846 | | #if SIMUL_SIMD |
| 7847 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7848 | | UML_CALLC(block, cfunc_rsp_vne_scalar, this); |
| 7849 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7850 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7851 | | #endif |
| 7852 | | return TRUE; |
| 7853 | | |
| 7854 | | case 0x23: /* VGE */ |
| 7855 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7856 | | UML_CALLC(block, cfunc_rsp_vge_simd, this); |
| 7857 | | #if SIMUL_SIMD |
| 7858 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7859 | | UML_CALLC(block, cfunc_rsp_vge_scalar, this); |
| 7860 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7861 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7862 | | #endif |
| 7863 | | return TRUE; |
| 7864 | | |
| 7865 | | case 0x24: /* VCL */ |
| 7866 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7867 | | UML_CALLC(block, cfunc_rsp_vcl_simd, this); |
| 7868 | | #if SIMUL_SIMD |
| 7869 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7870 | | UML_CALLC(block, cfunc_rsp_vcl_scalar, this); |
| 7871 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7872 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7873 | | #endif |
| 7874 | | return TRUE; |
| 7875 | | |
| 7876 | | case 0x25: /* VCH */ |
| 7877 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7878 | | UML_CALLC(block, cfunc_rsp_vch_simd, this); |
| 7879 | | #if SIMUL_SIMD |
| 7880 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7881 | | UML_CALLC(block, cfunc_rsp_vch_scalar, this); |
| 7882 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7883 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7884 | | #endif |
| 7885 | | return TRUE; |
| 7886 | | |
| 7887 | | case 0x26: /* VCR */ |
| 7888 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7889 | | UML_CALLC(block, cfunc_rsp_vcr_simd, this); |
| 7890 | | #if SIMUL_SIMD |
| 7891 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7892 | | UML_CALLC(block, cfunc_rsp_vcr_scalar, this); |
| 7893 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7894 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7895 | | #endif |
| 7896 | | return TRUE; |
| 7897 | | |
| 7898 | | case 0x27: /* VMRG */ |
| 7899 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7900 | | UML_CALLC(block, cfunc_rsp_vmrg_simd, this); |
| 7901 | | #if SIMUL_SIMD |
| 7902 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7903 | | UML_CALLC(block, cfunc_rsp_vmrg_scalar, this); |
| 7904 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7905 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7906 | | #endif |
| 7907 | | return TRUE; |
| 7908 | | |
| 7909 | | case 0x28: /* VAND */ |
| 7910 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7911 | | UML_CALLC(block, cfunc_rsp_vand_simd, this); |
| 7912 | | #if SIMUL_SIMD |
| 7913 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7914 | | UML_CALLC(block, cfunc_rsp_vand_scalar, this); |
| 7915 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7916 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7917 | | #endif |
| 7918 | | return TRUE; |
| 7919 | | |
| 7920 | | case 0x29: /* VNAND */ |
| 7921 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7922 | | UML_CALLC(block, cfunc_rsp_vnand_simd, this); |
| 7923 | | #if SIMUL_SIMD |
| 7924 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7925 | | UML_CALLC(block, cfunc_rsp_vnand_scalar, this); |
| 7926 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7927 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7928 | | #endif |
| 7929 | | return TRUE; |
| 7930 | | |
| 7931 | | case 0x2a: /* VOR */ |
| 7932 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7933 | | UML_CALLC(block, cfunc_rsp_vor_simd, this); |
| 7934 | | #if SIMUL_SIMD |
| 7935 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7936 | | UML_CALLC(block, cfunc_rsp_vor_scalar, this); |
| 7937 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7938 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7939 | | #endif |
| 7940 | | return TRUE; |
| 7941 | | |
| 7942 | | case 0x2b: /* VNOR */ |
| 7943 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7944 | | UML_CALLC(block, cfunc_rsp_vnor_simd, this); |
| 7945 | | #if SIMUL_SIMD |
| 7946 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7947 | | UML_CALLC(block, cfunc_rsp_vnor_scalar, this); |
| 7948 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7949 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7950 | | #endif |
| 7951 | | return TRUE; |
| 7952 | | |
| 7953 | | case 0x2c: /* VXOR */ |
| 7954 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7955 | | UML_CALLC(block, cfunc_rsp_vxor_simd, this); |
| 7956 | | #if SIMUL_SIMD |
| 7957 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7958 | | UML_CALLC(block, cfunc_rsp_vxor_scalar, this); |
| 7959 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7960 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7961 | | #endif |
| 7962 | | return TRUE; |
| 7963 | | |
| 7964 | | case 0x2d: /* VNXOR */ |
| 7965 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7966 | | UML_CALLC(block, cfunc_rsp_vnxor_simd, this); |
| 7967 | | #if SIMUL_SIMD |
| 7968 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7969 | | UML_CALLC(block, cfunc_rsp_vnxor_scalar, this); |
| 7970 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7971 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7972 | | #endif |
| 7973 | | return TRUE; |
| 7974 | | |
| 7975 | | case 0x30: /* VRCP */ |
| 7976 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7977 | | UML_CALLC(block, cfunc_rsp_vrcp_simd, this); |
| 7978 | | #if SIMUL_SIMD |
| 7979 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7980 | | UML_CALLC(block, cfunc_rsp_vrcp_scalar, this); |
| 7981 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7982 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7983 | | #endif |
| 7984 | | return TRUE; |
| 7985 | | |
| 7986 | | case 0x31: /* VRCPL */ |
| 7987 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7988 | | UML_CALLC(block, cfunc_rsp_vrcpl_simd, this); |
| 7989 | | #if SIMUL_SIMD |
| 7990 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 7991 | | UML_CALLC(block, cfunc_rsp_vrcpl_scalar, this); |
| 7992 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 7993 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 7994 | | #endif |
| 7995 | | return TRUE; |
| 7996 | | |
| 7997 | | case 0x32: /* VRCPH */ |
| 7998 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 7999 | | UML_CALLC(block, cfunc_rsp_vrcph_simd, this); |
| 8000 | | #if SIMUL_SIMD |
| 8001 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8002 | | UML_CALLC(block, cfunc_rsp_vrcph_scalar, this); |
| 8003 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8004 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8005 | | #endif |
| 8006 | | return TRUE; |
| 8007 | | |
| 8008 | | case 0x33: /* VMOV */ |
| 8009 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8010 | | UML_CALLC(block, cfunc_rsp_vmov_simd, this); |
| 8011 | | #if SIMUL_SIMD |
| 8012 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8013 | | UML_CALLC(block, cfunc_rsp_vmov_scalar, this); |
| 8014 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8015 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8016 | | #endif |
| 8017 | | return TRUE; |
| 8018 | | |
| 8019 | | case 0x34: /* VRSQ */ |
| 8020 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8021 | | UML_CALLC(block, cfunc_rsp_vrsq_scalar, this); /* this case calls the scalar helper directly; unlike the neighbouring cases there is no SIMUL_SIMD cross-check */ |
| 8022 | | return TRUE; |
| 8023 | | |
| 8024 | | case 0x35: /* VRSQL */ |
| 8025 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8026 | | UML_CALLC(block, cfunc_rsp_vrsql_simd, this); |
| 8027 | | #if SIMUL_SIMD |
| 8028 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8029 | | UML_CALLC(block, cfunc_rsp_vrsql_scalar, this); |
| 8030 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8031 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8032 | | #endif |
| 8033 | | return TRUE; |
| 8034 | | |
| 8035 | | case 0x36: /* VRSQH */ |
| 8036 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8037 | | UML_CALLC(block, cfunc_rsp_vrsqh_simd, this); |
| 8038 | | #if SIMUL_SIMD |
| 8039 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8040 | | UML_CALLC(block, cfunc_rsp_vrsqh_scalar, this); |
| 8041 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8042 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8043 | | #endif |
| 8044 | | return TRUE; |
| 8045 | | |
| 8046 | | case 0x37: /* VNOP */ |
| 8047 | | case 0x3F: /* VNULL */ |
| 8048 | | return TRUE; |
| 8049 | | |
| 8050 | | default: |
| 8051 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8052 | | UML_CALLC(block, cfunc_unimplemented_opcode, this); |
| 8053 | | return FALSE; |
| 8054 | | } |
| 8055 | | } |
| 8056 | | |
| 8057 | | #else |
| 8058 | | |
| 8059 | | int rsp_device::generate_vector_opcode(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) /* scalar-only variant (the #else branch): emits UML for one vector opcode; returns TRUE when the opcode is handled, FALSE for an unknown function code */ |
| 8060 | | { |
| 8061 | | UINT32 op = desc->opptr.l[0]; |
| 8062 | | // Opcode legend: |
| 8063 | | // E = VS2 element type |
| 8064 | | // S = VS1, Source vector 1 |
| 8065 | | // T = VS2, Source vector 2 |
| 8066 | | // D = Destination vector |
| 8067 | | /* Every handled case below stores desc->opptr.l[0] into m_rsp_state->arg0 and emits a call to the matching cfunc_rsp_*_scalar helper. */ |
| 8068 | | switch (op & 0x3f) /* dispatch on the low 6 bits of op */ |
| 8069 | | { |
| 8070 | | case 0x00: /* VMULF */ |
| 8071 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8072 | | UML_CALLC(block, cfunc_rsp_vmulf_scalar, this); |
| 8073 | | return TRUE; |
| 8074 | | |
| 8075 | | case 0x01: /* VMULU */ |
| 8076 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8077 | | UML_CALLC(block, cfunc_rsp_vmulu_scalar, this); |
| 8078 | | return TRUE; |
| 8079 | | |
| 8080 | | case 0x04: /* VMUDL */ |
| 8081 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8082 | | UML_CALLC(block, cfunc_rsp_vmudl_scalar, this); |
| 8083 | | return TRUE; |
| 8084 | | |
| 8085 | | case 0x05: /* VMUDM */ |
| 8086 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8087 | | UML_CALLC(block, cfunc_rsp_vmudm_scalar, this); |
| 8088 | | return TRUE; |
| 8089 | | |
| 8090 | | case 0x06: /* VMUDN */ |
| 8091 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8092 | | UML_CALLC(block, cfunc_rsp_vmudn_scalar, this); |
| 8093 | | return TRUE; |
| 8094 | | |
| 8095 | | case 0x07: /* VMUDH */ |
| 8096 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8097 | | UML_CALLC(block, cfunc_rsp_vmudh_scalar, this); |
| 8098 | | return TRUE; |
| 8099 | | |
| 8100 | | case 0x08: /* VMACF */ |
| 8101 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8102 | | UML_CALLC(block, cfunc_rsp_vmacf_scalar, this); |
| 8103 | | return TRUE; |
| 8104 | | |
| 8105 | | case 0x09: /* VMACU */ |
| 8106 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8107 | | UML_CALLC(block, cfunc_rsp_vmacu_scalar, this); |
| 8108 | | return TRUE; |
| 8109 | | |
| 8110 | | case 0x0c: /* VMADL */ |
| 8111 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8112 | | UML_CALLC(block, cfunc_rsp_vmadl_scalar, this); |
| 8113 | | return TRUE; |
| 8114 | | |
| 8115 | | case 0x0d: /* VMADM */ |
| 8116 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8117 | | UML_CALLC(block, cfunc_rsp_vmadm_scalar, this); |
| 8118 | | return TRUE; |
| 8119 | | |
| 8120 | | case 0x0e: /* VMADN */ |
| 8121 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8122 | | UML_CALLC(block, cfunc_rsp_vmadn_scalar, this); |
| 8123 | | return TRUE; |
| 8124 | | |
| 8125 | | case 0x0f: /* VMADH */ |
| 8126 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8127 | | UML_CALLC(block, cfunc_rsp_vmadh_scalar, this); |
| 8128 | | return TRUE; |
| 8129 | | |
| 8130 | | case 0x10: /* VADD */ |
| 8131 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8132 | | UML_CALLC(block, cfunc_rsp_vadd_scalar, this); |
| 8133 | | return TRUE; |
| 8134 | | |
| 8135 | | case 0x11: /* VSUB */ |
| 8136 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8137 | | UML_CALLC(block, cfunc_rsp_vsub_scalar, this); |
| 8138 | | return TRUE; |
| 8139 | | |
| 8140 | | case 0x13: /* VABS */ |
| 8141 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8142 | | UML_CALLC(block, cfunc_rsp_vabs_scalar, this); |
| 8143 | | return TRUE; |
| 8144 | | |
| 8145 | | case 0x14: /* VADDC */ |
| 8146 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8147 | | UML_CALLC(block, cfunc_rsp_vaddc_scalar, this); |
| 8148 | | return TRUE; |
| 8149 | | |
| 8150 | | case 0x15: /* VSUBC */ |
| 8151 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8152 | | UML_CALLC(block, cfunc_rsp_vsubc_scalar, this); |
| 8153 | | return TRUE; |
| 8154 | | |
| 8155 | | case 0x16: /* VADDB */ |
| 8156 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8157 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8158 | | return TRUE; |
| 8159 | | |
| 8160 | | case 0x17: /* VSUBB (reserved, functionally identical to VADDB) */ |
| 8161 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8162 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8163 | | return TRUE; |
| 8164 | | |
| 8165 | | case 0x18: /* VACCB (reserved, functionally identical to VADDB) */ |
| 8166 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8167 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8168 | | return TRUE; |
| 8169 | | |
| 8170 | | case 0x19: /* VSUCB (reserved, functionally identical to VADDB) */ |
| 8171 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8172 | | UML_CALLC(block, cfunc_rsp_vaddb_scalar, this); |
| 8173 | | return TRUE; |
| 8174 | | |
| 8175 | | case 0x1d: /* VSAW */ |
| 8176 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8177 | | UML_CALLC(block, cfunc_rsp_vsaw_scalar, this); |
| 8178 | | return TRUE; |
| 8179 | | |
| 8180 | | case 0x20: /* VLT */ |
| 8181 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8182 | | UML_CALLC(block, cfunc_rsp_vlt_scalar, this); |
| 8183 | | return TRUE; |
| 8184 | | |
| 8185 | | case 0x21: /* VEQ */ |
| 8186 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8187 | | UML_CALLC(block, cfunc_rsp_veq_scalar, this); |
| 8188 | | return TRUE; |
| 8189 | | |
| 8190 | | case 0x22: /* VNE */ |
| 8191 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8192 | | UML_CALLC(block, cfunc_rsp_vne_scalar, this); |
| 8193 | | return TRUE; |
| 8194 | | |
| 8195 | | case 0x23: /* VGE */ |
| 8196 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8197 | | UML_CALLC(block, cfunc_rsp_vge_scalar, this); |
| 8198 | | return TRUE; |
| 8199 | | |
| 8200 | | case 0x24: /* VCL */ |
| 8201 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8202 | | UML_CALLC(block, cfunc_rsp_vcl_scalar, this); |
| 8203 | | return TRUE; |
| 8204 | | |
| 8205 | | case 0x25: /* VCH */ |
| 8206 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8207 | | UML_CALLC(block, cfunc_rsp_vch_scalar, this); |
| 8208 | | return TRUE; |
| 8209 | | |
| 8210 | | case 0x26: /* VCR */ |
| 8211 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8212 | | UML_CALLC(block, cfunc_rsp_vcr_scalar, this); |
| 8213 | | return TRUE; |
| 8214 | | |
| 8215 | | case 0x27: /* VMRG */ |
| 8216 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8217 | | UML_CALLC(block, cfunc_rsp_vmrg_scalar, this); |
| 8218 | | return TRUE; |
| 8219 | | |
| 8220 | | case 0x28: /* VAND */ |
| 8221 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8222 | | UML_CALLC(block, cfunc_rsp_vand_scalar, this); |
| 8223 | | return TRUE; |
| 8224 | | |
| 8225 | | case 0x29: /* VNAND */ |
| 8226 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8227 | | UML_CALLC(block, cfunc_rsp_vnand_scalar, this); |
| 8228 | | return TRUE; |
| 8229 | | |
| 8230 | | case 0x2a: /* VOR */ |
| 8231 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8232 | | UML_CALLC(block, cfunc_rsp_vor_scalar, this); |
| 8233 | | return TRUE; |
| 8234 | | |
| 8235 | | case 0x2b: /* VNOR */ |
| 8236 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8237 | | UML_CALLC(block, cfunc_rsp_vnor_scalar, this); |
| 8238 | | return TRUE; |
| 8239 | | |
| 8240 | | case 0x2c: /* VXOR */ |
| 8241 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8242 | | UML_CALLC(block, cfunc_rsp_vxor_scalar, this); |
| 8243 | | return TRUE; |
| 8244 | | |
| 8245 | | case 0x2d: /* VNXOR */ |
| 8246 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8247 | | UML_CALLC(block, cfunc_rsp_vnxor_scalar, this); |
| 8248 | | return TRUE; |
| 8249 | | |
| 8250 | | case 0x30: /* VRCP */ |
| 8251 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8252 | | UML_CALLC(block, cfunc_rsp_vrcp_scalar, this); |
| 8253 | | return TRUE; |
| 8254 | | |
| 8255 | | case 0x31: /* VRCPL */ |
| 8256 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8257 | | UML_CALLC(block, cfunc_rsp_vrcpl_scalar, this); |
| 8258 | | return TRUE; |
| 8259 | | |
| 8260 | | case 0x32: /* VRCPH */ |
| 8261 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8262 | | UML_CALLC(block, cfunc_rsp_vrcph_scalar, this); |
| 8263 | | return TRUE; |
| 8264 | | |
| 8265 | | case 0x33: /* VMOV */ |
| 8266 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8267 | | UML_CALLC(block, cfunc_rsp_vmov_scalar, this); |
| 8268 | | return TRUE; |
| 8269 | | |
| 8270 | | case 0x34: /* VRSQ */ |
| 8271 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8272 | | UML_CALLC(block, cfunc_rsp_vrsq_scalar, this); |
| 8273 | | return TRUE; |
| 8274 | | |
| 8275 | | case 0x35: /* VRSQL */ |
| 8276 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8277 | | UML_CALLC(block, cfunc_rsp_vrsql_scalar, this); |
| 8278 | | return TRUE; |
| 8279 | | |
| 8280 | | case 0x36: /* VRSQH */ |
| 8281 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8282 | | UML_CALLC(block, cfunc_rsp_vrsqh_scalar, this); |
| 8283 | | return TRUE; |
| 8284 | | |
| 8285 | | case 0x37: /* VNOP */ |
| 8286 | | case 0x3F: /* VNULL */ |
| 8287 | | return TRUE; /* nothing is emitted for VNOP / VNULL */ |
| 8288 | | |
| 8289 | | default: |
| 8290 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8291 | | UML_CALLC(block, cfunc_unimplemented_opcode, this); |
| 8292 | | return FALSE; /* unknown function code: the call to cfunc_unimplemented_opcode was emitted above */ |
| 8293 | | } |
| 8294 | | } |
| 8295 | | #endif |
| 8296 | | |
| 8297 | 906 | int rsp_device::generate_opcode(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) |
| 8298 | 907 | { |
| 8299 | 908 | int in_delay_slot = ((desc->flags & OPFLAG_IN_DELAY_SLOT) != 0); |
| r241957 | r241958 | |
| 8450 | 1059 | return TRUE; |
| 8451 | 1060 | |
| 8452 | 1061 | case 0x32: /* LWC2 - MIPS I */ |
| 8453 | | return generate_lwc2(block, compiler, desc); |
| 1062 | return m_cop2->generate_lwc2(block, compiler, desc); |
| 8454 | 1063 | |
| 8455 | 1064 | |
| 8456 | 1065 | /* ----- memory store operations ----- */ |
| r241957 | r241958 | |
| 8480 | 1089 | return TRUE; |
| 8481 | 1090 | |
| 8482 | 1091 | case 0x3a: /* SWC2 - MIPS I */ |
| 8483 | | return generate_swc2(block, compiler, desc); |
| 8484 | | //UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8485 | | //UML_CALLC(block, cfunc_swc2, this); // callc cfunc_mfc2 |
| 8486 | | //return TRUE; |
| 1092 | return m_cop2->generate_swc2(block, compiler, desc); |
| 8487 | 1093 | |
| 8488 | 1094 | /* ----- coprocessor instructions ----- */ |
| 8489 | 1095 | |
| r241957 | r241958 | |
| 8491 | 1097 | return generate_cop0(block, compiler, desc); |
| 8492 | 1098 | |
| 8493 | 1099 | case 0x12: /* COP2 - MIPS I */ |
| 8494 | | return generate_cop2(block, compiler, desc); |
| 8495 | | //UML_EXH(block, m_exception[EXCEPTION_INVALIDOP], 0);// exh invalidop,0 |
| 8496 | | //return TRUE; |
| 1100 | return m_cop2->generate_cop2(block, compiler, desc); |
| 8497 | 1101 | |
| 8498 | 1102 | |
| 8499 | 1103 | /* ----- unimplemented/illegal instructions ----- */ |
| r241957 | r241958 | |
| 8705 | 1309 | |
| 8706 | 1310 | |
| 8707 | 1311 | /*------------------------------------------------- |
| 8708 | | generate_cop2 - compile COP2 opcodes |
| 8709 | | -------------------------------------------------*/ |
| 8710 | | |
| 8711 | | int rsp_device::generate_cop2(drcuml_block *block, compiler_state *compiler, const opcode_desc *desc) /* emits UML for one COP2 instruction; returns TRUE when handled, FALSE when RSREG matches no case below */ |
| 8712 | | { |
| 8713 | | UINT32 op = desc->opptr.l[0]; |
| 8714 | | UINT8 opswitch = RSREG; /* the RSREG field of op selects the COP2 sub-operation */ |
| 8715 | | |
| 8716 | | switch (opswitch) |
| 8717 | | { |
| 8718 | | case 0x00: /* MFCz */ |
| 8719 | | if (RTREG != 0) /* no code is emitted when RTREG is 0 */ |
| 8720 | | { |
| 8721 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8722 | | #if USE_SIMD |
| 8723 | | UML_CALLC(block, cfunc_mfc2_simd, this); // callc cfunc_mfc2_simd |
| 8724 | | #if SIMUL_SIMD |
| 8725 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8726 | | UML_CALLC(block, cfunc_mfc2_scalar, this); |
| 8727 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8728 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8729 | | #endif |
| 8730 | | #else |
| 8731 | | UML_CALLC(block, cfunc_mfc2_scalar, this); |
| 8732 | | #endif |
| 8733 | | //UML_SEXT(block, R32(RTREG), I0, DWORD); // dsext <rtreg>,i0,dword |
| 8734 | | } |
| 8735 | | return TRUE; |
| 8736 | | |
| 8737 | | case 0x02: /* CFCz */ |
| 8738 | | if (RTREG != 0) /* no code is emitted when RTREG is 0 */ |
| 8739 | | { |
| 8740 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8741 | | #if USE_SIMD |
| 8742 | | UML_CALLC(block, cfunc_cfc2_simd, this); // callc cfunc_cfc2_simd |
| 8743 | | #if SIMUL_SIMD |
| 8744 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8745 | | UML_CALLC(block, cfunc_cfc2_scalar, this); |
| 8746 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8747 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8748 | | #endif |
| 8749 | | #else |
| 8750 | | UML_CALLC(block, cfunc_cfc2_scalar, this); |
| 8751 | | #endif |
| 8752 | | //UML_SEXT(block, R32(RTREG), I0, DWORD); // dsext <rtreg>,i0,dword |
| 8753 | | } |
| 8754 | | return TRUE; |
| 8755 | | |
| 8756 | | case 0x04: /* MTCz */ |
| 8757 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8758 | | #if USE_SIMD |
| 8759 | | UML_CALLC(block, cfunc_mtc2_simd, this); // callc cfunc_mtc2_simd |
| 8760 | | #if SIMUL_SIMD |
| 8761 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8762 | | UML_CALLC(block, cfunc_mtc2_scalar, this); |
| 8763 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8764 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8765 | | #endif |
| 8766 | | #else |
| 8767 | | UML_CALLC(block, cfunc_mtc2_scalar, this); |
| 8768 | | #endif |
| 8769 | | return TRUE; |
| 8770 | | |
| 8771 | | case 0x06: /* CTCz */ |
| 8772 | | UML_MOV(block, mem(&m_rsp_state->arg0), desc->opptr.l[0]); // mov [arg0],desc->opptr.l |
| 8773 | | #if USE_SIMD |
| 8774 | | UML_CALLC(block, cfunc_ctc2_simd, this); // callc cfunc_ctc2_simd |
| 8775 | | #if SIMUL_SIMD |
| 8776 | | UML_CALLC(block, cfunc_backup_regs, this); |
| 8777 | | UML_CALLC(block, cfunc_ctc2_scalar, this); |
| 8778 | | UML_CALLC(block, cfunc_restore_regs, this); |
| 8779 | | UML_CALLC(block, cfunc_verify_regs, this); |
| 8780 | | #endif |
| 8781 | | #else |
| 8782 | | UML_CALLC(block, cfunc_ctc2_scalar, this); |
| 8783 | | #endif |
| 8784 | | return TRUE; |
| 8785 | | |
| 8786 | | case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: |
| 8787 | | case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: |
| 8788 | | return generate_vector_opcode(block, compiler, desc); /* 0x10-0x1f: hand off to the vector-opcode generator */ |
| 8789 | | } |
| 8790 | | return FALSE; /* any other RSREG value is not handled here */ |
| 8791 | | } |
| 8792 | | |
| 8793 | | /*------------------------------------------------- |
| 8794 | 1312 | generate_cop0 - compile COP0 opcodes |
| 8795 | 1313 | -------------------------------------------------*/ |
| 8796 | 1314 | |
| r241957 | r241958 | |
| 8825 | 1343 | return FALSE; |
| 8826 | 1344 | } |
| 8827 | 1345 | |
| 8828 | | #if USE_SIMD |
| 8829 | | inline void rsp_device::ccfunc_mfc2_simd() /* MFC2 helper (SIMD register file): loads r[RTREG] with a sign-extended 16-bit value built from two bytes of m_xv[VS1REG] */ |
| 8830 | | { |
| 8831 | | UINT32 op = m_rsp_state->arg0; |
| 8832 | | int el = (op >> 7) & 0xf; /* 4-bit element index taken from bits 7..10 of op */ |
| 8833 | | |
| 8834 | | UINT16 out; |
| 8835 | | SIMD_EXTRACT16(m_xv[VS1REG], out, (el >> 1)); |
| 8836 | | out >>= (1 - (el & 1)) * 8; /* even el keeps the upper byte of the 16-bit lane, odd el the lower byte */ |
| 8837 | | out &= 0x00ff; |
| 8838 | | |
| 8839 | | el++; /* NOTE(review): el can become 16 here with no & 0xf wrap (the scalar variant masks its indices); depends on how SIMD_EXTRACT16 treats lane 8 - confirm */ |
| 8840 | | |
| 8841 | | UINT16 temp; |
| 8842 | | SIMD_EXTRACT16(m_xv[VS1REG], temp, (el >> 1)); |
| 8843 | | temp >>= (1 - (el & 1)) * 8; |
| 8844 | | temp &= 0x00ff; |
| 8845 | | |
| 8846 | | m_rsp_state->r[RTREG] = (INT32)(INT16)((out << 8) | temp); /* first byte is the high half; no RTREG == 0 check is made in this helper */ |
| 8847 | | } |
| 8848 | | |
| 8849 | | static void cfunc_mfc2_simd(void *param) /* static wrapper: casts param to an rsp_device pointer and calls ccfunc_mfc2_simd() */ |
| 8850 | | { |
| 8851 | | ((rsp_device *)param)->ccfunc_mfc2_simd(); |
| 8852 | | } |
| 8853 | | #endif |
| 8854 | | |
| 8855 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 8856 | | inline void rsp_device::ccfunc_mfc2_scalar() /* MFC2 helper (scalar register file): combines two bytes of vector register VS1REG into RTVAL */ |
| 8857 | | { |
| 8858 | | UINT32 op = m_rsp_state->arg0; |
| 8859 | | int el = (op >> 7) & 0xf; /* 4-bit element index taken from bits 7..10 of op */ |
| 8860 | | |
| 8861 | | UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf); |
| 8862 | | UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf); /* index wraps from 15 back to 0 */ |
| 8863 | | if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2)); /* b1 is the high byte; result is sign-extended from 16 bits; skipped when RTREG is 0 */ |
| 8864 | | } |
| 8865 | | |
| 8866 | | static void cfunc_mfc2_scalar(void *param) /* static wrapper: casts param to an rsp_device pointer and calls ccfunc_mfc2_scalar() */ |
| 8867 | | { |
| 8868 | | ((rsp_device *)param)->ccfunc_mfc2_scalar(); |
| 8869 | | } |
| 8870 | | #endif |
| 8871 | | |
| 8872 | | #if USE_SIMD |
| 8873 | | inline void rsp_device::ccfunc_cfc2_simd() /* CFC2 helper (VEC_* flag macros): packs per-element flag bits into RTVAL; does nothing when RTREG is 0 or RDREG is not 0, 1 or 2 */ |
| 8874 | | { |
| 8875 | | UINT32 op = m_rsp_state->arg0; |
| 8876 | | if (RTREG) |
| 8877 | | { |
| 8878 | | switch(RDREG) |
| 8879 | | { |
| 8880 | | case 0: /* carry flags 0..7 -> bits 0..7, zero flags 0..7 -> bits 8..15 */ |
| 8881 | | RTVAL = ((VEC_CARRY_FLAG(0) & 1) << 0) | |
| 8882 | | ((VEC_CARRY_FLAG(1) & 1) << 1) | |
| 8883 | | ((VEC_CARRY_FLAG(2) & 1) << 2) | |
| 8884 | | ((VEC_CARRY_FLAG(3) & 1) << 3) | |
| 8885 | | ((VEC_CARRY_FLAG(4) & 1) << 4) | |
| 8886 | | ((VEC_CARRY_FLAG(5) & 1) << 5) | |
| 8887 | | ((VEC_CARRY_FLAG(6) & 1) << 6) | |
| 8888 | | ((VEC_CARRY_FLAG(7) & 1) << 7) | |
| 8889 | | ((VEC_ZERO_FLAG(0) & 1) << 8) | |
| 8890 | | ((VEC_ZERO_FLAG(1) & 1) << 9) | |
| 8891 | | ((VEC_ZERO_FLAG(2) & 1) << 10) | |
| 8892 | | ((VEC_ZERO_FLAG(3) & 1) << 11) | |
| 8893 | | ((VEC_ZERO_FLAG(4) & 1) << 12) | |
| 8894 | | ((VEC_ZERO_FLAG(5) & 1) << 13) | |
| 8895 | | ((VEC_ZERO_FLAG(6) & 1) << 14) | |
| 8896 | | ((VEC_ZERO_FLAG(7) & 1) << 15); |
| 8897 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit result */ |
| 8898 | | break; |
| 8899 | | case 1: /* compare flags 0..7 -> bits 0..7, clip2 flags 0..7 -> bits 8..15 */ |
| 8900 | | RTVAL = ((VEC_COMPARE_FLAG(0) & 1) << 0) | |
| 8901 | | ((VEC_COMPARE_FLAG(1) & 1) << 1) | |
| 8902 | | ((VEC_COMPARE_FLAG(2) & 1) << 2) | |
| 8903 | | ((VEC_COMPARE_FLAG(3) & 1) << 3) | |
| 8904 | | ((VEC_COMPARE_FLAG(4) & 1) << 4) | |
| 8905 | | ((VEC_COMPARE_FLAG(5) & 1) << 5) | |
| 8906 | | ((VEC_COMPARE_FLAG(6) & 1) << 6) | |
| 8907 | | ((VEC_COMPARE_FLAG(7) & 1) << 7) | |
| 8908 | | ((VEC_CLIP2_FLAG(0) & 1) << 8) | |
| 8909 | | ((VEC_CLIP2_FLAG(1) & 1) << 9) | |
| 8910 | | ((VEC_CLIP2_FLAG(2) & 1) << 10) | |
| 8911 | | ((VEC_CLIP2_FLAG(3) & 1) << 11) | |
| 8912 | | ((VEC_CLIP2_FLAG(4) & 1) << 12) | |
| 8913 | | ((VEC_CLIP2_FLAG(5) & 1) << 13) | |
| 8914 | | ((VEC_CLIP2_FLAG(6) & 1) << 14) | |
| 8915 | | ((VEC_CLIP2_FLAG(7) & 1) << 15); |
| 8916 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit result */ |
| 8917 | | break; |
| 8918 | | case 2: /* clip1 flags 0..7 -> bits 0..7; only 8 bits are produced and no sign extension is applied */ |
| 8919 | | RTVAL = ((VEC_CLIP1_FLAG(0) & 1) << 0) | |
| 8920 | | ((VEC_CLIP1_FLAG(1) & 1) << 1) | |
| 8921 | | ((VEC_CLIP1_FLAG(2) & 1) << 2) | |
| 8922 | | ((VEC_CLIP1_FLAG(3) & 1) << 3) | |
| 8923 | | ((VEC_CLIP1_FLAG(4) & 1) << 4) | |
| 8924 | | ((VEC_CLIP1_FLAG(5) & 1) << 5) | |
| 8925 | | ((VEC_CLIP1_FLAG(6) & 1) << 6) | |
| 8926 | | ((VEC_CLIP1_FLAG(7) & 1) << 7); |
| 8927 | | break; |
| 8928 | | } |
| 8929 | | } |
| 8930 | | } |
| 8931 | | |
| 8932 | | static void cfunc_cfc2_simd(void *param) /* static wrapper: casts param to an rsp_device pointer and calls ccfunc_cfc2_simd() */ |
| 8933 | | { |
| 8934 | | ((rsp_device *)param)->ccfunc_cfc2_simd(); |
| 8935 | | } |
| 8936 | | #endif |
| 8937 | | |
| 8938 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 8939 | | inline void rsp_device::ccfunc_cfc2_scalar() /* CFC2 helper (scalar flag macros): packs per-element flag bits into RTVAL; does nothing when RTREG is 0 or RDREG is not 0, 1 or 2 */ |
| 8940 | | { |
| 8941 | | UINT32 op = m_rsp_state->arg0; |
| 8942 | | if (RTREG) |
| 8943 | | { |
| 8944 | | switch(RDREG) |
| 8945 | | { |
| 8946 | | case 0: /* carry flags 0..7 -> bits 0..7, zero flags 0..7 -> bits 8..15 */ |
| 8947 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 8948 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 8949 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 8950 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 8951 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 8952 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 8953 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 8954 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 8955 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 8956 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 8957 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 8958 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 8959 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 8960 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 8961 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 8962 | | ((ZERO_FLAG(7) & 1) << 15); |
| 8963 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit result */ |
| 8964 | | break; |
| 8965 | | case 1: /* compare flags 0..7 -> bits 0..7, clip2 flags 0..7 -> bits 8..15 */ |
| 8966 | | RTVAL = ((COMPARE_FLAG(0) & 1) << 0) | |
| 8967 | | ((COMPARE_FLAG(1) & 1) << 1) | |
| 8968 | | ((COMPARE_FLAG(2) & 1) << 2) | |
| 8969 | | ((COMPARE_FLAG(3) & 1) << 3) | |
| 8970 | | ((COMPARE_FLAG(4) & 1) << 4) | |
| 8971 | | ((COMPARE_FLAG(5) & 1) << 5) | |
| 8972 | | ((COMPARE_FLAG(6) & 1) << 6) | |
| 8973 | | ((COMPARE_FLAG(7) & 1) << 7) | |
| 8974 | | ((CLIP2_FLAG(0) & 1) << 8) | |
| 8975 | | ((CLIP2_FLAG(1) & 1) << 9) | |
| 8976 | | ((CLIP2_FLAG(2) & 1) << 10) | |
| 8977 | | ((CLIP2_FLAG(3) & 1) << 11) | |
| 8978 | | ((CLIP2_FLAG(4) & 1) << 12) | |
| 8979 | | ((CLIP2_FLAG(5) & 1) << 13) | |
| 8980 | | ((CLIP2_FLAG(6) & 1) << 14) | |
| 8981 | | ((CLIP2_FLAG(7) & 1) << 15); |
| 8982 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; /* sign-extend the 16-bit result */ |
| 8983 | | break; |
| 8984 | | case 2: /* clip1 flags 0..7 -> bits 0..7; only 8 bits are produced and no sign extension is applied */ |
| 8985 | | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 8986 | | ((CLIP1_FLAG(1) & 1) << 1) | |
| 8987 | | ((CLIP1_FLAG(2) & 1) << 2) | |
| 8988 | | ((CLIP1_FLAG(3) & 1) << 3) | |
| 8989 | | ((CLIP1_FLAG(4) & 1) << 4) | |
| 8990 | | ((CLIP1_FLAG(5) & 1) << 5) | |
| 8991 | | ((CLIP1_FLAG(6) & 1) << 6) | |
| 8992 | | ((CLIP1_FLAG(7) & 1) << 7); |
| 8993 | | break; |
| 8994 | | } |
| 8995 | | } |
| 8996 | | } |
| 8997 | | |
| 8998 | | static void cfunc_cfc2_scalar(void *param) /* static wrapper: casts param to an rsp_device pointer and calls ccfunc_cfc2_scalar() */ |
| 8999 | | { |
| 9000 | | ((rsp_device *)param)->ccfunc_cfc2_scalar(); |
| 9001 | | } |
| 9002 | | #endif |
| 9003 | | |
| 9004 | | #if USE_SIMD |
| 9005 | | inline void rsp_device::ccfunc_mtc2_simd() /* MTC2 helper (SIMD register file): stores RTVAL into one 16-bit lane of m_xv[VS1REG] */ |
| 9006 | | { |
| 9007 | | UINT32 op = m_rsp_state->arg0; |
| 9008 | | int el = (op >> 7) & 0xf; /* 4-bit element index taken from bits 7..10 of op */ |
| 9009 | | SIMD_INSERT16(m_xv[VS1REG], RTVAL, el >> 1); /* lane index is el >> 1; the low bit of el is not used here (the scalar variant writes bytes el and el+1) */ |
| 9010 | | } |
| 9011 | | |
| 9012 | | static void cfunc_mtc2_simd(void *param) /* static wrapper: casts param to an rsp_device pointer and calls ccfunc_mtc2_simd() */ |
| 9013 | | { |
| 9014 | | ((rsp_device *)param)->ccfunc_mtc2_simd(); |
| 9015 | | } |
| 9016 | | #endif |
| 9017 | | |
| 9018 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 9019 | | inline void rsp_device::ccfunc_mtc2_scalar() /* MTC2 helper (scalar register file): stores the low 16 bits of RTVAL into two bytes of vector register VS1REG */ |
| 9020 | | { |
| 9021 | | UINT32 op = m_rsp_state->arg0; |
| 9022 | | int el = (op >> 7) & 0xf; /* 4-bit element index taken from bits 7..10 of op */ |
| 9023 | | VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff; /* bits 8..15 of RTVAL go to byte el */ |
| 9024 | | VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff; /* bits 0..7 go to the next byte; index wraps from 15 back to 0 */ |
| 9025 | | } |
| 9026 | | |
| 9027 | | static void cfunc_mtc2_scalar(void *param) /* static wrapper: casts param to an rsp_device pointer and calls ccfunc_mtc2_scalar() */ |
| 9028 | | { |
| 9029 | | ((rsp_device *)param)->ccfunc_mtc2_scalar(); |
| 9030 | | } |
| 9031 | | #endif |
| 9032 | | |
| 9033 | | #if USE_SIMD |
| 9034 | | inline void rsp_device::ccfunc_ctc2_simd() /* CTC2 helper (SIMD flag storage): unpacks bits of RTVAL into the per-element flags selected by RDREG; RDREG values other than 0, 1, 2 change nothing */ |
| 9035 | | { |
| 9036 | | UINT32 op = m_rsp_state->arg0; |
| 9037 | | switch(RDREG) |
| 9038 | | { |
| 9039 | | case 0: /* bits 0..7 -> m_vflag[0][] and carry flags, bits 8..15 -> m_vflag[3][] and zero flags */ |
| 9040 | | VEC_CLEAR_CARRY_FLAGS(); |
| 9041 | | VEC_CLEAR_ZERO_FLAGS(); |
| 9042 | | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9043 | | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9044 | | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9045 | | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9046 | | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9047 | | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9048 | | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9049 | | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9050 | | if (RTVAL & (1 << 0)) { VEC_SET_CARRY_FLAG(0); } |
| 9051 | | if (RTVAL & (1 << 1)) { VEC_SET_CARRY_FLAG(1); } |
| 9052 | | if (RTVAL & (1 << 2)) { VEC_SET_CARRY_FLAG(2); } |
| 9053 | | if (RTVAL & (1 << 3)) { VEC_SET_CARRY_FLAG(3); } |
| 9054 | | if (RTVAL & (1 << 4)) { VEC_SET_CARRY_FLAG(4); } |
| 9055 | | if (RTVAL & (1 << 5)) { VEC_SET_CARRY_FLAG(5); } |
| 9056 | | if (RTVAL & (1 << 6)) { VEC_SET_CARRY_FLAG(6); } |
| 9057 | | if (RTVAL & (1 << 7)) { VEC_SET_CARRY_FLAG(7); } |
| 9058 | | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9059 | | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9060 | | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9061 | | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9062 | | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9063 | | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9064 | | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9065 | | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9066 | | if (RTVAL & (1 << 8)) { VEC_SET_ZERO_FLAG(0); } |
| 9067 | | if (RTVAL & (1 << 9)) { VEC_SET_ZERO_FLAG(1); } |
| 9068 | | if (RTVAL & (1 << 10)) { VEC_SET_ZERO_FLAG(2); } |
| 9069 | | if (RTVAL & (1 << 11)) { VEC_SET_ZERO_FLAG(3); } |
| 9070 | | if (RTVAL & (1 << 12)) { VEC_SET_ZERO_FLAG(4); } |
| 9071 | | if (RTVAL & (1 << 13)) { VEC_SET_ZERO_FLAG(5); } |
| 9072 | | if (RTVAL & (1 << 14)) { VEC_SET_ZERO_FLAG(6); } |
| 9073 | | if (RTVAL & (1 << 15)) { VEC_SET_ZERO_FLAG(7); } |
| 9074 | | break; |
| 9075 | | case 1: /* bits 0..7 -> m_vflag[1][] and compare flags, bits 8..15 -> m_vflag[4][] and clip2 flags */ |
| 9076 | | VEC_CLEAR_COMPARE_FLAGS(); |
| 9077 | | VEC_CLEAR_CLIP2_FLAGS(); |
| 9078 | | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9079 | | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9080 | | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9081 | | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9082 | | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9083 | | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9084 | | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9085 | | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9086 | | if (RTVAL & (1 << 0)) { VEC_SET_COMPARE_FLAG(0); } |
| 9087 | | if (RTVAL & (1 << 1)) { VEC_SET_COMPARE_FLAG(1); } |
| 9088 | | if (RTVAL & (1 << 2)) { VEC_SET_COMPARE_FLAG(2); } |
| 9089 | | if (RTVAL & (1 << 3)) { VEC_SET_COMPARE_FLAG(3); } |
| 9090 | | if (RTVAL & (1 << 4)) { VEC_SET_COMPARE_FLAG(4); } |
| 9091 | | if (RTVAL & (1 << 5)) { VEC_SET_COMPARE_FLAG(5); } |
| 9092 | | if (RTVAL & (1 << 6)) { VEC_SET_COMPARE_FLAG(6); } |
| 9093 | | if (RTVAL & (1 << 7)) { VEC_SET_COMPARE_FLAG(7); } |
| 9094 | | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9095 | | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9096 | | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9097 | | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9098 | | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9099 | | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9100 | | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9101 | | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9102 | | if (RTVAL & (1 << 8)) { VEC_SET_CLIP2_FLAG(0); } |
| 9103 | | if (RTVAL & (1 << 9)) { VEC_SET_CLIP2_FLAG(1); } |
| 9104 | | if (RTVAL & (1 << 10)) { VEC_SET_CLIP2_FLAG(2); } |
| 9105 | | if (RTVAL & (1 << 11)) { VEC_SET_CLIP2_FLAG(3); } |
| 9106 | | if (RTVAL & (1 << 12)) { VEC_SET_CLIP2_FLAG(4); } |
| 9107 | | if (RTVAL & (1 << 13)) { VEC_SET_CLIP2_FLAG(5); } |
| 9108 | | if (RTVAL & (1 << 14)) { VEC_SET_CLIP2_FLAG(6); } |
| 9109 | | if (RTVAL & (1 << 15)) { VEC_SET_CLIP2_FLAG(7); } |
| 9110 | | break; |
| 9111 | | case 2: /* bits 0..7 -> m_vflag[2][] and clip1 flags; bits 8..15 of RTVAL are not read */ |
| 9112 | | VEC_CLEAR_CLIP1_FLAGS(); |
| 9113 | | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9114 | | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9115 | | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9116 | | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9117 | | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9118 | | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9119 | | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9120 | | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9121 | | if (RTVAL & (1 << 0)) { VEC_SET_CLIP1_FLAG(0); } |
| 9122 | | if (RTVAL & (1 << 1)) { VEC_SET_CLIP1_FLAG(1); } |
| 9123 | | if (RTVAL & (1 << 2)) { VEC_SET_CLIP1_FLAG(2); } |
| 9124 | | if (RTVAL & (1 << 3)) { VEC_SET_CLIP1_FLAG(3); } |
| 9125 | | if (RTVAL & (1 << 4)) { VEC_SET_CLIP1_FLAG(4); } |
| 9126 | | if (RTVAL & (1 << 5)) { VEC_SET_CLIP1_FLAG(5); } |
| 9127 | | if (RTVAL & (1 << 6)) { VEC_SET_CLIP1_FLAG(6); } |
| 9128 | | if (RTVAL & (1 << 7)) { VEC_SET_CLIP1_FLAG(7); } |
| 9129 | | break; |
| 9130 | | } |
| 9131 | | } |
| 9132 | | |
| 9133 | | static void cfunc_ctc2_simd(void *param) |
| 9134 | | { |
| 9135 | | ((rsp_device *)param)->ccfunc_ctc2_simd(); |
| 9136 | | } |
| 9137 | | #endif |
| 9138 | | |
| 9139 | | #if (!USE_SIMD || SIMUL_SIMD) |
| 9140 | | inline void rsp_device::ccfunc_ctc2_scalar() |
| 9141 | | { |
| 9142 | | UINT32 op = m_rsp_state->arg0; |
| 9143 | | switch(RDREG) |
| 9144 | | { |
| 9145 | | case 0: |
| 9146 | | CLEAR_CARRY_FLAGS(); |
| 9147 | | CLEAR_ZERO_FLAGS(); |
| 9148 | | m_vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9149 | | m_vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9150 | | m_vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9151 | | m_vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9152 | | m_vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9153 | | m_vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9154 | | m_vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9155 | | m_vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9156 | | if (RTVAL & (1 << 0)) { SET_CARRY_FLAG(0); } |
| 9157 | | if (RTVAL & (1 << 1)) { SET_CARRY_FLAG(1); } |
| 9158 | | if (RTVAL & (1 << 2)) { SET_CARRY_FLAG(2); } |
| 9159 | | if (RTVAL & (1 << 3)) { SET_CARRY_FLAG(3); } |
| 9160 | | if (RTVAL & (1 << 4)) { SET_CARRY_FLAG(4); } |
| 9161 | | if (RTVAL & (1 << 5)) { SET_CARRY_FLAG(5); } |
| 9162 | | if (RTVAL & (1 << 6)) { SET_CARRY_FLAG(6); } |
| 9163 | | if (RTVAL & (1 << 7)) { SET_CARRY_FLAG(7); } |
| 9164 | | m_vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9165 | | m_vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9166 | | m_vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9167 | | m_vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9168 | | m_vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9169 | | m_vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9170 | | m_vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9171 | | m_vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9172 | | if (RTVAL & (1 << 8)) { SET_ZERO_FLAG(0); } |
| 9173 | | if (RTVAL & (1 << 9)) { SET_ZERO_FLAG(1); } |
| 9174 | | if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); } |
| 9175 | | if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); } |
| 9176 | | if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); } |
| 9177 | | if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); } |
| 9178 | | if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); } |
| 9179 | | if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); } |
| 9180 | | break; |
| 9181 | | case 1: |
| 9182 | | CLEAR_COMPARE_FLAGS(); |
| 9183 | | CLEAR_CLIP2_FLAGS(); |
| 9184 | | m_vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9185 | | m_vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9186 | | m_vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9187 | | m_vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9188 | | m_vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9189 | | m_vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9190 | | m_vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9191 | | m_vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9192 | | if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); } |
| 9193 | | if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); } |
| 9194 | | if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); } |
| 9195 | | if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); } |
| 9196 | | if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); } |
| 9197 | | if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); } |
| 9198 | | if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); } |
| 9199 | | if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); } |
| 9200 | | m_vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0; |
| 9201 | | m_vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0; |
| 9202 | | m_vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0; |
| 9203 | | m_vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0; |
| 9204 | | m_vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0; |
| 9205 | | m_vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0; |
| 9206 | | m_vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0; |
| 9207 | | m_vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0; |
| 9208 | | if (RTVAL & (1 << 8)) { SET_CLIP2_FLAG(0); } |
| 9209 | | if (RTVAL & (1 << 9)) { SET_CLIP2_FLAG(1); } |
| 9210 | | if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); } |
| 9211 | | if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); } |
| 9212 | | if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); } |
| 9213 | | if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); } |
| 9214 | | if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); } |
| 9215 | | if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); } |
| 9216 | | break; |
| 9217 | | case 2: |
| 9218 | | CLEAR_CLIP1_FLAGS(); |
| 9219 | | m_vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0; |
| 9220 | | m_vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0; |
| 9221 | | m_vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0; |
| 9222 | | m_vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0; |
| 9223 | | m_vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0; |
| 9224 | | m_vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0; |
| 9225 | | m_vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0; |
| 9226 | | m_vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0; |
| 9227 | | if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); } |
| 9228 | | if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); } |
| 9229 | | if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); } |
| 9230 | | if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); } |
| 9231 | | if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); } |
| 9232 | | if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); } |
| 9233 | | if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); } |
| 9234 | | if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); } |
| 9235 | | break; |
| 9236 | | } |
| 9237 | | } |
| 9238 | | |
| 9239 | | static void cfunc_ctc2_scalar(void *param) |
| 9240 | | { |
| 9241 | | ((rsp_device *)param)->ccfunc_ctc2_scalar(); |
| 9242 | | } |
| 9243 | | #endif |
| 9244 | | |
| 9245 | 1346 | /*************************************************************************** |
| 9246 | 1347 | CODE LOGGING HELPERS |
| 9247 | 1348 | ***************************************************************************/ |