trunk/src/emu/cpu/rsp/rsp.c
| r241934 | r241935 | |
| 62 | 62 | #define ACCUM_H(x) m_accum[((x))].w[3] |
| 63 | 63 | #define ACCUM_M(x) m_accum[((x))].w[2] |
| 64 | 64 | #define ACCUM_L(x) m_accum[((x))].w[1] |
| 65 | #define ACCUM_LL(x) m_accum[((x))].w[0] |
| 65 | 66 | |
| 66 | 67 | #define CARRY 0 |
| 67 | 68 | #define COMPARE 1 |
| r241934 | r241935 | |
| 75 | 76 | #define ZERO_FLAG(x) (m_vflag[ZERO][x & 7] != 0 ? 0xffff : 0) |
| 76 | 77 | #define CLIP2_FLAG(x) (m_vflag[CLIP2][x & 7] != 0 ? 0xffff : 0) |
| 77 | 78 | |
| 78 | | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[0], 0, 16); } |
| 79 | | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[1], 0, 16); } |
| 80 | | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[2], 0, 16); } |
| 81 | | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[3], 0, 16); } |
| 82 | | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[4], 0, 16); } |
| 79 | #define CLEAR_CARRY_FLAGS() { memset(m_vflag[CARRY], 0, 16); } |
| 80 | #define CLEAR_COMPARE_FLAGS() { memset(m_vflag[COMPARE], 0, 16); } |
| 81 | #define CLEAR_CLIP1_FLAGS() { memset(m_vflag[CLIP1], 0, 16); } |
| 82 | #define CLEAR_ZERO_FLAGS() { memset(m_vflag[ZERO], 0, 16); } |
| 83 | #define CLEAR_CLIP2_FLAGS() { memset(m_vflag[CLIP2], 0, 16); } |
| 83 | 84 | |
| 84 | | #define SET_CARRY_FLAG(x) { m_vflag[0][x & 7] = 0xffff; } |
| 85 | | #define SET_COMPARE_FLAG(x) { m_vflag[1][x & 7] = 0xffff; } |
| 86 | | #define SET_CLIP1_FLAG(x) { m_vflag[2][x & 7] = 0xffff; } |
| 87 | | #define SET_ZERO_FLAG(x) { m_vflag[3][x & 7] = 0xffff; } |
| 88 | | #define SET_CLIP2_FLAG(x) { m_vflag[4][x & 7] = 0xffff; } |
| 85 | #define SET_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0xffff; } |
| 86 | #define SET_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0xffff; } |
| 87 | #define SET_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0xffff; } |
| 88 | #define SET_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0xffff; } |
| 89 | #define SET_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0xffff; } |
| 89 | 90 | |
| 90 | | #define CLEAR_CARRY_FLAG(x) { m_vflag[0][x & 7] = 0; } |
| 91 | | #define CLEAR_COMPARE_FLAG(x) { m_vflag[1][x & 7] = 0; } |
| 92 | | #define CLEAR_CLIP1_FLAG(x) { m_vflag[2][x & 7] = 0; } |
| 93 | | #define CLEAR_ZERO_FLAG(x) { m_vflag[3][x & 7] = 0; } |
| 94 | | #define CLEAR_CLIP2_FLAG(x) { m_vflag[4][x & 7] = 0; } |
| 91 | #define CLEAR_CARRY_FLAG(x) { m_vflag[CARRY][x & 7] = 0; } |
| 92 | #define CLEAR_COMPARE_FLAG(x) { m_vflag[COMPARE][x & 7] = 0; } |
| 93 | #define CLEAR_CLIP1_FLAG(x) { m_vflag[CLIP1][x & 7] = 0; } |
| 94 | #define CLEAR_ZERO_FLAG(x) { m_vflag[ZERO][x & 7] = 0; } |
| 95 | #define CLEAR_CLIP2_FLAG(x) { m_vflag[CLIP2][x & 7] = 0; } |
| 95 | 96 | |
| 96 | 97 | #define ROPCODE(pc) m_program->read_dword(pc) |
| 97 | 98 | |
| r241934 | r241935 | |
| 203 | 204 | inline UINT8 rsp_device::READ8(UINT32 address) |
| 204 | 205 | { |
| 205 | 206 | UINT8 ret; |
| 206 | | address = 0x04000000 | (address & 0xfff); |
| 207 | address &= 0xfff; |
| 207 | 208 | ret = m_program->read_byte(address); |
| 208 | | //printf("%04xr%02x\n", address & 0x0000ffff, ret); |
| 209 | 209 | return ret; |
| 210 | 210 | } |
| 211 | 211 | |
| 212 | 212 | inline UINT16 rsp_device::READ16(UINT32 address) |
| 213 | 213 | { |
| 214 | 214 | UINT16 ret; |
| 215 | | address = 0x04000000 | (address & 0xfff); |
| 215 | address &= 0xfff; |
| 216 | 216 | |
| 217 | | if(address & 1) |
| 218 | | { |
| 219 | | ret = ((m_program->read_byte(address + 0) & 0xff) << 8) | (m_program->read_byte(address + 1) & 0xff); |
| 220 | | } |
| 221 | | else |
| 222 | | { |
| 223 | | ret = m_program->read_word(address); |
| 224 | | } |
| 217 | ret = (m_program->read_byte(address) << 8) | (m_program->read_byte(address + 1) & 0xff); |
| 225 | 218 | |
| 226 | | //printf("%04xr%04x\n", address & 0x0000ffff, ret); |
| 227 | | |
| 228 | 219 | return ret; |
| 229 | 220 | } |
| 230 | 221 | |
| 231 | 222 | inline UINT32 rsp_device::READ32(UINT32 address) |
| 232 | 223 | { |
| 233 | 224 | UINT32 ret; |
| 234 | | address = 0x04000000 | (address & 0xfff); |
| 225 | address &= 0xfff; |
| 235 | 226 | |
| 236 | | if(address & 3) |
| 237 | | { |
| 238 | | ret = ((m_program->read_byte(address + 0) & 0xff) << 24) | |
| 239 | | ((m_program->read_byte(address + 1) & 0xff) << 16) | |
| 240 | | ((m_program->read_byte(address + 2) & 0xff) << 8) | |
| 241 | | ((m_program->read_byte(address + 3) & 0xff) << 0); |
| 242 | | } |
| 243 | | else |
| 244 | | { |
| 245 | | ret = m_program->read_dword(address); |
| 246 | | } |
| 227 | ret = (m_program->read_byte(address) << 24) | |
| 228 | (m_program->read_byte(address + 1) << 16) | |
| 229 | (m_program->read_byte(address + 2) << 8) | |
| 230 | (m_program->read_byte(address + 3) << 0); |
| 247 | 231 | |
| 248 | | //printf("%04xr%08x\n", address & 0x0000ffff, ret); |
| 249 | 232 | return ret; |
| 250 | 233 | } |
| 251 | 234 | |
| 252 | 235 | void rsp_device::WRITE8(UINT32 address, UINT8 data) |
| 253 | 236 | { |
| 254 | | address = 0x04000000 | (address & 0xfff); |
| 255 | | //printf("%04x:%02x\n", address & 0x0000ffff, data); |
| 237 | address &= 0xfff; |
| 256 | 238 | m_program->write_byte(address, data); |
| 257 | 239 | } |
| 258 | 240 | |
| 259 | 241 | void rsp_device::WRITE16(UINT32 address, UINT16 data) |
| 260 | 242 | { |
| 261 | | address = 0x04000000 | (address & 0xfff); |
| 262 | | //printf("%04x:%04x\n", address & 0x0000ffff, data); |
| 243 | address &= 0xfff; |
| 263 | 244 | |
| 264 | | if(address & 1) |
| 265 | | { |
| 266 | | m_program->write_byte(address + 0, (data >> 8) & 0xff); |
| 267 | | m_program->write_byte(address + 1, (data >> 0) & 0xff); |
| 268 | | return; |
| 269 | | } |
| 270 | | |
| 271 | | m_program->write_word(address, data); |
| 245 | m_program->write_byte(address, data >> 8); |
| 246 | m_program->write_byte(address + 1, data & 0xff); |
| 272 | 247 | } |
| 273 | 248 | |
| 274 | 249 | void rsp_device::WRITE32(UINT32 address, UINT32 data) |
| 275 | 250 | { |
| 276 | | address = 0x04000000 | (address & 0xfff); |
| 277 | | //printf("%04x:%08x\n", address & 0x0000ffff, data); |
| 251 | address &= 0xfff; |
| 278 | 252 | |
| 279 | | if(address & 3) |
| 280 | | { |
| 281 | | m_program->write_byte(address + 0, (data >> 24) & 0xff); |
| 282 | | m_program->write_byte(address + 1, (data >> 16) & 0xff); |
| 283 | | m_program->write_byte(address + 2, (data >> 8) & 0xff); |
| 284 | | m_program->write_byte(address + 3, (data >> 0) & 0xff); |
| 285 | | return; |
| 286 | | } |
| 287 | | |
| 288 | | m_program->write_dword(address, data); |
| 253 | m_program->write_byte(address, data >> 24); |
| 254 | m_program->write_byte(address + 1, (data >> 16) & 0xff); |
| 255 | m_program->write_byte(address + 2, (data >> 8) & 0xff); |
| 256 | m_program->write_byte(address + 3, data & 0xff); |
| 289 | 257 | } |
| 290 | 258 | |
| 291 | 259 | /*****************************************************************************/ |
| r241934 | r241935 | |
| 402 | 370 | m_direct = &m_program->direct(); |
| 403 | 371 | resolve_cb(); |
| 404 | 372 | |
| 405 | | // Inaccurate. RSP registers power on to a random state... |
| 373 | // RSP registers should power on to a random state |
| 406 | 374 | for(int regIdx = 0; regIdx < 32; regIdx++ ) |
| 407 | 375 | { |
| 408 | 376 | m_rsp_state->r[regIdx] = 0; |
| r241934 | r241935 | |
| 414 | 382 | CLEAR_CLIP1_FLAGS(); |
| 415 | 383 | CLEAR_ZERO_FLAGS(); |
| 416 | 384 | CLEAR_CLIP2_FLAGS(); |
| 417 | | //m_square_root_res = 0; |
| 418 | | //m_square_root_high = 0; |
| 419 | 385 | m_reciprocal_res = 0; |
| 420 | 386 | m_reciprocal_high = 0; |
| 421 | 387 | |
| 422 | | // ...except for the accumulators. |
| 423 | | // We're not calling machine.rand() because initializing something with machine.rand() |
| 424 | | // makes me retch uncontrollably. |
| 388 | // Accumulators do not power on to a random state |
| 425 | 389 | for(int accumIdx = 0; accumIdx < 8; accumIdx++ ) |
| 426 | 390 | { |
| 427 | 391 | m_accum[accumIdx].q = 0; |
| r241934 | r241935 | |
| 458 | 422 | m_regmap[regnum] = (regnum == 0) ? uml::parameter(0) : uml::parameter::make_memory(&m_rsp_state->r[regnum]); |
| 459 | 423 | } |
| 460 | 424 | |
| 461 | | /* |
| 462 | | drcbe_info beinfo; |
| 463 | | m_drcuml->get_backend_info(beinfo); |
| 464 | | if (beinfo.direct_iregs > 2) |
| 465 | | { |
| 466 | | m_regmap[30] = I2; |
| 467 | | } |
| 468 | | if (beinfo.direct_iregs > 3) |
| 469 | | { |
| 470 | | m_regmap[31] = I3; |
| 471 | | } |
| 472 | | if (beinfo.direct_iregs > 4) |
| 473 | | { |
| 474 | | m_regmap[2] = I4; |
| 475 | | } |
| 476 | | if (beinfo.direct_iregs > 5) |
| 477 | | { |
| 478 | | m_regmap[3] = I5; |
| 479 | | } |
| 480 | | if (beinfo.direct_iregs > 6) |
| 481 | | { |
| 482 | | m_regmap[4] = I6; |
| 483 | | } |
| 484 | | */ |
| 485 | | |
| 486 | 425 | /* mark the cache dirty so it is updated on next execute */ |
| 487 | 426 | m_cache_dirty = TRUE; |
| 488 | 427 | |
| r241934 | r241935 | |
| 1527 | 1466 | } |
| 1528 | 1467 | } |
| 1529 | 1468 | } |
| 1530 | | |
| 1531 | | // never executed |
| 1532 | | //return 0; |
| 1533 | 1469 | } |
| 1534 | 1470 | |
| 1535 | 1471 | #define WRITEBACK_RESULT() {memcpy(&m_v[VDREG].s[0], &vres[0], 16);} |
| 1536 | 1472 | |
| 1537 | | #if 0 |
| 1538 | | static float float_round(float input) |
| 1539 | | { |
| 1540 | | INT32 integer = (INT32)input; |
| 1541 | | float fraction = input - (float)integer; |
| 1542 | | float output = 0.0f; |
| 1543 | | if( fraction >= 0.5f ) |
| 1544 | | { |
| 1545 | | output = (float)( integer + 1 ); |
| 1546 | | } |
| 1547 | | else |
| 1548 | | { |
| 1549 | | output = (float)integer; |
| 1550 | | } |
| 1551 | | return output; |
| 1552 | | } |
| 1553 | | #endif |
| 1554 | | |
| 1555 | 1473 | void rsp_device::handle_vector_ops(UINT32 op) |
| 1556 | 1474 | { |
| 1557 | 1475 | int i; |
| r241934 | r241935 | |
| 1578 | 1496 | // |
| 1579 | 1497 | // Multiplies signed integer by signed integer * 2 |
| 1580 | 1498 | |
| 1581 | | int sel; |
| 1582 | | INT32 s1, s2; |
| 1583 | | INT64 r; |
| 1584 | 1499 | for (i=0; i < 8; i++) |
| 1585 | 1500 | { |
| 1586 | | sel = VEC_EL_2(EL, i); |
| 1587 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1588 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1501 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1502 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1503 | |
| 1589 | 1504 | if (s1 == -32768 && s2 == -32768) |
| 1590 | 1505 | { |
| 1591 | 1506 | // overflow |
| r241934 | r241935 | |
| 1596 | 1511 | } |
| 1597 | 1512 | else |
| 1598 | 1513 | { |
| 1599 | | r = s1 * s2 * 2; |
| 1514 | INT64 r = s1 * s2 * 2; |
| 1600 | 1515 | r += 0x8000; // rounding ? |
| 1601 | 1516 | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1602 | 1517 | ACCUM_M(i) = (INT16)(r >> 16); |
| r241934 | r241935 | |
| 1617 | 1532 | // ------------------------------------------------------ |
| 1618 | 1533 | // |
| 1619 | 1534 | |
| 1620 | | int sel; |
| 1621 | | INT32 s1, s2; |
| 1622 | | INT64 r; |
| 1623 | 1535 | for (i=0; i < 8; i++) |
| 1624 | 1536 | { |
| 1625 | | sel = VEC_EL_2(EL, i); |
| 1626 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1627 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1628 | | r = s1 * s2 * 2; |
| 1537 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1538 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1539 | |
| 1540 | INT64 r = s1 * s2 * 2; |
| 1629 | 1541 | r += 0x8000; // rounding ? |
| 1630 | 1542 | |
| 1631 | 1543 | ACCUM_H(i) = (UINT16)(r >> 32); |
| r241934 | r241935 | |
| 1660 | 1572 | // Stores the higher 16 bits of the 32-bit result to accumulator |
| 1661 | 1573 | // The low slice of accumulator is stored into destination element |
| 1662 | 1574 | |
| 1663 | | int sel; |
| 1664 | | UINT32 s1, s2; |
| 1665 | | UINT32 r; |
| 1666 | 1575 | for (i=0; i < 8; i++) |
| 1667 | 1576 | { |
| 1668 | | sel = VEC_EL_2(EL, i); |
| 1669 | | s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1670 | | s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); |
| 1671 | | r = s1 * s2; |
| 1577 | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1578 | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1579 | UINT32 r = s1 * s2; |
| 1672 | 1580 | |
| 1673 | 1581 | ACCUM_H(i) = 0; |
| 1674 | 1582 | ACCUM_M(i) = 0; |
| r241934 | r241935 | |
| 1691 | 1599 | // The result is stored into accumulator |
| 1692 | 1600 | // The middle slice of accumulator is stored into destination element |
| 1693 | 1601 | |
| 1694 | | int sel; |
| 1695 | | INT32 s1, s2; |
| 1696 | | INT32 r; |
| 1697 | 1602 | for (i=0; i < 8; i++) |
| 1698 | 1603 | { |
| 1699 | | sel = VEC_EL_2(EL, i); |
| 1700 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1701 | | s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended |
| 1702 | | r = s1 * s2; |
| 1604 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1605 | INT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1606 | INT32 r = s1 * s2; |
| 1703 | 1607 | |
| 1704 | 1608 | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1705 | 1609 | ACCUM_M(i) = (INT16)(r >> 16); |
| r241934 | r241935 | |
| 1723 | 1627 | // The result is stored into accumulator |
| 1724 | 1628 | // The low slice of accumulator is stored into destination element |
| 1725 | 1629 | |
| 1726 | | int sel; |
| 1727 | | INT32 s1, s2; |
| 1728 | | INT32 r; |
| 1729 | 1630 | for (i=0; i < 8; i++) |
| 1730 | 1631 | { |
| 1731 | | sel = VEC_EL_2(EL, i); |
| 1732 | | s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1733 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1734 | | r = s1 * s2; |
| 1632 | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1633 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1634 | INT32 r = s1 * s2; |
| 1735 | 1635 | |
| 1736 | 1636 | ACCUM_H(i) = (r < 0) ? 0xffff : 0; // sign-extend to 48-bit |
| 1737 | 1637 | ACCUM_M(i) = (INT16)(r >> 16); |
| r241934 | r241935 | |
| 1754 | 1654 | // The result is stored into highest 32 bits of accumulator, the low slice is zero |
| 1755 | 1655 | // The highest 32 bits of accumulator is saturated into destination element |
| 1756 | 1656 | |
| 1757 | | int sel; |
| 1758 | | INT32 s1, s2; |
| 1759 | | INT32 r; |
| 1760 | 1657 | for (i=0; i < 8; i++) |
| 1761 | 1658 | { |
| 1762 | | sel = VEC_EL_2(EL, i); |
| 1763 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1764 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1765 | | r = s1 * s2; |
| 1659 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1660 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1661 | INT32 r = s1 * s2; |
| 1766 | 1662 | |
| 1767 | 1663 | ACCUM_H(i) = (INT16)(r >> 16); |
| 1768 | 1664 | ACCUM_M(i) = (UINT16)(r); |
| r241934 | r241935 | |
| 1786 | 1682 | // Multiplies signed integer by signed integer * 2 |
| 1787 | 1683 | // The result is added to accumulator |
| 1788 | 1684 | |
| 1789 | | int sel; |
| 1790 | | INT32 s1, s2; |
| 1791 | | INT32 r; |
| 1792 | | UINT16 res; |
| 1793 | 1685 | for (i=0; i < 8; i++) |
| 1794 | 1686 | { |
| 1795 | | sel = VEC_EL_2(EL, i); |
| 1796 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1797 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1798 | | r = s1 * s2; |
| 1687 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1688 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1689 | INT32 r = s1 * s2; |
| 1799 | 1690 | |
| 1800 | | ACCUM(i) += (INT64)(r) << 17; |
| 1801 | | res = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1691 | UINT64 q = (UINT64)(UINT16)ACCUM_LL(i); |
| 1692 | q |= (((UINT64)(UINT16)ACCUM_L(i)) << 16); |
| 1693 | q |= (((UINT64)(UINT16)ACCUM_M(i)) << 32); |
| 1694 | q |= (((UINT64)(UINT16)ACCUM_H(i)) << 48); |
| 1802 | 1695 | |
| 1803 | | vres[i] = res; |
| 1696 | q += (INT64)(r) << 17; |
| 1697 | |
| 1698 | ACCUM_LL(i) = (UINT16)q; |
| 1699 | ACCUM_L(i) = (UINT16)(q >> 16); |
| 1700 | ACCUM_M(i) = (UINT16)(q >> 32); |
| 1701 | ACCUM_H(i) = (UINT16)(q >> 48); |
| 1702 | |
| 1703 | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1804 | 1704 | } |
| 1805 | 1705 | WRITEBACK_RESULT(); |
| 1806 | 1706 | break; |
| r241934 | r241935 | |
| 1814 | 1714 | // ------------------------------------------------------ |
| 1815 | 1715 | // |
| 1816 | 1716 | |
| 1817 | | UINT16 res; |
| 1818 | | int sel; |
| 1819 | | INT32 s1, s2, r1; |
| 1820 | | UINT32 r2, r3; |
| 1821 | 1717 | for (i = 0; i < 8; i++) |
| 1822 | 1718 | { |
| 1823 | | sel = VEC_EL_2(EL, i); |
| 1824 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1825 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1826 | | r1 = s1 * s2; |
| 1827 | | r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1828 | | r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1719 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1720 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1721 | INT32 r1 = s1 * s2; |
| 1722 | UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2); |
| 1723 | UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16); |
| 1829 | 1724 | |
| 1830 | 1725 | ACCUM_L(i) = (UINT16)(r2); |
| 1831 | 1726 | ACCUM_M(i) = (UINT16)(r3); |
| 1832 | 1727 | ACCUM_H(i) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31); |
| 1833 | 1728 | |
| 1834 | | //res = SATURATE_ACCUM(i, 1, 0x0000, 0xffff); |
| 1835 | 1729 | if ((INT16)ACCUM_H(i) < 0) |
| 1836 | 1730 | { |
| 1837 | | res = 0; |
| 1731 | vres[i] = 0; |
| 1838 | 1732 | } |
| 1839 | 1733 | else |
| 1840 | 1734 | { |
| 1841 | 1735 | if (ACCUM_H(i) != 0) |
| 1842 | 1736 | { |
| 1843 | | res = 0xffff; |
| 1737 | vres[i] = 0xffff; |
| 1844 | 1738 | } |
| 1845 | 1739 | else |
| 1846 | 1740 | { |
| 1847 | 1741 | if ((INT16)ACCUM_M(i) < 0) |
| 1848 | 1742 | { |
| 1849 | | res = 0xffff; |
| 1743 | vres[i] = 0xffff; |
| 1850 | 1744 | } |
| 1851 | 1745 | else |
| 1852 | 1746 | { |
| 1853 | | res = ACCUM_M(i); |
| 1747 | vres[i] = ACCUM_M(i); |
| 1854 | 1748 | } |
| 1855 | 1749 | } |
| 1856 | 1750 | } |
| 1857 | | |
| 1858 | | vres[i] = res; |
| 1859 | 1751 | } |
| 1860 | 1752 | WRITEBACK_RESULT(); |
| 1861 | 1753 | break; |
| r241934 | r241935 | |
| 1872 | 1764 | // Adds the higher 16 bits of the 32-bit result to accumulator |
| 1873 | 1765 | // The low slice of accumulator is stored into destination element |
| 1874 | 1766 | |
| 1875 | | UINT16 res; |
| 1876 | | int sel; |
| 1877 | | UINT32 s1, s2, r1; |
| 1878 | | UINT32 r2, r3; |
| 1879 | 1767 | for (i = 0; i < 8; i++) |
| 1880 | 1768 | { |
| 1881 | | sel = VEC_EL_2(EL, i); |
| 1882 | | s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1883 | | s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); |
| 1884 | | r1 = s1 * s2; |
| 1885 | | r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1886 | | r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1769 | UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1770 | UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1771 | UINT32 r1 = s1 * s2; |
| 1772 | UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16); |
| 1773 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16); |
| 1887 | 1774 | |
| 1888 | 1775 | ACCUM_L(i) = (UINT16)(r2); |
| 1889 | 1776 | ACCUM_M(i) = (UINT16)(r3); |
| 1890 | 1777 | ACCUM_H(i) += (INT16)(r3 >> 16); |
| 1891 | 1778 | |
| 1892 | | res = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1893 | | |
| 1894 | | vres[i] = res; |
| 1779 | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1895 | 1780 | } |
| 1896 | 1781 | WRITEBACK_RESULT(); |
| 1897 | 1782 | break; |
| r241934 | r241935 | |
| 1908 | 1793 | // The result is added into accumulator |
| 1909 | 1794 | // The middle slice of accumulator is stored into destination element |
| 1910 | 1795 | |
| 1911 | | UINT16 res; |
| 1912 | | int sel; |
| 1913 | | UINT32 s1, s2, r1; |
| 1914 | | UINT32 r2, r3; |
| 1915 | 1796 | for (i=0; i < 8; i++) |
| 1916 | 1797 | { |
| 1917 | | sel = VEC_EL_2(EL, i); |
| 1918 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1919 | | s2 = (UINT16)VREG_S(VS2REG, sel); // not sign-extended |
| 1920 | | r1 = s1 * s2; |
| 1921 | | r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 1922 | | r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 1798 | UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1799 | UINT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); // not sign-extended |
| 1800 | UINT32 r1 = s1 * s2; |
| 1801 | UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1); |
| 1802 | UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16); |
| 1923 | 1803 | |
| 1924 | 1804 | ACCUM_L(i) = (UINT16)(r2); |
| 1925 | 1805 | ACCUM_M(i) = (UINT16)(r3); |
| r241934 | r241935 | |
| 1927 | 1807 | if ((INT32)(r1) < 0) |
| 1928 | 1808 | ACCUM_H(i) -= 1; |
| 1929 | 1809 | |
| 1930 | | res = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1931 | | |
| 1932 | | vres[i] = res; |
| 1810 | vres[i] = SATURATE_ACCUM(i, 1, 0x8000, 0x7fff); |
| 1933 | 1811 | } |
| 1934 | 1812 | WRITEBACK_RESULT(); |
| 1935 | 1813 | break; |
| r241934 | r241935 | |
| 1946 | 1824 | // The result is added into accumulator |
| 1947 | 1825 | // The low slice of accumulator is stored into destination element |
| 1948 | 1826 | |
| 1949 | | INT32 s1, s2; |
| 1950 | | UINT16 res; |
| 1951 | | int sel; |
| 1952 | 1827 | for (i=0; i < 8; i++) |
| 1953 | 1828 | { |
| 1954 | | sel = VEC_EL_2(EL, i); |
| 1955 | | s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1956 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1829 | INT32 s1 = (UINT16)VREG_S(VS1REG, i); // not sign-extended |
| 1830 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1957 | 1831 | |
| 1958 | | ACCUM(i) += (INT64)(s1*s2)<<16; |
| 1832 | UINT64 q = (UINT64)ACCUM_LL(i); |
| 1833 | q |= (((UINT64)ACCUM_L(i)) << 16); |
| 1834 | q |= (((UINT64)ACCUM_M(i)) << 32); |
| 1835 | q |= (((UINT64)ACCUM_H(i)) << 48); |
| 1836 | q += (INT64)(s1*s2) << 16; |
| 1959 | 1837 | |
| 1960 | | res = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1961 | | vres[i] = res; |
| 1838 | ACCUM_LL(i) = (UINT16)q; |
| 1839 | ACCUM_L(i) = (UINT16)(q >> 16); |
| 1840 | ACCUM_M(i) = (UINT16)(q >> 32); |
| 1841 | ACCUM_H(i) = (UINT16)(q >> 48); |
| 1842 | |
| 1843 | vres[i] = SATURATE_ACCUM(i, 0, 0x0000, 0xffff); |
| 1962 | 1844 | } |
| 1963 | 1845 | WRITEBACK_RESULT(); |
| 1964 | 1846 | |
| r241934 | r241935 | |
| 1976 | 1858 | // The result is added into highest 32 bits of accumulator, the low slice is zero |
| 1977 | 1859 | // The highest 32 bits of accumulator is saturated into destination element |
| 1978 | 1860 | |
| 1979 | | UINT16 res; |
| 1980 | | int sel; |
| 1981 | | INT32 s1, s2; |
| 1982 | 1861 | for (i = 0; i < 8; i++) |
| 1983 | 1862 | { |
| 1984 | | sel = VEC_EL_2(EL, i); |
| 1985 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1986 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 1863 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1864 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1987 | 1865 | |
| 1988 | | m_accum[i].l[1] += s1*s2; |
| 1866 | INT32 accum = (UINT32)(UINT16)ACCUM_M(i); |
| 1867 | accum |= ((UINT32)((UINT16)ACCUM_H(i))) << 16; |
| 1868 | accum += s1 * s2; |
| 1989 | 1869 | |
| 1990 | | res = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 1870 | ACCUM_H(i) = (UINT16)(accum >> 16); |
| 1871 | ACCUM_M(i) = (UINT16)accum; |
| 1991 | 1872 | |
| 1992 | | vres[i] = res; |
| 1873 | vres[i] = SATURATE_ACCUM1(i, 0x8000, 0x7fff); |
| 1993 | 1874 | } |
| 1994 | 1875 | WRITEBACK_RESULT(); |
| 1995 | 1876 | |
| r241934 | r241935 | |
| 2007 | 1888 | |
| 2008 | 1889 | // TODO: check VS2REG == VDREG |
| 2009 | 1890 | |
| 2010 | | int sel; |
| 2011 | | INT32 s1, s2, r; |
| 2012 | 1891 | for (i=0; i < 8; i++) |
| 2013 | 1892 | { |
| 2014 | | sel = VEC_EL_2(EL, i); |
| 2015 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 2016 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 2017 | | r = s1 + s2 + CARRY_FLAG(i); |
| 1893 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1894 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1895 | INT32 r = s1 + s2 + (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 2018 | 1896 | |
| 2019 | 1897 | ACCUM_L(i) = (INT16)(r); |
| 2020 | 1898 | |
| r241934 | r241935 | |
| 2039 | 1917 | |
| 2040 | 1918 | // TODO: check VS2REG == VDREG |
| 2041 | 1919 | |
| 2042 | | int sel; |
| 2043 | | INT32 s1, s2, r; |
| 2044 | 1920 | for (i = 0; i < 8; i++) |
| 2045 | 1921 | { |
| 2046 | | sel = VEC_EL_2(EL, i); |
| 2047 | | s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 2048 | | s2 = (INT32)(INT16)VREG_S(VS2REG, sel); |
| 2049 | | r = s1 - s2 - CARRY_FLAG(i); |
| 1922 | INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i); |
| 1923 | INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1924 | INT32 r = s1 - s2 - (CARRY_FLAG(i) != 0 ? 1 : 0); |
| 2050 | 1925 | |
| 2051 | 1926 | ACCUM_L(i) = (INT16)(r); |
| 2052 | 1927 | |
| r241934 | r241935 | |
| 2071 | 1946 | // Changes the sign of source register 2 if source register 1 is negative and stores |
| 2072 | 1947 | // the result to destination register |
| 2073 | 1948 | |
| 2074 | | int sel; |
| 2075 | | INT16 s1, s2; |
| 2076 | 1949 | for (i=0; i < 8; i++) |
| 2077 | 1950 | { |
| 2078 | | sel = VEC_EL_2(EL, i); |
| 2079 | | s1 = (INT16)VREG_S(VS1REG, i); |
| 2080 | | s2 = (INT16)VREG_S(VS2REG, sel); |
| 1951 | INT16 s1 = (INT16)VREG_S(VS1REG, i); |
| 1952 | INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2081 | 1953 | |
| 2082 | 1954 | if (s1 < 0) |
| 2083 | 1955 | { |
| r241934 | r241935 | |
| 2116 | 1988 | |
| 2117 | 1989 | // TODO: check VS2REG = VDREG |
| 2118 | 1990 | |
| 2119 | | int sel; |
| 2120 | | INT32 s1, s2, r; |
| 2121 | 1991 | CLEAR_ZERO_FLAGS(); |
| 2122 | 1992 | CLEAR_CARRY_FLAGS(); |
| 2123 | 1993 | |
| 2124 | 1994 | for (i=0; i < 8; i++) |
| 2125 | 1995 | { |
| 2126 | | sel = VEC_EL_2(EL, i); |
| 2127 | | s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 2128 | | s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); |
| 2129 | | r = s1 + s2; |
| 1996 | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 1997 | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 1998 | INT32 r = s1 + s2; |
| 2130 | 1999 | |
| 2131 | 2000 | vres[i] = (INT16)(r); |
| 2132 | 2001 | ACCUM_L(i) = (INT16)(r); |
| r241934 | r241935 | |
| 2151 | 2020 | |
| 2152 | 2021 | // TODO: check VS2REG = VDREG |
| 2153 | 2022 | |
| 2154 | | int sel; |
| 2155 | | INT32 s1, s2, r; |
| 2156 | 2023 | CLEAR_ZERO_FLAGS(); |
| 2157 | 2024 | CLEAR_CARRY_FLAGS(); |
| 2158 | 2025 | |
| 2159 | 2026 | for (i=0; i < 8; i++) |
| 2160 | 2027 | { |
| 2161 | | sel = VEC_EL_2(EL, i); |
| 2162 | | s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 2163 | | s2 = (UINT32)(UINT16)VREG_S(VS2REG, sel); |
| 2164 | | r = s1 - s2; |
| 2028 | INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i); |
| 2029 | INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2030 | INT32 r = s1 - s2; |
| 2165 | 2031 | |
| 2166 | 2032 | vres[i] = (INT16)(r); |
| 2167 | 2033 | ACCUM_L(i) = (UINT16)(r); |
| r241934 | r241935 | |
| 2231 | 2097 | // Sets compare flags if elements in VS1 are less than VS2 |
| 2232 | 2098 | // Moves the element in VS2 to destination vector |
| 2233 | 2099 | |
| 2234 | | int sel; |
| 2235 | 2100 | CLEAR_COMPARE_FLAGS(); |
| 2236 | 2101 | CLEAR_CLIP2_FLAGS(); |
| 2237 | 2102 | |
| 2238 | 2103 | for (i=0; i < 8; i++) |
| 2239 | 2104 | { |
| 2240 | | sel = VEC_EL_2(EL, i); |
| 2241 | | |
| 2242 | | if (VREG_S(VS1REG, i) < VREG_S(VS2REG, sel)) |
| 2105 | INT16 s1, s2; |
| 2106 | s1 = VREG_S(VS1REG, i); |
| 2107 | s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2108 | if (s1 < s2) |
| 2243 | 2109 | { |
| 2244 | 2110 | SET_COMPARE_FLAG(i); |
| 2245 | 2111 | } |
| 2246 | | else if (VREG_S(VS1REG, i) == VREG_S(VS2REG, sel)) |
| 2112 | else if (s1 == s2) |
| 2247 | 2113 | { |
| 2248 | | if (ZERO_FLAG(i) == 1 && CARRY_FLAG(i) != 0) |
| 2114 | if (ZERO_FLAG(i) != 0 && CARRY_FLAG(i) != 0) |
| 2249 | 2115 | { |
| 2250 | 2116 | SET_COMPARE_FLAG(i); |
| 2251 | 2117 | } |
| 2252 | 2118 | } |
| 2253 | 2119 | |
| 2254 | | if (COMPARE_FLAG(i)) |
| 2120 | if (COMPARE_FLAG(i) != 0) |
| 2255 | 2121 | { |
| 2256 | | vres[i] = VREG_S(VS1REG, i); |
| 2122 | vres[i] = s1; |
| 2257 | 2123 | } |
| 2258 | 2124 | else |
| 2259 | 2125 | { |
| 2260 | | vres[i] = VREG_S(VS2REG, sel); |
| 2126 | vres[i] = s2; |
| 2261 | 2127 | } |
| 2262 | 2128 | |
| 2263 | 2129 | ACCUM_L(i) = vres[i]; |
| r241934 | r241935 | |
| 2279 | 2145 | // Sets compare flags if elements in VS1 are equal with VS2 |
| 2280 | 2146 | // Moves the element in VS2 to destination vector |
| 2281 | 2147 | |
| 2282 | | int sel; |
| 2283 | 2148 | CLEAR_COMPARE_FLAGS(); |
| 2284 | 2149 | CLEAR_CLIP2_FLAGS(); |
| 2285 | 2150 | |
| 2286 | 2151 | for (i = 0; i < 8; i++) |
| 2287 | 2152 | { |
| 2288 | | sel = VEC_EL_2(EL, i); |
| 2153 | INT16 s1 = VREG_S(VS1REG, i); |
| 2154 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2289 | 2155 | |
| 2290 | | if ((VREG_S(VS1REG, i) == VREG_S(VS2REG, sel)) && ZERO_FLAG(i) == 0) |
| 2156 | if ((s1 == s2) && ZERO_FLAG(i) == 0) |
| 2291 | 2157 | { |
| 2292 | 2158 | SET_COMPARE_FLAG(i); |
| 2293 | | vres[i] = VREG_S(VS1REG, i); |
| 2159 | vres[i] = s1; |
| 2294 | 2160 | } |
| 2295 | 2161 | else |
| 2296 | 2162 | { |
| 2297 | | vres[i] = VREG_S(VS2REG, sel); |
| 2163 | vres[i] = s2; |
| 2298 | 2164 | } |
| 2299 | 2165 | ACCUM_L(i) = vres[i]; |
| 2300 | 2166 | } |
| r241934 | r241935 | |
| 2315 | 2181 | // Sets compare flags if elements in VS1 are not equal with VS2 |
| 2316 | 2182 | // Moves the element in VS2 to destination vector |
| 2317 | 2183 | |
| 2318 | | int sel; |
| 2319 | 2184 | CLEAR_COMPARE_FLAGS(); |
| 2320 | 2185 | CLEAR_CLIP2_FLAGS(); |
| 2321 | 2186 | |
| 2322 | | for (i=0; i < 8; i++)//?????????? ???? |
| 2187 | for (i = 0; i < 8; i++) |
| 2323 | 2188 | { |
| 2324 | | sel = VEC_EL_2(EL, i); |
| 2189 | INT16 s1 = VREG_S(VS1REG, i); |
| 2190 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2325 | 2191 | |
| 2326 | | if (VREG_S(VS1REG, i) != VREG_S(VS2REG, sel)) |
| 2192 | if (s1 != s2 || ZERO_FLAG(i) != 0) |
| 2327 | 2193 | { |
| 2328 | 2194 | SET_COMPARE_FLAG(i); |
| 2195 | vres[i] = s1; |
| 2329 | 2196 | } |
| 2330 | 2197 | else |
| 2331 | 2198 | { |
| 2332 | | if (ZERO_FLAG(i) == 1) |
| 2333 | | { |
| 2334 | | SET_COMPARE_FLAG(i); |
| 2335 | | } |
| 2199 | vres[i] = s2; |
| 2336 | 2200 | } |
| 2337 | | if (COMPARE_FLAG(i)) |
| 2338 | | { |
| 2339 | | vres[i] = VREG_S(VS1REG, i); |
| 2340 | | } |
| 2341 | | else |
| 2342 | | { |
| 2343 | | vres[i] = VREG_S(VS2REG, sel); |
| 2344 | | } |
| 2345 | 2201 | ACCUM_L(i) = vres[i]; |
| 2346 | 2202 | } |
| 2347 | 2203 | |
| r241934 | r241935 | |
| 2361 | 2217 | // Sets compare flags if elements in VS1 are greater or equal with VS2 |
| 2362 | 2218 | // Moves the element in VS2 to destination vector |
| 2363 | 2219 | |
| 2364 | | int sel; |
| 2365 | 2220 | CLEAR_COMPARE_FLAGS(); |
| 2366 | 2221 | CLEAR_CLIP2_FLAGS(); |
| 2367 | 2222 | |
| 2368 | 2223 | for (i=0; i < 8; i++) |
| 2369 | 2224 | { |
| 2370 | | sel = VEC_EL_2(EL, i); |
| 2225 | INT16 s1 = VREG_S(VS1REG, i); |
| 2226 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2371 | 2227 | |
| 2372 | | if (VREG_S(VS1REG, i) == VREG_S(VS2REG, sel)) |
| 2228 | if ((s1 == s2 && (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)) || s1 > s2) |
| 2373 | 2229 | { |
| 2374 | | if (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0) |
| 2375 | | { |
| 2376 | | SET_COMPARE_FLAG(i); |
| 2377 | | } |
| 2378 | | } |
| 2379 | | else if (VREG_S(VS1REG, i) > VREG_S(VS2REG, sel)) |
| 2380 | | { |
| 2381 | 2230 | SET_COMPARE_FLAG(i); |
| 2231 | vres[i] = s1; |
| 2382 | 2232 | } |
| 2383 | | |
| 2384 | | if (COMPARE_FLAG(i) != 0) |
| 2385 | | { |
| 2386 | | vres[i] = VREG_S(VS1REG, i); |
| 2387 | | } |
| 2388 | 2233 | else |
| 2389 | 2234 | { |
| 2390 | | vres[i] = VREG_S(VS2REG, sel); |
| 2235 | vres[i] = s2; |
| 2391 | 2236 | } |
| 2392 | 2237 | |
| 2393 | 2238 | ACCUM_L(i) = vres[i]; |
| r241934 | r241935 | |
| 2408 | 2253 | // |
| 2409 | 2254 | // Vector clip low |
| 2410 | 2255 | |
| 2411 | | int sel; |
| 2412 | | INT16 s1, s2; |
| 2413 | 2256 | for (i = 0; i < 8; i++) |
| 2414 | 2257 | { |
| 2415 | | sel = VEC_EL_2(EL, i); |
| 2416 | | s1 = VREG_S(VS1REG, i); |
| 2417 | | s2 = VREG_S(VS2REG, sel); |
| 2258 | INT16 s1 = VREG_S(VS1REG, i); |
| 2259 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2418 | 2260 | |
| 2419 | 2261 | if (CARRY_FLAG(i) != 0) |
| 2420 | 2262 | { |
| r241934 | r241935 | |
| 2429 | 2271 | ACCUM_L(i) = s1; |
| 2430 | 2272 | } |
| 2431 | 2273 | } |
| 2432 | | else//ZERO_FLAG(i)==0 |
| 2274 | else |
| 2433 | 2275 | { |
| 2434 | 2276 | if (CLIP1_FLAG(i) != 0) |
| 2435 | 2277 | { |
| 2436 | 2278 | if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000) |
| 2437 | | {//proper fix for Harvest Moon 64, r4 |
| 2279 | { |
| 2438 | 2280 | |
| 2439 | 2281 | ACCUM_L(i) = s1; |
| 2440 | 2282 | CLEAR_COMPARE_FLAG(i); |
| r241934 | r241935 | |
| 2459 | 2301 | } |
| 2460 | 2302 | } |
| 2461 | 2303 | } |
| 2462 | | }// |
| 2463 | | else//CARRY_FLAG(i)==0 |
| 2304 | } |
| 2305 | else |
| 2464 | 2306 | { |
| 2465 | 2307 | if (ZERO_FLAG(i) != 0) |
| 2466 | 2308 | { |
| r241934 | r241935 | |
| 2506 | 2348 | // |
| 2507 | 2349 | // Vector clip high |
| 2508 | 2350 | |
| 2509 | | int sel; |
| 2510 | | INT16 s1, s2; |
| 2511 | 2351 | CLEAR_CARRY_FLAGS(); |
| 2512 | 2352 | CLEAR_COMPARE_FLAGS(); |
| 2513 | 2353 | CLEAR_CLIP1_FLAGS(); |
| r241934 | r241935 | |
| 2517 | 2357 | |
| 2518 | 2358 | for (i=0; i < 8; i++) |
| 2519 | 2359 | { |
| 2520 | | sel = VEC_EL_2(EL, i); |
| 2521 | | s1 = VREG_S(VS1REG, i); |
| 2522 | | s2 = VREG_S(VS2REG, sel); |
| 2360 | INT16 s1 = VREG_S(VS1REG, i); |
| 2361 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2523 | 2362 | |
| 2524 | 2363 | if ((s1 ^ s2) < 0) |
| 2525 | 2364 | { |
| r241934 | r241935 | |
| 2547 | 2386 | SET_ZERO_FLAG(i); |
| 2548 | 2387 | } |
| 2549 | 2388 | } |
| 2550 | | }//sign |
| 2389 | } |
| 2551 | 2390 | else |
| 2552 | 2391 | { |
| 2553 | 2392 | vce = 0; |
| r241934 | r241935 | |
| 2592 | 2431 | // |
| 2593 | 2432 | // Vector clip reverse |
| 2594 | 2433 | |
| 2595 | | int sel; |
| 2596 | | INT16 s1, s2; |
| 2597 | 2434 | CLEAR_CARRY_FLAGS(); |
| 2598 | 2435 | CLEAR_COMPARE_FLAGS(); |
| 2599 | 2436 | CLEAR_CLIP1_FLAGS(); |
| r241934 | r241935 | |
| 2602 | 2439 | |
| 2603 | 2440 | for (i=0; i < 8; i++) |
| 2604 | 2441 | { |
| 2605 | | sel = VEC_EL_2(EL, i); |
| 2606 | | s1 = VREG_S(VS1REG, i); |
| 2607 | | s2 = VREG_S(VS2REG, sel); |
| 2442 | INT16 s1 = VREG_S(VS1REG, i); |
| 2443 | INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2608 | 2444 | |
| 2609 | 2445 | if ((INT16)(s1 ^ s2) < 0) |
| 2610 | 2446 | { |
| r241934 | r241935 | |
| 2654 | 2490 | // |
| 2655 | 2491 | // Merges two vectors according to compare flags |
| 2656 | 2492 | |
| 2657 | | int sel; |
| 2658 | 2493 | for (i = 0; i < 8; i++) |
| 2659 | 2494 | { |
| 2660 | | sel = VEC_EL_2(EL, i); |
| 2661 | 2495 | if (COMPARE_FLAG(i) != 0) |
| 2662 | 2496 | { |
| 2663 | 2497 | vres[i] = VREG_S(VS1REG, i); |
| 2664 | 2498 | } |
| 2665 | 2499 | else |
| 2666 | 2500 | { |
| 2667 | | vres[i] = VREG_S(VS2REG, sel);//??? ??????????? |
| 2501 | vres[i] = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2668 | 2502 | } |
| 2669 | 2503 | |
| 2670 | 2504 | ACCUM_L(i) = vres[i]; |
| r241934 | r241935 | |
| 2681 | 2515 | // |
| 2682 | 2516 | // Bitwise AND of two vector registers |
| 2683 | 2517 | |
| 2684 | | int sel; |
| 2685 | 2518 | for (i = 0; i < 8; i++) |
| 2686 | 2519 | { |
| 2687 | | sel = VEC_EL_2(EL, i); |
| 2688 | | vres[i] = VREG_S(VS1REG, i) & VREG_S(VS2REG, sel); |
| 2520 | vres[i] = VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2689 | 2521 | ACCUM_L(i) = vres[i]; |
| 2690 | 2522 | } |
| 2691 | 2523 | WRITEBACK_RESULT(); |
| r241934 | r241935 | |
| 2700 | 2532 | // |
| 2701 | 2533 | // Bitwise NOT AND of two vector registers |
| 2702 | 2534 | |
| 2703 | | int sel; |
| 2704 | 2535 | for (i = 0; i < 8; i++) |
| 2705 | 2536 | { |
| 2706 | | sel = VEC_EL_2(EL, i); |
| 2707 | | vres[i] = ~((VREG_S(VS1REG, i) & VREG_S(VS2REG, sel))); |
| 2537 | vres[i] = ~((VREG_S(VS1REG, i) & VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2708 | 2538 | ACCUM_L(i) = vres[i]; |
| 2709 | 2539 | } |
| 2710 | 2540 | WRITEBACK_RESULT(); |
| r241934 | r241935 | |
| 2719 | 2549 | // |
| 2720 | 2550 | // Bitwise OR of two vector registers |
| 2721 | 2551 | |
| 2722 | | int sel; |
| 2723 | 2552 | for (i = 0; i < 8; i++) |
| 2724 | 2553 | { |
| 2725 | | sel = VEC_EL_2(EL, i); |
| 2726 | | vres[i] = VREG_S(VS1REG, i) | VREG_S(VS2REG, sel); |
| 2554 | vres[i] = VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2727 | 2555 | ACCUM_L(i) = vres[i]; |
| 2728 | 2556 | } |
| 2729 | 2557 | WRITEBACK_RESULT(); |
| r241934 | r241935 | |
| 2738 | 2566 | // |
| 2739 | 2567 | // Bitwise NOT OR of two vector registers |
| 2740 | 2568 | |
| 2741 | | int sel; |
| 2742 | 2569 | for (i=0; i < 8; i++) |
| 2743 | 2570 | { |
| 2744 | | sel = VEC_EL_2(EL, i); |
| 2745 | | vres[i] = ~((VREG_S(VS1REG, i) | VREG_S(VS2REG, sel))); |
| 2571 | vres[i] = ~((VREG_S(VS1REG, i) | VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2746 | 2572 | ACCUM_L(i) = vres[i]; |
| 2747 | 2573 | } |
| 2748 | 2574 | WRITEBACK_RESULT(); |
| r241934 | r241935 | |
| 2757 | 2583 | // |
| 2758 | 2584 | // Bitwise XOR of two vector registers |
| 2759 | 2585 | |
| 2760 | | int sel; |
| 2761 | 2586 | for (i=0; i < 8; i++) |
| 2762 | 2587 | { |
| 2763 | | sel = VEC_EL_2(EL, i); |
| 2764 | | vres[i] = VREG_S(VS1REG, i) ^ VREG_S(VS2REG, sel); |
| 2588 | vres[i] = VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2765 | 2589 | ACCUM_L(i) = vres[i]; |
| 2766 | 2590 | } |
| 2767 | 2591 | WRITEBACK_RESULT(); |
| r241934 | r241935 | |
| 2776 | 2600 | // |
| 2777 | 2601 | // Bitwise NOT XOR of two vector registers |
| 2778 | 2602 | |
| 2779 | | int sel; |
| 2780 | 2603 | for (i=0; i < 8; i++) |
| 2781 | 2604 | { |
| 2782 | | sel = VEC_EL_2(EL, i); |
| 2783 | | vres[i] = ~((VREG_S(VS1REG, i) ^ VREG_S(VS2REG, sel))); |
| 2605 | vres[i] = ~((VREG_S(VS1REG, i) ^ VREG_S(VS2REG, VEC_EL_2(EL, i)))); |
| 2784 | 2606 | ACCUM_L(i) = vres[i]; |
| 2785 | 2607 | } |
| 2786 | 2608 | WRITEBACK_RESULT(); |
| r241934 | r241935 | |
| 2795 | 2617 | // ------------------------------------------------------ |
| 2796 | 2618 | // |
| 2797 | 2619 | // Calculates reciprocal |
| 2798 | | int del = VS1REG & 7; |
| 2799 | | int sel = EL & 7; |
| 2800 | 2620 | INT32 shifter = 0; |
| 2801 | 2621 | |
| 2802 | | INT32 rec = (INT16)(VREG_S(VS2REG, sel)); |
| 2622 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 2803 | 2623 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 2804 | 2624 | if (datainput) |
| 2805 | 2625 | { |
| 2806 | 2626 | for (i = 0; i < 32; i++) |
| 2807 | 2627 | { |
| 2808 | | if (datainput & (1 << ((~i) & 0x1f)))//?.?.??? 31 - i |
| 2628 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2809 | 2629 | { |
| 2810 | 2630 | shifter = i; |
| 2811 | 2631 | break; |
| r241934 | r241935 | |
| 2837 | 2657 | m_reciprocal_res = rec; |
| 2838 | 2658 | m_dp_allowed = 0; |
| 2839 | 2659 | |
| 2840 | | VREG_S(VDREG, del) = (UINT16)(rec & 0xffff); |
| 2660 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2841 | 2661 | |
| 2842 | 2662 | for (i = 0; i < 8; i++) |
| 2843 | 2663 | { |
| 2844 | | sel = VEC_EL_2(EL, i); |
| 2845 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2664 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2846 | 2665 | } |
| 2847 | 2666 | |
| 2848 | 2667 | |
| r241934 | r241935 | |
| 2858 | 2677 | // |
| 2859 | 2678 | // Calculates reciprocal low part |
| 2860 | 2679 | |
| 2861 | | int del = VS1REG & 7; |
| 2862 | | int sel = EL & 7; |
| 2863 | 2680 | INT32 shifter = 0; |
| 2864 | 2681 | |
| 2865 | | INT32 rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(m_reciprocal_high) & 0xffff0000)); |
| 2866 | | |
| 2682 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 2867 | 2683 | INT32 datainput = rec; |
| 2868 | 2684 | |
| 2869 | | if (rec < 0) |
| 2685 | if (m_dp_allowed) |
| 2870 | 2686 | { |
| 2871 | | if (m_dp_allowed) |
| 2687 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2688 | datainput = rec; |
| 2689 | |
| 2690 | if (rec < 0) |
| 2872 | 2691 | { |
| 2873 | 2692 | if (rec < -32768) |
| 2874 | 2693 | { |
| r241934 | r241935 | |
| 2879 | 2698 | datainput = -datainput; |
| 2880 | 2699 | } |
| 2881 | 2700 | } |
| 2882 | | else |
| 2883 | | { |
| 2884 | | datainput = -datainput; |
| 2885 | | } |
| 2886 | 2701 | } |
| 2702 | else if (datainput < 0) |
| 2703 | { |
| 2704 | datainput = -datainput; |
| 2887 | 2705 | |
| 2706 | shifter = 0x10; |
| 2707 | } |
| 2888 | 2708 | |
| 2889 | | if (datainput) |
| 2709 | |
| 2710 | for (i = 0; i < 32; i++) |
| 2890 | 2711 | { |
| 2891 | | for (i = 0; i < 32; i++) |
| 2712 | if (datainput & (1 << ((~i) & 0x1f))) |
| 2892 | 2713 | { |
| 2893 | | if (datainput & (1 << ((~i) & 0x1f)))//?.?.??? 31 - i |
| 2894 | | { |
| 2895 | | shifter = i; |
| 2896 | | break; |
| 2897 | | } |
| 2714 | shifter = i; |
| 2715 | break; |
| 2898 | 2716 | } |
| 2899 | 2717 | } |
| 2900 | | else |
| 2901 | | { |
| 2902 | | if (m_dp_allowed) |
| 2903 | | { |
| 2904 | | shifter = 0; |
| 2905 | | } |
| 2906 | | else |
| 2907 | | { |
| 2908 | | shifter = 0x10; |
| 2909 | | } |
| 2910 | | } |
| 2911 | 2718 | |
| 2912 | 2719 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 2913 | 2720 | INT32 fetchval = rsp_divtable[address]; |
| 2914 | 2721 | INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f); |
| 2915 | | if (rec < 0) |
| 2916 | | { |
| 2917 | | temp = ~temp; |
| 2918 | | } |
| 2722 | temp ^= rec >> 31; |
| 2723 | |
| 2919 | 2724 | if (!rec) |
| 2920 | 2725 | { |
| 2921 | 2726 | temp = 0x7fffffff; |
| r241934 | r241935 | |
| 2929 | 2734 | m_reciprocal_res = rec; |
| 2930 | 2735 | m_dp_allowed = 0; |
| 2931 | 2736 | |
| 2932 | | VREG_S(VDREG, del) = (UINT16)(rec & 0xffff); |
| 2737 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 2933 | 2738 | |
| 2934 | 2739 | for (i = 0; i < 8; i++) |
| 2935 | 2740 | { |
| 2936 | | sel = VEC_EL_2(EL, i); |
| 2937 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2741 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2938 | 2742 | } |
| 2939 | 2743 | |
| 2940 | 2744 | break; |
| r241934 | r241935 | |
| 2949 | 2753 | // |
| 2950 | 2754 | // Calculates reciprocal high part |
| 2951 | 2755 | |
| 2952 | | int del = VS1REG & 7; |
| 2953 | | int sel = EL & 7; |
| 2954 | | |
| 2955 | | m_reciprocal_high = (VREG_S(VS2REG, sel)) << 16; |
| 2756 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 2956 | 2757 | m_dp_allowed = 1; |
| 2957 | 2758 | |
| 2958 | 2759 | for (i = 0; i < 8; i++) |
| 2959 | 2760 | { |
| 2960 | | sel = VEC_EL_2(EL, i); |
| 2961 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2761 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2962 | 2762 | } |
| 2963 | 2763 | |
| 2964 | | VREG_S(VDREG, del) = (INT16)(m_reciprocal_res >> 16); |
| 2764 | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); |
| 2965 | 2765 | |
| 2966 | 2766 | break; |
| 2967 | 2767 | } |
| r241934 | r241935 | |
| 2975 | 2775 | // |
| 2976 | 2776 | // Moves element from vector to destination vector |
| 2977 | 2777 | |
| 2978 | | int del = VS1REG & 7; |
| 2979 | | int sel = EL & 7; |
| 2980 | | |
| 2981 | | VREG_S(VDREG, del) = VREG_S(VS2REG, sel); |
| 2778 | VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7); |
| 2982 | 2779 | for (i = 0; i < 8; i++) |
| 2983 | 2780 | { |
| 2984 | | sel = VEC_EL_2(EL, i); |
| 2985 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2781 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 2986 | 2782 | } |
| 2987 | 2783 | break; |
| 2988 | 2784 | } |
| r241934 | r241935 | |
| 2996 | 2792 | // |
| 2997 | 2793 | // Calculates reciprocal square-root |
| 2998 | 2794 | |
| 2999 | | int del = VS1REG & 7; |
| 3000 | | int sel = EL & 7; |
| 3001 | 2795 | INT32 shifter = 0; |
| 3002 | 2796 | |
| 3003 | | INT32 rec = (INT16)(VREG_S(VS2REG, sel)); |
| 2797 | INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7)); |
| 3004 | 2798 | INT32 datainput = (rec < 0) ? (-rec) : rec; |
| 3005 | 2799 | if (datainput) |
| 3006 | 2800 | { |
| r241934 | r241935 | |
| 3040 | 2834 | m_reciprocal_res = rec; |
| 3041 | 2835 | m_dp_allowed = 0; |
| 3042 | 2836 | |
| 3043 | | VREG_S(VDREG, del) = (UINT16)(rec & 0xffff); |
| 2837 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 3044 | 2838 | |
| 3045 | 2839 | for (i = 0; i < 8; i++) |
| 3046 | 2840 | { |
| 3047 | | sel = VEC_EL_2(EL, i); |
| 3048 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2841 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 3049 | 2842 | } |
| 3050 | 2843 | |
| 3051 | 2844 | break; |
| r241934 | r241935 | |
| 3060 | 2853 | // |
| 3061 | 2854 | // Calculates reciprocal square-root low part |
| 3062 | 2855 | |
| 3063 | | int del = VS1REG & 7; |
| 3064 | | int sel = EL & 7; |
| 3065 | 2856 | INT32 shifter = 0; |
| 3066 | | |
| 3067 | | INT32 rec = ((UINT16)(VREG_S(VS2REG, sel)) | ((UINT32)(m_reciprocal_high) & 0xffff0000)); |
| 3068 | | |
| 2857 | INT32 rec = (INT16)VREG_S(VS2REG, EL & 7); |
| 3069 | 2858 | INT32 datainput = rec; |
| 3070 | 2859 | |
| 3071 | | if (rec < 0) |
| 2860 | if (m_dp_allowed) |
| 3072 | 2861 | { |
| 3073 | | if (m_dp_allowed) |
| 2862 | rec = (rec & 0x0000ffff) | m_reciprocal_high; |
| 2863 | datainput = rec; |
| 2864 | |
| 2865 | if (rec < 0) |
| 3074 | 2866 | { |
| 3075 | | if (rec < -32768)//VDIV.C,208 |
| 2867 | if (rec < -32768) |
| 3076 | 2868 | { |
| 3077 | 2869 | datainput = ~datainput; |
| 3078 | 2870 | } |
| r241934 | r241935 | |
| 3081 | 2873 | datainput = -datainput; |
| 3082 | 2874 | } |
| 3083 | 2875 | } |
| 3084 | | else |
| 3085 | | { |
| 3086 | | datainput = -datainput; |
| 3087 | | } |
| 3088 | 2876 | } |
| 2877 | else if (datainput < 0) |
| 2878 | { |
| 2879 | datainput = -datainput; |
| 3089 | 2880 | |
| 2881 | shifter = 0x10; |
| 2882 | } |
| 2883 | |
| 3090 | 2884 | if (datainput) |
| 3091 | 2885 | { |
| 3092 | 2886 | for (i = 0; i < 32; i++) |
| r241934 | r241935 | |
| 3098 | 2892 | } |
| 3099 | 2893 | } |
| 3100 | 2894 | } |
| 3101 | | else |
| 3102 | | { |
| 3103 | | if (m_dp_allowed) |
| 3104 | | { |
| 3105 | | shifter = 0; |
| 3106 | | } |
| 3107 | | else |
| 3108 | | { |
| 3109 | | shifter = 0x10; |
| 3110 | | } |
| 3111 | | } |
| 3112 | 2895 | |
| 3113 | 2896 | INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22; |
| 3114 | 2897 | address = ((address | 0x200) & 0x3fe) | (shifter & 1); |
| 3115 | 2898 | |
| 3116 | 2899 | INT32 fetchval = rsp_divtable[address]; |
| 3117 | 2900 | INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1); |
| 3118 | | if (rec < 0) |
| 3119 | | { |
| 3120 | | temp = ~temp; |
| 3121 | | } |
| 2901 | temp ^= rec >> 31; |
| 2902 | |
| 3122 | 2903 | if (!rec) |
| 3123 | 2904 | { |
| 3124 | 2905 | temp = 0x7fffffff; |
| r241934 | r241935 | |
| 3132 | 2913 | m_reciprocal_res = rec; |
| 3133 | 2914 | m_dp_allowed = 0; |
| 3134 | 2915 | |
| 3135 | | VREG_S(VDREG, del) = (UINT16)(rec & 0xffff); |
| 2916 | VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff); |
| 3136 | 2917 | |
| 3137 | 2918 | for (i = 0; i < 8; i++) |
| 3138 | 2919 | { |
| 3139 | | sel = VEC_EL_2(EL, i); |
| 3140 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2920 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 3141 | 2921 | } |
| 3142 | 2922 | |
| 3143 | 2923 | break; |
| r241934 | r241935 | |
| 3152 | 2932 | // |
| 3153 | 2933 | // Calculates reciprocal square-root high part |
| 3154 | 2934 | |
| 3155 | | int del = VS1REG & 7; |
| 3156 | | int sel = EL & 7; |
| 3157 | | |
| 3158 | | m_reciprocal_high = (VREG_S(VS2REG, sel)) << 16; |
| 2935 | m_reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16; |
| 3159 | 2936 | m_dp_allowed = 1; |
| 3160 | 2937 | |
| 3161 | 2938 | for (i=0; i < 8; i++) |
| 3162 | 2939 | { |
| 3163 | | sel = VEC_EL_2(EL, i); |
| 3164 | | ACCUM_L(i) = VREG_S(VS2REG, sel); |
| 2940 | ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i)); |
| 3165 | 2941 | } |
| 3166 | 2942 | |
| 3167 | | VREG_S(VDREG, del) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 2943 | VREG_S(VDREG, VS1REG & 7) = (INT16)(m_reciprocal_res >> 16); // store high part |
| 3168 | 2944 | break; |
| 3169 | 2945 | } |
| 3170 | 2946 | |
| r241934 | r241935 | |
| 3362 | 3138 | break; |
| 3363 | 3139 | case 2: |
| 3364 | 3140 | // Anciliary clipping flags |
| 3365 | | RTVAL = ((CARRY_FLAG(0) & 1) << 0) | |
| 3366 | | ((CARRY_FLAG(1) & 1) << 1) | |
| 3367 | | ((CARRY_FLAG(2) & 1) << 2) | |
| 3368 | | ((CARRY_FLAG(3) & 1) << 3) | |
| 3369 | | ((CARRY_FLAG(4) & 1) << 4) | |
| 3370 | | ((CARRY_FLAG(5) & 1) << 5) | |
| 3371 | | ((CARRY_FLAG(6) & 1) << 6) | |
| 3372 | | ((CARRY_FLAG(7) & 1) << 7) | |
| 3373 | | ((ZERO_FLAG(0) & 1) << 8) | |
| 3374 | | ((ZERO_FLAG(1) & 1) << 9) | |
| 3375 | | ((ZERO_FLAG(2) & 1) << 10) | |
| 3376 | | ((ZERO_FLAG(3) & 1) << 11) | |
| 3377 | | ((ZERO_FLAG(4) & 1) << 12) | |
| 3378 | | ((ZERO_FLAG(5) & 1) << 13) | |
| 3379 | | ((ZERO_FLAG(6) & 1) << 14) | |
| 3380 | | ((ZERO_FLAG(7) & 1) << 15); |
| 3381 | | if (RTVAL & 0x8000) RTVAL |= 0xffff0000; |
| 3141 | RTVAL = ((CLIP1_FLAG(0) & 1) << 0) | |
| 3142 | ((CLIP1_FLAG(1) & 1) << 1) | |
| 3143 | ((CLIP1_FLAG(2) & 1) << 2) | |
| 3144 | ((CLIP1_FLAG(3) & 1) << 3) | |
| 3145 | ((CLIP1_FLAG(4) & 1) << 4) | |
| 3146 | ((CLIP1_FLAG(5) & 1) << 5) | |
| 3147 | ((CLIP1_FLAG(6) & 1) << 6) | |
| 3148 | ((CLIP1_FLAG(7) & 1) << 7); |
| 3382 | 3149 | } |
| 3383 | 3150 | } |
| 3384 | 3151 | break; |