trunk/src/emu/cpu/rsp/rspdrc.c
| r23553 | r23554 | |
| 18 | 18 | |
| 19 | 19 | ***************************************************************************/ |
| 20 | 20 | |
| 21 | #include <tmmintrin.h> |
| 22 | |
| 21 | 23 | #include "emu.h" |
| 22 | 24 | #include "debugger.h" |
| 23 | 25 | #include "rsp.h" |
| r23553 | r23554 | |
| 753 | 755 | |
| 754 | 756 | ea = (base) ? rsp->r[base] + offset : offset; |
| 755 | 757 | VREG_B(dest, index) = READ8(rsp, ea); |
| 758 | |
| 759 | // SSE |
| 760 | #if USE_SIMD |
| 761 | // Better solutions for this situation are welcome. We need to be able to insert a byte at an |
| 762 | // arbitrary byte index in an __m128i. The intended method amounts to: |
| 763 | // final_vec = (in_vec & ~discard_mask) | insert_vec |
| 764 | // SSE4.1 adds the highly useful PINSRB opcode which, as the name implies, inserts a byte |
| 765 | // into an __m128i, but do we want to require SSE4.1? Maybe just have an ifdef and use the |
| 766 | // faster instruction when it is available. |
| 767 | const __m128i neg1 = _mm_set_epi16(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff); |
| 768 | |
| 769 | __m128i insert_vec = _mm_setzero_si128(); |
| 770 | INT16 insert_value = READ8(rsp, ea) << ((1 - (index & 1)) << 2); |
| 771 | _mm_insert_epi16 (insert_vec, insert_value, index >> 1); |
| 772 | |
| 773 | __m128i discard_mask = _mm_setzero_si128(); |
| 774 | INT16 discard_element = 0x00ff << ((1 - (index & 1)) << 2); |
| 775 | _mm_insert_epi16 (discard_mask, discard_element, index >> 1); |
| 776 | _mm_xor_si128 (discard_mask, neg1); |
| 777 | _mm_and_si128 (rsp->xv[dest], discard_mask); |
| 778 | _mm_or_si128 (rsp->xv[dest], insert_vec); |
| 779 | #endif |
| 756 | 780 | } |
| 757 | 781 | |
| 758 | 782 | static void cfunc_rsp_lsv(void *param) |
| r23553 | r23554 | |
| 762 | 786 | UINT32 ea = 0; |
| 763 | 787 | int dest = (op >> 16) & 0x1f; |
| 764 | 788 | int base = (op >> 21) & 0x1f; |
| 765 | | int index = (op >> 7) & 0xf; |
| 789 | int index = (op >> 7) & 0xe; |
| 766 | 790 | int offset = (op & 0x7f); |
| 767 | 791 | if (offset & 0x40) |
| 768 | 792 | { |
| r23553 | r23554 | |
| 784 | 808 | VREG_B(dest, i) = READ8(rsp, ea); |
| 785 | 809 | ea++; |
| 786 | 810 | } |
| 811 | |
| 812 | // SSE |
| 813 | #if USE_SIMD |
| 814 | INT16 insert_value = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1); |
| 815 | _mm_insert_epi16 (rsp->xv[dest], insert_value, index >> 1); |
| 816 | #endif |
| 787 | 817 | } |
| 788 | 818 | |
| 789 | 819 | static void cfunc_rsp_llv(void *param) |
| r23553 | r23554 | |
| 793 | 823 | UINT32 ea = 0; |
| 794 | 824 | int dest = (op >> 16) & 0x1f; |
| 795 | 825 | int base = (op >> 21) & 0x1f; |
| 796 | | int index = (op >> 7) & 0xf; |
| 826 | int index = (op >> 7) & 0xc; |
| 797 | 827 | int offset = (op & 0x7f); |
| 798 | 828 | if (offset & 0x40) |
| 799 | 829 | { |
| r23553 | r23554 | |
| 815 | 845 | VREG_B(dest, i) = READ8(rsp, ea); |
| 816 | 846 | ea++; |
| 817 | 847 | } |
| 848 | |
| 849 | // SSE |
| 850 | #if USE_SIMD |
| 851 | INT16 insert_value0 = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1); |
| 852 | INT16 insert_value1 = READ8(rsp, ea + 2) << 8 | READ8(rsp, ea + 3); |
| 853 | _mm_insert_epi16 (rsp->xv[dest], insert_value0, (index >> 1)); |
| 854 | _mm_insert_epi16 (rsp->xv[dest], insert_value1, (index >> 1) + 1); |
| 855 | #endif |
| 818 | 856 | } |
| 819 | 857 | |
| 820 | 858 | static void cfunc_rsp_ldv(void *param) |
| r23553 | r23554 | |
| 824 | 862 | UINT32 ea = 0; |
| 825 | 863 | int dest = (op >> 16) & 0x1f; |
| 826 | 864 | int base = (op >> 21) & 0x1f; |
| 827 | | int index = (op >> 7) & 0xf; |
| 865 | int index = (op >> 7) & 0x8; |
| 828 | 866 | int offset = (op & 0x7f); |
| 829 | 867 | if (offset & 0x40) |
| 830 | 868 | { |
| r23553 | r23554 | |
| 846 | 884 | VREG_B(dest, i) = READ8(rsp, ea); |
| 847 | 885 | ea++; |
| 848 | 886 | } |
| 887 | |
| 888 | #if USE_SIMD |
| 889 | INT16 insert_value0 = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1); |
| 890 | INT16 insert_value1 = READ8(rsp, ea + 2) << 8 | READ8(rsp, ea + 3); |
| 891 | INT16 insert_value2 = READ8(rsp, ea + 4) << 8 | READ8(rsp, ea + 5); |
| 892 | INT16 insert_value3 = READ8(rsp, ea + 6) << 8 | READ8(rsp, ea + 7); |
| 893 | _mm_insert_epi16 (rsp->xv[dest], insert_value0, (index >> 1)); |
| 894 | _mm_insert_epi16 (rsp->xv[dest], insert_value1, (index >> 1) + 1); |
| 895 | _mm_insert_epi16 (rsp->xv[dest], insert_value2, (index >> 1) + 2); |
| 896 | _mm_insert_epi16 (rsp->xv[dest], insert_value3, (index >> 1) + 3); |
| 897 | #endif |
| 849 | 898 | } |
| 850 | 899 | |
| 851 | 900 | static void cfunc_rsp_lqv(void *param) |
| r23553 | r23554 | |
| 857 | 906 | UINT32 ea = 0; |
| 858 | 907 | int dest = (op >> 16) & 0x1f; |
| 859 | 908 | int base = (op >> 21) & 0x1f; |
| 860 | | int index = (op >> 7) & 0xf; |
| 909 | int index = 0; // Just a test, it goes right back the way it was if something breaks //(op >> 7) & 0xf; |
| 861 | 910 | int offset = (op & 0x7f); |
| 862 | 911 | if (offset & 0x40) |
| 863 | 912 | { |
| r23553 | r23554 | |
| 880 | 929 | VREG_B(dest, i) = READ8(rsp, ea); |
| 881 | 930 | ea++; |
| 882 | 931 | } |
| 932 | |
| 933 | // SSE |
| 934 | #if USE_SIMD |
| 935 | INT16 val0 = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1); |
| 936 | INT16 val1 = READ8(rsp, ea + 2) << 8 | READ8(rsp, ea + 3); |
| 937 | INT16 val2 = READ8(rsp, ea + 4) << 8 | READ8(rsp, ea + 5); |
| 938 | INT16 val3 = READ8(rsp, ea + 6) << 8 | READ8(rsp, ea + 7); |
| 939 | INT16 val4 = READ8(rsp, ea + 8) << 8 | READ8(rsp, ea + 9); |
| 940 | INT16 val5 = READ8(rsp, ea + 10) << 8 | READ8(rsp, ea + 11); |
| 941 | INT16 val6 = READ8(rsp, ea + 12) << 8 | READ8(rsp, ea + 13); |
| 942 | INT16 val7 = READ8(rsp, ea + 14) << 8 | READ8(rsp, ea + 15); |
| 943 | |
| 944 | rsp->xv[dest] = _mm_set_epi16(val0, val1, val2, val3, val4, val5, val6, val7); |
| 945 | #endif |
| 883 | 946 | } |
| 884 | 947 | |
| 885 | 948 | static void cfunc_rsp_lrv(void *param) |