trunk/src/emu/cpu/rsp/rspdrc.c
| r23558 | r23559 | |
| 973 | 973 | end = 16; |
| 974 | 974 | ea &= ~0xf; |
| 975 | 975 | |
| 976 | #if USE_SIMD |
| 977 | INT16 mask[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 978 | INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 979 | #endif |
| 976 | 980 | for (i=index; i < end; i++) |
| 977 | 981 | { |
| 982 | #if USE_SIMD |
| 983 | mask[i >> 1] |= 0x00ff << ((i & 1) * 8); |
| 984 | val[i >> 1] |= READ8(rsp, ea) << ((i & 1) * 8); |
| 985 | #endif |
| 978 | 986 | VREG_B(dest, i) = READ8(rsp, ea); |
| 979 | 987 | ea++; |
| 980 | 988 | } |
| 989 | |
| 990 | #if USE_SIMD |
| 991 | __m128i neg1 = _mm_set_epi16(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff); |
| 992 | __m128i keep_mask = _mm_set_epi16(mask[0], mask[1], mask[2], mask[3], mask[4], mask[5], mask[6], mask[7]); |
| 993 | __m128i load_val = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
| 994 | keep_mask = _mm_xor_si128(keep_mask, neg1); |
| 995 | rsp->xv[dest] = _mm_and_si128(rsp->xv[dest], keep_mask); |
| 996 | rsp->xv[dest] = _mm_or_si128(rsp->xv[dest], load_val); |
| 997 | #endif |
| 981 | 998 | } |
| 982 | 999 | |
| 983 | 1000 | static void cfunc_rsp_lpv(void *param) |
| r23558 | r23559 | |
| 1003 | 1020 | |
| 1004 | 1021 | ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8); |
| 1005 | 1022 | |
| 1023 | #if USE_SIMD |
| 1024 | INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 1025 | #endif |
| 1006 | 1026 | for (i=0; i < 8; i++) |
| 1007 | 1027 | { |
| 1028 | #if USE_SIMD |
| 1029 | val[i] = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8; |
| 1030 | #endif |
| 1008 | 1031 | W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8; |
| 1009 | 1032 | } |
| 1033 | |
| 1034 | #if USE_SIMD |
| 1035 | rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
| 1036 | #endif |
| 1010 | 1037 | } |
| 1011 | 1038 | |
| 1012 | 1039 | static void cfunc_rsp_luv(void *param) |
| r23558 | r23559 | |
| 1032 | 1059 | |
| 1033 | 1060 | ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8); |
| 1034 | 1061 | |
| 1062 | #if USE_SIMD |
| 1063 | INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 1064 | #endif |
| 1035 | 1065 | for (i=0; i < 8; i++) |
| 1036 | 1066 | { |
| 1067 | #if USE_SIMD |
| 1068 | val[i] = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7; |
| 1069 | #endif |
| 1037 | 1070 | W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7; |
| 1038 | 1071 | } |
| 1072 | |
| 1073 | #if USE_SIMD |
| 1074 | rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
| 1075 | #endif |
| 1039 | 1076 | } |
| 1040 | 1077 | |
| 1041 | 1078 | static void cfunc_rsp_lhv(void *param) |
| r23558 | r23559 | |
| 1061 | 1098 | |
| 1062 | 1099 | ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16); |
| 1063 | 1100 | |
| 1101 | #if USE_SIMD |
| 1102 | INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 1103 | #endif |
| 1064 | 1104 | for (i=0; i < 8; i++) |
| 1065 | 1105 | { |
| 1106 | #if USE_SIMD |
| 1107 | val[i] = READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 1108 | #endif |
| 1066 | 1109 | W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7; |
| 1067 | 1110 | } |
| 1111 | |
| 1112 | #if USE_SIMD |
| 1113 | rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
| 1114 | #endif |
| 1068 | 1115 | } |
| 1069 | 1116 | |
| 1070 | 1117 | static void cfunc_rsp_lfv(void *param) |
| r23558 | r23559 | |
| 1096 | 1143 | |
| 1097 | 1144 | end = (index >> 1) + 4; |
| 1098 | 1145 | |
| 1146 | #if USE_SIMD |
| 1147 | INT16 mask[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 1148 | INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 1149 | #endif |
| 1099 | 1150 | for (i=index >> 1; i < end; i++) |
| 1100 | 1151 | { |
| 1152 | #if USE_SIMD |
| 1153 | mask[i] = 0xffff; |
| 1154 | val[i] = READ8(rsp, ea) << 7; |
| 1155 | #endif |
| 1101 | 1156 | W_VREG_S(dest, i) = READ8(rsp, ea) << 7; |
| 1102 | 1157 | ea += 4; |
| 1103 | 1158 | } |
| 1159 | |
| 1160 | #if USE_SIMD |
| 1161 | __m128i neg1 = _mm_set_epi16(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff); |
| 1162 | __m128i keep_mask = _mm_set_epi16(mask[0], mask[1], mask[2], mask[3], mask[4], mask[5], mask[6], mask[7]); |
| 1163 | __m128i load_val = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
| 1164 | keep_mask = _mm_xor_si128(keep_mask, neg1); |
| 1165 | rsp->xv[dest] = _mm_and_si128(rsp->xv[dest], keep_mask); |
| 1166 | rsp->xv[dest] = _mm_or_si128(rsp->xv[dest], load_val); |
| 1167 | #endif |
| 1104 | 1168 | } |
| 1105 | 1169 | |
| 1106 | 1170 | static void cfunc_rsp_lwv(void *param) |
| r23558 | r23559 | |
| 1130 | 1194 | |
| 1131 | 1195 | end = (16 - index) + 16; |
| 1132 | 1196 | |
| 1197 | #if USE_SIMD |
| 1198 | INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; |
| 1199 | #endif |
| 1133 | 1200 | for (i=(16 - index); i < end; i++) |
| 1134 | 1201 | { |
| 1202 | #if USE_SIMD |
| 1203 | val[i >> 1] |= READ8(rsp, ea) << ((i & 1) * 8); |
| 1204 | #endif |
| 1135 | 1205 | VREG_B(dest, i & 0xf) = READ8(rsp, ea); |
| 1136 | 1206 | ea += 4; |
| 1137 | 1207 | } |
| 1208 | |
| 1209 | #if USE_SIMD |
| 1210 | rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
| 1211 | #endif |
| 1138 | 1212 | } |
| 1139 | 1213 | |
| 1140 | 1214 | static void cfunc_rsp_ltv(void *param) |
| r23558 | r23559 | |
| 1173 | 1247 | for (i = vs; i < ve; i++) |
| 1174 | 1248 | { |
| 1175 | 1249 | element = ((8 - (index >> 1) + (i - vs)) << 1); |
| 1250 | #if USE_SIMD |
| 1251 | UINT16 value = (READ8(rsp, ea + 1) << 8) | READ8(rsp, ea); |
| 1252 | _mm_insert_epi16 (rsp->xv[i], value, element); |
| 1253 | #endif |
| 1176 | 1254 | VREG_B(i, (element & 0xf)) = READ8(rsp, ea); |
| 1177 | 1255 | VREG_B(i, ((element + 1) & 0xf)) = READ8(rsp, ea + 1); |
| 1178 | 1256 | |