Previous 199869 Revisions Next

r23559 Sunday 9th June, 2013 at 08:41:21 UTC by Ryan Holtz
- Added SIMD code for the rest of the RSP vector load instructions [MooglyGuy]
[src/emu/cpu/rsp]rspdrc.c

trunk/src/emu/cpu/rsp/rspdrc.c
r23558r23559
973973   end = 16;
974974   ea &= ~0xf;
975975
976#if USE_SIMD
977   INT16 mask[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
978   INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
979#endif
976980   for (i=index; i < end; i++)
977981   {
982#if USE_SIMD
983      mask[i >> 1] |= 0x00ff << ((i & 1) * 8);
984      val[i >> 1] |= READ8(rsp, ea) << ((i & 1) * 8);
985#endif
978986      VREG_B(dest, i) = READ8(rsp, ea);
979987      ea++;
980988   }
989
990#if USE_SIMD
991   __m128i neg1 = _mm_set_epi16(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff);
992   __m128i keep_mask = _mm_set_epi16(mask[0], mask[1], mask[2], mask[3], mask[4], mask[5], mask[6], mask[7]);
993   __m128i load_val = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]);
994   keep_mask = _mm_xor_si128(keep_mask, neg1);
995   rsp->xv[dest] = _mm_and_si128(rsp->xv[dest], keep_mask);
996   rsp->xv[dest] = _mm_or_si128(rsp->xv[dest], load_val);
997#endif
981998}
982999
9831000static void cfunc_rsp_lpv(void *param)
r23558r23559
10031020
10041021   ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
10051022
1023#if USE_SIMD
1024   INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
1025#endif
10061026   for (i=0; i < 8; i++)
10071027   {
1028#if USE_SIMD
1029      val[i] = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8;
1030#endif
10081031      W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8;
10091032   }
1033
1034#if USE_SIMD
1035   rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]);
1036#endif
10101037}
10111038
10121039static void cfunc_rsp_luv(void *param)
r23558r23559
10321059
10331060   ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
10341061
1062#if USE_SIMD
1063   INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
1064#endif
10351065   for (i=0; i < 8; i++)
10361066   {
1067#if USE_SIMD
1068      val[i] = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7;
1069#endif
10371070      W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7;
10381071   }
1072
1073#if USE_SIMD
1074   rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]);
1075#endif
10391076}
10401077
10411078static void cfunc_rsp_lhv(void *param)
r23558r23559
10611098
10621099   ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
10631100
1101#if USE_SIMD
1102   INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
1103#endif
10641104   for (i=0; i < 8; i++)
10651105   {
1106#if USE_SIMD
1107      val[i] = READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7;
1108#endif
10661109      W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7;
10671110   }
1111
1112#if USE_SIMD
1113   rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]);
1114#endif
10681115}
10691116
10701117static void cfunc_rsp_lfv(void *param)
r23558r23559
10961143
10971144   end = (index >> 1) + 4;
10981145
1146#if USE_SIMD
1147   INT16 mask[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
1148   INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
1149#endif
10991150   for (i=index >> 1; i < end; i++)
11001151   {
1152#if USE_SIMD
1153      mask[i] = 0xffff;
1154      val[i] = READ8(rsp, ea) << 7;
1155#endif
11011156      W_VREG_S(dest, i) = READ8(rsp, ea) << 7;
11021157      ea += 4;
11031158   }
1159
1160#if USE_SIMD
1161   __m128i neg1 = _mm_set_epi16(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff);
1162   __m128i keep_mask = _mm_set_epi16(mask[0], mask[1], mask[2], mask[3], mask[4], mask[5], mask[6], mask[7]);
1163   __m128i load_val = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]);
1164   keep_mask = _mm_xor_si128(keep_mask, neg1);
1165   rsp->xv[dest] = _mm_and_si128(rsp->xv[dest], keep_mask);
1166   rsp->xv[dest] = _mm_or_si128(rsp->xv[dest], load_val);
1167#endif
11041168}
11051169
11061170static void cfunc_rsp_lwv(void *param)
r23558r23559
11301194
11311195   end = (16 - index) + 16;
11321196
1197#if USE_SIMD
1198   INT16 val[8] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
1199#endif
11331200   for (i=(16 - index); i < end; i++)
11341201   {
1202#if USE_SIMD
1203      val[i >> 1] |= READ8(rsp, ea) << ((i & 1) * 8);
1204#endif
11351205      VREG_B(dest, i & 0xf) = READ8(rsp, ea);
11361206      ea += 4;
11371207   }
1208
1209#if USE_SIMD
1210   rsp->xv[dest] = _mm_set_epi16(val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]);
1211#endif
11381212}
11391213
11401214static void cfunc_rsp_ltv(void *param)
r23558r23559
11731247   for (i = vs; i < ve; i++)
11741248   {
11751249      element = ((8 - (index >> 1) + (i - vs)) << 1);
1250#if USE_SIMD
1251      UINT16 value = (READ8(rsp, ea + 1) << 8) | READ8(rsp, ea);
1252      _mm_insert_epi16 (rsp->xv[i], value, element);
1253#endif
11761254      VREG_B(i, (element & 0xf)) = READ8(rsp, ea);
11771255      VREG_B(i, ((element + 1) & 0xf)) = READ8(rsp, ea + 1);
11781256

Previous 199869 Revisions Next


© 1997-2024 The MAME Team