Previous 199869 Revisions Next

r23554 Sunday 9th June, 2013 at 06:08:14 UTC by Ryan Holtz
- Added USE_SIMD flag to RSP headers and began converting some opcodes to use
 SSE* intrinsics. Current plan is to target SSSE3-capable hardware (Core2 and
 up), with the resulting speedup theorized to be on the order of 5-10x when
 conversion is complete, though this applies only to situations where the
 emulation is heavily bottlenecked by the RSP. [MooglyGuy]
[src/emu/cpu/rsp]rsp.h rspdrc.c

trunk/src/emu/cpu/rsp/rsp.h
r23553r23554
1616#ifndef __RSP_H__
1717#define __RSP_H__
1818
19#define USE_SIMD      (1)
20
21#if USE_SIMD
22#include <tmmintrin.h>
23#endif
24
1925#define USE_RSPDRC
2026
2127/***************************************************************************
r23553r23554
159165   UINT32 pc;
160166   UINT32 r[35];
161167   VECTOR_REG v[32];
168#if USE_SIMD
169   // Mirror of v[] for now, to be used in parallel as
170   // more vector ops are transitioned over
171   __m128i xv[32];
172#endif
162173   UINT16 flag[4];
163174   UINT32 sr;
164175   UINT32 step_count;
trunk/src/emu/cpu/rsp/rspdrc.c
r23553r23554
1818
1919***************************************************************************/
2020
21#include <tmmintrin.h>
22
2123#include "emu.h"
2224#include "debugger.h"
2325#include "rsp.h"
r23553r23554
753755
754756   ea = (base) ? rsp->r[base] + offset : offset;
755757   VREG_B(dest, index) = READ8(rsp, ea);
758
759   // SSE
760#if USE_SIMD
761   // Better solutions for this situation are welcome. We need to be able to insert a byte at an
762   // arbitrary byte index in the __m128i. The current method amounts to:
763   //     final_vec = (in_vec & ~discard_mask) | insert_value
764   // SSE4.1 adds the highly useful PINSRB opcode which, as the name implies, is an arbitrary
765   // byte-insert-into-__m128i, but do we want to require SSE4.1? Maybe just have an ifdef
766   // and use the more optimal one if available.
767   const __m128i neg1 = _mm_set_epi16(0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff);
768
769   __m128i insert_vec = _mm_setzero_si128();
770   INT16 insert_value = READ8(rsp, ea) << ((1 - (index & 1)) << 2);
771   _mm_insert_epi16 (insert_vec, insert_value, index >> 1);
772
773   __m128i discard_mask = _mm_setzero_si128();
774   INT16 discard_element = 0x00ff << ((1 - (index & 1)) << 2);
775   _mm_insert_epi16 (discard_mask, discard_element, index >> 1);
776   _mm_xor_si128 (discard_mask, neg1);
777   _mm_and_si128 (rsp->xv[dest], discard_mask);
778   _mm_or_si128 (rsp->xv[dest], insert_vec);
779#endif
756780}
757781
758782static void cfunc_rsp_lsv(void *param)
r23553r23554
762786   UINT32 ea = 0;
763787   int dest = (op >> 16) & 0x1f;
764788   int base = (op >> 21) & 0x1f;
765   int index = (op >> 7) & 0xf;
789   int index = (op >> 7) & 0xe;
766790   int offset = (op & 0x7f);
767791   if (offset & 0x40)
768792   {
r23553r23554
784808      VREG_B(dest, i) = READ8(rsp, ea);
785809      ea++;
786810   }
811
812   // SSE
813#if USE_SIMD
814   INT16 insert_value = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1);
815   _mm_insert_epi16 (rsp->xv[dest], insert_value, index >> 1);
816#endif
787817}
788818
789819static void cfunc_rsp_llv(void *param)
r23553r23554
793823   UINT32 ea = 0;
794824   int dest = (op >> 16) & 0x1f;
795825   int base = (op >> 21) & 0x1f;
796   int index = (op >> 7) & 0xf;
826   int index = (op >> 7) & 0xc;
797827   int offset = (op & 0x7f);
798828   if (offset & 0x40)
799829   {
r23553r23554
815845      VREG_B(dest, i) = READ8(rsp, ea);
816846      ea++;
817847   }
848
849   // SSE
850#if USE_SIMD
851   INT16 insert_value0 = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1);
852   INT16 insert_value1 = READ8(rsp, ea + 2) << 8 | READ8(rsp, ea + 3);
853   _mm_insert_epi16 (rsp->xv[dest], insert_value0, (index >> 1));
854   _mm_insert_epi16 (rsp->xv[dest], insert_value1, (index >> 1) + 1);
855#endif
818856}
819857
820858static void cfunc_rsp_ldv(void *param)
r23553r23554
824862   UINT32 ea = 0;
825863   int dest = (op >> 16) & 0x1f;
826864   int base = (op >> 21) & 0x1f;
827   int index = (op >> 7) & 0xf;
865   int index = (op >> 7) & 0x8;
828866   int offset = (op & 0x7f);
829867   if (offset & 0x40)
830868   {
r23553r23554
846884      VREG_B(dest, i) = READ8(rsp, ea);
847885      ea++;
848886   }
887
888#if USE_SIMD
889   INT16 insert_value0 = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1);
890   INT16 insert_value1 = READ8(rsp, ea + 2) << 8 | READ8(rsp, ea + 3);
891   INT16 insert_value2 = READ8(rsp, ea + 4) << 8 | READ8(rsp, ea + 5);
892   INT16 insert_value3 = READ8(rsp, ea + 6) << 8 | READ8(rsp, ea + 7);
893   _mm_insert_epi16 (rsp->xv[dest], insert_value0, (index >> 1));
894   _mm_insert_epi16 (rsp->xv[dest], insert_value1, (index >> 1) + 1);
895   _mm_insert_epi16 (rsp->xv[dest], insert_value2, (index >> 1) + 2);
896   _mm_insert_epi16 (rsp->xv[dest], insert_value3, (index >> 1) + 3);
897#endif
849898}
850899
851900static void cfunc_rsp_lqv(void *param)
r23553r23554
857906   UINT32 ea = 0;
858907   int dest = (op >> 16) & 0x1f;
859908   int base = (op >> 21) & 0x1f;
860   int index = (op >> 7) & 0xf;
909   int index = 0; // Just a test, it goes right back the way it was if something breaks //(op >> 7) & 0xf;
861910   int offset = (op & 0x7f);
862911   if (offset & 0x40)
863912   {
r23553r23554
880929      VREG_B(dest, i) = READ8(rsp, ea);
881930      ea++;
882931   }
932
933   // SSE
934#if USE_SIMD
935   INT16 val0 = READ8(rsp, ea) << 8 | READ8(rsp, ea + 1);
936   INT16 val1 = READ8(rsp, ea + 2) << 8 | READ8(rsp, ea + 3);
937   INT16 val2 = READ8(rsp, ea + 4) << 8 | READ8(rsp, ea + 5);
938   INT16 val3 = READ8(rsp, ea + 6) << 8 | READ8(rsp, ea + 7);
939   INT16 val4 = READ8(rsp, ea + 8) << 8 | READ8(rsp, ea + 9);
940   INT16 val5 = READ8(rsp, ea + 10) << 8 | READ8(rsp, ea + 11);
941   INT16 val6 = READ8(rsp, ea + 12) << 8 | READ8(rsp, ea + 13);
942   INT16 val7 = READ8(rsp, ea + 14) << 8 | READ8(rsp, ea + 15);
943
944   rsp->xv[dest] = _mm_set_epi16(val0, val1, val2, val3, val4, val5, val6, val7);
945#endif
883946}
884947
885948static void cfunc_rsp_lrv(void *param)

Previous 199869 Revisions Next


© 1997-2024 The MAME Team