Previous 199869 Revisions Next

r25439 Thursday 26th September, 2013 at 18:30:00 UTC by Ryan Holtz
-RSP SIMD changes: [MooglyGuy, Marathon Man]
 * Converted high and middle accumulator slices into SIMD vectors.
 * Converted flag registers into SIMD vectors.
 * Fixed unaligned accesses in CFC2 opcode.
 * Added SCALAR_GET_VS1, SCALAR_GET_VS2, and SET_ACCUM_H/M/L as pass-through
   wrappers that resolve to either the non-SIMD implementation or individual
   element insert/extract ops, in order to minimize scattered #if USE_SIMD blocks.
 * Ported SIMD implementations of VMUDL, VMUDM, VMUDN, VMUDH, VMACF, VMACU,
   VMADL, VMADM, VMADN, and VMADH from CEN64 with permission.
 * Resulted in an overall speedup of approximately 50% for RSP-bound scenes on
   a 2.5GHz i5.
[src/emu/cpu/rsp]rsp.c rsp.h rspdrc.c

trunk/src/emu/cpu/rsp/rsp.c
r25438r25439
6666#define ACCUM_M(x)      rsp->accum[((x))].w[2]
6767#define ACCUM_L(x)      rsp->accum[((x))].w[1]
6868
69#define CARRY_FLAG(x)           ((rsp->flag[0] & (1 << ((x)))) ? 1 : 0)
70#define CLEAR_CARRY_FLAGS()     { rsp->flag[0] &= ~0xff; }
71#define SET_CARRY_FLAG(x)       { rsp->flag[0] |= (1 << ((x))); }
72#define CLEAR_CARRY_FLAG(x)     { rsp->flag[0] &= ~(1 << ((x))); }
69#define CARRY      0
70#define COMPARE      1
71#define CLIP1      2
72#define ZERO      3
73#define CLIP2      4
7374
74#define COMPARE_FLAG(x)         ((rsp->flag[1] & (1 << ((x)))) ? 1 : 0)
75#define CLEAR_COMPARE_FLAGS()   { rsp->flag[1] &= ~0xff; }
76#define SET_COMPARE_FLAG(x)     { rsp->flag[1] |= (1 << ((x))); }
77#define CLEAR_COMPARE_FLAG(x)   { rsp->flag[1] &= ~(1 << ((x))); }
75#define CARRY_FLAG(x)           (rsp->vflag[CARRY][x & 7] != 0 ? 0xffff : 0)
76#define COMPARE_FLAG(x)         (rsp->vflag[COMPARE][x & 7] != 0 ? 0xffff : 0)
77#define CLIP1_FLAG(x)           (rsp->vflag[CLIP1][x & 7] != 0 ? 0xffff : 0)
78#define ZERO_FLAG(x)            (rsp->vflag[ZERO][x & 7] != 0 ? 0xffff : 0)
79#define CLIP2_FLAG(x)           (rsp->vflag[CLIP2][x & 7] != 0 ? 0xffff : 0)
7880
79#define ZERO_FLAG(x)            ((rsp->flag[0] & (1 << (8+(x)))) ? 1 : 0)
80#define CLEAR_ZERO_FLAGS()      { rsp->flag[0] &= ~0xff00; }
81#define SET_ZERO_FLAG(x)        { rsp->flag[0] |= (1 << (8+(x))); }
82#define CLEAR_ZERO_FLAG(x)      { rsp->flag[0] &= ~(1 << (8+(x))); }
81#define CLEAR_CARRY_FLAGS()      { memset(rsp->vflag[0], 0, 16); }
82#define CLEAR_COMPARE_FLAGS()   { memset(rsp->vflag[1], 0, 16); }
83#define CLEAR_CLIP1_FLAGS()     { memset(rsp->vflag[2], 0, 16); }
84#define CLEAR_ZERO_FLAGS()      { memset(rsp->vflag[3], 0, 16); }
85#define CLEAR_CLIP2_FLAGS()     { memset(rsp->vflag[4], 0, 16); }
8386
84#define EXTENSION_FLAG(x)       ((rsp.flag[2] & (1 << ((x)))) ? 1 : 0)
87#define SET_CARRY_FLAG(x)       { rsp->vflag[0][x & 7] = 0xffff; }
88#define SET_COMPARE_FLAG(x)     { rsp->vflag[1][x & 7] = 0xffff; }
89#define SET_CLIP1_FLAG(x)       { rsp->vflag[2][x & 7] = 0xffff; }
90#define SET_ZERO_FLAG(x)        { rsp->vflag[3][x & 7] = 0xffff; }
91#define SET_CLIP2_FLAG(x)       { rsp->vflag[4][x & 7] = 0xffff; }
8592
93#define CLEAR_CARRY_FLAG(x)     { rsp->vflag[0][x & 7] = 0; }
94#define CLEAR_COMPARE_FLAG(x)   { rsp->vflag[1][x & 7] = 0; }
95#define CLEAR_CLIP1_FLAG(x)     { rsp->vflag[2][x & 7] = 0; }
96#define CLEAR_ZERO_FLAG(x)      { rsp->vflag[3][x & 7] = 0; }
97#define CLEAR_CLIP2_FLAG(x)     { rsp->vflag[4][x & 7] = 0; }
98
8699#define ROPCODE(pc)     rsp->program->read_dword(pc)
87100
88101INLINE UINT8 READ8(rsp_state *rsp, UINT32 address)
r25438r25439
296309      rsp->v[regIdx].d[0] = 0;
297310      rsp->v[regIdx].d[1] = 0;
298311   }
299   rsp->flag[0] = 0;
300   rsp->flag[1] = 0;
301   rsp->flag[2] = 0;
302   rsp->flag[3] = 0;
312    CLEAR_CARRY_FLAGS();
313    CLEAR_COMPARE_FLAGS();
314    CLEAR_CLIP1_FLAGS();
315    CLEAR_ZERO_FLAGS();
316    CLEAR_CLIP2_FLAGS();
303317   //rsp->square_root_res = 0;
304318   //rsp->square_root_high = 0;
305319   rsp->reciprocal_res = 0;
r25438r25439
17321746         // Moves the element in VS2 to destination vector
17331747
17341748         int sel;
1735         rsp->flag[1] = 0;
1749         CLEAR_COMPARE_FLAGS();
1750         CLEAR_CLIP2_FLAGS();
17361751
17371752         for (i=0; i < 8; i++)
17381753         {
r25438r25439
17621777            ACCUM_L(i) = vres[i];
17631778         }
17641779
1765         rsp->flag[0] = 0;
1780         CLEAR_CARRY_FLAGS();
1781         CLEAR_ZERO_FLAGS();
17661782         WRITEBACK_RESULT();
17671783         break;
17681784      }
r25438r25439
17781794         // Moves the element in VS2 to destination vector
17791795
17801796         int sel;
1781         rsp->flag[1] = 0;
1797         CLEAR_COMPARE_FLAGS();
1798         CLEAR_CLIP2_FLAGS();
17821799
17831800         for (i = 0; i < 8; i++)
17841801         {
r25438r25439
17961813            ACCUM_L(i) = vres[i];
17971814         }
17981815
1799         rsp->flag[0] = 0;
1816         CLEAR_ZERO_FLAGS();
1817         CLEAR_CARRY_FLAGS();
18001818         WRITEBACK_RESULT();
18011819         break;
18021820      }
r25438r25439
18121830         // Moves the element in VS2 to destination vector
18131831
18141832         int sel;
1815         rsp->flag[1] = 0;
1833         CLEAR_COMPARE_FLAGS();
1834         CLEAR_CLIP2_FLAGS();
18161835
18171836         for (i=0; i < 8; i++)//?????????? ????
18181837         {
r25438r25439
18401859            ACCUM_L(i) = vres[i];
18411860         }
18421861
1843         rsp->flag[0] = 0;
1862         CLEAR_CARRY_FLAGS();
1863         CLEAR_ZERO_FLAGS();
18441864         WRITEBACK_RESULT();
18451865         break;
18461866      }
r25438r25439
18561876         // Moves the element in VS2 to destination vector
18571877
18581878         int sel;
1859         rsp->flag[1] = 0;
1879         CLEAR_COMPARE_FLAGS();
1880         CLEAR_CLIP2_FLAGS();
18601881
18611882         for (i=0; i < 8; i++)
18621883         {
r25438r25439
18861907            ACCUM_L(i) = vres[i];
18871908         }
18881909
1889         rsp->flag[0] = 0;
1910         CLEAR_CARRY_FLAGS();
1911         CLEAR_ZERO_FLAGS();
18901912         WRITEBACK_RESULT();
18911913         break;
18921914      }
r25438r25439
19231945               }
19241946               else//ZERO_FLAG(i)==0
19251947               {
1926                  if (rsp->flag[2] & (1 << (i)))
1948                  if (CLIP1_FLAG(i) != 0)
19271949                  {
19281950                     if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000)
19291951                     {//proper fix for Harvest Moon 64, r4
r25438r25439
19561978            {
19571979               if (ZERO_FLAG(i) != 0)
19581980               {
1959                  if (rsp->flag[1] & (1 << (8+i)))
1981                  if (CLIP2_FLAG(i) != 0)
19601982                  {
19611983                     ACCUM_L(i) = s2;
19621984                  }
r25438r25439
19701992                  if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0)
19711993                  {
19721994                     ACCUM_L(i) = s2;
1973                     rsp->flag[1] |= (1 << (8+i));
1995                     SET_CLIP2_FLAG(i);
19741996                  }
19751997                  else
19761998                  {
19771999                     ACCUM_L(i) = s1;
1978                     rsp->flag[1] &= ~(1 << (8+i));
2000                     CLEAR_CLIP2_FLAG(i);
19792001                  }
19802002               }
19812003            }
19822004
19832005            vres[i] = ACCUM_L(i);
19842006         }
1985         rsp->flag[0] = 0;
1986         rsp->flag[2] = 0;
2007         CLEAR_CARRY_FLAGS();
2008         CLEAR_ZERO_FLAGS();
2009         CLEAR_CLIP1_FLAGS();
19872010         WRITEBACK_RESULT();
19882011         break;
19892012      }
r25438r25439
19992022
20002023         int sel;
20012024         INT16 s1, s2;
2002         rsp->flag[0] = 0;
2003         rsp->flag[1] = 0;
2004         rsp->flag[2] = 0;
2025         CLEAR_CARRY_FLAGS();
2026         CLEAR_COMPARE_FLAGS();
2027         CLEAR_CLIP1_FLAGS();
2028         CLEAR_ZERO_FLAGS();
2029         CLEAR_CLIP2_FLAGS();
20052030         UINT32 vce = 0;
20062031
20072032         for (i=0; i < 8; i++)
r25438r25439
20162041               SET_CARRY_FLAG(i);
20172042               if (s2 < 0)
20182043               {
2019                  rsp->flag[1] |= (1 << (8+i));
2044                  SET_CLIP2_FLAG(i);
20202045               }
20212046
20222047               if (s1 + s2 <= 0)
r25438r25439
20462071               }
20472072               if (s1 - s2 >= 0)
20482073               {
2049                  rsp->flag[1] |= (1 << (8+i));
2074                  SET_CLIP2_FLAG(i);
20502075                  vres[i] = s2;
20512076               }
20522077               else
r25438r25439
20622087                  }
20632088               }
20642089            }
2065            rsp->flag[2] |= (vce << (i));
2090            if (vce != 0)
2091            {
2092               SET_CLIP1_FLAG(i);
2093            }
20662094            ACCUM_L(i) = vres[i];
20672095         }
20682096         WRITEBACK_RESULT();
r25438r25439
20802108
20812109         int sel;
20822110         INT16 s1, s2;
2083         rsp->flag[0] = 0;
2084         rsp->flag[1] = 0;
2085         rsp->flag[2] = 0;
2111         CLEAR_CARRY_FLAGS();
2112         CLEAR_COMPARE_FLAGS();
2113         CLEAR_CLIP1_FLAGS();
2114         CLEAR_ZERO_FLAGS();
2115         CLEAR_CLIP2_FLAGS();
20862116
20872117         for (i=0; i < 8; i++)
20882118         {
r25438r25439
20942124            {
20952125               if (s2 < 0)
20962126               {
2097                  rsp->flag[1] |= (1 << (8+i));
2127                  SET_CLIP2_FLAG(i);
20982128               }
20992129               if ((s1 + s2) <= 0)
21002130               {
r25438r25439
21152145               if ((s1 - s2) >= 0)
21162146               {
21172147                  ACCUM_L(i) = s2;
2118                  rsp->flag[1] |= (1 << (8+i));
2148                  SET_CLIP2_FLAG(i);
21192149               }
21202150               else
21212151               {
r25438r25439
28012831
28022832                  if (RTREG)
28032833                  {
2804                     if (RDREG == 2)
2834                     switch(RDREG)
28052835                     {
2806                        // Ancillary clipping flags
2807                        RTVAL = rsp->flag[RDREG] & 0x00ff;
2836                        case 0:
2837                           RTVAL = ((CARRY_FLAG(0) & 1) << 0) |
2838                                 ((CARRY_FLAG(1) & 1) << 1) |
2839                                 ((CARRY_FLAG(2) & 1) << 2) |
2840                                 ((CARRY_FLAG(3) & 1) << 3) |
2841                                 ((CARRY_FLAG(4) & 1) << 4) |
2842                                 ((CARRY_FLAG(5) & 1) << 5) |
2843                                 ((CARRY_FLAG(6) & 1) << 6) |
2844                                 ((CARRY_FLAG(7) & 1) << 7) |
2845                                 ((ZERO_FLAG(0) & 1) << 8) |
2846                                 ((ZERO_FLAG(1) & 1) << 9) |
2847                                 ((ZERO_FLAG(2) & 1) << 10) |
2848                                 ((ZERO_FLAG(3) & 1) << 11) |
2849                                 ((ZERO_FLAG(4) & 1) << 12) |
2850                                 ((ZERO_FLAG(5) & 1) << 13) |
2851                                 ((ZERO_FLAG(6) & 1) << 14) |
2852                                 ((ZERO_FLAG(7) & 1) << 15);
2853                           if (RTVAL & 0x8000) RTVAL |= 0xffff0000;
2854                           break;
2855                        case 1:
2856                           RTVAL = ((COMPARE_FLAG(0) & 1) << 0) |
2857                                 ((COMPARE_FLAG(1) & 1) << 1) |
2858                                 ((COMPARE_FLAG(2) & 1) << 2) |
2859                                 ((COMPARE_FLAG(3) & 1) << 3) |
2860                                 ((COMPARE_FLAG(4) & 1) << 4) |
2861                                 ((COMPARE_FLAG(5) & 1) << 5) |
2862                                 ((COMPARE_FLAG(6) & 1) << 6) |
2863                                 ((COMPARE_FLAG(7) & 1) << 7) |
2864                                 ((CLIP2_FLAG(0) & 1) << 8) |
2865                                 ((CLIP2_FLAG(1) & 1) << 9) |
2866                                 ((CLIP2_FLAG(2) & 1) << 10) |
2867                                 ((CLIP2_FLAG(3) & 1) << 11) |
2868                                 ((CLIP2_FLAG(4) & 1) << 12) |
2869                                 ((CLIP2_FLAG(5) & 1) << 13) |
2870                                 ((CLIP2_FLAG(6) & 1) << 14) |
2871                                 ((CLIP2_FLAG(7) & 1) << 15);
2872                           if (RTVAL & 0x8000) RTVAL |= 0xffff0000;
2873                           break;
2874                        case 2:
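2875                           // Ancillary clipping flags. NOTE(review): the expression below reads CARRY/ZERO exactly like case 0, whereas the write path for register 2 stores into the CLIP1 flags and the previous code returned only the low 8 bits - confirm whether CLIP1_FLAG (without sign extension) was intended here.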
2876                           RTVAL = ((CARRY_FLAG(0) & 1) << 0) |
2877                                 ((CARRY_FLAG(1) & 1) << 1) |
2878                                 ((CARRY_FLAG(2) & 1) << 2) |
2879                                 ((CARRY_FLAG(3) & 1) << 3) |
2880                                 ((CARRY_FLAG(4) & 1) << 4) |
2881                                 ((CARRY_FLAG(5) & 1) << 5) |
2882                                 ((CARRY_FLAG(6) & 1) << 6) |
2883                                 ((CARRY_FLAG(7) & 1) << 7) |
2884                                 ((ZERO_FLAG(0) & 1) << 8) |
2885                                 ((ZERO_FLAG(1) & 1) << 9) |
2886                                 ((ZERO_FLAG(2) & 1) << 10) |
2887                                 ((ZERO_FLAG(3) & 1) << 11) |
2888                                 ((ZERO_FLAG(4) & 1) << 12) |
2889                                 ((ZERO_FLAG(5) & 1) << 13) |
2890                                 ((ZERO_FLAG(6) & 1) << 14) |
2891                                 ((ZERO_FLAG(7) & 1) << 15);
2892                           if (RTVAL & 0x8000) RTVAL |= 0xffff0000;
28082893                     }
2809                     else
2810                     {
2811                        // All other flags are 16 bits but sign-extended at retrieval
2812                        RTVAL = (UINT32)rsp->flag[RDREG] | ( ( rsp->flag[RDREG] & 0x8000 ) ? 0xffff0000 : 0 );
2813                     }
28142894                  }
28152895                  break;
28162896               }
r25438r25439
28352915                  // ------------------------------------------------
28362916                  //
28372917
2838                  rsp->flag[RDREG] = RTVAL & 0xffff;
2918                  switch(RDREG)
2919                  {
2920                     case 0:
2921                        CLEAR_CARRY_FLAGS();
2922                        CLEAR_ZERO_FLAGS();
2923                        if (RTVAL & (1 << 0))  { SET_CARRY_FLAG(0); }
2924                        if (RTVAL & (1 << 1))  { SET_CARRY_FLAG(1); }
2925                        if (RTVAL & (1 << 2))  { SET_CARRY_FLAG(2); }
2926                        if (RTVAL & (1 << 3))  { SET_CARRY_FLAG(3); }
2927                        if (RTVAL & (1 << 4))  { SET_CARRY_FLAG(4); }
2928                        if (RTVAL & (1 << 5))  { SET_CARRY_FLAG(5); }
2929                        if (RTVAL & (1 << 6))  { SET_CARRY_FLAG(6); }
2930                        if (RTVAL & (1 << 7))  { SET_CARRY_FLAG(7); }
2931                        if (RTVAL & (1 << 8))  { SET_ZERO_FLAG(0); }
2932                        if (RTVAL & (1 << 9))  { SET_ZERO_FLAG(1); }
2933                        if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); }
2934                        if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); }
2935                        if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); }
2936                        if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); }
2937                        if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); }
2938                        if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); }
2939                        break;
2940                     case 1:
2941                        CLEAR_COMPARE_FLAGS();
2942                        CLEAR_CLIP2_FLAGS();
2943                        if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); }
2944                        if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); }
2945                        if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); }
2946                        if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); }
2947                        if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); }
2948                        if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); }
2949                        if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); }
2950                        if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); }
2951                        if (RTVAL & (1 << 8))  { SET_CLIP2_FLAG(0); }
2952                        if (RTVAL & (1 << 9))  { SET_CLIP2_FLAG(1); }
2953                        if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); }
2954                        if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); }
2955                        if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); }
2956                        if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); }
2957                        if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); }
2958                        if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); }
2959                        break;
2960                     case 2:
2961                        CLEAR_CLIP1_FLAGS();
2962                        if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); }
2963                        if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); }
2964                        if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); }
2965                        if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); }
2966                        if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); }
2967                        if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); }
2968                        if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); }
2969                        if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); }
2970                        break;
2971                  }
28392972                  break;
28402973               }
28412974
trunk/src/emu/cpu/rsp/rsp.h
r25438r25439
167167   UINT32 pc;
168168   UINT32 r[35];
169169   VECTOR_REG v[32];
170   UINT16 vflag[6][8];
170171#if USE_SIMD
171172   // Mirror of v[] for now, to be used in parallel as
172173   // more vector ops are transitioned over
173174   __m128i xv[32];
175   __m128i xvflag[6];
174176#endif
175   UINT16 flag[4];
176177   UINT32 sr;
177178   UINT32 step_count;
178179
trunk/src/emu/cpu/rsp/rspdrc.c
r25438r25439
44
55    Universal machine language-based Nintendo/SGI RSP emulator.
66    Written by Harmony of the MESS team.
7    SIMD versions of vector multiplication opcodes provided by Marathon Man
8      of the CEN64 team.
79
810    Copyright the MESS team.
911    Released for general non-commercial use under the MAME license
r25438r25439
8183/* fast RAM info */
8284struct fast_ram_info
8385{
84   offs_t              start;                      /* start of the RAM block */
85   offs_t              end;                        /* end of the RAM block */
86   UINT8               readonly;                   /* TRUE if read-only */
87   void *              base;                       /* base in memory where the RAM lives */
86    offs_t              start;                      /* start of the RAM block */
87    offs_t              end;                        /* end of the RAM block */
88    UINT8               readonly;                   /* TRUE if read-only */
89    void *              base;                       /* base in memory where the RAM lives */
8890};
8991
9092
9193/* internal compiler state */
9294struct compiler_state
9395{
94   UINT32              cycles;                     /* accumulated cycles */
95   UINT8               checkints;                  /* need to check interrupts before next instruction */
96   UINT8               checksoftints;              /* need to check software interrupts before next instruction */
97   code_label  labelnum;                   /* index for local labels */
96    UINT32              cycles;                     /* accumulated cycles */
97    UINT8               checkints;                  /* need to check interrupts before next instruction */
98    UINT8               checksoftints;              /* need to check software interrupts before next instruction */
99    code_label  labelnum;                   /* index for local labels */
98100};
99101
100102struct rspimp_state
101103{
102   /* core state */
103   drc_cache *         cache;                      /* pointer to the DRC code cache */
104   drcuml_state *      drcuml;                     /* DRC UML generator state */
105   rsp_frontend *      drcfe;                      /* pointer to the DRC front-end state */
106   UINT32              drcoptions;                 /* configurable DRC options */
104    /* core state */
105    drc_cache *         cache;                      /* pointer to the DRC code cache */
106    drcuml_state *      drcuml;                     /* DRC UML generator state */
107    rsp_frontend *      drcfe;                      /* pointer to the DRC front-end state */
108    UINT32              drcoptions;                 /* configurable DRC options */
107109
108   /* internal stuff */
109   UINT8               cache_dirty;                /* true if we need to flush the cache */
110   UINT32              jmpdest;                    /* destination jump target */
110    /* internal stuff */
111    UINT8               cache_dirty;                /* true if we need to flush the cache */
112    UINT32              jmpdest;                    /* destination jump target */
111113
112   /* parameters for subroutines */
113   UINT64              numcycles;                  /* return value from gettotalcycles */
114   const char *        format;                     /* format string for print_debug */
115   UINT32              arg0;                       /* print_debug argument 1 */
116   UINT32              arg1;                       /* print_debug argument 2 */
117   UINT32              arg2;                       /* print_debug argument 3 */
118   UINT32              arg3;                       /* print_debug argument 4 */
119   UINT32              vres[8];                    /* used for temporary vector results */
114    /* parameters for subroutines */
115    UINT64              numcycles;                  /* return value from gettotalcycles */
116    const char *        format;                     /* format string for print_debug */
117    UINT32              arg0;                       /* print_debug argument 1 */
118    UINT32              arg1;                       /* print_debug argument 2 */
119    UINT32              arg2;                       /* print_debug argument 3 */
120    UINT32              arg3;                       /* print_debug argument 4 */
121    UINT32              vres[8];                    /* used for temporary vector results */
120122
121   /* register mappings */
122   parameter   regmap[34];                 /* parameter to register mappings for all 32 integer registers */
123    /* register mappings */
124    parameter   regmap[34];                 /* parameter to register mappings for all 32 integer registers */
123125
124   /* subroutines */
125   code_handle *   entry;                      /* entry point */
126   code_handle *   nocode;                     /* nocode exception handler */
127   code_handle *   out_of_cycles;              /* out of cycles exception handler */
128   code_handle *   read8;                      /* read byte */
129   code_handle *   write8;                     /* write byte */
130   code_handle *   read16;                     /* read half */
131   code_handle *   write16;                    /* write half */
132   code_handle *   read32;                     /* read word */
133   code_handle *   write32;                    /* write word */
126    /* subroutines */
127    code_handle *   entry;                      /* entry point */
128    code_handle *   nocode;                     /* nocode exception handler */
129    code_handle *   out_of_cycles;              /* out of cycles exception handler */
130    code_handle *   read8;                      /* read byte */
131    code_handle *   write8;                     /* write byte */
132    code_handle *   read16;                     /* read half */
133    code_handle *   write16;                    /* write half */
134    code_handle *   read32;                     /* read word */
135    code_handle *   write32;                    /* write word */
134136};
135137
136138/***************************************************************************
r25438r25439
206208#define EL                          ((op >> 21) & 0xf)
207209
208210#define SIMD_EXTRACT16(reg, value, element) \
209   if (element < 0) printf("extract element <0 %d\n", element); \
210   switch((element) & 7) \
211   { \
212      case 0: value = _mm_extract_epi16(reg, 0); break; \
213      case 1: value = _mm_extract_epi16(reg, 1); break; \
214      case 2: value = _mm_extract_epi16(reg, 2); break; \
215      case 3: value = _mm_extract_epi16(reg, 3); break; \
216      case 4: value = _mm_extract_epi16(reg, 4); break; \
217      case 5: value = _mm_extract_epi16(reg, 5); break; \
218      case 6: value = _mm_extract_epi16(reg, 6); break; \
219      case 7: value = _mm_extract_epi16(reg, 7); break; \
220   }
211    switch((element) & 7) \
212    { \
213        case 0: value = _mm_extract_epi16(reg, 0); break; \
214        case 1: value = _mm_extract_epi16(reg, 1); break; \
215        case 2: value = _mm_extract_epi16(reg, 2); break; \
216        case 3: value = _mm_extract_epi16(reg, 3); break; \
217        case 4: value = _mm_extract_epi16(reg, 4); break; \
218        case 5: value = _mm_extract_epi16(reg, 5); break; \
219        case 6: value = _mm_extract_epi16(reg, 6); break; \
220        case 7: value = _mm_extract_epi16(reg, 7); break; \
221    }
221222
222223
223224#define SIMD_INSERT16(reg, value, element) \
224   if (element < 0) printf("insert element <0 %d\n", element); \
225   switch((element) & 7) \
226   { \
227      case 0: reg = _mm_insert_epi16(reg, value, 0); break; \
228      case 1: reg = _mm_insert_epi16(reg, value, 1); break; \
229      case 2: reg = _mm_insert_epi16(reg, value, 2); break; \
230      case 3: reg = _mm_insert_epi16(reg, value, 3); break; \
231      case 4: reg = _mm_insert_epi16(reg, value, 4); break; \
232      case 5: reg = _mm_insert_epi16(reg, value, 5); break; \
233      case 6: reg = _mm_insert_epi16(reg, value, 6); break; \
234      case 7: reg = _mm_insert_epi16(reg, value, 7); break; \
235   }
225    switch((element) & 7) \
226    { \
227        case 0: reg = _mm_insert_epi16(reg, value, 0); break; \
228        case 1: reg = _mm_insert_epi16(reg, value, 1); break; \
229        case 2: reg = _mm_insert_epi16(reg, value, 2); break; \
230        case 3: reg = _mm_insert_epi16(reg, value, 3); break; \
231        case 4: reg = _mm_insert_epi16(reg, value, 4); break; \
232        case 5: reg = _mm_insert_epi16(reg, value, 5); break; \
233        case 6: reg = _mm_insert_epi16(reg, value, 6); break; \
234        case 7: reg = _mm_insert_epi16(reg, value, 7); break; \
235    }
236236
237237
238#define SIMD_EXTRACT16C(reg, value, element) value = _mm_extract_epi16(reg, element);
239#define SIMD_INSERT16C(reg, value, element) reg = _mm_insert_epi16(reg, value, element);
240
238241#define VREG_B(reg, offset)         rsp->v[(reg)].b[(offset)^1]
239242#define W_VREG_S(reg, offset)       rsp->v[(reg)].s[(offset)]
240243#define VREG_S(reg, offset)         (INT16)rsp->v[(reg)].s[(offset)]
r25438r25439
242245#define VEC_EL_2(x,z)               (vector_elements_2[(x)][(z)])
243246
244247#define ACCUM(x)        rsp->accum[x].q
245#define ACCUM_H(x)      rsp->accum[((x))].w[3]
246#define ACCUM_M(x)      rsp->accum[((x))].w[2]
247#define ACCUM_L(x)      rsp->accum[((x))].w[1]
248#if USE_SIMD
249INLINE UINT16 ACCUM_H(const rsp_state *rsp, int x)
250{
251   UINT16 out;
252   SIMD_EXTRACT16(rsp->accum_h, out, x);
253   return out;
254}
248255
249#define CARRY_FLAG(x)               ((rsp->flag[0] & (1 << (x))) ? 1 : 0)
250#define CLEAR_CARRY_FLAGS()         { rsp->flag[0] &= ~0xff; }
251#define SET_CARRY_FLAG(x)           { rsp->flag[0] |= (1 << (x)); }
252#define CLEAR_CARRY_FLAG(x)         { rsp->flag[0] &= ~(1 << (x)); }
256INLINE UINT16 ACCUM_M(const rsp_state *rsp, int x)
257{
258   UINT16 out;
259   SIMD_EXTRACT16(rsp->accum_m, out, x);
260   return out;
261}
253262
254#define COMPARE_FLAG(x)             ((rsp->flag[1] >> (x)) & 1)
255#define CLEAR_COMPARE_FLAGS()       { rsp->flag[1] &= ~0xff; }
256#define SET_COMPARE_FLAG(x)         { rsp->flag[1] |= (1 << (x)); }
257#define CLEAR_COMPARE_FLAG(x)       { rsp->flag[1] &= ~(1 << (x)); }
263INLINE UINT16 ACCUM_L(const rsp_state *rsp, int x)
264{
265   UINT16 out;
266   SIMD_EXTRACT16(rsp->accum_l, out, x);
267   return out;
268}
258269
259#define ZERO_FLAG(x)                ((rsp->flag[0] & (0x100 << (x))) ? 1 : 0)
260#define CLEAR_ZERO_FLAGS()          { rsp->flag[0] &= ~0xff00; }
261#define SET_ZERO_FLAG(x)            { rsp->flag[0] |= (0x100 << (x)); }
262#define CLEAR_ZERO_FLAG(x)          { rsp->flag[0] &= ~(0x100 << (x)); }
270#define SET_ACCUM_H(v, x)       SIMD_INSERT16(rsp->accum_h, v, x);
271#define SET_ACCUM_M(v, x)       SIMD_INSERT16(rsp->accum_m, v, x);
272#define SET_ACCUM_L(v, x)       SIMD_INSERT16(rsp->accum_l, v, x);
263273
274#define SCALAR_GET_VS1(out, i)   SIMD_EXTRACT16(rsp->xv[VS1REG], out, i)
275#define SCALAR_GET_VS2(out, i)   SIMD_EXTRACT16(rsp->xv[VS2REG], out, VEC_EL_2(EL, i))
276
277#else
278
279#define ACCUM_H(v, x)         rsp->accum[((x))].w[3]
280#define ACCUM_M(v, x)         rsp->accum[((x))].w[2]
281#define ACCUM_L(v, x)         rsp->accum[((x))].w[1]
282
283#define SET_ACCUM_H(v, x)      ACCUM_H(-1, x) = v;
284#define SET_ACCUM_M(v, x)      ACCUM_M(-1, x) = v;
285#define SET_ACCUM_L(v, x)      ACCUM_L(-1, x) = v;
286
287#define SCALAR_GET_VS1(out, i)   out = VREG_S(VS1REG, i)
288#define SCALAR_GET_VS2(out, i)   out = VREG_S(VS2REG, VEC_EL_2(EL, i))
289
290#endif // USE_SIMD
291
292#define CARRY      0
293#define COMPARE      1
294#define CLIP1      2
295#define ZERO      3
296#define CLIP2      4
297
298#if USE_SIMD
299INLINE UINT16 CARRY_FLAG(rsp_state *rsp, const int x)
300{
301   UINT16 out;
302   SIMD_EXTRACT16(rsp->xvflag[CARRY], out, x);
303   return out;
304}
305
306INLINE UINT16 COMPARE_FLAG(rsp_state *rsp, const int x)
307{
308   UINT16 out;
309   SIMD_EXTRACT16(rsp->xvflag[COMPARE], out, x);
310   return out;
311}
312
313INLINE UINT16 CLIP1_FLAG(rsp_state *rsp, const int x)
314{
315   UINT16 out;
316   SIMD_EXTRACT16(rsp->xvflag[CLIP1], out, x);
317   return out;
318}
319
320INLINE UINT16 ZERO_FLAG(rsp_state *rsp, const int x)
321{
322   UINT16 out;
323   SIMD_EXTRACT16(rsp->xvflag[ZERO], out, x);
324   return out;
325}
326
327INLINE UINT16 CLIP2_FLAG(rsp_state *rsp, const int x)
328{
329   UINT16 out;
330   SIMD_EXTRACT16(rsp->xvflag[CLIP2], out, x);
331   return out;
332}
333
334#define CLEAR_CARRY_FLAGS()      { rsp->xvflag[CARRY] = _mm_setzero_si128(); }
335#define CLEAR_COMPARE_FLAGS()   { rsp->xvflag[COMPARE] = _mm_setzero_si128(); }
336#define CLEAR_CLIP1_FLAGS()      { rsp->xvflag[CLIP1] = _mm_setzero_si128(); }
337#define CLEAR_ZERO_FLAGS()      { rsp->xvflag[ZERO] = _mm_setzero_si128(); }
338#define CLEAR_CLIP2_FLAGS()      { rsp->xvflag[CLIP2] = _mm_setzero_si128(); }
339
340#define SET_CARRY_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[CARRY], 0xffff, x); }
341#define SET_COMPARE_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[COMPARE], 0xffff, x); }
342#define SET_CLIP1_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[CLIP1], 0xffff, x); }
343#define SET_ZERO_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[ZERO], 0xffff, x); }
344#define SET_CLIP2_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[CLIP2], 0xffff, x); }
345
346#define CLEAR_CARRY_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[CARRY], 0, x); }
347#define CLEAR_COMPARE_FLAG(x)   { SIMD_INSERT16(rsp->xvflag[COMPARE], 0, x); }
348#define CLEAR_CLIP1_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[CLIP1], 0, x); }
349#define CLEAR_ZERO_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[ZERO], 0, x); }
350#define CLEAR_CLIP2_FLAG(x)      { SIMD_INSERT16(rsp->xvflag[CLIP2], 0, x); }
351
352#else
353#define CARRY_FLAG(rsp, x)          (rsp->vflag[CARRY][x & 7] != 0 ? 0xffff : 0)
354#define COMPARE_FLAG(rsp, x)        (rsp->vflag[COMPARE][x & 7] != 0 ? 0xffff : 0)
355#define CLIP1_FLAG(rsp, x)          (rsp->vflag[CLIP1][x & 7] != 0 ? 0xffff : 0)
356#define ZERO_FLAG(rsp, x)           (rsp->vflag[ZERO][x & 7] != 0 ? 0xffff : 0)
357#define CLIP2_FLAG(rsp, x)          (rsp->vflag[CLIP2][x & 7] != 0 ? 0xffff : 0)
358
359#define CLEAR_CARRY_FLAGS()         { memset(rsp->vflag[0], 0, 16); }
360#define CLEAR_COMPARE_FLAGS()       { memset(rsp->vflag[1], 0, 16); }
361#define CLEAR_CLIP1_FLAGS()          { memset(rsp->vflag[2], 0, 16); }
362#define CLEAR_ZERO_FLAGS()          { memset(rsp->vflag[3], 0, 16); }
363#define CLEAR_CLIP2_FLAGS()          { memset(rsp->vflag[4], 0, 16); }
364
365#define SET_CARRY_FLAG(x)           { rsp->vflag[0][x & 7] = 0xffff; }
366#define SET_COMPARE_FLAG(x)         { rsp->vflag[1][x & 7] = 0xffff; }
367#define SET_CLIP1_FLAG(x)            { rsp->vflag[2][x & 7] = 0xffff; }
368#define SET_ZERO_FLAG(x)            { rsp->vflag[3][x & 7] = 0xffff; }
369#define SET_CLIP2_FLAG(x)            { rsp->vflag[4][x & 7] = 0xffff; }
370
371#define CLEAR_CARRY_FLAG(x)         { rsp->vflag[0][x & 7] = 0; }
372#define CLEAR_COMPARE_FLAG(x)       { rsp->vflag[1][x & 7] = 0; }
373#define CLEAR_CLIP1_FLAG(x)          { rsp->vflag[2][x & 7] = 0; }
374#define CLEAR_ZERO_FLAG(x)          { rsp->vflag[3][x & 7] = 0; }
375#define CLEAR_CLIP2_FLAG(x)          { rsp->vflag[4][x & 7] = 0; }
376#endif
377
264378INLINE rsp_state *get_safe_token(device_t *device)
265379{
266   assert(device != NULL);
267   assert(device->type() == RSP_DRC);
268   return *(rsp_state **)downcast<legacy_cpu_device *>(device)->token();
380    assert(device != NULL);
381    assert(device->type() == RSP_DRC);
382    return *(rsp_state **)downcast<legacy_cpu_device *>(device)->token();
269383}
270384
271385/***************************************************************************
r25438r25439
279393
280394INLINE UINT32 epc(const opcode_desc *desc)
281395{
282   return ((desc->flags & OPFLAG_IN_DELAY_SLOT) ? (desc->pc - 3) : desc->pc) | 0x1000;
396    return ((desc->flags & OPFLAG_IN_DELAY_SLOT) ? (desc->pc - 3) : desc->pc) | 0x1000;
283397}
284398
285399
r25438r25439
290404
291405INLINE void alloc_handle(drcuml_state *drcuml, code_handle **handleptr, const char *name)
292406{
293   if (*handleptr == NULL)
294      *handleptr = drcuml->handle_alloc(name);
407    if (*handleptr == NULL)
408        *handleptr = drcuml->handle_alloc(name);
295409}
296410
297411
r25438r25439
302416
303417INLINE void load_fast_iregs(rsp_state *rsp, drcuml_block *block)
304418{
305   int regnum;
419    int regnum;
306420
307   for (regnum = 0; regnum < ARRAY_LENGTH(rsp->impstate->regmap); regnum++)
308      if (rsp->impstate->regmap[regnum].is_int_register())
309         UML_MOV(block, ireg(rsp->impstate->regmap[regnum].ireg() - REG_I0), mem(&rsp->r[regnum]));
421    for (regnum = 0; regnum < ARRAY_LENGTH(rsp->impstate->regmap); regnum++)
422        if (rsp->impstate->regmap[regnum].is_int_register())
423            UML_MOV(block, ireg(rsp->impstate->regmap[regnum].ireg() - REG_I0), mem(&rsp->r[regnum]));
310424}
311425
312426
r25438r25439
317431
318432INLINE void save_fast_iregs(rsp_state *rsp, drcuml_block *block)
319433{
320   int regnum;
434    int regnum;
321435
322   for (regnum = 0; regnum < ARRAY_LENGTH(rsp->impstate->regmap); regnum++)
323      if (rsp->impstate->regmap[regnum].is_int_register())
324         UML_MOV(block, mem(&rsp->r[regnum]), ireg(rsp->impstate->regmap[regnum].ireg() - REG_I0));
436    for (regnum = 0; regnum < ARRAY_LENGTH(rsp->impstate->regmap); regnum++)
437        if (rsp->impstate->regmap[regnum].is_int_register())
438            UML_MOV(block, mem(&rsp->r[regnum]), ireg(rsp->impstate->regmap[regnum].ireg() - REG_I0));
325439}
326440
327441/***************************************************************************
r25438r25439
330444
331445void rspdrc_add_imem(device_t *device, UINT32 *base)
332446{
333   if (!device->machine().options().drc()) return;
334   rsp_state *rsp = get_safe_token(device);
335   rsp->imem32 = base;
336   rsp->imem16 = (UINT16*)base;
337   rsp->imem8 = (UINT8*)base;
447    if (!device->machine().options().drc()) return;
448    rsp_state *rsp = get_safe_token(device);
449    rsp->imem32 = base;
450    rsp->imem16 = (UINT16*)base;
451    rsp->imem8 = (UINT8*)base;
338452}
339453
340454void rspdrc_add_dmem(device_t *device, UINT32 *base)
341455{
342   if (!device->machine().options().drc()) return;
343   rsp_state *rsp = get_safe_token(device);
344   rsp->dmem32 = base;
345   rsp->dmem16 = (UINT16*)base;
346   rsp->dmem8 = (UINT8*)base;
456    if (!device->machine().options().drc()) return;
457    rsp_state *rsp = get_safe_token(device);
458    rsp->dmem32 = base;
459    rsp->dmem16 = (UINT16*)base;
460    rsp->dmem8 = (UINT8*)base;
347461}
348462
349463INLINE UINT8 READ8(rsp_state *rsp, UINT32 address)
350464{
351   UINT8 ret = rsp->dmem8[BYTE4_XOR_BE(address & 0xfff)];
352   //printf("%04xr%02x\n",address, ret);
353   return ret;
465    UINT8 ret = rsp->dmem8[BYTE4_XOR_BE(address & 0xfff)];
466    return ret;
354467}
355468
356469static void cfunc_read8(void *param)
357470{
358   rsp_state *rsp = (rsp_state *)param;
359   rsp->impstate->arg0 = READ8(rsp, rsp->impstate->arg0);
471    rsp_state *rsp = (rsp_state *)param;
472    rsp->impstate->arg0 = READ8(rsp, rsp->impstate->arg0);
360473}
361474
362475INLINE UINT16 READ16(rsp_state *rsp, UINT32 address)
363476{
364   UINT16 ret;
365   address &= 0xfff;
366   ret = rsp->dmem8[BYTE4_XOR_BE(address)] << 8;
367   ret |= rsp->dmem8[BYTE4_XOR_BE(address + 1)];
368   //printf("%04xr%04x\n",address, ret);
369   return ret;
477    UINT16 ret;
478    address &= 0xfff;
479    ret = rsp->dmem8[BYTE4_XOR_BE(address)] << 8;
480    ret |= rsp->dmem8[BYTE4_XOR_BE(address + 1)];
481    return ret;
370482}
371483
372484static void cfunc_read16(void *param)
373485{
374   rsp_state *rsp = (rsp_state *)param;
375   rsp->impstate->arg0 = READ16(rsp, rsp->impstate->arg0);
486    rsp_state *rsp = (rsp_state *)param;
487    rsp->impstate->arg0 = READ16(rsp, rsp->impstate->arg0);
376488}
377489
378490INLINE UINT32 READ32(rsp_state *rsp, UINT32 address)
379491{
380   UINT32 ret;
381   address &= 0xfff;
382   ret = rsp->dmem8[BYTE4_XOR_BE(address)] << 24;
383   ret |= rsp->dmem8[BYTE4_XOR_BE(address + 1)] << 16;
384   ret |= rsp->dmem8[BYTE4_XOR_BE(address + 2)] << 8;
385   ret |= rsp->dmem8[BYTE4_XOR_BE(address + 3)];
386   //printf("%04xr%08x\n",address, ret);
387   return ret;
492    UINT32 ret;
493    address &= 0xfff;
494    ret = rsp->dmem8[BYTE4_XOR_BE(address)] << 24;
495    ret |= rsp->dmem8[BYTE4_XOR_BE(address + 1)] << 16;
496    ret |= rsp->dmem8[BYTE4_XOR_BE(address + 2)] << 8;
497    ret |= rsp->dmem8[BYTE4_XOR_BE(address + 3)];
498    return ret;
388499}
389500
390501static void cfunc_read32(void *param)
391502{
392   rsp_state *rsp = (rsp_state *)param;
393   rsp->impstate->arg0 = READ32(rsp, rsp->impstate->arg0);
503    rsp_state *rsp = (rsp_state *)param;
504    rsp->impstate->arg0 = READ32(rsp, rsp->impstate->arg0);
394505}
395506
396507INLINE void WRITE8(rsp_state *rsp, UINT32 address, UINT8 data)
397508{
398   address &= 0xfff;
399   rsp->dmem8[BYTE4_XOR_BE(address)] = data;
400   //printf("%04x:%02x\n",address, data);
509    address &= 0xfff;
510    rsp->dmem8[BYTE4_XOR_BE(address)] = data;
401511}
402512
403513static void cfunc_write8(void *param)
404514{
405   rsp_state *rsp = (rsp_state *)param;
406   WRITE8(rsp, rsp->impstate->arg0, (UINT8)rsp->impstate->arg1);
515    rsp_state *rsp = (rsp_state *)param;
516    WRITE8(rsp, rsp->impstate->arg0, (UINT8)rsp->impstate->arg1);
407517}
408518
409519INLINE void WRITE16(rsp_state *rsp, UINT32 address, UINT16 data)
410520{
411   address &= 0xfff;
412   rsp->dmem8[BYTE4_XOR_BE(address)] = data >> 8;
413   rsp->dmem8[BYTE4_XOR_BE(address + 1)] = data & 0xff;
414   //printf("%04x:%04x\n",address, data);
521    address &= 0xfff;
522    rsp->dmem8[BYTE4_XOR_BE(address)] = data >> 8;
523    rsp->dmem8[BYTE4_XOR_BE(address + 1)] = data & 0xff;
415524}
416525
417526static void cfunc_write16(void *param)
418527{
419   rsp_state *rsp = (rsp_state *)param;
420   WRITE16(rsp, rsp->impstate->arg0, (UINT16)rsp->impstate->arg1);
528    rsp_state *rsp = (rsp_state *)param;
529    WRITE16(rsp, rsp->impstate->arg0, (UINT16)rsp->impstate->arg1);
421530}
422531
423532INLINE void WRITE32(rsp_state *rsp, UINT32 address, UINT32 data)
424533{
425   address &= 0xfff;
426   rsp->dmem8[BYTE4_XOR_BE(address)] = data >> 24;
427   rsp->dmem8[BYTE4_XOR_BE(address + 1)] = (data >> 16) & 0xff;
428   rsp->dmem8[BYTE4_XOR_BE(address + 2)] = (data >> 8) & 0xff;
429   rsp->dmem8[BYTE4_XOR_BE(address + 3)] = data & 0xff;
430   //printf("%04x:%08x\n",address, data);
534    address &= 0xfff;
535    rsp->dmem8[BYTE4_XOR_BE(address)] = data >> 24;
536    rsp->dmem8[BYTE4_XOR_BE(address + 1)] = (data >> 16) & 0xff;
537    rsp->dmem8[BYTE4_XOR_BE(address + 2)] = (data >> 8) & 0xff;
538    rsp->dmem8[BYTE4_XOR_BE(address + 3)] = data & 0xff;
431539}
432540
433541static void cfunc_write32(void *param)
434542{
435   rsp_state *rsp = (rsp_state *)param;
436   WRITE32(rsp, rsp->impstate->arg0, rsp->impstate->arg1);
543    rsp_state *rsp = (rsp_state *)param;
544    WRITE32(rsp, rsp->impstate->arg0, rsp->impstate->arg1);
437545}
438546
439547/*****************************************************************************/
r25438r25439
444552
445553void rspdrc_set_options(device_t *device, UINT32 options)
446554{
447   if (!device->machine().options().drc()) return;
448   rsp_state *rsp = get_safe_token(device);
449   rsp->impstate->drcoptions = options;
555    if (!device->machine().options().drc()) return;
556    rsp_state *rsp = get_safe_token(device);
557    rsp->impstate->drcoptions = options;
450558}
451559
452560
r25438r25439
458566#ifdef UNUSED_CODE
459567static void cfunc_printf_debug(void *param)
460568{
461   rsp_state *rsp = (rsp_state *)param;
462   switch(rsp->impstate->arg2)
463   {
464      case 0: // WRITE8
465         printf("%04x:%02x\n", rsp->impstate->arg0 & 0xffff, (UINT8)rsp->impstate->arg1);
466         break;
467      case 1: // WRITE16
468         printf("%04x:%04x\n", rsp->impstate->arg0 & 0xffff, (UINT16)rsp->impstate->arg1);
469         break;
470      case 2: // WRITE32
471         printf("%04x:%08x\n", rsp->impstate->arg0 & 0xffff, rsp->impstate->arg1);
472         break;
473      case 3: // READ8
474         printf("%04xr%02x\n", rsp->impstate->arg0 & 0xffff, (UINT8)rsp->impstate->arg1);
475         break;
476      case 4: // READ16
477         printf("%04xr%04x\n", rsp->impstate->arg0 & 0xffff, (UINT16)rsp->impstate->arg1);
478         break;
479      case 5: // READ32
480         printf("%04xr%08x\n", rsp->impstate->arg0 & 0xffff, rsp->impstate->arg1);
481         break;
482      case 6: // Checksum
483         printf("Sum: %08x\n", rsp->impstate->arg0);
484         break;
485      case 7: // Checksum
486         printf("Correct Sum: %08x\n", rsp->impstate->arg0);
487         break;
488      default: // ???
489         printf("%08x %08x\n", rsp->impstate->arg0 & 0xffff, rsp->impstate->arg1);
490         break;
491   }
569    rsp_state *rsp = (rsp_state *)param;
570    switch(rsp->impstate->arg2)
571    {
572        case 0: // WRITE8
573            printf("%04x:%02x\n", rsp->impstate->arg0 & 0xffff, (UINT8)rsp->impstate->arg1);
574            break;
575        case 1: // WRITE16
576            printf("%04x:%04x\n", rsp->impstate->arg0 & 0xffff, (UINT16)rsp->impstate->arg1);
577            break;
578        case 2: // WRITE32
579            printf("%04x:%08x\n", rsp->impstate->arg0 & 0xffff, rsp->impstate->arg1);
580            break;
581        case 3: // READ8
582            printf("%04xr%02x\n", rsp->impstate->arg0 & 0xffff, (UINT8)rsp->impstate->arg1);
583            break;
584        case 4: // READ16
585            printf("%04xr%04x\n", rsp->impstate->arg0 & 0xffff, (UINT16)rsp->impstate->arg1);
586            break;
587        case 5: // READ32
588            printf("%04xr%08x\n", rsp->impstate->arg0 & 0xffff, rsp->impstate->arg1);
589            break;
590        case 6: // Checksum
591            printf("Sum: %08x\n", rsp->impstate->arg0);
592            break;
593        case 7: // Checksum
594            printf("Correct Sum: %08x\n", rsp->impstate->arg0);
595            break;
596        default: // ???
597            printf("%08x %08x\n", rsp->impstate->arg0 & 0xffff, rsp->impstate->arg1);
598            break;
599    }
492600}
493601#endif
494602
495603static void cfunc_get_cop0_reg(void *param)
496604{
497   rsp_state *rsp = (rsp_state*)param;
498   int reg = rsp->impstate->arg0;
499   int dest = rsp->impstate->arg1;
605    rsp_state *rsp = (rsp_state*)param;
606    int reg = rsp->impstate->arg0;
607    int dest = rsp->impstate->arg1;
500608
501   if (reg >= 0 && reg < 8)
502   {
503      if(dest)
504      {
505         rsp->r[dest] = (rsp->sp_reg_r_func)(reg, 0x00000000);
506      }
507   }
508   else if (reg >= 8 && reg < 16)
509   {
510      if(dest)
511      {
512         rsp->r[dest] = (rsp->dp_reg_r_func)(reg - 8, 0x00000000);
513      }
514   }
515   else
516   {
517      fatalerror("RSP: cfunc_get_cop0_reg: %d\n", reg);
518   }
609    if (reg >= 0 && reg < 8)
610    {
611        if(dest)
612        {
613            rsp->r[dest] = (rsp->sp_reg_r_func)(reg, 0x00000000);
614        }
615    }
616    else if (reg >= 8 && reg < 16)
617    {
618        if(dest)
619        {
620            rsp->r[dest] = (rsp->dp_reg_r_func)(reg - 8, 0x00000000);
621        }
622    }
623    else
624    {
625        fatalerror("RSP: cfunc_get_cop0_reg: %d\n", reg);
626    }
519627}
520628
521629static void cfunc_set_cop0_reg(void *param)
522630{
523   rsp_state *rsp = (rsp_state*)param;
524   int reg = rsp->impstate->arg0;
525   UINT32 data = rsp->impstate->arg1;
631    rsp_state *rsp = (rsp_state*)param;
632    int reg = rsp->impstate->arg0;
633    UINT32 data = rsp->impstate->arg1;
526634
527   if (reg >= 0 && reg < 8)
528   {
529      (rsp->sp_reg_w_func)(reg, data, 0x00000000);
530   }
531   else if (reg >= 8 && reg < 16)
532   {
533      (rsp->dp_reg_w_func)(reg - 8, data, 0x00000000);
534   }
535   else
536   {
537      fatalerror("RSP: set_cop0_reg: %d, %08X\n", reg, data);
538   }
635    if (reg >= 0 && reg < 8)
636    {
637        (rsp->sp_reg_w_func)(reg, data, 0x00000000);
638    }
639    else if (reg >= 8 && reg < 16)
640    {
641        (rsp->dp_reg_w_func)(reg - 8, data, 0x00000000);
642    }
643    else
644    {
645        fatalerror("RSP: set_cop0_reg: %d, %08X\n", reg, data);
646    }
539647}
540648
541649static void cfunc_unimplemented_opcode(void *param)
542650{
543   rsp_state *rsp = (rsp_state*)param;
544   int op = rsp->impstate->arg0;
545   if ((rsp->device->machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
546   {
547      char string[200];
548      rsp_dasm_one(string, rsp->ppc, op);
549      mame_printf_debug("%08X: %s\n", rsp->ppc, string);
550   }
651    rsp_state *rsp = (rsp_state*)param;
652    int op = rsp->impstate->arg0;
653    if ((rsp->device->machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
654    {
655        char string[200];
656        rsp_dasm_one(string, rsp->ppc, op);
657        mame_printf_debug("%08X: %s\n", rsp->ppc, string);
658    }
551659
552   fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", op >> 26, op, rsp->ppc);
660    fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", op >> 26, op, rsp->ppc);
553661}
554662
555663static void unimplemented_opcode(rsp_state *rsp, UINT32 op)
556664{
557   if ((rsp->device->machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
558   {
559      char string[200];
560      rsp_dasm_one(string, rsp->ppc, op);
561      mame_printf_debug("%08X: %s\n", rsp->ppc, string);
562   }
665    if ((rsp->device->machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
666    {
667        char string[200];
668        rsp_dasm_one(string, rsp->ppc, op);
669        mame_printf_debug("%08X: %s\n", rsp->ppc, string);
670    }
563671
564   fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", op >> 26, op, rsp->ppc);
672    fatalerror("RSP: unknown opcode %02X (%08X) at %08X\n", op >> 26, op, rsp->ppc);
565673}
566674
567675/*****************************************************************************/
r25438r25439
569677/* Legacy.  Going forward, this will be transitioned into unrolled opcode decodes. */
570678static const int vector_elements_2[16][8] =
571679{
572   { 0, 1, 2, 3, 4, 5, 6, 7 },     // none
573   { 0, 1, 2, 3, 4, 5, 6, 7 },     // ???
574   { 0, 0, 2, 2, 4, 4, 6, 6 },     // 0q
575   { 1, 1, 3, 3, 5, 5, 7, 7 },     // 1q
576   { 0, 0, 0, 0, 4, 4, 4, 4 },     // 0h
577   { 1, 1, 1, 1, 5, 5, 5, 5 },     // 1h
578   { 2, 2, 2, 2, 6, 6, 6, 6 },     // 2h
579   { 3, 3, 3, 3, 7, 7, 7, 7 },     // 3h
580   { 0, 0, 0, 0, 0, 0, 0, 0 },     // 0
581   { 1, 1, 1, 1, 1, 1, 1, 1 },     // 1
582   { 2, 2, 2, 2, 2, 2, 2, 2 },     // 2
583   { 3, 3, 3, 3, 3, 3, 3, 3 },     // 3
584   { 4, 4, 4, 4, 4, 4, 4, 4 },     // 4
585   { 5, 5, 5, 5, 5, 5, 5, 5 },     // 5
586   { 6, 6, 6, 6, 6, 6, 6, 6 },     // 6
587   { 7, 7, 7, 7, 7, 7, 7, 7 },     // 7
680    { 0, 1, 2, 3, 4, 5, 6, 7 },     // none
681    { 0, 1, 2, 3, 4, 5, 6, 7 },     // ???
682    { 0, 0, 2, 2, 4, 4, 6, 6 },     // 0q
683    { 1, 1, 3, 3, 5, 5, 7, 7 },     // 1q
684    { 0, 0, 0, 0, 4, 4, 4, 4 },     // 0h
685    { 1, 1, 1, 1, 5, 5, 5, 5 },     // 1h
686    { 2, 2, 2, 2, 6, 6, 6, 6 },     // 2h
687    { 3, 3, 3, 3, 7, 7, 7, 7 },     // 3h
688    { 0, 0, 0, 0, 0, 0, 0, 0 },     // 0
689    { 1, 1, 1, 1, 1, 1, 1, 1 },     // 1
690    { 2, 2, 2, 2, 2, 2, 2, 2 },     // 2
691    { 3, 3, 3, 3, 3, 3, 3, 3 },     // 3
692    { 4, 4, 4, 4, 4, 4, 4, 4 },     // 4
693    { 5, 5, 5, 5, 5, 5, 5, 5 },     // 5
694    { 6, 6, 6, 6, 6, 6, 6, 6 },     // 6
695    { 7, 7, 7, 7, 7, 7, 7, 7 },     // 7
588696};
589697
590698#if USE_SIMD
591699static __m128i vec_himask;
592700static __m128i vec_lomask;
593static __m128i vec_overmask;
594static __m128i vec_zerobits;
701static __m128i vec_hibit;
702static __m128i vec_lobit;
703static __m128i vec_n32768;
704static __m128i vec_32767;
595705static __m128i vec_flagmask;
596706static __m128i vec_shiftmask2;
597707static __m128i vec_shiftmask4;
598static __m128i vec_zero;
708static __m128i vec_flag_reverse;
599709static __m128i vec_neg1;
600710static __m128i vec_shuf[16];
601711static __m128i vec_shuf_inverse[16];
r25438r25439
603713
604714static void rspcom_init(rsp_state *rsp, legacy_cpu_device *device, device_irq_acknowledge_callback irqcallback)
605715{
606   int regIdx = 0;
607   int accumIdx;
716    int regIdx = 0;
717    int accumIdx;
608718
609   memset(rsp, 0, sizeof(*rsp));
719    memset(rsp, 0, sizeof(*rsp));
610720
611   const rsp_config *config = (const rsp_config *)device->static_config();
612   // resolve callbacks
613   rsp->dp_reg_r_func.resolve(config->dp_reg_r_cb, *device);
614   rsp->dp_reg_w_func.resolve(config->dp_reg_w_cb, *device);
615   rsp->sp_reg_r_func.resolve(config->sp_reg_r_cb, *device);
616   rsp->sp_reg_w_func.resolve(config->sp_reg_w_cb, *device);
617   rsp->sp_set_status_func.resolve(config->sp_set_status_cb, *device);
721    const rsp_config *config = (const rsp_config *)device->static_config();
722    // resolve callbacks
723    rsp->dp_reg_r_func.resolve(config->dp_reg_r_cb, *device);
724    rsp->dp_reg_w_func.resolve(config->dp_reg_w_cb, *device);
725    rsp->sp_reg_r_func.resolve(config->sp_reg_r_cb, *device);
726    rsp->sp_reg_w_func.resolve(config->sp_reg_w_cb, *device);
727    rsp->sp_set_status_func.resolve(config->sp_set_status_cb, *device);
618728
619   rsp->irq_callback = irqcallback;
620   rsp->device = device;
621   rsp->program = &device->space(AS_PROGRAM);
622   rsp->direct = &rsp->program->direct();
729    rsp->irq_callback = irqcallback;
730    rsp->device = device;
731    rsp->program = &device->space(AS_PROGRAM);
732    rsp->direct = &rsp->program->direct();
623733
624734#if 1
625   // Inaccurate.  RSP registers power on to a random state...
626   for(regIdx = 0; regIdx < 32; regIdx++ )
627   {
628      rsp->r[regIdx] = 0;
629      rsp->v[regIdx].d[0] = 0;
630      rsp->v[regIdx].d[1] = 0;
631   }
632   rsp->flag[0] = 0;
633   rsp->flag[1] = 0;
634   rsp->flag[2] = 0;
635   rsp->flag[3] = 0;
636   rsp->reciprocal_res = 0;
637   rsp->reciprocal_high = 0;
735    // Inaccurate.  RSP registers power on to a random state...
736    for(regIdx = 0; regIdx < 32; regIdx++ )
737    {
738        rsp->r[regIdx] = 0;
739        rsp->v[regIdx].d[0] = 0;
740        rsp->v[regIdx].d[1] = 0;
741    }
742    CLEAR_CARRY_FLAGS();
743    CLEAR_COMPARE_FLAGS();
744    CLEAR_CLIP1_FLAGS();
745    CLEAR_ZERO_FLAGS();
746    CLEAR_CLIP2_FLAGS();
747    rsp->reciprocal_res = 0;
748    rsp->reciprocal_high = 0;
638749#endif
639750
640   // ...except for the accumulators.
641   for(accumIdx = 0; accumIdx < 8; accumIdx++ )
642   {
643      rsp->accum[accumIdx].q = 0;
644   }
751    // ...except for the accumulators.
752    for(accumIdx = 0; accumIdx < 8; accumIdx++ )
753    {
754        rsp->accum[accumIdx].q = 0;
755    }
645756
646   rsp->sr = RSP_STATUS_HALT;
647   rsp->step_count = 0;
757    rsp->sr = RSP_STATUS_HALT;
758    rsp->step_count = 0;
648759
649760#if USE_SIMD
650   vec_shuf_inverse[ 0] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // none
651   vec_shuf_inverse[ 1] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // ???
652   vec_shuf_inverse[ 2] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0908, 0x0908, 0x0504, 0x0504, 0x0100, 0x0100); // 0q
653   vec_shuf_inverse[ 3] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0b0a, 0x0b0a, 0x0706, 0x0706, 0x0302, 0x0302); // 1q
654   vec_shuf_inverse[ 4] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0100, 0x0100, 0x0100, 0x0100); // 0h
655   vec_shuf_inverse[ 5] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0302, 0x0302, 0x0302, 0x0302); // 1h
656   vec_shuf_inverse[ 6] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0504, 0x0504, 0x0504, 0x0504); // 2h
657   vec_shuf_inverse[ 7] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0706, 0x0706, 0x0706, 0x0706); // 3h
658   vec_shuf_inverse[ 8] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 0
659   vec_shuf_inverse[ 9] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 1
660   vec_shuf_inverse[10] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 2
661   vec_shuf_inverse[11] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 3
662   vec_shuf_inverse[12] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 4
663   vec_shuf_inverse[13] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 5
664   vec_shuf_inverse[14] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 6
665   vec_shuf_inverse[15] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 7
761    vec_shuf_inverse[ 0] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // none
762    vec_shuf_inverse[ 1] = _mm_set_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); // ???
763    vec_shuf_inverse[ 2] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0908, 0x0908, 0x0504, 0x0504, 0x0100, 0x0100); // 0q
764    vec_shuf_inverse[ 3] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0b0a, 0x0b0a, 0x0706, 0x0706, 0x0302, 0x0302); // 1q
765    vec_shuf_inverse[ 4] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0100, 0x0100, 0x0100, 0x0100); // 0h
766    vec_shuf_inverse[ 5] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0302, 0x0302, 0x0302, 0x0302); // 1h
767    vec_shuf_inverse[ 6] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0504, 0x0504, 0x0504, 0x0504); // 2h
768    vec_shuf_inverse[ 7] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0706, 0x0706, 0x0706, 0x0706); // 3h
769    vec_shuf_inverse[ 8] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 0
770    vec_shuf_inverse[ 9] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 1
771    vec_shuf_inverse[10] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 2
772    vec_shuf_inverse[11] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 3
773    vec_shuf_inverse[12] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 4
774    vec_shuf_inverse[13] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 5
775    vec_shuf_inverse[14] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 6
776    vec_shuf_inverse[15] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 7
666777
667   vec_shuf[ 0] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // none
668   vec_shuf[ 1] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // ???
669   vec_shuf[ 2] = _mm_set_epi16(0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e); // 0q
670   vec_shuf[ 3] = _mm_set_epi16(0x0100, 0x0100, 0x0504, 0x0706, 0x0908, 0x0908, 0x0d0c, 0x0d0c); // 1q
671   vec_shuf[ 4] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0q
672   vec_shuf[ 5] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1q
673   vec_shuf[ 6] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2q
674   vec_shuf[ 7] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908); // 3q
675   vec_shuf[ 8] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0
676   vec_shuf[ 9] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1
677   vec_shuf[10] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2
678   vec_shuf[11] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 3
679   vec_shuf[12] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 4
680   vec_shuf[13] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 5
681   vec_shuf[14] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 6
682   vec_shuf[15] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 7
683   rsp->accum_h = _mm_setzero_si128();
684   rsp->accum_m = _mm_setzero_si128();
685   rsp->accum_l = _mm_setzero_si128();
686   vec_zero = _mm_setzero_si128();
687   vec_neg1 = _mm_set_epi64x(0xffffffffffffffffL, 0xffffffffffffffffL);
688   vec_himask = _mm_set_epi64x(0xffff0000ffff0000L, 0xffff0000ffff0000L);
689   vec_lomask = _mm_set_epi64x(0x0000ffff0000ffffL, 0x0000ffff0000ffffL);
690   vec_overmask = _mm_set_epi64x(0x0001000000010000L, 0x0001000000010000L);
691   vec_zerobits = _mm_set_epi64x(0x0000000100000001L, 0x0000000100000001L);
692   vec_flagmask = _mm_set_epi64x(0x0001000100010001L, 0x0001000100010001L);
693   vec_shiftmask2 = _mm_set_epi64x(0x0000000300000003L, 0x0000000300000003L);
694   vec_shiftmask4 = _mm_set_epi64x(0x000000000000000fL, 0x000000000000000fL);
778    vec_shuf[ 0] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // none
779    vec_shuf[ 1] = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e); // ???
780    vec_shuf[ 2] = _mm_set_epi16(0x0302, 0x0302, 0x0706, 0x0706, 0x0b0a, 0x0b0a, 0x0f0e, 0x0f0e); // 0q
781    vec_shuf[ 3] = _mm_set_epi16(0x0100, 0x0100, 0x0504, 0x0706, 0x0908, 0x0908, 0x0d0c, 0x0d0c); // 1q
782    vec_shuf[ 4] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0q
783    vec_shuf[ 5] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1q
784    vec_shuf[ 6] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2q
785    vec_shuf[ 7] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0908, 0x0908, 0x0908, 0x0908); // 3q
786    vec_shuf[ 8] = _mm_set_epi16(0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e, 0x0f0e); // 0
787    vec_shuf[ 9] = _mm_set_epi16(0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c, 0x0d0c); // 1
788    vec_shuf[10] = _mm_set_epi16(0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a, 0x0b0a); // 2
789    vec_shuf[11] = _mm_set_epi16(0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908, 0x0908); // 3
790    vec_shuf[12] = _mm_set_epi16(0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706, 0x0706); // 4
791    vec_shuf[13] = _mm_set_epi16(0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504, 0x0504); // 5
792    vec_shuf[14] = _mm_set_epi16(0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302, 0x0302); // 6
793    vec_shuf[15] = _mm_set_epi16(0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100); // 7
794    rsp->accum_h = _mm_setzero_si128();
795    rsp->accum_m = _mm_setzero_si128();
796    rsp->accum_l = _mm_setzero_si128();
797    vec_neg1 = _mm_set_epi64x(0xffffffffffffffffL, 0xffffffffffffffffL);
798    vec_himask = _mm_set_epi64x(0xffff0000ffff0000L, 0xffff0000ffff0000L);
799    vec_lomask = _mm_set_epi64x(0x0000ffff0000ffffL, 0x0000ffff0000ffffL);
800    vec_hibit = _mm_set_epi64x(0x0001000000010000L, 0x0001000000010000L);
801    vec_lobit = _mm_set_epi64x(0x0000000100000001L, 0x0000000100000001L);
802    vec_32767 = _mm_set_epi64x(0x7fff7fff7fff7fffL, 0x7fff7fff7fff7fffL);
803    vec_n32768 = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L);
804    vec_flagmask = _mm_set_epi64x(0x0001000100010001L, 0x0001000100010001L);
805    vec_shiftmask2 = _mm_set_epi64x(0x0000000300000003L, 0x0000000300000003L);
806    vec_shiftmask4 = _mm_set_epi64x(0x000000000000000fL, 0x000000000000000fL);
807    vec_flag_reverse = _mm_set_epi16(0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0b0a, 0x0d0c, 0x0f0e);
695808#endif
696809}
697810
698811static CPU_INIT( rsp )
699812{
700   rsp_state *rsp;
701   drc_cache *cache;
702   UINT32 flags = 0;
703   int regnum;
704   //int elnum;
813    rsp_state *rsp;
814    drc_cache *cache;
815    UINT32 flags = 0;
816    int regnum;
817    //int elnum;
705818
706   /* allocate enough space for the cache and the core */
707   cache = auto_alloc(device->machine(), drc_cache(CACHE_SIZE + sizeof(*rsp)));
819    /* allocate enough space for the cache and the core */
820    cache = auto_alloc(device->machine(), drc_cache(CACHE_SIZE + sizeof(*rsp)));
708821
709   /* allocate the core memory */
710   *(rsp_state **)device->token() = rsp = (rsp_state *)cache->alloc_near(sizeof(*rsp));
711   memset(rsp, 0, sizeof(*rsp));
822    /* allocate the core memory */
823    *(rsp_state **)device->token() = rsp = (rsp_state *)cache->alloc_near(sizeof(*rsp));
824    memset(rsp, 0, sizeof(*rsp));
712825
713   rspcom_init(rsp, device, irqcallback);
826    rspcom_init(rsp, device, irqcallback);
714827
715   /* allocate the implementation-specific state from the full cache */
716   rsp->impstate = (rspimp_state *)cache->alloc_near(sizeof(*rsp->impstate));
717   memset(rsp->impstate, 0, sizeof(*rsp->impstate));
718   rsp->impstate->cache = cache;
828    /* allocate the implementation-specific state from the full cache */
829    rsp->impstate = (rspimp_state *)cache->alloc_near(sizeof(*rsp->impstate));
830    memset(rsp->impstate, 0, sizeof(*rsp->impstate));
831    rsp->impstate->cache = cache;
719832
720   /* initialize the UML generator */
721   if (LOG_UML)
722   {
723      flags |= DRCUML_OPTION_LOG_UML;
724   }
725   if (LOG_NATIVE)
726   {
727      flags |= DRCUML_OPTION_LOG_NATIVE;
728   }
729   rsp->impstate->drcuml = auto_alloc(device->machine(), drcuml_state(*device, *cache, flags, 8, 32, 2));
833    /* initialize the UML generator */
834    if (LOG_UML)
835    {
836        flags |= DRCUML_OPTION_LOG_UML;
837    }
838    if (LOG_NATIVE)
839    {
840        flags |= DRCUML_OPTION_LOG_NATIVE;
841    }
842    rsp->impstate->drcuml = auto_alloc(device->machine(), drcuml_state(*device, *cache, flags, 8, 32, 2));
730843
731   /* add symbols for our stuff */
732   rsp->impstate->drcuml->symbol_add(&rsp->pc, sizeof(rsp->pc), "pc");
733   rsp->impstate->drcuml->symbol_add(&rsp->icount, sizeof(rsp->icount), "icount");
734   for (regnum = 0; regnum < 32; regnum++)
735   {
736      char buf[10];
737      sprintf(buf, "r%d", regnum);
738      rsp->impstate->drcuml->symbol_add(&rsp->r[regnum], sizeof(rsp->r[regnum]), buf);
739   }
740   rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg0, sizeof(rsp->impstate->arg0), "arg0");
741   rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg1, sizeof(rsp->impstate->arg1), "arg1");
742   rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg2, sizeof(rsp->impstate->arg2), "arg2");
743   rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg3, sizeof(rsp->impstate->arg3), "arg3");
744   rsp->impstate->drcuml->symbol_add(&rsp->impstate->numcycles, sizeof(rsp->impstate->numcycles), "numcycles");
844    /* add symbols for our stuff */
845    rsp->impstate->drcuml->symbol_add(&rsp->pc, sizeof(rsp->pc), "pc");
846    rsp->impstate->drcuml->symbol_add(&rsp->icount, sizeof(rsp->icount), "icount");
847    for (regnum = 0; regnum < 32; regnum++)
848    {
849        char buf[10];
850        sprintf(buf, "r%d", regnum);
851        rsp->impstate->drcuml->symbol_add(&rsp->r[regnum], sizeof(rsp->r[regnum]), buf);
852    }
853    rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg0, sizeof(rsp->impstate->arg0), "arg0");
854    rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg1, sizeof(rsp->impstate->arg1), "arg1");
855    rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg2, sizeof(rsp->impstate->arg2), "arg2");
856    rsp->impstate->drcuml->symbol_add(&rsp->impstate->arg3, sizeof(rsp->impstate->arg3), "arg3");
857    rsp->impstate->drcuml->symbol_add(&rsp->impstate->numcycles, sizeof(rsp->impstate->numcycles), "numcycles");
745858
746   /* initialize the front-end helper */
747   rsp->impstate->drcfe = auto_alloc(device->machine(), rsp_frontend(*rsp, COMPILE_BACKWARDS_BYTES, COMPILE_FORWARDS_BYTES, SINGLE_INSTRUCTION_MODE ? 1 : COMPILE_MAX_SEQUENCE));
859    /* initialize the front-end helper */
860    rsp->impstate->drcfe = auto_alloc(device->machine(), rsp_frontend(*rsp, COMPILE_BACKWARDS_BYTES, COMPILE_FORWARDS_BYTES, SINGLE_INSTRUCTION_MODE ? 1 : COMPILE_MAX_SEQUENCE));
748861
749   /* compute the register parameters */
750   for (regnum = 0; regnum < 32; regnum++)
751      rsp->impstate->regmap[regnum] = (regnum == 0) ? parameter(0) : parameter::make_memory(&rsp->r[regnum]);
862    /* compute the register parameters */
863    for (regnum = 0; regnum < 32; regnum++)
864        rsp->impstate->regmap[regnum] = (regnum == 0) ? parameter(0) : parameter::make_memory(&rsp->r[regnum]);
752865
753   /*
754   drcbe_info beinfo;
755   rsp->impstate->drcuml->get_backend_info(beinfo);
756   if (beinfo.direct_iregs > 2)
757   {
758       rsp->impstate->regmap[30] = I2;
759   }
760   if (beinfo.direct_iregs > 3)
761   {
762       rsp->impstate->regmap[31] = I3;
763   }
764   if (beinfo.direct_iregs > 4)
765   {
766       rsp->impstate->regmap[2] = I4;
767   }
768   if (beinfo.direct_iregs > 5)
769   {
770       rsp->impstate->regmap[3] = I5;
771   }
772   if (beinfo.direct_iregs > 6)
773   {
774       rsp->impstate->regmap[4] = I6;
775   }
776   */
866    /*
867    drcbe_info beinfo;
868    rsp->impstate->drcuml->get_backend_info(beinfo);
869    if (beinfo.direct_iregs > 2)
870    {
871        rsp->impstate->regmap[30] = I2;
872    }
873    if (beinfo.direct_iregs > 3)
874    {
875        rsp->impstate->regmap[31] = I3;
876    }
877    if (beinfo.direct_iregs > 4)
878    {
879        rsp->impstate->regmap[2] = I4;
880    }
881    if (beinfo.direct_iregs > 5)
882    {
883        rsp->impstate->regmap[3] = I5;
884    }
885    if (beinfo.direct_iregs > 6)
886    {
887        rsp->impstate->regmap[4] = I6;
888    }
889    */
777890
778   /* mark the cache dirty so it is updated on next execute */
779   rsp->impstate->cache_dirty = TRUE;
891    /* mark the cache dirty so it is updated on next execute */
892    rsp->impstate->cache_dirty = TRUE;
780893}
781894
782895static CPU_EXIT( rsp )
783896{
784   rsp_state *rsp = get_safe_token(device);
897    rsp_state *rsp = get_safe_token(device);
785898
786   /* clean up the DRC */
787   auto_free(device->machine(), rsp->impstate->drcfe);
788   auto_free(device->machine(), rsp->impstate->drcuml);
789   auto_free(device->machine(), rsp->impstate->cache);
899    /* clean up the DRC */
900    auto_free(device->machine(), rsp->impstate->drcfe);
901    auto_free(device->machine(), rsp->impstate->drcuml);
902    auto_free(device->machine(), rsp->impstate->cache);
790903}
791904
792905
793906static CPU_RESET( rsp )
794907{
795   rsp_state *rsp = get_safe_token(device);
796   rsp->nextpc = ~0;
908    rsp_state *rsp = get_safe_token(device);
909    rsp->nextpc = ~0;
797910}
798911
799912static void cfunc_rsp_lbv(void *param)
800913{
801   rsp_state *rsp = (rsp_state*)param;
802   UINT32 op = rsp->impstate->arg0;
803   UINT32 ea = 0;
804   int dest = (op >> 16) & 0x1f;
805   int base = (op >> 21) & 0x1f;
806   int index = (op >> 7) & 0xf;
807   int offset = (op & 0x7f);
808   if (offset & 0x40)
809   {
810      offset |= 0xffffffc0;
811   }
812   // 31       25      20      15      10     6        0
813   // --------------------------------------------------
814   // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset |
815   // --------------------------------------------------
816   //
817   // Load 1 byte to vector byte index
914    rsp_state *rsp = (rsp_state*)param;
915    UINT32 op = rsp->impstate->arg0;
818916
819   ea = (base) ? rsp->r[base] + offset : offset;
917    UINT32 ea = 0;
918    int dest = (op >> 16) & 0x1f;
919    int base = (op >> 21) & 0x1f;
920    int index = (op >> 7) & 0xf;
921    int offset = (op & 0x7f);
922    if (offset & 0x40)
923    {
924        offset |= 0xffffffc0;
925    }
926    // 31       25      20      15      10     6        0
927    // --------------------------------------------------
928    // | 110010 | BBBBB | TTTTT | 00000 | IIII | Offset |
929    // --------------------------------------------------
930    //
931    // Load 1 byte to vector byte index
820932
933    ea = (base) ? rsp->r[base] + offset : offset;
934
821935#if USE_SIMD
822   UINT16 element;
823   SIMD_EXTRACT16(rsp->xv[dest], element, (index >> 1));
824   element &= 0xff00 >> ((1-(index & 1)) * 8);
825   element |= READ8(rsp, ea) << ((1-(index & 1)) * 8);
826   SIMD_INSERT16(rsp->xv[dest], element, (index >> 1));
936    UINT16 element;
937    SIMD_EXTRACT16(rsp->xv[dest], element, (index >> 1));
938    element &= 0xff00 >> ((1-(index & 1)) * 8);
939    element |= READ8(rsp, ea) << ((1-(index & 1)) * 8);
940    SIMD_INSERT16(rsp->xv[dest], element, (index >> 1));
827941#else
828   VREG_B(dest, index) = READ8(rsp, ea);
942    VREG_B(dest, index) = READ8(rsp, ea);
829943#endif
944
830945}
831946
832947static void cfunc_rsp_lsv(void *param)
833948{
834   rsp_state *rsp = (rsp_state*)param;
835   UINT32 op = rsp->impstate->arg0;
836   int dest = (op >> 16) & 0x1f;
837   int base = (op >> 21) & 0x1f;
838   int index = (op >> 7) & 0xe;
839   int offset = (op & 0x7f);
840   if (offset & 0x40)
841   {
842      offset |= 0xffffffc0;
843   }
844   // 31       25      20      15      10     6        0
845   // --------------------------------------------------
846   // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset |
847   // --------------------------------------------------
848   //
849   // Loads 2 bytes starting from vector byte index
949    rsp_state *rsp = (rsp_state*)param;
950    UINT32 op = rsp->impstate->arg0;
951    int dest = (op >> 16) & 0x1f;
952    int base = (op >> 21) & 0x1f;
953    int index = (op >> 7) & 0xe;
954    int offset = (op & 0x7f);
955    if (offset & 0x40)
956    {
957        offset |= 0xffffffc0;
958    }
959    // 31       25      20      15      10     6        0
960    // --------------------------------------------------
961    // | 110010 | BBBBB | TTTTT | 00001 | IIII | Offset |
962    // --------------------------------------------------
963    //
964    // Loads 2 bytes starting from vector byte index
850965
851   UINT32 ea = (base) ? rsp->r[base] + (offset * 2) : (offset * 2);
852   int end = index + 2;
853   for (int i = index; i < end; i++)
854   {
966    UINT32 ea = (base) ? rsp->r[base] + (offset * 2) : (offset * 2);
967    int end = index + 2;
968    for (int i = index; i < end; i++)
969    {
855970#if USE_SIMD
856      UINT16 element;
857      SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
858      element &= 0xff00 >> ((1 - (i & 1)) * 8);
859      element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
860      SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
971        UINT16 element;
972        SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
973        element &= 0xff00 >> ((1 - (i & 1)) * 8);
974        element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
975        SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
861976#else
862      VREG_B(dest, i) = READ8(rsp, ea);
977        VREG_B(dest, i) = READ8(rsp, ea);
863978#endif
864      ea++;
865   }
979        ea++;
980    }
866981}
867982
868983static void cfunc_rsp_llv(void *param)
869984{
870   rsp_state *rsp = (rsp_state*)param;
871   UINT32 op = rsp->impstate->arg0;
872   UINT32 ea = 0;
873   int dest = (op >> 16) & 0x1f;
874   int base = (op >> 21) & 0x1f;
875   int index = (op >> 7) & 0xc;
876   int offset = (op & 0x7f);
877   if (offset & 0x40)
878   {
879      offset |= 0xffffffc0;
880   }
881   // 31       25      20      15      10     6        0
882   // --------------------------------------------------
883   // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset |
884   // --------------------------------------------------
885   //
886   // Loads 4 bytes starting from vector byte index
985    rsp_state *rsp = (rsp_state*)param;
986    UINT32 op = rsp->impstate->arg0;
987    UINT32 ea = 0;
988    int dest = (op >> 16) & 0x1f;
989    int base = (op >> 21) & 0x1f;
990    int index = (op >> 7) & 0xc;
991    int offset = (op & 0x7f);
992    if (offset & 0x40)
993    {
994        offset |= 0xffffffc0;
995    }
996    // 31       25      20      15      10     6        0
997    // --------------------------------------------------
998    // | 110010 | BBBBB | TTTTT | 00010 | IIII | Offset |
999    // --------------------------------------------------
1000    //
1001    // Loads 4 bytes starting from vector byte index
8871002
888   ea = (base) ? rsp->r[base] + (offset * 4) : (offset * 4);
1003    ea = (base) ? rsp->r[base] + (offset * 4) : (offset * 4);
8891004
890   int end = index + 4;
1005    int end = index + 4;
8911006
892   for (int i = index; i < end; i++)
893   {
1007    for (int i = index; i < end; i++)
1008    {
8941009#if USE_SIMD
895      UINT16 element;
896      SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
897      element &= 0xff00 >> ((1 - (i & 1)) * 8);
898      element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
899      SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
1010        UINT16 element;
1011        SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
1012        element &= 0xff00 >> ((1 - (i & 1)) * 8);
1013        element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
1014        SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
9001015#else
901      VREG_B(dest, i) = READ8(rsp, ea);
1016        VREG_B(dest, i) = READ8(rsp, ea);
9021017#endif
903      ea++;
904   }
1018        ea++;
1019    }
9051020}
9061021
9071022static void cfunc_rsp_ldv(void *param)
9081023{
909   rsp_state *rsp = (rsp_state*)param;
910   UINT32 op = rsp->impstate->arg0;
911   UINT32 ea = 0;
912   int dest = (op >> 16) & 0x1f;
913   int base = (op >> 21) & 0x1f;
914   int index = (op >> 7) & 0x8;
915   int offset = (op & 0x7f);
916   if (offset & 0x40)
917   {
918      offset |= 0xffffffc0;
919   }
920   // 31       25      20      15      10     6        0
921   // --------------------------------------------------
922   // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset |
923   // --------------------------------------------------
924   //
925   // Loads 8 bytes starting from vector byte index
1024    rsp_state *rsp = (rsp_state*)param;
1025    UINT32 op = rsp->impstate->arg0;
1026    UINT32 ea = 0;
1027    int dest = (op >> 16) & 0x1f;
1028    int base = (op >> 21) & 0x1f;
1029    int index = (op >> 7) & 0x8;
1030    int offset = (op & 0x7f);
1031    if (offset & 0x40)
1032    {
1033        offset |= 0xffffffc0;
1034    }
1035    // 31       25      20      15      10     6        0
1036    // --------------------------------------------------
1037    // | 110010 | BBBBB | TTTTT | 00011 | IIII | Offset |
1038    // --------------------------------------------------
1039    //
1040    // Loads 8 bytes starting from vector byte index
9261041
927   ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1042    ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
9281043
929   int end = index + 8;
1044    int end = index + 8;
9301045
931   for (int i = index; i < end; i++)
932   {
1046    for (int i = index; i < end; i++)
1047    {
9331048#if USE_SIMD
934      UINT16 element;
935      SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
936      element &= 0xff00 >> ((1 - (i & 1)) * 8);
937      element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
938      SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
1049        UINT16 element;
1050        SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
1051        element &= 0xff00 >> ((1 - (i & 1)) * 8);
1052        element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
1053        SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
9391054#else
940      VREG_B(dest, i) = READ8(rsp, ea);
1055        VREG_B(dest, i) = READ8(rsp, ea);
9411056#endif
942      ea++;
943   }
1057        ea++;
1058    }
9441059}
9451060
9461061static void cfunc_rsp_lqv(void *param)
9471062{
948   rsp_state *rsp = (rsp_state*)param;
949   UINT32 op = rsp->impstate->arg0;
950   int dest = (op >> 16) & 0x1f;
951   int base = (op >> 21) & 0x1f;
952   //int index = 0; // Just a test, it goes right back the way it was if something breaks //(op >> 7) & 0xf;
953   int offset = (op & 0x7f);
954   if (offset & 0x40)
955   {
956      offset |= 0xffffffc0;
957   }
958   // 31       25      20      15      10     6        0
959   // --------------------------------------------------
960   // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset |
961   // --------------------------------------------------
962   //
963   // Loads up to 16 bytes starting from vector byte index
1063    rsp_state *rsp = (rsp_state*)param;
1064    UINT32 op = rsp->impstate->arg0;
1065    int dest = (op >> 16) & 0x1f;
1066    int base = (op >> 21) & 0x1f;
1067    //int index = 0; // Just a test, it goes right back the way it was if something breaks //(op >> 7) & 0xf;
1068    int offset = (op & 0x7f);
1069    if (offset & 0x40)
1070    {
1071        offset |= 0xffffffc0;
1072    }
1073    // 31       25      20      15      10     6        0
1074    // --------------------------------------------------
1075    // | 110010 | BBBBB | TTTTT | 00100 | IIII | Offset |
1076    // --------------------------------------------------
1077    //
1078    // Loads up to 16 bytes starting from vector byte index
9641079
965   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1080    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
9661081
967   int end = 16 - (ea & 0xf);
968   if (end > 16) end = 16;
1082    int end = 16 - (ea & 0xf);
1083    if (end > 16) end = 16;
9691084
970   for (int i = 0; i < end; i++)
971   {
1085    for (int i = 0; i < end; i++)
1086    {
9721087#if USE_SIMD
973      UINT16 element;
974      SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
975      element &= 0xff00 >> ((1 - (i & 1)) * 8);
976      element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
977      SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
1088        UINT16 element;
1089        SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
1090        element &= 0xff00 >> ((1 - (i & 1)) * 8);
1091        element |= READ8(rsp, ea) << ((1 - (i & 1)) * 8);
1092        SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
9781093#else
979      VREG_B(dest, i) = READ8(rsp, ea);
1094        VREG_B(dest, i) = READ8(rsp, ea);
9801095#endif
981      ea++;
982   }
1096        ea++;
1097    }
9831098}
9841099
9851100static void cfunc_rsp_lrv(void *param)
9861101{
987   rsp_state *rsp = (rsp_state*)param;
988   UINT32 op = rsp->impstate->arg0;
989   int dest = (op >> 16) & 0x1f;
990   int base = (op >> 21) & 0x1f;
991   int index = (op >> 7) & 0xf;
992   int offset = (op & 0x7f);
993   if (offset & 0x40)
994   {
995      offset |= 0xffffffc0;
996   }
997   // 31       25      20      15      10     6        0
998   // --------------------------------------------------
999   // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset |
1000   // --------------------------------------------------
1001   //
1002   // Loads up to 16 bytes starting from right side until 16-byte boundary
1102    rsp_state *rsp = (rsp_state*)param;
1103    UINT32 op = rsp->impstate->arg0;
1104    int dest = (op >> 16) & 0x1f;
1105    int base = (op >> 21) & 0x1f;
1106    int index = (op >> 7) & 0xf;
1107    int offset = (op & 0x7f);
1108    if (offset & 0x40)
1109    {
1110        offset |= 0xffffffc0;
1111    }
1112    // 31       25      20      15      10     6        0
1113    // --------------------------------------------------
1114    // | 110010 | BBBBB | TTTTT | 00101 | IIII | Offset |
1115    // --------------------------------------------------
1116    //
1117    // Loads up to 16 bytes starting from right side until 16-byte boundary
10031118
1004   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1119    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
10051120
1006   index = 16 - ((ea & 0xf) - index);
1007   ea &= ~0xf;
1121    index = 16 - ((ea & 0xf) - index);
1122    ea &= ~0xf;
10081123
1009   for (int i = index; i < 16; i++)
1010   {
1124    for (int i = index; i < 16; i++)
1125    {
10111126#if USE_SIMD
1012      UINT16 element;
1013      SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
1014      element &= 0xff00 >> ((1-(i & 1)) * 8);
1015      element |= READ8(rsp, ea) << ((1-(i & 1)) * 8);
1016      SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
1127        UINT16 element;
1128        SIMD_EXTRACT16(rsp->xv[dest], element, (i >> 1));
1129        element &= 0xff00 >> ((1-(i & 1)) * 8);
1130        element |= READ8(rsp, ea) << ((1-(i & 1)) * 8);
1131        SIMD_INSERT16(rsp->xv[dest], element, (i >> 1));
10171132#else
1018      VREG_B(dest, i) = READ8(rsp, ea);
1133        VREG_B(dest, i) = READ8(rsp, ea);
10191134#endif
1020      ea++;
1021   }
1135        ea++;
1136    }
10221137}
10231138
10241139static void cfunc_rsp_lpv(void *param)
10251140{
1026   rsp_state *rsp = (rsp_state*)param;
1027   UINT32 op = rsp->impstate->arg0;
1028   int dest = (op >> 16) & 0x1f;
1029   int base = (op >> 21) & 0x1f;
1030   int index = (op >> 7) & 0xf;
1031   int offset = (op & 0x7f);
1032   if (offset & 0x40)
1033   {
1034      offset |= 0xffffffc0;
1035   }
1036   // 31       25      20      15      10     6        0
1037   // --------------------------------------------------
1038   // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset |
1039   // --------------------------------------------------
1040   //
1041   // Loads a byte as the upper 8 bits of each element
1141    rsp_state *rsp = (rsp_state*)param;
1142    UINT32 op = rsp->impstate->arg0;
1143    int dest = (op >> 16) & 0x1f;
1144    int base = (op >> 21) & 0x1f;
1145    int index = (op >> 7) & 0xf;
1146    int offset = (op & 0x7f);
1147    if (offset & 0x40)
1148    {
1149        offset |= 0xffffffc0;
1150    }
1151    // 31       25      20      15      10     6        0
1152    // --------------------------------------------------
1153    // | 110010 | BBBBB | TTTTT | 00110 | IIII | Offset |
1154    // --------------------------------------------------
1155    //
1156    // Loads a byte as the upper 8 bits of each element
10421157
1043   UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1158    UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
10441159
1045   for (int i = 0; i < 8; i++)
1046   {
1160    for (int i = 0; i < 8; i++)
1161    {
10471162#if USE_SIMD
1048      SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8, i);
1163        SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8, i);
10491164#else
1050      W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8;
1165        W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 8;
10511166#endif
1052   }
1167    }
10531168}
10541169
10551170static void cfunc_rsp_luv(void *param)
10561171{
1057   rsp_state *rsp = (rsp_state*)param;
1058   UINT32 op = rsp->impstate->arg0;
1059   int dest = (op >> 16) & 0x1f;
1060   int base = (op >> 21) & 0x1f;
1061   int index = (op >> 7) & 0xf;
1062   int offset = (op & 0x7f);
1063   if (offset & 0x40)
1064   {
1065      offset |= 0xffffffc0;
1066   }
1067   // 31       25      20      15      10     6        0
1068   // --------------------------------------------------
1069   // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset |
1070   // --------------------------------------------------
1071   //
1072   // Loads a byte as the bits 14-7 of each element
1172    rsp_state *rsp = (rsp_state*)param;
1173    UINT32 op = rsp->impstate->arg0;
1174    int dest = (op >> 16) & 0x1f;
1175    int base = (op >> 21) & 0x1f;
1176    int index = (op >> 7) & 0xf;
1177    int offset = (op & 0x7f);
1178    if (offset & 0x40)
1179    {
1180        offset |= 0xffffffc0;
1181    }
1182    // 31       25      20      15      10     6        0
1183    // --------------------------------------------------
1184    // | 110010 | BBBBB | TTTTT | 00111 | IIII | Offset |
1185    // --------------------------------------------------
1186    //
1187    // Loads a byte as the bits 14-7 of each element
10731188
1074   UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1189    UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
10751190
1076   for (int i = 0; i < 8; i++)
1077   {
1191    for (int i = 0; i < 8; i++)
1192    {
10781193#if USE_SIMD
1079      SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7, i);
1194        SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7, i);
10801195#else
1081      W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7;
1196        W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + i) & 0xf)) << 7;
10821197#endif
1083   }
1198    }
10841199}
10851200
10861201static void cfunc_rsp_lhv(void *param)
10871202{
1088   rsp_state *rsp = (rsp_state*)param;
1089   UINT32 op = rsp->impstate->arg0;
1090   int dest = (op >> 16) & 0x1f;
1091   int base = (op >> 21) & 0x1f;
1092   int index = (op >> 7) & 0xf;
1093   int offset = (op & 0x7f);
1094   if (offset & 0x40)
1095   {
1096      offset |= 0xffffffc0;
1097   }
1098   // 31       25      20      15      10     6        0
1099   // --------------------------------------------------
1100   // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset |
1101   // --------------------------------------------------
1102   //
1103   // Loads a byte as the bits 14-7 of each element, with 2-byte stride
1203    rsp_state *rsp = (rsp_state*)param;
1204    UINT32 op = rsp->impstate->arg0;
1205    int dest = (op >> 16) & 0x1f;
1206    int base = (op >> 21) & 0x1f;
1207    int index = (op >> 7) & 0xf;
1208    int offset = (op & 0x7f);
1209    if (offset & 0x40)
1210    {
1211        offset |= 0xffffffc0;
1212    }
1213    // 31       25      20      15      10     6        0
1214    // --------------------------------------------------
1215    // | 110010 | BBBBB | TTTTT | 01000 | IIII | Offset |
1216    // --------------------------------------------------
1217    //
1218    // Loads a byte as the bits 14-7 of each element, with 2-byte stride
11041219
1105   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1220    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
11061221
1107   for (int i = 0; i < 8; i++)
1108   {
1222    for (int i = 0; i < 8; i++)
1223    {
11091224#if USE_SIMD
1110      SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7, i);
1225        SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7, i);
11111226#else
1112      W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7;
1227        W_VREG_S(dest, i) = READ8(rsp, ea + (((16-index) + (i<<1)) & 0xf)) << 7;
11131228#endif
1114   }
1229    }
11151230}
11161231
11171232static void cfunc_rsp_lfv(void *param)
11181233{
1119   rsp_state *rsp = (rsp_state*)param;
1120   UINT32 op = rsp->impstate->arg0;
1121   int dest = (op >> 16) & 0x1f;
1122   int base = (op >> 21) & 0x1f;
1123   int index = (op >> 7) & 0xf;
1124   int offset = (op & 0x7f);
1125   if (offset & 0x40)
1126   {
1127      offset |= 0xffffffc0;
1128   }
1129   // 31       25      20      15      10     6        0
1130   // --------------------------------------------------
1131   // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset |
1132   // --------------------------------------------------
1133   //
1134   // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride
1234    rsp_state *rsp = (rsp_state*)param;
1235    UINT32 op = rsp->impstate->arg0;
1236    int dest = (op >> 16) & 0x1f;
1237    int base = (op >> 21) & 0x1f;
1238    int index = (op >> 7) & 0xf;
1239    int offset = (op & 0x7f);
1240    if (offset & 0x40)
1241    {
1242        offset |= 0xffffffc0;
1243    }
1244    // 31       25      20      15      10     6        0
1245    // --------------------------------------------------
1246    // | 110010 | BBBBB | TTTTT | 01001 | IIII | Offset |
1247    // --------------------------------------------------
1248    //
1249    // Loads a byte as the bits 14-7 of upper or lower quad, with 4-byte stride
11351250
1136   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1251    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
11371252
1138   // not sure what happens if 16-byte boundary is crossed...
1253    // not sure what happens if 16-byte boundary is crossed...
11391254
1140   int end = (index >> 1) + 4;
1255    int end = (index >> 1) + 4;
11411256
1142   for (int i = index >> 1; i < end; i++)
1143   {
1257    for (int i = index >> 1; i < end; i++)
1258    {
11441259#if USE_SIMD
1145      SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea) << 7, i);
1260        SIMD_INSERT16(rsp->xv[dest], READ8(rsp, ea) << 7, i);
11461261#else
1147      W_VREG_S(dest, i) = READ8(rsp, ea) << 7;
1262        W_VREG_S(dest, i) = READ8(rsp, ea) << 7;
11481263#endif
1149      ea += 4;
1150   }
1264        ea += 4;
1265    }
11511266}
11521267
11531268static void cfunc_rsp_lwv(void *param)
11541269{
1155   rsp_state *rsp = (rsp_state*)param;
1156   UINT32 op = rsp->impstate->arg0;
1157   int dest = (op >> 16) & 0x1f;
1158   int base = (op >> 21) & 0x1f;
1159   int index = (op >> 7) & 0xf;
1160   int offset = (op & 0x7f);
1161   if (offset & 0x40)
1162   {
1163      offset |= 0xffffffc0;
1164   }
1165   // 31       25      20      15      10     6        0
1166   // --------------------------------------------------
1167   // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset |
1168   // --------------------------------------------------
1169   //
1170   // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0
1171   // after byte index 15
1270    rsp_state *rsp = (rsp_state*)param;
1271    UINT32 op = rsp->impstate->arg0;
1272    int dest = (op >> 16) & 0x1f;
1273    int base = (op >> 21) & 0x1f;
1274    int index = (op >> 7) & 0xf;
1275    int offset = (op & 0x7f);
1276    if (offset & 0x40)
1277    {
1278        offset |= 0xffffffc0;
1279    }
1280    // 31       25      20      15      10     6        0
1281    // --------------------------------------------------
1282    // | 110010 | BBBBB | TTTTT | 01010 | IIII | Offset |
1283    // --------------------------------------------------
1284    //
1285    // Loads the full 128-bit vector starting from vector byte index and wrapping to index 0
1286    // after byte index 15
11721287
1173   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1174   int end = (16 - index) + 16;
1288    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1289    int end = (16 - index) + 16;
11751290
11761291#if USE_SIMD
1177   UINT8 val[16];
1292    UINT8 val[16];
11781293#endif
1179   for (int i = (16 - index); i < end; i++)
1180   {
1294    for (int i = (16 - index); i < end; i++)
1295    {
11811296#if USE_SIMD
1182      val[i & 0xf] = READ8(rsp, ea);
1297        val[i & 0xf] = READ8(rsp, ea);
11831298#else
1184      VREG_B(dest, i & 0xf) = READ8(rsp, ea);
1299        VREG_B(dest, i & 0xf) = READ8(rsp, ea);
11851300#endif
1186      ea += 4;
1187   }
1301        ea += 4;
1302    }
11881303
11891304#if USE_SIMD
1190   rsp->xv[dest] = _mm_set_epi8(val[15], val[14], val[13], val[12], val[11], val[10], val[ 9], val[ 8],
1191                           val[ 7], val[ 6], val[ 5], val[ 4], val[ 3], val[ 2], val[ 1], val[ 0]);
1305    rsp->xv[dest] = _mm_set_epi8(val[15], val[14], val[13], val[12], val[11], val[10], val[ 9], val[ 8],
1306                                    val[ 7], val[ 6], val[ 5], val[ 4], val[ 3], val[ 2], val[ 1], val[ 0]);
11921307#endif
11931308}
11941309
11951310static void cfunc_rsp_ltv(void *param)
11961311{
1197   rsp_state *rsp = (rsp_state*)param;
1198   UINT32 op = rsp->impstate->arg0;
1199   int dest = (op >> 16) & 0x1f;
1200   int base = (op >> 21) & 0x1f;
1201   int index = (op >> 7) & 0xf;
1202   int offset = (op & 0x7f);
1312    rsp_state *rsp = (rsp_state*)param;
1313    UINT32 op = rsp->impstate->arg0;
1314    int dest = (op >> 16) & 0x1f;
1315    int base = (op >> 21) & 0x1f;
1316    int index = (op >> 7) & 0xf;
1317    int offset = (op & 0x7f);
12031318
1204   // 31       25      20      15      10     6        0
1205   // --------------------------------------------------
1206   // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset |
1207   // --------------------------------------------------
1208   //
1209   // Loads one element to maximum of 8 vectors, while incrementing element index
1319    // 31       25      20      15      10     6        0
1320    // --------------------------------------------------
1321    // | 110010 | BBBBB | TTTTT | 01011 | IIII | Offset |
1322    // --------------------------------------------------
1323    //
1324    // Loads one element to maximum of 8 vectors, while incrementing element index
12101325
1211   // FIXME: has a small problem with odd indices
1326    // FIXME: has a small problem with odd indices
12121327
1213   int vs = dest;
1214   int ve = dest + 8;
1215   if (ve > 32)
1216   {
1217      ve = 32;
1218   }
1328    int vs = dest;
1329    int ve = dest + 8;
1330    if (ve > 32)
1331    {
1332        ve = 32;
1333    }
12191334
1220   int element = 7 - (index >> 1);
1335    int element = 7 - (index >> 1);
12211336
1222   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1337    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
12231338
1224   ea = ((ea + 8) & ~0xf) + (index & 1);
1225   for (int i = vs; i < ve; i++)
1226   {
1227      element = ((8 - (index >> 1) + (i - vs)) << 1);
1339    ea = ((ea + 8) & ~0xf) + (index & 1);
1340    for (int i = vs; i < ve; i++)
1341    {
1342        element = ((8 - (index >> 1) + (i - vs)) << 1);
12281343#if USE_SIMD
1229      UINT16 value = (READ8(rsp, ea) << 8) | READ8(rsp, ea + 1);
1230      SIMD_INSERT16(rsp->xv[i], value, (element >> 1));
1344        UINT16 value = (READ8(rsp, ea) << 8) | READ8(rsp, ea + 1);
1345        SIMD_INSERT16(rsp->xv[i], value, (element >> 1));
12311346#else
1232      VREG_B(i, (element & 0xf)) = READ8(rsp, ea);
1233      VREG_B(i, ((element + 1) & 0xf)) = READ8(rsp, ea + 1);
1347        VREG_B(i, (element & 0xf)) = READ8(rsp, ea);
1348        VREG_B(i, ((element + 1) & 0xf)) = READ8(rsp, ea + 1);
12341349#endif
12351350
1236      ea += 2;
1237   }
1351        ea += 2;
1352    }
12381353}
12391354
12401355static int generate_lwc2(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
12411356{
1242   //int loopdest;
1243   UINT32 op = desc->opptr.l[0];
1244   //int dest = (op >> 16) & 0x1f;
1245   //int base = (op >> 21) & 0x1f;
1246   //int index = (op >> 7) & 0xf;
1247   int offset = (op & 0x7f);
1248   //int skip;
1249   if (offset & 0x40)
1250   {
1251      offset |= 0xffffffc0;
1252   }
1357    //int loopdest;
1358    UINT32 op = desc->opptr.l[0];
1359    //int dest = (op >> 16) & 0x1f;
1360    //int base = (op >> 21) & 0x1f;
1361    //int index = (op >> 7) & 0xf;
1362    int offset = (op & 0x7f);
1363    //int skip;
1364    if (offset & 0x40)
1365    {
1366        offset |= 0xffffffc0;
1367    }
12531368
1254   switch ((op >> 11) & 0x1f)
1255   {
1256      case 0x00:      /* LBV */
1257         //UML_ADD(block, I0, R32(RSREG), offset);
1258         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1259         UML_CALLC(block, cfunc_rsp_lbv, rsp);
1260         return TRUE;
1261      case 0x01:      /* LSV */
1262         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1263         UML_CALLC(block, cfunc_rsp_lsv, rsp);
1264         return TRUE;
1265      case 0x02:      /* LLV */
1266         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1267         UML_CALLC(block, cfunc_rsp_llv, rsp);
1268         return TRUE;
1269      case 0x03:      /* LDV */
1270         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1271         UML_CALLC(block, cfunc_rsp_ldv, rsp);
1272         return TRUE;
1273      case 0x04:      /* LQV */
1274         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1275         UML_CALLC(block, cfunc_rsp_lqv, rsp);
1276         return TRUE;
1277      case 0x05:      /* LRV */
1278         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1279         UML_CALLC(block, cfunc_rsp_lrv, rsp);
1280         return TRUE;
1281      case 0x06:      /* LPV */
1282         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1283         UML_CALLC(block, cfunc_rsp_lpv, rsp);
1284         return TRUE;
1285      case 0x07:      /* LUV */
1286         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1287         UML_CALLC(block, cfunc_rsp_luv, rsp);
1288         return TRUE;
1289      case 0x08:      /* LHV */
1290         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1291         UML_CALLC(block, cfunc_rsp_lhv, rsp);
1292         return TRUE;
1293      case 0x09:      /* LFV */
1294         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1295         UML_CALLC(block, cfunc_rsp_lfv, rsp);
1296         return TRUE;
1297      case 0x0a:      /* LWV */
1298         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1299         UML_CALLC(block, cfunc_rsp_lwv, rsp);
1300         return TRUE;
1301      case 0x0b:      /* LTV */
1302         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1303         UML_CALLC(block, cfunc_rsp_ltv, rsp);
1304         return TRUE;
1369    switch ((op >> 11) & 0x1f)
1370    {
1371        case 0x00:      /* LBV */
1372            //UML_ADD(block, I0, R32(RSREG), offset);
1373            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1374            UML_CALLC(block, cfunc_rsp_lbv, rsp);
1375            return TRUE;
1376        case 0x01:      /* LSV */
1377            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1378            UML_CALLC(block, cfunc_rsp_lsv, rsp);
1379            return TRUE;
1380        case 0x02:      /* LLV */
1381            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1382            UML_CALLC(block, cfunc_rsp_llv, rsp);
1383            return TRUE;
1384        case 0x03:      /* LDV */
1385            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1386            UML_CALLC(block, cfunc_rsp_ldv, rsp);
1387            return TRUE;
1388        case 0x04:      /* LQV */
1389            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1390            UML_CALLC(block, cfunc_rsp_lqv, rsp);
1391            return TRUE;
1392        case 0x05:      /* LRV */
1393            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1394            UML_CALLC(block, cfunc_rsp_lrv, rsp);
1395            return TRUE;
1396        case 0x06:      /* LPV */
1397            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1398            UML_CALLC(block, cfunc_rsp_lpv, rsp);
1399            return TRUE;
1400        case 0x07:      /* LUV */
1401            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1402            UML_CALLC(block, cfunc_rsp_luv, rsp);
1403            return TRUE;
1404        case 0x08:      /* LHV */
1405            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1406            UML_CALLC(block, cfunc_rsp_lhv, rsp);
1407            return TRUE;
1408        case 0x09:      /* LFV */
1409            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1410            UML_CALLC(block, cfunc_rsp_lfv, rsp);
1411            return TRUE;
1412        case 0x0a:      /* LWV */
1413            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1414            UML_CALLC(block, cfunc_rsp_lwv, rsp);
1415            return TRUE;
1416        case 0x0b:      /* LTV */
1417            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1418            UML_CALLC(block, cfunc_rsp_ltv, rsp);
1419            return TRUE;
13051420
1306      default:
1307         return FALSE;
1308   }
1421        default:
1422            return FALSE;
1423    }
13091424}
13101425
13111426static void cfunc_rsp_sbv(void *param)
13121427{
1313   rsp_state *rsp = (rsp_state*)param;
1314   UINT32 op = rsp->impstate->arg0;
1315   int dest = (op >> 16) & 0x1f;
1316   int base = (op >> 21) & 0x1f;
1317   int index = (op >> 7) & 0xf;
1318   int offset = (op & 0x7f);
1319   if (offset & 0x40)
1320   {
1321      offset |= 0xffffffc0;
1322   }
1428    rsp_state *rsp = (rsp_state*)param;
1429    UINT32 op = rsp->impstate->arg0;
1430    int dest = (op >> 16) & 0x1f;
1431    int base = (op >> 21) & 0x1f;
1432    int index = (op >> 7) & 0xf;
1433    int offset = (op & 0x7f);
1434    if (offset & 0x40)
1435    {
1436        offset |= 0xffffffc0;
1437    }
13231438
1324   // 31       25      20      15      10     6        0
1325   // --------------------------------------------------
1326   // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset |
1327   // --------------------------------------------------
1328   //
1329   // Stores 1 byte from vector byte index
1439    // 31       25      20      15      10     6        0
1440    // --------------------------------------------------
1441    // | 111010 | BBBBB | TTTTT | 00000 | IIII | Offset |
1442    // --------------------------------------------------
1443    //
1444    // Stores 1 byte from vector byte index
13301445
1331   UINT32 ea = (base) ? rsp->r[base] + offset : offset;
1446    UINT32 ea = (base) ? rsp->r[base] + offset : offset;
13321447#if USE_SIMD
1333   UINT16 value;
1334   SIMD_EXTRACT16(rsp->xv[dest], value, (index >> 1));
1335   value >>= (1-(index & 1)) * 8;
1336   WRITE8(rsp, ea, (UINT8)value);
1448    UINT16 value;
1449    SIMD_EXTRACT16(rsp->xv[dest], value, (index >> 1));
1450    value >>= (1-(index & 1)) * 8;
1451    WRITE8(rsp, ea, (UINT8)value);
13371452#else
1338   WRITE8(rsp, ea, VREG_B(dest, index));
1453    WRITE8(rsp, ea, VREG_B(dest, index));
13391454#endif
13401455}
13411456
13421457static void cfunc_rsp_ssv(void *param)
13431458{
1344   rsp_state *rsp = (rsp_state*)param;
1345   UINT32 op = rsp->impstate->arg0;
1346   int dest = (op >> 16) & 0x1f;
1347   int base = (op >> 21) & 0x1f;
1348   int index = (op >> 7) & 0xf;
1349   int offset = (op & 0x7f);
1350   if (offset & 0x40)
1351   {
1352      offset |= 0xffffffc0;
1353   }
1459    rsp_state *rsp = (rsp_state*)param;
1460    UINT32 op = rsp->impstate->arg0;
1461    int dest = (op >> 16) & 0x1f;
1462    int base = (op >> 21) & 0x1f;
1463    int index = (op >> 7) & 0xf;
1464    int offset = (op & 0x7f);
1465    if (offset & 0x40)
1466    {
1467        offset |= 0xffffffc0;
1468    }
13541469
1355   // 31       25      20      15      10     6        0
1356   // --------------------------------------------------
1357   // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset |
1358   // --------------------------------------------------
1359   //
1360   // Stores 2 bytes starting from vector byte index
1470    // 31       25      20      15      10     6        0
1471    // --------------------------------------------------
1472    // | 111010 | BBBBB | TTTTT | 00001 | IIII | Offset |
1473    // --------------------------------------------------
1474    //
1475    // Stores 2 bytes starting from vector byte index
13611476
1362   UINT32 ea = (base) ? rsp->r[base] + (offset * 2) : (offset * 2);
1477    UINT32 ea = (base) ? rsp->r[base] + (offset * 2) : (offset * 2);
13631478
13641479#if USE_SIMD
1365   UINT16 value;
1366   SIMD_EXTRACT16(rsp->xv[dest], value, (index >> 1));
1367   WRITE8(rsp, ea, (UINT8)(value >> 8));
1368   WRITE8(rsp, ea+1, (UINT8)(value & 0x00ff));
1480    UINT16 value;
1481    SIMD_EXTRACT16(rsp->xv[dest], value, (index >> 1));
1482    WRITE8(rsp, ea, (UINT8)(value >> 8));
1483    WRITE8(rsp, ea+1, (UINT8)(value & 0x00ff));
13691484#else
1370   int end = index + 2;
1371   for (int i = index; i < end; i++)
1372   {
1373      WRITE8(rsp, ea, VREG_B(dest, i));
1374      ea++;
1375   }
1485    int end = index + 2;
1486    for (int i = index; i < end; i++)
1487    {
1488        WRITE8(rsp, ea, VREG_B(dest, i));
1489        ea++;
1490    }
13761491#endif
13771492}
13781493
13791494static void cfunc_rsp_slv(void *param)
13801495{
1381   rsp_state *rsp = (rsp_state*)param;
1382   UINT32 op = rsp->impstate->arg0;
1383   int dest = (op >> 16) & 0x1f;
1384   int base = (op >> 21) & 0x1f;
1385   int index = (op >> 7) & 0xf;
1386   int offset = (op & 0x7f);
1387   if (offset & 0x40)
1388   {
1389      offset |= 0xffffffc0;
1390   }
1391   // 31       25      20      15      10     6        0
1392   // --------------------------------------------------
1393   // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset |
1394   // --------------------------------------------------
1395   //
1396   // Stores 4 bytes starting from vector byte index
1496    rsp_state *rsp = (rsp_state*)param;
1497    UINT32 op = rsp->impstate->arg0;
1498    int dest = (op >> 16) & 0x1f;
1499    int base = (op >> 21) & 0x1f;
1500    int index = (op >> 7) & 0xf;
1501    int offset = (op & 0x7f);
1502    if (offset & 0x40)
1503    {
1504        offset |= 0xffffffc0;
1505    }
1506    // 31       25      20      15      10     6        0
1507    // --------------------------------------------------
1508    // | 111010 | BBBBB | TTTTT | 00010 | IIII | Offset |
1509    // --------------------------------------------------
1510    //
1511    // Stores 4 bytes starting from vector byte index
13971512
1398   UINT32 ea = (base) ? rsp->r[base] + (offset * 4) : (offset * 4);
1513    UINT32 ea = (base) ? rsp->r[base] + (offset * 4) : (offset * 4);
13991514
14001515#if USE_SIMD
1401   UINT16 value0, value1;
1402   index >>= 1;
1403   SIMD_EXTRACT16(rsp->xv[dest], value0, index);
1404   SIMD_EXTRACT16(rsp->xv[dest], value1, index+1);
1405   WRITE8(rsp, ea, (UINT8)(value0 >> 8));
1406   WRITE8(rsp, ea+1, (UINT8)(value0 & 0x00ff));
1407   WRITE8(rsp, ea+2, (UINT8)(value1 >> 8));
1408   WRITE8(rsp, ea+3, (UINT8)(value1 & 0x00ff));
1516    UINT16 value0, value1;
1517    index >>= 1;
1518    SIMD_EXTRACT16(rsp->xv[dest], value0, index);
1519    SIMD_EXTRACT16(rsp->xv[dest], value1, index+1);
1520    WRITE8(rsp, ea, (UINT8)(value0 >> 8));
1521    WRITE8(rsp, ea+1, (UINT8)(value0 & 0x00ff));
1522    WRITE8(rsp, ea+2, (UINT8)(value1 >> 8));
1523    WRITE8(rsp, ea+3, (UINT8)(value1 & 0x00ff));
14091524#else
1410   int end = index + 4;
1411   for (int i = index; i < end; i++)
1412   {
1413      WRITE8(rsp, ea, VREG_B(dest, i));
1414      ea++;
1415   }
1525    int end = index + 4;
1526    for (int i = index; i < end; i++)
1527    {
1528        WRITE8(rsp, ea, VREG_B(dest, i));
1529        ea++;
1530    }
14161531#endif
14171532}
14181533
14191534static void cfunc_rsp_sdv(void *param)
14201535{
1421   rsp_state *rsp = (rsp_state*)param;
1422   UINT32 op = rsp->impstate->arg0;
1423   int dest = (op >> 16) & 0x1f;
1424   int base = (op >> 21) & 0x1f;
1425   int index = (op >> 7) & 0x8;
1426   int offset = (op & 0x7f);
1427   if (offset & 0x40)
1428   {
1429      offset |= 0xffffffc0;
1430   }
1431   // 31       25      20      15      10     6        0
1432   // --------------------------------------------------
1433   // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset |
1434   // --------------------------------------------------
1435   //
1436   // Stores 8 bytes starting from vector byte index
1437   UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1536    rsp_state *rsp = (rsp_state*)param;
1537    UINT32 op = rsp->impstate->arg0;
1538    int dest = (op >> 16) & 0x1f;
1539    int base = (op >> 21) & 0x1f;
1540    int index = (op >> 7) & 0x8;
1541    int offset = (op & 0x7f);
1542    if (offset & 0x40)
1543    {
1544        offset |= 0xffffffc0;
1545    }
1546    // 31       25      20      15      10     6        0
1547    // --------------------------------------------------
1548    // | 111010 | BBBBB | TTTTT | 00011 | IIII | Offset |
1549    // --------------------------------------------------
1550    //
1551    // Stores 8 bytes starting from vector byte index
1552    UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
14381553
14391554#if USE_SIMD
1440   UINT16 value0, value1, value2, value3;
1441   index >>= 1;
1442   SIMD_EXTRACT16(rsp->xv[dest], value0, index);
1443   SIMD_EXTRACT16(rsp->xv[dest], value1, index+1);
1444   SIMD_EXTRACT16(rsp->xv[dest], value2, index+2);
1445   SIMD_EXTRACT16(rsp->xv[dest], value3, index+3);
1446   WRITE8(rsp, ea, (UINT8)(value0 >> 8));
1447   WRITE8(rsp, ea+1, (UINT8)(value0 & 0x00ff));
1448   WRITE8(rsp, ea+2, (UINT8)(value1 >> 8));
1449   WRITE8(rsp, ea+3, (UINT8)(value1 & 0x00ff));
1450   WRITE8(rsp, ea+4, (UINT8)(value2 >> 8));
1451   WRITE8(rsp, ea+5, (UINT8)(value2 & 0x00ff));
1452   WRITE8(rsp, ea+6, (UINT8)(value3 >> 8));
1453   WRITE8(rsp, ea+7, (UINT8)(value3 & 0x00ff));
1555    UINT16 value0, value1, value2, value3;
1556    index >>= 1;
1557    SIMD_EXTRACT16(rsp->xv[dest], value0, index);
1558    SIMD_EXTRACT16(rsp->xv[dest], value1, index+1);
1559    SIMD_EXTRACT16(rsp->xv[dest], value2, index+2);
1560    SIMD_EXTRACT16(rsp->xv[dest], value3, index+3);
1561    WRITE8(rsp, ea, (UINT8)(value0 >> 8));
1562    WRITE8(rsp, ea+1, (UINT8)(value0 & 0x00ff));
1563    WRITE8(rsp, ea+2, (UINT8)(value1 >> 8));
1564    WRITE8(rsp, ea+3, (UINT8)(value1 & 0x00ff));
1565    WRITE8(rsp, ea+4, (UINT8)(value2 >> 8));
1566    WRITE8(rsp, ea+5, (UINT8)(value2 & 0x00ff));
1567    WRITE8(rsp, ea+6, (UINT8)(value3 >> 8));
1568    WRITE8(rsp, ea+7, (UINT8)(value3 & 0x00ff));
14541569#else
1455   int end = index + 8;
1456   for (int i = index; i < end; i++)
1457   {
1458      WRITE8(rsp, ea, VREG_B(dest, i));
1459      ea++;
1460   }
1570    int end = index + 8;
1571    for (int i = index; i < end; i++)
1572    {
1573        WRITE8(rsp, ea, VREG_B(dest, i));
1574        ea++;
1575    }
14611576#endif
14621577}
14631578
14641579static void cfunc_rsp_sqv(void *param)
14651580{
1466   rsp_state *rsp = (rsp_state*)param;
1467   UINT32 op = rsp->impstate->arg0;
1468   int dest = (op >> 16) & 0x1f;
1469   int base = (op >> 21) & 0x1f;
1470   int index = (op >> 7) & 0xf;
1471   int offset = (op & 0x7f);
1472   if (offset & 0x40)
1473   {
1474      offset |= 0xffffffc0;
1475   }
1476   // 31       25      20      15      10     6        0
1477   // --------------------------------------------------
1478   // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset |
1479   // --------------------------------------------------
1480   //
1481   // Stores up to 16 bytes starting from vector byte index until 16-byte boundary
1581    rsp_state *rsp = (rsp_state*)param;
1582    UINT32 op = rsp->impstate->arg0;
1583    int dest = (op >> 16) & 0x1f;
1584    int base = (op >> 21) & 0x1f;
1585    int index = (op >> 7) & 0xf;
1586    int offset = (op & 0x7f);
1587    if (offset & 0x40)
1588    {
1589        offset |= 0xffffffc0;
1590    }
1591    // 31       25      20      15      10     6        0
1592    // --------------------------------------------------
1593    // | 111010 | BBBBB | TTTTT | 00100 | IIII | Offset |
1594    // --------------------------------------------------
1595    //
1596    // Stores up to 16 bytes starting from vector byte index until 16-byte boundary
14821597
1483   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1484   int end = index + (16 - (ea & 0xf));
1485   for (int i=index; i < end; i++)
1486   {
1598    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1599    int end = index + (16 - (ea & 0xf));
1600    for (int i=index; i < end; i++)
1601    {
14871602#if USE_SIMD
1488      UINT16 value;
1489      SIMD_EXTRACT16(rsp->xv[dest], value, (i >> 1));
1490      value >>= (1-(i & 1)) * 8;
1491      WRITE8(rsp, ea, (UINT8)value);
1603        UINT16 value;
1604        SIMD_EXTRACT16(rsp->xv[dest], value, (i >> 1));
1605        value >>= (1-(i & 1)) * 8;
1606        WRITE8(rsp, ea, (UINT8)value);
14921607#else
1493      WRITE8(rsp, ea, VREG_B(dest, i & 0xf));
1608        WRITE8(rsp, ea, VREG_B(dest, i & 0xf));
14941609#endif
1495      ea++;
1496   }
1610        ea++;
1611    }
14971612}
14981613
14991614static void cfunc_rsp_srv(void *param)
15001615{
1501   rsp_state *rsp = (rsp_state*)param;
1502   UINT32 op = rsp->impstate->arg0;
1503   int dest = (op >> 16) & 0x1f;
1504   int base = (op >> 21) & 0x1f;
1505   int index = (op >> 7) & 0xf;
1506   int offset = (op & 0x7f);
1507   if (offset & 0x40)
1508   {
1509      offset |= 0xffffffc0;
1510   }
1511   // 31       25      20      15      10     6        0
1512   // --------------------------------------------------
1513   // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset |
1514   // --------------------------------------------------
1515   //
1516   // Stores up to 16 bytes starting from right side until 16-byte boundary
1616    rsp_state *rsp = (rsp_state*)param;
1617    UINT32 op = rsp->impstate->arg0;
1618    int dest = (op >> 16) & 0x1f;
1619    int base = (op >> 21) & 0x1f;
1620    int index = (op >> 7) & 0xf;
1621    int offset = (op & 0x7f);
1622    if (offset & 0x40)
1623    {
1624        offset |= 0xffffffc0;
1625    }
1626    // 31       25      20      15      10     6        0
1627    // --------------------------------------------------
1628    // | 111010 | BBBBB | TTTTT | 00101 | IIII | Offset |
1629    // --------------------------------------------------
1630    //
1631    // Stores up to 16 bytes starting from right side until 16-byte boundary
15171632
1518   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1633    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
15191634
1520   int end = index + (ea & 0xf);
1521   int o = (16 - (ea & 0xf)) & 0xf;
1522   ea &= ~0xf;
1635    int end = index + (ea & 0xf);
1636    int o = (16 - (ea & 0xf)) & 0xf;
1637    ea &= ~0xf;
15231638
1524   for (int i = index; i < end; i++)
1525   {
1639    for (int i = index; i < end; i++)
1640    {
15261641#if USE_SIMD
1527      UINT32 bi = (i + o) & 0xf;
1528      UINT16 value;
1529      SIMD_EXTRACT16(rsp->xv[dest], value, (bi >> 1));
1530      value >>= (1-(bi & 1)) * 8;
1531      WRITE8(rsp, ea, (UINT8)value);
1642        UINT32 bi = (i + o) & 0xf;
1643        UINT16 value;
1644        SIMD_EXTRACT16(rsp->xv[dest], value, (bi >> 1));
1645        value >>= (1-(bi & 1)) * 8;
1646        WRITE8(rsp, ea, (UINT8)value);
15321647#else
1533      WRITE8(rsp, ea, VREG_B(dest, ((i + o) & 0xf)));
1648        WRITE8(rsp, ea, VREG_B(dest, ((i + o) & 0xf)));
15341649#endif
1535      ea++;
1536   }
1650        ea++;
1651    }
15371652}
15381653
15391654static void cfunc_rsp_spv(void *param)
15401655{
1541   rsp_state *rsp = (rsp_state*)param;
1542   UINT32 op = rsp->impstate->arg0;
1543   int dest = (op >> 16) & 0x1f;
1544   int base = (op >> 21) & 0x1f;
1545   int index = (op >> 7) & 0xf;
1546   int offset = (op & 0x7f);
1547   if (offset & 0x40)
1548   {
1549      offset |= 0xffffffc0;
1550   }
1551   // 31       25      20      15      10     6        0
1552   // --------------------------------------------------
1553   // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset |
1554   // --------------------------------------------------
1555   //
1556   // Stores upper 8 bits of each element
1656    rsp_state *rsp = (rsp_state*)param;
1657    UINT32 op = rsp->impstate->arg0;
1658    int dest = (op >> 16) & 0x1f;
1659    int base = (op >> 21) & 0x1f;
1660    int index = (op >> 7) & 0xf;
1661    int offset = (op & 0x7f);
1662    if (offset & 0x40)
1663    {
1664        offset |= 0xffffffc0;
1665    }
1666    // 31       25      20      15      10     6        0
1667    // --------------------------------------------------
1668    // | 111010 | BBBBB | TTTTT | 00110 | IIII | Offset |
1669    // --------------------------------------------------
1670    //
1671    // Stores upper 8 bits of each element
15571672
1558   UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1559   int end = index + 8;
1560   for (int i=index; i < end; i++)
1561   {
1562      if ((i & 0xf) < 8)
1563      {
1673    UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1674    int end = index + 8;
1675    for (int i=index; i < end; i++)
1676    {
1677        if ((i & 0xf) < 8)
1678        {
15641679#if USE_SIMD
1565         UINT16 value;
1566         SIMD_EXTRACT16(rsp->xv[dest], value, i);
1567         WRITE8(rsp, ea, (UINT8)(value >> 8));
1680            UINT16 value;
1681            SIMD_EXTRACT16(rsp->xv[dest], value, i);
1682            WRITE8(rsp, ea, (UINT8)(value >> 8));
15681683#else
1569         WRITE8(rsp, ea, VREG_B(dest, (i & 0xf) << 1));
1684            WRITE8(rsp, ea, VREG_B(dest, (i & 0xf) << 1));
15701685#endif
1571      }
1572      else
1573      {
1686        }
1687        else
1688        {
15741689#if USE_SIMD
1575         UINT16 value;
1576         SIMD_EXTRACT16(rsp->xv[dest], value, i);
1577         value >>= 7;
1578         WRITE8(rsp, ea, (UINT8)value);
1690            UINT16 value;
1691            SIMD_EXTRACT16(rsp->xv[dest], value, i);
1692            value >>= 7;
1693            WRITE8(rsp, ea, (UINT8)value);
15791694#else
1580         WRITE8(rsp, ea, VREG_S(dest, (i & 0x7)) >> 7);
1695            WRITE8(rsp, ea, VREG_S(dest, (i & 0x7)) >> 7);
15811696#endif
1582      }
1583      ea++;
1584   }
1697        }
1698        ea++;
1699    }
15851700}
15861701
15871702static void cfunc_rsp_suv(void *param)
15881703{
1589   rsp_state *rsp = (rsp_state*)param;
1590   UINT32 op = rsp->impstate->arg0;
1591   int dest = (op >> 16) & 0x1f;
1592   int base = (op >> 21) & 0x1f;
1593   int index = (op >> 7) & 0xf;
1594   int offset = (op & 0x7f);
1595   if (offset & 0x40)
1596   {
1597      offset |= 0xffffffc0;
1598   }
1599   // 31       25      20      15      10     6        0
1600   // --------------------------------------------------
1601   // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset |
1602   // --------------------------------------------------
1603   //
1604   // Stores bits 14-7 of each element
1704    rsp_state *rsp = (rsp_state*)param;
1705    UINT32 op = rsp->impstate->arg0;
1706    int dest = (op >> 16) & 0x1f;
1707    int base = (op >> 21) & 0x1f;
1708    int index = (op >> 7) & 0xf;
1709    int offset = (op & 0x7f);
1710    if (offset & 0x40)
1711    {
1712        offset |= 0xffffffc0;
1713    }
1714    // 31       25      20      15      10     6        0
1715    // --------------------------------------------------
1716    // | 111010 | BBBBB | TTTTT | 00111 | IIII | Offset |
1717    // --------------------------------------------------
1718    //
1719    // Stores bits 14-7 of each element
16051720
1606   UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1607   int end = index + 8;
1608   for (int i=index; i < end; i++)
1609   {
1610      if ((i & 0xf) < 8)
1611      {
1721    UINT32 ea = (base) ? rsp->r[base] + (offset * 8) : (offset * 8);
1722    int end = index + 8;
1723    for (int i=index; i < end; i++)
1724    {
1725        if ((i & 0xf) < 8)
1726        {
16121727#if USE_SIMD
1613         UINT16 value;
1614         SIMD_EXTRACT16(rsp->xv[dest], value, i);
1615         value >>= 7;
1616         WRITE8(rsp, ea, (UINT8)value);
1728            UINT16 value;
1729            SIMD_EXTRACT16(rsp->xv[dest], value, i);
1730            value >>= 7;
1731            WRITE8(rsp, ea, (UINT8)value);
16171732#else
1618         WRITE8(rsp, ea, VREG_S(dest, (i & 0x7)) >> 7);
1733            WRITE8(rsp, ea, VREG_S(dest, (i & 0x7)) >> 7);
16191734#endif
1620      }
1621      else
1622      {
1735        }
1736        else
1737        {
16231738#if USE_SIMD
1624         UINT16 value;
1625         SIMD_EXTRACT16(rsp->xv[dest], value, i);
1626         WRITE8(rsp, ea, (UINT8)value >> 8);
1739            UINT16 value;
1740            SIMD_EXTRACT16(rsp->xv[dest], value, i);
1741            WRITE8(rsp, ea, (UINT8)value >> 8);
16271742#else
1628         WRITE8(rsp, ea, VREG_B(dest, ((i & 0x7) << 1)));
1743            WRITE8(rsp, ea, VREG_B(dest, ((i & 0x7) << 1)));
16291744#endif
1630      }
1631      ea++;
1632   }
1745        }
1746        ea++;
1747    }
16331748}
16341749
16351750static void cfunc_rsp_shv(void *param)
16361751{
1637   rsp_state *rsp = (rsp_state*)param;
1638   UINT32 op = rsp->impstate->arg0;
1639   int dest = (op >> 16) & 0x1f;
1640   int base = (op >> 21) & 0x1f;
1641   int index = (op >> 7) & 0xf;
1642   int offset = (op & 0x7f);
1643   if (offset & 0x40)
1644   {
1645      offset |= 0xffffffc0;
1646   }
1647   // 31       25      20      15      10     6        0
1648   // --------------------------------------------------
1649   // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset |
1650   // --------------------------------------------------
1651   //
1652   // Stores bits 14-7 of each element, with 2-byte stride
1752    rsp_state *rsp = (rsp_state*)param;
1753    UINT32 op = rsp->impstate->arg0;
1754    int dest = (op >> 16) & 0x1f;
1755    int base = (op >> 21) & 0x1f;
1756    int index = (op >> 7) & 0xf;
1757    int offset = (op & 0x7f);
1758    if (offset & 0x40)
1759    {
1760        offset |= 0xffffffc0;
1761    }
1762    // 31       25      20      15      10     6        0
1763    // --------------------------------------------------
1764    // | 111010 | BBBBB | TTTTT | 01000 | IIII | Offset |
1765    // --------------------------------------------------
1766    //
1767    // Stores bits 14-7 of each element, with 2-byte stride
16531768
1654   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1655   for (int i=0; i < 8; i++)
1656   {
1657      int element = index + (i << 1);
1769    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1770    for (int i=0; i < 8; i++)
1771    {
1772        int element = index + (i << 1);
16581773#if USE_SIMD
1659      UINT16 value;
1660      SIMD_EXTRACT16(rsp->xv[dest], value, element >> 1);
1661      WRITE8(rsp, ea, (value >> 7) & 0x00ff);
1774        UINT16 value;
1775        SIMD_EXTRACT16(rsp->xv[dest], value, element >> 1);
1776        WRITE8(rsp, ea, (value >> 7) & 0x00ff);
16621777#else
1663      UINT8 d = (VREG_B(dest, (element & 0xf)) << 1) |
1664               (VREG_B(dest, ((element + 1) & 0xf)) >> 7);
1665      WRITE8(rsp, ea, d);
1778        UINT8 d = (VREG_B(dest, (element & 0xf)) << 1) |
1779                    (VREG_B(dest, ((element + 1) & 0xf)) >> 7);
1780        WRITE8(rsp, ea, d);
16661781#endif
1667      ea += 2;
1668   }
1782        ea += 2;
1783    }
16691784}
16701785
16711786static void cfunc_rsp_sfv(void *param)
16721787{
1673   rsp_state *rsp = (rsp_state*)param;
1674   UINT32 op = rsp->impstate->arg0;
1675   int dest = (op >> 16) & 0x1f;
1676   int base = (op >> 21) & 0x1f;
1677   int index = (op >> 7) & 0xf;
1678   int offset = (op & 0x7f);
1679   if (offset & 0x40)
1680   {
1681      offset |= 0xffffffc0;
1682   }
1683   // 31       25      20      15      10     6        0
1684   // --------------------------------------------------
1685   // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset |
1686   // --------------------------------------------------
1687   //
1688   // Stores bits 14-7 of upper or lower quad, with 4-byte stride
1788    rsp_state *rsp = (rsp_state*)param;
1789    UINT32 op = rsp->impstate->arg0;
1790    int dest = (op >> 16) & 0x1f;
1791    int base = (op >> 21) & 0x1f;
1792    int index = (op >> 7) & 0xf;
1793    int offset = (op & 0x7f);
1794    if (offset & 0x40)
1795    {
1796        offset |= 0xffffffc0;
1797    }
1798    // 31       25      20      15      10     6        0
1799    // --------------------------------------------------
1800    // | 111010 | BBBBB | TTTTT | 01001 | IIII | Offset |
1801    // --------------------------------------------------
1802    //
1803    // Stores bits 14-7 of upper or lower quad, with 4-byte stride
16891804
1690   if (index & 0x7)    printf("RSP: SFV: index = %d at %08X\n", index, rsp->ppc);
1805    if (index & 0x7)    printf("RSP: SFV: index = %d at %08X\n", index, rsp->ppc);
16911806
1692   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1693   int eaoffset = ea & 0xf;
1694   ea &= ~0xf;
1807    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1808    int eaoffset = ea & 0xf;
1809    ea &= ~0xf;
16951810
1696   int end = (index >> 1) + 4;
1811    int end = (index >> 1) + 4;
16971812
1698   for (int i = index>>1; i < end; i++)
1699   {
1813    for (int i = index>>1; i < end; i++)
1814    {
17001815#if USE_SIMD
1701      UINT16 value;
1702      SIMD_EXTRACT16(rsp->xv[dest], value, i);
1703      WRITE8(rsp, ea + (eaoffset & 0xf), (value >> 7) & 0x00ff);
1816        UINT16 value;
1817        SIMD_EXTRACT16(rsp->xv[dest], value, i);
1818        WRITE8(rsp, ea + (eaoffset & 0xf), (value >> 7) & 0x00ff);
17041819#else
1705      WRITE8(rsp, ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7);
1820        WRITE8(rsp, ea + (eaoffset & 0xf), VREG_S(dest, i) >> 7);
17061821#endif
1707      eaoffset += 4;
1708   }
1822        eaoffset += 4;
1823    }
17091824}
17101825
17111826static void cfunc_rsp_swv(void *param)
17121827{
1713   rsp_state *rsp = (rsp_state*)param;
1714   UINT32 op = rsp->impstate->arg0;
1715   int dest = (op >> 16) & 0x1f;
1716   int base = (op >> 21) & 0x1f;
1717   int index = (op >> 7) & 0xf;
1718   int offset = (op & 0x7f);
1719   if (offset & 0x40)
1720   {
1721      offset |= 0xffffffc0;
1722   }
1723   // 31       25      20      15      10     6        0
1724   // --------------------------------------------------
1725   // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset |
1726   // --------------------------------------------------
1727   //
1728   // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0
1729   // after byte index 15
1828    rsp_state *rsp = (rsp_state*)param;
1829    UINT32 op = rsp->impstate->arg0;
1830    int dest = (op >> 16) & 0x1f;
1831    int base = (op >> 21) & 0x1f;
1832    int index = (op >> 7) & 0xf;
1833    int offset = (op & 0x7f);
1834    if (offset & 0x40)
1835    {
1836        offset |= 0xffffffc0;
1837    }
1838    // 31       25      20      15      10     6        0
1839    // --------------------------------------------------
1840    // | 111010 | BBBBB | TTTTT | 01010 | IIII | Offset |
1841    // --------------------------------------------------
1842    //
1843    // Stores the full 128-bit vector starting from vector byte index and wrapping to index 0
1844    // after byte index 15
17301845
1731   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1732   int eaoffset = ea & 0xf;
1733   ea &= ~0xf;
1846    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1847    int eaoffset = ea & 0xf;
1848    ea &= ~0xf;
17341849
1735   int end = index + 16;
1736   for (int i = index; i < end; i++)
1737   {
1850    int end = index + 16;
1851    for (int i = index; i < end; i++)
1852    {
17381853#if USE_SIMD
1739      UINT16 value;
1740      SIMD_EXTRACT16(rsp->xv[dest], value, i >> 1);
1741      WRITE8(rsp, ea + (eaoffset & 0xf), (value >> ((1-(i & 1)) * 8)) & 0xff);
1854        UINT16 value;
1855        SIMD_EXTRACT16(rsp->xv[dest], value, i >> 1);
1856        WRITE8(rsp, ea + (eaoffset & 0xf), (value >> ((1-(i & 1)) * 8)) & 0xff);
17421857#else
1743      WRITE8(rsp, ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf));
1858        WRITE8(rsp, ea + (eaoffset & 0xf), VREG_B(dest, i & 0xf));
17441859#endif
1745      eaoffset++;
1746   }
1860        eaoffset++;
1861    }
17471862}
17481863
17491864static void cfunc_rsp_stv(void *param)
17501865{
1751   rsp_state *rsp = (rsp_state*)param;
1752   UINT32 op = rsp->impstate->arg0;
1753   int dest = (op >> 16) & 0x1f;
1754   int base = (op >> 21) & 0x1f;
1755   int index = (op >> 7) & 0xf;
1756   int offset = (op & 0x7f);
1866    rsp_state *rsp = (rsp_state*)param;
1867    UINT32 op = rsp->impstate->arg0;
1868    int dest = (op >> 16) & 0x1f;
1869    int base = (op >> 21) & 0x1f;
1870    int index = (op >> 7) & 0xf;
1871    int offset = (op & 0x7f);
17571872
1758   if (offset & 0x40)
1759   {
1760      offset |= 0xffffffc0;
1761   }
1762   // 31       25      20      15      10     6        0
1763   // --------------------------------------------------
1764   // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset |
1765   // --------------------------------------------------
1766   //
1767   // Stores one element from maximum of 8 vectors, while incrementing element index
1873    if (offset & 0x40)
1874    {
1875        offset |= 0xffffffc0;
1876    }
1877    // 31       25      20      15      10     6        0
1878    // --------------------------------------------------
1879    // | 111010 | BBBBB | TTTTT | 01011 | IIII | Offset |
1880    // --------------------------------------------------
1881    //
1882    // Stores one element from maximum of 8 vectors, while incrementing element index
17681883
1769   int vs = dest;
1770   int ve = dest + 8;
1771   if (ve > 32)
1772   {
1773      ve = 32;
1774   }
1884    int vs = dest;
1885    int ve = dest + 8;
1886    if (ve > 32)
1887    {
1888        ve = 32;
1889    }
17751890
1776   int element = 8 - (index >> 1);
1891    int element = 8 - (index >> 1);
17771892
1778   UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1779   int eaoffset = (ea & 0xf) + (element * 2);
1780   ea &= ~0xf;
1893    UINT32 ea = (base) ? rsp->r[base] + (offset * 16) : (offset * 16);
1894    int eaoffset = (ea & 0xf) + (element * 2);
1895    ea &= ~0xf;
17811896
1782   for (int i = vs; i < ve; i++)
1783   {
1897    for (int i = vs; i < ve; i++)
1898    {
17841899#if USE_SIMD
1785      UINT16 value;
1786      SIMD_EXTRACT16(rsp->xv[dest], value, element);
1787      WRITE16(rsp, ea + (eaoffset & 0xf), value);
1900        UINT16 value;
1901        SIMD_EXTRACT16(rsp->xv[dest], value, element);
1902        WRITE16(rsp, ea + (eaoffset & 0xf), value);
17881903#else
1789      WRITE16(rsp, ea + (eaoffset & 0xf), VREG_S(i, element & 0x7));
1904        WRITE16(rsp, ea + (eaoffset & 0xf), VREG_S(i, element & 0x7));
17901905#endif
1791      eaoffset += 2;
1792      element++;
1793   }
1906        eaoffset += 2;
1907        element++;
1908    }
17941909}
17951910
17961911static int generate_swc2(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
17971912{
17981913//  int loopdest;
1799   UINT32 op = desc->opptr.l[0];
1800   //int dest = (op >> 16) & 0x1f;
1801   //int base = (op >> 21) & 0x1f;
1802   //int index = (op >> 7) & 0xf;
1803   int offset = (op & 0x7f);
1804   //int skip;
1805   if (offset & 0x40)
1806   {
1807      offset |= 0xffffffc0;
1808   }
1914    UINT32 op = desc->opptr.l[0];
1915    //int dest = (op >> 16) & 0x1f;
1916    //int base = (op >> 21) & 0x1f;
1917    //int index = (op >> 7) & 0xf;
1918    int offset = (op & 0x7f);
1919    //int skip;
1920    if (offset & 0x40)
1921    {
1922        offset |= 0xffffffc0;
1923    }
18091924
1810   switch ((op >> 11) & 0x1f)
1811   {
1812      case 0x00:      /* SBV */
1813         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1814         UML_CALLC(block, cfunc_rsp_sbv, rsp);
1815         return TRUE;
1816      case 0x01:      /* SSV */
1817         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1818         UML_CALLC(block, cfunc_rsp_ssv, rsp);
1819         return TRUE;
1820      case 0x02:      /* SLV */
1821         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1822         UML_CALLC(block, cfunc_rsp_slv, rsp);
1823         return TRUE;
1824      case 0x03:      /* SDV */
1825         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1826         UML_CALLC(block, cfunc_rsp_sdv, rsp);
1827         return TRUE;
1828      case 0x04:      /* SQV */
1829         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1830         UML_CALLC(block, cfunc_rsp_sqv, rsp);
1831         return TRUE;
1832      case 0x05:      /* SRV */
1833         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1834         UML_CALLC(block, cfunc_rsp_srv, rsp);
1835         return TRUE;
1836      case 0x06:      /* SPV */
1837         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1838         UML_CALLC(block, cfunc_rsp_spv, rsp);
1839         return TRUE;
1840      case 0x07:      /* SUV */
1841         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1842         UML_CALLC(block, cfunc_rsp_suv, rsp);
1843         return TRUE;
1844      case 0x08:      /* SHV */
1845         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1846         UML_CALLC(block, cfunc_rsp_shv, rsp);
1847         return TRUE;
1848      case 0x09:      /* SFV */
1849         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1850         UML_CALLC(block, cfunc_rsp_sfv, rsp);
1851         return TRUE;
1852      case 0x0a:      /* SWV */
1853         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1854         UML_CALLC(block, cfunc_rsp_swv, rsp);
1855         return TRUE;
1856      case 0x0b:      /* STV */
1857         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1858         UML_CALLC(block, cfunc_rsp_stv, rsp);
1859         return TRUE;
1925    switch ((op >> 11) & 0x1f)
1926    {
1927        case 0x00:      /* SBV */
1928            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1929            UML_CALLC(block, cfunc_rsp_sbv, rsp);
1930            return TRUE;
1931        case 0x01:      /* SSV */
1932            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1933            UML_CALLC(block, cfunc_rsp_ssv, rsp);
1934            return TRUE;
1935        case 0x02:      /* SLV */
1936            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1937            UML_CALLC(block, cfunc_rsp_slv, rsp);
1938            return TRUE;
1939        case 0x03:      /* SDV */
1940            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1941            UML_CALLC(block, cfunc_rsp_sdv, rsp);
1942            return TRUE;
1943        case 0x04:      /* SQV */
1944            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1945            UML_CALLC(block, cfunc_rsp_sqv, rsp);
1946            return TRUE;
1947        case 0x05:      /* SRV */
1948            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1949            UML_CALLC(block, cfunc_rsp_srv, rsp);
1950            return TRUE;
1951        case 0x06:      /* SPV */
1952            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1953            UML_CALLC(block, cfunc_rsp_spv, rsp);
1954            return TRUE;
1955        case 0x07:      /* SUV */
1956            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1957            UML_CALLC(block, cfunc_rsp_suv, rsp);
1958            return TRUE;
1959        case 0x08:      /* SHV */
1960            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1961            UML_CALLC(block, cfunc_rsp_shv, rsp);
1962            return TRUE;
1963        case 0x09:      /* SFV */
1964            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1965            UML_CALLC(block, cfunc_rsp_sfv, rsp);
1966            return TRUE;
1967        case 0x0a:      /* SWV */
1968            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1969            UML_CALLC(block, cfunc_rsp_swv, rsp);
1970            return TRUE;
1971        case 0x0b:      /* STV */
1972            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
1973            UML_CALLC(block, cfunc_rsp_stv, rsp);
1974            return TRUE;
18601975
1861      default:
1862         unimplemented_opcode(rsp, op);
1863         return FALSE;
1864   }
1976        default:
1977            unimplemented_opcode(rsp, op);
1978            return FALSE;
1979    }
18651980
1866   return TRUE;
1981    return TRUE;
18671982}
18681983
18691984INLINE UINT16 SATURATE_ACCUM(rsp_state *rsp, int accum, int slice, UINT16 negative, UINT16 positive)
18701985{
1871   if ((INT16)ACCUM_H(accum) < 0)
1872   {
1873      if ((UINT16)(ACCUM_H(accum)) != 0xffff)
1874      {
1875         return negative;
1876      }
1877      else
1878      {
1879         if ((INT16)ACCUM_M(accum) >= 0)
1880         {
1881            return negative;
1882         }
1883         else
1884         {
1885            if (slice == 0)
1886            {
1887#if USE_SIMD
1888               UINT16 ret;
1889               SIMD_EXTRACT16(rsp->accum_l, ret, accum);
1890               return ret;
1891#else
1892               return ACCUM_L(accum);
1893#endif
1894            }
1895            else if (slice == 1)
1896            {
1897               return ACCUM_M(accum);
1898            }
1899         }
1900      }
1901   }
1902   else
1903   {
1904      if ((UINT16)(ACCUM_H(accum)) != 0)
1905      {
1906         return positive;
1907      }
1908      else
1909      {
1910         if ((INT16)ACCUM_M(accum) < 0)
1911         {
1912            return positive;
1913         }
1914         else
1915         {
1916            if (slice == 0)
1917            {
1918#if USE_SIMD
1919               UINT16 ret;
1920               SIMD_EXTRACT16(rsp->accum_l, ret, accum);
1921               return ret;
1922#else
1923               return ACCUM_L(accum);
1924#endif
1925            }
1926            else
1927            {
1928               return ACCUM_M(accum);
1929            }
1930         }
1931      }
1932   }
1986    if ((INT16)ACCUM_H(rsp, accum) < 0)
1987    {
1988        if ((UINT16)(ACCUM_H(rsp, accum)) != 0xffff)
1989        {
1990            return negative;
1991        }
1992        else
1993        {
1994            if ((INT16)ACCUM_M(rsp, accum) >= 0)
1995            {
1996                return negative;
1997            }
1998            else
1999            {
2000                if (slice == 0)
2001                {
2002                    return ACCUM_L(rsp, accum);
2003                }
2004                else if (slice == 1)
2005                {
2006                    return ACCUM_M(rsp, accum);
2007                }
2008            }
2009        }
2010    }
2011    else
2012    {
2013        if ((UINT16)(ACCUM_H(rsp, accum)) != 0)
2014        {
2015            return positive;
2016        }
2017        else
2018        {
2019            if ((INT16)ACCUM_M(rsp, accum) < 0)
2020            {
2021                return positive;
2022            }
2023            else
2024            {
2025                if (slice == 0)
2026                {
2027                    return ACCUM_L(rsp, accum);
2028                }
2029                else
2030                {
2031                    return ACCUM_M(rsp, accum);
2032                }
2033            }
2034        }
2035    }
19332036
1934   return 0;
2037    return 0;
19352038}
19362039
19372040#if USE_SIMD
19382041__m128i SATURATE_ACCUM1(__m128i accum_h, __m128i accum_m, UINT16 negative, UINT16 positive)
19392042{
1940   __m128i vnegative = _mm_set_epi16(negative, negative, negative, negative, negative, negative, negative, negative);
1941   __m128i vpositive = _mm_set_epi16(positive, positive, positive, positive, positive, positive, positive, positive);
2043    __m128i vnegative = _mm_set_epi16(negative, negative, negative, negative, negative, negative, negative, negative);
2044    __m128i vpositive = _mm_set_epi16(positive, positive, positive, positive, positive, positive, positive, positive);
19422045
1943   // conditional masks
1944   __m128i accum_hlz = _mm_cmplt_epi16(accum_h, vec_zero);
1945   __m128i accum_hgz = _mm_cmpgt_epi16(accum_h, vec_zero);
1946   __m128i accum_hz = _mm_cmpeq_epi16(accum_h, vec_zero);
1947   __m128i accum_hn1 = _mm_cmpeq_epi16(accum_h, vec_neg1);
1948   __m128i accum_hnn1 = _mm_xor_si128(accum_hn1, vec_neg1);
2046    // conditional masks
2047    __m128i accum_hlz = _mm_cmplt_epi16(accum_h, _mm_setzero_si128());
2048    __m128i accum_hgz = _mm_cmpgt_epi16(accum_h, _mm_setzero_si128());
2049    __m128i accum_hz = _mm_cmpeq_epi16(accum_h, _mm_setzero_si128());
2050    __m128i accum_hn1 = _mm_cmpeq_epi16(accum_h, vec_neg1);
2051    __m128i accum_hnn1 = _mm_xor_si128(accum_hn1, vec_neg1);
19492052
1950   __m128i accum_mlz = _mm_cmplt_epi16(accum_m, vec_zero);
1951   __m128i accum_mgz = _mm_cmpgt_epi16(accum_m, vec_zero);
1952   __m128i accum_mz = _mm_cmpeq_epi16(accum_m, vec_zero);
1953   __m128i accum_mgez = _mm_or_si128(accum_mz, accum_mgz);
2053    __m128i accum_mlz = _mm_cmplt_epi16(accum_m, _mm_setzero_si128());
2054    __m128i accum_mgz = _mm_cmpgt_epi16(accum_m, _mm_setzero_si128());
2055    __m128i accum_mz = _mm_cmpeq_epi16(accum_m, _mm_setzero_si128());
2056    __m128i accum_mgez = _mm_or_si128(accum_mz, accum_mgz);
19542057
1955   // Return negative if H<0 && (H!=0xffff || M >= 0)
1956   // Return positive if H>0 || (H==0 && M<0)
1957   // Return medium slice if H==0xffff && M<0
1958   // Return medium slice if H==0 && M>=0
2058    // Return negative if H<0 && (H!=0xffff || M >= 0)
2059    // Return positive if H>0 || (H==0 && M<0)
2060    // Return medium slice if H==0xffff && M<0
2061    // Return medium slice if H==0 && M>=0
19592062
1960   __m128i negative_mask = _mm_and_si128(accum_hlz, _mm_or_si128(accum_hnn1, accum_mgez));
1961   __m128i positive_mask = _mm_or_si128(accum_hgz, _mm_and_si128(accum_hz, accum_mlz));
1962   __m128i accumm_mask = _mm_or_si128(_mm_and_si128(accum_hz, accum_mgez), _mm_and_si128(accum_hn1, accum_mlz));
2063    __m128i negative_mask = _mm_and_si128(accum_hlz, _mm_or_si128(accum_hnn1, accum_mgez));
2064    __m128i positive_mask = _mm_or_si128(accum_hgz, _mm_and_si128(accum_hz, accum_mlz));
2065    __m128i accumm_mask = _mm_or_si128(_mm_and_si128(accum_hz, accum_mgez), _mm_and_si128(accum_hn1, accum_mlz));
19632066
1964   __m128i output = _mm_and_si128(accum_m, accumm_mask);
1965   output = _mm_or_si128(output, _mm_and_si128(vnegative, negative_mask));
1966   output = _mm_or_si128(output, _mm_and_si128(vpositive, positive_mask));
1967   return output;
2067    __m128i output = _mm_and_si128(accum_m, accumm_mask);
2068    output = _mm_or_si128(output, _mm_and_si128(vnegative, negative_mask));
2069    output = _mm_or_si128(output, _mm_and_si128(vpositive, positive_mask));
2070    return output;
19682071}
19692072#endif
19702073
19712074INLINE UINT16 SATURATE_ACCUM1(rsp_state *rsp, int accum, UINT16 negative, UINT16 positive)
19722075{
1973   // Return negative if H<0 && (H!=0xffff || M >= 0)
1974   // Return positive if H>0 || (H==0 && M<0)
1975   // Return medium slice if H==0xffff && M<0
1976   // Return medium slice if H==0 && M>=0
1977   if ((INT16)ACCUM_H(accum) < 0)
1978   {
1979      if ((UINT16)(ACCUM_H(accum)) != 0xffff)
1980      {
1981         return negative;
1982      }
1983      else
1984      {
1985         if ((INT16)ACCUM_M(accum) >= 0)
1986         {
1987            return negative;
1988         }
1989         else
1990         {
1991            return ACCUM_M(accum);
1992         }
1993      }
1994   }
1995   else
1996   {
1997      if ((UINT16)(ACCUM_H(accum)) != 0)
1998      {
1999         return positive;
2000      }
2001      else
2002      {
2003         if ((INT16)ACCUM_M(accum) < 0)
2004         {
2005            return positive;
2006         }
2007         else
2008         {
2009            return ACCUM_M(accum);
2010         }
2011      }
2012   }
2076    // Return negative if H<0 && (H!=0xffff || M >= 0)
2077    // Return positive if H>0 || (H==0 && M<0)
2078    // Return medium slice if H==0xffff && M<0
2079    // Return medium slice if H==0 && M>=0
2080    if ((INT16)ACCUM_H(rsp, accum) < 0)
2081    {
2082        if ((UINT16)(ACCUM_H(rsp, accum)) != 0xffff)
2083        {
2084            return negative;
2085        }
2086        else
2087        {
2088            if ((INT16)ACCUM_M(rsp, accum) >= 0)
2089            {
2090                return negative;
2091            }
2092            else
2093            {
2094                return ACCUM_M(rsp, accum);
2095            }
2096        }
2097    }
2098    else
2099    {
2100        if ((UINT16)(ACCUM_H(rsp, accum)) != 0)
2101        {
2102            return positive;
2103        }
2104        else
2105        {
2106            if ((INT16)ACCUM_M(rsp, accum) < 0)
2107            {
2108                return positive;
2109            }
2110            else
2111            {
2112                return ACCUM_M(rsp, accum);
2113            }
2114        }
2115    }
20132116
2014   return 0;
2117    return 0;
20152118}
20162119
20172120INLINE UINT16 C_SATURATE_ACCUM1(UINT16 *h, UINT16 *m, int accum, UINT16 negative, UINT16 positive)
20182121{
2019   // Return negative if H<0 && (H!=0xffff || M >= 0)
2020   // Return positive if H>0 || (H==0 && M<0)
2021   // Return medium slice if H==0xffff && M<0
2022   // Return medium slice if H==0 && M>=0
2023   if ((INT16)h[accum] < 0)
2024   {
2025      if ((UINT16)h[accum] != 0xffff)
2026      {
2027         return negative;
2028      }
2029      else
2030      {
2031         if ((INT16)m[accum] >= 0)
2032         {
2033            return negative;
2034         }
2035         else
2036         {
2037            return m[accum];
2038         }
2039      }
2040   }
2041   else
2042   {
2043      if ((UINT16)h[accum] != 0)
2044      {
2045         return positive;
2046      }
2047      else
2048      {
2049         if ((INT16)m[accum] < 0)
2050         {
2051            return positive;
2052         }
2053         else
2054         {
2055            return m[accum];
2056         }
2057      }
2058   }
2122    // Return negative if H<0 && (H!=0xffff || M >= 0)
2123    // Return positive if H>0 || (H==0 && M<0)
2124    // Return medium slice if H==0xffff && M<0
2125    // Return medium slice if H==0 && M>=0
2126    if ((INT16)h[accum] < 0)
2127    {
2128        if ((UINT16)h[accum] != 0xffff)
2129        {
2130            return negative;
2131        }
2132        else
2133        {
2134            if ((INT16)m[accum] >= 0)
2135            {
2136                return negative;
2137            }
2138            else
2139            {
2140                return m[accum];
2141            }
2142        }
2143    }
2144    else
2145    {
2146        if ((UINT16)h[accum] != 0)
2147        {
2148            return positive;
2149        }
2150        else
2151        {
2152            if ((INT16)m[accum] < 0)
2153            {
2154                return positive;
2155            }
2156            else
2157            {
2158                return m[accum];
2159            }
2160        }
2161    }
20592162
2060   return 0;
2163    return 0;
20612164}
20622165
20632166#if USE_SIMD
20642167#define WRITEBACK_RESULT() { \
2065      SIMD_INSERT16(rsp->xv[VDREG], vres[0], 0); \
2066      SIMD_INSERT16(rsp->xv[VDREG], vres[1], 1); \
2067      SIMD_INSERT16(rsp->xv[VDREG], vres[2], 2); \
2068      SIMD_INSERT16(rsp->xv[VDREG], vres[3], 3); \
2069      SIMD_INSERT16(rsp->xv[VDREG], vres[4], 4); \
2070      SIMD_INSERT16(rsp->xv[VDREG], vres[5], 5); \
2071      SIMD_INSERT16(rsp->xv[VDREG], vres[6], 6); \
2072      SIMD_INSERT16(rsp->xv[VDREG], vres[7], 7); \
2168        SIMD_INSERT16(rsp->xv[VDREG], vres[0], 0); \
2169        SIMD_INSERT16(rsp->xv[VDREG], vres[1], 1); \
2170        SIMD_INSERT16(rsp->xv[VDREG], vres[2], 2); \
2171        SIMD_INSERT16(rsp->xv[VDREG], vres[3], 3); \
2172        SIMD_INSERT16(rsp->xv[VDREG], vres[4], 4); \
2173        SIMD_INSERT16(rsp->xv[VDREG], vres[5], 5); \
2174        SIMD_INSERT16(rsp->xv[VDREG], vres[6], 6); \
2175        SIMD_INSERT16(rsp->xv[VDREG], vres[7], 7); \
20732176}
20742177#else
20752178#define WRITEBACK_RESULT() { \
2076      W_VREG_S(VDREG, 0) = vres[0];   \
2077      W_VREG_S(VDREG, 1) = vres[1];   \
2078      W_VREG_S(VDREG, 2) = vres[2];   \
2079      W_VREG_S(VDREG, 3) = vres[3];   \
2080      W_VREG_S(VDREG, 4) = vres[4];   \
2081      W_VREG_S(VDREG, 5) = vres[5];   \
2082      W_VREG_S(VDREG, 6) = vres[6];   \
2083      W_VREG_S(VDREG, 7) = vres[7];   \
2179        W_VREG_S(VDREG, 0) = vres[0];   \
2180        W_VREG_S(VDREG, 1) = vres[1];   \
2181        W_VREG_S(VDREG, 2) = vres[2];   \
2182        W_VREG_S(VDREG, 3) = vres[3];   \
2183        W_VREG_S(VDREG, 4) = vres[4];   \
2184        W_VREG_S(VDREG, 5) = vres[5];   \
2185        W_VREG_S(VDREG, 6) = vres[6];   \
2186        W_VREG_S(VDREG, 7) = vres[7];   \
20842187}
20852188#endif
20862189
2190
2191/* ============================================================================
2192* RSPPackLo32to16: Pack LSBs of 32-bit vectors to 16-bits without saturation.
2193* TODO: 5 SSE2 operations is kind of expensive just to truncate values?
2194* ========================================================================= */
2195INLINE __m128i RSPPackLo32to16(__m128i vectorLow, __m128i vectorHigh)
2196{
2197   vectorLow = _mm_slli_epi32(vectorLow, 16);
2198   vectorHigh = _mm_slli_epi32(vectorHigh, 16);
2199   vectorLow = _mm_srai_epi32(vectorLow, 16);
2200   vectorHigh = _mm_srai_epi32(vectorHigh, 16);
2201   return _mm_packs_epi32(vectorLow, vectorHigh);
2202}
2203
2204/* ============================================================================
2205* RSPPackHi32to16: Pack MSBs of 32-bit vectors to 16-bits without saturation.
2206* ========================================================================= */
2207INLINE __m128i RSPPackHi32to16(__m128i vectorLow, __m128i vectorHigh)
2208{
2209   vectorLow = _mm_srai_epi32(vectorLow, 16);
2210   vectorHigh = _mm_srai_epi32(vectorHigh, 16);
2211   return _mm_packs_epi32(vectorLow, vectorHigh);
2212}
2213
2214/* ============================================================================
2215* RSPSignExtend16to32: Sign-extend 16-bit slices to 32-bit slices.
2216* ========================================================================= */
2217INLINE void RSPSignExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh)
2218{
2219   __m128i vMask = _mm_srai_epi16(source, 15);
2220   *vectorHigh = _mm_unpackhi_epi16(source, vMask);
2221   *vectorLow = _mm_unpacklo_epi16(source, vMask);
2222}
2223
2224/* ============================================================================
2225* RSPZeroExtend16to32: Zero-extend 16-bit slices to 32-bit slices.
2226* ========================================================================= */
2227INLINE void RSPZeroExtend16to32(__m128i source, __m128i *vectorLow, __m128i *vectorHigh)
2228{
2229   *vectorHigh = _mm_unpackhi_epi16(source, _mm_setzero_si128());
2230   *vectorLow = _mm_unpacklo_epi16(source, _mm_setzero_si128());
2231}
2232
2233/* ============================================================================
2234* _mm_mullo_epi32: SSE2 lacks _mm_mullo_epi32, define it manually.
2235* TODO/WARNING/DISCLAIMER: Assumes one argument is positive.
2236* ========================================================================= */
2237INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b)
2238{
2239   __m128i a4 = _mm_srli_si128(a, 4);
2240   __m128i b4 = _mm_srli_si128(b, 4);
2241   __m128i ba = _mm_mul_epu32(b, a);
2242   __m128i b4a4 = _mm_mul_epu32(b4, a4);
2243
2244   __m128i mask = _mm_setr_epi32(~0, 0, ~0, 0);
2245   __m128i baMask = _mm_and_si128(ba, mask);
2246   __m128i b4a4Mask = _mm_and_si128(b4a4, mask);
2247   __m128i b4a4MaskShift = _mm_slli_si128(b4a4Mask, 4);
2248
2249   return _mm_or_si128(baMask, b4a4MaskShift);
2250}
2251
2252/* ============================================================================
2253* RSPClampLowToVal: Clamps the low word of the accumulator.
2254* ========================================================================= */
2255INLINE __m128i RSPClampLowToVal(__m128i vaccLow, __m128i vaccMid, __m128i vaccHigh)
2256{
2257   __m128i setMask = _mm_cmpeq_epi16(_mm_setzero_si128(), _mm_setzero_si128());
2258   __m128i negCheck, useValMask, negVal, posVal;
2259
2260   /* Compute some common values ahead of time. */
2261   negCheck = _mm_cmplt_epi16(vaccHigh, _mm_setzero_si128());
2262
2263   /* If accumulator < 0, clamp to val if val != TMin. */
2264   useValMask = _mm_and_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15));
2265   useValMask = _mm_cmpeq_epi16(useValMask, setMask);
2266   negVal = _mm_and_si128(useValMask, vaccLow);
2267
2268   /* Otherwise, clamp to ~0 if any high bits are set. */
2269   useValMask = _mm_or_si128(vaccHigh, _mm_srai_epi16(vaccMid, 15));
2270   useValMask = _mm_cmpeq_epi16(useValMask, _mm_setzero_si128());
2271   posVal = _mm_and_si128(useValMask, vaccLow);
2272
2273   negVal = _mm_and_si128(negCheck, negVal);
2274   posVal = _mm_andnot_si128(negCheck, posVal);
2275   return _mm_or_si128(negVal, posVal);
2276}
2277
20872278INLINE void cfunc_rsp_vmulf(void *param)
20882279{
2089   rsp_state *rsp = (rsp_state*)param;
2090   int op = rsp->impstate->arg0;
2091   //int i;
2092   // 31       25  24     20      15      10      5        0
2093   // ------------------------------------------------------
2094   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 |
2095   // ------------------------------------------------------
2096   //
2097   // Multiplies signed integer by signed integer * 2
2280    rsp_state *rsp = (rsp_state*)param;
2281    int op = rsp->impstate->arg0;
2282    //int i;
2283    // 31       25  24     20      15      10      5        0
2284    // ------------------------------------------------------
2285    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000000 |
2286    // ------------------------------------------------------
2287    //
2288    // Multiplies signed integer by signed integer * 2
20982289
2099   INT16 vres[8] = { 0 };
2100   for (int i = 0; i < 8; i++)
2101   {
2102#if USE_SIMD
2103      UINT16 w1, w2;
2104      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2105      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2106      INT32 s1 = (INT32)(INT16)w1;
2107      INT32 s2 = (INT32)(INT16)w2;
2108#else
2109      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2110      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2111#endif
2112      if (s1 == -32768 && s2 == -32768)
2113      {
2114         // overflow
2115         ACCUM_H(i) = 0;
2116         ACCUM_M(i) = -32768;
2117#if USE_SIMD
2118         SIMD_INSERT16(rsp->accum_l, -32768, i);
2119#else
2120         ACCUM_L(i) = -32768;
2121#endif
2122         vres[i] = 0x7fff;
2123      }
2124      else
2125      {
2126         INT64 r =  s1 * s2 * 2;
2127         r += 0x8000;    // rounding ?
2128         ACCUM_H(i) = (r < 0) ? 0xffff : 0;      // sign-extend to 48-bit
2129         ACCUM_M(i) = (INT16)(r >> 16);
2130#if USE_SIMD
2131         SIMD_INSERT16(rsp->accum_l, (UINT16)(r), i);
2132#else
2133         ACCUM_L(i) = (UINT16)r;
2134#endif
2135         vres[i] = ACCUM_M(i);
2136      }
2137   }
2138   WRITEBACK_RESULT();
2290    INT16 vres[8];
2291    for (int i = 0; i < 8; i++)
2292    {
2293        UINT16 w1, w2;
2294      SCALAR_GET_VS1(w1, i);
2295      SCALAR_GET_VS2(w2, i);
2296        INT32 s1 = (INT32)(INT16)w1;
2297        INT32 s2 = (INT32)(INT16)w2;
2298
2299        if (s1 == -32768 && s2 == -32768)
2300        {
2301            // overflow
2302            SET_ACCUM_H(0, i);
2303            SET_ACCUM_M(-32768, i);
2304            SET_ACCUM_L(-32768, i);
2305            vres[i] = 0x7fff;
2306        }
2307        else
2308        {
2309            INT64 r =  s1 * s2 * 2;
2310            r += 0x8000;    // rounding ?
2311            SET_ACCUM_H((r < 0) ? 0xffff : 0, i);
2312            SET_ACCUM_M((INT16)(r >> 16), i);
2313            SET_ACCUM_L((UINT16)(r), i);
2314            vres[i] = ACCUM_M(rsp, i);
2315        }
2316    }
2317    WRITEBACK_RESULT();
21392318}
21402319
21412320INLINE void cfunc_rsp_vmulu(void *param)
21422321{
2143   rsp_state *rsp = (rsp_state*)param;
2144   int op = rsp->impstate->arg0;
2145   // 31       25  24     20      15      10      5        0
2146   // ------------------------------------------------------
2147   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 |
2148   // ------------------------------------------------------
2149   //
2322    rsp_state *rsp = (rsp_state*)param;
2323    int op = rsp->impstate->arg0;
2324    // 31       25  24     20      15      10      5        0
2325    // ------------------------------------------------------
2326    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000001 |
2327    // ------------------------------------------------------
2328    //
21502329
2151   INT16 vres[8];
2152   for (int i = 0; i < 8; i++)
2153   {
2154#if USE_SIMD
2155      UINT16 w1, w2;
2156      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2157      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2158      INT32 s1 = (INT32)(INT16)w1;
2159      INT32 s2 = (INT32)(INT16)w2;
2160#else
2161      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2162      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2163#endif
2164      INT64 r = s1 * s2 * 2;
2165      r += 0x8000;    // rounding ?
2330    INT16 vres[8];
2331    for (int i = 0; i < 8; i++)
2332    {
2333        UINT16 w1, w2;
2334      SCALAR_GET_VS1(w1, i);
2335      SCALAR_GET_VS2(w2, i);
2336        INT32 s1 = (INT32)(INT16)w1;
2337        INT32 s2 = (INT32)(INT16)w2;
21662338
2167      ACCUM_H(i) = (UINT16)(r >> 32);
2168      ACCUM_M(i) = (UINT16)(r >> 16);
2169#if USE_SIMD
2170      SIMD_INSERT16(rsp->accum_l, (UINT16)(r), i);
2171#else
2172      ACCUM_L(i) = (UINT16)(r);
2173#endif
2339        INT64 r = s1 * s2 * 2;
2340        r += 0x8000;    // rounding ?
21742341
2175      if (r < 0)
2176      {
2177         vres[i] = 0;
2178      }
2179      else if (((INT16)(ACCUM_H(i)) ^ (INT16)(ACCUM_M(i))) < 0)
2180      {
2181         vres[i] = -1;
2182      }
2183      else
2184      {
2185         vres[i] = ACCUM_M(i);
2186      }
2187   }
2188   WRITEBACK_RESULT();
2342      SET_ACCUM_H((UINT16)(r >> 32), i);
2343      SET_ACCUM_M((UINT16)(r >> 16), i);
2344      SET_ACCUM_L((UINT16)(r), i);
2345
2346        if (r < 0)
2347        {
2348            vres[i] = 0;
2349        }
2350        else if (((INT16)(ACCUM_H(rsp, i)) ^ (INT16)(ACCUM_M(rsp, i))) < 0)
2351        {
2352            vres[i] = -1;
2353        }
2354        else
2355        {
2356            vres[i] = ACCUM_M(rsp, i);
2357        }
2358    }
2359    WRITEBACK_RESULT();
21892360}
21902361
21912362INLINE void cfunc_rsp_vmudl(void *param)
21922363{
2193   rsp_state *rsp = (rsp_state*)param;
2194   int op = rsp->impstate->arg0;
2195   INT16 vres[8] = { 0 };
2196   // 31       25  24     20      15      10      5        0
2197   // ------------------------------------------------------
2198   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 |
2199   // ------------------------------------------------------
2200   //
2201   // Multiplies signed integer by unsigned fraction
2202   // The result is added into accumulator
2203   // The middle slice of accumulator is stored into destination element
2364    rsp_state *rsp = (rsp_state*)param;
2365    int op = rsp->impstate->arg0;
22042366
2205   for (int i = 0; i < 8; i++)
2206   {
2367    // 31       25  24     20      15      10      5        0
2368    // ------------------------------------------------------
2369    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001101 |
2370    // ------------------------------------------------------
2371    //
2372    // Multiplies signed integer by unsigned fraction
2373    // The result is added into accumulator
2374    // The middle slice of accumulator is stored into destination element
2375
22072376#if USE_SIMD
2208      UINT16 w1, w2;
2209      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2210      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2211      UINT32 s1 = (UINT32)w1;
2212      UINT32 s2 = (UINT32)w2;
2377
2378   __m128i vsReg = rsp->xv[VS1REG];
2379   __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2380
2381   /* Unpack to obtain for 32-bit precision. */
2382   __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg);
2383   __m128i unpackHi = _mm_mulhi_epu16(vsReg, vtReg);
2384   __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi);
2385   __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi);
2386
2387   rsp->xv[VDREG] = rsp->accum_l = RSPPackHi32to16(loProduct, hiProduct);
2388
2389   rsp->accum_m = _mm_setzero_si128();
2390   rsp->accum_h = _mm_setzero_si128();
2391
22132392#else
2214      UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i);
2215      UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2216#endif
2217      UINT32 r = s1 * s2;
22182393
2219      ACCUM_H(i) = 0;
2220      ACCUM_M(i) = 0;
2221#if USE_SIMD
2222      SIMD_INSERT16(rsp->accum_l, (UINT16)(r >> 16), i);
2223#else
2224      ACCUM_L(i) = (UINT16)(r >> 16);
2394    INT16 vres[8];
2395    for (int i = 0; i < 8; i++)
2396    {
2397        UINT16 w1, w2;
2398      SCALAR_GET_VS1(w1, i);
2399      SCALAR_GET_VS2(w2, i);
2400        UINT32 s1 = (UINT32)(UINT16)w1;
2401        UINT32 s2 = (UINT32)(UINT16)w2;
2402
2403        UINT32 r = s1 * s2;
2404
2405        SET_ACCUM_H(0, i);
2406        SET_ACCUM_M(0, i);
2407        SET_ACCUM_L((UINT16)(r >> 16), i);
2408
2409        vres[i] = ACCUM_L(rsp, i);
2410    }
2411    WRITEBACK_RESULT();
22252412#endif
2226
2227      vres[i] = (UINT16)(r >> 16);
2228   }
2229   WRITEBACK_RESULT();
22302413}
22312414
22322415INLINE void cfunc_rsp_vmudm(void *param)
22332416{
2234   rsp_state *rsp = (rsp_state*)param;
2235   int op = rsp->impstate->arg0;
2236   INT16 vres[8] = { 0 };
2237   //int i;
2238   // 31       25  24     20      15      10      5        0
2239   // ------------------------------------------------------
2240   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 |
2241   // ------------------------------------------------------
2242   //
2243   // Multiplies signed integer by unsigned fraction
2244   // The result is stored into accumulator
2245   // The middle slice of accumulator is stored into destination element
2417    rsp_state *rsp = (rsp_state*)param;
2418    int op = rsp->impstate->arg0;
22462419
2247   for (int i = 0; i < 8; i++)
2248   {
2420    // 31       25  24     20      15      10      5        0
2421    // ------------------------------------------------------
2422    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000101 |
2423    // ------------------------------------------------------
2424    //
2425    // Multiplies signed integer by unsigned fraction
2426    // The result is stored into accumulator
2427    // The middle slice of accumulator is stored into destination element
2428
22492429#if USE_SIMD
2250      UINT16 w1, w2;
2251      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2252      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2253      INT32 s1 = (INT32)(INT16)w1;
2254      INT32 s2 = w2;
2430
2431   __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi;
2432
2433   __m128i vsReg = rsp->xv[VS1REG];
2434   __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2435
2436   /* Unpack to obtain for 32-bit precision. */
2437   RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi);
2438   RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi);
2439
2440   /* Begin accumulating the products. */
2441   __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo);
2442   __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi);
2443   rsp->accum_l = RSPPackLo32to16(loProduct, hiProduct);
2444   rsp->accum_m = rsp->xv[VDREG] = RSPPackHi32to16(loProduct, hiProduct);
2445
2446   loProduct = _mm_cmplt_epi32(loProduct, _mm_setzero_si128());
2447   hiProduct = _mm_cmplt_epi32(hiProduct, _mm_setzero_si128());
2448   rsp->accum_h = _mm_packs_epi32(loProduct, hiProduct);
2449
22552450#else
2256      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2257      INT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));   // not sign-extended
2258#endif
2259      INT32 r =  s1 * s2;
22602451
2261      ACCUM_H(i) = (r < 0) ? 0xffff : 0;      // sign-extend to 48-bit
2262      ACCUM_M(i) = (INT16)(r >> 16);
2263#if USE_SIMD
2264      SIMD_INSERT16(rsp->accum_l, (UINT16)(r), i);
2265#else
2266      ACCUM_L(i) = (UINT16)(r);
2452    INT16 vres[8];
2453    for (int i = 0; i < 8; i++)
2454    {
2455        UINT16 w1, w2;
2456      SCALAR_GET_VS1(w1, i);
2457      SCALAR_GET_VS2(w2, i);
2458        INT32 s1 = (INT32)(INT16)w1;
2459        INT32 s2 = (UINT16)w2;
2460
2461        INT32 r =  s1 * s2;
2462
2463        SET_ACCUM_H((r < 0) ? 0xffff : 0, i);      // sign-extend to 48-bit
2464        SET_ACCUM_M((INT16)(r >> 16), i);
2465        SET_ACCUM_L((UINT16)r, i);
2466
2467        vres[i] = ACCUM_M(rsp, i);
2468    }
2469    WRITEBACK_RESULT();
22672470#endif
2268
2269      vres[i] = ACCUM_M(i);
2270   }
2271   WRITEBACK_RESULT();
22722471}
22732472
22742473INLINE void cfunc_rsp_vmudn(void *param)
22752474{
2276   rsp_state *rsp = (rsp_state*)param;
2277   int op = rsp->impstate->arg0;
2475    rsp_state *rsp = (rsp_state*)param;
2476    int op = rsp->impstate->arg0;
22782477
2279   // 31       25  24     20      15      10      5        0
2280   // ------------------------------------------------------
2281   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 |
2282   // ------------------------------------------------------
2283   //
2284   // Multiplies unsigned fraction by signed integer
2285   // The result is stored into accumulator
2286   // The low slice of accumulator is stored into destination element
2478    // 31       25  24     20      15      10      5        0
2479    // ------------------------------------------------------
2480    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000110 |
2481    // ------------------------------------------------------
2482    //
2483    // Multiplies unsigned fraction by signed integer
2484    // The result is stored into accumulator
2485    // The low slice of accumulator is stored into destination element
22872486
2288   INT16 vres[8] = { 0 };
2289   for (int i = 0; i < 8; i++)
2290   {
22912487#if USE_SIMD
2292      UINT16 w1, w2;
2293      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2294      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2295      INT32 s1 = w1;
2296      INT32 s2 = (INT32)(INT16)w2;
2488
2489   __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi;
2490
2491   __m128i vsReg = rsp->xv[VS1REG];
2492   __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2493
2494   /* Unpack to obtain for 32-bit precision. */
2495   RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi);
2496   RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi);
2497
2498   /* Begin accumulating the products. */
2499   __m128i loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo);
2500   __m128i hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi);
2501   rsp->xv[VDREG] = rsp->accum_l = RSPPackLo32to16(loProduct, hiProduct);
2502   rsp->accum_m = RSPPackHi32to16(loProduct, hiProduct);
2503   rsp->accum_h = _mm_cmplt_epi16(rsp->accum_m, _mm_setzero_si128());
2504
22972505#else
2298      INT32 s1 = (UINT16)VREG_S(VS1REG, i);     // not sign-extended
2299      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2300#endif
2301      INT32 r = s1 * s2;
23022506
2303      ACCUM_H(i) = (r < 0) ? 0xffff : 0;      // sign-extend to 48-bit
2304      ACCUM_M(i) = (INT16)(r >> 16);
2305#if USE_SIMD
2306      SIMD_INSERT16(rsp->accum_l, (UINT16)(r), i);
2307#else
2308      ACCUM_L(i) = (UINT16)(r);
2507    INT16 vres[8] = { 0 };
2508    for (int i = 0; i < 8; i++)
2509    {
2510        UINT16 w1, w2;
2511      SCALAR_GET_VS1(w1, i);
2512      SCALAR_GET_VS2(w2, i);
2513        INT32 s1 = (UINT16)w1;
2514        INT32 s2 = (INT32)(INT16)w2;
2515
2516        INT32 r = s1 * s2;
2517
2518        SET_ACCUM_H((r < 0) ? 0xffff : 0, i);      // sign-extend to 48-bit
2519        SET_ACCUM_M((INT16)(r >> 16), i);
2520        SET_ACCUM_L((UINT16)(r), i);
2521
2522        vres[i] = (UINT16)(r);
2523    }
2524    WRITEBACK_RESULT();
23092525#endif
2310
2311      vres[i] = (UINT16)(r);
2312   }
2313   WRITEBACK_RESULT();
23142526}
23152527
23162528INLINE void cfunc_rsp_vmudh(void *param)
23172529{
2318   rsp_state *rsp = (rsp_state*)param;
2319   int op = rsp->impstate->arg0;
2320   // 31       25  24     20      15      10      5        0
2321   // ------------------------------------------------------
2322   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 |
2323   // ------------------------------------------------------
2324   //
2325   // Multiplies signed integer by signed integer
2326   // The result is stored into highest 32 bits of accumulator, the low slice is zero
2327   // The highest 32 bits of accumulator is saturated into destination element
2530    rsp_state *rsp = (rsp_state*)param;
2531    int op = rsp->impstate->arg0;
2532    // 31       25  24     20      15      10      5        0
2533    // ------------------------------------------------------
2534    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 000111 |
2535    // ------------------------------------------------------
2536    //
2537    // Multiplies signed integer by signed integer
2538    // The result is stored into highest 32 bits of accumulator, the low slice is zero
2539    // The highest 32 bits of accumulator is saturated into destination element
23282540
2329   INT16 vres[8];
2330   for (int i = 0; i < 8; i++)
2331   {
23322541#if USE_SIMD
2333      UINT16 w1, w2;
2334      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2335      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2336      INT32 s1 = (INT32)(INT16)w1;
2337      INT32 s2 = (INT32)(INT16)w2;
2542
2543   __m128i vaccLow, vaccHigh;
2544   __m128i unpackLo, unpackHi;
2545
2546   __m128i vsReg = rsp->xv[VS1REG];
2547   __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2548
2549   /* Multiply the sources, accumulate the product. */
2550   unpackLo = _mm_mullo_epi16(vsReg, vtReg);
2551   unpackHi = _mm_mulhi_epi16(vsReg, vtReg);
2552   vaccHigh = _mm_unpackhi_epi16(unpackLo, unpackHi);
2553   vaccLow = _mm_unpacklo_epi16(unpackLo, unpackHi);
2554
2555   /* Pack the accumulator and result back up. */
2556   rsp->xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh);
2557   rsp->accum_l = _mm_setzero_si128();
2558   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
2559   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
2560
23382561#else
2339      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2340      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2341#endif
2342      INT32 r = s1 * s2;
23432562
2344      ACCUM_H(i) = (INT16)(r >> 16);
2345      ACCUM_M(i) = (UINT16)(r);
2346#if USE_SIMD
2347      SIMD_INSERT16(rsp->accum_l, 0, i);
2348#else
2349      ACCUM_L(i) = 0;
2563    INT16 vres[8];
2564    for (int i = 0; i < 8; i++)
2565    {
2566        UINT16 w1, w2;
2567      SCALAR_GET_VS1(w1, i);
2568      SCALAR_GET_VS2(w2, i);
2569        INT32 s1 = (INT32)(INT16)w1;
2570        INT32 s2 = (INT32)(INT16)w2;
2571
2572        INT32 r = s1 * s2;
2573
2574        SET_ACCUM_H((INT16)(r >> 16), i);
2575        SET_ACCUM_M((UINT16)(r), i);
2576        SET_ACCUM_L(0, i);
2577
2578        if (r < -32768) r = -32768;
2579        if (r >  32767) r = 32767;
2580        vres[i] = (INT16)(r);
2581    }
2582    WRITEBACK_RESULT();
23502583#endif
2351
2352      if (r < -32768) r = -32768;
2353      if (r >  32767) r = 32767;
2354      vres[i] = (INT16)(r);
2355   }
2356   WRITEBACK_RESULT();
23572584}
23582585
23592586INLINE void cfunc_rsp_vmacf(void *param)
23602587{
2361   rsp_state *rsp = (rsp_state*)param;
2362   int op = rsp->impstate->arg0;
2588    rsp_state *rsp = (rsp_state*)param;
2589    int op = rsp->impstate->arg0;
23632590
2364   INT16 vres[8];
2365   for (int i = 0; i < 8; i++)
2366   {
23672591#if USE_SIMD
2368      UINT16 w1, w2;
2369      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2370      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2371      INT32 s1 = (INT32)(INT16)w1;
2372      INT32 s2 = (INT32)(INT16)w2;
2592
2593   __m128i loProduct, hiProduct, unpackLo, unpackHi;
2594   __m128i vaccHigh;
2595   __m128i vdReg, vdRegLo, vdRegHi;
2596
2597   __m128i vsReg = rsp->xv[VS1REG];
2598   __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2599
2600   __m128i vaccLow = rsp->accum_l;
2601
2602   /* Unpack to obtain for 32-bit precision. */
2603   RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh);
2604
2605   /* Begin accumulating the products. */
2606   unpackLo = _mm_mullo_epi16(vsReg, vtReg);
2607   unpackHi = _mm_mulhi_epi16(vsReg, vtReg);
2608   loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi);
2609   hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi);
2610   loProduct = _mm_slli_epi32(loProduct, 1);
2611   hiProduct = _mm_slli_epi32(hiProduct, 1);
2612
2613   vdRegLo = _mm_srli_epi32(loProduct, 16);
2614   vdRegHi = _mm_srli_epi32(hiProduct, 16);
2615   vdRegLo = _mm_slli_epi32(vdRegLo, 16);
2616   vdRegHi = _mm_slli_epi32(vdRegHi, 16);
2617   vdRegLo = _mm_xor_si128(vdRegLo, loProduct);
2618   vdRegHi = _mm_xor_si128(vdRegHi, hiProduct);
2619
2620   vaccLow = _mm_add_epi32(vaccLow, vdRegLo);
2621   vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi);
2622
2623   rsp->accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh);
2624
2625   /* Multiply the MSB of sources, accumulate the product. */
2626   vdRegLo = _mm_unpacklo_epi16(rsp->accum_m, rsp->accum_h);
2627   vdRegHi = _mm_unpackhi_epi16(rsp->accum_m, rsp->accum_h);
2628
2629   loProduct = _mm_srai_epi32(loProduct, 16);
2630   hiProduct = _mm_srai_epi32(hiProduct, 16);
2631   vaccLow = _mm_srai_epi32(vaccLow, 16);
2632   vaccHigh = _mm_srai_epi32(vaccHigh, 16);
2633
2634   vaccLow = _mm_add_epi32(loProduct, vaccLow);
2635   vaccHigh = _mm_add_epi32(hiProduct, vaccHigh);
2636   vaccLow = _mm_add_epi32(vdRegLo, vaccLow);
2637   vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh);
2638
2639   /* Clamp the accumulator and write it all out. */
2640   rsp->xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh);
2641   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
2642   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
2643
23732644#else
2374      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2375      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2376#endif
2377      INT32 r = s1 * s2;
23782645
2379#if USE_SIMD
2380      UINT64 q = (UINT64)ACCUM(i) & 0xffffffff0000ffffL;
2381      UINT16 accl;
2382      SIMD_EXTRACT16(rsp->accum_l, accl, i);
2383      q |= (UINT64)((UINT32)accl << 16);
2384      q += (INT64)(r) << 17;
2385      ACCUM(i) = q;
2386      SIMD_INSERT16(rsp->accum_l, (UINT16)(q >> 16), i);
2387#else
2388      ACCUM(i) += (INT64)(r) << 17;
2646    INT16 vres[8];
2647    for (int i = 0; i < 8; i++)
2648    {
2649        UINT16 w1, w2;
2650      SCALAR_GET_VS1(w1, i);
2651      SCALAR_GET_VS2(w2, i);
2652        INT32 s1 = (INT32)(INT16)w1;
2653        INT32 s2 = (INT32)(INT16)w2;
2654
2655        INT32 r = s1 * s2;
2656
2657        UINT64 q = ACCUM(i) & 0x000000000000ffffL;
2658        q |= (((UINT64)(UINT16)ACCUM_L(rsp, i)) << 16);
2659        q |= (((UINT64)(UINT16)ACCUM_M(rsp, i)) << 32);
2660        q |= (((UINT64)(UINT16)ACCUM_H(rsp, i)) << 48);
2661
2662        q += (INT64)(r) << 17;
2663        ACCUM(i) = q & 0x000000000000ffffL;
2664
2665        SET_ACCUM_L((UINT16)(q >> 16), i);
2666        SET_ACCUM_M((UINT16)(q >> 32), i);
2667        SET_ACCUM_H((UINT16)(q >> 48), i);
2668
2669        vres[i] = SATURATE_ACCUM(rsp, i, 1, 0x8000, 0x7fff);
2670    }
2671    WRITEBACK_RESULT();
23892672#endif
2390
2391      vres[i] = SATURATE_ACCUM(rsp, i, 1, 0x8000, 0x7fff);
2392   }
2393   WRITEBACK_RESULT();
23942673}
23952674
23962675INLINE void cfunc_rsp_vmacu(void *param)
23972676{
2398   rsp_state *rsp = (rsp_state*)param;
2399   int op = rsp->impstate->arg0;
2677    rsp_state *rsp = (rsp_state*)param;
2678    int op = rsp->impstate->arg0;
24002679
2401   // 31       25  24     20      15      10      5        0
2402   // ------------------------------------------------------
2403   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 |
2404   // ------------------------------------------------------
2405   //
2680    // 31       25  24     20      15      10      5        0
2681    // ------------------------------------------------------
2682    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001001 |
2683    // ------------------------------------------------------
2684    //
24062685
2407   INT16 vres[8];
2408   for (int i = 0; i < 8; i++)
2409   {
24102686#if USE_SIMD
2411      UINT16 w1, w2;
2412      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2413      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2414      INT32 s1 = (INT32)(INT16)w1;
2415      INT32 s2 = (INT32)(INT16)w2;
2687
2688   __m128i loProduct, hiProduct, unpackLo, unpackHi;
2689   __m128i vaccHigh;
2690   __m128i vdReg, vdRegLo, vdRegHi;
2691
2692   __m128i vsReg = rsp->xv[VS1REG];
2693    __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2694
2695   __m128i vaccLow = rsp->accum_l;
2696
2697   /* Unpack to obtain for 32-bit precision. */
2698   RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh);
2699
2700   /* Begin accumulating the products. */
2701   unpackLo = _mm_mullo_epi16(vsReg, vtReg);
2702   unpackHi = _mm_mulhi_epi16(vsReg, vtReg);
2703   loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi);
2704   hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi);
2705   loProduct = _mm_slli_epi32(loProduct, 1);
2706   hiProduct = _mm_slli_epi32(hiProduct, 1);
2707
2708   vdRegLo = _mm_srli_epi32(loProduct, 16);
2709   vdRegHi = _mm_srli_epi32(hiProduct, 16);
2710   vdRegLo = _mm_slli_epi32(vdRegLo, 16);
2711   vdRegHi = _mm_slli_epi32(vdRegHi, 16);
2712   vdRegLo = _mm_xor_si128(vdRegLo, loProduct);
2713   vdRegHi = _mm_xor_si128(vdRegHi, hiProduct);
2714
2715   vaccLow = _mm_add_epi32(vaccLow, vdRegLo);
2716   vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi);
2717
2718   rsp->accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh);
2719
2720   /* Multiply the MSB of sources, accumulate the product. */
2721   vdRegLo = _mm_unpacklo_epi16(rsp->accum_m, rsp->accum_h);
2722   vdRegHi = _mm_unpackhi_epi16(rsp->accum_m, rsp->accum_h);
2723
2724   loProduct = _mm_srai_epi32(loProduct, 16);
2725   hiProduct = _mm_srai_epi32(hiProduct, 16);
2726   vaccLow = _mm_srai_epi32(vaccLow, 16);
2727   vaccHigh = _mm_srai_epi32(vaccHigh, 16);
2728
2729   vaccLow = _mm_add_epi32(loProduct, vaccLow);
2730   vaccHigh = _mm_add_epi32(hiProduct, vaccHigh);
2731   vaccLow = _mm_add_epi32(vdRegLo, vaccLow);
2732   vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh);
2733
2734   /* Clamp the accumulator and write it all out. */
2735   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
2736   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
24162737#else
2417      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2418      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2419#endif
2420      INT32 r1 = s1 * s2;
2421#if USE_SIMD
2422      UINT16 accl;
2423      SIMD_EXTRACT16(rsp->accum_l, accl, i);
2424      UINT32 r2 = accl + ((UINT16)(r1) * 2);
2425#else
2426      UINT32 r2 = (UINT16)ACCUM_L(i) + ((UINT16)(r1) * 2);
2427#endif
2428      UINT32 r3 = (UINT16)ACCUM_M(i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16);
24292738
2430#if USE_SIMD
2431      SIMD_INSERT16(rsp->accum_l, (UINT16)(r2), i);
2432#else
2433      ACCUM_L(i) = (UINT16)(r2);
2739    INT16 vres[8];
2740    for (int i = 0; i < 8; i++)
2741    {
2742        UINT16 w1, w2;
2743      SCALAR_GET_VS1(w1, i);
2744      SCALAR_GET_VS2(w2, i);
2745        INT32 s1 = (INT32)(INT16)w1;
2746        INT32 s2 = (INT32)(INT16)w2;
2747
2748        INT32 r1 = s1 * s2;
2749        UINT32 r2 = (UINT16)ACCUM_L(rsp, i) + ((UINT16)(r1) * 2);
2750        UINT32 r3 = (UINT16)ACCUM_M(rsp, i) + (UINT16)((r1 >> 16) * 2) + (UINT16)(r2 >> 16);
2751
2752        SET_ACCUM_L((UINT16)(r2), i);
2753        SET_ACCUM_M((UINT16)(r3), i);
2754        SET_ACCUM_H(ACCUM_H(rsp, i) + (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31), i);
2755
2756        //res = SATURATE_ACCUM(i, 1, 0x0000, 0xffff);
2757        if ((INT16)ACCUM_H(rsp, i) < 0)
2758        {
2759            vres[i] = 0;
2760        }
2761        else
2762        {
2763            if (ACCUM_H(rsp, i) != 0)
2764            {
2765                vres[i] = (INT16)0xffff;
2766            }
2767            else
2768            {
2769                if ((INT16)ACCUM_M(rsp, i) < 0)
2770                {
2771                    vres[i] = (INT16)0xffff;
2772                }
2773                else
2774                {
2775                    vres[i] = ACCUM_M(rsp, i);
2776                }
2777            }
2778        }
2779    }
2780    WRITEBACK_RESULT();
24342781#endif
2435      ACCUM_M(i) = (UINT16)(r3);
2436      ACCUM_H(i) += (UINT16)(r3 >> 16) + (UINT16)(r1 >> 31);
2437
2438      //res = SATURATE_ACCUM(i, 1, 0x0000, 0xffff);
2439      if ((INT16)ACCUM_H(i) < 0)
2440      {
2441         vres[i] = 0;
2442      }
2443      else
2444      {
2445         if (ACCUM_H(i) != 0)
2446         {
2447            vres[i] = (INT16)0xffff;
2448         }
2449         else
2450         {
2451            if ((INT16)ACCUM_M(i) < 0)
2452            {
2453               vres[i] = (INT16)0xffff;
2454            }
2455            else
2456            {
2457               vres[i] = ACCUM_M(i);
2458            }
2459         }
2460      }
2461   }
2462   WRITEBACK_RESULT();
24632782}
24642783
24652784INLINE void cfunc_rsp_vmadl(void *param)
24662785{
2467   rsp_state *rsp = (rsp_state*)param;
2468   int op = rsp->impstate->arg0;
2786    rsp_state *rsp = (rsp_state*)param;
2787    int op = rsp->impstate->arg0;
24692788
2470   // 31       25  24     20      15      10      5        0
2471   // ------------------------------------------------------
2472   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 |
2473   // ------------------------------------------------------
2474   //
2475   // Multiplies unsigned fraction by unsigned fraction
2476   // Adds the higher 16 bits of the 32-bit result to accumulator
2477   // The low slice of accumulator is stored into destination element
2789    // 31       25  24     20      15      10      5        0
2790    // ------------------------------------------------------
2791    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001100 |
2792    // ------------------------------------------------------
2793    //
2794    // Multiplies unsigned fraction by unsigned fraction
2795    // Adds the higher 16 bits of the 32-bit result to accumulator
2796    // The low slice of accumulator is stored into destination element
24782797
2479   INT16 vres[8];
2480   for (int i = 0; i < 8; i++)
2481   {
24822798#if USE_SIMD
2483      UINT16 w1, w2;
2484      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2485      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2486      UINT32 s1 = w1;
2487      UINT32 s2 = w2;
2799
2800   __m128i vaccHigh;
2801   __m128i unpackHi, loProduct, hiProduct;
2802   __m128i vdReg, vdRegLo, vdRegHi;
2803
2804   __m128i vsReg = rsp->xv[VS1REG];
2805    __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2806
2807   __m128i vaccLow = rsp->accum_l;
2808
2809   /* Unpack to obtain for 32-bit precision. */
2810   RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh);
2811
2812   /* Begin accumulating the products. */
2813   unpackHi = _mm_mulhi_epu16(vsReg, vtReg);
2814   loProduct = _mm_unpacklo_epi16(unpackHi, _mm_setzero_si128());
2815   hiProduct = _mm_unpackhi_epi16(unpackHi, _mm_setzero_si128());
2816
2817   vaccLow = _mm_add_epi32(vaccLow, loProduct);
2818   vaccHigh = _mm_add_epi32(vaccHigh, hiProduct);
2819   rsp->accum_l = vdReg = RSPPackLo32to16(vaccLow, vaccHigh);
2820
2821   /* Finish accumulating whatever is left. */
2822   vdRegLo = _mm_unpacklo_epi16(rsp->accum_m, rsp->accum_h);
2823   vdRegHi = _mm_unpackhi_epi16(rsp->accum_m, rsp->accum_h);
2824
2825   vaccLow = _mm_srai_epi32(vaccLow, 16);
2826   vaccHigh = _mm_srai_epi32(vaccHigh, 16);
2827   vaccLow = _mm_add_epi32(vdRegLo, vaccLow);
2828   vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh);
2829
2830   /* Clamp the accumulator and write it all out. */
2831   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
2832   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
2833   rsp->xv[VDREG] = RSPClampLowToVal(vdReg, rsp->accum_m, rsp->accum_h);
2834
24882835#else
2489      UINT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i);
2490      UINT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2491#endif
2492      UINT32 r1 = s1 * s2;
2493#if USE_SIMD
2494      UINT16 accl;
2495      SIMD_EXTRACT16(rsp->accum_l, accl, i);
2496      UINT32 r2 = accl + (r1 >> 16);
2497#else
2498      UINT32 r2 = (UINT16)ACCUM_L(i) + (r1 >> 16);
2499#endif
2500      UINT32 r3 = (UINT16)ACCUM_M(i) + (r2 >> 16);
2836    INT16 vres[8];
2837    for (int i = 0; i < 8; i++)
2838    {
2839        UINT16 w1, w2;
2840      SCALAR_GET_VS1(w1, i);
2841      SCALAR_GET_VS2(w2, i);
2842        UINT32 s1 = w1;
2843        UINT32 s2 = w2;
25012844
2502#if USE_SIMD
2503      SIMD_INSERT16(rsp->accum_l, (UINT16)(r2), i);
2504#else
2505      ACCUM_L(i) = (UINT16)(r2);
2845        UINT32 r1 = s1 * s2;
2846        UINT32 r2 = (UINT16)ACCUM_L(rsp, i) + (r1 >> 16);
2847      UINT32 r3 = (UINT16)ACCUM_M(rsp, i) + (r2 >> 16);
2848
2849        SET_ACCUM_L((UINT16)r2, i);
2850        SET_ACCUM_M((UINT16)r3, i);
2851        SET_ACCUM_H(ACCUM_H(rsp, i) + (INT16)(r3 >> 16), i);
2852
2853        vres[i] = SATURATE_ACCUM(rsp, i, 0, 0x0000, 0xffff);
2854    }
2855    WRITEBACK_RESULT();
25062856#endif
2507      ACCUM_M(i) = (UINT16)(r3);
2508      ACCUM_H(i) += (INT16)(r3 >> 16);
2509
2510      vres[i] = SATURATE_ACCUM(rsp, i, 0, 0x0000, 0xffff);
2511   }
2512   WRITEBACK_RESULT();
25132857}
25142858
25152859INLINE void cfunc_rsp_vmadm(void *param)
25162860{
2517   rsp_state *rsp = (rsp_state*)param;
2518   int op = rsp->impstate->arg0;
2861    rsp_state *rsp = (rsp_state*)param;
2862    int op = rsp->impstate->arg0;
25192863
2520   INT16 vres[8];
2521   for (int i = 0; i < 8; i++)
2522   {
25232864#if USE_SIMD
2524      UINT16 w1, w2;
2525      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2526      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2527      UINT32 s1 = (INT32)(INT16)w1;
2528      UINT32 s2 = w2;
2865   __m128i vaccLow, vaccHigh, loProduct, hiProduct;
2866   __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi;
2867
2868   __m128i vsReg = rsp->xv[VS1REG];
2869    __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2870
2871   /* Unpack to obtain for 32-bit precision. */
2872   RSPSignExtend16to32(vsReg, &vsRegLo, &vsRegHi);
2873   RSPZeroExtend16to32(vtReg, &vtRegLo, &vtRegHi);
2874   RSPZeroExtend16to32(rsp->accum_l, &vaccLow, &vaccHigh);
2875
2876   /* Begin accumulating the products. */
2877   loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo);
2878   hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi);
2879
2880   vdRegLo = _mm_srli_epi32(loProduct, 16);
2881   vdRegHi = _mm_srli_epi32(hiProduct, 16);
2882   vdRegLo = _mm_slli_epi32(vdRegLo, 16);
2883   vdRegHi = _mm_slli_epi32(vdRegHi, 16);
2884   vdRegLo = _mm_xor_si128(vdRegLo, loProduct);
2885   vdRegHi = _mm_xor_si128(vdRegHi, hiProduct);
2886   vaccLow = _mm_add_epi32(vaccLow, vdRegLo);
2887   vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi);
2888
2889   rsp->accum_l = rsp->xv[VDREG] = RSPPackLo32to16(vaccLow, vaccHigh);
2890
2891   /* Multiply the MSB of sources, accumulate the product. */
2892   vdRegLo = _mm_unpacklo_epi16(rsp->accum_m, rsp->accum_h);
2893   vdRegHi = _mm_unpackhi_epi16(rsp->accum_m, rsp->accum_h);
2894
2895   loProduct = _mm_srai_epi32(loProduct, 16);
2896   hiProduct = _mm_srai_epi32(hiProduct, 16);
2897   vaccLow = _mm_srai_epi32(vaccLow, 16);
2898   vaccHigh = _mm_srai_epi32(vaccHigh, 16);
2899
2900   vaccLow = _mm_add_epi32(loProduct, vaccLow);
2901   vaccHigh = _mm_add_epi32(hiProduct, vaccHigh);
2902   vaccLow = _mm_add_epi32(vdRegLo, vaccLow);
2903   vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh);
2904
2905   /* Clamp the accumulator and write it all out. */
2906   rsp->xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh);
2907   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
2908   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
2909
25292910#else
2530      UINT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2531      UINT32 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));   // not sign-extended
2532#endif
2533      UINT32 r1 = s1 * s2;
2534#if USE_SIMD
2535      UINT16 accl;
2536      SIMD_EXTRACT16(rsp->accum_l, accl, i);
2537      UINT32 r2 = accl + (UINT16)(r1);
2538#else
2539      UINT32 r2 = (UINT16)ACCUM_L(i) + (UINT16)(r1);
2540#endif
2541      UINT32 r3 = (UINT16)ACCUM_M(i) + (r1 >> 16) + (r2 >> 16);
2911    INT16 vres[8];
2912    for (int i = 0; i < 8; i++)
2913    {
2914        UINT16 w1, w2;
2915      SCALAR_GET_VS1(w1, i);
2916      SCALAR_GET_VS2(w2, i);
2917        UINT32 s1 = (INT32)(INT16)w1;
2918        UINT32 s2 = (UINT16)w2;
25422919
2543#if USE_SIMD
2544      SIMD_INSERT16(rsp->accum_l, (UINT16)(r2), i);
2545#else
2546      ACCUM_L(i) = (UINT16)(r2);
2920        UINT32 r1 = s1 * s2;
2921        UINT32 r2 = (UINT16)ACCUM_L(rsp, i) + (UINT16)(r1);
2922        UINT32 r3 = (UINT16)ACCUM_M(rsp, i) + (r1 >> 16) + (r2 >> 16);
2923
2924        SET_ACCUM_L((UINT16)r2, i);
2925        SET_ACCUM_M((UINT16)r3, i);
2926        SET_ACCUM_H((UINT16)ACCUM_H(rsp, i) + (UINT16)(r3 >> 16), i);
2927        if ((INT32)(r1) < 0)
2928        {
2929         SET_ACCUM_H((UINT16)ACCUM_H(rsp, i) - 1, i);
2930      }
2931
2932        vres[i] = SATURATE_ACCUM(rsp, i, 1, 0x8000, 0x7fff);
2933    }
2934    WRITEBACK_RESULT();
25472935#endif
2548      ACCUM_M(i) = (UINT16)(r3);
2549      ACCUM_H(i) += (UINT16)(r3 >> 16);
2550      if ((INT32)(r1) < 0)
2551         ACCUM_H(i) -= 1;
2552
2553      vres[i] = SATURATE_ACCUM(rsp, i, 1, 0x8000, 0x7fff);
2554   }
2555   WRITEBACK_RESULT();
25562936}
25572937
25582938INLINE void cfunc_rsp_vmadn(void *param)
25592939{
2560   rsp_state *rsp = (rsp_state*)param;
2561   int op = rsp->impstate->arg0;
2940    rsp_state *rsp = (rsp_state*)param;
2941    int op = rsp->impstate->arg0;
25622942
2563   INT16 vres[8];
2564   for (int i = 0; i < 8; i++)
2565   {
25662943#if USE_SIMD
2567      UINT16 w1, w2;
2568      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2569      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2570      INT32 s1 = w1;
2571      INT32 s2 = (INT32)(INT16)w2;
2572#else
2573      INT32 s1 = (UINT16)VREG_S(VS1REG, i);     // not sign-extended
2574      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2575#endif
2944   __m128i vaccLow, vaccHigh, loProduct, hiProduct;
2945   __m128i vsRegLo, vsRegHi, vtRegLo, vtRegHi, vdRegLo, vdRegHi;
25762946
2577#if USE_SIMD
2578      UINT64 q = (UINT64)ACCUM(i) & 0xffffffff0000ffffL;
2579      UINT16 accl;
2580      SIMD_EXTRACT16(rsp->accum_l, accl, i);
2581      q |= (UINT64)((UINT32)accl << 16);
2582      q += (INT64)(s1*s2) << 16;
2583      ACCUM(i) = q;
2584      SIMD_INSERT16(rsp->accum_l, (UINT16)(q >> 16), i);
2585#else
2586      ACCUM(i) += (INT64)(s1*s2) << 16;
2587#endif
2947   __m128i vsReg = rsp->xv[VS1REG];
2948    __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
25882949
2589      vres[i] = SATURATE_ACCUM(rsp, i, 0, 0x0000, 0xffff);
2590   }
2591   WRITEBACK_RESULT();
2592}
2950    vaccLow = rsp->accum_l;
25932951
2594INLINE void cfunc_rsp_vmadh(void *param)
2595{
2596   rsp_state *rsp = (rsp_state*)param;
2597   int op = rsp->impstate->arg0;
2598   // 31       25  24     20      15      10      5        0
2599   // ------------------------------------------------------
2600   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 |
2601   // ------------------------------------------------------
2602   //
2603   // Multiplies signed integer by signed integer
2604   // The result is added into highest 32 bits of accumulator, the low slice is zero
2605   // The highest 32 bits of accumulator is saturated into destination element
2952    RSPZeroExtend16to32(vsReg, &vsRegLo, &vsRegHi);
2953    RSPSignExtend16to32(vtReg, &vtRegLo, &vtRegHi);
2954    RSPZeroExtend16to32(vaccLow, &vaccLow, &vaccHigh);
26062955
2607#if 0
2608   UINT16 caccumh[8], caccumm[8], vs1[8], vs2[8];
2609   for (int i = 0; i < 8; i++)
2610   {
2611      caccumh[i] = ACCUM_H(i);
2612      caccumm[i] = ACCUM_M(i);
2613      SIMD_EXTRACT16(rsp->xv[VS1REG], vs1[i], i);
2614      SIMD_EXTRACT16(rsp->xv[VS2REG], vs2[i], i);
2615      printf("%04x%04x\n", (UINT16)caccumh[i], (UINT16)caccumm[i]);
2616   }
2617#endif
2956   /* Begin accumulating the products. */
2957   loProduct = _mm_mullo_epi32(vsRegLo, vtRegLo);
2958   hiProduct = _mm_mullo_epi32(vsRegHi, vtRegHi);
26182959
2619#if USE_SIMD
2620   __m128i vec7531 = _mm_and_si128(rsp->xv[VS1REG], vec_himask);
2621   __m128i vec6420 = _mm_slli_epi32(rsp->xv[VS1REG], 16);
2622   __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2960   vdRegLo = _mm_srli_epi32(loProduct, 16);
2961   vdRegHi = _mm_srli_epi32(hiProduct, 16);
2962   vdRegLo = _mm_slli_epi32(vdRegLo, 16);
2963   vdRegHi = _mm_slli_epi32(vdRegHi, 16);
2964   vdRegLo = _mm_xor_si128(vdRegLo, loProduct);
2965   vdRegHi = _mm_xor_si128(vdRegHi, hiProduct);
26232966
2624   __m128i shuf7531 = _mm_and_si128(shuf2, vec_himask);
2625   __m128i shuf6420 = _mm_slli_epi32(shuf2, 16);
2967   vaccLow = _mm_add_epi32(vaccLow, vdRegLo);
2968   vaccHigh = _mm_add_epi32(vaccHigh, vdRegHi);
26262969
2627   __m128i upper7531 = _mm_mulhi_epi16(vec7531, shuf7531);
2628   __m128i lower7531 = _mm_srli_epi32(_mm_mullo_epi16(vec7531, shuf7531), 16);
2629   __m128i prod7531 = _mm_or_si128(upper7531, lower7531);
2970   rsp->accum_l = RSPPackLo32to16(vaccLow, vaccHigh);
26302971
2631   __m128i upper6420 = _mm_mulhi_epi16(vec6420, shuf6420);
2632   __m128i lower6420 = _mm_srli_epi32(_mm_mullo_epi16(vec6420, shuf6420), 16);
2633   __m128i prod6420 = _mm_or_si128(upper6420, lower6420);
2972   /* Multiply the MSB of sources, accumulate the product. */
2973   vdRegLo = _mm_unpacklo_epi16(rsp->accum_m, rsp->accum_h);
2974   vdRegHi = _mm_unpackhi_epi16(rsp->accum_m, rsp->accum_h);
26342975
2635#if 0
2636   UINT16 svs1[8], svs2[8];
2637   svs1[0] = _mm_extract_epi16(rsp->xv[VS1REG], 7);
2638   svs1[1] = _mm_extract_epi16(rsp->xv[VS1REG], 6);
2639   svs1[2] = _mm_extract_epi16(rsp->xv[VS1REG], 5);
2640   svs1[3] = _mm_extract_epi16(rsp->xv[VS1REG], 4);
2641   svs1[4] = _mm_extract_epi16(rsp->xv[VS1REG], 3);
2642   svs1[5] = _mm_extract_epi16(rsp->xv[VS1REG], 2);
2643   svs1[6] = _mm_extract_epi16(rsp->xv[VS1REG], 1);
2644   svs1[7] = _mm_extract_epi16(rsp->xv[VS1REG], 0);
2645   svs2[0] = _mm_extract_epi16(rsp->xv[VS2REG], 7);
2646   svs2[1] = _mm_extract_epi16(rsp->xv[VS2REG], 6);
2647   svs2[2] = _mm_extract_epi16(rsp->xv[VS2REG], 5);
2648   svs2[3] = _mm_extract_epi16(rsp->xv[VS2REG], 4);
2649   svs2[4] = _mm_extract_epi16(rsp->xv[VS2REG], 3);
2650   svs2[5] = _mm_extract_epi16(rsp->xv[VS2REG], 2);
2651   svs2[6] = _mm_extract_epi16(rsp->xv[VS2REG], 1);
2652   svs2[7] = _mm_extract_epi16(rsp->xv[VS2REG], 0);
2976   loProduct = _mm_srai_epi32(loProduct, 16);
2977   hiProduct = _mm_srai_epi32(hiProduct, 16);
2978   vaccLow = _mm_srai_epi32(vaccLow, 16);
2979   vaccHigh = _mm_srai_epi32(vaccHigh, 16);
26532980
2654   printf("%d\n", EL);
2981   vaccLow = _mm_add_epi32(loProduct, vaccLow);
2982   vaccHigh = _mm_add_epi32(hiProduct, vaccHigh);
2983   vaccLow = _mm_add_epi32(vdRegLo, vaccLow);
2984   vaccHigh = _mm_add_epi32(vdRegHi, vaccHigh);
26552985
2656   UINT16 vecs[16];
2657   vecs[0] = _mm_extract_epi16(vec7531, 0);
2658   vecs[1] = _mm_extract_epi16(vec7531, 1);
2659   vecs[2] = _mm_extract_epi16(vec7531, 2);
2660   vecs[3] = _mm_extract_epi16(vec7531, 3);
2661   vecs[4] = _mm_extract_epi16(vec7531, 4);
2662   vecs[5] = _mm_extract_epi16(vec7531, 5);
2663   vecs[6] = _mm_extract_epi16(vec7531, 6);
2664   vecs[7] = _mm_extract_epi16(vec7531, 7);
2665   vecs[8] = _mm_extract_epi16(vec6420, 0);
2666   vecs[9] = _mm_extract_epi16(vec6420, 1);
2667   vecs[10] = _mm_extract_epi16(vec6420, 2);
2668   vecs[11] = _mm_extract_epi16(vec6420, 3);
2669   vecs[12] = _mm_extract_epi16(vec6420, 4);
2670   vecs[13] = _mm_extract_epi16(vec6420, 5);
2671   vecs[14] = _mm_extract_epi16(vec6420, 6);
2672   vecs[15] = _mm_extract_epi16(vec6420, 7);
2673   printf("VS1 %04x%04x %04x%04x %04x%04x %04x%04x\n", vs1[0], vs1[1], vs1[2], vs1[3], vs1[4], vs1[5], vs1[6], vs1[7]);
2674   printf("VS2 %04x%04x %04x%04x %04x%04x %04x%04x\n", vs2[0], vs2[1], vs2[2], vs2[3], vs2[4], vs2[5], vs2[6], vs2[7]);
2675   printf("Vec %04x%04x %04x%04x %04x%04x %04x%04x\n", vecs[0], vecs[1], vecs[2], vecs[3], vecs[4], vecs[5], vecs[6], vecs[7]);
2676   printf("Vec %04x%04x %04x%04x %04x%04x %04x%04x\n", vecs[8], vecs[9], vecs[10], vecs[11], vecs[12], vecs[13], vecs[14], vecs[15]);
2986   /* Clamp the accumulator and write it all out. */
2987   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
2988   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
2989   rsp->xv[VDREG] = RSPClampLowToVal(rsp->accum_l, rsp->accum_m, rsp->accum_h);
2990#else
2991    INT16 vres[8];
2992    for (int i = 0; i < 8; i++)
2993    {
2994        UINT16 w1, w2;
2995      SCALAR_GET_VS1(w1, i);
2996      SCALAR_GET_VS2(w2, i);
2997        INT32 s1 = (UINT16)w1;
2998        INT32 s2 = (INT32)(INT16)w2;
26772999
2678   UINT16 shufs[16];
2679   shufs[0] = _mm_extract_epi16(shuf7531, 0);
2680   shufs[1] = _mm_extract_epi16(shuf7531, 1);
2681   shufs[2] = _mm_extract_epi16(shuf7531, 2);
2682   shufs[3] = _mm_extract_epi16(shuf7531, 3);
2683   shufs[4] = _mm_extract_epi16(shuf7531, 4);
2684   shufs[5] = _mm_extract_epi16(shuf7531, 5);
2685   shufs[6] = _mm_extract_epi16(shuf7531, 6);
2686   shufs[7] = _mm_extract_epi16(shuf7531, 7);
2687   shufs[8] = _mm_extract_epi16(shuf6420, 0);
2688   shufs[9] = _mm_extract_epi16(shuf6420, 1);
2689   shufs[10] = _mm_extract_epi16(shuf6420, 2);
2690   shufs[11] = _mm_extract_epi16(shuf6420, 3);
2691   shufs[12] = _mm_extract_epi16(shuf6420, 4);
2692   shufs[13] = _mm_extract_epi16(shuf6420, 5);
2693   shufs[14] = _mm_extract_epi16(shuf6420, 6);
2694   shufs[15] = _mm_extract_epi16(shuf6420, 7);
2695   printf("Shf %04x%04x %04x%04x %04x%04x %04x%04x\n", shufs[0], shufs[1], shufs[2], shufs[3], shufs[4], shufs[5], shufs[6], shufs[7]);
2696   printf("Shf %04x%04x %04x%04x %04x%04x %04x%04x\n", shufs[8], shufs[9], shufs[10], shufs[11], shufs[12], shufs[13], shufs[14], shufs[15]);
3000        INT64 q = (UINT64)ACCUM(i) & 0x000000000000ffffL;
3001        q |= (((UINT64)ACCUM_L(rsp, i)) << 16);
3002        q |= (((UINT64)ACCUM_M(rsp, i)) << 32);
3003        q |= (((UINT64)ACCUM_H(rsp, i)) << 48);
3004        q += (INT64)(s1*s2) << 16;
26973005
2698   UINT16 uppers[16];
2699   uppers[0] = _mm_extract_epi16(upper7531, 0);
2700   uppers[1] = _mm_extract_epi16(upper7531, 1);
2701   uppers[2] = _mm_extract_epi16(upper7531, 2);
2702   uppers[3] = _mm_extract_epi16(upper7531, 3);
2703   uppers[4] = _mm_extract_epi16(upper7531, 4);
2704   uppers[5] = _mm_extract_epi16(upper7531, 5);
2705   uppers[6] = _mm_extract_epi16(upper7531, 6);
2706   uppers[7] = _mm_extract_epi16(upper7531, 7);
2707   uppers[8] = _mm_extract_epi16(upper6420, 0);
2708   uppers[9] = _mm_extract_epi16(upper6420, 1);
2709   uppers[10] = _mm_extract_epi16(upper6420, 2);
2710   uppers[11] = _mm_extract_epi16(upper6420, 3);
2711   uppers[12] = _mm_extract_epi16(upper6420, 4);
2712   uppers[13] = _mm_extract_epi16(upper6420, 5);
2713   uppers[14] = _mm_extract_epi16(upper6420, 6);
2714   uppers[15] = _mm_extract_epi16(upper6420, 7);
2715   printf("Upr %04x%04x %04x%04x %04x%04x %04x%04x\n", uppers[0], uppers[1], uppers[2], uppers[3], uppers[4], uppers[5], uppers[6], uppers[7]);
2716   printf("Upr %04x%04x %04x%04x %04x%04x %04x%04x\n", uppers[8], uppers[9], uppers[10], uppers[11], uppers[12], uppers[13], uppers[14], uppers[15]);
3006        ACCUM(i) = q & 0x000000000000ffffL;
3007        SET_ACCUM_L((UINT16)(q >> 16), i);
3008        SET_ACCUM_M((UINT16)(q >> 32), i);
3009        SET_ACCUM_H((UINT16)(q >> 48), i);
27173010
2718   UINT16 lowers[16];
2719   lowers[0] = _mm_extract_epi16(lower7531, 0);
2720   lowers[1] = _mm_extract_epi16(lower7531, 1);
2721   lowers[2] = _mm_extract_epi16(lower7531, 2);
2722   lowers[3] = _mm_extract_epi16(lower7531, 3);
2723   lowers[4] = _mm_extract_epi16(lower7531, 4);
2724   lowers[5] = _mm_extract_epi16(lower7531, 5);
2725   lowers[6] = _mm_extract_epi16(lower7531, 6);
2726   lowers[7] = _mm_extract_epi16(lower7531, 7);
2727   lowers[8] = _mm_extract_epi16(lower6420, 0);
2728   lowers[9] = _mm_extract_epi16(lower6420, 1);
2729   lowers[10] = _mm_extract_epi16(lower6420, 2);
2730   lowers[11] = _mm_extract_epi16(lower6420, 3);
2731   lowers[12] = _mm_extract_epi16(lower6420, 4);
2732   lowers[13] = _mm_extract_epi16(lower6420, 5);
2733   lowers[14] = _mm_extract_epi16(lower6420, 6);
2734   lowers[15] = _mm_extract_epi16(lower6420, 7);
2735   printf("Lwr %04x%04x %04x%04x %04x%04x %04x%04x\n", lowers[0], lowers[1], lowers[2], lowers[3], lowers[4], lowers[5], lowers[6], lowers[7]);
2736   printf("Lwr %04x%04x %04x%04x %04x%04x %04x%04x\n", lowers[8], lowers[9], lowers[10], lowers[11], lowers[12], lowers[13], lowers[14], lowers[15]);
2737
2738   UINT16 prods[16];
2739   prods[0] = _mm_extract_epi16(prod7531, 0);
2740   prods[1] = _mm_extract_epi16(prod7531, 1);
2741   prods[2] = _mm_extract_epi16(prod7531, 2);
2742   prods[3] = _mm_extract_epi16(prod7531, 3);
2743   prods[4] = _mm_extract_epi16(prod7531, 4);
2744   prods[5] = _mm_extract_epi16(prod7531, 5);
2745   prods[6] = _mm_extract_epi16(prod7531, 6);
2746   prods[7] = _mm_extract_epi16(prod7531, 7);
2747   prods[8] = _mm_extract_epi16(prod6420, 0);
2748   prods[9] = _mm_extract_epi16(prod6420, 1);
2749   prods[10] = _mm_extract_epi16(prod6420, 2);
2750   prods[11] = _mm_extract_epi16(prod6420, 3);
2751   prods[12] = _mm_extract_epi16(prod6420, 4);
2752   prods[13] = _mm_extract_epi16(prod6420, 5);
2753   prods[14] = _mm_extract_epi16(prod6420, 6);
2754   prods[15] = _mm_extract_epi16(prod6420, 7);
2755   printf("Prd %04x%04x %04x%04x %04x%04x %04x%04x\n", prods[0], prods[1], prods[2], prods[3], prods[4], prods[5], prods[6], prods[7]);
2756   printf("Prd %04x%04x %04x%04x %04x%04x %04x%04x\n", prods[8], prods[9], prods[10], prods[11], prods[12], prods[13], prods[14], prods[15]);
3011        vres[i] = SATURATE_ACCUM(rsp, i, 0, 0x0000, 0xffff);
3012    }
3013    WRITEBACK_RESULT();
27573014#endif
3015}
27583016
2759   __m128i accum7531 = _mm_set_epi16(ACCUM_H(7), ACCUM_M(7), ACCUM_H(5), ACCUM_M(5), ACCUM_H(3), ACCUM_M(3), ACCUM_H(1), ACCUM_M(1));
2760   __m128i accum6420 = _mm_set_epi16(ACCUM_H(6), ACCUM_M(6), ACCUM_H(4), ACCUM_M(4), ACCUM_H(2), ACCUM_M(2), ACCUM_H(0), ACCUM_M(0));
2761   accum7531 = _mm_add_epi32(accum7531, prod7531);
2762   accum6420 = _mm_add_epi32(accum6420, prod6420);
2763   __m128i accum7531_m = _mm_slli_epi32(_mm_and_si128(accum7531, vec_lomask), 16);
2764   __m128i accum7531_h = _mm_and_si128(accum7531, vec_himask);
2765   __m128i accum6420_m = _mm_and_si128(accum6420, vec_lomask);
2766   __m128i accum6420_h = _mm_srli_epi32(_mm_and_si128(accum6420, vec_himask), 16);
2767   __m128i newaccum_h = _mm_or_si128(accum7531_h, accum6420_h);
2768   __m128i newaccum_m = _mm_or_si128(accum7531_m, accum6420_m);
2769#if 0
2770   UINT16 accums[16];
2771   accums[0] = _mm_extract_epi16(newaccum_h, 0);
2772   accums[1] = _mm_extract_epi16(newaccum_h, 1);
2773   accums[2] = _mm_extract_epi16(newaccum_h, 2);
2774   accums[3] = _mm_extract_epi16(newaccum_h, 3);
2775   accums[4] = _mm_extract_epi16(newaccum_h, 4);
2776   accums[5] = _mm_extract_epi16(newaccum_h, 5);
2777   accums[6] = _mm_extract_epi16(newaccum_h, 6);
2778   accums[7] = _mm_extract_epi16(newaccum_h, 7);
2779   accums[8] = _mm_extract_epi16(newaccum_m, 0);
2780   accums[9] = _mm_extract_epi16(newaccum_m, 1);
2781   accums[10] = _mm_extract_epi16(newaccum_m, 2);
2782   accums[11] = _mm_extract_epi16(newaccum_m, 3);
2783   accums[12] = _mm_extract_epi16(newaccum_m, 4);
2784   accums[13] = _mm_extract_epi16(newaccum_m, 5);
2785   accums[14] = _mm_extract_epi16(newaccum_m, 6);
2786   accums[15] = _mm_extract_epi16(newaccum_m, 7);
2787   printf("AcH %04x%04x %04x%04x %04x%04x %04x%04x\n", accums[0], accums[1], accums[2], accums[3], accums[4], accums[5], accums[6], accums[7]);
2788   printf("AcM %04x%04x %04x%04x %04x%04x %04x%04x\n", accums[8], accums[9], accums[10], accums[11], accums[12], accums[13], accums[14], accums[15]);
2789#endif
3017INLINE void cfunc_rsp_vmadh(void *param)
3018{
3019    rsp_state *rsp = (rsp_state*)param;
3020    int op = rsp->impstate->arg0;
3021    // 31       25  24     20      15      10      5        0
3022    // ------------------------------------------------------
3023    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 001111 |
3024    // ------------------------------------------------------
3025    //
3026    // Multiplies signed integer by signed integer
3027    // The result is added into highest 32 bits of accumulator, the low slice is zero
3028    // The highest 32 bits of accumulator is saturated into destination element
27903029
2791   __m128i result = SATURATE_ACCUM1(newaccum_h, newaccum_m, 0x8000, 0x7fff);
2792   rsp->xv[VDREG] = result;//_mm_shuffle_epi8(result, vec_shuf_inverse[0]);//SATURATE_ACCUM1(newaccum_h, newaccum_m, 0x8000, 0x7fff);
2793#if 0
2794   UINT16 vresult[8];
2795   vresult[0] = _mm_extract_epi16(result, 0);
2796   vresult[1] = _mm_extract_epi16(result, 1);
2797   vresult[2] = _mm_extract_epi16(result, 2);
2798   vresult[3] = _mm_extract_epi16(result, 3);
2799   vresult[4] = _mm_extract_epi16(result, 4);
2800   vresult[5] = _mm_extract_epi16(result, 5);
2801   vresult[6] = _mm_extract_epi16(result, 6);
2802   vresult[7] = _mm_extract_epi16(result, 7);
2803   printf("%04x %04x %04x %04x %04x %04x %04x %04x\n\n", vresult[0], vresult[1], vresult[2], vresult[3], vresult[4], vresult[5], vresult[6], vresult[7]);
2804#endif
2805   ACCUM_H(0) = _mm_extract_epi16(newaccum_h, 0);
2806   ACCUM_H(1) = _mm_extract_epi16(newaccum_h, 1);
2807   ACCUM_H(2) = _mm_extract_epi16(newaccum_h, 2);
2808   ACCUM_H(3) = _mm_extract_epi16(newaccum_h, 3);
2809   ACCUM_H(4) = _mm_extract_epi16(newaccum_h, 4);
2810   ACCUM_H(5) = _mm_extract_epi16(newaccum_h, 5);
2811   ACCUM_H(6) = _mm_extract_epi16(newaccum_h, 6);
2812   ACCUM_H(7) = _mm_extract_epi16(newaccum_h, 7);
2813   ACCUM_M(0) = _mm_extract_epi16(newaccum_m, 0);
2814   ACCUM_M(1) = _mm_extract_epi16(newaccum_m, 1);
2815   ACCUM_M(2) = _mm_extract_epi16(newaccum_m, 2);
2816   ACCUM_M(3) = _mm_extract_epi16(newaccum_m, 3);
2817   ACCUM_M(4) = _mm_extract_epi16(newaccum_m, 4);
2818   ACCUM_M(5) = _mm_extract_epi16(newaccum_m, 5);
2819   ACCUM_M(6) = _mm_extract_epi16(newaccum_m, 6);
2820   ACCUM_M(7) = _mm_extract_epi16(newaccum_m, 7);
2821#else
2822   INT16 vres[8];
2823   for (int i = 0; i < 8; i++)
2824   {
28253030#if USE_SIMD
2826      UINT16 w1, w2;
2827      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2828      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2829      INT32 s1 = (INT32)(INT16)w1;
2830      INT32 s2 = (INT32)(INT16)w2;
2831#else
2832      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2833      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2834#endif
2835      //INT32 s1 = (INT32)(INT16)vs1[i];
2836      //INT32 s2 = (INT32)(INT16)vs2[VEC_EL_2(EL, i)];
28373031
2838      rsp->accum[i].l[1] += s1*s2;
3032   __m128i vsReg = rsp->xv[VS1REG];
3033    __m128i vtReg = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
28393034
2840      vres[i] = SATURATE_ACCUM1(rsp, i, 0x8000, 0x7fff);
3035   /* Unpack to obtain for 32-bit precision. */
3036   __m128i vaccLow = _mm_unpacklo_epi16(rsp->accum_m, rsp->accum_h);
3037   __m128i vaccHigh = _mm_unpackhi_epi16(rsp->accum_m, rsp->accum_h);
28413038
2842      /*INT32 accum = (INT32)((caccumh[i] << 16) | caccumm[i]);
2843      accum += (INT32)s1*s2;
2844      caccumh[i] = (accum >> 16) & 0x0000ffff;
2845      caccumm[i] = accum & 0x0000ffff;
3039   /* Multiply the sources, accumulate the product. */
3040   __m128i unpackLo = _mm_mullo_epi16(vsReg, vtReg);
3041   __m128i unpackHi = _mm_mulhi_epi16(vsReg, vtReg);
3042   __m128i loProduct = _mm_unpacklo_epi16(unpackLo, unpackHi);
3043   __m128i hiProduct = _mm_unpackhi_epi16(unpackLo, unpackHi);
3044   vaccLow = _mm_add_epi32(vaccLow, loProduct);
3045   vaccHigh = _mm_add_epi32(vaccHigh, hiProduct);
28463046
2847      vres[i] = C_SATURATE_ACCUM1(caccumh, caccumm, i, 0x8000, 0x7fff);*/
2848   }
2849/*  printf("%08x\n", rsp->pc);
3047   /* Pack the accumulator and result back up. */
3048   rsp->xv[VDREG] = _mm_packs_epi32(vaccLow, vaccHigh);
3049   rsp->accum_m = RSPPackLo32to16(vaccLow, vaccHigh);
3050   rsp->accum_h = RSPPackHi32to16(vaccLow, vaccHigh);
3051
3052#else
3053    INT16 vres[8];
28503054    for (int i = 0; i < 8; i++)
28513055    {
2852        if ((UINT16)vres[i] != vresult[i])
2853        {
2854            printf("Result mismatch:\n");
2855            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vres[0], vres[1], vres[2], vres[3], vres[4], vres[5], vres[6], vres[7]);
2856            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", vresult[0], vresult[1], vresult[2], vresult[3], vresult[4], vresult[5], vresult[6], vresult[7]);
2857            printf("High accumulator:\n");
2858            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", caccumh[0], caccumh[1], caccumh[2], caccumh[3], caccumh[4], caccumh[5], caccumh[6], caccumh[7]);
2859            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", ACCUM_H(0), ACCUM_H(1), ACCUM_H(2), ACCUM_H(3), ACCUM_H(4), ACCUM_H(5), ACCUM_H(6), ACCUM_H(7));
2860            printf("Mid accumulator:\n");
2861            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", caccumm[0], caccumm[1], caccumm[2], caccumm[3], caccumm[4], caccumm[5], caccumm[6], caccumm[7]);
2862            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", ACCUM_M(0), ACCUM_M(1), ACCUM_M(2), ACCUM_M(3), ACCUM_M(4), ACCUM_M(5), ACCUM_M(6), ACCUM_M(7));
2863            printf("VS1:\n");
2864            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vs1[0], vs1[1], vs1[2], vs1[3], vs1[4], vs1[5], vs1[6], vs1[7]);
2865            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", svs1[0], svs1[1], svs1[2], svs1[3], svs1[4], svs1[5], svs1[6], svs1[7]);
2866            printf("VS2:\n");
2867            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vs2[0], vs2[1], vs2[2], vs2[3], vs2[4], vs2[5], vs2[6], vs2[7]);
2868            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", svs2[0], svs2[1], svs2[2], svs2[3], svs2[4], svs2[5], svs2[6], svs2[7]);
2869            fatalerror("asdf");
2870        }
2871        if (caccumh[i] != (UINT16)ACCUM_H(i))
2872        {
2873            printf("Result:\n");
2874            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vres[0], vres[1], vres[2], vres[3], vres[4], vres[5], vres[6], vres[7]);
2875            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", vresult[0], vresult[1], vresult[2], vresult[3], vresult[4], vresult[5], vresult[6], vresult[7]);
2876            printf("High accumulator mismatch:\n");
2877            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", caccumh[0], caccumh[1], caccumh[2], caccumh[3], caccumh[4], caccumh[5], caccumh[6], caccumh[7]);
2878            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", ACCUM_H(0), ACCUM_H(1), ACCUM_H(2), ACCUM_H(3), ACCUM_H(4), ACCUM_H(5), ACCUM_H(6), ACCUM_H(7));
2879            printf("Mid accumulator:\n");
2880            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", caccumm[0], caccumm[1], caccumm[2], caccumm[3], caccumm[4], caccumm[5], caccumm[6], caccumm[7]);
2881            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", ACCUM_M(0), ACCUM_M(1), ACCUM_M(2), ACCUM_M(3), ACCUM_M(4), ACCUM_M(5), ACCUM_M(6), ACCUM_M(7));
2882            printf("VS1:\n");
2883            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vs1[0], vs1[1], vs1[2], vs1[3], vs1[4], vs1[5], vs1[6], vs1[7]);
2884            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", svs1[0], svs1[1], svs1[2], svs1[3], svs1[4], svs1[5], svs1[6], svs1[7]);
2885            printf("VS2:\n");
2886            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vs2[0], vs2[1], vs2[2], vs2[3], vs2[4], vs2[5], vs2[6], vs2[7]);
2887            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", svs2[0], svs2[1], svs2[2], svs2[3], svs2[4], svs2[5], svs2[6], svs2[7]);
2888            fatalerror("asdf");
2889        }
2890        if (caccumm[i] != (UINT16)ACCUM_M(i))
2891        {
2892            printf("Result:\n");
2893            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vres[0], vres[1], vres[2], vres[3], vres[4], vres[5], vres[6], vres[7]);
2894            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", vresult[0], vresult[1], vresult[2], vresult[3], vresult[4], vresult[5], vresult[6], vresult[7]);
2895            printf("High accumulator:\n");
2896            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", caccumh[0], caccumh[1], caccumh[2], caccumh[3], caccumh[4], caccumh[5], caccumh[6], caccumh[7]);
2897            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", ACCUM_H(0), ACCUM_H(1), ACCUM_H(2), ACCUM_H(3), ACCUM_H(4), ACCUM_H(5), ACCUM_H(6), ACCUM_H(7));
2898            printf("Mid accumulator mismatch:\n");
2899            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", caccumm[0], caccumm[1], caccumm[2], caccumm[3], caccumm[4], caccumm[5], caccumm[6], caccumm[7]);
2900            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", ACCUM_M(0), ACCUM_M(1), ACCUM_M(2), ACCUM_M(3), ACCUM_M(4), ACCUM_M(5), ACCUM_M(6), ACCUM_M(7));
2901            printf("VS1:\n");
2902            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vs1[0], vs1[1], vs1[2], vs1[3], vs1[4], vs1[5], vs1[6], vs1[7]);
2903            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", svs1[0], svs1[1], svs1[2], svs1[3], svs1[4], svs1[5], svs1[6], svs1[7]);
2904            printf("VS2:\n");
2905            printf("   C: %04x %04x %04x %04x %04x %04x %04x %04x\n", vs2[0], vs2[1], vs2[2], vs2[3], vs2[4], vs2[5], vs2[6], vs2[7]);
2906            printf("SIMD: %04x %04x %04x %04x %04x %04x %04x %04x\n", svs2[0], svs2[1], svs2[2], svs2[3], svs2[4], svs2[5], svs2[6], svs2[7]);
2907            fatalerror("asdf");
2908        }
2909    }*/
2910   WRITEBACK_RESULT();
3056        INT16 w1, w2;
3057      SCALAR_GET_VS1(w1, i);
3058      SCALAR_GET_VS2(w2, i);
3059        INT32 s1 = (INT32)(INT16)w1;
3060        INT32 s2 = (INT32)(INT16)w2;
3061
3062        INT32 accum = (UINT32)(UINT16)ACCUM_M(rsp, i);
3063        accum |= ((UINT32)((UINT16)ACCUM_H(rsp, i))) << 16;
3064        accum += s1*s2;
3065
3066        SET_ACCUM_H((UINT16)(accum >> 16), i);
3067        SET_ACCUM_M((UINT16)accum, i);
3068
3069        vres[i] = SATURATE_ACCUM1(rsp, i, 0x8000, 0x7fff);
3070    }
3071    WRITEBACK_RESULT();
29113072#endif
29123073}
29133074
29143075INLINE void cfunc_rsp_vadd(void *param)
29153076{
2916   rsp_state *rsp = (rsp_state*)param;
2917   int op = rsp->impstate->arg0;
2918   // 31       25  24     20      15      10      5        0
2919   // ------------------------------------------------------
2920   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 |
2921   // ------------------------------------------------------
2922   //
2923   // Adds two vector registers and carry flag, the result is saturated to 32767
3077    rsp_state *rsp = (rsp_state*)param;
3078    int op = rsp->impstate->arg0;
3079    // 31       25  24     20      15      10      5        0
3080    // ------------------------------------------------------
3081    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010000 |
3082    // ------------------------------------------------------
3083    //
3084    // Adds two vector registers and carry flag, the result is saturated to 32767
29243085
29253086#if USE_SIMD
2926   __m128i shuffled = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
2927   __m128i unsat = rsp->xv[VS1REG];
2928   __m128i carry = _mm_set_epi16(CARRY_FLAG(7), CARRY_FLAG(6), CARRY_FLAG(5), CARRY_FLAG(4),
2929                           CARRY_FLAG(3), CARRY_FLAG(2), CARRY_FLAG(1), CARRY_FLAG(0));
29303087
2931   unsat = _mm_add_epi16(unsat, shuffled);
2932   unsat = _mm_add_epi16(unsat, carry);
3088    __m128i shuffled = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3089   __m128i carry = _mm_and_si128(rsp->xvflag[CARRY], vec_flagmask);
3090    __m128i unsat = _mm_add_epi16(_mm_add_epi16(rsp->xv[VS1REG], shuffled), carry);
29333091
2934   __m128i maxval = _mm_set_epi64x(0x7fff7fff7fff7fffL, 0x7fff7fff7fff7fffL);
2935   __m128i minval = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L);
3092    __m128i addvec = _mm_adds_epi16(rsp->xv[VS1REG], shuffled);
29363093
2937   __m128i addvec = _mm_adds_epi16(rsp->xv[VS1REG], shuffled);
3094    carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_32767), vec_neg1));
3095    carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(addvec, vec_n32768), vec_neg1));
29383096
2939   __m128i carrymask = _mm_cmpeq_epi16(addvec, maxval);
2940   carrymask = _mm_xor_si128(carrymask, vec_neg1);
2941   carry = _mm_and_si128(carry, carrymask);
3097    rsp->xv[VDREG] = _mm_add_epi16(addvec, carry);
29423098
2943   carrymask = _mm_cmpeq_epi16(addvec, minval);
2944   carrymask = _mm_xor_si128(carrymask, vec_neg1);
2945   carry = _mm_and_si128(carry, carrymask);
3099    rsp->accum_l = unsat;
29463100
2947   rsp->xv[VDREG] = _mm_add_epi16(addvec, carry);
2948
2949   rsp->accum_l = unsat;
2950   ACCUM_L(0) = _mm_extract_epi16(unsat, 0);
2951   ACCUM_L(1) = _mm_extract_epi16(unsat, 1);
2952   ACCUM_L(2) = _mm_extract_epi16(unsat, 2);
2953   ACCUM_L(3) = _mm_extract_epi16(unsat, 3);
2954   ACCUM_L(4) = _mm_extract_epi16(unsat, 4);
2955   ACCUM_L(5) = _mm_extract_epi16(unsat, 5);
2956   ACCUM_L(6) = _mm_extract_epi16(unsat, 6);
2957   ACCUM_L(7) = _mm_extract_epi16(unsat, 7);
2958
2959   CLEAR_ZERO_FLAGS();
2960   CLEAR_CARRY_FLAGS();
3101   rsp->xvflag[ZERO] = _mm_setzero_si128();
3102   rsp->xvflag[CARRY] = _mm_setzero_si128();
29613103#else
2962   INT16 vres[8] = { 0 };
2963   for (int i = 0; i < 8; i++)
2964   {
2965#if USE_SIMD
2966      UINT16 w1, w2;
2967      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
2968      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
2969      INT32 s1 = (INT32)(INT16)w1;
2970      INT32 s2 = (INT32)(INT16)w2;
2971#else
2972      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
2973      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
2974#endif
2975      INT32 r = s1 + s2 + CARRY_FLAG(i);
3104    INT16 vres[8] = { 0 };
3105    for (int i = 0; i < 8; i++)
3106    {
3107        INT16 w1, w2;
3108      SCALAR_GET_VS1(w1, i);
3109      SCALAR_GET_VS2(w2, i);
3110        INT32 s1 = (INT32)(INT16)w1;
3111        INT32 s2 = (INT32)(INT16)w2;
3112        INT32 r = s1 + s2 + (((CARRY_FLAG(rsp, i)) != 0) ? 1 : 0);
29763113
2977#if USE_SIMD
2978      SIMD_INSERT16(rsp->accum_l, (INT16)(r), i);
2979#else
2980      ACCUM_L(i) = (INT16)(r);
2981#endif
3114        SET_ACCUM_L((INT16)(r), i);
29823115
2983      if (r > 32767) r = 32767;
2984      if (r < -32768) r = -32768;
2985      vres[i] = (INT16)(r);
2986   }
2987   CLEAR_ZERO_FLAGS();
2988   CLEAR_CARRY_FLAGS();
2989   WRITEBACK_RESULT();
3116        if (r > 32767) r = 32767;
3117        if (r < -32768) r = -32768;
3118        vres[i] = (INT16)(r);
3119    }
3120    CLEAR_ZERO_FLAGS();
3121    CLEAR_CARRY_FLAGS();
3122    WRITEBACK_RESULT();
29903123#endif
29913124}
29923125
29933126INLINE void cfunc_rsp_vsub(void *param)
29943127{
2995   rsp_state *rsp = (rsp_state*)param;
2996   int op = rsp->impstate->arg0;
2997   // 31       25  24     20      15      10      5        0
2998   // ------------------------------------------------------
2999   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 |
3000   // ------------------------------------------------------
3001   //
3002   // Subtracts two vector registers and carry flag, the result is saturated to -32768
3128    rsp_state *rsp = (rsp_state*)param;
3129    int op = rsp->impstate->arg0;
3130    // 31       25  24     20      15      10      5        0
3131    // ------------------------------------------------------
3132    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010001 |
3133    // ------------------------------------------------------
3134    //
3135    // Subtracts two vector registers and carry flag, the result is saturated to -32768
30033136
3004   // TODO: check VS2REG == VDREG
3137    // TODO: check VS2REG == VDREG
30053138
30063139#if USE_SIMD
3007   __m128i shuffled = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3008   __m128i unsat = rsp->xv[VS1REG];
3009   __m128i carry = _mm_set_epi16(CARRY_FLAG(7), CARRY_FLAG(6), CARRY_FLAG(5), CARRY_FLAG(4),
3010                           CARRY_FLAG(3), CARRY_FLAG(2), CARRY_FLAG(1), CARRY_FLAG(0));
3140    __m128i shuffled = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3141   __m128i carry = _mm_and_si128(rsp->xvflag[CARRY], vec_flagmask);
3142    __m128i unsat = _mm_sub_epi16(_mm_sub_epi16(rsp->xv[VS1REG], shuffled), carry);
30113143
3012   unsat = _mm_sub_epi16(unsat, shuffled);
3013   unsat = _mm_sub_epi16(unsat, carry);
3144    __m128i subvec = _mm_subs_epi16(rsp->xv[VS1REG], shuffled);
30143145
3015   __m128i minval = _mm_set_epi64x(0x8000800080008000L, 0x8000800080008000L);
3146    carry = _mm_and_si128(carry, _mm_xor_si128(_mm_cmpeq_epi16(subvec, vec_n32768), vec_neg1));
30163147
3017   __m128i subvec = _mm_subs_epi16(rsp->xv[VS1REG], shuffled);
3148    rsp->xv[VDREG] = _mm_sub_epi16(subvec, carry);
30183149
3019   __m128i carrymask = _mm_cmpeq_epi16(subvec, minval);
3020   carrymask = _mm_xor_si128(carrymask, vec_neg1);
3021   carry = _mm_and_si128(carry, carrymask);
3150    rsp->accum_l = unsat;
30223151
3023   rsp->xv[VDREG] = _mm_sub_epi16(subvec, carry);
3024
3025   rsp->accum_l = unsat;
3026   ACCUM_L(0) = _mm_extract_epi16(unsat, 0);
3027   ACCUM_L(1) = _mm_extract_epi16(unsat, 1);
3028   ACCUM_L(2) = _mm_extract_epi16(unsat, 2);
3029   ACCUM_L(3) = _mm_extract_epi16(unsat, 3);
3030   ACCUM_L(4) = _mm_extract_epi16(unsat, 4);
3031   ACCUM_L(5) = _mm_extract_epi16(unsat, 5);
3032   ACCUM_L(6) = _mm_extract_epi16(unsat, 6);
3033   ACCUM_L(7) = _mm_extract_epi16(unsat, 7);
3034
3035   CLEAR_ZERO_FLAGS();
3036   CLEAR_CARRY_FLAGS();
3152   rsp->xvflag[ZERO] = _mm_setzero_si128();
3153   rsp->xvflag[CARRY] = _mm_setzero_si128();
30373154#else
3038   INT16 vres[8];
3039   for (int i = 0; i < 8; i++)
3040   {
3041#if USE_SIMD
3042      UINT16 w1, w2;
3043      SIMD_EXTRACT16(rsp->xv[VS1REG], w1, i);
3044      SIMD_EXTRACT16(rsp->xv[VS2REG], w2, VEC_EL_2(EL, i));
3045      INT32 s1 = (INT32)(INT16)w1;
3046      INT32 s2 = (INT32)(INT16)w2;
3047#else
3048      INT32 s1 = (INT32)(INT16)VREG_S(VS1REG, i);
3049      INT32 s2 = (INT32)(INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3050#endif
3051      INT32 r = s1 - s2 - CARRY_FLAG(i);
3155    INT16 vres[8];
3156    for (int i = 0; i < 8; i++)
3157    {
3158        INT16 w1, w2;
3159      SCALAR_GET_VS1(w1, i);
3160      SCALAR_GET_VS2(w2, i);
3161        INT32 s1 = (INT32)(INT16)w1;
3162        INT32 s2 = (INT32)(INT16)w2;
3163        INT32 r = s1 - s2 - (((CARRY_FLAG(rsp, i)) != 0) ? 1 : 0);
30523164
3053#if USE_SIMD
3054      SIMD_INSERT16(rsp->accum_l, (INT16)(r), i);
3055#else
3056      ACCUM_L(i) = (INT16)(r);
3057#endif
3165        SET_ACCUM_L((INT16)(r), i);
30583166
3059      if (r > 32767) r = 32767;
3060      if (r < -32768) r = -32768;
3167        if (r > 32767) r = 32767;
3168        if (r < -32768) r = -32768;
30613169
3062      vres[i] = (INT16)(r);
3063   }
3064   CLEAR_ZERO_FLAGS();
3065   CLEAR_CARRY_FLAGS();
3066   WRITEBACK_RESULT();
3170        vres[i] = (INT16)(r);
3171    }
3172    CLEAR_ZERO_FLAGS();
3173    CLEAR_CARRY_FLAGS();
3174    WRITEBACK_RESULT();
30673175#endif
30683176}
30693177
30703178INLINE void cfunc_rsp_vabs(void *param)
30713179{
3072   rsp_state *rsp = (rsp_state*)param;
3073   int op = rsp->impstate->arg0;
3074   INT16 vres[8];
3075   // 31       25  24     20      15      10      5        0
3076   // ------------------------------------------------------
3077   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 |
3078   // ------------------------------------------------------
3079   //
3080   // Changes the sign of source register 2 if source register 1 is negative and stores
3081   // the result to destination register
3180    rsp_state *rsp = (rsp_state*)param;
3181    int op = rsp->impstate->arg0;
3182    // 31       25  24     20      15      10      5        0
3183    // ------------------------------------------------------
3184    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010011 |
3185    // ------------------------------------------------------
3186    //
3187    // Changes the sign of source register 2 if source register 1 is negative and stores
3188    // the result to destination register
30823189
3083   for (int i = 0; i < 8; i++)
3084   {
30853190#if USE_SIMD
3086      INT16 s1, s2;
3087      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3088      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3191    __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3192    __m128i negs2 = _mm_sub_epi16(_mm_setzero_si128(), shuf2);
3193   __m128i s2_n32768 = _mm_cmpeq_epi16(shuf2, vec_n32768);
3194    __m128i s1_lz = _mm_cmplt_epi16(rsp->xv[VS1REG], _mm_setzero_si128());
3195
3196    __m128i result_gz = _mm_and_si128(shuf2, _mm_cmpgt_epi16(rsp->xv[VS1REG], _mm_setzero_si128()));
3197    __m128i result_n32768 = _mm_and_si128(s1_lz, _mm_and_si128(vec_32767, s2_n32768));
3198    __m128i result_negs2 = _mm_and_si128(s1_lz, _mm_and_si128(negs2, _mm_xor_si128(s2_n32768, vec_neg1)));
3199   rsp->xv[VDREG] = rsp->accum_l = _mm_or_si128(result_gz, _mm_or_si128(result_n32768, result_negs2));
30893200#else
3090      INT16 s1 = (INT16)VREG_S(VS1REG, i);
3091      INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3092#endif
3201    INT16 vres[8];
3202    for (int i = 0; i < 8; i++)
3203    {
3204        INT16 s1, s2;
3205      SCALAR_GET_VS1(s1, i);
3206      SCALAR_GET_VS2(s2, i);
30933207
3094      if (s1 < 0)
3095      {
3096         if (s2 == -32768)
3097         {
3098            vres[i] = 32767;
3099         }
3100         else
3101         {
3102            vres[i] = -s2;
3103         }
3104      }
3105      else if (s1 > 0)
3106      {
3107         vres[i] = s2;
3108      }
3109      else
3110      {
3111         vres[i] = 0;
3112      }
3208        if (s1 < 0)
3209        {
3210            if (s2 == -32768)
3211            {
3212                vres[i] = 32767;
3213            }
3214            else
3215            {
3216                vres[i] = -s2;
3217            }
3218        }
3219        else if (s1 > 0)
3220        {
3221            vres[i] = s2;
3222        }
3223        else
3224        {
3225            vres[i] = 0;
3226        }
31133227
3114#if USE_SIMD
3115      SIMD_INSERT16(rsp->accum_l, vres[i], i);
3116#else
3117      ACCUM_L(i) = vres[i];
3228        SET_ACCUM_L(vres[i], i);
3229    }
3230    WRITEBACK_RESULT();
31183231#endif
3119   }
3120   WRITEBACK_RESULT();
31213232}
31223233
31233234INLINE void cfunc_rsp_vaddc(void *param)
31243235{
3125   rsp_state *rsp = (rsp_state*)param;
3126   int op = rsp->impstate->arg0;
3127   // 31       25  24     20      15      10      5        0
3128   // ------------------------------------------------------
3129   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 |
3130   // ------------------------------------------------------
3131   //
3132   // Adds two vector registers, the carry out is stored into carry register
3236    rsp_state *rsp = (rsp_state*)param;
3237    int op = rsp->impstate->arg0;
3238    // 31       25  24     20      15      10      5        0
3239    // ------------------------------------------------------
3240    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010100 |
3241    // ------------------------------------------------------
3242    //
3243    // Adds two vector registers, the carry out is stored into carry register
31333244
3134   // TODO: check VS2REG = VDREG
3245    // TODO: check VS2REG = VDREG
31353246
3136   CLEAR_ZERO_FLAGS();
3137   CLEAR_CARRY_FLAGS();
3247    CLEAR_ZERO_FLAGS();
3248    CLEAR_CARRY_FLAGS();
31383249
31393250#if USE_SIMD
3140   __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3141                                    __m128i vec7531 = _mm_and_si128(rsp->xv[VS1REG], vec_lomask);
3142   __m128i vec6420 = _mm_srli_epi32(rsp->xv[VS1REG], 16);
3143   __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask);
3144   __m128i shuf6420 = _mm_srli_epi32(shuf2, 16);
3145   __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531);
3146   __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420);
3251    __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3252    __m128i vec7531 = _mm_and_si128(rsp->xv[VS1REG], vec_lomask);
3253    __m128i vec6420 = _mm_srli_epi32(rsp->xv[VS1REG], 16);
3254    __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask);
3255    __m128i shuf6420 = _mm_srli_epi32(shuf2, 16);
3256    __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531);
3257    __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420);
31473258
3148   __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, vec_zero), vec_neg1), vec_overmask);
3149   __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, vec_zero), vec_neg1), vec_overmask);
3259    __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask);
3260    __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask);
31503261
3151   rsp->flag[0] |= _mm_extract_epi16(over7531, 7) << 6;
3152   rsp->flag[0] |= _mm_extract_epi16(over7531, 5) << 4;
3153   rsp->flag[0] |= _mm_extract_epi16(over7531, 3) << 2;
3154   rsp->flag[0] |= _mm_extract_epi16(over7531, 1) << 0;
3155   rsp->flag[0] |= _mm_extract_epi16(over6420, 7) << 7;
3156   rsp->flag[0] |= _mm_extract_epi16(over6420, 5) << 5;
3157   rsp->flag[0] |= _mm_extract_epi16(over6420, 3) << 3;
3158   rsp->flag[0] |= _mm_extract_epi16(over6420, 1) << 1;
3159   rsp->xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531);
3160   rsp->accum_l = rsp->xv[VDREG];
3161
3262   rsp->xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16));
3263    rsp->accum_l = rsp->xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531);
31623264#else
3163   INT16 vres[8] = { 0 };
3164   for (int i = 0; i < 8; i++)
3165   {
3166      INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i);
3167      INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3168      INT32 r = s1 + s2;
3265    INT16 vres[8] = { 0 };
3266    for (int i = 0; i < 8; i++)
3267    {
3268        INT16 w1, w2;
3269      SCALAR_GET_VS1(w1, i);
3270      SCALAR_GET_VS2(w2, i);
3271        INT32 s1 = (UINT32)(UINT16)w1;
3272        INT32 s2 = (UINT32)(UINT16)w2;
3273        INT32 r = s1 + s2;
31693274
3170      vres[i] = (INT16)r;
3171      ACCUM_L(i) = (INT16)r;
3275        vres[i] = (INT16)r;
3276        SET_ACCUM_L((INT16)r, i);
31723277
3173      if (r & 0xffff0000)
3174      {
3175         SET_CARRY_FLAG(i);
3176      }
3177   }
3178   WRITEBACK_RESULT();
3278        if (r & 0xffff0000)
3279        {
3280            SET_CARRY_FLAG(i);
3281        }
3282    }
3283    WRITEBACK_RESULT();
31793284#endif
31803285}
31813286
31823287INLINE void cfunc_rsp_vsubc(void *param)
31833288{
3184   rsp_state *rsp = (rsp_state*)param;
3185   int op = rsp->impstate->arg0;
3186   // 31       25  24     20      15      10      5        0
3187   // ------------------------------------------------------
3188   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 |
3189   // ------------------------------------------------------
3190   //
3191   // Subtracts two vector registers, the carry out is stored into carry register
3289    rsp_state *rsp = (rsp_state*)param;
3290    int op = rsp->impstate->arg0;
3291    // 31       25  24     20      15      10      5        0
3292    // ------------------------------------------------------
3293    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 010101 |
3294    // ------------------------------------------------------
3295    //
3296    // Subtracts two vector registers, the carry out is stored into carry register
31923297
3193   // TODO: check VS2REG = VDREG
3298    // TODO: check VS2REG = VDREG
31943299
3195   CLEAR_ZERO_FLAGS();
3196   CLEAR_CARRY_FLAGS();
3300    CLEAR_ZERO_FLAGS();
3301    CLEAR_CARRY_FLAGS();
31973302
31983303#if USE_SIMD
3199   __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3200   __m128i vec7531 = _mm_and_si128(rsp->xv[VS1REG], vec_lomask);
3201   __m128i vec6420 = _mm_srli_epi32(rsp->xv[VS1REG], 16);
3202   __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask);
3203   __m128i shuf6420 = _mm_srli_epi32(shuf2, 16);
3204   __m128i sum7531 = _mm_sub_epi32(vec7531, shuf7531);
3205   __m128i sum6420 = _mm_sub_epi32(vec6420, shuf6420);
3304    __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3305    __m128i vec7531 = _mm_and_si128(rsp->xv[VS1REG], vec_lomask);
3306    __m128i vec6420 = _mm_srli_epi32(rsp->xv[VS1REG], 16);
3307    __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask);
3308    __m128i shuf6420 = _mm_srli_epi32(shuf2, 16);
3309    __m128i sum7531 = _mm_sub_epi32(vec7531, shuf7531);
3310    __m128i sum6420 = _mm_sub_epi32(vec6420, shuf6420);
32063311
3207   __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, vec_zero), vec_neg1), vec_overmask);
3208   __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, vec_zero), vec_neg1), vec_overmask);
3209   sum7531 = _mm_and_si128(sum7531, vec_lomask);
3210   sum6420 = _mm_and_si128(sum6420, vec_lomask);
3211   __m128i zero7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, vec_zero), vec_neg1), vec_zerobits);
3212   __m128i zero6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, vec_zero), vec_neg1), vec_zerobits);
3312    __m128i over7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_himask);
3313    __m128i over6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_himask);
3314    sum7531 = _mm_and_si128(sum7531, vec_lomask);
3315    sum6420 = _mm_and_si128(sum6420, vec_lomask);
3316    __m128i zero7531 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum7531, _mm_setzero_si128()), vec_neg1), vec_lomask);
3317    __m128i zero6420 = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi16(sum6420, _mm_setzero_si128()), vec_neg1), vec_lomask);
32133318
3214   rsp->flag[0] |= _mm_extract_epi16(over7531, 7) << 6;
3215   rsp->flag[0] |= _mm_extract_epi16(over7531, 5) << 4;
3216   rsp->flag[0] |= _mm_extract_epi16(over7531, 3) << 2;
3217   rsp->flag[0] |= _mm_extract_epi16(over7531, 1) << 0;
3218   rsp->flag[0] |= _mm_extract_epi16(over6420, 7) << 7;
3219   rsp->flag[0] |= _mm_extract_epi16(over6420, 5) << 5;
3220   rsp->flag[0] |= _mm_extract_epi16(over6420, 3) << 3;
3221   rsp->flag[0] |= _mm_extract_epi16(over6420, 1) << 1;
3319   rsp->xvflag[CARRY] = _mm_or_si128(over6420, _mm_srli_epi32(over7531, 16));
3320   rsp->xvflag[ZERO] = _mm_or_si128(zero6420, _mm_srli_epi32(zero7531, 16));
32223321
3223   rsp->flag[0] |= _mm_extract_epi16(zero7531, 6) << 14;
3224   rsp->flag[0] |= _mm_extract_epi16(zero7531, 4) << 12;
3225   rsp->flag[0] |= _mm_extract_epi16(zero7531, 2) << 10;
3226   rsp->flag[0] |= _mm_extract_epi16(zero7531, 0) << 8;
3227   rsp->flag[0] |= _mm_extract_epi16(zero6420, 6) << 15;
3228   rsp->flag[0] |= _mm_extract_epi16(zero6420, 4) << 13;
3229   rsp->flag[0] |= _mm_extract_epi16(zero6420, 2) << 11;
3230   rsp->flag[0] |= _mm_extract_epi16(zero6420, 0) << 9;
3231
3232   rsp->xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531);
3233   rsp->accum_l = rsp->xv[VDREG];
3234
3322    rsp->accum_l = rsp->xv[VDREG] = _mm_or_si128(_mm_slli_epi32(sum6420, 16), sum7531);
32353323#else
3236   INT16 vres[8];
3237   for (int i = 0; i < 8; i++)
3238   {
3239      INT32 s1 = (UINT32)(UINT16)VREG_S(VS1REG, i);
3240      INT32 s2 = (UINT32)(UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3241      INT32 r = s1 - s2;
3324    INT16 vres[8];
3325    for (int i = 0; i < 8; i++)
3326    {
3327        INT16 w1, w2;
3328      SCALAR_GET_VS1(w1, i);
3329      SCALAR_GET_VS2(w2, i);
3330        INT32 s1 = (UINT32)(UINT16)w1;
3331        INT32 s2 = (UINT32)(UINT16)w2;
3332        INT32 r = s1 - s2;
32423333
3243      vres[i] = (INT16)(r);
3244      ACCUM_L(i) = (UINT16)(r);
3334        vres[i] = (INT16)(r);
3335        SET_ACCUM_L((UINT16)r, i);
32453336
3246      if ((UINT16)(r) != 0)
3247      {
3248         SET_ZERO_FLAG(i);
3249      }
3250      if (r & 0xffff0000)
3251      {
3252         SET_CARRY_FLAG(i);
3253      }
3254   }
3255   WRITEBACK_RESULT();
3337        if ((UINT16)(r) != 0)
3338        {
3339            SET_ZERO_FLAG(i);
3340        }
3341        if (r & 0xffff0000)
3342        {
3343            SET_CARRY_FLAG(i);
3344        }
3345    }
3346    WRITEBACK_RESULT();
32563347#endif
32573348}
32583349
32593350INLINE void cfunc_rsp_vsaw(void *param)
32603351{
3261   rsp_state *rsp = (rsp_state*)param;
3262   int op = rsp->impstate->arg0;
3263   // 31       25  24     20      15      10      5        0
3264   // ------------------------------------------------------
3265   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 |
3266   // ------------------------------------------------------
3267   //
3268   // Stores high, middle or low slice of accumulator to destination vector
3352    rsp_state *rsp = (rsp_state*)param;
3353    int op = rsp->impstate->arg0;
3354    // 31       25  24     20      15      10      5        0
3355    // ------------------------------------------------------
3356    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 011101 |
3357    // ------------------------------------------------------
3358    //
3359    // Stores high, middle or low slice of accumulator to destination vector
32693360
3270   switch (EL)
3271   {
3272      case 0x08:      // VSAWH
3273      {
3274         for (int i = 0; i < 8; i++)
3275         {
3361    switch (EL)
3362    {
3363        case 0x08:      // VSAWH
3364        {
32763365#if USE_SIMD
3277            rsp->xv[VDREG] = _mm_insert_epi16(rsp->xv[VDREG], ACCUM_H(i), i);
3366         rsp->xv[VDREG] = rsp->accum_h;
32783367#else
3279            W_VREG_S(VDREG, i) = ACCUM_H(i);
3368            for (int i = 0; i < 8; i++)
3369            {
3370                W_VREG_S(VDREG, i) = ACCUM_H(rsp, i);
3371            }
32803372#endif
3281         }
3282         break;
3283      }
3284      case 0x09:      // VSAWM
3285      {
3286         for (int i = 0; i < 8; i++)
3287         {
3373            break;
3374        }
3375        case 0x09:      // VSAWM
3376        {
32883377#if USE_SIMD
3289            rsp->xv[VDREG] = _mm_insert_epi16(rsp->xv[VDREG], ACCUM_M(i), i);
3378         rsp->xv[VDREG] = rsp->accum_m;
32903379#else
3291            W_VREG_S(VDREG, i) = ACCUM_M(i);
3380            for (int i = 0; i < 8; i++)
3381            {
3382                W_VREG_S(VDREG, i) = ACCUM_M(rsp, i);
3383            }
32923384#endif
3293         }
3294         break;
3295      }
3296      case 0x0a:      // VSAWL
3297      {
3385            break;
3386        }
3387        case 0x0a:      // VSAWL
3388        {
32983389#if USE_SIMD
3299         rsp->xv[VDREG] = rsp->accum_l;
3390            rsp->xv[VDREG] = rsp->accum_l;
33003391#else
3301         for (int i = 0; i < 8; i++)
3302         {
3303            W_VREG_S(VDREG, i) = ACCUM_L(i);
3304         }
3392            for (int i = 0; i < 8; i++)
3393            {
3394                W_VREG_S(VDREG, i) = ACCUM_L(rsp, i);
3395            }
33053396#endif
3306         break;
3307      }
3308      default:    fatalerror("RSP: VSAW: el = %d\n", EL);
3309   }
3397            break;
3398        }
3399        default:    fatalerror("RSP: VSAW: el = %d\n", EL);
3400    }
33103401}
33113402
33123403INLINE void cfunc_rsp_vlt(void *param)
33133404{
3314   rsp_state *rsp = (rsp_state*)param;
3315   int op = rsp->impstate->arg0;
3316   INT16 vres[8] = { 0 };
3317   //int i;
3318   // 31       25  24     20      15      10      5        0
3319   // ------------------------------------------------------
3320   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 |
3321   // ------------------------------------------------------
3322   //
3323   // Sets compare flags if elements in VS1 are less than VS2
3324   // Moves the element in VS2 to destination vector
3405    rsp_state *rsp = (rsp_state*)param;
3406    int op = rsp->impstate->arg0;
3407    //int i;
3408    // 31       25  24     20      15      10      5        0
3409    // ------------------------------------------------------
3410    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100000 |
3411    // ------------------------------------------------------
3412    //
3413    // Sets compare flags if elements in VS1 are less than VS2
3414    // Moves the element in VS2 to destination vector
33253415
3326   rsp->flag[1] = 0;
3416#if USE_SIMD
3417   rsp->xvflag[COMPARE] = rsp->xvflag[CLIP2] = _mm_setzero_si128();
33273418
3328   for (int i = 0; i < 8; i++)
3329   {
3330#if USE_SIMD
3331      INT16 s1, s2;
3332      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3333      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3419    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3420   __m128i zc_mask = _mm_and_si128(rsp->xvflag[ZERO], rsp->xvflag[CARRY]);
3421   __m128i lt_mask = _mm_cmplt_epi16(rsp->xv[VS1REG], shuf);
3422   __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(rsp->xv[VS1REG], shuf), zc_mask);
3423
3424   rsp->xvflag[COMPARE] = _mm_or_si128(lt_mask, eq_mask);
3425
3426   __m128i result = _mm_and_si128(rsp->xv[VS1REG], rsp->xvflag[COMPARE]);
3427    rsp->accum_l = rsp->xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(rsp->xvflag[COMPARE], vec_neg1)));
3428
3429   rsp->xvflag[ZERO] = rsp->xvflag[CARRY] = _mm_setzero_si128();
33343430#else
3335      INT16 s1 = (INT16)VREG_S(VS1REG, i);
3336      INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3337#endif
3338      if (s1 < s2)
3339      {
3340         SET_COMPARE_FLAG(i);
3341      }
3342      else if (s1 == s2)
3343      {
3344         if (ZERO_FLAG(i) == 1 && CARRY_FLAG(i) != 0)
3345         {
3346            SET_COMPARE_FLAG(i);
3347         }
3348      }
3431   CLEAR_COMPARE_FLAGS();
3432   CLEAR_CLIP2_FLAGS();
33493433
3350      if (COMPARE_FLAG(i))
3351      {
3352         vres[i] = s1;
3353      }
3354      else
3355      {
3356         vres[i] = s2;
3357      }
3434    INT16 vres[8];
3435    for (int i = 0; i < 8; i++)
3436    {
3437        INT16 s1, s2;
3438      SCALAR_GET_VS1(s1, i);
3439      SCALAR_GET_VS2(s2, i);
33583440
3359#if USE_SIMD
3360      SIMD_INSERT16(rsp->accum_l, vres[i], i);
3361#else
3362      ACCUM_L(i) = vres[i];
3441        if (s1 < s2)
3442        {
3443            SET_COMPARE_FLAG(i);
3444        }
3445        else if (s1 == s2)
3446        {
3447            if (ZERO_FLAG(rsp, i) != 0 && CARRY_FLAG(rsp, i) != 0)
3448            {
3449                SET_COMPARE_FLAG(i);
3450            }
3451        }
3452
3453        if (COMPARE_FLAG(rsp, i) != 0)
3454        {
3455            vres[i] = s1;
3456        }
3457        else
3458        {
3459            vres[i] = s2;
3460        }
3461
3462        SET_ACCUM_L(vres[i], i);
3463    }
3464
3465   CLEAR_ZERO_FLAGS();
3466   CLEAR_CARRY_FLAGS();
3467    WRITEBACK_RESULT();
33633468#endif
3364   }
3365
3366   rsp->flag[0] = 0;
3367   WRITEBACK_RESULT();
33683469}
33693470
33703471INLINE void cfunc_rsp_veq(void *param)
33713472{
3372   rsp_state *rsp = (rsp_state*)param;
3373   int op = rsp->impstate->arg0;
3374   INT16 vres[8];
3473    rsp_state *rsp = (rsp_state*)param;
3474    int op = rsp->impstate->arg0;
33753475
3376   // 31       25  24     20      15      10      5        0
3377   // ------------------------------------------------------
3378   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 |
3379   // ------------------------------------------------------
3380   //
3381   // Sets compare flags if elements in VS1 are equal with VS2
3382   // Moves the element in VS2 to destination vector
3476    // 31       25  24     20      15      10      5        0
3477    // ------------------------------------------------------
3478    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100001 |
3479    // ------------------------------------------------------
3480    //
3481    // Sets compare flags if elements in VS1 are equal with VS2
3482    // Moves the element in VS2 to destination vector
33833483
3384   rsp->flag[1] = 0;
3484#if USE_SIMD
3485   rsp->xvflag[COMPARE] = rsp->xvflag[CLIP2] = _mm_setzero_si128();
33853486
3386   for (int i = 0; i < 8; i++)
3387   {
3388#if USE_SIMD
3389      INT16 s1, s2;
3390      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3391      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3487    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3488   __m128i zero_mask = _mm_cmpeq_epi16(rsp->xvflag[ZERO], _mm_setzero_si128());
3489   __m128i eq_mask = _mm_cmpeq_epi16(rsp->xv[VS1REG], shuf);
3490
3491   rsp->xvflag[COMPARE] = _mm_and_si128(zero_mask, eq_mask);
3492
3493   __m128i result = _mm_and_si128(rsp->xv[VS1REG], rsp->xvflag[COMPARE]);
3494    rsp->accum_l = rsp->xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(rsp->xvflag[COMPARE], vec_neg1)));
3495
3496   rsp->xvflag[ZERO] = rsp->xvflag[CARRY] = _mm_setzero_si128();
33923497#else
3393      INT16 s1 = (INT16)VREG_S(VS1REG, i);
3394      INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3498   CLEAR_COMPARE_FLAGS();
3499   CLEAR_CLIP2_FLAGS();
3500
3501    INT16 vres[8];
3502    for (int i = 0; i < 8; i++)
3503    {
3504        INT16 s1, s2;
3505      SCALAR_GET_VS1(s1, i);
3506      SCALAR_GET_VS2(s2, i);
3507
3508        if ((s1 == s2) && ZERO_FLAG(rsp, i) == 0)
3509        {
3510            SET_COMPARE_FLAG(i);
3511            vres[i] = s1;
3512        }
3513        else
3514        {
3515            vres[i] = s2;
3516        }
3517
3518        SET_ACCUM_L(vres[i], i);
3519    }
3520
3521   CLEAR_ZERO_FLAGS();
3522   CLEAR_CARRY_FLAGS();
3523    WRITEBACK_RESULT();
33953524#endif
3396      if ((s1 == s2) && ZERO_FLAG(i) == 0)
3397      {
3398         SET_COMPARE_FLAG(i);
3399         vres[i] = s1;
3400      }
3401      else
3402      {
3403         vres[i] = s2;
3404      }
3405#if USE_SIMD
3406      SIMD_INSERT16(rsp->accum_l, vres[i], i);
3407#else
3408      ACCUM_L(i) = vres[i];
3409#endif
3410   }
3411
3412   rsp->flag[0] = 0;
3413   WRITEBACK_RESULT();
34143525}
34153526
34163527INLINE void cfunc_rsp_vne(void *param)
34173528{
3418   rsp_state *rsp = (rsp_state*)param;
3419   int op = rsp->impstate->arg0;
3420   INT16 vres[8];
3529    rsp_state *rsp = (rsp_state*)param;
3530    int op = rsp->impstate->arg0;
34213531
3422   // 31       25  24     20      15      10      5        0
3423   // ------------------------------------------------------
3424   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 |
3425   // ------------------------------------------------------
3426   //
3427   // Sets compare flags if elements in VS1 are not equal with VS2
3428   // Moves the element in VS2 to destination vector
3532    // 31       25  24     20      15      10      5        0
3533    // ------------------------------------------------------
3534    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100010 |
3535    // ------------------------------------------------------
3536    //
3537    // Sets compare flags if elements in VS1 are not equal with VS2
3538    // Moves the element in VS2 to destination vector
34293539
3430   rsp->flag[1] = 0;
3540#if USE_SIMD
3541   rsp->xvflag[COMPARE] = rsp->xvflag[CLIP2] = _mm_setzero_si128();
34313542
3432   for (int i = 0; i < 8; i++)
3433   {
3434#if USE_SIMD
3435      INT16 s1, s2;
3436      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3437      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3543    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3544   __m128i neq_mask = _mm_xor_si128(_mm_cmpeq_epi16(rsp->xv[VS1REG], shuf), vec_neg1);
3545
3546   rsp->xvflag[COMPARE] = _mm_or_si128(rsp->xvflag[ZERO], neq_mask);
3547
3548   __m128i result = _mm_and_si128(rsp->xv[VS1REG], rsp->xvflag[COMPARE]);
3549    rsp->accum_l = rsp->xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(rsp->xvflag[COMPARE], vec_neg1)));
3550
3551   rsp->xvflag[ZERO] = rsp->xvflag[CARRY] = _mm_setzero_si128();
34383552#else
3439      INT16 s1 = (INT16)VREG_S(VS1REG, i);
3440      INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3441#endif
3442      if (s1 != s2)
3443      {
3444         SET_COMPARE_FLAG(i);
3553   CLEAR_COMPARE_FLAGS();
3554   CLEAR_CLIP2_FLAGS();
3555
3556    INT16 vres[8];
3557    for (int i = 0; i < 8; i++)
3558    {
3559        INT16 s1, s2;
3560      SCALAR_GET_VS1(s1, i);
3561      SCALAR_GET_VS2(s2, i);
3562
3563        if (s1 != s2 || ZERO_FLAG(rsp, i) != 0)
3564        {
3565            SET_COMPARE_FLAG(i);
3566            vres[i] = s1;
3567        }
3568        else
3569        {
3570            vres[i] = s2;
34453571      }
3446      else
3447      {
3448         if (ZERO_FLAG(i) == 1)
3449         {
3450            SET_COMPARE_FLAG(i);
3451         }
3452      }
3453      if (COMPARE_FLAG(i))
3454      {
3455         vres[i] = s1;
3456      }
3457      else
3458      {
3459         vres[i] = s2;
3460      }
3461#if USE_SIMD
3462      SIMD_INSERT16(rsp->accum_l, vres[i], i);
3463#else
3464      ACCUM_L(i) = vres[i];
3572
3573        SET_ACCUM_L(vres[i], i);
3574    }
3575
3576   CLEAR_ZERO_FLAGS();
3577   CLEAR_CARRY_FLAGS();
3578    WRITEBACK_RESULT();
34653579#endif
3466   }
3467
3468   rsp->flag[0] = 0;
3469   WRITEBACK_RESULT();
34703580}
34713581
34723582INLINE void cfunc_rsp_vge(void *param)
34733583{
3474   rsp_state *rsp = (rsp_state*)param;
3475   int op = rsp->impstate->arg0;
3476   INT16 vres[8] = { 0 };
3477   //int i;
3478   // 31       25  24     20      15      10      5        0
3479   // ------------------------------------------------------
3480   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 |
3481   // ------------------------------------------------------
3482   //
3483   // Sets compare flags if elements in VS1 are greater or equal with VS2
3484   // Moves the element in VS2 to destination vector
3584    rsp_state *rsp = (rsp_state*)param;
3585    int op = rsp->impstate->arg0;
34853586
3486   rsp->flag[1] = 0;
3587    //int i;
3588    // 31       25  24     20      15      10      5        0
3589    // ------------------------------------------------------
3590    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100011 |
3591    // ------------------------------------------------------
3592    //
3593    // Sets compare flags if elements in VS1 are greater than or equal to VS2
3594    // Moves the element in VS1 to destination vector if its compare flag is set, otherwise the element in VS2
34873595
3488   for (int i = 0; i < 8; i++)
3489   {
34903596#if USE_SIMD
3491      INT16 s1, s2;
3492      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3493      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3597   rsp->xvflag[COMPARE] = rsp->xvflag[CLIP2] = _mm_setzero_si128();
3598
3599    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3600   __m128i flag_mask = _mm_cmpeq_epi16(_mm_or_si128(rsp->xvflag[ZERO], rsp->xvflag[CARRY]), _mm_setzero_si128());
3601   __m128i eq_mask = _mm_and_si128(_mm_cmpeq_epi16(rsp->xv[VS1REG], shuf), flag_mask);
3602   __m128i gt_mask = _mm_cmpgt_epi16(rsp->xv[VS1REG], shuf);
3603   rsp->xvflag[COMPARE] = _mm_or_si128(eq_mask, gt_mask);
3604
3605   __m128i result = _mm_and_si128(rsp->xv[VS1REG], rsp->xvflag[COMPARE]);
3606    rsp->accum_l = rsp->xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, _mm_xor_si128(rsp->xvflag[COMPARE], vec_neg1)));
3607
3608   rsp->xvflag[ZERO] = rsp->xvflag[CARRY] = _mm_setzero_si128();
34943609#else
3495      INT16 s1 = (INT16)VREG_S(VS1REG, i);
3496      INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3497#endif
3498      if (s1 == s2)
3499      {
3500         if (ZERO_FLAG(i) == 0 || CARRY_FLAG(i) == 0)
3501         {
3502            SET_COMPARE_FLAG(i);
3503         }
3504      }
3505      else if (s1 > s2)
3506      {
3507         SET_COMPARE_FLAG(i);
3508      }
3610   CLEAR_COMPARE_FLAGS();
3611   CLEAR_CLIP2_FLAGS();
35093612
3510      if (COMPARE_FLAG(i) != 0)
3511      {
3512         vres[i] = s1;
3513      }
3514      else
3515      {
3516         vres[i] = s2;
3517      }
3613    INT16 vres[8];
3614    for (int i = 0; i < 8; i++)
3615    {
3616        INT16 s1, s2;
3617      SCALAR_GET_VS1(s1, i);
3618      SCALAR_GET_VS2(s2, i);
3619        if ((s1 == s2 && (ZERO_FLAG(rsp, i) == 0 || CARRY_FLAG(rsp, i) == 0)) || s1 > s2)
3620        {
3621            SET_COMPARE_FLAG(i);
3622            vres[i] = s1;
3623        }
3624        else
3625        {
3626            vres[i] = s2;
3627        }
35183628
3519#if USE_SIMD
3520      SIMD_INSERT16(rsp->accum_l, vres[i], i);
3521#else
3522      ACCUM_L(i) = vres[i];
3629        SET_ACCUM_L(vres[i], i);
3630    }
3631
3632   CLEAR_ZERO_FLAGS();
3633   CLEAR_CARRY_FLAGS();
3634    WRITEBACK_RESULT();
35233635#endif
3524   }
3525
3526   rsp->flag[0] = 0;
3527   WRITEBACK_RESULT();
35283636}
35293637
35303638INLINE void cfunc_rsp_vcl(void *param)
35313639{
3532   rsp_state *rsp = (rsp_state*)param;
3533   int op = rsp->impstate->arg0;
3534   INT16 vres[8];
3640    rsp_state *rsp = (rsp_state*)param;
3641    int op = rsp->impstate->arg0;
3642    INT16 vres[8];
35353643
3536   // 31       25  24     20      15      10      5        0
3537   // ------------------------------------------------------
3538   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 |
3539   // ------------------------------------------------------
3540   //
3541   // Vector clip low
3644    // 31       25  24     20      15      10      5        0
3645    // ------------------------------------------------------
3646    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100100 |
3647    // ------------------------------------------------------
3648    //
3649    // Vector clip low
35423650
3543   for (int i = 0; i < 8; i++)
3544   {
3545#if USE_SIMD
3546      INT16 s1, s2;
3547      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3548      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3549#else
3550      INT16 s1 = VREG_S(VS1REG, i);
3551      INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i));
3552#endif
3651#if 0//USE_SIMD
3652    __m128i flag0_07 = _mm_set_epi16(CARRY_FLAG(0), CARRY_FLAG(1), CARRY_FLAG(2), CARRY_FLAG(3),
3653                                     CARRY_FLAG(4), CARRY_FLAG(5), CARRY_FLAG(6), CARRY_FLAG(7));
3654    __m128i flag0_815 = _mm_set_epi16(ZERO_FLAG(0), ZERO_FLAG(1), ZERO_FLAG(2), ZERO_FLAG(3),
3655                                      ZERO_FLAG(4), ZERO_FLAG(5), ZERO_FLAG(6), ZERO_FLAG(7));
3656    __m128i flag1_07 = _mm_set_epi16(COMPARE_FLAG(0), COMPARE_FLAG(1), COMPARE_FLAG(2), COMPARE_FLAG(3),
3657                                     COMPARE_FLAG(4), COMPARE_FLAG(5), COMPARE_FLAG(6), COMPARE_FLAG(7));
3658    __m128i flag1_815 = _mm_set_epi16((rsp->flag[1] >>  8) & 1, (rsp->flag[1] >>  9) & 1, (rsp->flag[1] >> 10) & 1, (rsp->flag[1] >> 11) & 1,
3659                                      (rsp->flag[1] >> 12) & 1, (rsp->flag[1] >> 13) & 1, (rsp->flag[1] >> 14) & 1, (rsp->flag[1] >> 15) & 1);
3660    __m128i flag2_07 = _mm_set_epi16(rsp->flag[2][0], rsp->flag[2][1], rsp->flag[2][2], rsp->flag[2][3],
3661                                     rsp->flag[2][4], rsp->flag[2][5], rsp->flag[2][6], rsp->flag[2][7]);
3662    __m128i n0_07 = _mm_xor_si128(flag0_07, vec_neg1);
3663    __m128i n0_815 = _mm_xor_si128(flag0_815, vec_neg1);
3664    __m128i n1_07 = _mm_xor_si128(flag1_07, vec_neg1);
3665    __m128i n1_815 = _mm_xor_si128(flag1_815, vec_neg1);
3666    __m128i n2_07 = _mm_xor_si128(flag2_07, vec_neg1);
35533667
3554      if (CARRY_FLAG(i) != 0)
3555      {
3556         if (ZERO_FLAG(i) != 0)
3557         {
3558            if (COMPARE_FLAG(i) != 0)
3559            {
3560#if USE_SIMD
3561               SIMD_INSERT16(rsp->accum_l, -(UINT16)s2, i);
3668    __m128i shuf2 = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3669    __m128i vec7531 = _mm_and_si128(rsp->xv[VS1REG], vec_lomask);
3670    __m128i vec6420 = _mm_srli_epi32(rsp->xv[VS1REG], 16);
3671    __m128i shuf7531 = _mm_and_si128(shuf2, vec_lomask);
3672    __m128i shuf6420 = _mm_srli_epi32(shuf2, 16);
3673    __m128i sub7531 = _mm_sub_epi32(vec7531, shuf7531);
3674    __m128i sub6420 = _mm_sub_epi32(vec6420, shuf6420);
3675    __m128i subh7531 = _mm_and_si128(sub7531, vec_himask);
3676    __m128i subh6420 = _mm_and_si128(sub6420, vec_himask);
3677    __m128i sub_gez = _mm_or_si128(_mm_slli_epi32(_mm_cmpeq_epi16(subh6420, _mm_setzero_si128()), 16), _mm_cmpeq_epi16(subh7531, _mm_setzero_si128()));
3678    __m128i sub_lz  = _mm_xor_si128(sub_gez, vec_neg1);
3679
3680    __m128i sum7531 = _mm_add_epi32(vec7531, shuf7531);
3681    __m128i sum6420 = _mm_add_epi32(vec6420, shuf6420);
3682    __m128i suml7531 = _mm_and_si128(sum7531, vec_lomask);
3683    __m128i suml6420 = _mm_and_si128(sum6420, vec_lomask);
3684    __m128i sumh7531 = _mm_and_si128(sum7531, vec_himask);
3685    __m128i sumh6420 = _mm_and_si128(sum6420, vec_himask);
3686    __m128i suml_z = _mm_or_si128(_mm_slli_epi32(_mm_cmpeq_epi16(suml6420, _mm_setzero_si128()), 16), _mm_cmpeq_epi16(suml7531, _mm_setzero_si128()));
3687    __m128i sumh_1 = _mm_or_si128(_mm_slli_epi32(_mm_cmpeq_epi16(sumh6420, vec_hibit), 16), _mm_cmpeq_epi16(sumh7531, vec_hibit));
3688    __m128i sumh_z = _mm_or_si128(_mm_slli_epi32(_mm_cmpeq_epi16(sumh6420, _mm_setzero_si128()), 16), _mm_cmpeq_epi16(sumh7531, _mm_setzero_si128()));
3689    __m128i sum_z = _mm_and_si128(suml_z, sumh_z);
3690    __m128i sum_nz = _mm_xor_si128(sum_z, vec_neg1);
3691    __m128i sum_le0x10000 = _mm_or_si128(_mm_and_si128(suml_z, sumh_1), sumh_z);
3692    __m128i sum_g0x10000 = _mm_xor_si128(sum_le0x10000, vec_neg1);
3693
3694    __m128i  f0a_and_nf0b = _mm_and_si128(flag0_07, n0_815);
3695    __m128i nf0a_and_nf0b = _mm_and_si128(   n0_07, n0_815);
3696
3697    // accum set to -s2 if  flag0[0-7] &&  flag0[8-15] &&  flag1[0-7]
3698    // accum set to -s2 if  flag0[0-7] && !flag0[8-15] &&  flag2[0-7] && (s1 + s2) >  0x10000
3699    // accum set to -s2 if  flag0[0-7] && !flag0[8-15] && !flag2[0-7] && (s1 + s2) == 0
3700    __m128i accum_ns2 = _mm_and_si128(_mm_and_si128(flag0_07, flag0_815), flag1_07);
3701    accum_ns2 = _mm_or_si128(accum_ns2, _mm_and_si128(_mm_and_si128(f0a_and_nf0b, flag2_07), sum_g0x10000));
3702    accum_ns2 = _mm_or_si128(accum_ns2, _mm_and_si128(_mm_and_si128(f0a_and_nf0b, n2_07), sum_z));
3703
3704    // accum set to  s2 if !flag0[0-7] &&  flag0[8-15] &&  flag1[8-15]
3705    // accum set to  s2 if !flag0[0-7] && !flag0[8-15] && (s1 - s2) >= 0
3706    __m128i accum_s2 = _mm_and_si128(n0_07, _mm_and_si128(flag0_815, flag1_815));
3707    accum_s2 = _mm_or_si128(accum_s2, _mm_and_si128(_mm_and_si128(n0_07, n0_815), sub_gez));
3708
3709    // flag1[8-15]  set if !flag0[0-7] && !flag0[8-15] && (s1 - s2) >= 0
3710    __m128i new_f1b_s = _mm_and_si128(_mm_and_si128(nf0a_and_nf0b, sub_gez), vec_flagmask);
3711    UINT16 flag1_set = 0;
3712    flag1_set |= _mm_extract_epi16(new_f1b_s, 0) << 8;
3713    flag1_set |= _mm_extract_epi16(new_f1b_s, 1) << 9;
3714    flag1_set |= _mm_extract_epi16(new_f1b_s, 2) << 10;
3715    flag1_set |= _mm_extract_epi16(new_f1b_s, 3) << 11;
3716    flag1_set |= _mm_extract_epi16(new_f1b_s, 4) << 12;
3717    flag1_set |= _mm_extract_epi16(new_f1b_s, 5) << 13;
3718    flag1_set |= _mm_extract_epi16(new_f1b_s, 6) << 14;
3719    flag1_set |= _mm_extract_epi16(new_f1b_s, 7) << 15;
3720
3721    // flag1[8-15]unset if !flag0[0-7] && !flag0[8-15] && (s1 - s2) < 0
3722    __m128i new_f1b_u = _mm_xor_si128(vec_neg1, _mm_and_si128(nf0a_and_nf0b, sub_lz));
3723    new_f1b_u = _mm_and_si128(new_f1b_u, vec_flagmask);
3724    UINT16 flag1_unset = 0;
3725    flag1_unset |= _mm_extract_epi16(new_f1b_u, 0) << 8;
3726    flag1_unset |= _mm_extract_epi16(new_f1b_u, 1) << 9;
3727    flag1_unset |= _mm_extract_epi16(new_f1b_u, 2) << 10;
3728    flag1_unset |= _mm_extract_epi16(new_f1b_u, 3) << 11;
3729    flag1_unset |= _mm_extract_epi16(new_f1b_u, 4) << 12;
3730    flag1_unset |= _mm_extract_epi16(new_f1b_u, 5) << 13;
3731    flag1_unset |= _mm_extract_epi16(new_f1b_u, 6) << 14;
3732    flag1_unset |= _mm_extract_epi16(new_f1b_u, 7) << 15;
3733
3734    // flag1[0-7]   set if  flag0[0-7] && !flag0[8-15] &&  flag2[0-7] && (s1 + s2) <= 0x10000
3735    // flag1[0-7]   set if  flag0[0-7] && !flag0[8-15] && !flag2[0-7] && (s1 + s2) == 0
3736    __m128i new_f1a_s = _mm_and_si128(_mm_and_si128(f0a_and_nf0b, flag2_07), sum_le0x10000);
3737    new_f1a_s = _mm_or_si128(new_f1a_u, _mm_and_si128(_mm_and_si128(f0a_and_nf0b, n2_07), sum_z));
3738    new_f1a_s = _mm_and_si128(new_f1a_s, vec_flagmask);
3739    flag1_set |= _mm_extract_epi16(new_f1a_s, 0) << 0;
3740    flag1_set |= _mm_extract_epi16(new_f1a_s, 1) << 1;
3741    flag1_set |= _mm_extract_epi16(new_f1a_s, 2) << 2;
3742    flag1_set |= _mm_extract_epi16(new_f1a_s, 3) << 3;
3743    flag1_set |= _mm_extract_epi16(new_f1a_s, 4) << 4;
3744    flag1_set |= _mm_extract_epi16(new_f1a_s, 5) << 5;
3745    flag1_set |= _mm_extract_epi16(new_f1a_s, 6) << 6;
3746    flag1_set |= _mm_extract_epi16(new_f1a_s, 7) << 7;
3747
3748    // flag1[0-7] unset if  flag0[0-7] && !flag0[8-15] &&  flag2[0-7] && (s1 + s2) >  0x10000
3749    // flag1[0-7] unset if  flag0[0-7] && !flag0[8-15] && !flag2[0-7] && (s1 + s2) != 0
3750    __m128i new_f1a_u = _mm_and_si128(_mm_and_si128(f0a_and_nf0b, flag2_07), sum_g0x10000);
3751    new_f1a_u = _mm_or_si128(new_f1a_u, _mm_and_si128(_mm_and_si128(f0a_and_nf0b, n2_07), sum_nz));
3752    new_f1a_u = _mm_and_si128(new_f1a_u, vec_flagmask);
3753    flag1_unset |= _mm_extract_epi16(new_f1a_u, 0) << 0;
3754    flag1_unset |= _mm_extract_epi16(new_f1a_u, 1) << 1;
3755    flag1_unset |= _mm_extract_epi16(new_f1a_u, 2) << 2;
3756    flag1_unset |= _mm_extract_epi16(new_f1a_u, 3) << 3;
3757    flag1_unset |= _mm_extract_epi16(new_f1a_u, 4) << 4;
3758    flag1_unset |= _mm_extract_epi16(new_f1a_u, 5) << 5;
3759    flag1_unset |= _mm_extract_epi16(new_f1a_u, 6) << 6;
3760    flag1_unset |= _mm_extract_epi16(new_f1a_u, 7) << 7;
3761
3762    rsp->flag[1] &= ~flag1_unset;
3763    rsp->flag[1] |= flag1_set;
3764
3765    // accum set to  s1 if  flag0[0-7] &&  flag0[8-15] && !flag1[0-7]
3766    // accum set to  s1 if  flag0[0-7] && !flag0[8-15] &&  flag2[0-7] && (s1 + s2) <= 0x10000
3767    // accum set to  s1 if  flag0[0-7] && !flag0[8-15] && !flag2[0-7] && (s1 + s2) != 0
3768    // accum set to  s1 if !flag0[0-7] &&  flag0[8-15] && !flag1[8-15]
3769    // accum set to  s1 if !flag0[0-7] && !flag0[8-15] && (s1 - s2) < 0
3770    __m128i accum_s1 = _mm_and_si128(flag0_07, _mm_and_si128(flag0_815, n1_07));
3771    accum_s1 = _mm_or_si128(accum_s1, _mm_and_si128(_mm_and_si128(f0a_and_nf0b, flag2_07), sum_le0x10000));
3772    accum_s1 = _mm_or_si128(accum_s1, _mm_and_si128(_mm_and_si128(f0a_and_nf0b, n2_07), sum_nz));
3773    accum_s1 = _mm_or_si128(accum_s1, _mm_and_si128(_mm_and_si128(n0_07, flag0_815), n1_815));
3774    accum_s1 = _mm_or_si128(accum_s1, _mm_and_si128(nf0a_and_nf0b, sub_lz));
3775    //__m128i zms2 = _mm_sub_epi16(_mm_setzero_si128(), shuf2);
3776
3777    /*
3778    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3779    __m128i s1_xor_s2 = _mm_xor_si128(rsp->xv[VS1REG], shuf);
3780    __m128i s1_plus_s2 = _mm_add_epi16(rsp->xv[VS1REG], shuf);
3781    __m128i s1_sub_s2 = _mm_sub_epi16(rsp->xv[VS1REG], shuf);
3782    __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1);
3783
3784    __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128());
3785    __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128());
3786    __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1);
3787    __m128i s1s2_plus_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_plus_s2, _mm_setzero_si128()), vec_neg1);
3788    __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128());
3789    __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1);
3790    __m128i s1s2_plus_n1 = _mm_cmpeq_epi16(s1_plus_s2, vec_neg1);
3791    __m128i s1s2_sub_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_sub_s2, _mm_setzero_si128()), vec_neg1);
3792    __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128());
3793    __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1);
3794    __m128i s1_nens2 = _mm_xor_si128(_mm_cmpeq_epi16(rsp->xv[VS1REG], s2_neg), vec_neg1);
3795
3796    __m128i ext_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_n1), vec_flagmask);
3797    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 0) << 0;
3798    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 1) << 1;
3799    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 2) << 2;
3800    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 3) << 3;
3801    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 4) << 4;
3802    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 5) << 5;
3803    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 6) << 6;
3804    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 7) << 7;
3805
3806    __m128i carry_mask = _mm_and_si128(s1s2_xor_lz, vec_flagmask);
3807    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 0) << 0;
3808    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 1) << 1;
3809    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 2) << 2;
3810    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 3) << 3;
3811    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 4) << 4;
3812    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 5) << 5;
3813    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 6) << 6;
3814    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 7) << 7;
3815
3816    __m128i z0_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_nz), s1_nens2);
3817    __m128i z1_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_nz), s1_nens2);
3818    __m128i z_mask = _mm_and_si128(_mm_or_si128(z0_mask, z1_mask), vec_flagmask);
3819    z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi32(z_mask, 15)), vec_shiftmask2);
3820    z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi64(z_mask, 30)), vec_shiftmask4);
3821    z_mask = _mm_or_si128(z_mask, _mm_srli_si128(z_mask, 7));
3822    z_mask = _mm_or_si128(z_mask, _mm_srli_epi16(z_mask, 4));
3823    rsp->flag[0] |= (_mm_extract_epi16(z_mask, 0) << 8) & 0x00ff00;
3824
3825    __m128i f0_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz),         _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)), vec_flagmask);
3826    __m128i f8_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s2_lz)), vec_flagmask);
3827    f0_mask = _mm_and_si128(f0_mask, vec_flagmask);
3828    f8_mask = _mm_and_si128(f8_mask, vec_flagmask);
3829    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 0) << 0;
3830    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 1) << 1;
3831    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 2) << 2;
3832    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 3) << 3;
3833    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 4) << 4;
3834    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 5) << 5;
3835    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 6) << 6;
3836    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 7) << 7;
3837
3838    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 0) << 8;
3839    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 1) << 9;
3840    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 2) << 10;
3841    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 3) << 11;
3842    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 4) << 12;
3843    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 5) << 13;
3844    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 6) << 14;
3845    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 7) << 15;*/
35623846#else
3563               ACCUM_L(i) = -(UINT16)s2;
3564#endif
3565            }
3566            else
3567            {
3568#if USE_SIMD
3569               SIMD_INSERT16(rsp->accum_l, s1, i);
3570#else
3571               ACCUM_L(i) = s1;
3572#endif
3573            }
3574         }
3575         else//ZERO_FLAG(i)==0
3576         {
3577            if (rsp->flag[2] & (1 << (i)))
3578            {
3579               if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000)
3580               {//proper fix for Harvest Moon 64, r4
3581#if USE_SIMD
3582                  SIMD_INSERT16(rsp->accum_l, s1, i);
3583#else
3584                  ACCUM_L(i) = s1;
3585#endif
3586                  CLEAR_COMPARE_FLAG(i);
3587               }
3588               else
3589               {
3590#if USE_SIMD
3591                  SIMD_INSERT16(rsp->accum_l, -((UINT16)s2), i);
3592#else
3593                  ACCUM_L(i) = -((UINT16)s2);
3594#endif
3595                  SET_COMPARE_FLAG(i);
3596               }
3597            }
3598            else
3599            {
3600               if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0)
3601               {
3602#if USE_SIMD
3603                  SIMD_INSERT16(rsp->accum_l, s1, i);
3604#else
3605                  ACCUM_L(i) = s1;
3606#endif
3607                  CLEAR_COMPARE_FLAG(i);
3608               }
3609               else
3610               {
3611#if USE_SIMD
3612                  SIMD_INSERT16(rsp->accum_l, -((UINT16)s2), i);
3613#else
3614                  ACCUM_L(i) = -((UINT16)s2);
3615#endif
3616                  SET_COMPARE_FLAG(i);
3617               }
3618            }
3619         }
3620      }
3621      else//CARRY_FLAG(i)==0
3622      {
3623         if (ZERO_FLAG(i) != 0)
3624         {
3625            if (rsp->flag[1] & (1 << (8+i)))
3626            {
3627#if USE_SIMD
3628               SIMD_INSERT16(rsp->accum_l, s2, i);
3629#else
3630               ACCUM_L(i) = s2;
3631#endif
3632            }
3633            else
3634            {
3635#if USE_SIMD
3636               SIMD_INSERT16(rsp->accum_l, s1, i);
3637#else
3638               ACCUM_L(i) = s1;
3639#endif
3640            }
3641         }
3642         else
3643         {
3644            if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0)
3645            {
3646#if USE_SIMD
3647               SIMD_INSERT16(rsp->accum_l, s2, i);
3648#else
3649               ACCUM_L(i) = s2;
3650#endif
3651               rsp->flag[1] |= (1 << (8+i));
3652            }
3653            else
3654            {
3655#if USE_SIMD
3656               SIMD_INSERT16(rsp->accum_l, s1, i);
3657#else
3658               ACCUM_L(i) = s1;
3659#endif
3660               rsp->flag[1] &= ~(1 << (8+i));
3661            }
3662         }
3663      }
3847    for (int i = 0; i < 8; i++)
3848    {
3849        INT16 s1, s2;
3850      SCALAR_GET_VS1(s1, i);
3851      SCALAR_GET_VS2(s2, i);
36643852
3665#if USE_SIMD
3666      SIMD_EXTRACT16(rsp->accum_l, vres[i], i);
3667#else
3668      vres[i] = ACCUM_L(i);
3853        if (CARRY_FLAG(rsp, i) != 0)
3854        {
3855            if (ZERO_FLAG(rsp, i) != 0)
3856            {
3857                if (COMPARE_FLAG(rsp, i) != 0)
3858                {
3859                    SET_ACCUM_L(-(UINT16)s2, i);
3860                }
3861                else
3862                {
3863                    SET_ACCUM_L(s1, i);
3864                }
3865            }
3866            else//ZERO_FLAG(rsp, i)==0
3867            {
3868                if (CLIP1_FLAG(rsp, i) != 0)
3869                {
3870                    if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) > 0x10000)
3871                    {//proper fix for Harvest Moon 64, r4
3872                        SET_ACCUM_L(s1, i);
3873                        CLEAR_COMPARE_FLAG(i);
3874                    }
3875                    else
3876                    {
3877                        SET_ACCUM_L(-((UINT16)s2), i);
3878                        SET_COMPARE_FLAG(i);
3879                    }
3880                }
3881                else
3882                {
3883                    if (((UINT32)(UINT16)(s1) + (UINT32)(UINT16)(s2)) != 0)
3884                    {
3885                        SET_ACCUM_L(s1, i);
3886                        CLEAR_COMPARE_FLAG(i);
3887                    }
3888                    else
3889                    {
3890                        SET_ACCUM_L(-((UINT16)s2), i);
3891                        SET_COMPARE_FLAG(i);
3892                    }
3893                }
3894            }
3895        }
3896        else//CARRY_FLAG(rsp, i)==0
3897        {
3898            if (ZERO_FLAG(rsp, i) != 0)
3899            {
3900                if (CLIP2_FLAG(rsp, i) != 0)
3901                {
3902                    SET_ACCUM_L(s2, i);
3903                }
3904                else
3905                {
3906                    SET_ACCUM_L(s1, i);
3907                }
3908            }
3909            else
3910            {
3911                if (((INT32)(UINT16)s1 - (INT32)(UINT16)s2) >= 0)
3912                {
3913                    SET_ACCUM_L(s2, i);
3914                    SET_CLIP2_FLAG(i);
3915                }
3916                else
3917                {
3918                    SET_ACCUM_L(s1, i);
3919                    CLEAR_CLIP2_FLAG(i);
3920                }
3921            }
3922        }
3923        vres[i] = ACCUM_L(rsp, i);
3924    }
3925   CLEAR_ZERO_FLAGS();
3926   CLEAR_CARRY_FLAGS();
3927   CLEAR_CLIP1_FLAGS();
3928    WRITEBACK_RESULT();
36693929#endif
3670   }
3671   rsp->flag[0] = 0;
3672   rsp->flag[2] = 0;
3673   WRITEBACK_RESULT();
36743930}
36753931
36763932INLINE void cfunc_rsp_vch(void *param)
36773933{
3678   rsp_state *rsp = (rsp_state*)param;
3679   int op = rsp->impstate->arg0;
3934    rsp_state *rsp = (rsp_state*)param;
3935    int op = rsp->impstate->arg0;
36803936
3681   // 31       25  24     20      15      10      5        0
3682   // ------------------------------------------------------
3683   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 |
3684   // ------------------------------------------------------
3685   //
3686   // Vector clip high
3937    // 31       25  24     20      15      10      5        0
3938    // ------------------------------------------------------
3939    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100101 |
3940    // ------------------------------------------------------
3941    //
3942    // Vector clip high
36873943
3688   rsp->flag[0] = 0;
3689   rsp->flag[1] = 0;
3690   rsp->flag[2] = 0;
3944   CLEAR_CARRY_FLAGS();
3945   CLEAR_COMPARE_FLAGS();
3946   CLEAR_CLIP1_FLAGS();
3947   CLEAR_ZERO_FLAGS();
3948   CLEAR_CLIP2_FLAGS();
36913949
3692#if USE_SIMD
3693   // Compare flag
3694   // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0)
3695   // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0)
3950#if 0//USE_SIMD
3951    // Compare flag
3952    // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0
3953    // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0)
36963954
3697   // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0)
3698   // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
3955    // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0)
3956    // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
36993957
3700   // Carry flag
3701   // flag[0] bit [0- 7] set if (s1 ^ s2) < 0
3958    // Carry flag
3959    // flag[0] bit [0- 7] set if (s1 ^ s2) < 0
37023960
3703   // Zero flag
3704   // flag[0] bit [8-15] set if (s1 ^ s2) < 0  && (s1 + s2) != 0 && (s1 != ~s2)
3705   // flag[0] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) != 0 && (s1 != ~s2)
3961    // Zero flag
3962    // flag[0] bit [8-15] set if (s1 ^ s2) < 0  && (s1 + s2) != 0 && (s1 != ~s2)
3963    // flag[0] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) != 0 && (s1 != ~s2)
37063964
3707   // flag[2] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) == -1
3965    // flag[2] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) == -1
37083966
3709   // accum set to -s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0)
3710   // accum set to -s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
3967    // accum set to -s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0
3968    // accum set to -s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
37113969
3712   // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0)
3713   // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0
3970    // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0
3971    // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0
37143972
3715   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3716   __m128i s1_xor_s2 = _mm_xor_si128(rsp->xv[VS1REG], shuf);
3717   __m128i s1_plus_s2 = _mm_add_epi16(rsp->xv[VS1REG], shuf);
3718   __m128i s1_sub_s2 = _mm_sub_epi16(rsp->xv[VS1REG], shuf);
3719   __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1);
3973    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3974    __m128i s1_xor_s2 = _mm_xor_si128(rsp->xv[VS1REG], shuf);
3975    __m128i s1_plus_s2 = _mm_add_epi16(rsp->xv[VS1REG], shuf);
3976    __m128i s1_sub_s2 = _mm_sub_epi16(rsp->xv[VS1REG], shuf);
3977    __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1);
37203978
3721   __m128i s2_lz = _mm_cmplt_epi16(shuf, vec_zero);
3722   __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, vec_zero);
3723   __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1);
3724   __m128i s1s2_plus_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_plus_s2, vec_zero), vec_neg1);
3725   __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, vec_zero);
3726   __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1);
3727   __m128i s1s2_plus_n1 = _mm_cmpeq_epi16(s1_plus_s2, vec_neg1);
3728   __m128i s1s2_sub_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_sub_s2, vec_zero), vec_neg1);
3729   __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, vec_zero);
3730   __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1);
3731   __m128i s1_nens2 = _mm_xor_si128(_mm_cmpeq_epi16(rsp->xv[VS1REG], s2_neg), vec_neg1);
3979    __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128());
3980    __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128());
3981    __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1);
3982    __m128i s1s2_plus_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_plus_s2, _mm_setzero_si128()), vec_neg1);
3983    __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128());
3984    __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1);
3985    __m128i s1s2_plus_n1 = _mm_cmpeq_epi16(s1_plus_s2, vec_neg1);
3986    __m128i s1s2_sub_nz = _mm_xor_si128(_mm_cmpeq_epi16(s1_sub_s2, _mm_setzero_si128()), vec_neg1);
3987    __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128());
3988    __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1);
3989    __m128i s1_nens2 = _mm_xor_si128(_mm_cmpeq_epi16(rsp->xv[VS1REG], s2_neg), vec_neg1);
37323990
3733   __m128i ext_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_n1), vec_flagmask);
3734   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 0) << 0;
3735   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 1) << 1;
3736   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 2) << 2;
3737   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 3) << 3;
3738   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 4) << 4;
3739   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 5) << 5;
3740   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 6) << 6;
3741   rsp->flag[2] |= _mm_extract_epi16(ext_mask, 7) << 7;
3991    __m128i ext_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_n1), vec_flagmask);
3992    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 0) << 0;
3993    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 1) << 1;
3994    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 2) << 2;
3995    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 3) << 3;
3996    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 4) << 4;
3997    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 5) << 5;
3998    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 6) << 6;
3999    rsp->flag[2] |= _mm_extract_epi16(ext_mask, 7) << 7;
37424000
3743   __m128i carry_mask = _mm_and_si128(s1s2_xor_lz, vec_flagmask);
3744   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 0) << 0;
3745   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 1) << 1;
3746   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 2) << 2;
3747   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 3) << 3;
3748   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 4) << 4;
3749   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 5) << 5;
3750   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 6) << 6;
3751   rsp->flag[0] |= _mm_extract_epi16(carry_mask, 7) << 7;
4001    __m128i carry_mask = _mm_and_si128(s1s2_xor_lz, vec_flagmask);
4002    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 0) << 0;
4003    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 1) << 1;
4004    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 2) << 2;
4005    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 3) << 3;
4006    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 4) << 4;
4007    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 5) << 5;
4008    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 6) << 6;
4009    rsp->flag[0] |= _mm_extract_epi16(carry_mask, 7) << 7;
37524010
3753   __m128i z0_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_nz), s1_nens2);
3754   __m128i z1_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_nz), s1_nens2);
3755   __m128i z_mask = _mm_and_si128(_mm_or_si128(z0_mask, z1_mask), vec_flagmask);
3756   z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi32(z_mask, 15)), vec_shiftmask2);
3757   z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi64(z_mask, 30)), vec_shiftmask4);
3758   z_mask = _mm_or_si128(z_mask, _mm_srli_si128(z_mask, 7));
3759   z_mask = _mm_or_si128(z_mask, _mm_srli_epi16(z_mask, 4));
3760   rsp->flag[0] |= (_mm_extract_epi16(z_mask, 0) << 8) & 0x00ff00;
4011    __m128i z0_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_nz), s1_nens2);
4012    __m128i z1_mask = _mm_and_si128(_mm_and_si128(s1s2_xor_lz, s1s2_plus_nz), s1_nens2);
4013    __m128i z_mask = _mm_and_si128(_mm_or_si128(z0_mask, z1_mask), vec_flagmask);
4014    z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi32(z_mask, 15)), vec_shiftmask2);
4015    z_mask = _mm_and_si128(_mm_or_si128(z_mask, _mm_srli_epi64(z_mask, 30)), vec_shiftmask4);
4016    z_mask = _mm_or_si128(z_mask, _mm_srli_si128(z_mask, 7));
4017    z_mask = _mm_or_si128(z_mask, _mm_srli_epi16(z_mask, 4));
4018    rsp->flag[0] |= (_mm_extract_epi16(z_mask, 0) << 8) & 0x00ff00;
37614019
3762   __m128i f0_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz),         _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)), vec_flagmask);
3763   __m128i f8_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s2_lz)), vec_flagmask);
3764   f0_mask = _mm_and_si128(f0_mask, vec_flagmask);
3765   f8_mask = _mm_and_si128(f8_mask, vec_flagmask);
3766   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 0) << 0;
3767   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 1) << 1;
3768   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 2) << 2;
3769   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 3) << 3;
3770   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 4) << 4;
3771   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 5) << 5;
3772   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 6) << 6;
3773   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 7) << 7;
4020    __m128i f0_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz),         _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez)), vec_flagmask);
4021    __m128i f8_mask = _mm_and_si128(_mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s2_lz)), vec_flagmask);
4022    f0_mask = _mm_and_si128(f0_mask, vec_flagmask);
4023    f8_mask = _mm_and_si128(f8_mask, vec_flagmask);
4024    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 0) << 0;
4025    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 1) << 1;
4026    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 2) << 2;
4027    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 3) << 3;
4028    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 4) << 4;
4029    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 5) << 5;
4030    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 6) << 6;
4031    rsp->flag[1] |= _mm_extract_epi16(f0_mask, 7) << 7;
37744032
3775   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 0) << 8;
3776   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 1) << 9;
3777   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 2) << 10;
3778   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 3) << 11;
3779   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 4) << 12;
3780   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 5) << 13;
3781   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 6) << 14;
3782   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 7) << 15;
4033    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 0) << 8;
4034    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 1) << 9;
4035    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 2) << 10;
4036    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 3) << 11;
4037    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 4) << 12;
4038    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 5) << 13;
4039    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 6) << 14;
4040    rsp->flag[1] |= _mm_extract_epi16(f8_mask, 7) << 15;
37834041#else
37844042
3785   INT16 vres[8];
3786   UINT32 vce = 0;
3787   for (int i = 0; i < 8; i++)
3788   {
3789#if USE_SIMD
3790      INT16 s1, s2;
3791      SIMD_EXTRACT16(rsp->xv[VS1REG], s1, i);
3792      SIMD_EXTRACT16(rsp->xv[VS2REG], s2, VEC_EL_2(EL, i));
3793#else
3794      INT16 s1 = VREG_S(VS1REG, i);
3795      INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i));
3796#endif
4043    INT16 vres[8];
4044    UINT32 vce = 0;
4045    for (int i = 0; i < 8; i++)
4046    {
4047        INT16 s1, s2;
4048      SCALAR_GET_VS1(s1, i);
4049      SCALAR_GET_VS2(s2, i);
37974050
3798      if ((s1 ^ s2) < 0)
3799      {
3800         vce = (s1 + s2 == -1);
3801         SET_CARRY_FLAG(i);
3802         if (s2 < 0)
3803         {
3804            rsp->flag[1] |= (1 << (8+i));
3805         }
4051        if ((s1 ^ s2) < 0)
4052        {
4053            vce = (s1 + s2 == -1);
4054            SET_CARRY_FLAG(i);
4055            if (s2 < 0)
4056            {
4057            SET_CLIP2_FLAG(i);
4058            }
38064059
3807         if (s1 + s2 <= 0)
3808         {
3809            SET_COMPARE_FLAG(i);
3810            vres[i] = -((UINT16)s2);
3811         }
3812         else
3813         {
3814            vres[i] = s1;
3815         }
4060            if ((s1 + s2) <= 0)
4061            {
4062                SET_COMPARE_FLAG(i);
4063                vres[i] = -((UINT16)s2);
4064            }
4065            else
4066            {
4067                vres[i] = s1;
4068            }
38164069
3817         if (s1 + s2 != 0 && s1 != ~s2)
3818         {
3819            SET_ZERO_FLAG(i);
3820         }
3821      }//sign
3822      else
3823      {
3824         vce = 0;
3825         if (s2 < 0)
3826         {
3827            SET_COMPARE_FLAG(i);
3828         }
3829         if (s1 - s2 >= 0)
3830         {
3831            rsp->flag[1] |= (1 << (8+i));
3832            vres[i] = s2;
3833         }
3834         else
3835         {
3836            vres[i] = s1;
3837         }
4070            if ((s1 + s2) != 0 && s1 != ~s2)
4071            {
4072                SET_ZERO_FLAG(i);
4073            }
4074        }//sign
4075        else
4076        {
4077            vce = 0;
4078            if (s2 < 0)
4079            {
4080                SET_COMPARE_FLAG(i);
4081            }
4082            if ((s1 - s2) >= 0)
4083            {
4084            SET_CLIP2_FLAG(i);
4085                vres[i] = s2;
4086            }
4087            else
4088            {
4089                vres[i] = s1;
4090            }
38384091
3839         if ((s1 - s2) != 0 && s1 != ~s2)
3840         {
3841            SET_ZERO_FLAG(i);
3842         }
4092            if ((s1 - s2) != 0 && s1 != ~s2)
4093            {
4094                SET_ZERO_FLAG(i);
4095            }
4096        }
4097        if (vce)
4098        {
4099           SET_CLIP1_FLAG(i);
38434100      }
3844      rsp->flag[2] |= (vce << (i));
3845#if USE_SIMD
3846      SIMD_INSERT16(rsp->accum_l, vres[i], i);
3847#else
3848      ACCUM_L(i) = vres[i];
4101        SET_ACCUM_L(vres[i], i);
4102    }
4103    WRITEBACK_RESULT();
38494104#endif
3850   }
3851   WRITEBACK_RESULT();
3852#endif
38534105}
38544106
// cfunc_rsp_vcr -- RSP "vector clip reverse" (see the encoding diagram in the body).
// NOTE(review): this span is revision-diff text. Each statement appears once in its
// r25438 form and once in its r25439 form; the leading digits on every line are the
// diff viewer's old/new line numbers, not code.
//
// param: cast to rsp_state*. The opcode word is read from rsp->impstate->arg0 into
//        `op` (presumably consumed by the VS1REG/VS2REG/VDREG/EL macros -- confirm).
//
// Scalar path, for each of the 8 lanes i (s1 from VS1, s2 from VS2):
//   signs differ ((s1 ^ s2) < 0):
//     s2 < 0        -> clip2 flag set for lane i (r25438: flag[1] bit 8+i)
//     s1 + s2 <= 0  -> accumulator low = ~s2, compare flag set
//     otherwise     -> accumulator low = s1
//   signs equal:
//     s2 < 0        -> compare flag set
//     s1 - s2 >= 0  -> accumulator low = s2, clip2 flag set
//     otherwise     -> accumulator low = s1
// The lane value is then copied into vres[] and WRITEBACK_RESULT() is invoked.
38554107INLINE void cfunc_rsp_vcr(void *param)
38564108{
3857   rsp_state *rsp = (rsp_state*)param;
3858   int op = rsp->impstate->arg0;
4109    rsp_state *rsp = (rsp_state*)param;
4110    int op = rsp->impstate->arg0;
38594111
3860   // 31       25  24     20      15      10      5        0
3861   // ------------------------------------------------------
3862   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 |
3863   // ------------------------------------------------------
3864   //
3865   // Vector clip reverse
4112    // 31       25  24     20      15      10      5        0
4113    // ------------------------------------------------------
4114    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100110 |
4115    // ------------------------------------------------------
4116    //
4117    // Vector clip reverse
38664118
// Start from a clean flag state: r25438 zeroes flag[0..2]; r25439 instead invokes the
// five CLEAR_*_FLAGS() macros (carry, compare, clip1, zero, clip2).
3867   rsp->flag[0] = 0;
3868   rsp->flag[1] = 0;
3869   rsp->flag[2] = 0;
4119   CLEAR_CARRY_FLAGS();
4120   CLEAR_COMPARE_FLAGS();
4121   CLEAR_CLIP1_FLAGS();
4122   CLEAR_ZERO_FLAGS();
4123   CLEAR_CLIP2_FLAGS();
38704124
// r25439 changes the guard to "#if 0", so the SIMD body below is compiled out there
// and only the scalar loop after #else is built.
3871#if USE_SIMD
3872   // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0)
3873   // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0)
4125#if 0//USE_SIMD
4126    // flag[1] bit [0- 7] set if (s1 ^ s2) < 0 && (s1 + s2) <= 0)
4127    // flag[1] bit [0- 7] set if (s1 ^ s2) >= 0 && (s2 < 0)
38744128
3875   // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0)
3876   // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
4129    // flag[1] bit [8-15] set if (s1 ^ s2) < 0 && (s2 < 0)
4130    // flag[1] bit [8-15] set if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
38774131
3878   // accum set to ~s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0)
3879   // accum set to ~s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
4132    // accum set to ~s2 if (s1 ^ s2) < 0 && (s1 + s2) <= 0)
4133    // accum set to ~s2 if (s1 ^ s2) >= 0 && (s1 - s2) >= 0
38804134
3881   // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0)
3882   // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0
// shuf is VS2 permuted through vec_shuf_inverse[EL]; the xor/sum/difference against
// VS1 and the bitwise complement of shuf (xor with vec_neg1) are formed per 16-bit lane.
3883   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3884   __m128i s1_xor_s2 = _mm_xor_si128(rsp->xv[VS1REG], shuf);
3885   __m128i s1_plus_s2 = _mm_add_epi16(rsp->xv[VS1REG], shuf);
3886   __m128i s1_sub_s2 = _mm_sub_epi16(rsp->xv[VS1REG], shuf);
3887   __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1);
4135    // accum set to s1 if (s1 ^ s2) < 0 && (s1 + s2) > 0)
4136    // accum set to s1 if (s1 ^ s2) >= 0 && (s1 - s2) < 0
4137    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4138    __m128i s1_xor_s2 = _mm_xor_si128(rsp->xv[VS1REG], shuf);
4139    __m128i s1_plus_s2 = _mm_add_epi16(rsp->xv[VS1REG], shuf);
4140    __m128i s1_sub_s2 = _mm_sub_epi16(rsp->xv[VS1REG], shuf);
4141    __m128i s2_neg = _mm_xor_si128(shuf, vec_neg1);
38884142
// Per-lane predicate masks; each "opposite" predicate is derived by xor with vec_neg1.
// r25439 compares against _mm_setzero_si128() where r25438 used vec_zero.
3889   __m128i s2_lz = _mm_cmplt_epi16(shuf, vec_zero);
3890   __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, vec_zero);
3891   __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1);
3892   __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, vec_zero);
3893   __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1);
3894   __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, vec_zero);
3895   __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1);
4143    __m128i s2_lz = _mm_cmplt_epi16(shuf, _mm_setzero_si128());
4144    __m128i s1s2_xor_lz = _mm_cmplt_epi16(s1_xor_s2, _mm_setzero_si128());
4145    __m128i s1s2_xor_gez = _mm_xor_si128(s1s2_xor_lz, vec_neg1);
4146    __m128i s1s2_plus_gz = _mm_cmpgt_epi16(s1_plus_s2, _mm_setzero_si128());
4147    __m128i s1s2_plus_lez = _mm_xor_si128(s1s2_plus_gz, vec_neg1);
4148    __m128i s1s2_sub_lz = _mm_cmplt_epi16(s1_sub_s2, _mm_setzero_si128());
4149    __m128i s1s2_sub_gez = _mm_xor_si128(s1s2_sub_lz, vec_neg1);
38964150
// Select s1 or ~s2 per lane and store the blend to both accum_l and the destination.
// NOTE(review): s2_mask covers the equal-sign case too and is combined with s2_neg (~s2),
// whereas the scalar loop below stores plain s2 in that case -- confirm which is intended.
3897   __m128i s1_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_lz),   _mm_and_si128(s1s2_xor_lz, s1s2_plus_gz));
3898   __m128i s2_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez));
3899   rsp->accum_l = _mm_or_si128(_mm_and_si128(rsp->xv[VS1REG], s1_mask), _mm_and_si128(s2_neg, s2_mask));
3900   rsp->xv[VDREG] = rsp->accum_l;
4151    __m128i s1_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_lz),   _mm_and_si128(s1s2_xor_lz, s1s2_plus_gz));
4152    __m128i s2_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez));
4153    rsp->accum_l = _mm_or_si128(_mm_and_si128(rsp->xv[VS1REG], s1_mask), _mm_and_si128(s2_neg, s2_mask));
4154    rsp->xv[VDREG] = rsp->accum_l;
39014155
// r25438 masks each lane with vec_flagmask and ORs lane k of f0_mask into flag[1] at
// bit k and lane k of f8_mask at bit 8+k. r25439 stores the two lane masks directly
// in xvflag[COMPARE] and xvflag[CLIP2] instead.
3902   __m128i f0_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz),         _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez));
3903   __m128i f8_mask = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s2_lz));
3904   f0_mask = _mm_and_si128(f0_mask, vec_flagmask);
3905   f8_mask = _mm_and_si128(f8_mask, vec_flagmask);
3906   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 0) << 0;
3907   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 1) << 1;
3908   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 2) << 2;
3909   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 3) << 3;
3910   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 4) << 4;
3911   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 5) << 5;
3912   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 6) << 6;
3913   rsp->flag[1] |= _mm_extract_epi16(f0_mask, 7) << 7;
3914
3915   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 0) << 8;
3916   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 1) << 9;
3917   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 2) << 10;
3918   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 3) << 11;
3919   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 4) << 12;
3920   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 5) << 13;
3921   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 6) << 14;
3922   rsp->flag[1] |= _mm_extract_epi16(f8_mask, 7) << 15;
4156    rsp->xvflag[COMPARE] = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s2_lz),         _mm_and_si128(s1s2_xor_lz, s1s2_plus_lez));
4157    rsp->xvflag[CLIP2] = _mm_or_si128(_mm_and_si128(s1s2_xor_gez, s1s2_sub_gez),  _mm_and_si128(s1s2_xor_lz, s2_lz));
39234158#else
// Scalar path: one iteration per lane. r25438 reads the lanes with VREG_S (VS2 through
// VEC_EL_2(EL, i)); r25439 reads them through SCALAR_GET_VS1 / SCALAR_GET_VS2.
3924   INT16 vres[8];
3925   for (int i = 0; i < 8; i++)
3926   {
3927      INT16 s1 = VREG_S(VS1REG, i);
3928      INT16 s2 = VREG_S(VS2REG, VEC_EL_2(EL, i));
4159    INT16 vres[8];
4160    for (int i = 0; i < 8; i++)
4161    {
4162        INT16 s1, s2;
4163      SCALAR_GET_VS1(s1, i);
4164      SCALAR_GET_VS2(s2, i);
39294165
// r25438 form of the per-lane decision; it writes ACCUM_L(i) and flag[1] bit 8+i directly.
3930      if ((INT16)(s1 ^ s2) < 0)
3931      {
3932         if (s2 < 0)
3933         {
3934            rsp->flag[1] |= (1 << (8+i));
3935         }
3936         if ((s1 + s2) <= 0)
3937         {
3938            ACCUM_L(i) = ~((UINT16)s2);
3939            SET_COMPARE_FLAG(i);
3940         }
3941         else
3942         {
3943            ACCUM_L(i) = s1;
3944         }
3945      }
3946      else
3947      {
3948         if (s2 < 0)
3949         {
3950            SET_COMPARE_FLAG(i);
3951         }
3952         if ((s1 - s2) >= 0)
3953         {
3954            ACCUM_L(i) = s2;
3955            rsp->flag[1] |= (1 << (8+i));
3956         }
3957         else
3958         {
3959            ACCUM_L(i) = s1;
3960         }
3961      }
// r25439 form of the same decision, using SET_CLIP2_FLAG and SET_ACCUM_L instead.
4166        if ((INT16)(s1 ^ s2) < 0)
4167        {
4168            if (s2 < 0)
4169            {
4170            SET_CLIP2_FLAG(i);
4171            }
4172            if ((s1 + s2) <= 0)
4173            {
4174                SET_ACCUM_L(~((UINT16)s2), i);
4175                SET_COMPARE_FLAG(i);
4176            }
4177            else
4178            {
4179                SET_ACCUM_L(s1, i);
4180            }
4181        }
4182        else
4183        {
4184            if (s2 < 0)
4185            {
4186                SET_COMPARE_FLAG(i);
4187            }
4188            if ((s1 - s2) >= 0)
4189            {
4190                SET_ACCUM_L(s2, i);
4191            SET_CLIP2_FLAG(i);
4192            }
4193            else
4194            {
4195                SET_ACCUM_L(s1, i);
4196            }
4197        }
39624198
// The lane result is read back from the low accumulator before the register writeback.
3963      vres[i] = ACCUM_L(i);
3964   }
3965   WRITEBACK_RESULT();
4199        vres[i] = ACCUM_L(rsp, i);
4200    }
4201    WRITEBACK_RESULT();
39664202#endif
39674203}
39684204
// cfunc_rsp_vmrg -- merge two vectors lane by lane according to the compare flags.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// For each of the 8 lanes the result is the VS1 lane when that lane's compare flag is
// non-zero, otherwise the VS2 lane. The result goes to the low accumulator and to the
// destination register (directly in the SIMD path, via WRITEBACK_RESULT() in the scalar one).
39694205INLINE void cfunc_rsp_vmrg(void *param)
39704206{
3971   rsp_state *rsp = (rsp_state*)param;
3972   int op = rsp->impstate->arg0;
3973   // 31       25  24     20      15      10      5        0
3974   // ------------------------------------------------------
3975   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 |
3976   // ------------------------------------------------------
3977   //
3978   // Merges two vectors according to compare flags
4207    rsp_state *rsp = (rsp_state*)param;
4208    int op = rsp->impstate->arg0;
4209    // 31       25  24     20      15      10      5        0
4210    // ------------------------------------------------------
4211    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 100111 |
4212    // ------------------------------------------------------
4213    //
4214    // Merges two vectors according to compare flags
39794215
39804216#if USE_SIMD
// s2mask is all-ones in lanes whose compare value equals zero; s1mask is its complement.
// r25438 first rebuilds a compare vector from the eight COMPARE_FLAG() values; r25439
// tests rsp->xvflag[COMPARE] directly.
3981   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
3982   __m128i compare = _mm_set_epi16(COMPARE_FLAG(7), COMPARE_FLAG(6), COMPARE_FLAG(5), COMPARE_FLAG(4),
3983                           COMPARE_FLAG(3), COMPARE_FLAG(2), COMPARE_FLAG(1), COMPARE_FLAG(0));
3984   __m128i s2mask = _mm_cmpeq_epi16(compare, vec_zero);
3985   __m128i s1mask = _mm_xor_si128(s2mask, vec_neg1);
3986   __m128i result = _mm_and_si128(rsp->xv[VS1REG], s1mask);
3987   rsp->xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, s2mask));
3988   rsp->accum_l = rsp->xv[VDREG];
4217    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4218    __m128i s2mask = _mm_cmpeq_epi16(rsp->xvflag[COMPARE], _mm_setzero_si128());
4219    __m128i s1mask = _mm_xor_si128(s2mask, vec_neg1);
4220    __m128i result = _mm_and_si128(rsp->xv[VS1REG], s1mask);
4221    rsp->xv[VDREG] = _mm_or_si128(result, _mm_and_si128(shuf, s2mask));
4222    rsp->accum_l = rsp->xv[VDREG];
39894223#else
// Scalar path: pick s1 or s2 per lane, mirror the choice into the low accumulator.
3990   INT16 vres[8];
3991   for (int i = 0; i < 8; i++)
3992   {
3993      INT16 s1 = (INT16)VREG_S(VS1REG, i);
3994      INT16 s2 = (INT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
3995      if (COMPARE_FLAG(i) != 0)
3996      {
3997         vres[i] = s1;
3998      }
3999      else
4000      {
4001         vres[i] = s2;
4002      }
4224    INT16 vres[8];
4225    for (int i = 0; i < 8; i++)
4226    {
4227        INT16 s1, s2;
4228      SCALAR_GET_VS1(s1, i);
4229      SCALAR_GET_VS2(s2, i);
4230        if (COMPARE_FLAG(rsp, i) != 0)
4231        {
4232            vres[i] = s1;
4233        }
4234        else
4235        {
4236            vres[i] = s2;
4237        }
40034238
4004      ACCUM_L(i) = vres[i];
4005   }
4006   WRITEBACK_RESULT();
4239        SET_ACCUM_L(vres[i], i);
4240    }
4241    WRITEBACK_RESULT();
40074242#endif
40084243}
40094244
// cfunc_rsp_vand -- lane-wise bitwise AND of two vector registers.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// Result per lane: s1 & s2, stored to the low accumulator and to the destination register.
40104245INLINE void cfunc_rsp_vand(void *param)
40114246{
4012   rsp_state *rsp = (rsp_state*)param;
4013   int op = rsp->impstate->arg0;
4247    rsp_state *rsp = (rsp_state*)param;
4248    int op = rsp->impstate->arg0;
40144249
4015   // 31       25  24     20      15      10      5        0
4016   // ------------------------------------------------------
4017   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 |
4018   // ------------------------------------------------------
4019   //
4020   // Bitwise AND of two vector registers
4250    // 31       25  24     20      15      10      5        0
4251    // ------------------------------------------------------
4252    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101000 |
4253    // ------------------------------------------------------
4254    //
4255    // Bitwise AND of two vector registers
40214256
40224257#if USE_SIMD
// SIMD path: VS2 is permuted through vec_shuf_inverse[EL], then ANDed with VS1 in one op.
4023   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4024   rsp->xv[VDREG] = _mm_and_si128(rsp->xv[VS1REG], shuf);
4025   rsp->accum_l = rsp->xv[VDREG];
4258    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4259    rsp->xv[VDREG] = _mm_and_si128(rsp->xv[VS1REG], shuf);
4260    rsp->accum_l = rsp->xv[VDREG];
40264261#else
// Scalar path: r25438 reads lanes with VREG_S and writes ACCUM_L(i); r25439 uses the
// SCALAR_GET_VS1/VS2 and SET_ACCUM_L macros for the same steps.
4027   INT16 vres[8];
4028   for (int i = 0; i < 8; i++)
4029   {
4030      UINT16 s1 = (UINT16)VREG_S(VS1REG, i);
4031      UINT16 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
4032      vres[i] = s1 & s2;
4033      ACCUM_L(i) = vres[i];
4034   }
4035   WRITEBACK_RESULT();
4262    INT16 vres[8];
4263    for (int i = 0; i < 8; i++)
4264    {
4265        UINT16 s1, s2;
4266      SCALAR_GET_VS1(s1, i);
4267      SCALAR_GET_VS2(s2, i);
4268        vres[i] = s1 & s2;
4269        SET_ACCUM_L(vres[i], i);
4270    }
4271    WRITEBACK_RESULT();
40364272#endif
40374273}
40384274
// cfunc_rsp_vnand -- lane-wise bitwise NOT-AND of two vector registers.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// Result per lane: ~(s1 & s2), stored to the low accumulator and to the destination register.
40394275INLINE void cfunc_rsp_vnand(void *param)
40404276{
4041   rsp_state *rsp = (rsp_state*)param;
4042   int op = rsp->impstate->arg0;
4277    rsp_state *rsp = (rsp_state*)param;
4278    int op = rsp->impstate->arg0;
40434279
4044   // 31       25  24     20      15      10      5        0
4045   // ------------------------------------------------------
4046   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 |
4047   // ------------------------------------------------------
4048   //
4049   // Bitwise NOT AND of two vector registers
4280    // 31       25  24     20      15      10      5        0
4281    // ------------------------------------------------------
4282    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101001 |
4283    // ------------------------------------------------------
4284    //
4285    // Bitwise NOT AND of two vector registers
40504286
40514287#if USE_SIMD
// SIMD path: the complement is formed by xor-ing the AND result with vec_neg1.
4052   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4053   rsp->xv[VDREG] = _mm_xor_si128(_mm_and_si128(rsp->xv[VS1REG], shuf), vec_neg1);
4054   rsp->accum_l = rsp->xv[VDREG];
4288    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4289    rsp->xv[VDREG] = _mm_xor_si128(_mm_and_si128(rsp->xv[VS1REG], shuf), vec_neg1);
4290    rsp->accum_l = rsp->xv[VDREG];
40554291#else
// Scalar path: r25438 reads lanes with VREG_S and writes ACCUM_L(i); r25439 uses the
// SCALAR_GET_VS1/VS2 and SET_ACCUM_L macros for the same steps.
4056   INT16 vres[8];
4057   for (int i = 0; i < 8; i++)
4058   {
4059      UINT16 s1 = (UINT16)VREG_S(VS1REG, i);
4060      UINT16 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
4061      vres[i] = ~((s1 & s2));
4062      ACCUM_L(i) = vres[i];
4063   }
4064   WRITEBACK_RESULT();
4292    INT16 vres[8];
4293    for (int i = 0; i < 8; i++)
4294    {
4295        UINT16 s1, s2;
4296      SCALAR_GET_VS1(s1, i);
4297      SCALAR_GET_VS2(s2, i);
4298        vres[i] = ~((s1 & s2));
4299        SET_ACCUM_L(vres[i], i);
4300    }
4301    WRITEBACK_RESULT();
40654302#endif
40664303}
40674304
// cfunc_rsp_vor -- lane-wise bitwise OR of two vector registers.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// Result per lane: s1 | s2, stored to the low accumulator and to the destination register.
40684305INLINE void cfunc_rsp_vor(void *param)
40694306{
4070   rsp_state *rsp = (rsp_state*)param;
4071   int op = rsp->impstate->arg0;
4307    rsp_state *rsp = (rsp_state*)param;
4308    int op = rsp->impstate->arg0;
40724309
4073   // 31       25  24     20      15      10      5        0
4074   // ------------------------------------------------------
4075   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 |
4076   // ------------------------------------------------------
4077   //
4078   // Bitwise OR of two vector registers
4310    // 31       25  24     20      15      10      5        0
4311    // ------------------------------------------------------
4312    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101010 |
4313    // ------------------------------------------------------
4314    //
4315    // Bitwise OR of two vector registers
40794316
40804317#if USE_SIMD
// SIMD path: VS2 is permuted through vec_shuf_inverse[EL], then ORed with VS1 in one op.
4081   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4082   rsp->xv[VDREG] = _mm_or_si128(rsp->xv[VS1REG], shuf);
4083   rsp->accum_l = rsp->xv[VDREG];
4318    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4319    rsp->xv[VDREG] = _mm_or_si128(rsp->xv[VS1REG], shuf);
4320    rsp->accum_l = rsp->xv[VDREG];
40844321#else
// Scalar path: r25438 reads lanes with VREG_S and writes ACCUM_L(i); r25439 uses the
// SCALAR_GET_VS1/VS2 and SET_ACCUM_L macros for the same steps.
4085   INT16 vres[8];
4086   for (int i = 0; i < 8; i++)
4087   {
4088      UINT16 s1 = (UINT16)VREG_S(VS1REG, i);
4089      UINT16 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
4090      vres[i] = s1 | s2;
4091      ACCUM_L(i) = vres[i];
4092   }
4093   WRITEBACK_RESULT();
4322    INT16 vres[8];
4323    for (int i = 0; i < 8; i++)
4324    {
4325        UINT16 s1, s2;
4326      SCALAR_GET_VS1(s1, i);
4327      SCALAR_GET_VS2(s2, i);
4328        vres[i] = s1 | s2;
4329        SET_ACCUM_L(vres[i], i);
4330    }
4331    WRITEBACK_RESULT();
40944332#endif
40954333}
40964334
// cfunc_rsp_vnor -- lane-wise bitwise NOT-OR of two vector registers.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// Result per lane: ~(s1 | s2), stored to the low accumulator and to the destination register.
40974335INLINE void cfunc_rsp_vnor(void *param)
40984336{
4099   rsp_state *rsp = (rsp_state*)param;
4100   int op = rsp->impstate->arg0;
4337    rsp_state *rsp = (rsp_state*)param;
4338    int op = rsp->impstate->arg0;
41014339
4102   // 31       25  24     20      15      10      5        0
4103   // ------------------------------------------------------
4104   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 |
4105   // ------------------------------------------------------
4106   //
4107   // Bitwise NOT OR of two vector registers
4340    // 31       25  24     20      15      10      5        0
4341    // ------------------------------------------------------
4342    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101011 |
4343    // ------------------------------------------------------
4344    //
4345    // Bitwise NOT OR of two vector registers
41084346
41094347#if USE_SIMD
// SIMD path: the complement is formed by xor-ing the OR result with vec_neg1.
4110   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4111   rsp->xv[VDREG] = _mm_xor_si128(_mm_or_si128(rsp->xv[VS1REG], shuf), vec_neg1);
4112   rsp->accum_l = rsp->xv[VDREG];
4348    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4349    rsp->xv[VDREG] = _mm_xor_si128(_mm_or_si128(rsp->xv[VS1REG], shuf), vec_neg1);
4350    rsp->accum_l = rsp->xv[VDREG];
41134351#else
// Scalar path: r25438 reads lanes with VREG_S and writes ACCUM_L(i); r25439 uses the
// SCALAR_GET_VS1/VS2 and SET_ACCUM_L macros for the same steps.
4114   INT16 vres[8];
4115   for (int i = 0; i < 8; i++)
4116   {
4117      UINT16 s1 = (UINT16)VREG_S(VS1REG, i);
4118      UINT16 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
4119      vres[i] = ~((s1 | s2));
4120      ACCUM_L(i) = vres[i];
4121   }
4122   WRITEBACK_RESULT();
4352    INT16 vres[8];
4353    for (int i = 0; i < 8; i++)
4354    {
4355        UINT16 s1, s2;
4356      SCALAR_GET_VS1(s1, i);
4357      SCALAR_GET_VS2(s2, i);
4358        vres[i] = ~(s1 | s2);
4359        SET_ACCUM_L(vres[i], i);
4360    }
4361    WRITEBACK_RESULT();
41234362#endif
41244363}
41254364
// cfunc_rsp_vxor -- lane-wise bitwise XOR of two vector registers.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// Result per lane: s1 ^ s2, stored to the low accumulator and to the destination register.
41264365INLINE void cfunc_rsp_vxor(void *param)
41274366{
4128   rsp_state *rsp = (rsp_state*)param;
4129   int op = rsp->impstate->arg0;
4367    rsp_state *rsp = (rsp_state*)param;
4368    int op = rsp->impstate->arg0;
41304369
4131   // 31       25  24     20      15      10      5        0
4132   // ------------------------------------------------------
4133   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 |
4134   // ------------------------------------------------------
4135   //
4136   // Bitwise XOR of two vector registers
4370    // 31       25  24     20      15      10      5        0
4371    // ------------------------------------------------------
4372    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101100 |
4373    // ------------------------------------------------------
4374    //
4375    // Bitwise XOR of two vector registers
41374376
41384377#if USE_SIMD
// SIMD path: VS2 is permuted through vec_shuf_inverse[EL], then XORed with VS1 in one op.
4139   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4140   rsp->xv[VDREG] = _mm_xor_si128(rsp->xv[VS1REG], shuf);
4141   rsp->accum_l = rsp->xv[VDREG];
4378    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4379    rsp->xv[VDREG] = _mm_xor_si128(rsp->xv[VS1REG], shuf);
4380    rsp->accum_l = rsp->xv[VDREG];
41424381#else
// Scalar path: r25438 reads lanes with VREG_S and writes ACCUM_L(i); r25439 uses the
// SCALAR_GET_VS1/VS2 and SET_ACCUM_L macros for the same steps.
4143   INT16 vres[8];
4144   for (int i = 0; i < 8; i++)
4145   {
4146      UINT16 s1 = (UINT16)VREG_S(VS1REG, i);
4147      UINT16 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
4148      vres[i] = s1 ^ s2;
4149      ACCUM_L(i) = vres[i];
4150   }
4151   WRITEBACK_RESULT();
4382    INT16 vres[8];
4383    for (int i = 0; i < 8; i++)
4384    {
4385        UINT16 s1, s2;
4386      SCALAR_GET_VS1(s1, i);
4387      SCALAR_GET_VS2(s2, i);
4388        vres[i] = s1 ^ s2;
4389        SET_ACCUM_L(vres[i], i);
4390    }
4391    WRITEBACK_RESULT();
41524392#endif
41534393}
41544394
// cfunc_rsp_vnxor -- lane-wise bitwise NOT-XOR of two vector registers.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
// Result per lane: ~(s1 ^ s2), stored to the low accumulator and to the destination register.
41554395INLINE void cfunc_rsp_vnxor(void *param)
41564396{
4157   rsp_state *rsp = (rsp_state*)param;
4158   int op = rsp->impstate->arg0;
4397    rsp_state *rsp = (rsp_state*)param;
4398    int op = rsp->impstate->arg0;
41594399
4160   // 31       25  24     20      15      10      5        0
4161   // ------------------------------------------------------
4162   // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 |
4163   // ------------------------------------------------------
4164   //
4165   // Bitwise NOT XOR of two vector registers
4400    // 31       25  24     20      15      10      5        0
4401    // ------------------------------------------------------
4402    // | 010010 | 1 | EEEE | SSSSS | TTTTT | DDDDD | 101101 |
4403    // ------------------------------------------------------
4404    //
4405    // Bitwise NOT XOR of two vector registers
41664406
41674407#if USE_SIMD
// SIMD path: the complement is formed by xor-ing the XOR result with vec_neg1.
4168   __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4169   rsp->xv[VDREG] = _mm_xor_si128(_mm_xor_si128(rsp->xv[VS1REG], shuf), vec_neg1);
4170   rsp->accum_l = rsp->xv[VDREG];
4408    __m128i shuf = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4409    rsp->xv[VDREG] = _mm_xor_si128(_mm_xor_si128(rsp->xv[VS1REG], shuf), vec_neg1);
4410    rsp->accum_l = rsp->xv[VDREG];
41714411#else
// Scalar path: r25438 reads lanes with VREG_S and writes ACCUM_L(i); r25439 uses the
// SCALAR_GET_VS1/VS2 and SET_ACCUM_L macros for the same steps.
4172   INT16 vres[8];
4173   for (int i = 0; i < 8; i++)
4174   {
4175      UINT16 s1 = (UINT16)VREG_S(VS1REG, i);
4176      UINT16 s2 = (UINT16)VREG_S(VS2REG, VEC_EL_2(EL, i));
4177      vres[i] = ~((s1 ^ s2));
4178      ACCUM_L(i) = vres[i];
4179   }
4180   WRITEBACK_RESULT();
4412    INT16 vres[8];
4413    for (int i = 0; i < 8; i++)
4414    {
4415        UINT16 s1, s2;
4416      SCALAR_GET_VS1(s1, i);
4417      SCALAR_GET_VS2(s2, i);
4418        vres[i] = ~(s1 ^ s2);
4419        SET_ACCUM_L(vres[i], i);
4420    }
4421    WRITEBACK_RESULT();
41814422#endif
41824423}
41834424
// cfunc_rsp_vrcp -- table-driven reciprocal of a single vector element.
// NOTE(review): this span is revision-diff text; each statement appears in its r25438
// and r25439 forms, and the leading digits are the viewer's line numbers, not code.
//
// param: cast to rsp_state*; the opcode word is read from rsp->impstate->arg0 into `op`.
//
// Steps visible in the body:
//   1. rec       = element EL of VS2, sign-extended from 16 to 32 bits.
//   2. datainput = |rec|; shifter = number of zero bits above its highest set bit
//                  (0x10 when datainput is zero).
//   3. address   = bits 30..22 of (datainput << shifter), used to index rsp_divtable.
//   4. temp      = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f),
//                  complemented when rec is negative.
//   5. Overrides: rec == 0 gives 0x7fffffff; rec == 0xffff8000 gives 0xffff0000.
// The result is stored in rsp->reciprocal_res, rsp->dp_allowed is cleared, the low
// 16 bits go to the destination lane selected by VS1REG, and the low accumulator is
// loaded with the EL-selected lanes of VS2.
41844425INLINE void cfunc_rsp_vrcp(void *param)
41854426{
4186   rsp_state *rsp = (rsp_state*)param;
4187   int op = rsp->impstate->arg0;
4427    rsp_state *rsp = (rsp_state*)param;
4428    int op = rsp->impstate->arg0;
41884429
4189   // 31       25  24     20      15      10      5        0
4190   // ------------------------------------------------------
4191   // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 |
4192   // ------------------------------------------------------
4193   //
4194   // Calculates reciprocal
4430    // 31       25  24     20      15      10      5        0
4431    // ------------------------------------------------------
4432    // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110000 |
4433    // ------------------------------------------------------
4434    //
4435    // Calculates reciprocal
41954436
4196   INT32 shifter = 0;
4437    INT32 shifter = 0;
41974438#if USE_SIMD
// SIMD build: the element is extracted as unsigned 16-bit, then sign-extended via (INT16).
4198   UINT16 urec;
4199   INT32 rec;
4200   SIMD_EXTRACT16(rsp->xv[VS2REG], urec, EL);
4201   rec = (INT16)urec;
4439    UINT16 urec;
4440    INT32 rec;
4441    SIMD_EXTRACT16(rsp->xv[VS2REG], urec, EL);
4442    rec = (INT16)urec;
42024443#else
4203   INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7));
4444    INT32 rec = (INT16)(VREG_S(VS2REG, EL & 7));
42044445#endif
// (~i) & 0x1f equals 31 - i for i in 0..31, so the loop scans from bit 31 downwards and
// stops at the first set bit; shifter ends up as the count of zero bits above it.
// NOTE(review): at i == 0 the tested mask is 1 << 31 on a signed int -- confirm this is
// acceptable for the compilers in use.
4205   INT32 datainput = (rec < 0) ? (-rec) : rec;
4206   if (datainput)
4207   {
4208      for (int i = 0; i < 32; i++)
4209      {
4210         if (datainput & (1 << ((~i) & 0x1f)))
4211         {
4212            shifter = i;
4213            break;
4214         }
4215      }
4216   }
4217   else
4218   {
4219      shifter = 0x10;
4220   }
4446    INT32 datainput = (rec < 0) ? (-rec) : rec;
4447    if (datainput)
4448    {
4449        for (int i = 0; i < 32; i++)
4450        {
4451            if (datainput & (1 << ((~i) & 0x1f)))
4452            {
4453                shifter = i;
4454                break;
4455            }
4456        }
4457    }
4458    else
4459    {
4460        shifter = 0x10;
4461    }
42214462
// Table lookup on the nine bits below the normalised top bit, then scale the entry back
// by the complementary shift. The two special cases are applied after the sign handling
// and therefore override it.
4222   INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22;
4223   INT32 fetchval = rsp_divtable[address];
4224   INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f);
4225   if (rec < 0)
4226   {
4227      temp = ~temp;
4228   }
4229   if (!rec)
4230   {
4231      temp = 0x7fffffff;
4232   }
4233   else if (rec == 0xffff8000)
4234   {
4235      temp = 0xffff0000;
4236   }
4237   rec = temp;
4463    INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22;
4464    INT32 fetchval = rsp_divtable[address];
4465    INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f);
4466    if (rec < 0)
4467    {
4468        temp = ~temp;
4469    }
4470    if (!rec)
4471    {
4472        temp = 0x7fffffff;
4473    }
4474    else if (rec == 0xffff8000)
4475    {
4476        temp = 0xffff0000;
4477    }
4478    rec = temp;
42384479
// The full 32-bit result is kept in reciprocal_res; only its low half is written to the
// destination lane below.
4239   rsp->reciprocal_res = rec;
4240   rsp->dp_allowed = 0;
4480    rsp->reciprocal_res = rec;
4481    rsp->dp_allowed = 0;
42414482
// r25439 folds the accumulator fill into this #if/#else (a single shuffle for SIMD, a
// SET_ACCUM_L loop for scalar); r25438 performs it in a separate loop after the #endif.
42424483#if USE_SIMD
4243   SIMD_INSERT16(rsp->xv[VDREG], (UINT16)rec, VS1REG);
4484    SIMD_INSERT16(rsp->xv[VDREG], (UINT16)rec, VS1REG);
4485    rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
42444486#else
4245   W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec;
4487    W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec;
4488    for (int i = 0; i < 8; i++)
4489    {
4490        SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i);
4491    }
42464492#endif
4247
4248   for (int i = 0; i < 8; i++)
4249   {
4250#if USE_SIMD
4251      INT16 val;
4252      SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
4253      SIMD_INSERT16(rsp->accum_l, val, i);
4254#else
4255      ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i));
4256#endif
4257   }
42584493}
42594494
42604495INLINE void cfunc_rsp_vrcpl(void *param)
42614496{
4262   rsp_state *rsp = (rsp_state*)param;
4263   int op = rsp->impstate->arg0;
4497    rsp_state *rsp = (rsp_state*)param;
4498    int op = rsp->impstate->arg0;
42644499
4265   // 31       25  24     20      15      10      5        0
4266   // ------------------------------------------------------
4267   // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 |
4268   // ------------------------------------------------------
4269   //
4270   // Calculates reciprocal low part
4500    // 31       25  24     20      15      10      5        0
4501    // ------------------------------------------------------
4502    // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110001 |
4503    // ------------------------------------------------------
4504    //
4505    // Calculates reciprocal low part
42714506
4272   INT32 shifter = 0;
4507    INT32 shifter = 0;
42734508
42744509#if USE_SIMD
4275   UINT16 urec;
4276   INT32 rec;
4277   SIMD_EXTRACT16(rsp->xv[VS2REG], urec, EL);
4278   rec = (INT32)(rsp->reciprocal_high | urec);
4510    UINT16 urec;
4511    SIMD_EXTRACT16(rsp->xv[VS2REG], urec, EL);
4512    INT32 rec = (urec | rsp->reciprocal_high);
42794513#else
4280   INT32 rec = ((UINT16)(VREG_S(VS2REG, EL & 7)) | rsp->reciprocal_high);
4514    INT32 rec = ((UINT16)(VREG_S(VS2REG, EL & 7)) | rsp->reciprocal_high);
42814515#endif
42824516
4283   INT32 datainput = rec;
4517    INT32 datainput = rec;
42844518
4285   if (rec < 0)
4286   {
4287      if (rsp->dp_allowed)
4288      {
4289         if (rec < -32768)
4290         {
4291            datainput = ~datainput;
4292         }
4293         else
4294         {
4295            datainput = -datainput;
4296         }
4297      }
4298      else
4299      {
4300         datainput = -datainput;
4301      }
4302   }
4519    if (rec < 0)
4520    {
4521        if (rsp->dp_allowed)
4522        {
4523            if (rec < -32768)
4524            {
4525                datainput = ~datainput;
4526            }
4527            else
4528            {
4529                datainput = -datainput;
4530            }
4531        }
4532        else
4533        {
4534            datainput = -datainput;
4535        }
4536    }
43034537
43044538
4305   if (datainput)
4306   {
4307      for (int i = 0; i < 32; i++)
4308      {
4309         if (datainput & (1 << ((~i) & 0x1f)))
4310         {
4311            shifter = i;
4312            break;
4313         }
4314      }
4315   }
4316   else
4317   {
4318      if (rsp->dp_allowed)
4319      {
4320         shifter = 0;
4321      }
4322      else
4323      {
4324         shifter = 0x10;
4325      }
4326   }
4539    if (datainput)
4540    {
4541        for (int i = 0; i < 32; i++)
4542        {
4543            if (datainput & (1 << ((~i) & 0x1f)))
4544            {
4545                shifter = i;
4546                break;
4547            }
4548        }
4549    }
4550    else
4551    {
4552        if (rsp->dp_allowed)
4553        {
4554            shifter = 0;
4555        }
4556        else
4557        {
4558            shifter = 0x10;
4559        }
4560    }
43274561
4328   INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22;
4329   INT32 fetchval = rsp_divtable[address];
4330   INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f);
4331   if (rec < 0)
4332   {
4333      temp = ~temp;
4334   }
4335   if (!rec)
4336   {
4337      temp = 0x7fffffff;
4338   }
4339   else if (rec == 0xffff8000)
4340   {
4341      temp = 0xffff0000;
4342   }
4343   rec = temp;
4562    INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22;
4563    INT32 fetchval = rsp_divtable[address];
4564    INT32 temp = (0x40000000 | (fetchval << 14)) >> ((~shifter) & 0x1f);
4565    if (rec < 0)
4566    {
4567        temp = ~temp;
4568    }
4569    if (!rec)
4570    {
4571        temp = 0x7fffffff;
4572    }
4573    else if (rec == 0xffff8000)
4574    {
4575        temp = 0xffff0000;
4576    }
4577    rec = temp;
43444578
4345   rsp->reciprocal_res = rec;
4346   rsp->dp_allowed = 0;
4579    rsp->reciprocal_res = rec;
4580    rsp->dp_allowed = 0;
43474581
43484582#if USE_SIMD
4349   SIMD_INSERT16(rsp->xv[VDREG], (UINT16)rec, VS1REG);
4583    SIMD_INSERT16(rsp->xv[VDREG], (UINT16)rec, VS1REG);
43504584#else
4351   W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec;
4585    W_VREG_S(VDREG, VS1REG & 7) = (UINT16)rec;
43524586#endif
43534587
4354   for (int i = 0; i < 8; i++)
4355   {
4588    for (int i = 0; i < 8; i++)
4589    {
43564590#if USE_SIMD
4357      INT16 val;
4358      SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
4359      SIMD_INSERT16(rsp->accum_l, val, i);
4591        INT16 val;
4592        SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
43604593#else
4361      ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i));
4594      INT16 val = VREG_S(VS2REG, VEC_EL_2(EL, i));
43624595#endif
4363   }
4596        SET_ACCUM_L(val, i);
4597    }
43644598}
43654599
43664600INLINE void cfunc_rsp_vrcph(void *param)
43674601{
4368   rsp_state *rsp = (rsp_state*)param;
4369   int op = rsp->impstate->arg0;
4370   // 31       25  24     20      15      10      5        0
4371   // ------------------------------------------------------
4372   // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 |
4373   // ------------------------------------------------------
4374   //
4375   // Calculates reciprocal high part
4602    rsp_state *rsp = (rsp_state*)param;
4603    int op = rsp->impstate->arg0;
4604    // 31       25  24     20      15      10      5        0
4605    // ------------------------------------------------------
4606    // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110010 |
4607    // ------------------------------------------------------
4608    //
4609    // Calculates reciprocal high part
43764610
43774611#if USE_SIMD
4378   UINT16 rcph;
4379   SIMD_EXTRACT16(rsp->xv[VS2REG], rcph, EL);
4380   rsp->reciprocal_high = rcph << 16;
4381   rsp->dp_allowed = 1;
4612    UINT16 rcph;
4613    SIMD_EXTRACT16(rsp->xv[VS2REG], rcph, EL);
4614    rsp->reciprocal_high = rcph << 16;
4615    rsp->dp_allowed = 1;
43824616
4383   //rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4384   INT16 val;
4385   for (int i = 0; i < 8; i++)
4386   {
4387      SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
4388      SIMD_INSERT16(rsp->accum_l, val, i);
4389   }
4617    rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
43904618
4391   SIMD_INSERT16(rsp->xv[VDREG], (INT16)(rsp->reciprocal_res >> 16), VS1REG);
4619    SIMD_INSERT16(rsp->xv[VDREG], (INT16)(rsp->reciprocal_res >> 16), VS1REG);
43924620#else
4393   rsp->reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
4394   rsp->dp_allowed = 1;
4621    rsp->reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
4622    rsp->dp_allowed = 1;
43954623
4396   for (int i = 0; i < 8; i++)
4397   {
4398      ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i));
4399   }
4624    for (int i = 0; i < 8; i++)
4625    {
4626        SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i);
4627    }
44004628
4401   W_VREG_S(VDREG, VS1REG & 7) = (INT16)(rsp->reciprocal_res >> 16);
4629    W_VREG_S(VDREG, VS1REG & 7) = (INT16)(rsp->reciprocal_res >> 16);
44024630#endif
44034631}
44044632
44054633INLINE void cfunc_rsp_vmov(void *param)
44064634{
4407   rsp_state *rsp = (rsp_state*)param;
4408   int op = rsp->impstate->arg0;
4409   // 31       25  24     20      15      10      5        0
4410   // ------------------------------------------------------
4411   // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 |
4412   // ------------------------------------------------------
4413   //
4414   // Moves element from vector to destination vector
4635    rsp_state *rsp = (rsp_state*)param;
4636    int op = rsp->impstate->arg0;
44154637
4638    // 31       25  24     20      15      10      5        0
4639    // ------------------------------------------------------
4640    // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110011 |
4641    // ------------------------------------------------------
4642    //
4643    // Moves element from vector to destination vector
4644
44164645#if USE_SIMD
4417   INT16 val;
4418   SIMD_EXTRACT16(rsp->xv[VS2REG], val, EL);
4419   SIMD_INSERT16(rsp->xv[VDREG], val, VS1REG);
4420   //rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4421   for (int i = 0; i < 8; i++)
4422   {
4423      SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
4424      SIMD_INSERT16(rsp->accum_l, val, i);
4425   }
4646    INT16 val;
4647    SIMD_EXTRACT16(rsp->xv[VS2REG], val, EL);
4648    SIMD_INSERT16(rsp->xv[VDREG], val, VS1REG);
4649    rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
44264650#else
4427   W_VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7);
4428   for (int i = 0; i < 8; i++)
4429   {
4430      ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i));
4431   }
4651    W_VREG_S(VDREG, VS1REG & 7) = VREG_S(VS2REG, EL & 7);
4652    for (int i = 0; i < 8; i++)
4653    {
4654        SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i);
4655    }
44324656#endif
44334657}
44344658
44354659INLINE void cfunc_rsp_vrsql(void *param)
44364660{
4437   rsp_state *rsp = (rsp_state*)param;
4438   int op = rsp->impstate->arg0;
4661    rsp_state *rsp = (rsp_state*)param;
4662    int op = rsp->impstate->arg0;
44394663
4440   // 31       25  24     20      15      10      5        0
4441   // ------------------------------------------------------
4442   // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 |
4443   // ------------------------------------------------------
4444   //
4445   // Calculates reciprocal square-root low part
4664    // 31       25  24     20      15      10      5        0
4665    // ------------------------------------------------------
4666    // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110101 |
4667    // ------------------------------------------------------
4668    //
4669    // Calculates reciprocal square-root low part
44464670
4447   INT32 shifter = 0;
4671    INT32 shifter = 0;
44484672#if USE_SIMD
4449   UINT16 val;
4450   SIMD_EXTRACT16(rsp->xv[VS2REG], val, EL);
4451   INT32 rec = (INT32)(rsp->reciprocal_high | val);
4673    UINT16 val;
4674    SIMD_EXTRACT16(rsp->xv[VS2REG], val, EL);
4675    INT32 rec = rsp->reciprocal_high | val;
44524676#else
4453   INT32 rec = rsp->reciprocal_high | (UINT16)VREG_S(VS2REG, EL & 7);
4677    INT32 rec = rsp->reciprocal_high | (UINT16)VREG_S(VS2REG, EL & 7);
44544678#endif
4455   INT32 datainput = rec;
4679    INT32 datainput = rec;
44564680
4457   if (rec < 0)
4458   {
4459      if (rsp->dp_allowed)
4460      {
4461         if (rec < -32768)
4462         {
4463            datainput = ~datainput;
4464         }
4465         else
4466         {
4467            datainput = -datainput;
4468         }
4469      }
4470      else
4471      {
4472         datainput = -datainput;
4473      }
4474   }
4681    if (rec < 0)
4682    {
4683        if (rsp->dp_allowed)
4684        {
4685            if (rec < -32768)
4686            {
4687                datainput = ~datainput;
4688            }
4689            else
4690            {
4691                datainput = -datainput;
4692            }
4693        }
4694        else
4695        {
4696            datainput = -datainput;
4697        }
4698    }
44754699
4476   if (datainput)
4477   {
4478      for (int i = 0; i < 32; i++)
4479      {
4480         if (datainput & (1 << ((~i) & 0x1f)))
4481         {
4482            shifter = i;
4483            break;
4484         }
4485      }
4486   }
4487   else
4488   {
4489      if (rsp->dp_allowed)
4490      {
4491         shifter = 0;
4492      }
4493      else
4494      {
4495         shifter = 0x10;
4496      }
4497   }
4700    if (datainput)
4701    {
4702        for (int i = 0; i < 32; i++)
4703        {
4704            if (datainput & (1 << ((~i) & 0x1f)))
4705            {
4706                shifter = i;
4707                break;
4708            }
4709        }
4710    }
4711    else
4712    {
4713        if (rsp->dp_allowed)
4714        {
4715            shifter = 0;
4716        }
4717        else
4718        {
4719            shifter = 0x10;
4720        }
4721    }
44984722
4499   INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22;
4500   address = ((address | 0x200) & 0x3fe) | (shifter & 1);
4723    INT32 address = ((datainput << shifter) & 0x7fc00000) >> 22;
4724    address = ((address | 0x200) & 0x3fe) | (shifter & 1);
45014725
4502   INT32 fetchval = rsp_divtable[address];
4503   INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1);
4504   if (rec < 0)
4505   {
4506      temp = ~temp;
4507   }
4508   if (!rec)
4509   {
4510      temp = 0x7fffffff;
4511   }
4512   else if (rec == 0xffff8000)
4513   {
4514      temp = 0xffff0000;
4515   }
4516   rec = temp;
4726    INT32 fetchval = rsp_divtable[address];
4727    INT32 temp = (0x40000000 | (fetchval << 14)) >> (((~shifter) & 0x1f) >> 1);
4728    if (rec < 0)
4729    {
4730        temp = ~temp;
4731    }
4732    if (!rec)
4733    {
4734        temp = 0x7fffffff;
4735    }
4736    else if (rec == 0xffff8000)
4737    {
4738        temp = 0xffff0000;
4739    }
4740    rec = temp;
45174741
4518   rsp->reciprocal_res = rec;
4519   rsp->dp_allowed = 0;
4742    rsp->reciprocal_res = rec;
4743    rsp->dp_allowed = 0;
45204744
45214745#if USE_SIMD
4522   SIMD_INSERT16(rsp->xv[VDREG], (UINT16)rec, VS1REG);
4523   //rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4524   for (int i = 0; i < 8; i++)
4525   {
4526      SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
4527      SIMD_INSERT16(rsp->accum_l, val, i);
4528   }
4746    SIMD_INSERT16(rsp->xv[VDREG], (UINT16)rec, VS1REG);
4747    rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
45294748#else
4530   W_VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff);
4531   for (int i = 0; i < 8; i++)
4532   {
4533      ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i));
4534   }
4749    W_VREG_S(VDREG, VS1REG & 7) = (UINT16)(rec & 0xffff);
4750    for (int i = 0; i < 8; i++)
4751    {
4752        SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i);
4753    }
45354754#endif
45364755}
45374756
45384757INLINE void cfunc_rsp_vrsqh(void *param)
45394758{
4540   rsp_state *rsp = (rsp_state*)param;
4541   int op = rsp->impstate->arg0;
4759    rsp_state *rsp = (rsp_state*)param;
4760    int op = rsp->impstate->arg0;
45424761
4543   // 31       25  24     20      15      10      5        0
4544   // ------------------------------------------------------
4545   // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 |
4546   // ------------------------------------------------------
4547   //
4548   // Calculates reciprocal square-root high part
4762    // 31       25  24     20      15      10      5        0
4763    // ------------------------------------------------------
4764    // | 010010 | 1 | EEEE | SSSSS | ?FFFF | DDDDD | 110110 |
4765    // ------------------------------------------------------
4766    //
4767    // Calculates reciprocal square-root high part
45494768
45504769#if USE_SIMD
4551   UINT16 val;
4552   SIMD_EXTRACT16(rsp->xv[VS2REG], val, EL);
4553   rsp->reciprocal_high = val << 16;
4554   rsp->dp_allowed = 1;
4770    UINT16 val;
4771    SIMD_EXTRACT16(rsp->xv[VS2REG], val, EL);
4772    rsp->reciprocal_high = val << 16;
4773    rsp->dp_allowed = 1;
45554774
4556   //rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
4557   for (int i = 0; i < 8; i++)
4558   {
4559      SIMD_EXTRACT16(rsp->xv[VS2REG], val, VEC_EL_2(EL, i));
4560      SIMD_INSERT16(rsp->accum_l, val, i);
4561   }
4775    rsp->accum_l = _mm_shuffle_epi8(rsp->xv[VS2REG], vec_shuf_inverse[EL]);
45624776
4563   SIMD_INSERT16(rsp->xv[VDREG], (UINT16)(rsp->reciprocal_res >> 16), VS1REG); // store high part
4777    SIMD_INSERT16(rsp->xv[VDREG], (INT16)(rsp->reciprocal_res >> 16), VS1REG); // store high part
45644778#else
4565   rsp->reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
4566   rsp->dp_allowed = 1;
4779    rsp->reciprocal_high = (VREG_S(VS2REG, EL & 7)) << 16;
4780    rsp->dp_allowed = 1;
45674781
4568   for (int i = 0; i < 8; i++)
4569   {
4570      ACCUM_L(i) = VREG_S(VS2REG, VEC_EL_2(EL, i));
4571   }
4782    for (int i = 0; i < 8; i++)
4783    {
4784        SET_ACCUM_L(VREG_S(VS2REG, VEC_EL_2(EL, i)), i);
4785    }
45724786
4573   W_VREG_S(VDREG, VS1REG & 7) = (INT16)(rsp->reciprocal_res >> 16);  // store high part
4787    W_VREG_S(VDREG, VS1REG & 7) = (INT16)(rsp->reciprocal_res >> 16);  // store high part
45744788#endif
45754789}
45764790
45774791static void cfunc_sp_set_status_cb(void *param)
45784792{
4579   rsp_state *rsp = (rsp_state*)param;
4580   (rsp->sp_set_status_func)(0, rsp->impstate->arg0);
4793    rsp_state *rsp = (rsp_state*)param;
4794    (rsp->sp_set_status_func)(0, rsp->impstate->arg0);
45814795}
45824796
45834797static CPU_EXECUTE( rsp )
45844798{
4585   rsp_state *rsp = get_safe_token(device);
4586   drcuml_state *drcuml = rsp->impstate->drcuml;
4587   int execute_result;
4799    rsp_state *rsp = get_safe_token(device);
4800    drcuml_state *drcuml = rsp->impstate->drcuml;
4801    int execute_result;
45884802
4589   /* reset the cache if dirty */
4590   if (rsp->impstate->cache_dirty)
4591      code_flush_cache(rsp);
4592   rsp->impstate->cache_dirty = FALSE;
4803    /* reset the cache if dirty */
4804    if (rsp->impstate->cache_dirty)
4805        code_flush_cache(rsp);
4806    rsp->impstate->cache_dirty = FALSE;
45934807
4594   /* execute */
4595   do
4596   {
4597      if( rsp->sr & ( RSP_STATUS_HALT | RSP_STATUS_BROKE ) )
4598      {
4599         rsp->icount = MIN(rsp->icount, 0);
4600         break;
4601      }
4808    /* execute */
4809    do
4810    {
4811        if( rsp->sr & ( RSP_STATUS_HALT | RSP_STATUS_BROKE ) )
4812        {
4813            rsp->icount = MIN(rsp->icount, 0);
4814            break;
4815        }
46024816
4603      /* run as much as we can */
4604      execute_result = drcuml->execute(*rsp->impstate->entry);
4817        /* run as much as we can */
4818        execute_result = drcuml->execute(*rsp->impstate->entry);
46054819
4606      /* if we need to recompile, do it */
4607      if (execute_result == EXECUTE_MISSING_CODE)
4608      {
4609         code_compile_block(rsp, rsp->pc);
4610      }
4611      else if (execute_result == EXECUTE_UNMAPPED_CODE)
4612      {
4613         fatalerror("Attempted to execute unmapped code at PC=%08X\n", rsp->pc);
4614      }
4615      else if (execute_result == EXECUTE_RESET_CACHE)
4616      {
4617         code_flush_cache(rsp);
4618      }
4619   } while (execute_result != EXECUTE_OUT_OF_CYCLES);
4820        /* if we need to recompile, do it */
4821        if (execute_result == EXECUTE_MISSING_CODE)
4822        {
4823            code_compile_block(rsp, rsp->pc);
4824        }
4825        else if (execute_result == EXECUTE_UNMAPPED_CODE)
4826        {
4827            fatalerror("Attempted to execute unmapped code at PC=%08X\n", rsp->pc);
4828        }
4829        else if (execute_result == EXECUTE_RESET_CACHE)
4830        {
4831            code_flush_cache(rsp);
4832        }
4833    } while (execute_result != EXECUTE_OUT_OF_CYCLES);
46204834}
46214835
46224836/***************************************************************************
r25438r25439
46304844
46314845void rspdrc_flush_drc_cache(device_t *device)
46324846{
4633   if (!device->machine().options().drc()) return;
4634   rsp_state *rsp = get_safe_token(device);
4635   rsp->impstate->cache_dirty = TRUE;
4847    if (!device->machine().options().drc()) return;
4848    rsp_state *rsp = get_safe_token(device);
4849    rsp->impstate->cache_dirty = TRUE;
46364850}
46374851
46384852/*-------------------------------------------------
r25438r25439
46424856
46434857static void code_flush_cache(rsp_state *rsp)
46444858{
4645   /* empty the transient cache contents */
4646   rsp->impstate->drcuml->reset();
4859    /* empty the transient cache contents */
4860    rsp->impstate->drcuml->reset();
46474861
4648   try
4649   {
4650      /* generate the entry point and out-of-cycles handlers */
4651      static_generate_entry_point(rsp);
4652      static_generate_nocode_handler(rsp);
4653      static_generate_out_of_cycles(rsp);
4862    try
4863    {
4864        /* generate the entry point and out-of-cycles handlers */
4865        static_generate_entry_point(rsp);
4866        static_generate_nocode_handler(rsp);
4867        static_generate_out_of_cycles(rsp);
46544868
4655      /* add subroutines for memory accesses */
4656      static_generate_memory_accessor(rsp, 1, FALSE, "read8",       rsp->impstate->read8);
4657      static_generate_memory_accessor(rsp, 1, TRUE,  "write8",      rsp->impstate->write8);
4658      static_generate_memory_accessor(rsp, 2, FALSE, "read16",      rsp->impstate->read16);
4659      static_generate_memory_accessor(rsp, 2, TRUE,  "write16",     rsp->impstate->write16);
4660      static_generate_memory_accessor(rsp, 4, FALSE, "read32",      rsp->impstate->read32);
4661      static_generate_memory_accessor(rsp, 4, TRUE,  "write32",     rsp->impstate->write32);
4662   }
4663   catch (drcuml_block::abort_compilation &)
4664   {
4665      fatalerror("Unable to generate static RSP code\n");
4666   }
4869        /* add subroutines for memory accesses */
4870        static_generate_memory_accessor(rsp, 1, FALSE, "read8",       rsp->impstate->read8);
4871        static_generate_memory_accessor(rsp, 1, TRUE,  "write8",      rsp->impstate->write8);
4872        static_generate_memory_accessor(rsp, 2, FALSE, "read16",      rsp->impstate->read16);
4873        static_generate_memory_accessor(rsp, 2, TRUE,  "write16",     rsp->impstate->write16);
4874        static_generate_memory_accessor(rsp, 4, FALSE, "read32",      rsp->impstate->read32);
4875        static_generate_memory_accessor(rsp, 4, TRUE,  "write32",     rsp->impstate->write32);
4876    }
4877    catch (drcuml_block::abort_compilation &)
4878    {
4879        fatalerror("Unable to generate static RSP code\n");
4880    }
46674881}
46684882
46694883
r25438r25439
46744888
46754889static void code_compile_block(rsp_state *rsp, offs_t pc)
46764890{
4677   drcuml_state *drcuml = rsp->impstate->drcuml;
4678   compiler_state compiler = { 0 };
4679   const opcode_desc *seqhead, *seqlast;
4680   const opcode_desc *desclist;
4681   int override = FALSE;
4682   drcuml_block *block;
4891    drcuml_state *drcuml = rsp->impstate->drcuml;
4892    compiler_state compiler = { 0 };
4893    const opcode_desc *seqhead, *seqlast;
4894    const opcode_desc *desclist;
4895    int override = FALSE;
4896    drcuml_block *block;
46834897
4684   g_profiler.start(PROFILER_DRC_COMPILE);
4898    g_profiler.start(PROFILER_DRC_COMPILE);
46854899
4686   /* get a description of this sequence */
4687   desclist = rsp->impstate->drcfe->describe_code(pc);
4900    /* get a description of this sequence */
4901    desclist = rsp->impstate->drcfe->describe_code(pc);
46884902
4689   bool succeeded = false;
4690   while (!succeeded)
4691   {
4692      try
4693      {
4694         /* start the block */
4695         block = drcuml->begin_block(4096);
4903    bool succeeded = false;
4904    while (!succeeded)
4905    {
4906        try
4907        {
4908            /* start the block */
4909            block = drcuml->begin_block(4096);
46964910
4697         /* loop until we get through all instruction sequences */
4698         for (seqhead = desclist; seqhead != NULL; seqhead = seqlast->next())
4699         {
4700            const opcode_desc *curdesc;
4701            UINT32 nextpc;
4911            /* loop until we get through all instruction sequences */
4912            for (seqhead = desclist; seqhead != NULL; seqhead = seqlast->next())
4913            {
4914                const opcode_desc *curdesc;
4915                UINT32 nextpc;
47024916
4703            /* add a code log entry */
4704            if (LOG_UML)
4705               block->append_comment("-------------------------");                 // comment
4917                /* add a code log entry */
4918                if (LOG_UML)
4919                    block->append_comment("-------------------------");                 // comment
47064920
4707            /* determine the last instruction in this sequence */
4708            for (seqlast = seqhead; seqlast != NULL; seqlast = seqlast->next())
4709               if (seqlast->flags & OPFLAG_END_SEQUENCE)
4710                  break;
4711            assert(seqlast != NULL);
4921                /* determine the last instruction in this sequence */
4922                for (seqlast = seqhead; seqlast != NULL; seqlast = seqlast->next())
4923                    if (seqlast->flags & OPFLAG_END_SEQUENCE)
4924                        break;
4925                assert(seqlast != NULL);
47124926
4713            /* if we don't have a hash for this mode/pc, or if we are overriding all, add one */
4714            if (override || !drcuml->hash_exists(0, seqhead->pc))
4715               UML_HASH(block, 0, seqhead->pc);                                        // hash    mode,pc
4927                /* if we don't have a hash for this mode/pc, or if we are overriding all, add one */
4928                if (override || !drcuml->hash_exists(0, seqhead->pc))
4929                    UML_HASH(block, 0, seqhead->pc);                                        // hash    mode,pc
47164930
4717            /* if we already have a hash, and this is the first sequence, assume that we */
4718            /* are recompiling due to being out of sync and allow future overrides */
4719            else if (seqhead == desclist)
4720            {
4721               override = TRUE;
4722               UML_HASH(block, 0, seqhead->pc);                                        // hash    mode,pc
4723            }
4931                /* if we already have a hash, and this is the first sequence, assume that we */
4932                /* are recompiling due to being out of sync and allow future overrides */
4933                else if (seqhead == desclist)
4934                {
4935                    override = TRUE;
4936                    UML_HASH(block, 0, seqhead->pc);                                        // hash    mode,pc
4937                }
47244938
4725            /* otherwise, redispatch to that fixed PC and skip the rest of the processing */
4726            else
4727            {
4728               UML_LABEL(block, seqhead->pc | 0x80000000);                             // label   seqhead->pc
4729               UML_HASHJMP(block, 0, seqhead->pc, *rsp->impstate->nocode);
4730                                                                     // hashjmp <0>,seqhead->pc,nocode
4731               continue;
4732            }
4939                /* otherwise, redispatch to that fixed PC and skip the rest of the processing */
4940                else
4941                {
4942                    UML_LABEL(block, seqhead->pc | 0x80000000);                             // label   seqhead->pc
4943                    UML_HASHJMP(block, 0, seqhead->pc, *rsp->impstate->nocode);
4944                                                                                            // hashjmp <0>,seqhead->pc,nocode
4945                    continue;
4946                }
47334947
4734            /* validate this code block if we're not pointing into ROM */
4735            if (rsp->program->get_write_ptr(seqhead->physpc) != NULL)
4736               generate_checksum_block(rsp, block, &compiler, seqhead, seqlast);
4948                /* validate this code block if we're not pointing into ROM */
4949                if (rsp->program->get_write_ptr(seqhead->physpc) != NULL)
4950                    generate_checksum_block(rsp, block, &compiler, seqhead, seqlast);
47374951
4738            /* label this instruction, if it may be jumped to locally */
4739            if (seqhead->flags & OPFLAG_IS_BRANCH_TARGET)
4740               UML_LABEL(block, seqhead->pc | 0x80000000);                             // label   seqhead->pc
4952                /* label this instruction, if it may be jumped to locally */
4953                if (seqhead->flags & OPFLAG_IS_BRANCH_TARGET)
4954                    UML_LABEL(block, seqhead->pc | 0x80000000);                             // label   seqhead->pc
47414955
4742            /* iterate over instructions in the sequence and compile them */
4743            for (curdesc = seqhead; curdesc != seqlast->next(); curdesc = curdesc->next())
4744               generate_sequence_instruction(rsp, block, &compiler, curdesc);
4956                /* iterate over instructions in the sequence and compile them */
4957                for (curdesc = seqhead; curdesc != seqlast->next(); curdesc = curdesc->next())
4958                    generate_sequence_instruction(rsp, block, &compiler, curdesc);
47454959
4746            /* if we need to return to the start, do it */
4747            if (seqlast->flags & OPFLAG_RETURN_TO_START)
4748               nextpc = pc;
4960                /* if we need to return to the start, do it */
4961                if (seqlast->flags & OPFLAG_RETURN_TO_START)
4962                    nextpc = pc;
47494963
4750            /* otherwise we just go to the next instruction */
4751            else
4752               nextpc = seqlast->pc + (seqlast->skipslots + 1) * 4;
4964                /* otherwise we just go to the next instruction */
4965                else
4966                    nextpc = seqlast->pc + (seqlast->skipslots + 1) * 4;
47534967
4754            /* count off cycles and go there */
4755            generate_update_cycles(rsp, block, &compiler, nextpc, TRUE);            // <subtract cycles>
4968                /* count off cycles and go there */
4969                generate_update_cycles(rsp, block, &compiler, nextpc, TRUE);            // <subtract cycles>
47564970
4757            /* if the last instruction can change modes, use a variable mode; otherwise, assume the same mode */
4758            if (seqlast->next() == NULL || seqlast->next()->pc != nextpc)
4759               UML_HASHJMP(block, 0, nextpc, *rsp->impstate->nocode);          // hashjmp <mode>,nextpc,nocode
4760         }
4971                /* if the last instruction can change modes, use a variable mode; otherwise, assume the same mode */
4972                if (seqlast->next() == NULL || seqlast->next()->pc != nextpc)
4973                    UML_HASHJMP(block, 0, nextpc, *rsp->impstate->nocode);          // hashjmp <mode>,nextpc,nocode
4974            }
47614975
4762         /* end the sequence */
4763         block->end();
4764         g_profiler.stop();
4765         succeeded = true;
4766      }
4767      catch (drcuml_block::abort_compilation &)
4768      {
4769         code_flush_cache(rsp);
4770      }
4771   }
4976            /* end the sequence */
4977            block->end();
4978            g_profiler.stop();
4979            succeeded = true;
4980        }
4981        catch (drcuml_block::abort_compilation &)
4982        {
4983            code_flush_cache(rsp);
4984        }
4985    }
47724986}
47734987
47744988/***************************************************************************
r25438r25439
47824996
47834997static void cfunc_unimplemented(void *param)
47844998{
4785   rsp_state *rsp = (rsp_state *)param;
4786   UINT32 opcode = rsp->impstate->arg0;
4787   fatalerror("PC=%08X: Unimplemented op %08X (%02X,%02X)\n", rsp->pc, opcode, opcode >> 26, opcode & 0x3f);
4999    rsp_state *rsp = (rsp_state *)param;
5000    UINT32 opcode = rsp->impstate->arg0;
5001    fatalerror("PC=%08X: Unimplemented op %08X (%02X,%02X)\n", rsp->pc, opcode, opcode >> 26, opcode & 0x3f);
47885002}
47895003
47905004
r25438r25439
47955009#ifdef UNUSED_CODE
47965010static void cfunc_fatalerror(void *param)
47975011{
4798   fatalerror("fatalerror\n");
5012    fatalerror("fatalerror\n");
47995013}
48005014#endif
48015015
r25438r25439
48115025
48125026static void static_generate_entry_point(rsp_state *rsp)
48135027{
4814   drcuml_state *drcuml = rsp->impstate->drcuml;
4815   drcuml_block *block;
5028    drcuml_state *drcuml = rsp->impstate->drcuml;
5029    drcuml_block *block;
48165030
4817   /* begin generating */
4818   block = drcuml->begin_block(20);
5031    /* begin generating */
5032    block = drcuml->begin_block(20);
48195033
4820   /* forward references */
4821   alloc_handle(drcuml, &rsp->impstate->nocode, "nocode");
5034    /* forward references */
5035    alloc_handle(drcuml, &rsp->impstate->nocode, "nocode");
48225036
4823   alloc_handle(drcuml, &rsp->impstate->entry, "entry");
4824   UML_HANDLE(block, *rsp->impstate->entry);                                       // handle  entry
5037    alloc_handle(drcuml, &rsp->impstate->entry, "entry");
5038    UML_HANDLE(block, *rsp->impstate->entry);                                       // handle  entry
48255039
4826   /* load fast integer registers */
4827   load_fast_iregs(rsp, block);
5040    /* load fast integer registers */
5041    load_fast_iregs(rsp, block);
48285042
4829   /* generate a hash jump via the current mode and PC */
4830   UML_HASHJMP(block, 0, mem(&rsp->pc), *rsp->impstate->nocode);                   // hashjmp <mode>,<pc>,nocode
4831   block->end();
5043    /* generate a hash jump via the current mode and PC */
5044    UML_HASHJMP(block, 0, mem(&rsp->pc), *rsp->impstate->nocode);                   // hashjmp <mode>,<pc>,nocode
5045    block->end();
48325046}
48335047
48345048
r25438r25439
48395053
48405054static void static_generate_nocode_handler(rsp_state *rsp)
48415055{
4842   drcuml_state *drcuml = rsp->impstate->drcuml;
4843   drcuml_block *block;
5056    drcuml_state *drcuml = rsp->impstate->drcuml;
5057    drcuml_block *block;
48445058
4845   /* begin generating */
4846   block = drcuml->begin_block(10);
5059    /* begin generating */
5060    block = drcuml->begin_block(10);
48475061
4848   /* generate a hash jump via the current mode and PC */
4849   alloc_handle(drcuml, &rsp->impstate->nocode, "nocode");
4850   UML_HANDLE(block, *rsp->impstate->nocode);                                      // handle  nocode
4851   UML_GETEXP(block, I0);                                                      // getexp  i0
4852   UML_MOV(block, mem(&rsp->pc), I0);                                          // mov     [pc],i0
4853   save_fast_iregs(rsp, block);
4854   UML_EXIT(block, EXECUTE_MISSING_CODE);                                      // exit    EXECUTE_MISSING_CODE
5062    /* generate a hash jump via the current mode and PC */
5063    alloc_handle(drcuml, &rsp->impstate->nocode, "nocode");
5064    UML_HANDLE(block, *rsp->impstate->nocode);                                      // handle  nocode
5065    UML_GETEXP(block, I0);                                                      // getexp  i0
5066    UML_MOV(block, mem(&rsp->pc), I0);                                          // mov     [pc],i0
5067    save_fast_iregs(rsp, block);
5068    UML_EXIT(block, EXECUTE_MISSING_CODE);                                      // exit    EXECUTE_MISSING_CODE
48555069
4856   block->end();
5070    block->end();
48575071}
48585072
48595073
r25438r25439
48645078
48655079static void static_generate_out_of_cycles(rsp_state *rsp)
48665080{
4867   drcuml_state *drcuml = rsp->impstate->drcuml;
4868   drcuml_block *block;
5081    drcuml_state *drcuml = rsp->impstate->drcuml;
5082    drcuml_block *block;
48695083
4870   /* begin generating */
4871   block = drcuml->begin_block(10);
5084    /* begin generating */
5085    block = drcuml->begin_block(10);
48725086
4873   /* generate a hash jump via the current mode and PC */
4874   alloc_handle(drcuml, &rsp->impstate->out_of_cycles, "out_of_cycles");
4875   UML_HANDLE(block, *rsp->impstate->out_of_cycles);                               // handle  out_of_cycles
4876   UML_GETEXP(block, I0);                                                      // getexp  i0
4877   UML_MOV(block, mem(&rsp->pc), I0);                                          // mov     <pc>,i0
4878   save_fast_iregs(rsp, block);
4879   UML_EXIT(block, EXECUTE_OUT_OF_CYCLES);                                 // exit    EXECUTE_OUT_OF_CYCLES
5087    /* generate a hash jump via the current mode and PC */
5088    alloc_handle(drcuml, &rsp->impstate->out_of_cycles, "out_of_cycles");
5089    UML_HANDLE(block, *rsp->impstate->out_of_cycles);                               // handle  out_of_cycles
5090    UML_GETEXP(block, I0);                                                      // getexp  i0
5091    UML_MOV(block, mem(&rsp->pc), I0);                                          // mov     <pc>,i0
5092    save_fast_iregs(rsp, block);
5093    UML_EXIT(block, EXECUTE_OUT_OF_CYCLES);                                 // exit    EXECUTE_OUT_OF_CYCLES
48805094
4881   block->end();
5095    block->end();
48825096}
48835097
48845098/*------------------------------------------------------------------
r25438r25439
48875101
48885102static void static_generate_memory_accessor(rsp_state *rsp, int size, int iswrite, const char *name, code_handle *&handleptr)
48895103{
4890   /* on entry, address is in I0; data for writes is in I1 */
4891   /* on exit, read result is in I0 */
4892   /* routine trashes I0-I1 */
4893   drcuml_state *drcuml = rsp->impstate->drcuml;
4894   drcuml_block *block;
5104    /* on entry, address is in I0; data for writes is in I1 */
5105    /* on exit, read result is in I0 */
5106    /* routine trashes I0-I1 */
5107    drcuml_state *drcuml = rsp->impstate->drcuml;
5108    drcuml_block *block;
48955109
4896   /* begin generating */
4897   block = drcuml->begin_block(1024);
5110    /* begin generating */
5111    block = drcuml->begin_block(1024);
48985112
4899   /* add a global entry for this */
4900   alloc_handle(drcuml, &handleptr, name);
4901   UML_HANDLE(block, *handleptr);                                                  // handle  *handleptr
5113    /* add a global entry for this */
5114    alloc_handle(drcuml, &handleptr, name);
5115    UML_HANDLE(block, *handleptr);                                                  // handle  *handleptr
49025116
4903   // write:
4904   if (iswrite)
4905   {
4906      if (size == 1)
4907      {
4908         UML_MOV(block, mem(&rsp->impstate->arg0), I0);              // mov     [arg0],i0 ; address
4909         UML_MOV(block, mem(&rsp->impstate->arg1), I1);              // mov     [arg1],i1 ; data
4910         UML_CALLC(block, cfunc_write8, rsp);                            // callc   cfunc_write8
4911      }
4912      else if (size == 2)
4913      {
4914         UML_MOV(block, mem(&rsp->impstate->arg0), I0);              // mov     [arg0],i0 ; address
4915         UML_MOV(block, mem(&rsp->impstate->arg1), I1);              // mov     [arg1],i1 ; data
4916         UML_CALLC(block, cfunc_write16, rsp);                           // callc   cfunc_write16
4917      }
4918      else if (size == 4)
4919      {
4920         UML_MOV(block, mem(&rsp->impstate->arg0), I0);              // mov     [arg0],i0 ; address
4921         UML_MOV(block, mem(&rsp->impstate->arg1), I1);              // mov     [arg1],i1 ; data
4922         UML_CALLC(block, cfunc_write32, rsp);                           // callc   cfunc_write32
4923      }
4924   }
4925   else
4926   {
4927      if (size == 1)
4928      {
4929         UML_MOV(block, mem(&rsp->impstate->arg0), I0);          // mov     [arg0],i0 ; address
4930         UML_CALLC(block, cfunc_read8, rsp);                         // callc   cfunc_printf_debug
4931         UML_MOV(block, I0, mem(&rsp->impstate->arg0));          // mov     i0,[arg0],i0 ; result
4932      }
4933      else if (size == 2)
4934      {
4935         UML_MOV(block, mem(&rsp->impstate->arg0), I0);          // mov     [arg0],i0 ; address
4936         UML_CALLC(block, cfunc_read16, rsp);                        // callc   cfunc_read16
4937         UML_MOV(block, I0, mem(&rsp->impstate->arg0));          // mov     i0,[arg0],i0 ; result
4938      }
4939      else if (size == 4)
4940      {
4941         UML_MOV(block, mem(&rsp->impstate->arg0), I0);          // mov     [arg0],i0 ; address
4942         UML_CALLC(block, cfunc_read32, rsp);                        // callc   cfunc_read32
4943         UML_MOV(block, I0, mem(&rsp->impstate->arg0));          // mov     i0,[arg0],i0 ; result
4944      }
4945   }
4946   UML_RET(block);
5117    // write:
5118    if (iswrite)
5119    {
5120        if (size == 1)
5121        {
5122            UML_MOV(block, mem(&rsp->impstate->arg0), I0);              // mov     [arg0],i0 ; address
5123            UML_MOV(block, mem(&rsp->impstate->arg1), I1);              // mov     [arg1],i1 ; data
5124            UML_CALLC(block, cfunc_write8, rsp);                            // callc   cfunc_write8
5125        }
5126        else if (size == 2)
5127        {
5128            UML_MOV(block, mem(&rsp->impstate->arg0), I0);              // mov     [arg0],i0 ; address
5129            UML_MOV(block, mem(&rsp->impstate->arg1), I1);              // mov     [arg1],i1 ; data
5130            UML_CALLC(block, cfunc_write16, rsp);                           // callc   cfunc_write16
5131        }
5132        else if (size == 4)
5133        {
5134            UML_MOV(block, mem(&rsp->impstate->arg0), I0);              // mov     [arg0],i0 ; address
5135            UML_MOV(block, mem(&rsp->impstate->arg1), I1);              // mov     [arg1],i1 ; data
5136            UML_CALLC(block, cfunc_write32, rsp);                           // callc   cfunc_write32
5137        }
5138    }
5139    else
5140    {
5141        if (size == 1)
5142        {
5143            UML_MOV(block, mem(&rsp->impstate->arg0), I0);          // mov     [arg0],i0 ; address
5144            UML_CALLC(block, cfunc_read8, rsp);                         // callc   cfunc_printf_debug
5145            UML_MOV(block, I0, mem(&rsp->impstate->arg0));          // mov     i0,[arg0],i0 ; result
5146        }
5147        else if (size == 2)
5148        {
5149            UML_MOV(block, mem(&rsp->impstate->arg0), I0);          // mov     [arg0],i0 ; address
5150            UML_CALLC(block, cfunc_read16, rsp);                        // callc   cfunc_read16
5151            UML_MOV(block, I0, mem(&rsp->impstate->arg0));          // mov     i0,[arg0],i0 ; result
5152        }
5153        else if (size == 4)
5154        {
5155            UML_MOV(block, mem(&rsp->impstate->arg0), I0);          // mov     [arg0],i0 ; address
5156            UML_CALLC(block, cfunc_read32, rsp);                        // callc   cfunc_read32
5157            UML_MOV(block, I0, mem(&rsp->impstate->arg0));          // mov     i0,[arg0],i0 ; result
5158        }
5159    }
5160    UML_RET(block);
49475161
4948   block->end();
5162    block->end();
49495163}
49505164
49515165
r25438r25439
49615175-------------------------------------------------*/
49625176static void generate_update_cycles(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, parameter param, int allow_exception)
49635177{
4964   /* account for cycles */
4965   if (compiler->cycles > 0)
4966   {
4967      UML_SUB(block, mem(&rsp->icount), mem(&rsp->icount), MAPVAR_CYCLES);        // sub     icount,icount,cycles
4968      UML_MAPVAR(block, MAPVAR_CYCLES, 0);                                        // mapvar  cycles,0
4969      UML_EXHc(block, COND_S, *rsp->impstate->out_of_cycles, param);
4970   }
4971   compiler->cycles = 0;
5178    /* account for cycles */
5179    if (compiler->cycles > 0)
5180    {
5181        UML_SUB(block, mem(&rsp->icount), mem(&rsp->icount), MAPVAR_CYCLES);        // sub     icount,icount,cycles
5182        UML_MAPVAR(block, MAPVAR_CYCLES, 0);                                        // mapvar  cycles,0
5183        UML_EXHc(block, COND_S, *rsp->impstate->out_of_cycles, param);
5184    }
5185    compiler->cycles = 0;
49725186}
49735187
49745188/*-------------------------------------------------
r25438r25439
49785192
49795193static void generate_checksum_block(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *seqhead, const opcode_desc *seqlast)
49805194{
4981   const opcode_desc *curdesc;
4982   if (LOG_UML)
4983   {
4984      block->append_comment("[Validation for %08X]", seqhead->pc | 0x1000);       // comment
4985   }
4986   /* loose verify or single instruction: just compare and fail */
4987   if (!(rsp->impstate->drcoptions & RSPDRC_STRICT_VERIFY) || seqhead->next() == NULL)
4988   {
4989      if (!(seqhead->flags & OPFLAG_VIRTUAL_NOOP))
4990      {
4991         UINT32 sum = seqhead->opptr.l[0];
4992         void *base = rsp->direct->read_decrypted_ptr(seqhead->physpc | 0x1000);
4993         UML_LOAD(block, I0, base, 0, SIZE_DWORD, SCALE_x4);                         // load    i0,base,0,dword
5195    const opcode_desc *curdesc;
5196    if (LOG_UML)
5197    {
5198        block->append_comment("[Validation for %08X]", seqhead->pc | 0x1000);       // comment
5199    }
5200    /* loose verify or single instruction: just compare and fail */
5201    if (!(rsp->impstate->drcoptions & RSPDRC_STRICT_VERIFY) || seqhead->next() == NULL)
5202    {
5203        if (!(seqhead->flags & OPFLAG_VIRTUAL_NOOP))
5204        {
5205            UINT32 sum = seqhead->opptr.l[0];
5206            void *base = rsp->direct->read_decrypted_ptr(seqhead->physpc | 0x1000);
5207            UML_LOAD(block, I0, base, 0, SIZE_DWORD, SCALE_x4);                         // load    i0,base,0,dword
49945208
4995         if (seqhead->delay.first() != NULL && seqhead->physpc != seqhead->delay.first()->physpc)
4996         {
4997            base = rsp->direct->read_decrypted_ptr(seqhead->delay.first()->physpc | 0x1000);
4998            UML_LOAD(block, I1, base, 0, SIZE_DWORD, SCALE_x4);                 // load    i1,base,dword
4999            UML_ADD(block, I0, I0, I1);                     // add     i0,i0,i1
5209            if (seqhead->delay.first() != NULL && seqhead->physpc != seqhead->delay.first()->physpc)
5210            {
5211                base = rsp->direct->read_decrypted_ptr(seqhead->delay.first()->physpc | 0x1000);
5212                UML_LOAD(block, I1, base, 0, SIZE_DWORD, SCALE_x4);                 // load    i1,base,dword
5213                UML_ADD(block, I0, I0, I1);                     // add     i0,i0,i1
50005214
5001            sum += seqhead->delay.first()->opptr.l[0];
5002         }
5215                sum += seqhead->delay.first()->opptr.l[0];
5216            }
50035217
5004         UML_CMP(block, I0, sum);                                    // cmp     i0,opptr[0]
5005         UML_EXHc(block, COND_NE, *rsp->impstate->nocode, epc(seqhead));     // exne    nocode,seqhead->pc
5006      }
5007   }
5218            UML_CMP(block, I0, sum);                                    // cmp     i0,opptr[0]
5219            UML_EXHc(block, COND_NE, *rsp->impstate->nocode, epc(seqhead));     // exne    nocode,seqhead->pc
5220        }
5221    }
50085222
5009   /* full verification; sum up everything */
5010   else
5011   {
5012      UINT32 sum = 0;
5013      void *base = rsp->direct->read_decrypted_ptr(seqhead->physpc | 0x1000);
5014      UML_LOAD(block, I0, base, 0, SIZE_DWORD, SCALE_x4);                             // load    i0,base,0,dword
5015      sum += seqhead->opptr.l[0];
5016      for (curdesc = seqhead->next(); curdesc != seqlast->next(); curdesc = curdesc->next())
5017         if (!(curdesc->flags & OPFLAG_VIRTUAL_NOOP))
5018         {
5019            base = rsp->direct->read_decrypted_ptr(curdesc->physpc | 0x1000);
5020            UML_LOAD(block, I1, base, 0, SIZE_DWORD, SCALE_x4);                     // load    i1,base,dword
5021            UML_ADD(block, I0, I0, I1);                         // add     i0,i0,i1
5022            sum += curdesc->opptr.l[0];
5223    /* full verification; sum up everything */
5224    else
5225    {
5226        UINT32 sum = 0;
5227        void *base = rsp->direct->read_decrypted_ptr(seqhead->physpc | 0x1000);
5228        UML_LOAD(block, I0, base, 0, SIZE_DWORD, SCALE_x4);                             // load    i0,base,0,dword
5229        sum += seqhead->opptr.l[0];
5230        for (curdesc = seqhead->next(); curdesc != seqlast->next(); curdesc = curdesc->next())
5231            if (!(curdesc->flags & OPFLAG_VIRTUAL_NOOP))
5232            {
5233                base = rsp->direct->read_decrypted_ptr(curdesc->physpc | 0x1000);
5234                UML_LOAD(block, I1, base, 0, SIZE_DWORD, SCALE_x4);                     // load    i1,base,dword
5235                UML_ADD(block, I0, I0, I1);                         // add     i0,i0,i1
5236                sum += curdesc->opptr.l[0];
50235237
5024            if (curdesc->delay.first() != NULL && (curdesc == seqlast || (curdesc->next() != NULL && curdesc->next()->physpc != curdesc->delay.first()->physpc)))
5025            {
5026               base = rsp->direct->read_decrypted_ptr(curdesc->delay.first()->physpc | 0x1000);
5027               UML_LOAD(block, I1, base, 0, SIZE_DWORD, SCALE_x4);                 // load    i1,base,dword
5028               UML_ADD(block, I0, I0, I1);                     // add     i0,i0,i1
5238                if (curdesc->delay.first() != NULL && (curdesc == seqlast || (curdesc->next() != NULL && curdesc->next()->physpc != curdesc->delay.first()->physpc)))
5239                {
5240                    base = rsp->direct->read_decrypted_ptr(curdesc->delay.first()->physpc | 0x1000);
5241                    UML_LOAD(block, I1, base, 0, SIZE_DWORD, SCALE_x4);                 // load    i1,base,dword
5242                    UML_ADD(block, I0, I0, I1);                     // add     i0,i0,i1
50295243
5030               sum += curdesc->delay.first()->opptr.l[0];
5031            }
5032         }
5033      UML_CMP(block, I0, sum);                                            // cmp     i0,sum
5034      UML_EXHc(block, COND_NE, *rsp->impstate->nocode, epc(seqhead));         // exne    nocode,seqhead->pc
5035   }
5244                    sum += curdesc->delay.first()->opptr.l[0];
5245                }
5246            }
5247        UML_CMP(block, I0, sum);                                            // cmp     i0,sum
5248        UML_EXHc(block, COND_NE, *rsp->impstate->nocode, epc(seqhead));         // exne    nocode,seqhead->pc
5249    }
50365250}
50375251
50385252
r25438r25439
50435257
50445258static void generate_sequence_instruction(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
50455259{
5046   offs_t expc;
5260    offs_t expc;
50475261
5048   /* add an entry for the log */
5049   if (LOG_UML && !(desc->flags & OPFLAG_VIRTUAL_NOOP))
5050      log_add_disasm_comment(rsp, block, desc->pc, desc->opptr.l[0]);
5262    /* add an entry for the log */
5263    if (LOG_UML && !(desc->flags & OPFLAG_VIRTUAL_NOOP))
5264        log_add_disasm_comment(rsp, block, desc->pc, desc->opptr.l[0]);
50515265
5052   /* set the PC map variable */
5053   expc = (desc->flags & OPFLAG_IN_DELAY_SLOT) ? desc->pc - 3 : desc->pc;
5054   UML_MAPVAR(block, MAPVAR_PC, expc);                                             // mapvar  PC,expc
5266    /* set the PC map variable */
5267    expc = (desc->flags & OPFLAG_IN_DELAY_SLOT) ? desc->pc - 3 : desc->pc;
5268    UML_MAPVAR(block, MAPVAR_PC, expc);                                             // mapvar  PC,expc
50555269
5056   /* accumulate total cycles */
5057   compiler->cycles += desc->cycles;
5270    /* accumulate total cycles */
5271    compiler->cycles += desc->cycles;
50585272
5059   /* update the icount map variable */
5060   UML_MAPVAR(block, MAPVAR_CYCLES, compiler->cycles);                             // mapvar  CYCLES,compiler->cycles
5273    /* update the icount map variable */
5274    UML_MAPVAR(block, MAPVAR_CYCLES, compiler->cycles);                             // mapvar  CYCLES,compiler->cycles
50615275
5062   /* if we are debugging, call the debugger */
5063   if ((rsp->device->machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
5064   {
5065      UML_MOV(block, mem(&rsp->pc), desc->pc);                                // mov     [pc],desc->pc
5066      save_fast_iregs(rsp, block);
5067      UML_DEBUG(block, desc->pc);                                         // debug   desc->pc
5068   }
5276    /* if we are debugging, call the debugger */
5277    if ((rsp->device->machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
5278    {
5279        UML_MOV(block, mem(&rsp->pc), desc->pc);                                // mov     [pc],desc->pc
5280        save_fast_iregs(rsp, block);
5281        UML_DEBUG(block, desc->pc);                                         // debug   desc->pc
5282    }
50695283
5070   /* if we hit an unmapped address, fatal error */
5284    /* if we hit an unmapped address, fatal error */
50715285#if 0
5072   if (desc->flags & OPFLAG_COMPILER_UNMAPPED)
5073   {
5074      UML_MOV(block, mem(&rsp->pc), desc->pc);                               // mov     [pc],desc->pc
5075      save_fast_iregs(rsp, block);
5076      UML_EXIT(block, EXECUTE_UNMAPPED_CODE);                             // exit EXECUTE_UNMAPPED_CODE
5077   }
5286    if (desc->flags & OPFLAG_COMPILER_UNMAPPED)
5287    {
5288        UML_MOV(block, mem(&rsp->pc), desc->pc);                               // mov     [pc],desc->pc
5289        save_fast_iregs(rsp, block);
5290        UML_EXIT(block, EXECUTE_UNMAPPED_CODE);                             // exit EXECUTE_UNMAPPED_CODE
5291    }
50785292#endif
50795293
5080   /* otherwise, unless this is a virtual no-op, it's a regular instruction */
5081   /*else*/ if (!(desc->flags & OPFLAG_VIRTUAL_NOOP))
5082   {
5083      /* compile the instruction */
5084      if (!generate_opcode(rsp, block, compiler, desc))
5085      {
5086         UML_MOV(block, mem(&rsp->pc), desc->pc);                            // mov     [pc],desc->pc
5087         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
5088         UML_CALLC(block, cfunc_unimplemented, rsp);                             // callc   cfunc_unimplemented
5089      }
5090   }
5294    /* otherwise, unless this is a virtual no-op, it's a regular instruction */
5295    /*else*/ if (!(desc->flags & OPFLAG_VIRTUAL_NOOP))
5296    {
5297        /* compile the instruction */
5298        if (!generate_opcode(rsp, block, compiler, desc))
5299        {
5300            UML_MOV(block, mem(&rsp->pc), desc->pc);                            // mov     [pc],desc->pc
5301            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
5302            UML_CALLC(block, cfunc_unimplemented, rsp);                             // callc   cfunc_unimplemented
5303        }
5304    }
50915305}
50925306
50935307/*------------------------------------------------------------------
r25438r25439
50965310
50975311static void generate_delay_slot_and_branch(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc, UINT8 linkreg)
50985312{
5099   compiler_state compiler_temp = *compiler;
5100   UINT32 op = desc->opptr.l[0];
5313    compiler_state compiler_temp = *compiler;
5314    UINT32 op = desc->opptr.l[0];
51015315
5102   /* fetch the target register if dynamic, in case it is modified by the delay slot */
5103   if (desc->targetpc == BRANCH_TARGET_DYNAMIC)
5104   {
5105      UML_AND(block, mem(&rsp->impstate->jmpdest), R32(RSREG), 0x00000fff);
5106      UML_OR(block, mem(&rsp->impstate->jmpdest), mem(&rsp->impstate->jmpdest), 0x1000);
5107   }
5316    /* fetch the target register if dynamic, in case it is modified by the delay slot */
5317    if (desc->targetpc == BRANCH_TARGET_DYNAMIC)
5318    {
5319        UML_AND(block, mem(&rsp->impstate->jmpdest), R32(RSREG), 0x00000fff);
5320        UML_OR(block, mem(&rsp->impstate->jmpdest), mem(&rsp->impstate->jmpdest), 0x1000);
5321    }
51085322
5109   /* set the link if needed -- before the delay slot */
5110   if (linkreg != 0)
5111   {
5112      UML_MOV(block, R32(linkreg), (INT32)(desc->pc + 8));                    // mov    <linkreg>,desc->pc + 8
5113   }
5323    /* set the link if needed -- before the delay slot */
5324    if (linkreg != 0)
5325    {
5326        UML_MOV(block, R32(linkreg), (INT32)(desc->pc + 8));                    // mov    <linkreg>,desc->pc + 8
5327    }
51145328
5115   /* compile the delay slot using temporary compiler state */
5116   assert(desc->delay.first() != NULL);
5117   generate_sequence_instruction(rsp, block, &compiler_temp, desc->delay.first());     // <next instruction>
5329    /* compile the delay slot using temporary compiler state */
5330    assert(desc->delay.first() != NULL);
5331    generate_sequence_instruction(rsp, block, &compiler_temp, desc->delay.first());     // <next instruction>
51185332
5119   /* update the cycles and jump through the hash table to the target */
5120   if (desc->targetpc != BRANCH_TARGET_DYNAMIC)
5121   {
5122      generate_update_cycles(rsp, block, &compiler_temp, desc->targetpc, TRUE);   // <subtract cycles>
5123      if (desc->flags & OPFLAG_INTRABLOCK_BRANCH)
5124         UML_JMP(block, desc->targetpc | 0x80000000);                            // jmp     desc->targetpc
5125      else
5126         UML_HASHJMP(block, 0, desc->targetpc, *rsp->impstate->nocode);
5127                                                               // hashjmp <mode>,desc->targetpc,nocode
5128   }
5129   else
5130   {
5131      generate_update_cycles(rsp, block, &compiler_temp, mem(&rsp->impstate->jmpdest), TRUE);
5132                                                               // <subtract cycles>
5133      UML_HASHJMP(block, 0, mem(&rsp->impstate->jmpdest), *rsp->impstate->nocode);
5134                                                               // hashjmp <mode>,<rsreg>,nocode
5135   }
5333    /* update the cycles and jump through the hash table to the target */
5334    if (desc->targetpc != BRANCH_TARGET_DYNAMIC)
5335    {
5336        generate_update_cycles(rsp, block, &compiler_temp, desc->targetpc, TRUE);   // <subtract cycles>
5337        if (desc->flags & OPFLAG_INTRABLOCK_BRANCH)
5338            UML_JMP(block, desc->targetpc | 0x80000000);                            // jmp     desc->targetpc
5339        else
5340            UML_HASHJMP(block, 0, desc->targetpc, *rsp->impstate->nocode);
5341                                                                                    // hashjmp <mode>,desc->targetpc,nocode
5342    }
5343    else
5344    {
5345        generate_update_cycles(rsp, block, &compiler_temp, mem(&rsp->impstate->jmpdest), TRUE);
5346                                                                                    // <subtract cycles>
5347        UML_HASHJMP(block, 0, mem(&rsp->impstate->jmpdest), *rsp->impstate->nocode);
5348                                                                                    // hashjmp <mode>,<rsreg>,nocode
5349    }
51365350
5137   /* update the label */
5138   compiler->labelnum = compiler_temp.labelnum;
5351    /* update the label */
5352    compiler->labelnum = compiler_temp.labelnum;
51395353
5140   /* reset the mapvar to the current cycles and account for skipped slots */
5141   compiler->cycles += desc->skipslots;
5142   UML_MAPVAR(block, MAPVAR_CYCLES, compiler->cycles);                             // mapvar  CYCLES,compiler->cycles
5354    /* reset the mapvar to the current cycles and account for skipped slots */
5355    compiler->cycles += desc->skipslots;
5356    UML_MAPVAR(block, MAPVAR_CYCLES, compiler->cycles);                             // mapvar  CYCLES,compiler->cycles
51435357}
51445358
51455359
r25438r25439
51505364
51515365static int generate_vector_opcode(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
51525366{
5153   UINT32 op = desc->opptr.l[0];
5154   // Opcode legend:
5155   //    E = VS2 element type
5156   //    S = VS1, Source vector 1
5157   //    T = VS2, Source vector 2
5158   //    D = Destination vector
5367    UINT32 op = desc->opptr.l[0];
5368    // Opcode legend:
5369    //    E = VS2 element type
5370    //    S = VS1, Source vector 1
5371    //    T = VS2, Source vector 2
5372    //    D = Destination vector
51595373
51605374   switch (op & 0x3f)
51615375   {
r25438r25439
53585572
53595573static int generate_opcode(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
53605574{
5361   int in_delay_slot = ((desc->flags & OPFLAG_IN_DELAY_SLOT) != 0);
5362   UINT32 op = desc->opptr.l[0];
5363   UINT8 opswitch = op >> 26;
5364   code_label skip;
5575    int in_delay_slot = ((desc->flags & OPFLAG_IN_DELAY_SLOT) != 0);
5576    UINT32 op = desc->opptr.l[0];
5577    UINT8 opswitch = op >> 26;
5578    code_label skip;
53655579
5366   switch (opswitch)
5367   {
5368      /* ----- sub-groups ----- */
5580    switch (opswitch)
5581    {
5582        /* ----- sub-groups ----- */
53695583
5370      case 0x00:  /* SPECIAL - MIPS I */
5371         return generate_special(rsp, block, compiler, desc);
5584        case 0x00:  /* SPECIAL - MIPS I */
5585            return generate_special(rsp, block, compiler, desc);
53725586
5373      case 0x01:  /* REGIMM - MIPS I */
5374         return generate_regimm(rsp, block, compiler, desc);
5587        case 0x01:  /* REGIMM - MIPS I */
5588            return generate_regimm(rsp, block, compiler, desc);
53755589
5376      /* ----- jumps and branches ----- */
5590        /* ----- jumps and branches ----- */
53775591
5378      case 0x02:  /* J - MIPS I */
5379         generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5380         return TRUE;
5592        case 0x02:  /* J - MIPS I */
5593            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5594            return TRUE;
53815595
5382      case 0x03:  /* JAL - MIPS I */
5383         generate_delay_slot_and_branch(rsp, block, compiler, desc, 31);     // <next instruction + hashjmp>
5384         return TRUE;
5596        case 0x03:  /* JAL - MIPS I */
5597            generate_delay_slot_and_branch(rsp, block, compiler, desc, 31);     // <next instruction + hashjmp>
5598            return TRUE;
53855599
5386      case 0x04:  /* BEQ - MIPS I */
5387         UML_CMP(block, R32(RSREG), R32(RTREG));                             // cmp    <rsreg>,<rtreg>
5388         UML_JMPc(block, COND_NE, skip = compiler->labelnum++);              // jmp    skip,NE
5389         generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5390         UML_LABEL(block, skip);                                             // skip:
5391         return TRUE;
5600        case 0x04:  /* BEQ - MIPS I */
5601            UML_CMP(block, R32(RSREG), R32(RTREG));                             // cmp    <rsreg>,<rtreg>
5602            UML_JMPc(block, COND_NE, skip = compiler->labelnum++);              // jmp    skip,NE
5603            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5604            UML_LABEL(block, skip);                                             // skip:
5605            return TRUE;
53925606
5393      case 0x05:  /* BNE - MIPS I */
5394         UML_CMP(block, R32(RSREG), R32(RTREG));                             // dcmp    <rsreg>,<rtreg>
5395         UML_JMPc(block, COND_E, skip = compiler->labelnum++);                       // jmp     skip,E
5396         generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5397         UML_LABEL(block, skip);                                             // skip:
5398         return TRUE;
5607        case 0x05:  /* BNE - MIPS I */
5608            UML_CMP(block, R32(RSREG), R32(RTREG));                             // dcmp    <rsreg>,<rtreg>
5609            UML_JMPc(block, COND_E, skip = compiler->labelnum++);                       // jmp     skip,E
5610            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5611            UML_LABEL(block, skip);                                             // skip:
5612            return TRUE;
53995613
5400      case 0x06:  /* BLEZ - MIPS I */
5401         if (RSREG != 0)
5402         {
5403            UML_CMP(block, R32(RSREG), 0);                              // dcmp    <rsreg>,0
5404            UML_JMPc(block, COND_G, skip = compiler->labelnum++);                   // jmp     skip,G
5405            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);  // <next instruction + hashjmp>
5406            UML_LABEL(block, skip);                                         // skip:
5407         }
5408         else
5409            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);  // <next instruction + hashjmp>
5410         return TRUE;
5614        case 0x06:  /* BLEZ - MIPS I */
5615            if (RSREG != 0)
5616            {
5617                UML_CMP(block, R32(RSREG), 0);                              // dcmp    <rsreg>,0
5618                UML_JMPc(block, COND_G, skip = compiler->labelnum++);                   // jmp     skip,G
5619                generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);  // <next instruction + hashjmp>
5620                UML_LABEL(block, skip);                                         // skip:
5621            }
5622            else
5623                generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);  // <next instruction + hashjmp>
5624            return TRUE;
54115625
5412      case 0x07:  /* BGTZ - MIPS I */
5413         UML_CMP(block, R32(RSREG), 0);                                  // dcmp    <rsreg>,0
5414         UML_JMPc(block, COND_LE, skip = compiler->labelnum++);                  // jmp     skip,LE
5415         generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5416         UML_LABEL(block, skip);                                             // skip:
5417         return TRUE;
5626        case 0x07:  /* BGTZ - MIPS I */
5627            UML_CMP(block, R32(RSREG), 0);                                  // dcmp    <rsreg>,0
5628            UML_JMPc(block, COND_LE, skip = compiler->labelnum++);                  // jmp     skip,LE
5629            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5630            UML_LABEL(block, skip);                                             // skip:
5631            return TRUE;
54185632
54195633
5420      /* ----- immediate arithmetic ----- */
5634        /* ----- immediate arithmetic ----- */
54215635
5422      case 0x0f:  /* LUI - MIPS I */
5423         if (RTREG != 0)
5424            UML_MOV(block, R32(RTREG), SIMMVAL << 16);                  // dmov    <rtreg>,SIMMVAL << 16
5425         return TRUE;
5636        case 0x0f:  /* LUI - MIPS I */
5637            if (RTREG != 0)
5638                UML_MOV(block, R32(RTREG), SIMMVAL << 16);                  // dmov    <rtreg>,SIMMVAL << 16
5639            return TRUE;
54265640
5427      case 0x08:  /* ADDI - MIPS I */
5428      case 0x09:  /* ADDIU - MIPS I */
5429         if (RTREG != 0)
5430         {
5431            UML_ADD(block, R32(RTREG), R32(RSREG), SIMMVAL);                // add     i0,<rsreg>,SIMMVAL,V
5432         }
5433         return TRUE;
5641        case 0x08:  /* ADDI - MIPS I */
5642        case 0x09:  /* ADDIU - MIPS I */
5643            if (RTREG != 0)
5644            {
5645                UML_ADD(block, R32(RTREG), R32(RSREG), SIMMVAL);                // add     i0,<rsreg>,SIMMVAL,V
5646            }
5647            return TRUE;
54345648
5435      case 0x0a:  /* SLTI - MIPS I */
5436         if (RTREG != 0)
5437         {
5438            UML_CMP(block, R32(RSREG), SIMMVAL);                            // dcmp    <rsreg>,SIMMVAL
5439            UML_SETc(block, COND_L, R32(RTREG));                                    // dset    <rtreg>,l
5440         }
5441         return TRUE;
5649        case 0x0a:  /* SLTI - MIPS I */
5650            if (RTREG != 0)
5651            {
5652                UML_CMP(block, R32(RSREG), SIMMVAL);                            // dcmp    <rsreg>,SIMMVAL
5653                UML_SETc(block, COND_L, R32(RTREG));                                    // dset    <rtreg>,l
5654            }
5655            return TRUE;
54425656
5443      case 0x0b:  /* SLTIU - MIPS I */
5444         if (RTREG != 0)
5445         {
5446            UML_CMP(block, R32(RSREG), SIMMVAL);                            // dcmp    <rsreg>,SIMMVAL
5447            UML_SETc(block, COND_B, R32(RTREG));                                    // dset    <rtreg>,b
5448         }
5449         return TRUE;
5657        case 0x0b:  /* SLTIU - MIPS I */
5658            if (RTREG != 0)
5659            {
5660                UML_CMP(block, R32(RSREG), SIMMVAL);                            // dcmp    <rsreg>,SIMMVAL
5661                UML_SETc(block, COND_B, R32(RTREG));                                    // dset    <rtreg>,b
5662            }
5663            return TRUE;
54505664
54515665
5452      case 0x0c:  /* ANDI - MIPS I */
5453         if (RTREG != 0)
5454            UML_AND(block, R32(RTREG), R32(RSREG), UIMMVAL);                // dand    <rtreg>,<rsreg>,UIMMVAL
5455         return TRUE;
5666        case 0x0c:  /* ANDI - MIPS I */
5667            if (RTREG != 0)
5668                UML_AND(block, R32(RTREG), R32(RSREG), UIMMVAL);                // dand    <rtreg>,<rsreg>,UIMMVAL
5669            return TRUE;
54565670
5457      case 0x0d:  /* ORI - MIPS I */
5458         if (RTREG != 0)
5459            UML_OR(block, R32(RTREG), R32(RSREG), UIMMVAL);             // dor     <rtreg>,<rsreg>,UIMMVAL
5460         return TRUE;
5671        case 0x0d:  /* ORI - MIPS I */
5672            if (RTREG != 0)
5673                UML_OR(block, R32(RTREG), R32(RSREG), UIMMVAL);             // dor     <rtreg>,<rsreg>,UIMMVAL
5674            return TRUE;
54615675
5462      case 0x0e:  /* XORI - MIPS I */
5463         if (RTREG != 0)
5464            UML_XOR(block, R32(RTREG), R32(RSREG), UIMMVAL);                // dxor    <rtreg>,<rsreg>,UIMMVAL
5465         return TRUE;
5676        case 0x0e:  /* XORI - MIPS I */
5677            if (RTREG != 0)
5678                UML_XOR(block, R32(RTREG), R32(RSREG), UIMMVAL);                // dxor    <rtreg>,<rsreg>,UIMMVAL
5679            return TRUE;
54665680
5467      /* ----- memory load operations ----- */
5681        /* ----- memory load operations ----- */
54685682
5469      case 0x20:  /* LB - MIPS I */
5470         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5471         UML_CALLH(block, *rsp->impstate->read8);                                    // callh   read8
5472         if (RTREG != 0)
5473            UML_SEXT(block, R32(RTREG), I0, SIZE_BYTE);                     // dsext   <rtreg>,i0,byte
5474         if (!in_delay_slot)
5475            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5476         return TRUE;
5683        case 0x20:  /* LB - MIPS I */
5684            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5685            UML_CALLH(block, *rsp->impstate->read8);                                    // callh   read8
5686            if (RTREG != 0)
5687                UML_SEXT(block, R32(RTREG), I0, SIZE_BYTE);                     // dsext   <rtreg>,i0,byte
5688            if (!in_delay_slot)
5689                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5690            return TRUE;
54775691
5478      case 0x21:  /* LH - MIPS I */
5479         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5480         UML_CALLH(block, *rsp->impstate->read16);                               // callh   read16
5481         if (RTREG != 0)
5482            UML_SEXT(block, R32(RTREG), I0, SIZE_WORD);                     // dsext   <rtreg>,i0,word
5483         if (!in_delay_slot)
5484            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5485         return TRUE;
5692        case 0x21:  /* LH - MIPS I */
5693            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5694            UML_CALLH(block, *rsp->impstate->read16);                               // callh   read16
5695            if (RTREG != 0)
5696                UML_SEXT(block, R32(RTREG), I0, SIZE_WORD);                     // dsext   <rtreg>,i0,word
5697            if (!in_delay_slot)
5698                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5699            return TRUE;
54865700
5487      case 0x23:  /* LW - MIPS I */
5488         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5489         UML_CALLH(block, *rsp->impstate->read32);                               // callh   read32
5490         if (RTREG != 0)
5491            UML_MOV(block, R32(RTREG), I0);
5492         if (!in_delay_slot)
5493            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5494         return TRUE;
5701        case 0x23:  /* LW - MIPS I */
5702            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5703            UML_CALLH(block, *rsp->impstate->read32);                               // callh   read32
5704            if (RTREG != 0)
5705                UML_MOV(block, R32(RTREG), I0);
5706            if (!in_delay_slot)
5707                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5708            return TRUE;
54955709
5496      case 0x24:  /* LBU - MIPS I */
5497         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5498         UML_CALLH(block, *rsp->impstate->read8);                                    // callh   read8
5499         if (RTREG != 0)
5500            UML_AND(block, R32(RTREG), I0, 0xff);                   // dand    <rtreg>,i0,0xff
5501         if (!in_delay_slot)
5502            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5503         return TRUE;
5710        case 0x24:  /* LBU - MIPS I */
5711            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5712            UML_CALLH(block, *rsp->impstate->read8);                                    // callh   read8
5713            if (RTREG != 0)
5714                UML_AND(block, R32(RTREG), I0, 0xff);                   // dand    <rtreg>,i0,0xff
5715            if (!in_delay_slot)
5716                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5717            return TRUE;
55045718
5505      case 0x25:  /* LHU - MIPS I */
5506         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5507         UML_CALLH(block, *rsp->impstate->read16);                               // callh   read16
5508         if (RTREG != 0)
5509            UML_AND(block, R32(RTREG), I0, 0xffff);                 // dand    <rtreg>,i0,0xffff
5510         if (!in_delay_slot)
5511            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5512         return TRUE;
5719        case 0x25:  /* LHU - MIPS I */
5720            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5721            UML_CALLH(block, *rsp->impstate->read16);                               // callh   read16
5722            if (RTREG != 0)
5723                UML_AND(block, R32(RTREG), I0, 0xffff);                 // dand    <rtreg>,i0,0xffff
5724            if (!in_delay_slot)
5725                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5726            return TRUE;
55135727
5514      case 0x32:  /* LWC2 - MIPS I */
5515         return generate_lwc2(rsp, block, compiler, desc);
5728        case 0x32:  /* LWC2 - MIPS I */
5729            return generate_lwc2(rsp, block, compiler, desc);
55165730
55175731
5518      /* ----- memory store operations ----- */
5732        /* ----- memory store operations ----- */
55195733
5520      case 0x28:  /* SB - MIPS I */
5521         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5522         UML_MOV(block, I1, R32(RTREG));                                 // mov     i1,<rtreg>
5523         UML_CALLH(block, *rsp->impstate->write8);                               // callh   write8
5524         if (!in_delay_slot)
5525            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5526         return TRUE;
5734        case 0x28:  /* SB - MIPS I */
5735            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5736            UML_MOV(block, I1, R32(RTREG));                                 // mov     i1,<rtreg>
5737            UML_CALLH(block, *rsp->impstate->write8);                               // callh   write8
5738            if (!in_delay_slot)
5739                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5740            return TRUE;
55275741
5528      case 0x29:  /* SH - MIPS I */
5529         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5530         UML_MOV(block, I1, R32(RTREG));                                 // mov     i1,<rtreg>
5531         UML_CALLH(block, *rsp->impstate->write16);                              // callh   write16
5532         if (!in_delay_slot)
5533            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5534         return TRUE;
5742        case 0x29:  /* SH - MIPS I */
5743            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5744            UML_MOV(block, I1, R32(RTREG));                                 // mov     i1,<rtreg>
5745            UML_CALLH(block, *rsp->impstate->write16);                              // callh   write16
5746            if (!in_delay_slot)
5747                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5748            return TRUE;
55355749
5536      case 0x2b:  /* SW - MIPS I */
5537         UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5538         UML_MOV(block, I1, R32(RTREG));                                 // mov     i1,<rtreg>
5539         UML_CALLH(block, *rsp->impstate->write32);                              // callh   write32
5540         if (!in_delay_slot)
5541            generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5542         return TRUE;
5750        case 0x2b:  /* SW - MIPS I */
5751            UML_ADD(block, I0, R32(RSREG), SIMMVAL);                        // add     i0,<rsreg>,SIMMVAL
5752            UML_MOV(block, I1, R32(RTREG));                                 // mov     i1,<rtreg>
5753            UML_CALLH(block, *rsp->impstate->write32);                              // callh   write32
5754            if (!in_delay_slot)
5755                generate_update_cycles(rsp, block, compiler, desc->pc + 4, TRUE);
5756            return TRUE;
55435757
5544      case 0x3a:  /* SWC2 - MIPS I */
5545         return generate_swc2(rsp, block, compiler, desc);
5546         //UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);     // mov     [arg0],desc->opptr.l
5547         //UML_CALLC(block, cfunc_swc2, rsp);                                        // callc   cfunc_mfc2
5548         //return TRUE;
5758        case 0x3a:  /* SWC2 - MIPS I */
5759            return generate_swc2(rsp, block, compiler, desc);
5760            //UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);     // mov     [arg0],desc->opptr.l
5761            //UML_CALLC(block, cfunc_swc2, rsp);                                        // callc   cfunc_mfc2
5762            //return TRUE;
55495763
5550      /* ----- coprocessor instructions ----- */
5764        /* ----- coprocessor instructions ----- */
55515765
5552      case 0x10:  /* COP0 - MIPS I */
5553         return generate_cop0(rsp, block, compiler, desc);
5766        case 0x10:  /* COP0 - MIPS I */
5767            return generate_cop0(rsp, block, compiler, desc);
55545768
5555      case 0x12:  /* COP2 - MIPS I */
5556         return generate_cop2(rsp, block, compiler, desc);
5557         //UML_EXH(block, rsp->impstate->exception[EXCEPTION_INVALIDOP], 0);// exh     invalidop,0
5558         //return TRUE;
5769        case 0x12:  /* COP2 - MIPS I */
5770            return generate_cop2(rsp, block, compiler, desc);
5771            //UML_EXH(block, rsp->impstate->exception[EXCEPTION_INVALIDOP], 0);// exh     invalidop,0
5772            //return TRUE;
55595773
55605774
5561      /* ----- unimplemented/illegal instructions ----- */
5775        /* ----- unimplemented/illegal instructions ----- */
55625776
5563      //default:    /* ??? */       invalid_instruction(op);                                                break;
5564   }
5777        //default:    /* ??? */       invalid_instruction(op);                                                break;
5778    }
55655779
5566   return FALSE;
5780    return FALSE;
55675781}
55685782
55695783
r25438r25439
55745788
55755789static int generate_special(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
55765790{
5577   UINT32 op = desc->opptr.l[0];
5578   UINT8 opswitch = op & 63;
5579   //code_label skip;
5791    UINT32 op = desc->opptr.l[0];
5792    UINT8 opswitch = op & 63;
5793    //code_label skip;
55805794
5581   switch (opswitch)
5582   {
5583      /* ----- shift instructions ----- */
5795    switch (opswitch)
5796    {
5797        /* ----- shift instructions ----- */
55845798
5585      case 0x00:  /* SLL - MIPS I */
5586         if (RDREG != 0)
5587         {
5588            UML_SHL(block, R32(RDREG), R32(RTREG), SHIFT);
5589         }
5590         return TRUE;
5799        case 0x00:  /* SLL - MIPS I */
5800            if (RDREG != 0)
5801            {
5802                UML_SHL(block, R32(RDREG), R32(RTREG), SHIFT);
5803            }
5804            return TRUE;
55915805
5592      case 0x02:  /* SRL - MIPS I */
5593         if (RDREG != 0)
5594         {
5595            UML_SHR(block, R32(RDREG), R32(RTREG), SHIFT);
5596         }
5597         return TRUE;
5806        case 0x02:  /* SRL - MIPS I */
5807            if (RDREG != 0)
5808            {
5809                UML_SHR(block, R32(RDREG), R32(RTREG), SHIFT);
5810            }
5811            return TRUE;
55985812
5599      case 0x03:  /* SRA - MIPS I */
5600         if (RDREG != 0)
5601         {
5602            UML_SAR(block, R32(RDREG), R32(RTREG), SHIFT);
5603         }
5604         return TRUE;
5813        case 0x03:  /* SRA - MIPS I */
5814            if (RDREG != 0)
5815            {
5816                UML_SAR(block, R32(RDREG), R32(RTREG), SHIFT);
5817            }
5818            return TRUE;
56055819
5606      case 0x04:  /* SLLV - MIPS I */
5607         if (RDREG != 0)
5608         {
5609            UML_SHL(block, R32(RDREG), R32(RTREG), R32(RSREG));
5610         }
5611         return TRUE;
5820        case 0x04:  /* SLLV - MIPS I */
5821            if (RDREG != 0)
5822            {
5823                UML_SHL(block, R32(RDREG), R32(RTREG), R32(RSREG));
5824            }
5825            return TRUE;
56125826
5613      case 0x06:  /* SRLV - MIPS I */
5614         if (RDREG != 0)
5615         {
5616            UML_SHR(block, R32(RDREG), R32(RTREG), R32(RSREG));
5617         }
5618         return TRUE;
5827        case 0x06:  /* SRLV - MIPS I */
5828            if (RDREG != 0)
5829            {
5830                UML_SHR(block, R32(RDREG), R32(RTREG), R32(RSREG));
5831            }
5832            return TRUE;
56195833
5620      case 0x07:  /* SRAV - MIPS I */
5621         if (RDREG != 0)
5622         {
5623            UML_SAR(block, R32(RDREG), R32(RTREG), R32(RSREG));
5624         }
5625         return TRUE;
5834        case 0x07:  /* SRAV - MIPS I */
5835            if (RDREG != 0)
5836            {
5837                UML_SAR(block, R32(RDREG), R32(RTREG), R32(RSREG));
5838            }
5839            return TRUE;
56265840
5627      /* ----- basic arithmetic ----- */
5841        /* ----- basic arithmetic ----- */
56285842
5629      case 0x20:  /* ADD - MIPS I */
5630      case 0x21:  /* ADDU - MIPS I */
5631         if (RDREG != 0)
5632         {
5633            UML_ADD(block, R32(RDREG), R32(RSREG), R32(RTREG));
5634         }
5635         return TRUE;
5843        case 0x20:  /* ADD - MIPS I */
5844        case 0x21:  /* ADDU - MIPS I */
5845            if (RDREG != 0)
5846            {
5847                UML_ADD(block, R32(RDREG), R32(RSREG), R32(RTREG));
5848            }
5849            return TRUE;
56365850
5637      case 0x22:  /* SUB - MIPS I */
5638      case 0x23:  /* SUBU - MIPS I */
5639         if (RDREG != 0)
5640         {
5641            UML_SUB(block, R32(RDREG), R32(RSREG), R32(RTREG));
5642         }
5643         return TRUE;
5851        case 0x22:  /* SUB - MIPS I */
5852        case 0x23:  /* SUBU - MIPS I */
5853            if (RDREG != 0)
5854            {
5855                UML_SUB(block, R32(RDREG), R32(RSREG), R32(RTREG));
5856            }
5857            return TRUE;
56445858
5645      /* ----- basic logical ops ----- */
5859        /* ----- basic logical ops ----- */
56465860
5647      case 0x24:  /* AND - MIPS I */
5648         if (RDREG != 0)
5649         {
5650            UML_AND(block, R32(RDREG), R32(RSREG), R32(RTREG));             // dand     <rdreg>,<rsreg>,<rtreg>
5651         }
5652         return TRUE;
5861        case 0x24:  /* AND - MIPS I */
5862            if (RDREG != 0)
5863            {
5864                UML_AND(block, R32(RDREG), R32(RSREG), R32(RTREG));             // dand     <rdreg>,<rsreg>,<rtreg>
5865            }
5866            return TRUE;
56535867
5654      case 0x25:  /* OR - MIPS I */
5655         if (RDREG != 0)
5656         {
5657            UML_OR(block, R32(RDREG), R32(RSREG), R32(RTREG));                  // dor      <rdreg>,<rsreg>,<rtreg>
5658         }
5659         return TRUE;
5868        case 0x25:  /* OR - MIPS I */
5869            if (RDREG != 0)
5870            {
5871                UML_OR(block, R32(RDREG), R32(RSREG), R32(RTREG));                  // dor      <rdreg>,<rsreg>,<rtreg>
5872            }
5873            return TRUE;
56605874
5661      case 0x26:  /* XOR - MIPS I */
5662         if (RDREG != 0)
5663         {
5664            UML_XOR(block, R32(RDREG), R32(RSREG), R32(RTREG));             // dxor     <rdreg>,<rsreg>,<rtreg>
5665         }
5666         return TRUE;
5875        case 0x26:  /* XOR - MIPS I */
5876            if (RDREG != 0)
5877            {
5878                UML_XOR(block, R32(RDREG), R32(RSREG), R32(RTREG));             // dxor     <rdreg>,<rsreg>,<rtreg>
5879            }
5880            return TRUE;
56675881
5668      case 0x27:  /* NOR - MIPS I */
5669         if (RDREG != 0)
5670         {
5671            UML_OR(block, I0, R32(RSREG), R32(RTREG));                  // dor      i0,<rsreg>,<rtreg>
5672            UML_XOR(block, R32(RDREG), I0, (UINT64)~0);             // dxor     <rdreg>,i0,~0
5673         }
5674         return TRUE;
5882        case 0x27:  /* NOR - MIPS I */
5883            if (RDREG != 0)
5884            {
5885                UML_OR(block, I0, R32(RSREG), R32(RTREG));                  // dor      i0,<rsreg>,<rtreg>
5886                UML_XOR(block, R32(RDREG), I0, (UINT64)~0);             // dxor     <rdreg>,i0,~0
5887            }
5888            return TRUE;
56755889
56765890
5677      /* ----- basic comparisons ----- */
5891        /* ----- basic comparisons ----- */
56785892
5679      case 0x2a:  /* SLT - MIPS I */
5680         if (RDREG != 0)
5681         {
5682            UML_CMP(block, R32(RSREG), R32(RTREG));                         // dcmp    <rsreg>,<rtreg>
5683            UML_SETc(block, COND_L, R32(RDREG));                                    // dset    <rdreg>,l
5684         }
5685         return TRUE;
5893        case 0x2a:  /* SLT - MIPS I */
5894            if (RDREG != 0)
5895            {
5896                UML_CMP(block, R32(RSREG), R32(RTREG));                         // dcmp    <rsreg>,<rtreg>
5897                UML_SETc(block, COND_L, R32(RDREG));                                    // dset    <rdreg>,l
5898            }
5899            return TRUE;
56865900
5687      case 0x2b:  /* SLTU - MIPS I */
5688         if (RDREG != 0)
5689         {
5690            UML_CMP(block, R32(RSREG), R32(RTREG));                         // dcmp    <rsreg>,<rtreg>
5691            UML_SETc(block, COND_B, R32(RDREG));                                    // dset    <rdreg>,b
5692         }
5693         return TRUE;
5901        case 0x2b:  /* SLTU - MIPS I */
5902            if (RDREG != 0)
5903            {
5904                UML_CMP(block, R32(RSREG), R32(RTREG));                         // dcmp    <rsreg>,<rtreg>
5905                UML_SETc(block, COND_B, R32(RDREG));                                    // dset    <rdreg>,b
5906            }
5907            return TRUE;
56945908
56955909
5696      /* ----- jumps and branches ----- */
5910        /* ----- jumps and branches ----- */
56975911
5698      case 0x08:  /* JR - MIPS I */
5699         generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5700         return TRUE;
5912        case 0x08:  /* JR - MIPS I */
5913            generate_delay_slot_and_branch(rsp, block, compiler, desc, 0);      // <next instruction + hashjmp>
5914            return TRUE;
57015915
5702      case 0x09:  /* JALR - MIPS I */
5703         generate_delay_slot_and_branch(rsp, block, compiler, desc, RDREG);  // <next instruction + hashjmp>
5704         return TRUE;
5916        case 0x09:  /* JALR - MIPS I */
5917            generate_delay_slot_and_branch(rsp, block, compiler, desc, RDREG);  // <next instruction + hashjmp>
5918            return TRUE;
57055919
57065920
5707      /* ----- system calls ----- */
5921        /* ----- system calls ----- */
57085922
5709      case 0x0d:  /* BREAK - MIPS I */
5710         UML_MOV(block, mem(&rsp->impstate->arg0), 3);                   // mov     [arg0],3
5711         UML_CALLC(block, cfunc_sp_set_status_cb, rsp);                      // callc   cfunc_sp_set_status_cb
5712         UML_MOV(block, mem(&rsp->icount), 0);                       // mov icount, #0
5923        case 0x0d:  /* BREAK - MIPS I */
5924            UML_MOV(block, mem(&rsp->impstate->arg0), 3);                   // mov     [arg0],3
5925            UML_CALLC(block, cfunc_sp_set_status_cb, rsp);                      // callc   cfunc_sp_set_status_cb
5926            UML_MOV(block, mem(&rsp->icount), 0);                       // mov icount, #0
57135927
5714         UML_EXIT(block, EXECUTE_OUT_OF_CYCLES);
5715         return TRUE;
5716   }
5717   return FALSE;
5928            UML_EXIT(block, EXECUTE_OUT_OF_CYCLES);
5929            return TRUE;
5930    }
5931    return FALSE;
57185932}
57195933
57205934
r25438r25439
57265940
57275941static int generate_regimm(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
57285942{
5729   UINT32 op = desc->opptr.l[0];
5730   UINT8 opswitch = RTREG;
5731   code_label skip;
5943    UINT32 op = desc->opptr.l[0];
5944    UINT8 opswitch = RTREG;
5945    code_label skip;
57325946
5733   switch (opswitch)
5734   {
5735      case 0x00:  /* BLTZ */
5736      case 0x10:  /* BLTZAL */
5737         if (RSREG != 0)
5738         {
5739            UML_CMP(block, R32(RSREG), 0);                              // dcmp    <rsreg>,0
5740            UML_JMPc(block, COND_GE, skip = compiler->labelnum++);              // jmp     skip,GE
5741            generate_delay_slot_and_branch(rsp, block, compiler, desc, (opswitch & 0x10) ? 31 : 0);
5742                                                               // <next instruction + hashjmp>
5743            UML_LABEL(block, skip);                                         // skip:
5744         }
5745         return TRUE;
5947    switch (opswitch)
5948    {
5949        case 0x00:  /* BLTZ */
5950        case 0x10:  /* BLTZAL */
5951            if (RSREG != 0)
5952            {
5953                UML_CMP(block, R32(RSREG), 0);                              // dcmp    <rsreg>,0
5954                UML_JMPc(block, COND_GE, skip = compiler->labelnum++);              // jmp     skip,GE
5955                generate_delay_slot_and_branch(rsp, block, compiler, desc, (opswitch & 0x10) ? 31 : 0);
5956                                                                                    // <next instruction + hashjmp>
5957                UML_LABEL(block, skip);                                         // skip:
5958            }
5959            return TRUE;
57465960
5747      case 0x01:  /* BGEZ */
5748      case 0x11:  /* BGEZAL */
5749         if (RSREG != 0)
5750         {
5751            UML_CMP(block, R32(RSREG), 0);                              // dcmp    <rsreg>,0
5752            UML_JMPc(block, COND_L, skip = compiler->labelnum++);                   // jmp     skip,L
5753            generate_delay_slot_and_branch(rsp, block, compiler, desc, (opswitch & 0x10) ? 31 : 0);
5754                                                               // <next instruction + hashjmp>
5755            UML_LABEL(block, skip);                                         // skip:
5756         }
5757         else
5758            generate_delay_slot_and_branch(rsp, block, compiler, desc, (opswitch & 0x10) ? 31 : 0);
5759                                                               // <next instruction + hashjmp>
5760         return TRUE;
5761   }
5762   return FALSE;
5961        case 0x01:  /* BGEZ */
5962        case 0x11:  /* BGEZAL */
5963            if (RSREG != 0)
5964            {
5965                UML_CMP(block, R32(RSREG), 0);                              // dcmp    <rsreg>,0
5966                UML_JMPc(block, COND_L, skip = compiler->labelnum++);                   // jmp     skip,L
5967                generate_delay_slot_and_branch(rsp, block, compiler, desc, (opswitch & 0x10) ? 31 : 0);
5968                                                                                    // <next instruction + hashjmp>
5969                UML_LABEL(block, skip);                                         // skip:
5970            }
5971            else
5972                generate_delay_slot_and_branch(rsp, block, compiler, desc, (opswitch & 0x10) ? 31 : 0);
5973                                                                                    // <next instruction + hashjmp>
5974            return TRUE;
5975    }
5976    return FALSE;
57635977}
57645978
57655979
r25438r25439
57695983
57705984static int generate_cop2(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
57715985{
5772   UINT32 op = desc->opptr.l[0];
5773   UINT8 opswitch = RSREG;
5986    UINT32 op = desc->opptr.l[0];
5987    UINT8 opswitch = RSREG;
57745988
5775   switch (opswitch)
5776   {
5777      case 0x00:  /* MFCz */
5778         if (RTREG != 0)
5779         {
5780            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);    // mov     [arg0],desc->opptr.l
5781            UML_CALLC(block, cfunc_mfc2, rsp);                                  // callc   cfunc_mfc2
5782            //UML_SEXT(block, R32(RTREG), I0, DWORD);                      // dsext   <rtreg>,i0,dword
5783         }
5784         return TRUE;
5989    switch (opswitch)
5990    {
5991        case 0x00:  /* MFCz */
5992            if (RTREG != 0)
5993            {
5994                UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);    // mov     [arg0],desc->opptr.l
5995                UML_CALLC(block, cfunc_mfc2, rsp);                                  // callc   cfunc_mfc2
5996                //UML_SEXT(block, R32(RTREG), I0, DWORD);                      // dsext   <rtreg>,i0,dword
5997            }
5998            return TRUE;
57855999
5786      case 0x02:  /* CFCz */
5787         if (RTREG != 0)
5788         {
5789            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);    // mov     [arg0],desc->opptr.l
5790            UML_CALLC(block, cfunc_cfc2, rsp);                                  // callc   cfunc_cfc2
5791            //UML_SEXT(block, R32(RTREG), I0, DWORD);                      // dsext   <rtreg>,i0,dword
5792         }
5793         return TRUE;
6000        case 0x02:  /* CFCz */
6001            if (RTREG != 0)
6002            {
6003                UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);    // mov     [arg0],desc->opptr.l
6004                UML_CALLC(block, cfunc_cfc2, rsp);                                  // callc   cfunc_cfc2
6005                //UML_SEXT(block, R32(RTREG), I0, DWORD);                      // dsext   <rtreg>,i0,dword
6006            }
6007            return TRUE;
57946008
5795      case 0x04:  /* MTCz */
5796         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
5797         UML_CALLC(block, cfunc_mtc2, rsp);                                      // callc   cfunc_mtc2
5798         return TRUE;
6009        case 0x04:  /* MTCz */
6010            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
6011            UML_CALLC(block, cfunc_mtc2, rsp);                                      // callc   cfunc_mtc2
6012            return TRUE;
57996013
5800      case 0x06:  /* CTCz */
5801         UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
5802         UML_CALLC(block, cfunc_ctc2, rsp);                                      // callc   cfunc_ctc2
5803         return TRUE;
6014        case 0x06:  /* CTCz */
6015            UML_MOV(block, mem(&rsp->impstate->arg0), desc->opptr.l[0]);        // mov     [arg0],desc->opptr.l
6016            UML_CALLC(block, cfunc_ctc2, rsp);                                      // callc   cfunc_ctc2
6017            return TRUE;
58046018
5805      case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
5806      case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
5807         return generate_vector_opcode(rsp, block, compiler, desc);
5808   }
5809   return FALSE;
6019        case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
6020        case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
6021            return generate_vector_opcode(rsp, block, compiler, desc);
6022    }
6023    return FALSE;
58106024}
58116025
58126026/*-------------------------------------------------
r25438r25439
58156029
58166030static int generate_cop0(rsp_state *rsp, drcuml_block *block, compiler_state *compiler, const opcode_desc *desc)
58176031{
5818   UINT32 op = desc->opptr.l[0];
5819   UINT8 opswitch = RSREG;
6032    UINT32 op = desc->opptr.l[0];
6033    UINT8 opswitch = RSREG;
58206034
5821   switch (opswitch)
5822   {
5823      case 0x00:  /* MFCz */
5824         if (RTREG != 0)
5825         {
5826            UML_MOV(block, mem(&rsp->impstate->arg0), RDREG);               // mov     [arg0],<rdreg>
5827            UML_MOV(block, mem(&rsp->impstate->arg1), RTREG);               // mov     [arg1],<rtreg>
5828            UML_CALLC(block, cfunc_get_cop0_reg, rsp);                          // callc   cfunc_get_cop0_reg
5829            if(RDREG == 2)
5830            {
5831               generate_update_cycles(rsp, block, compiler, mem(&rsp->pc), TRUE);
5832               UML_HASHJMP(block, 0, mem(&rsp->pc), *rsp->impstate->nocode);
5833            }
5834         }
5835         return TRUE;
6035    switch (opswitch)
6036    {
6037        case 0x00:  /* MFCz */
6038            if (RTREG != 0)
6039            {
6040                UML_MOV(block, mem(&rsp->impstate->arg0), RDREG);               // mov     [arg0],<rdreg>
6041                UML_MOV(block, mem(&rsp->impstate->arg1), RTREG);               // mov     [arg1],<rtreg>
6042                UML_CALLC(block, cfunc_get_cop0_reg, rsp);                          // callc   cfunc_get_cop0_reg
6043                if(RDREG == 2)
6044                {
6045                    generate_update_cycles(rsp, block, compiler, mem(&rsp->pc), TRUE);
6046                    UML_HASHJMP(block, 0, mem(&rsp->pc), *rsp->impstate->nocode);
6047                }
6048            }
6049            return TRUE;
58366050
5837      case 0x04:  /* MTCz */
5838         UML_MOV(block, mem(&rsp->impstate->arg0), RDREG);                   // mov     [arg0],<rdreg>
5839         UML_MOV(block, mem(&rsp->impstate->arg1), R32(RTREG));                  // mov     [arg1],rtreg
5840         UML_CALLC(block, cfunc_set_cop0_reg, rsp);                              // callc   cfunc_set_cop0_reg
5841         return TRUE;
5842   }
6051        case 0x04:  /* MTCz */
6052            UML_MOV(block, mem(&rsp->impstate->arg0), RDREG);                   // mov     [arg0],<rdreg>
6053            UML_MOV(block, mem(&rsp->impstate->arg1), R32(RTREG));                  // mov     [arg1],rtreg
6054            UML_CALLC(block, cfunc_set_cop0_reg, rsp);                              // callc   cfunc_set_cop0_reg
6055            return TRUE;
6056    }
58436057
5844   return FALSE;
6058    return FALSE;
58456059}
58466060
58476061static void cfunc_mfc2(void *param)
58486062{
5849   rsp_state *rsp = (rsp_state*)param;
5850   UINT32 op = rsp->impstate->arg0;
5851   int el = (op >> 7) & 0xf;
6063    rsp_state *rsp = (rsp_state*)param;
6064    UINT32 op = rsp->impstate->arg0;
6065    int el = (op >> 7) & 0xf;
58526066#if USE_SIMD
5853   UINT16 w;
5854   SIMD_EXTRACT16(rsp->xv[VS1REG], w, el >> 1);
5855   rsp->r[RTREG] = (INT32)(INT16)w;
6067   UINT16 out;
6068   SIMD_EXTRACT16(rsp->xv[VS1REG], out, (el >> 1));
6069   out >>= (1 - (el & 1)) * 8;
6070   out &= 0x00ff;
6071
6072   el++;
6073
6074   UINT16 temp;
6075   SIMD_EXTRACT16(rsp->xv[VS1REG], temp, (el >> 1));
6076   temp >>= (1 - (el & 1)) * 8;
6077   temp &= 0x00ff;
6078
6079   rsp->r[RTREG] = (INT32)(INT16)((out << 8) | temp);
58566080#else
5857   UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);
5858   UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);
5859   if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2));
6081    UINT16 b1 = VREG_B(VS1REG, (el+0) & 0xf);
6082    UINT16 b2 = VREG_B(VS1REG, (el+1) & 0xf);
6083    if (RTREG) RTVAL = (INT32)(INT16)((b1 << 8) | (b2));
58606084#endif
58616085}
58626086
58636087static void cfunc_cfc2(void *param)
58646088{
5865   rsp_state *rsp = (rsp_state*)param;
5866   UINT32 op = rsp->impstate->arg0;
5867   if (RTREG)
5868   {
5869      if (RDREG == 2)
6089    rsp_state *rsp = (rsp_state*)param;
6090    UINT32 op = rsp->impstate->arg0;
6091    if (RTREG)
6092    {
6093      switch(RDREG)
58706094      {
5871         // Anciliary clipping flags
5872         RTVAL = rsp->flag[RDREG] & 0x00ff;
6095         case 0:
6096            RTVAL = ((CARRY_FLAG(rsp, 0) & 1) << 0) |
6097                  ((CARRY_FLAG(rsp, 1) & 1) << 1) |
6098                  ((CARRY_FLAG(rsp, 2) & 1) << 2) |
6099                  ((CARRY_FLAG(rsp, 3) & 1) << 3) |
6100                  ((CARRY_FLAG(rsp, 4) & 1) << 4) |
6101                  ((CARRY_FLAG(rsp, 5) & 1) << 5) |
6102                  ((CARRY_FLAG(rsp, 6) & 1) << 6) |
6103                  ((CARRY_FLAG(rsp, 7) & 1) << 7) |
6104                  ((ZERO_FLAG(rsp, 0) & 1) << 8) |
6105                  ((ZERO_FLAG(rsp, 1) & 1) << 9) |
6106                  ((ZERO_FLAG(rsp, 2) & 1) << 10) |
6107                  ((ZERO_FLAG(rsp, 3) & 1) << 11) |
6108                  ((ZERO_FLAG(rsp, 4) & 1) << 12) |
6109                  ((ZERO_FLAG(rsp, 5) & 1) << 13) |
6110                  ((ZERO_FLAG(rsp, 6) & 1) << 14) |
6111                  ((ZERO_FLAG(rsp, 7) & 1) << 15);
6112            if (RTVAL & 0x8000) RTVAL |= 0xffff0000;
6113            break;
6114         case 1:
6115            RTVAL = ((COMPARE_FLAG(rsp, 0) & 1) << 0) |
6116                  ((COMPARE_FLAG(rsp, 1) & 1) << 1) |
6117                  ((COMPARE_FLAG(rsp, 2) & 1) << 2) |
6118                  ((COMPARE_FLAG(rsp, 3) & 1) << 3) |
6119                  ((COMPARE_FLAG(rsp, 4) & 1) << 4) |
6120                  ((COMPARE_FLAG(rsp, 5) & 1) << 5) |
6121                  ((COMPARE_FLAG(rsp, 6) & 1) << 6) |
6122                  ((COMPARE_FLAG(rsp, 7) & 1) << 7) |
6123                  ((CLIP2_FLAG(rsp, 0) & 1) << 8) |
6124                  ((CLIP2_FLAG(rsp, 1) & 1) << 9) |
6125                  ((CLIP2_FLAG(rsp, 2) & 1) << 10) |
6126                  ((CLIP2_FLAG(rsp, 3) & 1) << 11) |
6127                  ((CLIP2_FLAG(rsp, 4) & 1) << 12) |
6128                  ((CLIP2_FLAG(rsp, 5) & 1) << 13) |
6129                  ((CLIP2_FLAG(rsp, 6) & 1) << 14) |
6130                  ((CLIP2_FLAG(rsp, 7) & 1) << 15);
6131            if (RTVAL & 0x8000) RTVAL |= 0xffff0000;
6132            break;
6133         case 2:
6134            RTVAL = ((CLIP1_FLAG(rsp, 0) & 1) << 0) |
6135                  ((CLIP1_FLAG(rsp, 1) & 1) << 1) |
6136                  ((CLIP1_FLAG(rsp, 2) & 1) << 2) |
6137                  ((CLIP1_FLAG(rsp, 3) & 1) << 3) |
6138                  ((CLIP1_FLAG(rsp, 4) & 1) << 4) |
6139                  ((CLIP1_FLAG(rsp, 5) & 1) << 5) |
6140                  ((CLIP1_FLAG(rsp, 6) & 1) << 6) |
6141                  ((CLIP1_FLAG(rsp, 7) & 1) << 7);
6142            break;
58736143      }
5874      else
5875      {
5876         // All other flags are 16 bits but sign-extended at retrieval
5877         RTVAL = (UINT32)rsp->flag[RDREG] | ( ( rsp->flag[RDREG] & 0x8000 ) ? 0xffff0000 : 0 );
5878      }
5879   }
6144    }
58806145}
58816146
58826147static void cfunc_mtc2(void *param)
58836148{
5884   rsp_state *rsp = (rsp_state*)param;
5885   UINT32 op = rsp->impstate->arg0;
5886   int el = (op >> 7) & 0xf;
6149    rsp_state *rsp = (rsp_state*)param;
6150    UINT32 op = rsp->impstate->arg0;
6151    int el = (op >> 7) & 0xf;
58876152#if USE_SIMD
5888   SIMD_INSERT16(rsp->xv[VS1REG], RTVAL, el >> 1);
6153    SIMD_INSERT16(rsp->xv[VS1REG], RTVAL, el >> 1);
58896154#else
5890   VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;
5891   VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;
6155    VREG_B(VS1REG, (el+0) & 0xf) = (RTVAL >> 8) & 0xff;
6156    VREG_B(VS1REG, (el+1) & 0xf) = (RTVAL >> 0) & 0xff;
58926157#endif
58936158}
58946159
58956160static void cfunc_ctc2(void *param)
58966161{
5897   rsp_state *rsp = (rsp_state*)param;
5898   UINT32 op = rsp->impstate->arg0;
5899   rsp->flag[RDREG] = RTVAL & 0xffff;
6162    rsp_state *rsp = (rsp_state*)param;
6163    UINT32 op = rsp->impstate->arg0;
6164    switch(RDREG)
6165    {
6166      case 0:
6167         CLEAR_CARRY_FLAGS();
6168         CLEAR_ZERO_FLAGS();
6169         rsp->vflag[0][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0;
6170         rsp->vflag[0][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0;
6171         rsp->vflag[0][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0;
6172         rsp->vflag[0][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0;
6173         rsp->vflag[0][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0;
6174         rsp->vflag[0][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0;
6175         rsp->vflag[0][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0;
6176         rsp->vflag[0][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0;
6177         if (RTVAL & (1 << 0))  { SET_CARRY_FLAG(0); }
6178         if (RTVAL & (1 << 1))  { SET_CARRY_FLAG(1); }
6179         if (RTVAL & (1 << 2))  { SET_CARRY_FLAG(2); }
6180         if (RTVAL & (1 << 3))  { SET_CARRY_FLAG(3); }
6181         if (RTVAL & (1 << 4))  { SET_CARRY_FLAG(4); }
6182         if (RTVAL & (1 << 5))  { SET_CARRY_FLAG(5); }
6183         if (RTVAL & (1 << 6))  { SET_CARRY_FLAG(6); }
6184         if (RTVAL & (1 << 7))  { SET_CARRY_FLAG(7); }
6185         rsp->vflag[3][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0;
6186         rsp->vflag[3][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0;
6187         rsp->vflag[3][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0;
6188         rsp->vflag[3][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0;
6189         rsp->vflag[3][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0;
6190         rsp->vflag[3][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0;
6191         rsp->vflag[3][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0;
6192         rsp->vflag[3][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0;
6193         if (RTVAL & (1 << 8))  { SET_ZERO_FLAG(0); }
6194         if (RTVAL & (1 << 9))  { SET_ZERO_FLAG(1); }
6195         if (RTVAL & (1 << 10)) { SET_ZERO_FLAG(2); }
6196         if (RTVAL & (1 << 11)) { SET_ZERO_FLAG(3); }
6197         if (RTVAL & (1 << 12)) { SET_ZERO_FLAG(4); }
6198         if (RTVAL & (1 << 13)) { SET_ZERO_FLAG(5); }
6199         if (RTVAL & (1 << 14)) { SET_ZERO_FLAG(6); }
6200         if (RTVAL & (1 << 15)) { SET_ZERO_FLAG(7); }
6201         break;
6202      case 1:
6203         CLEAR_COMPARE_FLAGS();
6204         CLEAR_CLIP2_FLAGS();
6205         rsp->vflag[1][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0;
6206         rsp->vflag[1][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0;
6207         rsp->vflag[1][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0;
6208         rsp->vflag[1][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0;
6209         rsp->vflag[1][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0;
6210         rsp->vflag[1][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0;
6211         rsp->vflag[1][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0;
6212         rsp->vflag[1][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0;
6213         if (RTVAL & (1 << 0)) { SET_COMPARE_FLAG(0); }
6214         if (RTVAL & (1 << 1)) { SET_COMPARE_FLAG(1); }
6215         if (RTVAL & (1 << 2)) { SET_COMPARE_FLAG(2); }
6216         if (RTVAL & (1 << 3)) { SET_COMPARE_FLAG(3); }
6217         if (RTVAL & (1 << 4)) { SET_COMPARE_FLAG(4); }
6218         if (RTVAL & (1 << 5)) { SET_COMPARE_FLAG(5); }
6219         if (RTVAL & (1 << 6)) { SET_COMPARE_FLAG(6); }
6220         if (RTVAL & (1 << 7)) { SET_COMPARE_FLAG(7); }
6221         rsp->vflag[4][0] = ((RTVAL >> 8) & 1) ? 0xffff : 0;
6222         rsp->vflag[4][1] = ((RTVAL >> 9) & 1) ? 0xffff : 0;
6223         rsp->vflag[4][2] = ((RTVAL >> 10) & 1) ? 0xffff : 0;
6224         rsp->vflag[4][3] = ((RTVAL >> 11) & 1) ? 0xffff : 0;
6225         rsp->vflag[4][4] = ((RTVAL >> 12) & 1) ? 0xffff : 0;
6226         rsp->vflag[4][5] = ((RTVAL >> 13) & 1) ? 0xffff : 0;
6227         rsp->vflag[4][6] = ((RTVAL >> 14) & 1) ? 0xffff : 0;
6228         rsp->vflag[4][7] = ((RTVAL >> 15) & 1) ? 0xffff : 0;
6229         if (RTVAL & (1 << 8))  { SET_CLIP2_FLAG(0); }
6230         if (RTVAL & (1 << 9))  { SET_CLIP2_FLAG(1); }
6231         if (RTVAL & (1 << 10)) { SET_CLIP2_FLAG(2); }
6232         if (RTVAL & (1 << 11)) { SET_CLIP2_FLAG(3); }
6233         if (RTVAL & (1 << 12)) { SET_CLIP2_FLAG(4); }
6234         if (RTVAL & (1 << 13)) { SET_CLIP2_FLAG(5); }
6235         if (RTVAL & (1 << 14)) { SET_CLIP2_FLAG(6); }
6236         if (RTVAL & (1 << 15)) { SET_CLIP2_FLAG(7); }
6237         break;
6238      case 2:
6239         CLEAR_CLIP1_FLAGS();
6240         rsp->vflag[2][0] = ((RTVAL >> 0) & 1) ? 0xffff : 0;
6241         rsp->vflag[2][1] = ((RTVAL >> 1) & 1) ? 0xffff : 0;
6242         rsp->vflag[2][2] = ((RTVAL >> 2) & 1) ? 0xffff : 0;
6243         rsp->vflag[2][3] = ((RTVAL >> 3) & 1) ? 0xffff : 0;
6244         rsp->vflag[2][4] = ((RTVAL >> 4) & 1) ? 0xffff : 0;
6245         rsp->vflag[2][5] = ((RTVAL >> 5) & 1) ? 0xffff : 0;
6246         rsp->vflag[2][6] = ((RTVAL >> 6) & 1) ? 0xffff : 0;
6247         rsp->vflag[2][7] = ((RTVAL >> 7) & 1) ? 0xffff : 0;
6248         if (RTVAL & (1 << 0)) { SET_CLIP1_FLAG(0); }
6249         if (RTVAL & (1 << 1)) { SET_CLIP1_FLAG(1); }
6250         if (RTVAL & (1 << 2)) { SET_CLIP1_FLAG(2); }
6251         if (RTVAL & (1 << 3)) { SET_CLIP1_FLAG(3); }
6252         if (RTVAL & (1 << 4)) { SET_CLIP1_FLAG(4); }
6253         if (RTVAL & (1 << 5)) { SET_CLIP1_FLAG(5); }
6254         if (RTVAL & (1 << 6)) { SET_CLIP1_FLAG(6); }
6255         if (RTVAL & (1 << 7)) { SET_CLIP1_FLAG(7); }
6256         break;
6257   }
59006258}
59016259
59026260/***************************************************************************
r25438r25439
59116269static void log_add_disasm_comment(rsp_state *rsp, drcuml_block *block, UINT32 pc, UINT32 op)
59126270{
59136271#if (LOG_UML)
5914   char buffer[100];
5915   rsp_dasm_one(buffer, pc, op);
5916   block->append_comment("%08X: %s", pc, buffer);                                  // comment
6272    char buffer[100];
6273    rsp_dasm_one(buffer, pc, op);
6274    block->append_comment("%08X: %s", pc, buffer);                                  // comment
59176275#endif
59186276}
59196277
59206278
59216279static CPU_SET_INFO( rsp )
59226280{
5923   rsp_state *rsp = get_safe_token(device);
6281    rsp_state *rsp = get_safe_token(device);
59246282
5925   switch (state)
5926   {
5927      /* --- the following bits of info are set as 64-bit signed integers --- */
5928      case CPUINFO_INT_PC:
5929      case CPUINFO_INT_REGISTER + RSP_PC:             rsp->pc = info->i;        break;
5930      case CPUINFO_INT_REGISTER + RSP_R0:             rsp->r[0] = info->i;        break;
5931      case CPUINFO_INT_REGISTER + RSP_R1:             rsp->r[1] = info->i;        break;
5932      case CPUINFO_INT_REGISTER + RSP_R2:             rsp->r[2] = info->i;        break;
5933      case CPUINFO_INT_REGISTER + RSP_R3:             rsp->r[3] = info->i;        break;
5934      case CPUINFO_INT_REGISTER + RSP_R4:             rsp->r[4] = info->i;        break;
5935      case CPUINFO_INT_REGISTER + RSP_R5:             rsp->r[5] = info->i;        break;
5936      case CPUINFO_INT_REGISTER + RSP_R6:             rsp->r[6] = info->i;        break;
5937      case CPUINFO_INT_REGISTER + RSP_R7:             rsp->r[7] = info->i;        break;
5938      case CPUINFO_INT_REGISTER + RSP_R8:             rsp->r[8] = info->i;        break;
5939      case CPUINFO_INT_REGISTER + RSP_R9:             rsp->r[9] = info->i;        break;
5940      case CPUINFO_INT_REGISTER + RSP_R10:            rsp->r[10] = info->i;        break;
5941      case CPUINFO_INT_REGISTER + RSP_R11:            rsp->r[11] = info->i;        break;
5942      case CPUINFO_INT_REGISTER + RSP_R12:            rsp->r[12] = info->i;        break;
5943      case CPUINFO_INT_REGISTER + RSP_R13:            rsp->r[13] = info->i;        break;
5944      case CPUINFO_INT_REGISTER + RSP_R14:            rsp->r[14] = info->i;        break;
5945      case CPUINFO_INT_REGISTER + RSP_R15:            rsp->r[15] = info->i;        break;
5946      case CPUINFO_INT_REGISTER + RSP_R16:            rsp->r[16] = info->i;        break;
5947      case CPUINFO_INT_REGISTER + RSP_R17:            rsp->r[17] = info->i;        break;
5948      case CPUINFO_INT_REGISTER + RSP_R18:            rsp->r[18] = info->i;        break;
5949      case CPUINFO_INT_REGISTER + RSP_R19:            rsp->r[19] = info->i;        break;
5950      case CPUINFO_INT_REGISTER + RSP_R20:            rsp->r[20] = info->i;        break;
5951      case CPUINFO_INT_REGISTER + RSP_R21:            rsp->r[21] = info->i;        break;
5952      case CPUINFO_INT_REGISTER + RSP_R22:            rsp->r[22] = info->i;        break;
5953      case CPUINFO_INT_REGISTER + RSP_R23:            rsp->r[23] = info->i;        break;
5954      case CPUINFO_INT_REGISTER + RSP_R24:            rsp->r[24] = info->i;        break;
5955      case CPUINFO_INT_REGISTER + RSP_R25:            rsp->r[25] = info->i;        break;
5956      case CPUINFO_INT_REGISTER + RSP_R26:            rsp->r[26] = info->i;        break;
5957      case CPUINFO_INT_REGISTER + RSP_R27:            rsp->r[27] = info->i;        break;
5958      case CPUINFO_INT_REGISTER + RSP_R28:            rsp->r[28] = info->i;        break;
5959      case CPUINFO_INT_REGISTER + RSP_R29:            rsp->r[29] = info->i;        break;
5960      case CPUINFO_INT_REGISTER + RSP_R30:            rsp->r[30] = info->i;        break;
5961      case CPUINFO_INT_SP:
5962      case CPUINFO_INT_REGISTER + RSP_R31:            rsp->r[31] = info->i;        break;
5963      case CPUINFO_INT_REGISTER + RSP_SR:             rsp->sr = info->i;           break;
5964      case CPUINFO_INT_REGISTER + RSP_NEXTPC:         rsp->nextpc = info->i;       break;
5965      case CPUINFO_INT_REGISTER + RSP_STEPCNT:        rsp->step_count = info->i;   break;
5966   }
6283    switch (state)
6284    {
6285        /* --- the following bits of info are set as 64-bit signed integers --- */
6286        case CPUINFO_INT_PC:
6287        case CPUINFO_INT_REGISTER + RSP_PC:             rsp->pc = info->i;        break;
6288        case CPUINFO_INT_REGISTER + RSP_R0:             rsp->r[0] = info->i;        break;
6289        case CPUINFO_INT_REGISTER + RSP_R1:             rsp->r[1] = info->i;        break;
6290        case CPUINFO_INT_REGISTER + RSP_R2:             rsp->r[2] = info->i;        break;
6291        case CPUINFO_INT_REGISTER + RSP_R3:             rsp->r[3] = info->i;        break;
6292        case CPUINFO_INT_REGISTER + RSP_R4:             rsp->r[4] = info->i;        break;
6293        case CPUINFO_INT_REGISTER + RSP_R5:             rsp->r[5] = info->i;        break;
6294        case CPUINFO_INT_REGISTER + RSP_R6:             rsp->r[6] = info->i;        break;
6295        case CPUINFO_INT_REGISTER + RSP_R7:             rsp->r[7] = info->i;        break;
6296        case CPUINFO_INT_REGISTER + RSP_R8:             rsp->r[8] = info->i;        break;
6297        case CPUINFO_INT_REGISTER + RSP_R9:             rsp->r[9] = info->i;        break;
6298        case CPUINFO_INT_REGISTER + RSP_R10:            rsp->r[10] = info->i;        break;
6299        case CPUINFO_INT_REGISTER + RSP_R11:            rsp->r[11] = info->i;        break;
6300        case CPUINFO_INT_REGISTER + RSP_R12:            rsp->r[12] = info->i;        break;
6301        case CPUINFO_INT_REGISTER + RSP_R13:            rsp->r[13] = info->i;        break;
6302        case CPUINFO_INT_REGISTER + RSP_R14:            rsp->r[14] = info->i;        break;
6303        case CPUINFO_INT_REGISTER + RSP_R15:            rsp->r[15] = info->i;        break;
6304        case CPUINFO_INT_REGISTER + RSP_R16:            rsp->r[16] = info->i;        break;
6305        case CPUINFO_INT_REGISTER + RSP_R17:            rsp->r[17] = info->i;        break;
6306        case CPUINFO_INT_REGISTER + RSP_R18:            rsp->r[18] = info->i;        break;
6307        case CPUINFO_INT_REGISTER + RSP_R19:            rsp->r[19] = info->i;        break;
6308        case CPUINFO_INT_REGISTER + RSP_R20:            rsp->r[20] = info->i;        break;
6309        case CPUINFO_INT_REGISTER + RSP_R21:            rsp->r[21] = info->i;        break;
6310        case CPUINFO_INT_REGISTER + RSP_R22:            rsp->r[22] = info->i;        break;
6311        case CPUINFO_INT_REGISTER + RSP_R23:            rsp->r[23] = info->i;        break;
6312        case CPUINFO_INT_REGISTER + RSP_R24:            rsp->r[24] = info->i;        break;
6313        case CPUINFO_INT_REGISTER + RSP_R25:            rsp->r[25] = info->i;        break;
6314        case CPUINFO_INT_REGISTER + RSP_R26:            rsp->r[26] = info->i;        break;
6315        case CPUINFO_INT_REGISTER + RSP_R27:            rsp->r[27] = info->i;        break;
6316        case CPUINFO_INT_REGISTER + RSP_R28:            rsp->r[28] = info->i;        break;
6317        case CPUINFO_INT_REGISTER + RSP_R29:            rsp->r[29] = info->i;        break;
6318        case CPUINFO_INT_REGISTER + RSP_R30:            rsp->r[30] = info->i;        break;
6319        case CPUINFO_INT_SP:
6320        case CPUINFO_INT_REGISTER + RSP_R31:            rsp->r[31] = info->i;        break;
6321        case CPUINFO_INT_REGISTER + RSP_SR:             rsp->sr = info->i;           break;
6322        case CPUINFO_INT_REGISTER + RSP_NEXTPC:         rsp->nextpc = info->i;       break;
6323        case CPUINFO_INT_REGISTER + RSP_STEPCNT:        rsp->step_count = info->i;   break;
6324    }
59676325}
59686326
59696327CPU_GET_INFO( rsp_drc )
59706328{
5971   rsp_state *rsp = (device != NULL && device->token() != NULL) ? get_safe_token(device) : NULL;
6329    rsp_state *rsp = (device != NULL && device->token() != NULL) ? get_safe_token(device) : NULL;
59726330
5973   switch(state)
5974   {
5975      /* --- the following bits of info are returned as 64-bit signed integers --- */
5976      case CPUINFO_INT_CONTEXT_SIZE:                  info->i = sizeof(rsp_state);                    break;
5977      case CPUINFO_INT_INPUT_LINES:                   info->i = 1;                            break;
5978      case CPUINFO_INT_DEFAULT_IRQ_VECTOR:            info->i = 0;                            break;
5979      case CPUINFO_INT_ENDIANNESS:                    info->i = ENDIANNESS_BIG;               break;
5980      case CPUINFO_INT_CLOCK_MULTIPLIER:              info->i = 1;                            break;
5981      case CPUINFO_INT_CLOCK_DIVIDER:                 info->i = 1;                            break;
5982      case CPUINFO_INT_MIN_INSTRUCTION_BYTES:         info->i = 4;                            break;
5983      case CPUINFO_INT_MAX_INSTRUCTION_BYTES:         info->i = 4;                            break;
5984      case CPUINFO_INT_MIN_CYCLES:                    info->i = 1;                            break;
5985      case CPUINFO_INT_MAX_CYCLES:                    info->i = 1;                            break;
6331    switch(state)
6332    {
6333        /* --- the following bits of info are returned as 64-bit signed integers --- */
6334        case CPUINFO_INT_CONTEXT_SIZE:                  info->i = sizeof(rsp_state);                    break;
6335        case CPUINFO_INT_INPUT_LINES:                   info->i = 1;                            break;
6336        case CPUINFO_INT_DEFAULT_IRQ_VECTOR:            info->i = 0;                            break;
6337        case CPUINFO_INT_ENDIANNESS:                    info->i = ENDIANNESS_BIG;               break;
6338        case CPUINFO_INT_CLOCK_MULTIPLIER:              info->i = 1;                            break;
6339        case CPUINFO_INT_CLOCK_DIVIDER:                 info->i = 1;                            break;
6340        case CPUINFO_INT_MIN_INSTRUCTION_BYTES:         info->i = 4;                            break;
6341        case CPUINFO_INT_MAX_INSTRUCTION_BYTES:         info->i = 4;                            break;
6342        case CPUINFO_INT_MIN_CYCLES:                    info->i = 1;                            break;
6343        case CPUINFO_INT_MAX_CYCLES:                    info->i = 1;                            break;
59866344
5987      case CPUINFO_INT_DATABUS_WIDTH + AS_PROGRAM:    info->i = 32;                   break;
5988      case CPUINFO_INT_ADDRBUS_WIDTH + AS_PROGRAM: info->i = 32;                  break;
5989      case CPUINFO_INT_ADDRBUS_SHIFT + AS_PROGRAM: info->i = 0;                   break;
5990      case CPUINFO_INT_DATABUS_WIDTH + AS_DATA:   info->i = 0;                    break;
5991      case CPUINFO_INT_ADDRBUS_WIDTH + AS_DATA:   info->i = 0;                    break;
5992      case CPUINFO_INT_ADDRBUS_SHIFT + AS_DATA:   info->i = 0;                    break;
5993      case CPUINFO_INT_DATABUS_WIDTH + AS_IO:     info->i = 0;                    break;
5994      case CPUINFO_INT_ADDRBUS_WIDTH + AS_IO:     info->i = 0;                    break;
5995      case CPUINFO_INT_ADDRBUS_SHIFT + AS_IO:     info->i = 0;                    break;
6345        case CPUINFO_INT_DATABUS_WIDTH + AS_PROGRAM:    info->i = 32;                   break;
6346        case CPUINFO_INT_ADDRBUS_WIDTH + AS_PROGRAM: info->i = 32;                  break;
6347        case CPUINFO_INT_ADDRBUS_SHIFT + AS_PROGRAM: info->i = 0;                   break;
6348        case CPUINFO_INT_DATABUS_WIDTH + AS_DATA:   info->i = 0;                    break;
6349        case CPUINFO_INT_ADDRBUS_WIDTH + AS_DATA:   info->i = 0;                    break;
6350        case CPUINFO_INT_ADDRBUS_SHIFT + AS_DATA:   info->i = 0;                    break;
6351        case CPUINFO_INT_DATABUS_WIDTH + AS_IO:     info->i = 0;                    break;
6352        case CPUINFO_INT_ADDRBUS_WIDTH + AS_IO:     info->i = 0;                    break;
6353        case CPUINFO_INT_ADDRBUS_SHIFT + AS_IO:     info->i = 0;                    break;
59966354
5997      case CPUINFO_INT_INPUT_STATE:                   info->i = CLEAR_LINE;                   break;
6355        case CPUINFO_INT_INPUT_STATE:                   info->i = CLEAR_LINE;                   break;
59986356
5999      case CPUINFO_INT_PREVIOUSPC:                    info->i = rsp->ppc | 0x04000000;                        break;
6357        case CPUINFO_INT_PREVIOUSPC:                    info->i = rsp->ppc | 0x04000000;                        break;
60006358
6001      case CPUINFO_INT_PC:    /* intentional fallthrough */
6002      case CPUINFO_INT_REGISTER + RSP_PC:             info->i = rsp->pc | 0x04000000;                     break;
6359        case CPUINFO_INT_PC:    /* intentional fallthrough */
6360        case CPUINFO_INT_REGISTER + RSP_PC:             info->i = rsp->pc | 0x04000000;                     break;
60036361
6004      case CPUINFO_INT_REGISTER + RSP_R0:             info->i = rsp->r[0];                        break;
6005      case CPUINFO_INT_REGISTER + RSP_R1:             info->i = rsp->r[1];                        break;
6006      case CPUINFO_INT_REGISTER + RSP_R2:             info->i = rsp->r[2];                        break;
6007      case CPUINFO_INT_REGISTER + RSP_R3:             info->i = rsp->r[3];                        break;
6008      case CPUINFO_INT_REGISTER + RSP_R4:             info->i = rsp->r[4];                        break;
6009      case CPUINFO_INT_REGISTER + RSP_R5:             info->i = rsp->r[5];                        break;
6010      case CPUINFO_INT_REGISTER + RSP_R6:             info->i = rsp->r[6];                        break;
6011      case CPUINFO_INT_REGISTER + RSP_R7:             info->i = rsp->r[7];                        break;
6012      case CPUINFO_INT_REGISTER + RSP_R8:             info->i = rsp->r[8];                        break;
6013      case CPUINFO_INT_REGISTER + RSP_R9:             info->i = rsp->r[9];                        break;
6014      case CPUINFO_INT_REGISTER + RSP_R10:            info->i = rsp->r[10];                   break;
6015      case CPUINFO_INT_REGISTER + RSP_R11:            info->i = rsp->r[11];                   break;
6016      case CPUINFO_INT_REGISTER + RSP_R12:            info->i = rsp->r[12];                   break;
6017      case CPUINFO_INT_REGISTER + RSP_R13:            info->i = rsp->r[13];                   break;
6018      case CPUINFO_INT_REGISTER + RSP_R14:            info->i = rsp->r[14];                   break;
6019      case CPUINFO_INT_REGISTER + RSP_R15:            info->i = rsp->r[15];                   break;
6020      case CPUINFO_INT_REGISTER + RSP_R16:            info->i = rsp->r[16];                   break;
6021      case CPUINFO_INT_REGISTER + RSP_R17:            info->i = rsp->r[17];                   break;
6022      case CPUINFO_INT_REGISTER + RSP_R18:            info->i = rsp->r[18];                   break;
6023      case CPUINFO_INT_REGISTER + RSP_R19:            info->i = rsp->r[19];                   break;
6024      case CPUINFO_INT_REGISTER + RSP_R20:            info->i = rsp->r[20];                   break;
6025      case CPUINFO_INT_REGISTER + RSP_R21:            info->i = rsp->r[21];                   break;
6026      case CPUINFO_INT_REGISTER + RSP_R22:            info->i = rsp->r[22];                   break;
6027      case CPUINFO_INT_REGISTER + RSP_R23:            info->i = rsp->r[23];                   break;
6028      case CPUINFO_INT_REGISTER + RSP_R24:            info->i = rsp->r[24];                   break;
6029      case CPUINFO_INT_REGISTER + RSP_R25:            info->i = rsp->r[25];                   break;
6030      case CPUINFO_INT_REGISTER + RSP_R26:            info->i = rsp->r[26];                   break;
6031      case CPUINFO_INT_REGISTER + RSP_R27:            info->i = rsp->r[27];                   break;
6032      case CPUINFO_INT_REGISTER + RSP_R28:            info->i = rsp->r[28];                   break;
6033      case CPUINFO_INT_REGISTER + RSP_R29:            info->i = rsp->r[29];                   break;
6034      case CPUINFO_INT_REGISTER + RSP_R30:            info->i = rsp->r[30];                   break;
6035      case CPUINFO_INT_SP:
6036      case CPUINFO_INT_REGISTER + RSP_R31:            info->i = rsp->r[31];                    break;
6037      case CPUINFO_INT_REGISTER + RSP_SR:             info->i = rsp->sr;                       break;
6038      case CPUINFO_INT_REGISTER + RSP_NEXTPC:         info->i = rsp->nextpc | 0x04000000;      break;
6039      case CPUINFO_INT_REGISTER + RSP_STEPCNT:        info->i = rsp->step_count;               break;
6362        case CPUINFO_INT_REGISTER + RSP_R0:             info->i = rsp->r[0];                        break;
6363        case CPUINFO_INT_REGISTER + RSP_R1:             info->i = rsp->r[1];                        break;
6364        case CPUINFO_INT_REGISTER + RSP_R2:             info->i = rsp->r[2];                        break;
6365        case CPUINFO_INT_REGISTER + RSP_R3:             info->i = rsp->r[3];                        break;
6366        case CPUINFO_INT_REGISTER + RSP_R4:             info->i = rsp->r[4];                        break;
6367        case CPUINFO_INT_REGISTER + RSP_R5:             info->i = rsp->r[5];                        break;
6368        case CPUINFO_INT_REGISTER + RSP_R6:             info->i = rsp->r[6];                        break;
6369        case CPUINFO_INT_REGISTER + RSP_R7:             info->i = rsp->r[7];                        break;
6370        case CPUINFO_INT_REGISTER + RSP_R8:             info->i = rsp->r[8];                        break;
6371        case CPUINFO_INT_REGISTER + RSP_R9:             info->i = rsp->r[9];                        break;
6372        case CPUINFO_INT_REGISTER + RSP_R10:            info->i = rsp->r[10];                   break;
6373        case CPUINFO_INT_REGISTER + RSP_R11:            info->i = rsp->r[11];                   break;
6374        case CPUINFO_INT_REGISTER + RSP_R12:            info->i = rsp->r[12];                   break;
6375        case CPUINFO_INT_REGISTER + RSP_R13:            info->i = rsp->r[13];                   break;
6376        case CPUINFO_INT_REGISTER + RSP_R14:            info->i = rsp->r[14];                   break;
6377        case CPUINFO_INT_REGISTER + RSP_R15:            info->i = rsp->r[15];                   break;
6378        case CPUINFO_INT_REGISTER + RSP_R16:            info->i = rsp->r[16];                   break;
6379        case CPUINFO_INT_REGISTER + RSP_R17:            info->i = rsp->r[17];                   break;
6380        case CPUINFO_INT_REGISTER + RSP_R18:            info->i = rsp->r[18];                   break;
6381        case CPUINFO_INT_REGISTER + RSP_R19:            info->i = rsp->r[19];                   break;
6382        case CPUINFO_INT_REGISTER + RSP_R20:            info->i = rsp->r[20];                   break;
6383        case CPUINFO_INT_REGISTER + RSP_R21:            info->i = rsp->r[21];                   break;
6384        case CPUINFO_INT_REGISTER + RSP_R22:            info->i = rsp->r[22];                   break;
6385        case CPUINFO_INT_REGISTER + RSP_R23:            info->i = rsp->r[23];                   break;
6386        case CPUINFO_INT_REGISTER + RSP_R24:            info->i = rsp->r[24];                   break;
6387        case CPUINFO_INT_REGISTER + RSP_R25:            info->i = rsp->r[25];                   break;
6388        case CPUINFO_INT_REGISTER + RSP_R26:            info->i = rsp->r[26];                   break;
6389        case CPUINFO_INT_REGISTER + RSP_R27:            info->i = rsp->r[27];                   break;
6390        case CPUINFO_INT_REGISTER + RSP_R28:            info->i = rsp->r[28];                   break;
6391        case CPUINFO_INT_REGISTER + RSP_R29:            info->i = rsp->r[29];                   break;
6392        case CPUINFO_INT_REGISTER + RSP_R30:            info->i = rsp->r[30];                   break;
6393        case CPUINFO_INT_SP:
6394        case CPUINFO_INT_REGISTER + RSP_R31:            info->i = rsp->r[31];                    break;
6395        case CPUINFO_INT_REGISTER + RSP_SR:             info->i = rsp->sr;                       break;
6396        case CPUINFO_INT_REGISTER + RSP_NEXTPC:         info->i = rsp->nextpc | 0x04000000;      break;
6397        case CPUINFO_INT_REGISTER + RSP_STEPCNT:        info->i = rsp->step_count;               break;
60406398
6041      /* --- the following bits of info are returned as pointers to data or functions --- */
6042      case CPUINFO_FCT_SET_INFO:                      info->setinfo = CPU_SET_INFO_NAME(rsp);         break;
6043      case CPUINFO_FCT_INIT:                          info->init = CPU_INIT_NAME(rsp);                    break;
6044      case CPUINFO_FCT_RESET:                         info->reset = CPU_RESET_NAME(rsp);              break;
6045      case CPUINFO_FCT_EXIT:                          info->exit = CPU_EXIT_NAME(rsp);                    break;
6046      case CPUINFO_FCT_EXECUTE:                       info->execute = CPU_EXECUTE_NAME(rsp);          break;
6047      case CPUINFO_FCT_BURN:                          info->burn = NULL;                      break;
6048      case CPUINFO_FCT_DISASSEMBLE:                   info->disassemble = CPU_DISASSEMBLE_NAME(rsp);          break;
6049      case CPUINFO_PTR_INSTRUCTION_COUNTER:           info->icount = &rsp->icount;                break;
6399        /* --- the following bits of info are returned as pointers to data or functions --- */
6400        case CPUINFO_FCT_SET_INFO:                      info->setinfo = CPU_SET_INFO_NAME(rsp);         break;
6401        case CPUINFO_FCT_INIT:                          info->init = CPU_INIT_NAME(rsp);                    break;
6402        case CPUINFO_FCT_RESET:                         info->reset = CPU_RESET_NAME(rsp);              break;
6403        case CPUINFO_FCT_EXIT:                          info->exit = CPU_EXIT_NAME(rsp);                    break;
6404        case CPUINFO_FCT_EXECUTE:                       info->execute = CPU_EXECUTE_NAME(rsp);          break;
6405        case CPUINFO_FCT_BURN:                          info->burn = NULL;                      break;
6406        case CPUINFO_FCT_DISASSEMBLE:                   info->disassemble = CPU_DISASSEMBLE_NAME(rsp);          break;
6407        case CPUINFO_PTR_INSTRUCTION_COUNTER:           info->icount = &rsp->icount;                break;
60506408
6051      /* --- the following bits of info are returned as NULL-terminated strings --- */
6052      case CPUINFO_STR_NAME:                          strcpy(info->s, "RSP DRC");                 break;
6053      case CPUINFO_STR_SHORTNAME:                     strcpy(info->s, "rsp_drc");                 break;
6054      case CPUINFO_STR_FAMILY:                    strcpy(info->s, "RSP");                 break;
6055      case CPUINFO_STR_VERSION:                   strcpy(info->s, "1.0");                 break;
6056      case CPUINFO_STR_SOURCE_FILE:                       strcpy(info->s, __FILE__);              break;
6057      case CPUINFO_STR_CREDITS:                   strcpy(info->s, "Copyright Nicola Salmoria and the MAME Team"); break;
6409        /* --- the following bits of info are returned as NULL-terminated strings --- */
6410        case CPUINFO_STR_NAME:                          strcpy(info->s, "RSP DRC");                 break;
6411        case CPUINFO_STR_SHORTNAME:                     strcpy(info->s, "rsp_drc");                 break;
6412        case CPUINFO_STR_FAMILY:                    strcpy(info->s, "RSP");                 break;
6413        case CPUINFO_STR_VERSION:                   strcpy(info->s, "1.0");                 break;
6414        case CPUINFO_STR_SOURCE_FILE:                       strcpy(info->s, __FILE__);              break;
6415        case CPUINFO_STR_CREDITS:                   strcpy(info->s, "Copyright Nicola Salmoria and the MAME Team"); break;
60586416
6059      case CPUINFO_STR_FLAGS:                         strcpy(info->s, " ");                   break;
6417        case CPUINFO_STR_FLAGS:                         strcpy(info->s, " ");                   break;
60606418
6061      case CPUINFO_STR_REGISTER + RSP_PC:             sprintf(info->s, "PC: %08X", rsp->pc | 0x04000000); break;
6419        case CPUINFO_STR_REGISTER + RSP_PC:             sprintf(info->s, "PC: %08X", rsp->pc | 0x04000000); break;
60626420
6063      case CPUINFO_STR_REGISTER + RSP_R0:             sprintf(info->s, "R0: %08X", rsp->r[0]); break;
6064      case CPUINFO_STR_REGISTER + RSP_R1:             sprintf(info->s, "R1: %08X", rsp->r[1]); break;
6065      case CPUINFO_STR_REGISTER + RSP_R2:             sprintf(info->s, "R2: %08X", rsp->r[2]); break;
6066      case CPUINFO_STR_REGISTER + RSP_R3:             sprintf(info->s, "R3: %08X", rsp->r[3]); break;
6067      case CPUINFO_STR_REGISTER + RSP_R4:             sprintf(info->s, "R4: %08X", rsp->r[4]); break;
6068      case CPUINFO_STR_REGISTER + RSP_R5:             sprintf(info->s, "R5: %08X", rsp->r[5]); break;
6069      case CPUINFO_STR_REGISTER + RSP_R6:             sprintf(info->s, "R6: %08X", rsp->r[6]); break;
6070      case CPUINFO_STR_REGISTER + RSP_R7:             sprintf(info->s, "R7: %08X", rsp->r[7]); break;
6071      case CPUINFO_STR_REGISTER + RSP_R8:             sprintf(info->s, "R8: %08X", rsp->r[8]); break;
6072      case CPUINFO_STR_REGISTER + RSP_R9:             sprintf(info->s, "R9: %08X", rsp->r[9]); break;
6073      case CPUINFO_STR_REGISTER + RSP_R10:            sprintf(info->s, "R10: %08X", rsp->r[10]); break;
6074      case CPUINFO_STR_REGISTER + RSP_R11:            sprintf(info->s, "R11: %08X", rsp->r[11]); break;
6075      case CPUINFO_STR_REGISTER + RSP_R12:            sprintf(info->s, "R12: %08X", rsp->r[12]); break;
6076      case CPUINFO_STR_REGISTER + RSP_R13:            sprintf(info->s, "R13: %08X", rsp->r[13]); break;
6077      case CPUINFO_STR_REGISTER + RSP_R14:            sprintf(info->s, "R14: %08X", rsp->r[14]); break;
6078      case CPUINFO_STR_REGISTER + RSP_R15:            sprintf(info->s, "R15: %08X", rsp->r[15]); break;
6079      case CPUINFO_STR_REGISTER + RSP_R16:            sprintf(info->s, "R16: %08X", rsp->r[16]); break;
6080      case CPUINFO_STR_REGISTER + RSP_R17:            sprintf(info->s, "R17: %08X", rsp->r[17]); break;
6081      case CPUINFO_STR_REGISTER + RSP_R18:            sprintf(info->s, "R18: %08X", rsp->r[18]); break;
6082      case CPUINFO_STR_REGISTER + RSP_R19:            sprintf(info->s, "R19: %08X", rsp->r[19]); break;
6083      case CPUINFO_STR_REGISTER + RSP_R20:            sprintf(info->s, "R20: %08X", rsp->r[20]); break;
6084      case CPUINFO_STR_REGISTER + RSP_R21:            sprintf(info->s, "R21: %08X", rsp->r[21]); break;
6085      case CPUINFO_STR_REGISTER + RSP_R22:            sprintf(info->s, "R22: %08X", rsp->r[22]); break;
6086      case CPUINFO_STR_REGISTER + RSP_R23:            sprintf(info->s, "R23: %08X", rsp->r[23]); break;
6087      case CPUINFO_STR_REGISTER + RSP_R24:            sprintf(info->s, "R24: %08X", rsp->r[24]); break;
6088      case CPUINFO_STR_REGISTER + RSP_R25:            sprintf(info->s, "R25: %08X", rsp->r[25]); break;
6089      case CPUINFO_STR_REGISTER + RSP_R26:            sprintf(info->s, "R26: %08X", rsp->r[26]); break;
6090      case CPUINFO_STR_REGISTER + RSP_R27:            sprintf(info->s, "R27: %08X", rsp->r[27]); break;
6091      case CPUINFO_STR_REGISTER + RSP_R28:            sprintf(info->s, "R28: %08X", rsp->r[28]); break;
6092      case CPUINFO_STR_REGISTER + RSP_R29:            sprintf(info->s, "R29: %08X", rsp->r[29]); break;
6093      case CPUINFO_STR_REGISTER + RSP_R30:            sprintf(info->s, "R30: %08X", rsp->r[30]); break;
6094      case CPUINFO_STR_REGISTER + RSP_R31:            sprintf(info->s, "R31: %08X", rsp->r[31]); break;
6421        case CPUINFO_STR_REGISTER + RSP_R0:             sprintf(info->s, "R0: %08X", rsp->r[0]); break;
6422        case CPUINFO_STR_REGISTER + RSP_R1:             sprintf(info->s, "R1: %08X", rsp->r[1]); break;
6423        case CPUINFO_STR_REGISTER + RSP_R2:             sprintf(info->s, "R2: %08X", rsp->r[2]); break;
6424        case CPUINFO_STR_REGISTER + RSP_R3:             sprintf(info->s, "R3: %08X", rsp->r[3]); break;
6425        case CPUINFO_STR_REGISTER + RSP_R4:             sprintf(info->s, "R4: %08X", rsp->r[4]); break;
6426        case CPUINFO_STR_REGISTER + RSP_R5:             sprintf(info->s, "R5: %08X", rsp->r[5]); break;
6427        case CPUINFO_STR_REGISTER + RSP_R6:             sprintf(info->s, "R6: %08X", rsp->r[6]); break;
6428        case CPUINFO_STR_REGISTER + RSP_R7:             sprintf(info->s, "R7: %08X", rsp->r[7]); break;
6429        case CPUINFO_STR_REGISTER + RSP_R8:             sprintf(info->s, "R8: %08X", rsp->r[8]); break;
6430        case CPUINFO_STR_REGISTER + RSP_R9:             sprintf(info->s, "R9: %08X", rsp->r[9]); break;
6431        case CPUINFO_STR_REGISTER + RSP_R10:            sprintf(info->s, "R10: %08X", rsp->r[10]); break;
6432        case CPUINFO_STR_REGISTER + RSP_R11:            sprintf(info->s, "R11: %08X", rsp->r[11]); break;
6433        case CPUINFO_STR_REGISTER + RSP_R12:            sprintf(info->s, "R12: %08X", rsp->r[12]); break;
6434        case CPUINFO_STR_REGISTER + RSP_R13:            sprintf(info->s, "R13: %08X", rsp->r[13]); break;
6435        case CPUINFO_STR_REGISTER + RSP_R14:            sprintf(info->s, "R14: %08X", rsp->r[14]); break;
6436        case CPUINFO_STR_REGISTER + RSP_R15:            sprintf(info->s, "R15: %08X", rsp->r[15]); break;
6437        case CPUINFO_STR_REGISTER + RSP_R16:            sprintf(info->s, "R16: %08X", rsp->r[16]); break;
6438        case CPUINFO_STR_REGISTER + RSP_R17:            sprintf(info->s, "R17: %08X", rsp->r[17]); break;
6439        case CPUINFO_STR_REGISTER + RSP_R18:            sprintf(info->s, "R18: %08X", rsp->r[18]); break;
6440        case CPUINFO_STR_REGISTER + RSP_R19:            sprintf(info->s, "R19: %08X", rsp->r[19]); break;
6441        case CPUINFO_STR_REGISTER + RSP_R20:            sprintf(info->s, "R20: %08X", rsp->r[20]); break;
6442        case CPUINFO_STR_REGISTER + RSP_R21:            sprintf(info->s, "R21: %08X", rsp->r[21]); break;
6443        case CPUINFO_STR_REGISTER + RSP_R22:            sprintf(info->s, "R22: %08X", rsp->r[22]); break;
6444        case CPUINFO_STR_REGISTER + RSP_R23:            sprintf(info->s, "R23: %08X", rsp->r[23]); break;
6445        case CPUINFO_STR_REGISTER + RSP_R24:            sprintf(info->s, "R24: %08X", rsp->r[24]); break;
6446        case CPUINFO_STR_REGISTER + RSP_R25:            sprintf(info->s, "R25: %08X", rsp->r[25]); break;
6447        case CPUINFO_STR_REGISTER + RSP_R26:            sprintf(info->s, "R26: %08X", rsp->r[26]); break;
6448        case CPUINFO_STR_REGISTER + RSP_R27:            sprintf(info->s, "R27: %08X", rsp->r[27]); break;
6449        case CPUINFO_STR_REGISTER + RSP_R28:            sprintf(info->s, "R28: %08X", rsp->r[28]); break;
6450        case CPUINFO_STR_REGISTER + RSP_R29:            sprintf(info->s, "R29: %08X", rsp->r[29]); break;
6451        case CPUINFO_STR_REGISTER + RSP_R30:            sprintf(info->s, "R30: %08X", rsp->r[30]); break;
6452        case CPUINFO_STR_REGISTER + RSP_R31:            sprintf(info->s, "R31: %08X", rsp->r[31]); break;
60956453
60966454#if USE_SIMD
6097      case CPUINFO_STR_REGISTER + RSP_V0:             sprintf(info->s, "V0: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 0], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 0)); break;
6098      case CPUINFO_STR_REGISTER + RSP_V1:             sprintf(info->s, "V1: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 1], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 0)); break;
6099      case CPUINFO_STR_REGISTER + RSP_V2:             sprintf(info->s, "V2: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 2], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 0)); break;
6100      case CPUINFO_STR_REGISTER + RSP_V3:             sprintf(info->s, "V3: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 3], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 0)); break;
6101      case CPUINFO_STR_REGISTER + RSP_V4:             sprintf(info->s, "V4: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 4], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 0)); break;
6102      case CPUINFO_STR_REGISTER + RSP_V5:             sprintf(info->s, "V5: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 5], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 0)); break;
6103      case CPUINFO_STR_REGISTER + RSP_V6:             sprintf(info->s, "V6: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 6], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 0)); break;
6104      case CPUINFO_STR_REGISTER + RSP_V7:             sprintf(info->s, "V7: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 7], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 0)); break;
6105      case CPUINFO_STR_REGISTER + RSP_V8:             sprintf(info->s, "V8: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 8], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 0)); break;
6106      case CPUINFO_STR_REGISTER + RSP_V9:             sprintf(info->s, "V9: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 9], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 0)); break;
6107      case CPUINFO_STR_REGISTER + RSP_V10:            sprintf(info->s, "V10: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[10], 7), (UINT16)_mm_extract_epi16(rsp->xv[10], 6), (UINT16)_mm_extract_epi16(rsp->xv[10], 5), (UINT16)_mm_extract_epi16(rsp->xv[10], 4), (UINT16)_mm_extract_epi16(rsp->xv[10], 3), (UINT16)_mm_extract_epi16(rsp->xv[10], 2), (UINT16)_mm_extract_epi16(rsp->xv[10], 1), (UINT16)_mm_extract_epi16(rsp->xv[10], 0)); break;
6108      case CPUINFO_STR_REGISTER + RSP_V11:            sprintf(info->s, "V11: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[11], 7), (UINT16)_mm_extract_epi16(rsp->xv[11], 6), (UINT16)_mm_extract_epi16(rsp->xv[11], 5), (UINT16)_mm_extract_epi16(rsp->xv[11], 4), (UINT16)_mm_extract_epi16(rsp->xv[11], 3), (UINT16)_mm_extract_epi16(rsp->xv[11], 2), (UINT16)_mm_extract_epi16(rsp->xv[11], 1), (UINT16)_mm_extract_epi16(rsp->xv[11], 0)); break;
6109      case CPUINFO_STR_REGISTER + RSP_V12:            sprintf(info->s, "V12: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[12], 7), (UINT16)_mm_extract_epi16(rsp->xv[12], 6), (UINT16)_mm_extract_epi16(rsp->xv[12], 5), (UINT16)_mm_extract_epi16(rsp->xv[12], 4), (UINT16)_mm_extract_epi16(rsp->xv[12], 3), (UINT16)_mm_extract_epi16(rsp->xv[12], 2), (UINT16)_mm_extract_epi16(rsp->xv[12], 1), (UINT16)_mm_extract_epi16(rsp->xv[12], 0)); break;
6110      case CPUINFO_STR_REGISTER + RSP_V13:            sprintf(info->s, "V13: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[13], 7), (UINT16)_mm_extract_epi16(rsp->xv[13], 6), (UINT16)_mm_extract_epi16(rsp->xv[13], 5), (UINT16)_mm_extract_epi16(rsp->xv[13], 4), (UINT16)_mm_extract_epi16(rsp->xv[13], 3), (UINT16)_mm_extract_epi16(rsp->xv[13], 2), (UINT16)_mm_extract_epi16(rsp->xv[13], 1), (UINT16)_mm_extract_epi16(rsp->xv[13], 0)); break;
6111      case CPUINFO_STR_REGISTER + RSP_V14:            sprintf(info->s, "V14: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[14], 7), (UINT16)_mm_extract_epi16(rsp->xv[14], 6), (UINT16)_mm_extract_epi16(rsp->xv[14], 5), (UINT16)_mm_extract_epi16(rsp->xv[14], 4), (UINT16)_mm_extract_epi16(rsp->xv[14], 3), (UINT16)_mm_extract_epi16(rsp->xv[14], 2), (UINT16)_mm_extract_epi16(rsp->xv[14], 1), (UINT16)_mm_extract_epi16(rsp->xv[14], 0)); break;
6112      case CPUINFO_STR_REGISTER + RSP_V15:            sprintf(info->s, "V15: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[15], 7), (UINT16)_mm_extract_epi16(rsp->xv[15], 6), (UINT16)_mm_extract_epi16(rsp->xv[15], 5), (UINT16)_mm_extract_epi16(rsp->xv[15], 4), (UINT16)_mm_extract_epi16(rsp->xv[15], 3), (UINT16)_mm_extract_epi16(rsp->xv[15], 2), (UINT16)_mm_extract_epi16(rsp->xv[15], 1), (UINT16)_mm_extract_epi16(rsp->xv[15], 0)); break;
6113      case CPUINFO_STR_REGISTER + RSP_V16:            sprintf(info->s, "V16: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[16], 7), (UINT16)_mm_extract_epi16(rsp->xv[16], 6), (UINT16)_mm_extract_epi16(rsp->xv[16], 5), (UINT16)_mm_extract_epi16(rsp->xv[16], 4), (UINT16)_mm_extract_epi16(rsp->xv[16], 3), (UINT16)_mm_extract_epi16(rsp->xv[16], 2), (UINT16)_mm_extract_epi16(rsp->xv[16], 1), (UINT16)_mm_extract_epi16(rsp->xv[16], 0)); break;
6114      case CPUINFO_STR_REGISTER + RSP_V17:            sprintf(info->s, "V17: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[17], 7), (UINT16)_mm_extract_epi16(rsp->xv[17], 6), (UINT16)_mm_extract_epi16(rsp->xv[17], 5), (UINT16)_mm_extract_epi16(rsp->xv[17], 4), (UINT16)_mm_extract_epi16(rsp->xv[17], 3), (UINT16)_mm_extract_epi16(rsp->xv[17], 2), (UINT16)_mm_extract_epi16(rsp->xv[17], 1), (UINT16)_mm_extract_epi16(rsp->xv[17], 0)); break;
6115      case CPUINFO_STR_REGISTER + RSP_V18:            sprintf(info->s, "V18: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[18], 7), (UINT16)_mm_extract_epi16(rsp->xv[18], 6), (UINT16)_mm_extract_epi16(rsp->xv[18], 5), (UINT16)_mm_extract_epi16(rsp->xv[18], 4), (UINT16)_mm_extract_epi16(rsp->xv[18], 3), (UINT16)_mm_extract_epi16(rsp->xv[18], 2), (UINT16)_mm_extract_epi16(rsp->xv[18], 1), (UINT16)_mm_extract_epi16(rsp->xv[18], 0)); break;
6116      case CPUINFO_STR_REGISTER + RSP_V19:            sprintf(info->s, "V19: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[19], 7), (UINT16)_mm_extract_epi16(rsp->xv[19], 6), (UINT16)_mm_extract_epi16(rsp->xv[19], 5), (UINT16)_mm_extract_epi16(rsp->xv[19], 4), (UINT16)_mm_extract_epi16(rsp->xv[19], 3), (UINT16)_mm_extract_epi16(rsp->xv[19], 2), (UINT16)_mm_extract_epi16(rsp->xv[19], 1), (UINT16)_mm_extract_epi16(rsp->xv[19], 0)); break;
6117      case CPUINFO_STR_REGISTER + RSP_V20:            sprintf(info->s, "V20: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[20], 7), (UINT16)_mm_extract_epi16(rsp->xv[20], 6), (UINT16)_mm_extract_epi16(rsp->xv[20], 5), (UINT16)_mm_extract_epi16(rsp->xv[20], 4), (UINT16)_mm_extract_epi16(rsp->xv[20], 3), (UINT16)_mm_extract_epi16(rsp->xv[20], 2), (UINT16)_mm_extract_epi16(rsp->xv[20], 1), (UINT16)_mm_extract_epi16(rsp->xv[20], 0)); break;
6118      case CPUINFO_STR_REGISTER + RSP_V21:            sprintf(info->s, "V21: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[21], 7), (UINT16)_mm_extract_epi16(rsp->xv[21], 6), (UINT16)_mm_extract_epi16(rsp->xv[21], 5), (UINT16)_mm_extract_epi16(rsp->xv[21], 4), (UINT16)_mm_extract_epi16(rsp->xv[21], 3), (UINT16)_mm_extract_epi16(rsp->xv[21], 2), (UINT16)_mm_extract_epi16(rsp->xv[21], 1), (UINT16)_mm_extract_epi16(rsp->xv[21], 0)); break;
6119      case CPUINFO_STR_REGISTER + RSP_V22:            sprintf(info->s, "V22: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[22], 7), (UINT16)_mm_extract_epi16(rsp->xv[22], 6), (UINT16)_mm_extract_epi16(rsp->xv[22], 5), (UINT16)_mm_extract_epi16(rsp->xv[22], 4), (UINT16)_mm_extract_epi16(rsp->xv[22], 3), (UINT16)_mm_extract_epi16(rsp->xv[22], 2), (UINT16)_mm_extract_epi16(rsp->xv[22], 1), (UINT16)_mm_extract_epi16(rsp->xv[22], 0)); break;
6120      case CPUINFO_STR_REGISTER + RSP_V23:            sprintf(info->s, "V23: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[23], 7), (UINT16)_mm_extract_epi16(rsp->xv[23], 6), (UINT16)_mm_extract_epi16(rsp->xv[23], 5), (UINT16)_mm_extract_epi16(rsp->xv[23], 4), (UINT16)_mm_extract_epi16(rsp->xv[23], 3), (UINT16)_mm_extract_epi16(rsp->xv[23], 2), (UINT16)_mm_extract_epi16(rsp->xv[23], 1), (UINT16)_mm_extract_epi16(rsp->xv[23], 0)); break;
6121      case CPUINFO_STR_REGISTER + RSP_V24:            sprintf(info->s, "V24: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[24], 7), (UINT16)_mm_extract_epi16(rsp->xv[24], 6), (UINT16)_mm_extract_epi16(rsp->xv[24], 5), (UINT16)_mm_extract_epi16(rsp->xv[24], 4), (UINT16)_mm_extract_epi16(rsp->xv[24], 3), (UINT16)_mm_extract_epi16(rsp->xv[24], 2), (UINT16)_mm_extract_epi16(rsp->xv[24], 1), (UINT16)_mm_extract_epi16(rsp->xv[24], 0)); break;
6122      case CPUINFO_STR_REGISTER + RSP_V25:            sprintf(info->s, "V25: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[25], 7), (UINT16)_mm_extract_epi16(rsp->xv[25], 6), (UINT16)_mm_extract_epi16(rsp->xv[25], 5), (UINT16)_mm_extract_epi16(rsp->xv[25], 4), (UINT16)_mm_extract_epi16(rsp->xv[25], 3), (UINT16)_mm_extract_epi16(rsp->xv[25], 2), (UINT16)_mm_extract_epi16(rsp->xv[25], 1), (UINT16)_mm_extract_epi16(rsp->xv[25], 0)); break;
6123      case CPUINFO_STR_REGISTER + RSP_V26:            sprintf(info->s, "V26: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[26], 7), (UINT16)_mm_extract_epi16(rsp->xv[26], 6), (UINT16)_mm_extract_epi16(rsp->xv[26], 5), (UINT16)_mm_extract_epi16(rsp->xv[26], 4), (UINT16)_mm_extract_epi16(rsp->xv[26], 3), (UINT16)_mm_extract_epi16(rsp->xv[26], 2), (UINT16)_mm_extract_epi16(rsp->xv[26], 1), (UINT16)_mm_extract_epi16(rsp->xv[26], 0)); break;
6124      case CPUINFO_STR_REGISTER + RSP_V27:            sprintf(info->s, "V27: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[27], 7), (UINT16)_mm_extract_epi16(rsp->xv[27], 6), (UINT16)_mm_extract_epi16(rsp->xv[27], 5), (UINT16)_mm_extract_epi16(rsp->xv[27], 4), (UINT16)_mm_extract_epi16(rsp->xv[27], 3), (UINT16)_mm_extract_epi16(rsp->xv[27], 2), (UINT16)_mm_extract_epi16(rsp->xv[27], 1), (UINT16)_mm_extract_epi16(rsp->xv[27], 0)); break;
6125      case CPUINFO_STR_REGISTER + RSP_V28:            sprintf(info->s, "V28: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[28], 7), (UINT16)_mm_extract_epi16(rsp->xv[28], 6), (UINT16)_mm_extract_epi16(rsp->xv[28], 5), (UINT16)_mm_extract_epi16(rsp->xv[28], 4), (UINT16)_mm_extract_epi16(rsp->xv[28], 3), (UINT16)_mm_extract_epi16(rsp->xv[28], 2), (UINT16)_mm_extract_epi16(rsp->xv[28], 1), (UINT16)_mm_extract_epi16(rsp->xv[28], 0)); break;
6126      case CPUINFO_STR_REGISTER + RSP_V29:            sprintf(info->s, "V29: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[29], 7), (UINT16)_mm_extract_epi16(rsp->xv[29], 6), (UINT16)_mm_extract_epi16(rsp->xv[29], 5), (UINT16)_mm_extract_epi16(rsp->xv[29], 4), (UINT16)_mm_extract_epi16(rsp->xv[29], 3), (UINT16)_mm_extract_epi16(rsp->xv[29], 2), (UINT16)_mm_extract_epi16(rsp->xv[29], 1), (UINT16)_mm_extract_epi16(rsp->xv[29], 0)); break;
6127      case CPUINFO_STR_REGISTER + RSP_V30:            sprintf(info->s, "V30: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[30], 7), (UINT16)_mm_extract_epi16(rsp->xv[30], 6), (UINT16)_mm_extract_epi16(rsp->xv[30], 5), (UINT16)_mm_extract_epi16(rsp->xv[30], 4), (UINT16)_mm_extract_epi16(rsp->xv[30], 3), (UINT16)_mm_extract_epi16(rsp->xv[30], 2), (UINT16)_mm_extract_epi16(rsp->xv[30], 1), (UINT16)_mm_extract_epi16(rsp->xv[30], 0)); break;
6128      case CPUINFO_STR_REGISTER + RSP_V31:            sprintf(info->s, "V31: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[31], 7), (UINT16)_mm_extract_epi16(rsp->xv[31], 6), (UINT16)_mm_extract_epi16(rsp->xv[31], 5), (UINT16)_mm_extract_epi16(rsp->xv[31], 4), (UINT16)_mm_extract_epi16(rsp->xv[31], 3), (UINT16)_mm_extract_epi16(rsp->xv[31], 2), (UINT16)_mm_extract_epi16(rsp->xv[31], 1), (UINT16)_mm_extract_epi16(rsp->xv[31], 0)); break;
6455        case CPUINFO_STR_REGISTER + RSP_V0:             sprintf(info->s, "V0: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 0], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 0], 0)); break;
6456        case CPUINFO_STR_REGISTER + RSP_V1:             sprintf(info->s, "V1: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 1], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 1], 0)); break;
6457        case CPUINFO_STR_REGISTER + RSP_V2:             sprintf(info->s, "V2: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 2], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 2], 0)); break;
6458        case CPUINFO_STR_REGISTER + RSP_V3:             sprintf(info->s, "V3: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 3], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 3], 0)); break;
6459        case CPUINFO_STR_REGISTER + RSP_V4:             sprintf(info->s, "V4: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 4], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 4], 0)); break;
6460        case CPUINFO_STR_REGISTER + RSP_V5:             sprintf(info->s, "V5: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 5], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 5], 0)); break;
6461        case CPUINFO_STR_REGISTER + RSP_V6:             sprintf(info->s, "V6: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 6], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 6], 0)); break;
6462        case CPUINFO_STR_REGISTER + RSP_V7:             sprintf(info->s, "V7: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 7], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 7], 0)); break;
6463        case CPUINFO_STR_REGISTER + RSP_V8:             sprintf(info->s, "V8: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 8], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 8], 0)); break;
6464        case CPUINFO_STR_REGISTER + RSP_V9:             sprintf(info->s, "V9: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)_mm_extract_epi16(rsp->xv[ 9], 7), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 6), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 5), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 4), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 3), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 2), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 1), (UINT16)_mm_extract_epi16(rsp->xv[ 9], 0)); break;
6465        case CPUINFO_STR_REGISTER + RSP_V10:            sprintf(info->s, "V10: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[10], 7), (UINT16)_mm_extract_epi16(rsp->xv[10], 6), (UINT16)_mm_extract_epi16(rsp->xv[10], 5), (UINT16)_mm_extract_epi16(rsp->xv[10], 4), (UINT16)_mm_extract_epi16(rsp->xv[10], 3), (UINT16)_mm_extract_epi16(rsp->xv[10], 2), (UINT16)_mm_extract_epi16(rsp->xv[10], 1), (UINT16)_mm_extract_epi16(rsp->xv[10], 0)); break;
6466        case CPUINFO_STR_REGISTER + RSP_V11:            sprintf(info->s, "V11: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[11], 7), (UINT16)_mm_extract_epi16(rsp->xv[11], 6), (UINT16)_mm_extract_epi16(rsp->xv[11], 5), (UINT16)_mm_extract_epi16(rsp->xv[11], 4), (UINT16)_mm_extract_epi16(rsp->xv[11], 3), (UINT16)_mm_extract_epi16(rsp->xv[11], 2), (UINT16)_mm_extract_epi16(rsp->xv[11], 1), (UINT16)_mm_extract_epi16(rsp->xv[11], 0)); break;
6467        case CPUINFO_STR_REGISTER + RSP_V12:            sprintf(info->s, "V12: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[12], 7), (UINT16)_mm_extract_epi16(rsp->xv[12], 6), (UINT16)_mm_extract_epi16(rsp->xv[12], 5), (UINT16)_mm_extract_epi16(rsp->xv[12], 4), (UINT16)_mm_extract_epi16(rsp->xv[12], 3), (UINT16)_mm_extract_epi16(rsp->xv[12], 2), (UINT16)_mm_extract_epi16(rsp->xv[12], 1), (UINT16)_mm_extract_epi16(rsp->xv[12], 0)); break;
6468        case CPUINFO_STR_REGISTER + RSP_V13:            sprintf(info->s, "V13: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[13], 7), (UINT16)_mm_extract_epi16(rsp->xv[13], 6), (UINT16)_mm_extract_epi16(rsp->xv[13], 5), (UINT16)_mm_extract_epi16(rsp->xv[13], 4), (UINT16)_mm_extract_epi16(rsp->xv[13], 3), (UINT16)_mm_extract_epi16(rsp->xv[13], 2), (UINT16)_mm_extract_epi16(rsp->xv[13], 1), (UINT16)_mm_extract_epi16(rsp->xv[13], 0)); break;
6469        case CPUINFO_STR_REGISTER + RSP_V14:            sprintf(info->s, "V14: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[14], 7), (UINT16)_mm_extract_epi16(rsp->xv[14], 6), (UINT16)_mm_extract_epi16(rsp->xv[14], 5), (UINT16)_mm_extract_epi16(rsp->xv[14], 4), (UINT16)_mm_extract_epi16(rsp->xv[14], 3), (UINT16)_mm_extract_epi16(rsp->xv[14], 2), (UINT16)_mm_extract_epi16(rsp->xv[14], 1), (UINT16)_mm_extract_epi16(rsp->xv[14], 0)); break;
6470        case CPUINFO_STR_REGISTER + RSP_V15:            sprintf(info->s, "V15: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[15], 7), (UINT16)_mm_extract_epi16(rsp->xv[15], 6), (UINT16)_mm_extract_epi16(rsp->xv[15], 5), (UINT16)_mm_extract_epi16(rsp->xv[15], 4), (UINT16)_mm_extract_epi16(rsp->xv[15], 3), (UINT16)_mm_extract_epi16(rsp->xv[15], 2), (UINT16)_mm_extract_epi16(rsp->xv[15], 1), (UINT16)_mm_extract_epi16(rsp->xv[15], 0)); break;
6471        case CPUINFO_STR_REGISTER + RSP_V16:            sprintf(info->s, "V16: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[16], 7), (UINT16)_mm_extract_epi16(rsp->xv[16], 6), (UINT16)_mm_extract_epi16(rsp->xv[16], 5), (UINT16)_mm_extract_epi16(rsp->xv[16], 4), (UINT16)_mm_extract_epi16(rsp->xv[16], 3), (UINT16)_mm_extract_epi16(rsp->xv[16], 2), (UINT16)_mm_extract_epi16(rsp->xv[16], 1), (UINT16)_mm_extract_epi16(rsp->xv[16], 0)); break;
6472        case CPUINFO_STR_REGISTER + RSP_V17:            sprintf(info->s, "V17: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[17], 7), (UINT16)_mm_extract_epi16(rsp->xv[17], 6), (UINT16)_mm_extract_epi16(rsp->xv[17], 5), (UINT16)_mm_extract_epi16(rsp->xv[17], 4), (UINT16)_mm_extract_epi16(rsp->xv[17], 3), (UINT16)_mm_extract_epi16(rsp->xv[17], 2), (UINT16)_mm_extract_epi16(rsp->xv[17], 1), (UINT16)_mm_extract_epi16(rsp->xv[17], 0)); break;
6473        case CPUINFO_STR_REGISTER + RSP_V18:            sprintf(info->s, "V18: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[18], 7), (UINT16)_mm_extract_epi16(rsp->xv[18], 6), (UINT16)_mm_extract_epi16(rsp->xv[18], 5), (UINT16)_mm_extract_epi16(rsp->xv[18], 4), (UINT16)_mm_extract_epi16(rsp->xv[18], 3), (UINT16)_mm_extract_epi16(rsp->xv[18], 2), (UINT16)_mm_extract_epi16(rsp->xv[18], 1), (UINT16)_mm_extract_epi16(rsp->xv[18], 0)); break;
6474        case CPUINFO_STR_REGISTER + RSP_V19:            sprintf(info->s, "V19: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[19], 7), (UINT16)_mm_extract_epi16(rsp->xv[19], 6), (UINT16)_mm_extract_epi16(rsp->xv[19], 5), (UINT16)_mm_extract_epi16(rsp->xv[19], 4), (UINT16)_mm_extract_epi16(rsp->xv[19], 3), (UINT16)_mm_extract_epi16(rsp->xv[19], 2), (UINT16)_mm_extract_epi16(rsp->xv[19], 1), (UINT16)_mm_extract_epi16(rsp->xv[19], 0)); break;
6475        case CPUINFO_STR_REGISTER + RSP_V20:            sprintf(info->s, "V20: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[20], 7), (UINT16)_mm_extract_epi16(rsp->xv[20], 6), (UINT16)_mm_extract_epi16(rsp->xv[20], 5), (UINT16)_mm_extract_epi16(rsp->xv[20], 4), (UINT16)_mm_extract_epi16(rsp->xv[20], 3), (UINT16)_mm_extract_epi16(rsp->xv[20], 2), (UINT16)_mm_extract_epi16(rsp->xv[20], 1), (UINT16)_mm_extract_epi16(rsp->xv[20], 0)); break;
6476        case CPUINFO_STR_REGISTER + RSP_V21:            sprintf(info->s, "V21: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[21], 7), (UINT16)_mm_extract_epi16(rsp->xv[21], 6), (UINT16)_mm_extract_epi16(rsp->xv[21], 5), (UINT16)_mm_extract_epi16(rsp->xv[21], 4), (UINT16)_mm_extract_epi16(rsp->xv[21], 3), (UINT16)_mm_extract_epi16(rsp->xv[21], 2), (UINT16)_mm_extract_epi16(rsp->xv[21], 1), (UINT16)_mm_extract_epi16(rsp->xv[21], 0)); break;
6477        case CPUINFO_STR_REGISTER + RSP_V22:            sprintf(info->s, "V22: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[22], 7), (UINT16)_mm_extract_epi16(rsp->xv[22], 6), (UINT16)_mm_extract_epi16(rsp->xv[22], 5), (UINT16)_mm_extract_epi16(rsp->xv[22], 4), (UINT16)_mm_extract_epi16(rsp->xv[22], 3), (UINT16)_mm_extract_epi16(rsp->xv[22], 2), (UINT16)_mm_extract_epi16(rsp->xv[22], 1), (UINT16)_mm_extract_epi16(rsp->xv[22], 0)); break;
6478        case CPUINFO_STR_REGISTER + RSP_V23:            sprintf(info->s, "V23: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[23], 7), (UINT16)_mm_extract_epi16(rsp->xv[23], 6), (UINT16)_mm_extract_epi16(rsp->xv[23], 5), (UINT16)_mm_extract_epi16(rsp->xv[23], 4), (UINT16)_mm_extract_epi16(rsp->xv[23], 3), (UINT16)_mm_extract_epi16(rsp->xv[23], 2), (UINT16)_mm_extract_epi16(rsp->xv[23], 1), (UINT16)_mm_extract_epi16(rsp->xv[23], 0)); break;
6479        case CPUINFO_STR_REGISTER + RSP_V24:            sprintf(info->s, "V24: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[24], 7), (UINT16)_mm_extract_epi16(rsp->xv[24], 6), (UINT16)_mm_extract_epi16(rsp->xv[24], 5), (UINT16)_mm_extract_epi16(rsp->xv[24], 4), (UINT16)_mm_extract_epi16(rsp->xv[24], 3), (UINT16)_mm_extract_epi16(rsp->xv[24], 2), (UINT16)_mm_extract_epi16(rsp->xv[24], 1), (UINT16)_mm_extract_epi16(rsp->xv[24], 0)); break;
6480        case CPUINFO_STR_REGISTER + RSP_V25:            sprintf(info->s, "V25: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[25], 7), (UINT16)_mm_extract_epi16(rsp->xv[25], 6), (UINT16)_mm_extract_epi16(rsp->xv[25], 5), (UINT16)_mm_extract_epi16(rsp->xv[25], 4), (UINT16)_mm_extract_epi16(rsp->xv[25], 3), (UINT16)_mm_extract_epi16(rsp->xv[25], 2), (UINT16)_mm_extract_epi16(rsp->xv[25], 1), (UINT16)_mm_extract_epi16(rsp->xv[25], 0)); break;
6481        case CPUINFO_STR_REGISTER + RSP_V26:            sprintf(info->s, "V26: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[26], 7), (UINT16)_mm_extract_epi16(rsp->xv[26], 6), (UINT16)_mm_extract_epi16(rsp->xv[26], 5), (UINT16)_mm_extract_epi16(rsp->xv[26], 4), (UINT16)_mm_extract_epi16(rsp->xv[26], 3), (UINT16)_mm_extract_epi16(rsp->xv[26], 2), (UINT16)_mm_extract_epi16(rsp->xv[26], 1), (UINT16)_mm_extract_epi16(rsp->xv[26], 0)); break;
6482        case CPUINFO_STR_REGISTER + RSP_V27:            sprintf(info->s, "V27: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[27], 7), (UINT16)_mm_extract_epi16(rsp->xv[27], 6), (UINT16)_mm_extract_epi16(rsp->xv[27], 5), (UINT16)_mm_extract_epi16(rsp->xv[27], 4), (UINT16)_mm_extract_epi16(rsp->xv[27], 3), (UINT16)_mm_extract_epi16(rsp->xv[27], 2), (UINT16)_mm_extract_epi16(rsp->xv[27], 1), (UINT16)_mm_extract_epi16(rsp->xv[27], 0)); break;
6483        case CPUINFO_STR_REGISTER + RSP_V28:            sprintf(info->s, "V28: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[28], 7), (UINT16)_mm_extract_epi16(rsp->xv[28], 6), (UINT16)_mm_extract_epi16(rsp->xv[28], 5), (UINT16)_mm_extract_epi16(rsp->xv[28], 4), (UINT16)_mm_extract_epi16(rsp->xv[28], 3), (UINT16)_mm_extract_epi16(rsp->xv[28], 2), (UINT16)_mm_extract_epi16(rsp->xv[28], 1), (UINT16)_mm_extract_epi16(rsp->xv[28], 0)); break;
6484        case CPUINFO_STR_REGISTER + RSP_V29:            sprintf(info->s, "V29: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[29], 7), (UINT16)_mm_extract_epi16(rsp->xv[29], 6), (UINT16)_mm_extract_epi16(rsp->xv[29], 5), (UINT16)_mm_extract_epi16(rsp->xv[29], 4), (UINT16)_mm_extract_epi16(rsp->xv[29], 3), (UINT16)_mm_extract_epi16(rsp->xv[29], 2), (UINT16)_mm_extract_epi16(rsp->xv[29], 1), (UINT16)_mm_extract_epi16(rsp->xv[29], 0)); break;
6485        case CPUINFO_STR_REGISTER + RSP_V30:            sprintf(info->s, "V30: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[30], 7), (UINT16)_mm_extract_epi16(rsp->xv[30], 6), (UINT16)_mm_extract_epi16(rsp->xv[30], 5), (UINT16)_mm_extract_epi16(rsp->xv[30], 4), (UINT16)_mm_extract_epi16(rsp->xv[30], 3), (UINT16)_mm_extract_epi16(rsp->xv[30], 2), (UINT16)_mm_extract_epi16(rsp->xv[30], 1), (UINT16)_mm_extract_epi16(rsp->xv[30], 0)); break;
6486        case CPUINFO_STR_REGISTER + RSP_V31:            sprintf(info->s, "V31: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)_mm_extract_epi16(rsp->xv[31], 7), (UINT16)_mm_extract_epi16(rsp->xv[31], 6), (UINT16)_mm_extract_epi16(rsp->xv[31], 5), (UINT16)_mm_extract_epi16(rsp->xv[31], 4), (UINT16)_mm_extract_epi16(rsp->xv[31], 3), (UINT16)_mm_extract_epi16(rsp->xv[31], 2), (UINT16)_mm_extract_epi16(rsp->xv[31], 1), (UINT16)_mm_extract_epi16(rsp->xv[31], 0)); break;
61296487#else
6130      case CPUINFO_STR_REGISTER + RSP_V0:             sprintf(info->s, "V0: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); break;
6131      case CPUINFO_STR_REGISTER + RSP_V1:             sprintf(info->s, "V1: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); break;
6132      case CPUINFO_STR_REGISTER + RSP_V2:             sprintf(info->s, "V2: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); break;
6133      case CPUINFO_STR_REGISTER + RSP_V3:             sprintf(info->s, "V3: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); break;
6134      case CPUINFO_STR_REGISTER + RSP_V4:             sprintf(info->s, "V4: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); break;
6135      case CPUINFO_STR_REGISTER + RSP_V5:             sprintf(info->s, "V5: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); break;
6136      case CPUINFO_STR_REGISTER + RSP_V6:             sprintf(info->s, "V6: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); break;
6137      case CPUINFO_STR_REGISTER + RSP_V7:             sprintf(info->s, "V7: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); break;
6138      case CPUINFO_STR_REGISTER + RSP_V8:             sprintf(info->s, "V8: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); break;
6139      case CPUINFO_STR_REGISTER + RSP_V9:             sprintf(info->s, "V9: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); break;
6140      case CPUINFO_STR_REGISTER + RSP_V10:            sprintf(info->s, "V10: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); break;
6141      case CPUINFO_STR_REGISTER + RSP_V11:            sprintf(info->s, "V11: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); break;
6142      case CPUINFO_STR_REGISTER + RSP_V12:            sprintf(info->s, "V12: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); break;
6143      case CPUINFO_STR_REGISTER + RSP_V13:            sprintf(info->s, "V13: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); break;
6144      case CPUINFO_STR_REGISTER + RSP_V14:            sprintf(info->s, "V14: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); break;
6145      case CPUINFO_STR_REGISTER + RSP_V15:            sprintf(info->s, "V15: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); break;
6146      case CPUINFO_STR_REGISTER + RSP_V16:            sprintf(info->s, "V16: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); break;
6147      case CPUINFO_STR_REGISTER + RSP_V17:            sprintf(info->s, "V17: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); break;
6148      case CPUINFO_STR_REGISTER + RSP_V18:            sprintf(info->s, "V18: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); break;
6149      case CPUINFO_STR_REGISTER + RSP_V19:            sprintf(info->s, "V19: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); break;
6150      case CPUINFO_STR_REGISTER + RSP_V20:            sprintf(info->s, "V20: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); break;
6151      case CPUINFO_STR_REGISTER + RSP_V21:            sprintf(info->s, "V21: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); break;
6152      case CPUINFO_STR_REGISTER + RSP_V22:            sprintf(info->s, "V22: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); break;
6153      case CPUINFO_STR_REGISTER + RSP_V23:            sprintf(info->s, "V23: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); break;
6154      case CPUINFO_STR_REGISTER + RSP_V24:            sprintf(info->s, "V24: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); break;
6155      case CPUINFO_STR_REGISTER + RSP_V25:            sprintf(info->s, "V25: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); break;
6156      case CPUINFO_STR_REGISTER + RSP_V26:            sprintf(info->s, "V26: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); break;
6157      case CPUINFO_STR_REGISTER + RSP_V27:            sprintf(info->s, "V27: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); break;
6158      case CPUINFO_STR_REGISTER + RSP_V28:            sprintf(info->s, "V28: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); break;
6159      case CPUINFO_STR_REGISTER + RSP_V29:            sprintf(info->s, "V29: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); break;
6160      case CPUINFO_STR_REGISTER + RSP_V30:            sprintf(info->s, "V30: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); break;
6161      case CPUINFO_STR_REGISTER + RSP_V31:            sprintf(info->s, "V31: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); break;
6488        case CPUINFO_STR_REGISTER + RSP_V0:             sprintf(info->s, "V0: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 0, 0), (UINT16)VREG_S( 0, 1), (UINT16)VREG_S( 0, 2), (UINT16)VREG_S( 0, 3), (UINT16)VREG_S( 0, 4), (UINT16)VREG_S( 0, 5), (UINT16)VREG_S( 0, 6), (UINT16)VREG_S( 0, 7)); break;
6489        case CPUINFO_STR_REGISTER + RSP_V1:             sprintf(info->s, "V1: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 1, 0), (UINT16)VREG_S( 1, 1), (UINT16)VREG_S( 1, 2), (UINT16)VREG_S( 1, 3), (UINT16)VREG_S( 1, 4), (UINT16)VREG_S( 1, 5), (UINT16)VREG_S( 1, 6), (UINT16)VREG_S( 1, 7)); break;
6490        case CPUINFO_STR_REGISTER + RSP_V2:             sprintf(info->s, "V2: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 2, 0), (UINT16)VREG_S( 2, 1), (UINT16)VREG_S( 2, 2), (UINT16)VREG_S( 2, 3), (UINT16)VREG_S( 2, 4), (UINT16)VREG_S( 2, 5), (UINT16)VREG_S( 2, 6), (UINT16)VREG_S( 2, 7)); break;
6491        case CPUINFO_STR_REGISTER + RSP_V3:             sprintf(info->s, "V3: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 3, 0), (UINT16)VREG_S( 3, 1), (UINT16)VREG_S( 3, 2), (UINT16)VREG_S( 3, 3), (UINT16)VREG_S( 3, 4), (UINT16)VREG_S( 3, 5), (UINT16)VREG_S( 3, 6), (UINT16)VREG_S( 3, 7)); break;
6492        case CPUINFO_STR_REGISTER + RSP_V4:             sprintf(info->s, "V4: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 4, 0), (UINT16)VREG_S( 4, 1), (UINT16)VREG_S( 4, 2), (UINT16)VREG_S( 4, 3), (UINT16)VREG_S( 4, 4), (UINT16)VREG_S( 4, 5), (UINT16)VREG_S( 4, 6), (UINT16)VREG_S( 4, 7)); break;
6493        case CPUINFO_STR_REGISTER + RSP_V5:             sprintf(info->s, "V5: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 5, 0), (UINT16)VREG_S( 5, 1), (UINT16)VREG_S( 5, 2), (UINT16)VREG_S( 5, 3), (UINT16)VREG_S( 5, 4), (UINT16)VREG_S( 5, 5), (UINT16)VREG_S( 5, 6), (UINT16)VREG_S( 5, 7)); break;
6494        case CPUINFO_STR_REGISTER + RSP_V6:             sprintf(info->s, "V6: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 6, 0), (UINT16)VREG_S( 6, 1), (UINT16)VREG_S( 6, 2), (UINT16)VREG_S( 6, 3), (UINT16)VREG_S( 6, 4), (UINT16)VREG_S( 6, 5), (UINT16)VREG_S( 6, 6), (UINT16)VREG_S( 6, 7)); break;
6495        case CPUINFO_STR_REGISTER + RSP_V7:             sprintf(info->s, "V7: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 7, 0), (UINT16)VREG_S( 7, 1), (UINT16)VREG_S( 7, 2), (UINT16)VREG_S( 7, 3), (UINT16)VREG_S( 7, 4), (UINT16)VREG_S( 7, 5), (UINT16)VREG_S( 7, 6), (UINT16)VREG_S( 7, 7)); break;
6496        case CPUINFO_STR_REGISTER + RSP_V8:             sprintf(info->s, "V8: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 8, 0), (UINT16)VREG_S( 8, 1), (UINT16)VREG_S( 8, 2), (UINT16)VREG_S( 8, 3), (UINT16)VREG_S( 8, 4), (UINT16)VREG_S( 8, 5), (UINT16)VREG_S( 8, 6), (UINT16)VREG_S( 8, 7)); break;
6497        case CPUINFO_STR_REGISTER + RSP_V9:             sprintf(info->s, "V9: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X",  (UINT16)VREG_S( 9, 0), (UINT16)VREG_S( 9, 1), (UINT16)VREG_S( 9, 2), (UINT16)VREG_S( 9, 3), (UINT16)VREG_S( 9, 4), (UINT16)VREG_S( 9, 5), (UINT16)VREG_S( 9, 6), (UINT16)VREG_S( 9, 7)); break;
6498        case CPUINFO_STR_REGISTER + RSP_V10:            sprintf(info->s, "V10: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(10, 0), (UINT16)VREG_S(10, 1), (UINT16)VREG_S(10, 2), (UINT16)VREG_S(10, 3), (UINT16)VREG_S(10, 4), (UINT16)VREG_S(10, 5), (UINT16)VREG_S(10, 6), (UINT16)VREG_S(10, 7)); break;
6499        case CPUINFO_STR_REGISTER + RSP_V11:            sprintf(info->s, "V11: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(11, 0), (UINT16)VREG_S(11, 1), (UINT16)VREG_S(11, 2), (UINT16)VREG_S(11, 3), (UINT16)VREG_S(11, 4), (UINT16)VREG_S(11, 5), (UINT16)VREG_S(11, 6), (UINT16)VREG_S(11, 7)); break;
6500        case CPUINFO_STR_REGISTER + RSP_V12:            sprintf(info->s, "V12: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(12, 0), (UINT16)VREG_S(12, 1), (UINT16)VREG_S(12, 2), (UINT16)VREG_S(12, 3), (UINT16)VREG_S(12, 4), (UINT16)VREG_S(12, 5), (UINT16)VREG_S(12, 6), (UINT16)VREG_S(12, 7)); break;
6501        case CPUINFO_STR_REGISTER + RSP_V13:            sprintf(info->s, "V13: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(13, 0), (UINT16)VREG_S(13, 1), (UINT16)VREG_S(13, 2), (UINT16)VREG_S(13, 3), (UINT16)VREG_S(13, 4), (UINT16)VREG_S(13, 5), (UINT16)VREG_S(13, 6), (UINT16)VREG_S(13, 7)); break;
6502        case CPUINFO_STR_REGISTER + RSP_V14:            sprintf(info->s, "V14: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(14, 0), (UINT16)VREG_S(14, 1), (UINT16)VREG_S(14, 2), (UINT16)VREG_S(14, 3), (UINT16)VREG_S(14, 4), (UINT16)VREG_S(14, 5), (UINT16)VREG_S(14, 6), (UINT16)VREG_S(14, 7)); break;
6503        case CPUINFO_STR_REGISTER + RSP_V15:            sprintf(info->s, "V15: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(15, 0), (UINT16)VREG_S(15, 1), (UINT16)VREG_S(15, 2), (UINT16)VREG_S(15, 3), (UINT16)VREG_S(15, 4), (UINT16)VREG_S(15, 5), (UINT16)VREG_S(15, 6), (UINT16)VREG_S(15, 7)); break;
6504        case CPUINFO_STR_REGISTER + RSP_V16:            sprintf(info->s, "V16: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(16, 0), (UINT16)VREG_S(16, 1), (UINT16)VREG_S(16, 2), (UINT16)VREG_S(16, 3), (UINT16)VREG_S(16, 4), (UINT16)VREG_S(16, 5), (UINT16)VREG_S(16, 6), (UINT16)VREG_S(16, 7)); break;
6505        case CPUINFO_STR_REGISTER + RSP_V17:            sprintf(info->s, "V17: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(17, 0), (UINT16)VREG_S(17, 1), (UINT16)VREG_S(17, 2), (UINT16)VREG_S(17, 3), (UINT16)VREG_S(17, 4), (UINT16)VREG_S(17, 5), (UINT16)VREG_S(17, 6), (UINT16)VREG_S(17, 7)); break;
6506        case CPUINFO_STR_REGISTER + RSP_V18:            sprintf(info->s, "V18: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(18, 0), (UINT16)VREG_S(18, 1), (UINT16)VREG_S(18, 2), (UINT16)VREG_S(18, 3), (UINT16)VREG_S(18, 4), (UINT16)VREG_S(18, 5), (UINT16)VREG_S(18, 6), (UINT16)VREG_S(18, 7)); break;
6507        case CPUINFO_STR_REGISTER + RSP_V19:            sprintf(info->s, "V19: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(19, 0), (UINT16)VREG_S(19, 1), (UINT16)VREG_S(19, 2), (UINT16)VREG_S(19, 3), (UINT16)VREG_S(19, 4), (UINT16)VREG_S(19, 5), (UINT16)VREG_S(19, 6), (UINT16)VREG_S(19, 7)); break;
6508        case CPUINFO_STR_REGISTER + RSP_V20:            sprintf(info->s, "V20: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(20, 0), (UINT16)VREG_S(20, 1), (UINT16)VREG_S(20, 2), (UINT16)VREG_S(20, 3), (UINT16)VREG_S(20, 4), (UINT16)VREG_S(20, 5), (UINT16)VREG_S(20, 6), (UINT16)VREG_S(20, 7)); break;
6509        case CPUINFO_STR_REGISTER + RSP_V21:            sprintf(info->s, "V21: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(21, 0), (UINT16)VREG_S(21, 1), (UINT16)VREG_S(21, 2), (UINT16)VREG_S(21, 3), (UINT16)VREG_S(21, 4), (UINT16)VREG_S(21, 5), (UINT16)VREG_S(21, 6), (UINT16)VREG_S(21, 7)); break;
6510        case CPUINFO_STR_REGISTER + RSP_V22:            sprintf(info->s, "V22: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(22, 0), (UINT16)VREG_S(22, 1), (UINT16)VREG_S(22, 2), (UINT16)VREG_S(22, 3), (UINT16)VREG_S(22, 4), (UINT16)VREG_S(22, 5), (UINT16)VREG_S(22, 6), (UINT16)VREG_S(22, 7)); break;
6511        case CPUINFO_STR_REGISTER + RSP_V23:            sprintf(info->s, "V23: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(23, 0), (UINT16)VREG_S(23, 1), (UINT16)VREG_S(23, 2), (UINT16)VREG_S(23, 3), (UINT16)VREG_S(23, 4), (UINT16)VREG_S(23, 5), (UINT16)VREG_S(23, 6), (UINT16)VREG_S(23, 7)); break;
6512        case CPUINFO_STR_REGISTER + RSP_V24:            sprintf(info->s, "V24: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(24, 0), (UINT16)VREG_S(24, 1), (UINT16)VREG_S(24, 2), (UINT16)VREG_S(24, 3), (UINT16)VREG_S(24, 4), (UINT16)VREG_S(24, 5), (UINT16)VREG_S(24, 6), (UINT16)VREG_S(24, 7)); break;
6513        case CPUINFO_STR_REGISTER + RSP_V25:            sprintf(info->s, "V25: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(25, 0), (UINT16)VREG_S(25, 1), (UINT16)VREG_S(25, 2), (UINT16)VREG_S(25, 3), (UINT16)VREG_S(25, 4), (UINT16)VREG_S(25, 5), (UINT16)VREG_S(25, 6), (UINT16)VREG_S(25, 7)); break;
6514        case CPUINFO_STR_REGISTER + RSP_V26:            sprintf(info->s, "V26: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(26, 0), (UINT16)VREG_S(26, 1), (UINT16)VREG_S(26, 2), (UINT16)VREG_S(26, 3), (UINT16)VREG_S(26, 4), (UINT16)VREG_S(26, 5), (UINT16)VREG_S(26, 6), (UINT16)VREG_S(26, 7)); break;
6515        case CPUINFO_STR_REGISTER + RSP_V27:            sprintf(info->s, "V27: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(27, 0), (UINT16)VREG_S(27, 1), (UINT16)VREG_S(27, 2), (UINT16)VREG_S(27, 3), (UINT16)VREG_S(27, 4), (UINT16)VREG_S(27, 5), (UINT16)VREG_S(27, 6), (UINT16)VREG_S(27, 7)); break;
6516        case CPUINFO_STR_REGISTER + RSP_V28:            sprintf(info->s, "V28: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(28, 0), (UINT16)VREG_S(28, 1), (UINT16)VREG_S(28, 2), (UINT16)VREG_S(28, 3), (UINT16)VREG_S(28, 4), (UINT16)VREG_S(28, 5), (UINT16)VREG_S(28, 6), (UINT16)VREG_S(28, 7)); break;
6517        case CPUINFO_STR_REGISTER + RSP_V29:            sprintf(info->s, "V29: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(29, 0), (UINT16)VREG_S(29, 1), (UINT16)VREG_S(29, 2), (UINT16)VREG_S(29, 3), (UINT16)VREG_S(29, 4), (UINT16)VREG_S(29, 5), (UINT16)VREG_S(29, 6), (UINT16)VREG_S(29, 7)); break;
6518        case CPUINFO_STR_REGISTER + RSP_V30:            sprintf(info->s, "V30: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(30, 0), (UINT16)VREG_S(30, 1), (UINT16)VREG_S(30, 2), (UINT16)VREG_S(30, 3), (UINT16)VREG_S(30, 4), (UINT16)VREG_S(30, 5), (UINT16)VREG_S(30, 6), (UINT16)VREG_S(30, 7)); break;
6519        case CPUINFO_STR_REGISTER + RSP_V31:            sprintf(info->s, "V31: %04X|%04X|%04X|%04X|%04X|%04X|%04X|%04X", (UINT16)VREG_S(31, 0), (UINT16)VREG_S(31, 1), (UINT16)VREG_S(31, 2), (UINT16)VREG_S(31, 3), (UINT16)VREG_S(31, 4), (UINT16)VREG_S(31, 5), (UINT16)VREG_S(31, 6), (UINT16)VREG_S(31, 7)); break;
61626520#endif
6163      case CPUINFO_STR_REGISTER + RSP_SR:             sprintf(info->s, "SR: %08X",  rsp->sr);    break;
6164      case CPUINFO_STR_REGISTER + RSP_NEXTPC:         sprintf(info->s, "NPC: %08X", rsp->nextpc);break;
6165      case CPUINFO_STR_REGISTER + RSP_STEPCNT:        sprintf(info->s, "STEP: %d",  rsp->step_count);  break;
6166   }
6521        case CPUINFO_STR_REGISTER + RSP_SR:             sprintf(info->s, "SR: %08X",  rsp->sr);    break;
6522        case CPUINFO_STR_REGISTER + RSP_NEXTPC:         sprintf(info->s, "NPC: %08X", rsp->nextpc);break;
6523        case CPUINFO_STR_REGISTER + RSP_STEPCNT:        sprintf(info->s, "STEP: %d",  rsp->step_count);  break;
6524    }
61676525}
61686526
/* Invokes DEFINE_LEGACY_CPU_DEVICE with the device name RSP_DRC and the
 * identifier rsp_drc. NOTE(review): presumably this emits the legacy CPU
 * device definition for the DRC RSP core; the macro is defined outside this
 * file section, so confirm against its definition. */
61696527DEFINE_LEGACY_CPU_DEVICE(RSP_DRC, rsp_drc);

Previous 199869 Revisions Next


© 1997-2024 The MAME Team