trunk/src/emu/video/rgbvmx.h
| r249921 | r249922 | |
| 19 | 19 | |
| 20 | 20 | class rgbaint_t |
| 21 | 21 | { |
| 22 | protected: |
| 23 | typedef __vector signed char VECS8; |
| 24 | typedef __vector unsigned char VECU8; |
| 25 | typedef __vector signed short VECS16; |
| 26 | typedef __vector unsigned short VECU16; |
| 27 | typedef __vector signed int VECS32; |
| 28 | typedef __vector unsigned int VECU32; |
| 29 | |
| 22 | 30 | public: |
| 23 | 31 | inline rgbaint_t() { } |
| 24 | 32 | inline rgbaint_t(UINT32 rgba) { set(rgba); } |
| 25 | 33 | inline rgbaint_t(INT32 a, INT32 r, INT32 g, INT32 b) { set(a, r, g, b); } |
| 26 | 34 | inline rgbaint_t(rgb_t& rgb) { set(rgb); } |
| 35 | inline rgbaint_t(VECS32 rgba) { m_value = rgba; } |
| 27 | 36 | |
| 28 | 37 | inline void set(rgbaint_t& other) { m_value = other.m_value; } |
| 29 | 38 | |
| r249921 | r249922 | |
| 49 | 58 | |
| 50 | 59 | inline rgb_t to_rgba() |
| 51 | 60 | { |
| 52 | | VECU32 temp = vec_pack(m_value, m_value); |
| 53 | | temp = vec_pack((VECU16)temp, (VECU16)temp); |
| 61 | VECU32 temp = vec_packs(m_value, m_value); |
| 62 | temp = vec_packsu((VECS16)temp, (VECS16)temp); |
| 54 | 63 | UINT32 result; |
| 55 | 64 | vec_ste(temp, 0, &result); |
| 56 | 65 | return result; |
| r249921 | r249922 | |
| 59 | 68 | inline rgb_t to_rgba_clamp() |
| 60 | 69 | { |
| 61 | 70 | VECU32 temp = vec_packs(m_value, m_value); |
| 62 | | temp = vec_packsu((VECU16)temp, (VECU16)temp); |
| 71 | temp = vec_packsu((VECS16)temp, (VECS16)temp); |
| 63 | 72 | UINT32 result; |
| 64 | 73 | vec_ste(temp, 0, &result); |
| 65 | 74 | return result; |
| r249921 | r249922 | |
| 222 | 231 | inline void shl(const rgbaint_t& shift) |
| 223 | 232 | { |
| 224 | 233 | const VECU32 limit = { 32, 32, 32, 32 }; |
| 225 | | const VECU32 temp = vec_splat(shift.m_value, 3); |
| 226 | | m_value = vec_and(vec_sl(m_value, temp), vec_cmpgt(limit, temp)); |
| 234 | m_value = vec_and(vec_sl(m_value, (VECU32)shift.m_value), vec_cmpgt(limit, (VECU32)shift.m_value)); |
| 227 | 235 | } |
| 228 | 236 | |
| 229 | 237 | inline void shl_imm(const UINT8 shift) |
| r249921 | r249922 | |
| 235 | 243 | inline void shr(const rgbaint_t& shift) |
| 236 | 244 | { |
| 237 | 245 | const VECU32 limit = { 32, 32, 32, 32 }; |
| 238 | | const VECU32 temp = vec_splat(shift.m_value, 3); |
| 239 | | m_value = vec_and(vec_sr(m_value, temp), vec_cmpgt(limit, temp)); |
| 246 | m_value = vec_and(vec_sr(m_value, (VECU32)shift.m_value), vec_cmpgt(limit, (VECU32)shift.m_value)); |
| 240 | 247 | } |
| 241 | 248 | |
| 242 | 249 | inline void shr_imm(const UINT8 shift) |
| r249921 | r249922 | |
| 248 | 255 | inline void sra(const rgbaint_t& shift) |
| 249 | 256 | { |
| 250 | 257 | const VECU32 limit = { 31, 31, 31, 31 }; |
| 251 | | m_value = vec_sra(m_value, vec_min((VECU32)vec_splat(shift.m_value, 3), limit)); |
| 258 | m_value = vec_sra(m_value, vec_min((VECU32)shift.m_value, limit)); |
| 252 | 259 | } |
| 253 | 260 | |
| 254 | 261 | inline void sra_imm(const UINT8 shift) |
| r249921 | r249922 | |
| 279 | 286 | m_value = vec_and(m_value, color.m_value); |
| 280 | 287 | } |
| 281 | 288 | |
| 289 | inline void andnot_reg(const rgbaint_t& color) |
| 290 | { |
| 291 | m_value = vec_andc(m_value, color.m_value); |
| 292 | } |
| 293 | |
| 282 | 294 | inline void and_imm(const INT32 value) |
| 283 | 295 | { |
| 284 | 296 | const VECS32 temp = { value, value, value, value }; |
| r249921 | r249922 | |
| 318 | 330 | m_value = vec_or(vec_and(vsign, mask), vec_and(m_value, vec_nor(mask, vzero))); |
| 319 | 331 | } |
| 320 | 332 | |
| 333 | inline void clamp_to_uint8() |
| 334 | { |
| 335 | const VECU32 zero = { 0, 0, 0, 0 }; |
| 336 | m_value = vec_packs(m_value, m_value); |
| 337 | m_value = vec_packsu((VECS16)m_value, (VECS16)m_value); |
| 338 | m_value = vec_mergeh((VECU8)zero, (VECU8)m_value); |
| 339 | m_value = vec_mergeh((VECS16)zero, (VECS16)m_value); |
| 340 | } |
| 341 | |
| 321 | 342 | inline void sign_extend(const UINT32 compare, const UINT32 sign) |
| 322 | 343 | { |
| 323 | 344 | const VECS32 compare_vec = { compare, compare, compare, compare }; |
| r249921 | r249922 | |
| 342 | 363 | |
| 343 | 364 | void scale_and_clamp(const rgbaint_t& scale); |
| 344 | 365 | void scale_imm_and_clamp(const INT32 scale); |
| 345 | | void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2); |
| 346 | | void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other); |
| 347 | | void scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other); |
| 348 | 366 | |
| 367 | void scale_imm_add_and_clamp(const INT32 scale, const rgbaint_t& other) |
| 368 | { |
| 369 | mul_imm(scale); |
| 370 | sra_imm(8); |
| 371 | add(other); |
| 372 | clamp_to_uint8(); |
| 373 | } |
| 374 | |
| 375 | void scale_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other) |
| 376 | { |
| 377 | mul(scale); |
| 378 | sra_imm(8); |
| 379 | add(other); |
| 380 | clamp_to_uint8(); |
| 381 | } |
| 382 | |
| 383 | void scale2_add_and_clamp(const rgbaint_t& scale, const rgbaint_t& other, const rgbaint_t& scale2) |
| 384 | { |
| 385 | rgbaint_t color2(other); |
| 386 | color2.mul(scale2); |
| 387 | |
| 388 | mul(scale); |
| 389 | add(color2); |
| 390 | sra_imm(8); |
| 391 | clamp_to_uint8(); |
| 392 | } |
| 393 | |
| 349 | 394 | inline void cmpeq(const rgbaint_t& value) |
| 350 | 395 | { |
| 351 | 396 | m_value = vec_cmpeq(m_value, value.m_value); |
| r249921 | r249922 | |
| 480 | 525 | return result; |
| 481 | 526 | } |
| 482 | 527 | |
| 528 | inline void bilinear_filter_rgbaint(UINT32 rgb00, UINT32 rgb01, UINT32 rgb10, UINT32 rgb11, UINT8 u, UINT8 v) |
| 529 | { |
| 530 | const VECS32 zero = vec_splat_s32(0); |
| 531 | |
| 532 | VECS32 color00 = vec_perm((VECS32)vec_lde(0, &rgb00), zero, vec_lvsl(0, &rgb00)); |
| 533 | VECS32 color01 = vec_perm((VECS32)vec_lde(0, &rgb01), zero, vec_lvsl(0, &rgb01)); |
| 534 | VECS32 color10 = vec_perm((VECS32)vec_lde(0, &rgb10), zero, vec_lvsl(0, &rgb10)); |
| 535 | VECS32 color11 = vec_perm((VECS32)vec_lde(0, &rgb11), zero, vec_lvsl(0, &rgb11)); |
| 536 | |
| 537 | /* interleave color01 and color00 at the byte level */ |
| 538 | color01 = vec_mergeh((VECU8)color01, (VECU8)color00); |
| 539 | color11 = vec_mergeh((VECU8)color11, (VECU8)color10); |
| 540 | color01 = vec_mergeh((VECU8)zero, (VECU8)color01); |
| 541 | color11 = vec_mergeh((VECU8)zero, (VECU8)color11); |
| 542 | color01 = vec_msum((VECS16)color01, scale_table[u], zero); |
| 543 | color11 = vec_msum((VECS16)color11, scale_table[u], zero); |
| 544 | color01 = vec_sl(color01, vec_splat_u32(15)); |
| 545 | color11 = vec_sr(color11, vec_splat_u32(1)); |
| 546 | color01 = vec_max((VECS16)color01, (VECS16)color11); |
| 547 | color01 = vec_msum((VECS16)color01, scale_table[v], zero); |
| 548 | m_value = vec_sr(color01, vec_splat_u32(15)); |
| 549 | } |
| 550 | |
| 483 | 551 | protected: |
| 484 | | typedef __vector signed char VECS8; |
| 485 | | typedef __vector unsigned char VECU8; |
| 486 | | typedef __vector signed short VECS16; |
| 487 | | typedef __vector unsigned short VECU16; |
| 488 | | typedef __vector signed int VECS32; |
| 489 | | typedef __vector unsigned int VECU32; |
| 490 | | |
| 491 | 552 | VECS32 m_value; |
| 492 | 553 | |
| 493 | 554 | static const VECU8 alpha_perm; |