|
4 | 4 | #include "rsx_utils.h" |
5 | 5 |
|
6 | 6 | #include "Emu/system_config.h" |
| 7 | +#include "util/simd.hpp" |
| 8 | + |
| 9 | +#if defined(ARCH_ARM64) |
| 10 | +#if !defined(_MSC_VER) |
| 11 | +#pragma GCC diagnostic ignored "-Wstrict-aliasing" |
| 12 | +#endif |
| 13 | +#undef FORCE_INLINE |
| 14 | +#include "Emu/CPU/sse2neon.h" |
| 15 | +#endif |
7 | 16 |
|
8 | 17 | namespace rsx |
9 | 18 | { |
@@ -291,7 +300,69 @@ namespace rsx |
291 | 300 |
|
292 | 301 | u32 fragment_texture::border_color() const |
293 | 302 | { |
294 | | - return registers[NV4097_SET_TEXTURE_BORDER_COLOR + (m_index * 8)]; |
| 303 | + const u32 raw = registers[NV4097_SET_TEXTURE_BORDER_COLOR + (m_index * 8)]; |
| 304 | + const u32 sext = argb_signed(); |
| 305 | + |
| 306 | + if (!sext) [[ likely ]] |
| 307 | + { |
| 308 | + return raw; |
| 309 | + } |
| 310 | + |
| 311 | + // Border color is broken on PS3. The SNORM behavior is completely broken and behaves like BIASED renormalization instead. |
| 312 | + // To solve the mismatch, we need to first do a bit expansion on the value then store it as sign extended. The second part is a natural part of numbers on a binary system, so we only need to do the former. |
| 313 | + static constexpr u32 expand4_lut[16] = |
| 314 | + { |
| 315 | + 0x00000000u, // 0000 |
| 316 | + 0xFF000000u, // 0001 |
| 317 | + 0x00FF0000u, // 0010 |
| 318 | + 0xFFFF0000u, // 0011 |
| 319 | + 0x0000FF00u, // 0100 |
| 320 | + 0xFF00FF00u, // 0101 |
| 321 | + 0x00FFFF00u, // 0110 |
| 322 | + 0xFFFFFF00u, // 0111 |
| 323 | + 0x000000FFu, // 1000 |
| 324 | + 0xFF0000FFu, // 1001 |
| 325 | + 0x00FF00FFu, // 1010 |
| 326 | + 0xFFFF00FFu, // 1011 |
| 327 | + 0x0000FFFFu, // 1100 |
| 328 | + 0xFF00FFFFu, // 1101 |
| 329 | + 0x00FFFFFFu, // 1110 |
| 330 | + 0xFFFFFFFFu // 1111 |
| 331 | + }; |
| 332 | + |
| 333 | + // Bit pattern expand and reverse BE -> LE using LUT |
| 334 | + const u32 mask = expand4_lut[sext]; |
| 335 | + |
| 336 | + // Now we perform the compensation operation |
| 337 | + // BIAS operation = (V - 128 / 127) |
| 338 | + |
| 339 | + // Load |
| 340 | + const __m128i _0 = _mm_setzero_si128(); |
| 341 | + const __m128i _128 = _mm_set1_epi32(128); |
| 342 | + const __m128i _127 = _mm_set1_epi32(127); |
| 343 | + const __m128i _255 = _mm_set1_epi32(255); |
| 344 | + |
| 345 | + const auto be_raw = be_t<u32>(raw); |
| 346 | + __m128i v = _mm_cvtsi32_si128(static_cast<u32>(be_raw)); |
| 347 | + v = _mm_unpacklo_epi8(v, _0); |
| 348 | + v = _mm_unpacklo_epi16(v, _0); // [ 0, 64, 255, 128 ] |
| 349 | + |
| 350 | + // Conversion: x = (y - 128) |
| 351 | + v = _mm_sub_epi32(v, _128); // [ -128, -64, 127, 0 ] |
| 352 | + |
| 353 | + // Convert to signed encoding (reverse sext) |
| 354 | + v = _mm_slli_epi32(v, 24); |
| 355 | + v = _mm_srli_epi32(v, 24); |
| 356 | + |
| 357 | + // Pack down |
| 358 | + v = _mm_packs_epi32(v, _0); |
| 359 | + v = _mm_packus_epi16(v, _0); |
| 360 | + |
| 361 | + // Read |
| 362 | + const u32 conv = _mm_cvtsi128_si32(v); |
| 363 | + |
| 364 | + // Merge |
| 365 | + return (conv & mask) | (raw & ~mask); |
295 | 366 | } |
296 | 367 |
|
297 | 368 | color4f fragment_texture::remapped_border_color() const |
|
0 commit comments