Skip to content

Commit dc63f2f

Browse files
committed
rsx: Correct border-color register value for shader texel remapping
- PS3 border color does not correctly support signed channels and instead treats them as simple compression.
- We instead perform the reverse conversion before sending it to our shader pipeline.
1 parent 0faa894 commit dc63f2f

1 file changed

Lines changed: 72 additions & 1 deletion

File tree

rpcs3/Emu/RSX/RSXTexture.cpp

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@
44
#include "rsx_utils.h"
55

66
#include "Emu/system_config.h"
7+
#include "util/simd.hpp"
8+
9+
#if defined(ARCH_ARM64)
10+
#if !defined(_MSC_VER)
11+
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
12+
#endif
13+
#undef FORCE_INLINE
14+
#include "Emu/CPU/sse2neon.h"
15+
#endif
716

817
namespace rsx
918
{
@@ -291,7 +300,69 @@ namespace rsx
291300

292301
// Returns the border color register value for this texture unit.
// When no channel is flagged as signed (argb_signed() == 0) the raw register value is returned unchanged.
// Otherwise every channel flagged as signed is re-encoded from its biased form (value - 128, kept as an
// 8-bit two's complement byte) while the unflagged channels keep their raw bits.
u32 fragment_texture::border_color() const
{
	const u32 raw = registers[NV4097_SET_TEXTURE_BORDER_COLOR + (m_index * 8)];
	const u32 sext = argb_signed();

	if (!sext) [[ likely ]]
	{
		return raw;
	}

	// Border color is broken on PS3. The SNORM behavior does not work as expected and behaves like BIASED renormalization instead.
	// To compensate, the biased value is converted back and stored in its signed (two's complement) byte encoding.
	// Each of the 4 flag bits is expanded into a full byte mask. Bit 0 selects the most significant byte and
	// bit 3 the least significant one (bit order is reversed, BE -> LE).
	static constexpr u32 expand4_lut[16] =
	{
		0x00000000u, // 0000
		0xFF000000u, // 0001
		0x00FF0000u, // 0010
		0xFFFF0000u, // 0011
		0x0000FF00u, // 0100
		0xFF00FF00u, // 0101
		0x00FFFF00u, // 0110
		0xFFFFFF00u, // 0111
		0x000000FFu, // 1000
		0xFF0000FFu, // 1001
		0x00FF00FFu, // 1010
		0xFFFF00FFu, // 1011
		0x0000FFFFu, // 1100
		0xFF00FFFFu, // 1101
		0x00FFFFFFu, // 1110
		0xFFFFFFFFu  // 1111
	};

	// Byte mask of the channels that need conversion.
	// NOTE(review): assumes argb_signed() never returns more than 4 bits (index < 16) - confirm against its definition.
	const u32 mask = expand4_lut[sext];

	// Compensation for the BIAS operation: x = (y - 128), applied to all 4 channels at once.
	const __m128i zero = _mm_setzero_si128();
	const __m128i bias = _mm_set1_epi32(128);

	// NOTE(review): if be_t<u32> converts back to the native value when cast to u32, this round-trip
	// leaves the byte order unchanged - confirm that this matches the reversed mask above.
	const auto be_raw = be_t<u32>(raw);

	// Widen the 4 bytes into 4 x 32-bit lanes (u8 -> u16 -> u32, zero extended)
	__m128i v = _mm_cvtsi32_si128(static_cast<u32>(be_raw));
	v = _mm_unpacklo_epi8(v, zero);
	v = _mm_unpacklo_epi16(v, zero); // e.g. [ 0, 64, 255, 128 ]

	// Remove the bias
	v = _mm_sub_epi32(v, bias);      // e.g. [ -128, -64, 127, 0 ]

	// Keep only the low 8 bits of each lane, i.e. the two's complement byte encoding of the result
	v = _mm_slli_epi32(v, 24);
	v = _mm_srli_epi32(v, 24);

	// Narrow back to 4 packed bytes. Every lane is within [0, 255] here, so neither pack saturates.
	v = _mm_packs_epi32(v, zero);
	v = _mm_packus_epi16(v, zero);

	const u32 conv = _mm_cvtsi128_si32(v);

	// Converted bytes for the signed channels, raw bytes for everything else
	return (conv & mask) | (raw & ~mask);
}
296367

297368
color4f fragment_texture::remapped_border_color() const

0 commit comments

Comments
 (0)