Skip to content

Commit b246903

Browse files
Whatcookieelad335
authored and committed
ARM64: Detect some arm features and let LLVM know if they are or aren't present via attributes
- On x86, LLVM has robust detection for the CPU name. If a CPU like skylake has AVX disabled, it will fall back to something without AVX (nehalem). - On ARM, detection is not as robust. For instance, on my snapdragon 8 gen 2, it assumes that we have SVE support, as the cortex-x3 supports SVE. - If an ARM CPU is paired with other CPUs from another generation which don't support the same instructions as the cortex-x3, or if the cortex-x3 just has SVE disabled for no apparent reason (as is the case on the snapdragon 8 gen 2), we need to actually detect that ourselves. - Beyond SVE, also detect support for some instructions that might be useful to SPU LLVM when optimized with intrinsics.
1 parent 8f06654 commit b246903

3 files changed

Lines changed: 148 additions & 1 deletion

File tree

Utilities/JITLLVM.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,30 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
688688
mem = std::make_unique<MemoryManager1>(std::move(symbols_cement));
689689
}
690690

691+
std::vector<std::string> attributes;
692+
693+
#if defined(ARCH_ARM64)
694+
if (utils::has_sha3())
695+
attributes.push_back("+sha3");
696+
else
697+
attributes.push_back("-sha3");
698+
699+
if (utils::has_dotprod())
700+
attributes.push_back("+dotprod");
701+
else
702+
attributes.push_back("-dotprod");
703+
704+
if (utils::has_sve())
705+
attributes.push_back("+sve");
706+
else
707+
attributes.push_back("-sve");
708+
709+
if (utils::has_sve2())
710+
attributes.push_back("+sve2");
711+
else
712+
attributes.push_back("-sve2");
713+
#endif
714+
691715
{
692716
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
693717
.setErrorStr(&result)
@@ -699,6 +723,7 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
699723
//.setCodeModel(llvm::CodeModel::Large)
700724
#endif
701725
.setRelocationModel(llvm::Reloc::Model::PIC_)
726+
.setMAttrs(attributes)
702727
.setMCPU(m_cpu)
703728
.create());
704729
}

rpcs3/util/sysinfo.cpp

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,15 @@
1616
#else
1717
#include <unistd.h>
1818
#include <sys/resource.h>
19-
#ifndef __APPLE__
19+
#ifdef __APPLE__
20+
#include <sys/sysctl.h>
21+
#else
2022
#include <sys/utsname.h>
2123
#include <errno.h>
24+
#if defined(ARCH_ARM64) && defined(__linux__)
25+
#include <sys/auxv.h>
26+
#include <asm/hwcap.h>
27+
#endif
2228
#endif
2329
#endif
2430

@@ -444,6 +450,100 @@ u32 utils::get_rep_movsb_threshold()
444450
return g_value;
445451
}
446452

453+
#ifdef ARCH_ARM64
454+
455+
bool utils::has_neon()
456+
{
457+
static const bool g_value = []() -> bool
458+
{
459+
#if defined(__linux__)
460+
return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0;
461+
#elif defined(__APPLE__)
462+
int val = 0;
463+
size_t len = sizeof(val);
464+
sysctlbyname("hw.optional.AdvSIMD", &val, &len, nullptr, 0);
465+
return val != 0;
466+
#elif defined(_WIN32)
467+
return IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != 0;
468+
#endif
469+
}();
470+
return g_value;
471+
}
472+
473+
bool utils::has_sha3()
474+
{
475+
static const bool g_value = []() -> bool
476+
{
477+
#if defined(__linux__)
478+
return (getauxval(AT_HWCAP) & HWCAP_SHA3) != 0;
479+
#elif defined(__APPLE__)
480+
int val = 0;
481+
size_t len = sizeof(val);
482+
sysctlbyname("hw.optional.arm.FEAT_SHA3", &val, &len, nullptr, 0);
483+
return val != 0;
484+
#elif defined(_WIN32)
485+
return IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE) != 0;
486+
#endif
487+
}();
488+
return g_value;
489+
}
490+
491+
bool utils::has_dotprod()
492+
{
493+
static const bool g_value = []() -> bool
494+
{
495+
#if defined(__linux__)
496+
return (getauxval(AT_HWCAP) & HWCAP_ASIMDDP) != 0;
497+
#elif defined(__APPLE__)
498+
int val = 0;
499+
size_t len = sizeof(val);
500+
sysctlbyname("hw.optional.arm.FEAT_DotProd", &val, &len, nullptr, 0);
501+
return val != 0;
502+
#elif defined(_WIN32)
503+
return IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0;
504+
#endif
505+
}();
506+
return g_value;
507+
}
508+
509+
bool utils::has_sve()
510+
{
511+
static const bool g_value = []() -> bool
512+
{
513+
#if defined(__linux__)
514+
return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0;
515+
#elif defined(__APPLE__)
516+
int val = 0;
517+
size_t len = sizeof(val);
518+
sysctlbyname("hw.optional.arm.FEAT_SVE", &val, &len, nullptr, 0);
519+
return val != 0;
520+
#elif defined(_WIN32)
521+
return IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE) != 0;
522+
#endif
523+
}();
524+
return g_value;
525+
}
526+
527+
bool utils::has_sve2()
528+
{
529+
static const bool g_value = []() -> bool
530+
{
531+
#if defined(__linux__)
532+
return (getauxval(AT_HWCAP2) & HWCAP2_SVE2) != 0;
533+
#elif defined(__APPLE__)
534+
int val = 0;
535+
size_t len = sizeof(val);
536+
sysctlbyname("hw.optional.arm.FEAT_SVE2", &val, &len, nullptr, 0);
537+
return val != 0;
538+
#elif defined(_WIN32)
539+
return IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE) != 0;
540+
#endif
541+
}();
542+
return g_value;
543+
}
544+
545+
#endif
546+
447547
std::string utils::get_cpu_brand()
448548
{
449549
#if defined(ARCH_X64)
@@ -496,6 +596,17 @@ std::string utils::get_system_info()
496596
{
497597
fmt::append(result, " | TSC: Disabled");
498598
}
599+
#ifdef ARCH_ARM64
600+
601+
if (has_neon())
602+
{
603+
result += " | Neon";
604+
}
605+
else
606+
{
607+
fmt::throw_exception("Neon support not present");
608+
}
609+
#else
499610

500611
if (has_avx())
501612
{
@@ -562,6 +673,7 @@ std::string utils::get_system_info()
562673
{
563674
result += " | TSX disabled via microcode";
564675
}
676+
#endif
565677

566678
return result;
567679
}

rpcs3/util/sysinfo.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,17 @@ namespace utils
5454
bool has_appropriate_um_wait();
5555

5656
bool has_um_wait();
57+
#ifdef ARCH_ARM64
58+
bool has_neon();
5759

60+
bool has_sha3();
61+
62+
bool has_dotprod();
63+
64+
bool has_sve();
65+
66+
bool has_sve2();
67+
#endif
5868
std::string get_cpu_brand();
5969

6070
std::string get_system_info();

0 commit comments

Comments
 (0)