102 lines
3 KiB
Diff
102 lines
3 KiB
Diff
GCC's version of __builtin___clear_cache() is a no-op on PowerPC.
|
|
On PowerPC, skip the builtin and use an improved version of the existing ppc_cache_flush() instead.
|
|
|
|
--- a/src/sljit/sljitConfigInternal.h 2021-08-20 09:51:28.000000000 -0700
|
|
+++ b/src/sljit/sljitConfigInternal.h 2022-02-11 20:56:43.159092563 -0800
|
|
@@ -320,7 +320,8 @@
|
|
/****************************/
|
|
|
|
#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
|
|
-#if __has_builtin(__builtin___clear_cache)
|
|
+#if __has_builtin(__builtin___clear_cache) && \
|
|
+ !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
|
|
|
|
#define SLJIT_CACHE_FLUSH(from, to) \
|
|
__builtin___clear_cache((char*)(from), (char*)(to))
|
|
--- a/src/sljit/sljitNativePPC_common.c 2022-02-12 20:46:28.383224561 -0800
|
|
+++ b/src/sljit/sljitNativePPC_common.c 2022-02-12 20:48:22.210099029 -0800
|
|
@@ -46,6 +46,39 @@
|
|
#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
|
|
#endif
|
|
|
|
+#ifdef __linux__
|
|
+#include <sys/auxv.h>
|
|
+
|
|
+/* Return the instruction cache line size in bytes, read via getauxval(AT_ICACHEBSIZE) and cached after the first non-zero read. */
|
|
+static SLJIT_INLINE sljit_u32 get_icache_line_size()
|
|
+{
|
|
+ static sljit_u32 icache_line_size = 0; /* 0 means "not fetched yet" */
|
|
+ if (SLJIT_UNLIKELY(!icache_line_size)) {
|
|
+ icache_line_size = (sljit_u32) getauxval(AT_ICACHEBSIZE);
|
|
+ SLJIT_ASSERT(icache_line_size != 0); /* NOTE(review): if this assert is compiled out, a zero size is returned as-is - confirm callers cope */
|
|
+ }
|
|
+ return icache_line_size;
|
|
+}
|
|
+
|
|
+/* Return the AT_HWCAP word from getauxval(), cached after the first non-zero read (a zero result is re-queried on every call). */
|
|
+static SLJIT_INLINE unsigned long get_hwcap()
|
|
+{
|
|
+ static unsigned long hwcap = 0; /* 0 means "not fetched yet" */
|
|
+ if (SLJIT_UNLIKELY(!hwcap)) {
|
|
+ hwcap = getauxval(AT_HWCAP);
|
|
+ SLJIT_ASSERT(hwcap != 0);
|
|
+ }
|
|
+ return hwcap;
|
|
+}
|
|
+
|
|
+/* Return non-zero if the PPC_FEATURE_ICACHE_SNOOP bit is set in the hardware capabilities word returned by get_hwcap(). */
|
|
+static SLJIT_INLINE unsigned long has_feature_icache_snoop()
|
|
+{
|
|
+ return (get_hwcap() & PPC_FEATURE_ICACHE_SNOOP); /* masked bit itself, not normalised to 0/1 */
|
|
+}
|
|
+
|
|
+#endif /* __linux__ */
|
|
+
|
|
#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
|
|
|
|
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
|
|
@@ -68,14 +101,40 @@
|
|
# error "Cache flush is not implemented for PowerPC/POWER common mode."
|
|
# else
|
|
/* Cache flush for PowerPC architecture. */
|
|
- while (from < to) {
|
|
+ /* For POWER5 and up with icache snooping, only one icbi in the range
|
|
+ * is required. The sync flushes the store queue, and the icbi/isync
|
|
+ * kills the local prefetch.
|
|
+ */
|
|
+ if (has_feature_icache_snoop()) {
|
|
__asm__ volatile (
|
|
- "dcbf 0, %0\n"
|
|
"sync\n"
|
|
"icbi 0, %0\n"
|
|
- : : "r"(from)
|
|
+ "isync\n"
|
|
+ : : "r"(from) : "memory"
|
|
+ );
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ sljit_u32 cache_line_bytes = get_icache_line_size();
|
|
+ sljit_u32 cache_line_words = cache_line_bytes / sizeof(sljit_ins);
|
|
+ uintptr_t cache_line_mask = ~(uintptr_t)(cache_line_bytes - 1);
|
|
+
|
|
+ /* Round down to start of cache line to simplify the end condition. */
|
|
+ sljit_ins* start = (sljit_ins*)((uintptr_t)(from) & cache_line_mask);
|
|
+
|
|
+ for (from = start; from < to; from += cache_line_words) {
|
|
+ __asm__ volatile (
|
|
+ "dcbf 0, %0"
|
|
+ : : "r"(from) : "memory"
|
|
+ );
|
|
+ }
|
|
+ __asm__ volatile ( "sync" );
|
|
+
|
|
+ for (from = start; from < to; from += cache_line_words) {
|
|
+ __asm__ volatile (
|
|
+ "icbi 0, %0"
|
|
+ : : "r"(from) : "memory"
|
|
);
|
|
- from++;
|
|
}
|
|
__asm__ volatile ( "isync" );
|
|
# endif
|