Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Makefile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@ CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif

# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq ($(GCCVERSIONGTEQ7), 1)
ifeq ($(GCCVERSIONGTEQ9), 1)
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif

ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
Expand Down
2 changes: 2 additions & 0 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1)
Expand Down Expand Up @@ -554,6 +555,7 @@ DYNAMIC_CORE += CORTEXA53
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += NEOVERSEN1
DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
Expand Down
1 change: 1 addition & 0 deletions TargetList.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
NEOVERSEN1
FALKOR
THUNDERX
THUNDERX2T99
Expand Down
2 changes: 1 addition & 1 deletion cmake/arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ endif ()

if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1)
endif ()

if (POWER)
Expand Down
27 changes: 27 additions & 0 deletions cmake/prebuild.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t4\n"
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t2\n"
"#define L2_SIZE\t1048576\n\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t16\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "FALKOR")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t65536\n"
Expand Down
23 changes: 21 additions & 2 deletions cpuid_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define CPU_CORTEXA57 3
#define CPU_CORTEXA72 4
#define CPU_CORTEXA73 5
#define CPU_NEOVERSEN1 11
// Qualcomm
#define CPU_FALKOR 6
// Cavium
Expand All @@ -55,7 +56,8 @@ static char *cpuname[] = {
"THUNDERX",
"THUNDERX2T99",
"TSV110",
"EMAG8180"
"EMAG8180",
"NEOVERSEN1"
};

static char *cpuname_lower[] = {
Expand All @@ -69,7 +71,8 @@ static char *cpuname_lower[] = {
"thunderx",
"thunderx2t99",
"tsv110",
"emag8180"
"emag8180",
"neoversen1"
};

int get_feature(char *search)
Expand Down Expand Up @@ -144,6 +147,8 @@ int detect(void)
return CPU_CORTEXA72;
else if (strstr(cpu_part, "0xd09"))
return CPU_CORTEXA73;
else if (strstr(cpu_part, "0xd0c"))
return CPU_NEOVERSEN1;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
Expand Down Expand Up @@ -285,6 +290,20 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
case CPU_NEOVERSEN1:
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 4\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 4\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;

case CPU_FALKOR:
printf("#define FALKOR\n");
Expand Down
8 changes: 7 additions & 1 deletion driver/others/dynamic_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@ extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1;

extern void openblas_warning(int verbose, const char * msg);

#define NUM_CORETYPES 10
#define NUM_CORETYPES 11

/*
* In case asm/hwcap.h is outdated on the build system, make sure
Expand All @@ -80,6 +81,7 @@ static char *corename[] = {
"thunderx2t99",
"tsv110",
"emag8180",
"neoversen1",
"unknown"
};

Expand All @@ -94,6 +96,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
return corename[NUM_CORETYPES];
}

Expand Down Expand Up @@ -123,6 +126,7 @@ static gotoblas_t *force_coretype(char *coretype) {
case 7: return (&gotoblas_THUNDERX2T99);
case 8: return (&gotoblas_TSV110);
case 9: return (&gotoblas_EMAG8180);
case 10: return (&gotoblas_NEOVERSEN1);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
Expand Down Expand Up @@ -168,6 +172,8 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_CORTEXA72;
case 0xd09: // Cortex A73
return &gotoblas_CORTEXA73;
case 0xd0c: // Neoverse N1
return &gotoblas_NEOVERSEN1;
}
break;
case 0x42: // Broadcom
Expand Down
18 changes: 18 additions & 0 deletions getarch.c
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif

#ifdef FORCE_NEOVERSEN1
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "NEOVERSEN1"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DNEOVERSEN1 " \
"-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" \
"-march=armv8.2-a -mtune=cortex-a72"
#define LIBNAME "neoversen1"
#define CORENAME "NEOVERSEN1"
#else
#endif


#ifdef FORCE_FALKOR
#define FORCE
#define ARCHITECTURE "ARM64"
Expand Down
Loading