diff --git a/backends/arm/ethosu/compile_spec.py b/backends/arm/ethosu/compile_spec.py index 99303ed5dc8..2440e96c5c2 100644 --- a/backends/arm/ethosu/compile_spec.py +++ b/backends/arm/ethosu/compile_spec.py @@ -50,7 +50,9 @@ def _default_system_config_and_memory_mode( resolved_system_config = ( "Ethos_U65_High_End" if system_config is None else system_config ) - resolved_memory_mode = "Sram_Only" if memory_mode is None else memory_mode + resolved_memory_mode = ( + "Dedicated_Sram_384KB" if memory_mode is None else memory_mode + ) return resolved_system_config, resolved_memory_mode if "ethos-u85" in target_lower: resolved_system_config = ( diff --git a/backends/arm/scripts/build_executor_runner.sh b/backends/arm/scripts/build_executor_runner.sh index aede5303304..915e93a3480 100755 --- a/backends/arm/scripts/build_executor_runner.sh +++ b/backends/arm/scripts/build_executor_runner.sh @@ -43,11 +43,11 @@ help() { echo " --target= Target to build and run for Default: ${target}" echo " --build_type= Build with Release, Debug or RelWithDebInfo, default is ${build_type}" echo " --bundleio Support both pte and Bundle IO bpte using Devtools BundelIO with Input/RefOutput included" - echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." + echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U65_High_End for EthosU65 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." echo " NOTE: If given, this option must match the given target. This option along with the memory_mode sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." echo " --memory_mode= Vela memory mode, used for setting the Timing Adapter parameters of the Corstone platforms." 
echo " Valid values are Shared_Sram(for Ethos-U55, Ethos-U65, Ethos-85), Sram_Only(for Ethos-U55, Ethos-U65, Ethos-U85) or Dedicated_Sram(for Ethos-U65, Ethos-U85)." - echo " Default: Shared_Sram for the Ethos-U55 and Sram_Only for the Ethos-U85" + echo " Default: Shared_Sram for the Ethos-U55, Dedicated_Sram_384KB for the Ethos-U65 and the Ethos-U85" echo " --etdump Adds Devtools etdump support to track timing and output, etdump area will be base64 encoded in the log" echo " --extra_build_flags= Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none " echo " --output= Output folder Default: /_.pte" @@ -139,6 +139,10 @@ fi if [[ ${system_config} == "" ]] then system_config="Ethos_U55_High_End_Embedded" + if [[ ${target} =~ "ethos-u65" ]] + then + system_config="Ethos_U65_High_End" + fi if [[ ${target} =~ "ethos-u85" ]] then system_config="Ethos_U85_SYS_DRAM_Mid" @@ -148,6 +152,10 @@ fi if [[ ${memory_mode} == "" ]] then memory_mode="Shared_Sram" + if [[ ${target} =~ "ethos-u65" ]] + then + memory_mode="Dedicated_Sram_384KB" # Same Ethos-U65 default as EthosUCompileSpec (backends/arm/ethosu/compile_spec.py) + fi if [[ ${target} =~ "ethos-u85" ]] then memory_mode="Dedicated_Sram_384KB" @@ -165,6 +173,9 @@ if [[ ${target} =~ ^cortex-m([0-9]+(plus|p)?)(\+|$) ]]; then elif [[ ${target} == *"ethos-u55"* ]]; then target_cpu=cortex-m55 npu_target_config="${target}" +elif [[ ${target} == *"ethos-u65"* ]]; then + target_cpu=cortex-m55 + npu_target_config="${target}" else target_cpu=cortex-m85 npu_target_config="${target}" diff --git a/backends/arm/scripts/corstone_utils.cmake b/backends/arm/scripts/corstone_utils.cmake index 723d8a0e600..d08b6e8d857 100644 --- a/backends/arm/scripts/corstone_utils.cmake +++ b/backends/arm/scripts/corstone_utils.cmake @@ -341,6 +341,40 @@ function(configure_timing_adapters SYSTEM_CONFIG MEMORY_MODE) ETHOSU_TA_HISTBIN_1=0 ETHOSU_TA_HISTCNT_1=0 ) + elseif(MEMORY_MODE MATCHES "Dedicated_Sram") + target_compile_definitions( + ethosu_target_common + INTERFACE # Configure NPU
architecture timing adapters This is just + # example numbers and you should make this match your hardware + # SRAM + ETHOSU_TA_MAXR_0=8 + ETHOSU_TA_MAXW_0=8 + ETHOSU_TA_MAXRW_0=0 + ETHOSU_TA_RLATENCY_0=32 + ETHOSU_TA_WLATENCY_0=32 + ETHOSU_TA_PULSE_ON_0=3999 + ETHOSU_TA_PULSE_OFF_0=1 + ETHOSU_TA_BWCAP_0=4000 + ETHOSU_TA_PERFCTRL_0=0 + ETHOSU_TA_PERFCNT_0=0 + ETHOSU_TA_MODE_0=1 + ETHOSU_TA_HISTBIN_0=0 + ETHOSU_TA_HISTCNT_0=0 + # DRAM + ETHOSU_TA_MAXR_1=64 + ETHOSU_TA_MAXW_1=32 + ETHOSU_TA_MAXRW_1=0 + ETHOSU_TA_RLATENCY_1=500 + ETHOSU_TA_WLATENCY_1=250 + ETHOSU_TA_PULSE_ON_1=4000 + ETHOSU_TA_PULSE_OFF_1=1000 + ETHOSU_TA_BWCAP_1=3750 + ETHOSU_TA_PERFCTRL_1=0 + ETHOSU_TA_PERFCNT_1=0 + ETHOSU_TA_MODE_1=1 + ETHOSU_TA_HISTBIN_1=0 + ETHOSU_TA_HISTCNT_1=0 + ) else() message( FATAL_ERROR diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index 736a5ffc6b5..56bd3c22a1f 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -17,6 +17,7 @@ from executorch.backends.arm.test.runner_utils import ( arm_executor_runner_exists, corstone300_installed, + corstone300_u65_installed, corstone320_installed, model_converter_installed, vkml_emulation_layer_installed, @@ -155,6 +156,42 @@ def get_u85_compile_spec( return compile_spec # type: ignore[return-value] +def get_u65_compile_spec( + macs: int = 256, + system_config: str = "Ethos_U65_High_End", + memory_mode: str = "Dedicated_Sram_384KB", + extra_flags: str = "--arena-cache-size=393216", + custom_path: Optional[str] = None, + config: Optional[str] = None, + tosa_debug_mode: EthosUCompileSpec.DebugMode | None = None, +) -> EthosUCompileSpec: + """Default compile spec for Ethos-U65 tests.""" + if not custom_path: + custom_path = maybe_get_tosa_collate_path() + if custom_path is not None: + os.makedirs(custom_path, exist_ok=True) + + assert macs in [256, 512], "Unsupported MACs value" + + if extra_flags is not None: + extra_flags_list = extra_flags.split(" ") + else: + extra_flags_list = [] + + 
compile_spec = ( + EthosUCompileSpec( + f"ethos-u65-{macs}", + system_config=system_config, + memory_mode=memory_mode, + extra_flags=extra_flags_list, + config_ini=config, + ) + .dump_intermediate_artifacts_to(custom_path) + .dump_debug_info(tosa_debug_mode) + ) + return compile_spec + + def get_vgf_compile_spec( tosa_spec: str | TosaSpecification, compiler_flags: Optional[str] = "", @@ -206,6 +243,19 @@ def get_vgf_compile_spec( is not built. """ + +XfailIfNoCorstone300_u65 = pytest.mark.xfail( + condition=not ( + corstone300_u65_installed() and arm_executor_runner_exists("corstone-300-u65") + ), + raises=FileNotFoundError, + reason="Did not find Corstone-300-u65 FVP or executor_runner on path", +) +"""Xfails a test if Corstone300-u65 FVP is not installed, or if the executor +runner is not built. +""" + + XfailIfNoCorstone320 = pytest.mark.xfail( condition=not ( corstone320_installed() and arm_executor_runner_exists("corstone-320") diff --git a/backends/arm/test/misc/test_compile_spec.py b/backends/arm/test/misc/test_compile_spec.py index f29b8851208..78d54b68d1a 100644 --- a/backends/arm/test/misc/test_compile_spec.py +++ b/backends/arm/test/misc/test_compile_spec.py @@ -38,6 +38,15 @@ def test_ethos_u55_defaults_to_stable_softmax_u55_INT(): assert pipeline_config.softmax == SoftmaxDecompositionConfig.STABLE +def test_ethos_u65_defaults_to_high_end_dedicated_sram_u65_INT(): + compile_spec = EthosUCompileSpec("ethos-u65-256") + + assert "--accelerator-config=ethos-u65-256" in compile_spec.compiler_flags + assert "--system-config=Ethos_U65_High_End" in compile_spec.compiler_flags + assert "--memory-mode=Dedicated_Sram_384KB" in compile_spec.compiler_flags + assert compile_spec.tosa_spec.is_U55_subset + + def test_ethos_u85_defaults_to_masked_softmax_u85_INT(): """Test that EthosUCompileSpec for U85 defaults to MASKED softmax config.""" compile_spec = EthosUCompileSpec("ethos-u85-256") diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
index 3e32ef523c3..632de5e999a 100644 --- a/backends/arm/test/ops/test_add.py +++ b/backends/arm/test/ops/test_add.py @@ -15,6 +15,7 @@ from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( EthosU55PipelineINT, + EthosU65PipelineINT, EthosU85PipelineINT, TosaPipelineFP, TosaPipelineINT, @@ -182,6 +183,18 @@ def test_add_tensor_u55_INT(test_data: input_t1): pipeline.run() +@common.parametrize("test_data", Add.test_data) +@common.XfailIfNoCorstone300_u65  # U65 needs the Ethos-U65 FVP and the corstone-300-u65 runner, not the U55 ones +def test_add_tensor_u65_INT(test_data: input_t1): + pipeline = EthosU65PipelineINT[input_t1]( + Add(), + test_data(), + aten_op, + exir_op, + ) + pipeline.run() + + @common.parametrize("test_data", Add.test_data) @common.XfailIfNoCorstone320 def test_add_tensor_u85_INT(test_data: input_t1): diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index 13d42e222a4..ff26d17ee13 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -73,7 +73,12 @@ torch.complex128: np.complex128, } -VALID_TARGET = {"corstone-300", "corstone-320", "vkml_emulation_layer"} +VALID_TARGET = { + "corstone-300", + "corstone-300-u65", + "corstone-320", + "vkml_emulation_layer", +} class QuantizationParams: @@ -450,11 +455,17 @@ def run_corstone( ) match target_board: - case "corstone-300": + case "corstone-300" | "corstone-300-u65": + if target_board == "corstone-300": + fvp = "FVP_Corstone_SSE-300_Ethos-U55" + num_macs = 128 + else: + fvp = "FVP_Corstone_SSE-300_Ethos-U65" + num_macs = 256 command_args = [ - "FVP_Corstone_SSE-300_Ethos-U55", + fvp, "-C", - "ethosu.num_macs=128", + f"ethosu.num_macs={num_macs}", "-C", "mps3_board.visualisation.disable-visualisation=1", "-C", @@ -805,10 +816,19 @@ def _tosa_refmodel_loglevel(loglevel: int) -> str: def corstone300_installed() -> bool: - cmd = ["FVP_Corstone_SSE-300_Ethos-U55", "--version"] + cmd_u55 = ["FVP_Corstone_SSE-300_Ethos-U55", "--version"] try: - _run_cmd(cmd, check=True)
- except: + _run_cmd(cmd_u55, check=True) + except Exception: + return False + return True + + +def corstone300_u65_installed() -> bool: + cmd_u65 = ["FVP_Corstone_SSE-300_Ethos-U65", "--version"] + try: + _run_cmd(cmd_u65, check=True) + except Exception: return False return True @@ -817,7 +837,7 @@ def corstone320_installed() -> bool: cmd = ["FVP_Corstone_SSE-320", "--version"] try: _run_cmd(cmd, check=True) - except: + except Exception: return False return True @@ -898,7 +918,7 @@ def _elf_path_candidates( raise ValueError(f"Unsupported target: {target_board}") portable_ops_str = "portable-ops_" if use_portable_ops else "" - if target_board in ("corstone-300", "corstone-320"): + if target_board in ("corstone-300", "corstone-300-u65", "corstone-320"): build_dir = Path( "arm_test", f"arm_semihosting_executor_runner_" @@ -969,7 +989,7 @@ def get_elf_path( def arm_executor_runner_exists(target_board: str, use_portable_ops: bool = False): try: get_elf_path(target_board, use_portable_ops=use_portable_ops) - except: + except Exception: return False else: return True @@ -1021,6 +1041,8 @@ def get_target_board(compile_spec: ArmCompileSpec) -> str | None: if isinstance(compile_spec, EthosUCompileSpec): if "u55" in compile_spec.target: return "corstone-300" + if "u65" in compile_spec.target: + return "corstone-300-u65" if "u85" in compile_spec.target: return "corstone-320" return None diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh index c9f3fb7581e..39d8335a26e 100755 --- a/backends/arm/test/setup_testing.sh +++ b/backends/arm/test/setup_testing.sh @@ -19,6 +19,7 @@ extraflags="-DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=83886080" #--target --system_config --memory_mode should match the ArmTester used setup see backends/arm/test/common.py ${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --system_config=Ethos_U55_High_End_Embedded --memory_mode=Shared_Sram --output="${build_root_test_dir}_corstone-300" 
--extra_build_flags=${extraflags} +${build_executor_runner} --pte=semihosting --target=ethos-u65-256 --system_config=Ethos_U65_High_End --memory_mode=Dedicated_Sram_384KB --output="${build_root_test_dir}_corstone-300-u65" --extra_build_flags=${extraflags} ${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_config=Ethos_U85_SYS_DRAM_Mid --memory_mode=Dedicated_Sram_384KB --output="${build_root_test_dir}_corstone-320" --extra_build_flags=${extraflags} # List of portable ops used by testing, this is mainly used to test models in the flow @@ -26,7 +27,9 @@ ${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_confi # To use this you can set use_portable_ops=True when creating ArmTester() portable_ops_list_u55="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::squeeze_copy.dims,dim_order_ops::_clone_dim_order.out" +portable_ops_list_u65="${portable_ops_list_u55}" portable_ops_list_u85="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::full_like.out,aten::bmm.out,aten::scalar_tensor.out,aten::index.Tensor_out,aten::where.self_out,dim_order_ops::_to_dim_order_copy.out" ${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --system_config=Ethos_U55_High_End_Embedded --memory_mode=Shared_Sram --select_ops_list="${portable_ops_list_u55}" --output="${build_root_test_dir}_portable-ops_corstone-300" --extra_build_flags=${extraflags} +${build_executor_runner} --pte=semihosting --target=ethos-u65-256 --system_config=Ethos_U65_High_End --memory_mode=Dedicated_Sram_384KB --select_ops_list="${portable_ops_list_u65}" --output="${build_root_test_dir}_portable-ops_corstone-300-u65" --extra_build_flags=${extraflags} ${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_config=Ethos_U85_SYS_DRAM_Mid 
--memory_mode=Dedicated_Sram_384KB --select_ops_list="${portable_ops_list_u85}" --output="${build_root_test_dir}_portable-ops_corstone-320" --extra_build_flags=${extraflags} diff --git a/backends/arm/test/test_arm_backend.sh b/backends/arm/test/test_arm_backend.sh index 7de59a70e36..3e3440e8289 100755 --- a/backends/arm/test/test_arm_backend.sh +++ b/backends/arm/test/test_arm_backend.sh @@ -45,7 +45,7 @@ fi TEST_SUITE_NAME="$(basename "$0") ${TEST_SUITE}" -EXCLUDE_TARGET_EXPR="(not u55) and (not u85) and (not tosa) and (not _vgf_)" +EXCLUDE_TARGET_EXPR="(not u55) and (not u65) and (not u85) and (not tosa) and (not _vgf_)" PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1) all() { # Run all tests @@ -133,7 +133,7 @@ test_pytest_ops_ethos_u55() { backends/arm/scripts/build_executorch.sh backends/arm/test/setup_testing.sh - pytest "${PYTEST_RETRY_ARGS[@]}" --verbose --color=yes --numprocesses=auto --durations=10 backends/arm/test/ --ignore=backends/arm/test/models -k u55 + pytest "${PYTEST_RETRY_ARGS[@]}" --verbose --color=yes --numprocesses=auto --durations=10 backends/arm/test/ --ignore=backends/arm/test/models -k "u55 or u65" echo "${TEST_SUITE_NAME}: PASS" } diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py index 86a5f857e58..73ba4e9824a 100644 --- a/backends/arm/test/tester/test_pipeline.py +++ b/backends/arm/test/tester/test_pipeline.py @@ -856,6 +856,65 @@ def __init__( ) +class EthosU65PipelineINT(EthosUPipelineINTBase, Generic[T]): + """Lowers a graph to u65 INT TOSA spec and tests it on the Corstone300 U65 + FVP, if run_on_fvp is true. + + Attributes: + module: The module which the pipeline is applied to. + test_data: Data used for quantizing and testing the module. + aten_ops: Aten dialect ops expected to be found in the graph after export. + + exir_ops: Exir dialect ops expected to be found in the graph after to_edge if not using + use_edge_to_transform_and_lower. 
+ run_on_fvp: Set to true to test the pte file on a fvp simulator. + use_edge_to_transform_and_lower: Selects between two possible ways of lowering the module. + custom_path : Path to dump intermediate artifacts such as tosa and pte to. + + """ + + def __init__( + self, + module: torch.nn.Module, + test_data: T, + aten_ops: str | List[str], + exir_ops: str | Sequence[str] | None = None, + run_on_fvp: bool = True, + symmetric_io_quantization: bool = False, + per_channel_quantization: bool = True, + a16w8_quantization: bool = False, + use_to_edge_transform_and_lower: bool = True, + custom_path: str | None = None, + tosa_debug_mode: Optional[ArmCompileSpec.DebugMode] = None, + atol: float = 1e-03, + rtol: float = 1e-03, + qtol: int = 1, + epsilon: float = 2**-12, + fold_quantize: bool = True, + ): + compile_spec = common.get_u65_compile_spec( + custom_path=custom_path, + tosa_debug_mode=tosa_debug_mode, + ) + super().__init__( + compile_spec, + module, + test_data, + aten_ops, + exir_ops, + run_on_fvp=run_on_fvp, + symmetric_io_quantization=symmetric_io_quantization, + per_channel_quantization=per_channel_quantization, + a16w8_quantization=a16w8_quantization, + use_to_edge_transform_and_lower=use_to_edge_transform_and_lower, + atol=atol, + rtol=rtol, + qtol=qtol, + epsilon=epsilon, + fold_quantize=fold_quantize, + ) + + class PassPipeline(TOSAPipeline, Generic[T]): """Runs single passes directly on an edge_program and checks operators before/after.