diff --git a/backends/cadence/fused_quant/op_add.cpp b/backends/cadence/fused_quant/op_add.cpp index 114039410f0..62e58c71c83 100644 --- a/backends/cadence/fused_quant/op_add.cpp +++ b/backends/cadence/fused_quant/op_add.cpp @@ -43,19 +43,16 @@ Tensor& add_out( ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - optional inp_axis, const optional& other_scale, const optional& other_zero_point, ScalarType other_dtype, int64_t other_quant_min, int64_t other_quant_max, - optional other_axis, const optional& out_scale, const optional& out_zero_point, ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - optional out_axis, double alpha, Tensor& out) { int64_t numel = inp.numel(); @@ -72,7 +69,7 @@ Tensor& add_out( } inp_buf.resize(numel); QParams qp = extract_qparams( - inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp); + inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp); FUSED_QUANT_DTYPE_SWITCH( inp.scalar_type(), scalar_t, @@ -88,12 +85,7 @@ Tensor& add_out( } other_buf.resize(numel); QParams qp = extract_qparams( - other_scale, - other_zero_point, - other_quant_min, - other_quant_max, - other_axis, - other); + other_scale, other_zero_point, other_quant_min, other_quant_max, other); FUSED_QUANT_DTYPE_SWITCH( other.scalar_type(), scalar_t, @@ -107,7 +99,7 @@ Tensor& add_out( add_kernel(inp_float, other_float, result_float.data(), numel, alpha_f); QParams qp = extract_qparams( - out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out); + out_scale, out_zero_point, out_quant_min, out_quant_max, out); FUSED_QUANT_DTYPE_SWITCH( out.scalar_type(), scalar_t, diff --git a/backends/cadence/fused_quant/op_add.h b/backends/cadence/fused_quant/op_add.h index 2da4ce80798..9db1e907294 100644 --- a/backends/cadence/fused_quant/op_add.h +++ b/backends/cadence/fused_quant/op_add.h @@ -24,20 +24,17 @@ executorch::aten::Tensor& add_out( executorch::aten::ScalarType inp_dtype, int64_t inp_quant_min, 
int64_t inp_quant_max, - executorch::aten::optional inp_axis, const executorch::aten::optional& other_scale, const executorch::aten::optional& other_zero_point, executorch::aten::ScalarType other_dtype, int64_t other_quant_min, int64_t other_quant_max, - executorch::aten::optional other_axis, const executorch::aten::optional& out_scale, const executorch::aten::optional& out_zero_point, executorch::aten::ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - executorch::aten::optional out_axis, double alpha, executorch::aten::Tensor& out); diff --git a/backends/cadence/fused_quant/op_bmm.cpp b/backends/cadence/fused_quant/op_bmm.cpp index 2c79bcb6a59..7204ab6c88f 100644 --- a/backends/cadence/fused_quant/op_bmm.cpp +++ b/backends/cadence/fused_quant/op_bmm.cpp @@ -53,19 +53,16 @@ Tensor& bmm_out( ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - optional inp_axis, const optional& other_scale, const optional& other_zero_point, ScalarType other_dtype, int64_t other_quant_min, int64_t other_quant_max, - optional other_axis, const optional& out_scale, const optional& out_zero_point, ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - optional out_axis, Tensor& out) { int64_t batch = inp.size(0); int64_t M = inp.size(1); @@ -87,7 +84,7 @@ Tensor& bmm_out( } inp_buf.resize(inp_numel); QParams qp = extract_qparams( - inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp); + inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp); FUSED_QUANT_DTYPE_SWITCH( inp.scalar_type(), scalar_t, @@ -104,12 +101,7 @@ Tensor& bmm_out( } other_buf.resize(other_numel); QParams qp = extract_qparams( - other_scale, - other_zero_point, - other_quant_min, - other_quant_max, - other_axis, - other); + other_scale, other_zero_point, other_quant_min, other_quant_max, other); FUSED_QUANT_DTYPE_SWITCH(other.scalar_type(), scalar_t, dequantize_buffer( @@ -126,7 +118,7 @@ Tensor& bmm_out( bmm_kernel(inp_float, other_float, 
result_float.data(), batch, M, K, N); QParams qp = extract_qparams( - out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out); + out_scale, out_zero_point, out_quant_min, out_quant_max, out); FUSED_QUANT_DTYPE_SWITCH(out.scalar_type(), scalar_t, quantize_buffer( diff --git a/backends/cadence/fused_quant/op_bmm.h b/backends/cadence/fused_quant/op_bmm.h index f814b46b481..ef9598eac98 100644 --- a/backends/cadence/fused_quant/op_bmm.h +++ b/backends/cadence/fused_quant/op_bmm.h @@ -24,20 +24,17 @@ executorch::aten::Tensor& bmm_out( executorch::aten::ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - executorch::aten::optional inp_axis, const executorch::aten::optional& other_scale, const executorch::aten::optional& other_zero_point, executorch::aten::ScalarType other_dtype, int64_t other_quant_min, int64_t other_quant_max, - executorch::aten::optional other_axis, const executorch::aten::optional& out_scale, const executorch::aten::optional& out_zero_point, executorch::aten::ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - executorch::aten::optional out_axis, executorch::aten::Tensor& out); } // namespace native diff --git a/backends/cadence/fused_quant/op_hardswish.cpp b/backends/cadence/fused_quant/op_hardswish.cpp index 0d653a1bfae..452ea90a405 100644 --- a/backends/cadence/fused_quant/op_hardswish.cpp +++ b/backends/cadence/fused_quant/op_hardswish.cpp @@ -40,13 +40,11 @@ Tensor& hardswish_out( ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - optional inp_axis, const optional& out_scale, const optional& out_zero_point, ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - optional out_axis, Tensor& out) { int64_t numel = inp.numel(); @@ -60,7 +58,7 @@ Tensor& hardswish_out( } inp_buf.resize(numel); QParams qp = extract_qparams( - inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp); + inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp); 
FUSED_QUANT_DTYPE_SWITCH( inp.scalar_type(), scalar_t, @@ -74,7 +72,7 @@ Tensor& hardswish_out( hardswish_kernel(inp_float, result_float.data(), numel); QParams qp = extract_qparams( - out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out); + out_scale, out_zero_point, out_quant_min, out_quant_max, out); FUSED_QUANT_DTYPE_SWITCH( out.scalar_type(), scalar_t, diff --git a/backends/cadence/fused_quant/op_hardswish.h b/backends/cadence/fused_quant/op_hardswish.h index 7cba5b07788..ba9e09da23c 100644 --- a/backends/cadence/fused_quant/op_hardswish.h +++ b/backends/cadence/fused_quant/op_hardswish.h @@ -23,13 +23,11 @@ executorch::aten::Tensor& hardswish_out( executorch::aten::ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - executorch::aten::optional inp_axis, const executorch::aten::optional& out_scale, const executorch::aten::optional& out_zero_point, executorch::aten::ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - executorch::aten::optional out_axis, executorch::aten::Tensor& out); } // namespace native diff --git a/backends/cadence/fused_quant/op_mul.cpp b/backends/cadence/fused_quant/op_mul.cpp index 59b0254a0f0..3d071f7c2da 100644 --- a/backends/cadence/fused_quant/op_mul.cpp +++ b/backends/cadence/fused_quant/op_mul.cpp @@ -42,19 +42,16 @@ Tensor& mul_out( ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - optional inp_axis, const optional& other_scale, const optional& other_zero_point, ScalarType other_dtype, int64_t other_quant_min, int64_t other_quant_max, - optional other_axis, const optional& out_scale, const optional& out_zero_point, ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - optional out_axis, Tensor& out) { (void)ctx; (void)inp_dtype; @@ -74,7 +71,7 @@ Tensor& mul_out( } inp_buf.resize(numel); QParams qp = extract_qparams( - inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp); + inp_scale, inp_zero_point, inp_quant_min, 
inp_quant_max, inp); FUSED_QUANT_DTYPE_SWITCH( inp.scalar_type(), scalar_t, @@ -90,12 +87,7 @@ Tensor& mul_out( } other_buf.resize(numel); QParams qp = extract_qparams( - other_scale, - other_zero_point, - other_quant_min, - other_quant_max, - other_axis, - other); + other_scale, other_zero_point, other_quant_min, other_quant_max, other); FUSED_QUANT_DTYPE_SWITCH( other.scalar_type(), scalar_t, @@ -109,7 +101,7 @@ Tensor& mul_out( mul_kernel(inp_float, other_float, result_float.data(), numel); QParams qp = extract_qparams( - out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out); + out_scale, out_zero_point, out_quant_min, out_quant_max, out); FUSED_QUANT_DTYPE_SWITCH( out.scalar_type(), scalar_t, diff --git a/backends/cadence/fused_quant/op_mul.h b/backends/cadence/fused_quant/op_mul.h index 402e39bd379..f7afa016b79 100644 --- a/backends/cadence/fused_quant/op_mul.h +++ b/backends/cadence/fused_quant/op_mul.h @@ -24,20 +24,17 @@ executorch::aten::Tensor& mul_out( executorch::aten::ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - executorch::aten::optional inp_axis, const executorch::aten::optional& other_scale, const executorch::aten::optional& other_zero_point, executorch::aten::ScalarType other_dtype, int64_t other_quant_min, int64_t other_quant_max, - executorch::aten::optional other_axis, const executorch::aten::optional& out_scale, const executorch::aten::optional& out_zero_point, executorch::aten::ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - executorch::aten::optional out_axis, executorch::aten::Tensor& out); } // namespace native diff --git a/backends/cadence/fused_quant/op_relu.cpp b/backends/cadence/fused_quant/op_relu.cpp index 3fb8d92aff1..ebe7933a7b9 100644 --- a/backends/cadence/fused_quant/op_relu.cpp +++ b/backends/cadence/fused_quant/op_relu.cpp @@ -39,13 +39,11 @@ Tensor& relu_out( ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - optional inp_axis, const optional& 
out_scale, const optional& out_zero_point, ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - optional out_axis, Tensor& out) { int64_t numel = inp.numel(); @@ -59,7 +57,7 @@ Tensor& relu_out( } inp_buf.resize(numel); QParams qp = extract_qparams( - inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp); + inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp); FUSED_QUANT_DTYPE_SWITCH( inp.scalar_type(), scalar_t, @@ -73,7 +71,7 @@ Tensor& relu_out( relu_kernel(inp_float, result_float.data(), numel); QParams qp = extract_qparams( - out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out); + out_scale, out_zero_point, out_quant_min, out_quant_max, out); FUSED_QUANT_DTYPE_SWITCH( out.scalar_type(), scalar_t, diff --git a/backends/cadence/fused_quant/op_relu.h b/backends/cadence/fused_quant/op_relu.h index 1a9d986ccce..e8527c7633f 100644 --- a/backends/cadence/fused_quant/op_relu.h +++ b/backends/cadence/fused_quant/op_relu.h @@ -23,13 +23,11 @@ executorch::aten::Tensor& relu_out( executorch::aten::ScalarType inp_dtype, int64_t inp_quant_min, int64_t inp_quant_max, - executorch::aten::optional inp_axis, const executorch::aten::optional& out_scale, const executorch::aten::optional& out_zero_point, executorch::aten::ScalarType out_dtype, int64_t out_quant_min, int64_t out_quant_max, - executorch::aten::optional out_axis, executorch::aten::Tensor& out); } // namespace native diff --git a/backends/cadence/fused_quant/quant_utils.h b/backends/cadence/fused_quant/quant_utils.h index a7f24432ab6..fff669a9e0e 100644 --- a/backends/cadence/fused_quant/quant_utils.h +++ b/backends/cadence/fused_quant/quant_utils.h @@ -17,28 +17,49 @@ namespace cadence { namespace fused_quant { +// Upper bound on tensor rank for affine block indexing. Reference quant kernels +// operate on small ranks (linear rank 2, conv rank 4); 8 leaves headroom. +static constexpr int kMaxAffineDim = 8; + +// Affine quantization params. 
Scale/zero_point are either a singleton +// (per-tensor) or a full-rank tensor whose shape encodes the affine block +// layout: ``block_size[d] = data.size(d) / scale.size(d)``. This single +// representation covers per-tensor, per-channel, per-group, and blockwise. The +// scale element for a data element at flat index ``i`` is found by decomposing +// ``i`` into per-dim coordinates, mapping each to its block (``coord / +// block_size[d]``), and re-linearizing through the scale strides. struct QParams { const float* scales; const int64_t* zero_points; int32_t quant_min; int32_t quant_max; - int64_t num_channels; - int64_t axis_stride; + bool per_tensor; + int64_t ndim; + int64_t data_strides[kMaxAffineDim]; + int64_t scale_strides[kMaxAffineDim]; + int64_t block_size[kMaxAffineDim]; float scale_at(int64_t i) const { - return scales[channel_idx(i)]; + return scales[scale_idx(i)]; } int32_t zero_point_at(int64_t i) const { - return static_cast(zero_points[channel_idx(i)]); + return static_cast(zero_points[scale_idx(i)]); } private: - int64_t channel_idx(int64_t i) const { - if (num_channels == 1) { + int64_t scale_idx(int64_t i) const { + if (per_tensor) { return 0; } - return (i / axis_stride) % num_channels; + int64_t idx = 0; + int64_t rem = i; + for (int64_t d = 0; d < ndim; ++d) { + const int64_t coord = rem / data_strides[d]; + rem -= coord * data_strides[d]; + idx += (coord / block_size[d]) * scale_strides[d]; + } + return idx; } }; @@ -47,27 +68,63 @@ inline QParams extract_qparams( const executorch::aten::optional& zp_tensor, int64_t quant_min, int64_t quant_max, - executorch::aten::optional axis, const executorch::aten::Tensor& data_tensor) { const auto& scale = scale_tensor.value(); const auto& zp = zp_tensor.value(); - int64_t num_channels = scale.numel(); - int64_t axis_stride = 1; - if (axis.has_value()) { - for (int64_t d = axis.value() + 1; d < data_tensor.dim(); ++d) { - axis_stride *= data_tensor.size(d); - } + QParams qp{}; + qp.scales = 
scale.const_data_ptr(); + qp.zero_points = zp.const_data_ptr(); + qp.quant_min = static_cast(quant_min); + qp.quant_max = static_cast(quant_max); + + // A singleton scale broadcasts across the whole tensor (per-tensor); no block + // layout to derive, and the scale rank need not match the data rank. + if (scale.numel() == 1) { + qp.per_tensor = true; + return qp; } - return { - scale.const_data_ptr(), - zp.const_data_ptr(), - static_cast(quant_min), - static_cast(quant_max), - num_channels, - axis_stride, - }; + const int64_t ndim = data_tensor.dim(); + ET_CHECK_MSG( + scale.dim() == ndim, + "per-channel/group scale must be full-rank (rank %d) to match data rank %d", + static_cast(scale.dim()), + static_cast(ndim)); + ET_CHECK_MSG( + ndim <= kMaxAffineDim, + "tensor rank %d exceeds kMaxAffineDim %d", + static_cast(ndim), + static_cast(kMaxAffineDim)); + // zero_points is indexed with the same index as scales, so it must hold at + // least as many elements; require an exact match to catch layout mix-ups. + ET_CHECK_MSG( + zp.numel() == scale.numel(), + "zero_point numel %d must match scale numel %d", + static_cast<int>(zp.numel()), + static_cast<int>(scale.numel())); + + qp.per_tensor = false; + qp.ndim = ndim; + int64_t data_stride = 1; + int64_t scale_stride = 1; + for (int64_t d = ndim - 1; d >= 0; --d) { + const int64_t data_size = data_tensor.size(d); + const int64_t scale_size = scale.size(d); + // Each scale dim must tile the data dim exactly; otherwise block_size + // truncates (possibly to 0) and scale_idx divides by zero or reads past + // the end of the scale/zero_point buffers. + ET_CHECK_MSG( + scale_size > 0 && data_size % scale_size == 0, + "scale size %d must evenly divide data size %d at dim %d", + static_cast<int>(scale_size), + static_cast<int>(data_size), + static_cast<int>(d)); + qp.data_strides[d] = data_stride; + qp.scale_strides[d] = scale_stride; + qp.block_size[d] = data_size / scale_size; + data_stride *= data_size; + scale_stride *= scale_size; + } + return qp; } template diff --git a/backends/cadence/fused_quant/tests/test_op_add.cpp b/backends/cadence/fused_quant/tests/test_op_add.cpp index e88932cc6ef..dca110cf0e1 100644 --- a/backends/cadence/fused_quant/tests/test_op_add.cpp +++ b/backends/cadence/fused_quant/tests/test_op_add.cpp @@ -25,10 +25,6 @@ optional none_tensor() { return optional(); } -optional none_axis() { - return optional(); -} - } // namespace class FusedQuantAddTest : public OperatorTest {}; @@ -66,19 +62,16 @@ TEST_F(FusedQuantAddTest, AllQuantizedPerTensor) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), 1.0, out); @@ -112,19 
+105,16 @@ TEST_F(FusedQuantAddTest, FloatInputsQuantizedOutput) { ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), 1.0, out); @@ -161,19 +151,16 @@ TEST_F(FusedQuantAddTest, QuantizedInpFloatOther) { ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), 1.0, out); @@ -210,19 +197,16 @@ TEST_F(FusedQuantAddTest, FloatInpQuantizedOther) { ScalarType::Float, 0, 0, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), 1.0, out); @@ -259,19 +243,16 @@ TEST_F(FusedQuantAddTest, QuantizedInputsFloatOutput) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), 1.0, out); @@ -305,19 +286,16 @@ TEST_F(FusedQuantAddTest, QuantizedInpFloatOtherFloatOutput) { ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), 1.0, out); @@ -351,19 +329,16 @@ TEST_F(FusedQuantAddTest, FloatInpQuantizedOtherFloatOutput) { ScalarType::Float, 0, 0, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), 1.0, out); @@ -376,15 +351,16 @@ TEST_F(FusedQuantAddTest, PerChannelInput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 2], axis=0 → 2 channels, axis_stride=2 + // Per-channel along axis 0: full-rank scale shape [2, 1] encodes the block + // layout (block_size = [2/2, 2/1] 
= [1, 2]). const std::vector sizes{2, 2}; Tensor inp = tf_int8.make(sizes, {2, 4, 6, 8}); Tensor other = tf_float.make(sizes, {1.0, 1.0, 1.0, 1.0}); // Per-channel: channel 0 scale=0.5, channel 1 scale=1.0 - Tensor inp_scale = tf_float.make({2}, {0.5, 1.0}); - Tensor inp_zp = tf_long.make({2}, {0, 0}); + Tensor inp_scale = tf_float.make({2, 1}, {0.5, 1.0}); + Tensor inp_zp = tf_long.make({2, 1}, {0, 0}); Tensor out_scale = tf_float.make({1}, {0.5}); Tensor out_zp = tf_long.make({1}, {0}); @@ -403,19 +379,16 @@ TEST_F(FusedQuantAddTest, PerChannelInput) { ScalarType::Float, -128, 127, - optional(0), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), 1.0, out); @@ -428,15 +401,15 @@ TEST_F(FusedQuantAddTest, PerChannelOutput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 2], axis=0 → 2 channels + // Per-channel along axis 0: full-rank scale shape [2, 1] (block_size [1, 2]) const std::vector sizes{2, 2}; Tensor inp = tf_float.make(sizes, {2.0, 3.0, 7.0, 9.0}); Tensor other = tf_float.make(sizes, {0.0, 0.0, 0.0, 0.0}); // Per-channel output: channel 0 scale=0.5, channel 1 scale=1.0 - Tensor out_scale = tf_float.make({2}, {0.5, 1.0}); - Tensor out_zp = tf_long.make({2}, {0, 0}); + Tensor out_scale = tf_float.make({2, 1}, {0.5, 1.0}); + Tensor out_zp = tf_long.make({2, 1}, {0, 0}); Tensor out = tf_int8.zeros(sizes); @@ -452,19 +425,16 @@ TEST_F(FusedQuantAddTest, PerChannelOutput) { ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - optional(0), 1.0, out); @@ -504,19 +474,16 @@ TEST_F(FusedQuantAddTest, AlphaScaling) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - 
none_axis(), 2.0, out); @@ -559,19 +526,16 @@ TEST_F(FusedQuantAddTest, NonZeroZeroPoint) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), 1.0, out); diff --git a/backends/cadence/fused_quant/tests/test_op_bmm.cpp b/backends/cadence/fused_quant/tests/test_op_bmm.cpp index 93c511a10d5..5ede47ea8a9 100644 --- a/backends/cadence/fused_quant/tests/test_op_bmm.cpp +++ b/backends/cadence/fused_quant/tests/test_op_bmm.cpp @@ -25,10 +25,6 @@ optional none_tensor() { return optional(); } -optional none_axis() { - return optional(); -} - } // namespace class FusedQuantBmmTest : public OperatorTest {}; @@ -73,19 +69,16 @@ TEST_F(FusedQuantBmmTest, AllQuantizedPerTensor) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(out_sizes, {2, 4, 6, 8})); @@ -121,19 +114,16 @@ TEST_F(FusedQuantBmmTest, FloatInputsQuantizedOutput) { ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(out_sizes, {2, 4, 6, 8})); @@ -171,19 +161,16 @@ TEST_F(FusedQuantBmmTest, QuantizedInputsFloatOutput) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_float.make(out_sizes, {1.0, 2.0, 3.0, 4.0})); @@ -221,19 +208,16 @@ TEST_F(FusedQuantBmmTest, QuantizedInpFloatOther) { ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - 
none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(out_sizes, {2, 4, 6, 8})); @@ -284,19 +268,16 @@ TEST_F(FusedQuantBmmTest, NonZeroZeroPoint) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(out_sizes, {3, 6, 2, 5})); @@ -341,19 +322,16 @@ TEST_F(FusedQuantBmmTest, LargerBatch) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(out_sizes, {2, 4, 6, 8, 10, 12, 14, 16})); diff --git a/backends/cadence/fused_quant/tests/test_op_hardswish.cpp b/backends/cadence/fused_quant/tests/test_op_hardswish.cpp index e92989c64d2..502d680d2e3 100644 --- a/backends/cadence/fused_quant/tests/test_op_hardswish.cpp +++ b/backends/cadence/fused_quant/tests/test_op_hardswish.cpp @@ -25,10 +25,6 @@ optional none_tensor() { return optional(); } -optional none_axis() { - return optional(); -} - } // namespace class FusedQuantHardswishTest : public OperatorTest {}; @@ -66,13 +62,11 @@ TEST_F(FusedQuantHardswishTest, AllQuantizedPerTensor) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 0, 3, 6, 10})); @@ -103,13 +97,11 @@ TEST_F(FusedQuantHardswishTest, FloatInputQuantizedOutput) { ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 0, 3, 6, 10})); @@ -140,13 +132,11 @@ TEST_F(FusedQuantHardswishTest, QuantizedInputFloatOutput) { 
ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_float.make(sizes, {0.0, 0.0, 0.0, 3.0, 6.0, 10.0})); @@ -158,14 +148,15 @@ TEST_F(FusedQuantHardswishTest, PerChannelInput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 3], axis=0 → 2 channels, axis_stride=3 + // Per-channel along axis 0: full-rank scale shape [2, 1] encodes the block + // layout (block_size = [2/2, 3/1] = [1, 3]). const std::vector sizes{2, 3}; Tensor inp = tf_int8.make(sizes, {-6, -3, 0, 3, 6, 10}); // Per-channel: channel 0 scale=1.0, channel 1 scale=0.5 - Tensor inp_scale = tf_float.make({2}, {1.0, 0.5}); - Tensor inp_zp = tf_long.make({2}, {0, 0}); + Tensor inp_scale = tf_float.make({2, 1}, {1.0, 0.5}); + Tensor inp_zp = tf_long.make({2, 1}, {0, 0}); Tensor out_scale = tf_float.make({1}, {0.5}); Tensor out_zp = tf_long.make({1}, {0}); @@ -187,13 +178,11 @@ TEST_F(FusedQuantHardswishTest, PerChannelInput) { ScalarType::Float, -128, 127, - optional(0), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 0, 2, 6, 10})); @@ -205,14 +194,14 @@ TEST_F(FusedQuantHardswishTest, PerChannelOutput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 3], axis=0 → 2 channels + // Per-channel along axis 0: full-rank scale shape [2, 1] (block_size [1, 3]) const std::vector sizes{2, 3}; Tensor inp = tf_float.make(sizes, {-6.0, 0.0, 3.0, 6.0, 10.0, 12.0}); // Per-channel output: channel 0 scale=1.0, channel 1 scale=0.5 - Tensor out_scale = tf_float.make({2}, {1.0, 0.5}); - Tensor out_zp = tf_long.make({2}, {0, 0}); + Tensor out_scale = tf_float.make({2, 1}, {1.0, 0.5}); + Tensor out_zp = tf_long.make({2, 1}, {0, 0}); Tensor out = tf_int8.zeros(sizes); @@ -229,13 +218,11 @@ TEST_F(FusedQuantHardswishTest, PerChannelOutput) { ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), 
ScalarType::Char, -128, 127, - optional(0), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 3, 12, 20, 24})); @@ -272,13 +259,11 @@ TEST_F(FusedQuantHardswishTest, NonZeroZeroPoint) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {1, 1, 1, 4, 7, 11})); @@ -312,13 +297,11 @@ TEST_F(FusedQuantHardswishTest, NegativeRegion) { ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 0, 0})); @@ -346,13 +329,11 @@ TEST_F(FusedQuantHardswishTest, LinearRegion) { ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_float.make(sizes, {3.0, 4.0, 6.0, 10.0})); @@ -392,13 +373,11 @@ TEST_F(FusedQuantHardswishTest, TransitionRegion) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, -3, 0, 9, 24})); diff --git a/backends/cadence/fused_quant/tests/test_op_mul.cpp b/backends/cadence/fused_quant/tests/test_op_mul.cpp index 77983155015..0b9addabc5e 100644 --- a/backends/cadence/fused_quant/tests/test_op_mul.cpp +++ b/backends/cadence/fused_quant/tests/test_op_mul.cpp @@ -25,10 +25,6 @@ optional none_tensor() { return optional(); } -optional none_axis() { - return optional(); -} - } // namespace class FusedQuantMulTest : public OperatorTest {}; @@ -66,19 +62,16 @@ TEST_F(FusedQuantMulTest, AllQuantizedPerTensor) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {2, 4, 6, 8})); @@ -111,19 +104,16 @@ 
TEST_F(FusedQuantMulTest, FloatInputsQuantizedOutput) { ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {4, 8, 12, 16})); @@ -159,19 +149,16 @@ TEST_F(FusedQuantMulTest, QuantizedInpFloatOther) { ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {4, 8, 12, 16})); @@ -207,19 +194,16 @@ TEST_F(FusedQuantMulTest, FloatInpQuantizedOther) { ScalarType::Float, 0, 0, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {2, 4, 6, 8})); @@ -255,19 +239,16 @@ TEST_F(FusedQuantMulTest, QuantizedInputsFloatOutput) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_float.make(sizes, {1.0, 2.0, 3.0, 4.0})); @@ -300,19 +281,16 @@ TEST_F(FusedQuantMulTest, QuantizedInpFloatOtherFloatOutput) { ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_float.make(sizes, {2.0, 4.0, 6.0, 8.0})); @@ -345,19 +323,16 @@ TEST_F(FusedQuantMulTest, FloatInpQuantizedOtherFloatOutput) { ScalarType::Float, 0, 0, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); 
EXPECT_TENSOR_EQ(out, tf_float.make(sizes, {1.0, 2.0, 3.0, 4.0})); @@ -369,15 +344,16 @@ TEST_F(FusedQuantMulTest, PerChannelInput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 2], axis=0 -> 2 channels, axis_stride=2 const std::vector sizes{2, 2}; Tensor inp = tf_int8.make(sizes, {2, 4, 6, 8}); Tensor other = tf_float.make(sizes, {2.0, 2.0, 2.0, 2.0}); - // Per-channel: channel 0 scale=0.5, channel 1 scale=1.0 - Tensor inp_scale = tf_float.make({2}, {0.5, 1.0}); - Tensor inp_zp = tf_long.make({2}, {0, 0}); + // Per-channel along axis 0: full-rank scale shape [2, 1] encodes the block + // layout (block_size = [2/2, 2/1] = [1, 2]). channel 0 scale=0.5, channel 1 + // scale=1.0. + Tensor inp_scale = tf_float.make({2, 1}, {0.5, 1.0}); + Tensor inp_zp = tf_long.make({2, 1}, {0, 0}); Tensor out_scale = tf_float.make({1}, {0.5}); Tensor out_zp = tf_long.make({1}, {0}); @@ -396,19 +372,16 @@ TEST_F(FusedQuantMulTest, PerChannelInput) { ScalarType::Float, -128, 127, - optional(0), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {4, 8, 24, 32})); @@ -420,15 +393,15 @@ TEST_F(FusedQuantMulTest, PerChannelOutput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 2], axis=0 -> 2 channels + // Per-channel along axis 0: full-rank scale shape [2, 1] (block_size [1, 2]) const std::vector sizes{2, 2}; Tensor inp = tf_float.make(sizes, {2.0, 3.0, 7.0, 9.0}); Tensor other = tf_float.make(sizes, {1.0, 1.0, 1.0, 1.0}); // Per-channel output: channel 0 scale=0.5, channel 1 scale=1.0 - Tensor out_scale = tf_float.make({2}, {0.5, 1.0}); - Tensor out_zp = tf_long.make({2}, {0, 0}); + Tensor out_scale = tf_float.make({2, 1}, {0.5, 1.0}); + Tensor out_zp = tf_long.make({2, 1}, {0, 0}); Tensor out = tf_int8.zeros(sizes); @@ -444,19 +417,16 @@ TEST_F(FusedQuantMulTest, PerChannelOutput) { ScalarType::Float, 0, 
0, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - optional(0), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {4, 6, 7, 9})); @@ -498,19 +468,16 @@ TEST_F(FusedQuantMulTest, NonZeroZeroPoint) { ScalarType::Float, -128, 127, - none_axis(), optional(other_scale), optional(other_zp), ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {2, 3, 3, 4})); diff --git a/backends/cadence/fused_quant/tests/test_op_relu.cpp b/backends/cadence/fused_quant/tests/test_op_relu.cpp index 6a35e36dfbf..6b83551fd2b 100644 --- a/backends/cadence/fused_quant/tests/test_op_relu.cpp +++ b/backends/cadence/fused_quant/tests/test_op_relu.cpp @@ -25,10 +25,6 @@ optional none_tensor() { return optional(); } -optional none_axis() { - return optional(); -} - } // namespace class FusedQuantReluTest : public OperatorTest {}; @@ -61,13 +57,11 @@ TEST_F(FusedQuantReluTest, AllQuantizedPerTensor) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 2, 4})); @@ -98,13 +92,11 @@ TEST_F(FusedQuantReluTest, FloatInputQuantizedOutput) { ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 2, 4})); @@ -135,13 +127,11 @@ TEST_F(FusedQuantReluTest, QuantizedInputFloatOutput) { ScalarType::Float, -128, 127, - none_axis(), none_tensor(), none_tensor(), ScalarType::Float, 0, 0, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_float.make(sizes, {0.0, 0.0, 1.0, 2.0})); @@ -153,14 +143,15 @@ TEST_F(FusedQuantReluTest, PerChannelInput) { TensorFactory tf_float; TensorFactory tf_long; - // Shape [2, 2], axis=0 → 2 channels, axis_stride=2 
+ // Per-channel along axis 0: full-rank scale shape [2, 1] encodes the block + // layout (block_size = [2/2, 2/1] = [1, 2]). const std::vector sizes{2, 2}; Tensor inp = tf_int8.make(sizes, {-4, 2, -3, 6}); // Per-channel: channel 0 scale=0.5, channel 1 scale=1.0 - Tensor inp_scale = tf_float.make({2}, {0.5, 1.0}); - Tensor inp_zp = tf_long.make({2}, {0, 0}); + Tensor inp_scale = tf_float.make({2, 1}, {0.5, 1.0}); + Tensor inp_zp = tf_long.make({2, 1}, {0, 0}); Tensor out_scale = tf_float.make({1}, {0.5}); Tensor out_zp = tf_long.make({1}, {0}); @@ -178,13 +169,11 @@ TEST_F(FusedQuantReluTest, PerChannelInput) { ScalarType::Float, -128, 127, - optional(0), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 2, 0, 12})); @@ -196,14 +185,14 @@ TEST_F(FusedQuantReluTest, PerChannelOutput) { TensorFactory tf_int8; TensorFactory tf_long; - // Shape [2, 2], axis=0 → 2 channels + // Per-channel along axis 0: full-rank scale shape [2, 1] (block_size [1, 2]) const std::vector sizes{2, 2}; Tensor inp = tf_float.make(sizes, {-1.0, 3.0, -2.0, 9.0}); // Per-channel output: channel 0 scale=0.5, channel 1 scale=1.0 - Tensor out_scale = tf_float.make({2}, {0.5, 1.0}); - Tensor out_zp = tf_long.make({2}, {0, 0}); + Tensor out_scale = tf_float.make({2, 1}, {0.5, 1.0}); + Tensor out_zp = tf_long.make({2, 1}, {0, 0}); Tensor out = tf_int8.zeros(sizes); @@ -218,13 +207,11 @@ TEST_F(FusedQuantReluTest, PerChannelOutput) { ScalarType::Float, 0, 0, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - optional(0), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 6, 0, 9})); @@ -261,13 +248,11 @@ TEST_F(FusedQuantReluTest, NonZeroZeroPoint) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {1, 1, 2, 3})); @@ -301,13 +286,11 @@ 
TEST_F(FusedQuantReluTest, AllNegativeInputs) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {0, 0, 0, 0})); @@ -341,13 +324,11 @@ TEST_F(FusedQuantReluTest, AllPositiveInputs) { ScalarType::Float, -128, 127, - none_axis(), optional(out_scale), optional(out_zp), ScalarType::Char, -128, 127, - none_axis(), out); EXPECT_TENSOR_EQ(out, tf_int8.make(sizes, {2, 4, 6, 8}));