pytorch · DrJessop · Jun 10, 2026
diff --git a/backends/cadence/fused_quant/op_add.cpp b/backends/cadence/fused_quant/op_add.cpp
@@ -43,19 +43,16 @@ Tensor& add_out(
     ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    optional<int64_t> inp_axis,
     const optional<Tensor>& other_scale,
     const optional<Tensor>& other_zero_point,
     ScalarType other_dtype,
     int64_t other_quant_min,
     int64_t other_quant_max,
-    optional<int64_t> other_axis,
     const optional<Tensor>& out_scale,
     const optional<Tensor>& out_zero_point,
     ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    optional<int64_t> out_axis,
     double alpha,
     Tensor& out) {
   int64_t numel = inp.numel();
@@ -72,7 +69,7 @@ Tensor& add_out(
     }
     inp_buf.resize(numel);
     QParams qp = extract_qparams(
-        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
+        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp);
     FUSED_QUANT_DTYPE_SWITCH(
         inp.scalar_type(),
         scalar_t,
@@ -88,12 +85,7 @@ Tensor& add_out(
     }
     other_buf.resize(numel);
     QParams qp = extract_qparams(
-        other_scale,
-        other_zero_point,
-        other_quant_min,
-        other_quant_max,
-        other_axis,
-        other);
+        other_scale, other_zero_point, other_quant_min, other_quant_max, other);
     FUSED_QUANT_DTYPE_SWITCH(
         other.scalar_type(),
         scalar_t,
@@ -107,7 +99,7 @@ Tensor& add_out(
     add_kernel(inp_float, other_float, result_float.data(), numel, alpha_f);
 
     QParams qp = extract_qparams(
-        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
+        out_scale, out_zero_point, out_quant_min, out_quant_max, out);
     FUSED_QUANT_DTYPE_SWITCH(
         out.scalar_type(),
         scalar_t,

diff --git a/backends/cadence/fused_quant/op_add.h b/backends/cadence/fused_quant/op_add.h
@@ -24,20 +24,17 @@ executorch::aten::Tensor& add_out(
     executorch::aten::ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    executorch::aten::optional<int64_t> inp_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& other_scale,
     const executorch::aten::optional<executorch::aten::Tensor>&
         other_zero_point,
     executorch::aten::ScalarType other_dtype,
     int64_t other_quant_min,
     int64_t other_quant_max,
-    executorch::aten::optional<int64_t> other_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
     const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
     executorch::aten::ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    executorch::aten::optional<int64_t> out_axis,
     double alpha,
     executorch::aten::Tensor& out);
 

diff --git a/backends/cadence/fused_quant/op_bmm.cpp b/backends/cadence/fused_quant/op_bmm.cpp
@@ -53,19 +53,16 @@ Tensor& bmm_out(
     ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    optional<int64_t> inp_axis,
     const optional<Tensor>& other_scale,
     const optional<Tensor>& other_zero_point,
     ScalarType other_dtype,
     int64_t other_quant_min,
     int64_t other_quant_max,
-    optional<int64_t> other_axis,
     const optional<Tensor>& out_scale,
     const optional<Tensor>& out_zero_point,
     ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    optional<int64_t> out_axis,
     Tensor& out) {
   int64_t batch = inp.size(0);
   int64_t M = inp.size(1);
@@ -87,7 +84,7 @@ Tensor& bmm_out(
     }
     inp_buf.resize(inp_numel);
     QParams qp = extract_qparams(
-        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
+        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp);
     FUSED_QUANT_DTYPE_SWITCH(
         inp.scalar_type(),
         scalar_t,
@@ -104,12 +101,7 @@ Tensor& bmm_out(
     }
     other_buf.resize(other_numel);
     QParams qp = extract_qparams(
-        other_scale,
-        other_zero_point,
-        other_quant_min,
-        other_quant_max,
-        other_axis,
-        other);
+        other_scale, other_zero_point, other_quant_min, other_quant_max, other);
     FUSED_QUANT_DTYPE_SWITCH(other.scalar_type(),
                              scalar_t,
                              dequantize_buffer(
@@ -126,7 +118,7 @@ Tensor& bmm_out(
     bmm_kernel(inp_float, other_float, result_float.data(), batch, M, K, N);
 
     QParams qp = extract_qparams(
-        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
+        out_scale, out_zero_point, out_quant_min, out_quant_max, out);
     FUSED_QUANT_DTYPE_SWITCH(out.scalar_type(),
                              scalar_t,
                              quantize_buffer(

diff --git a/backends/cadence/fused_quant/op_bmm.h b/backends/cadence/fused_quant/op_bmm.h
@@ -24,20 +24,17 @@ executorch::aten::Tensor& bmm_out(
     executorch::aten::ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    executorch::aten::optional<int64_t> inp_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& other_scale,
     const executorch::aten::optional<executorch::aten::Tensor>&
         other_zero_point,
     executorch::aten::ScalarType other_dtype,
     int64_t other_quant_min,
     int64_t other_quant_max,
-    executorch::aten::optional<int64_t> other_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
     const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
     executorch::aten::ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    executorch::aten::optional<int64_t> out_axis,
     executorch::aten::Tensor& out);
 
 } // namespace native

diff --git a/backends/cadence/fused_quant/op_hardswish.cpp b/backends/cadence/fused_quant/op_hardswish.cpp
@@ -40,13 +40,11 @@ Tensor& hardswish_out(
     ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    optional<int64_t> inp_axis,
     const optional<Tensor>& out_scale,
     const optional<Tensor>& out_zero_point,
     ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    optional<int64_t> out_axis,
     Tensor& out) {
   int64_t numel = inp.numel();
 
@@ -60,7 +58,7 @@ Tensor& hardswish_out(
     }
     inp_buf.resize(numel);
     QParams qp = extract_qparams(
-        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
+        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp);
     FUSED_QUANT_DTYPE_SWITCH(
         inp.scalar_type(),
         scalar_t,
@@ -74,7 +72,7 @@ Tensor& hardswish_out(
     hardswish_kernel(inp_float, result_float.data(), numel);
 
     QParams qp = extract_qparams(
-        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
+        out_scale, out_zero_point, out_quant_min, out_quant_max, out);
     FUSED_QUANT_DTYPE_SWITCH(
         out.scalar_type(),
         scalar_t,

diff --git a/backends/cadence/fused_quant/op_hardswish.h b/backends/cadence/fused_quant/op_hardswish.h
@@ -23,13 +23,11 @@ executorch::aten::Tensor& hardswish_out(
     executorch::aten::ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    executorch::aten::optional<int64_t> inp_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
     const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
     executorch::aten::ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    executorch::aten::optional<int64_t> out_axis,
     executorch::aten::Tensor& out);
 
 } // namespace native

diff --git a/backends/cadence/fused_quant/op_mul.cpp b/backends/cadence/fused_quant/op_mul.cpp
@@ -42,19 +42,16 @@ Tensor& mul_out(
     ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    optional<int64_t> inp_axis,
     const optional<Tensor>& other_scale,
     const optional<Tensor>& other_zero_point,
     ScalarType other_dtype,
     int64_t other_quant_min,
     int64_t other_quant_max,
-    optional<int64_t> other_axis,
     const optional<Tensor>& out_scale,
     const optional<Tensor>& out_zero_point,
     ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    optional<int64_t> out_axis,
     Tensor& out) {
   (void)ctx;
   (void)inp_dtype;
@@ -74,7 +71,7 @@ Tensor& mul_out(
     }
     inp_buf.resize(numel);
     QParams qp = extract_qparams(
-        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
+        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp);
     FUSED_QUANT_DTYPE_SWITCH(
         inp.scalar_type(),
         scalar_t,
@@ -90,12 +87,7 @@ Tensor& mul_out(
     }
     other_buf.resize(numel);
     QParams qp = extract_qparams(
-        other_scale,
-        other_zero_point,
-        other_quant_min,
-        other_quant_max,
-        other_axis,
-        other);
+        other_scale, other_zero_point, other_quant_min, other_quant_max, other);
     FUSED_QUANT_DTYPE_SWITCH(
         other.scalar_type(),
         scalar_t,
@@ -109,7 +101,7 @@ Tensor& mul_out(
     mul_kernel(inp_float, other_float, result_float.data(), numel);
 
     QParams qp = extract_qparams(
-        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
+        out_scale, out_zero_point, out_quant_min, out_quant_max, out);
     FUSED_QUANT_DTYPE_SWITCH(
         out.scalar_type(),
         scalar_t,

diff --git a/backends/cadence/fused_quant/op_mul.h b/backends/cadence/fused_quant/op_mul.h
@@ -24,20 +24,17 @@ executorch::aten::Tensor& mul_out(
     executorch::aten::ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    executorch::aten::optional<int64_t> inp_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& other_scale,
     const executorch::aten::optional<executorch::aten::Tensor>&
         other_zero_point,
     executorch::aten::ScalarType other_dtype,
     int64_t other_quant_min,
     int64_t other_quant_max,
-    executorch::aten::optional<int64_t> other_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
     const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
     executorch::aten::ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    executorch::aten::optional<int64_t> out_axis,
     executorch::aten::Tensor& out);
 
 } // namespace native

diff --git a/backends/cadence/fused_quant/op_relu.cpp b/backends/cadence/fused_quant/op_relu.cpp
@@ -39,13 +39,11 @@ Tensor& relu_out(
     ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    optional<int64_t> inp_axis,
     const optional<Tensor>& out_scale,
     const optional<Tensor>& out_zero_point,
     ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    optional<int64_t> out_axis,
     Tensor& out) {
   int64_t numel = inp.numel();
 
@@ -59,7 +57,7 @@ Tensor& relu_out(
     }
     inp_buf.resize(numel);
     QParams qp = extract_qparams(
-        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
+        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp);
     FUSED_QUANT_DTYPE_SWITCH(
         inp.scalar_type(),
         scalar_t,
@@ -73,7 +71,7 @@ Tensor& relu_out(
     relu_kernel(inp_float, result_float.data(), numel);
 
     QParams qp = extract_qparams(
-        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
+        out_scale, out_zero_point, out_quant_min, out_quant_max, out);
     FUSED_QUANT_DTYPE_SWITCH(
         out.scalar_type(),
         scalar_t,

diff --git a/backends/cadence/fused_quant/op_relu.h b/backends/cadence/fused_quant/op_relu.h
@@ -23,13 +23,11 @@ executorch::aten::Tensor& relu_out(
     executorch::aten::ScalarType inp_dtype,
     int64_t inp_quant_min,
     int64_t inp_quant_max,
-    executorch::aten::optional<int64_t> inp_axis,
     const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
     const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
     executorch::aten::ScalarType out_dtype,
     int64_t out_quant_min,
     int64_t out_quant_max,
-    executorch::aten::optional<int64_t> out_axis,
     executorch::aten::Tensor& out);
 
 } // namespace native