From 358d92510f195150f3e7f32b66fdaca3f02f1ace Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 1 Jun 2026 11:40:45 +0200 Subject: [PATCH 1/4] Test QR rules with CUDA --- test/mooncake/qr.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/mooncake/qr.jl b/test/mooncake/qr.jl index bbb9a8d17..c4f0df9e0 100644 --- a/test/mooncake/qr.jl +++ b/test/mooncake/qr.jl @@ -20,4 +20,11 @@ for T in (BLASFloats..., GenericFloats...), n in (17, m, 23) TestSuite.test_mooncake_qr(AT, (m, m); atol = m * n * TestSuite.precision(T), rtol = m * n * TestSuite.precision(T)) end end + if T ∈ BLASFloats && CUDA.functional() + TestSuite.test_mooncake_qr(CuMatrix{T}, (m, n); atol = m * n * TestSuite.precision(T), rtol = m * n * TestSuite.precision(T)) + if m == n + AT = Diagonal{T, CuVector{T}} + TestSuite.test_mooncake_qr(AT, (m, m); atol = m * n * TestSuite.precision(T), rtol = m * n * TestSuite.precision(T)) + end + end end From 8193ef727a5b58cfbdd5fe67c694e1cd0fec1841 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 2 Jun 2026 15:21:17 -0400 Subject: [PATCH 2/4] Incremental progress on pb --- src/pullbacks/qr.jl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/pullbacks/qr.jl b/src/pullbacks/qr.jl index fb90704c1..146ac1c87 100644 --- a/src/pullbacks/qr.jl +++ b/src/pullbacks/qr.jl @@ -31,9 +31,18 @@ function check_and_prepare_qr_cotangents( ΔR₁₁ = UpperTriangular(view(ΔR, 1:p, 1:p)) ΔR₁₂ = view(ΔR, 1:p, (p + 1):n) ΔR₂₂ = view(ΔR, (p + 1):minmn, (p + 1):n) - Δgauge_R = norm(view(ΔR₂₂, uppertriangularind(ΔR₂₂)), Inf) - Δgauge_R = max(Δgauge_R, norm(view(ΔR₂₂, diagind(ΔR₂₂)), Inf)) - Δgauge = max(Δgauge, Δgauge_R) + if p < minmn # otherwise ΔR₂₂ is empty + # uppertriangularind generates linear indices + # compute the appropriate offset in ΔR so we aren't + # operating on a view-of-view, which doesn't work + # for GPU arrays + offset = LinearIndices(ΔR)[p + 1, p + 1] + upper_inds = uppertriangularind(ΔR₂₂) .+ offset + ΔR₂₂upper = view(ΔR, upper_inds) + Δgauge_R = norm(ΔR₂₂upper, Inf) + Δgauge_R = max(Δgauge_R, norm(view(ΔR₂₂, diagind(ΔR₂₂)), Inf)) + Δgauge = max(Δgauge, Δgauge_R) + end else ΔR₁₁ = nothing ΔR₁₂ = nothing @@ -160,7 +169,16 @@ function remove_qr_gauge_dependence!(ΔQ, ΔR, A, Q, R; rank_atol = MatrixAlgebr end ΔR₂₂ = view(ΔR, (r + 1):minmn, (r + 1):size(R, 2)) zero!(diagview(ΔR₂₂)) - zero!(view(ΔR₂₂, uppertriangularind(ΔR₂₂))) + if r < minmn + # uppertriangularind generates linear indices + # compute the appropriate offset in ΔR so we aren't + # operating on a view-of-view, which doesn't work + # for GPU arrays + offset = LinearIndices(ΔR)[r + 1, r + 1] + upper_inds = uppertriangularind(ΔR₂₂) .+ offset + ΔR₂₂upper = view(ΔR, upper_inds) + zero!(ΔR₂₂upper) + end return ΔQ, ΔR end From c5f5ab7617e02a23b7f28bb5b54bedce7c2cb737 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 4 Jun 2026 11:46:19 -0400 Subject: [PATCH 3/4] Turn off Diagonal QR tests for CUDA for now --- test/mooncake/qr.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/mooncake/qr.jl b/test/mooncake/qr.jl index c4f0df9e0..ae009528d 100644 --- a/test/mooncake/qr.jl +++ b/test/mooncake/qr.jl @@ -22,9 +22,9 @@ for T in (BLASFloats..., GenericFloats...), n in (17, m, 23) end if T ∈ BLASFloats && CUDA.functional() TestSuite.test_mooncake_qr(CuMatrix{T}, (m, n); atol = m * n * TestSuite.precision(T), rtol = m * n * TestSuite.precision(T)) - if m == n + #=if m == n AT = Diagonal{T, CuVector{T}} TestSuite.test_mooncake_qr(AT, (m, m); atol = m * n * TestSuite.precision(T), rtol = m * n * TestSuite.precision(T)) - end + end=# # currently broken end end From 57993e17aa25b52ac05f14e8e7efca4a6bc785e5 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 9 Jun 2026 08:10:49 -0400 Subject: [PATCH 4/4] Working QR --- src/pullbacks/qr.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pullbacks/qr.jl b/src/pullbacks/qr.jl index 146ac1c87..300d82532 100644 --- a/src/pullbacks/qr.jl +++ b/src/pullbacks/qr.jl @@ -36,8 +36,8 @@ function check_and_prepare_qr_cotangents( # compute the appropriate offset in ΔR so we aren't # operating on a view-of-view, which doesn't work # for GPU arrays - offset = LinearIndices(ΔR)[p + 1, p + 1] - upper_inds = uppertriangularind(ΔR₂₂) .+ offset + I = uppertriangularind(ΔR₂₂) + upper_inds = view(LinearIndices(ΔR), (p + 1):minmn, (p + 1):n)[I] ΔR₂₂upper = view(ΔR, upper_inds) Δgauge_R = norm(ΔR₂₂upper, Inf) Δgauge_R = max(Δgauge_R, norm(view(ΔR₂₂, diagind(ΔR₂₂)), Inf)) @@ -84,7 +84,7 @@ function qr_pullback!( Q₁ = view(Q, :, 1:p) - R₁₁ = UpperTriangular(view(R, 1:p, 1:p)) + R₁₁ = UpperTriangular(R[1:p, 1:p]) R₁₂ = view(R, 1:p, (p + 1):n) ΔA₁ = view(ΔA, :, 1:p) @@ -110,7 +110,8 @@ function qr_pullback!( Md = diagview(M) Md .= real.(Md) end - ΔA₁ .+= rdiv!(mul!(ΔQ₁, Q₁, M, +1, 1), R₁₁') + mul!(ΔQ₁, Q₁, M, +1, 1) + ΔA₁ .+= rdiv!(ΔQ₁, R₁₁') return ΔA end