diff --git a/.ci/scripts/setup-webgpu-linux-deps.sh b/.ci/scripts/setup-webgpu-linux-deps.sh
index 8ece5899489..b24ffb460a9 100644
--- a/.ci/scripts/setup-webgpu-linux-deps.sh
+++ b/.ci/scripts/setup-webgpu-linux-deps.sh
@@ -5,26 +5,93 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+# Vendor Dawn (Tint) + SwiftShader for the WebGPU backend CI WITHOUT hosting a
+# private prebuilt:
+#   * Dawn  : Google's official nightly prebuilt, downloaded directly from
+#             github.com/google/dawn/releases (pinned tag+rev+sha256) -- the same
+#             "fetch a pinned upstream prebuilt" pattern used for other CI deps.
+#   * SwiftShader : built from source at a pinned rev compatible with the Dawn
+#             above (the ossci prebuilt is from 2020, too old for current Dawn). No S3.
+# Dawn (Chrome's WebGPU impl; its WGSL compiler Tint is the spec reference) on
+# SwiftShader gives a headless, deterministic, spec-faithful CLI backend.
+#
+# Exports Dawn_DIR / VK_ICD_FILENAMES / LD_LIBRARY_PATH for the cmake build+run.
+# Local/rig override: set DAWN_PREBUILT_DIR=<dir containing lib64/cmake/Dawn> to
+# skip the Dawn download.
 set -ex
 
-# SwiftShader: software Vulkan adapter for GPU-less CI (LunarG SDK not needed).
-install_swiftshader() {
-  _https_amazon_aws=https://ossci-android.s3.amazonaws.com
-  _swiftshader_archive=swiftshader-abe07b943-prebuilt.tar.gz
-  _swiftshader_dir=/tmp/swiftshader
-  mkdir -p $_swiftshader_dir
+# --- pinned versions (bump rev+sha together when upgrading Dawn) --------------
+DAWN_TAG="${DAWN_TAG:-v20260423.175430}"
+DAWN_REV="${DAWN_REV:-31e25af254ab572c77054edec4946d2244e184dd}"
+DAWN_SHA256="${DAWN_SHA256:-ac76fac090162dc1ecea5ed0f28a557bb8f49efc47faab01886105ace82b7b64}"
+# SwiftShader rev verified compatible with DAWN_REV (the old ossci prebuilt is
+# from 2020 and is incompatible with current Dawn -> no adapter / zero compute).
+SWIFTSHADER_REV="${SWIFTSHADER_REV:-9898204d91d6a60b6a08ad74fe4ac52a6913111b}"
 
-  _tmp_archive="/tmp/${_swiftshader_archive}"
+_dawn_dir="${DAWN_PREBUILT_DIR:-/tmp/dawn-ci}"
+_ss_dir=/tmp/swiftshader
 
-  curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
-    --output "${_tmp_archive}" "$_https_amazon_aws/${_swiftshader_archive}"
+# --- toolchain prereqs --------------------------------------------------------
+# Dawn dlopens the system Vulkan loader at runtime (libvulkan1). And the
+# ubuntu-latest prebuilt is built with a bleeding-edge GCC: it references
+# libstdc++ symbols newer than ubuntu-22.04's default (e.g. _M_replace_cold,
+# GCC 13+), so the static .a won't link against the stock runtime. Pull a current
+# libstdc++ from the ubuntu-toolchain-r PPA when the symbol floor isn't met. All
+# of this is scoped to the WebGPU CI job; newer libstdc++ is backward-compatible.
+if command -v apt-get >/dev/null 2>&1; then
+  _SUDO=""; command -v sudo >/dev/null 2>&1 && _SUDO="sudo"
+  ${_SUDO} apt-get update -y || true
+  ${_SUDO} apt-get install -y libvulkan1 software-properties-common || true
+  if ! strings /usr/lib/x86_64-linux-gnu/libstdc++.so.6 2>/dev/null \
+      | grep -q "GLIBCXX_3.4.32"; then
+    ${_SUDO} add-apt-repository -y ppa:ubuntu-toolchain-r/test || true
+    ${_SUDO} apt-get update -y || true
+    ${_SUDO} apt-get install -y libstdc++6 || true  # newest GCC runtime
+  fi
+fi
 
-  tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"
+# The native binaries / pybind lib run INSIDE the CI conda env, whose libstdc++
+# predates GLIBCXX_3.4.32 (the Dawn prebuilt's floor) -- the same wall ssjia hit
+# for the vulkan op tests. Upgrade the conda runtime libstdc++ so the loaded
+# libstdc++.so.6 (conda's, not the system one) satisfies Dawn at run time.
+if command -v conda >/dev/null 2>&1; then
+  conda install -y -c conda-forge "libstdcxx-ng>=14" || true
+fi
+
+# --- Dawn: official prebuilt from GitHub (no S3) ------------------------------
+mkdir -p "${_dawn_dir}"
+if [[ ! -d "${_dawn_dir}/lib64/cmake/Dawn" ]]; then
+  _dawn_tar="/tmp/Dawn-${DAWN_REV}-ubuntu-latest-Release.tar.gz"
+  curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
+    --output "${_dawn_tar}" \
+    "https://github.com/google/dawn/releases/download/${DAWN_TAG}/Dawn-${DAWN_REV}-ubuntu-latest-Release.tar.gz"
+  echo "${DAWN_SHA256}  ${_dawn_tar}" | sha256sum -c -
+  # archive top dir is Dawn-<rev>-ubuntu-latest-Release/{lib64,include,bin}
+  tar -C "${_dawn_dir}" --strip-components=1 -xzf "${_dawn_tar}"
+fi
 
-  export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
-  export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/:${LD_LIBRARY_PATH}"
-  export ETVK_USING_SWIFTSHADER=1
-}
+# --- SwiftShader: build from source at a pinned rev (no S3) -------------------
+# The old ossci prebuilt (swiftshader-abe07b943, 2020) is incompatible with the
+# current Dawn; build a matching modern SwiftShader instead. Self-contained
+# cmake build (vendored LLVM); the ICD lands under build/<OS>/.
+if [[ ! -d "${_ss_dir}/build" ]]; then
+  if [[ ! -d "${_ss_dir}/.git" ]]; then
+    git clone https://github.com/google/swiftshader "${_ss_dir}"
+  fi
+  git -C "${_ss_dir}" checkout "${SWIFTSHADER_REV}"
+  # vk_swiftshader's deps are vendored in-tree; tolerate unreachable
+  # disabled-feature submodules (angle, test-only) failing to fetch.
+  git -C "${_ss_dir}" submodule update --init --recursive || true
+  cmake -S "${_ss_dir}" -B "${_ss_dir}/build" -DCMAKE_BUILD_TYPE=Release \
+    -DSWIFTSHADER_BUILD_TESTS=OFF -DSWIFTSHADER_BUILD_PVR=OFF \
+    -DSWIFTSHADER_BUILD_BENCHMARKS=OFF
+  cmake --build "${_ss_dir}/build" --parallel "$(nproc)" --target vk_swiftshader
+fi
+_ss_icd="$(find "${_ss_dir}/build" -name vk_swiftshader_icd.json 2>/dev/null | head -1)"
+[[ -n "${_ss_icd}" ]] || { echo "ERROR: SwiftShader ICD not found after build" >&2; exit 1; }
 
-install_swiftshader
-bash backends/webgpu/scripts/setup-wgpu-native.sh
+_ss_libdir="$(dirname "${_ss_icd}")"
+export Dawn_DIR="${_dawn_dir}/lib64/cmake/Dawn"
+export VK_ICD_FILENAMES="${_ss_icd}"
+export LD_LIBRARY_PATH="${_ss_libdir}:${LD_LIBRARY_PATH:-}"
+export WEBGPU_USING_SWIFTSHADER=1
diff --git a/.ci/scripts/test_backend.sh b/.ci/scripts/test_backend.sh
index fe9b564a18f..8b7a36cd79d 100755
--- a/.ci/scripts/test_backend.sh
+++ b/.ci/scripts/test_backend.sh
@@ -58,11 +58,10 @@ if [[ "$FLOW" == *vulkan* ]]; then
 fi
 
 if [[ "$FLOW" == *webgpu* ]]; then
-    # Setup swiftshader (software Vulkan adapter for GPU-less runners) and wgpu-native,
-    # which are required to build and run the WebGPU delegate.
+    # Dawn (Tint) + SwiftShader, the spec-faithful headless WebGPU backend.
     source .ci/scripts/setup-webgpu-linux-deps.sh
 
-    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_WEBGPU=ON"
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_WEBGPU=ON -DDawn_DIR=$Dawn_DIR"
 fi
 
 if [[ "$FLOW" == *arm* ]]; then
diff --git a/.github/workflows/test-webgpu-native.yml b/.github/workflows/test-webgpu-native.yml
new file mode 100644
index 00000000000..7220ef9f7b5
--- /dev/null
+++ b/.github/workflows/test-webgpu-native.yml
@@ -0,0 +1,54 @@
+name: Test WebGPU Native (Dawn)
+
+# The substantive WebGPU op-coverage gate. The shared operators suite only
+# delegates add.Tensor to WebGPU (everything else is CPU fallback), so the real
+# Dawn coverage comes from the native test executables (rms_norm, multi-dispatch
+# ordering, scratch). This runs them on Dawn (Tint) + SwiftShader, headless, on a
+# CPU runner -- separate from _test_backend.yml so that reusable template stays
+# untouched.
+
+# Nightly-only for now: this job builds SwiftShader from source (vendored LLVM),
+# which is too expensive to run on every PR while the workflow's reliability is
+# still being established. Once it has proven stable, re-enable a scoped PR
+# trigger with a paths: filter (backends/webgpu/**, the webgpu CI scripts, and
+# this file). The pull_request-aware ref/concurrency expressions below are kept
+# intentionally so that re-enable is a one-line change.
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    branches:
+      - main
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-webgpu-native:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      runner: linux.4xlarge.memory
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: recursive
+      timeout: 120
+      script: |
+        set -eux
+
+        # The generic Linux job uses the base conda env, not the image's; activate
+        # the image env (it has the pinned from-source torch). Mirrors
+        # test-vulkan-operators-linux in pull.yml.
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Install the python package + runtime deps (the .pte exporters).
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+
+        # Vendor Dawn (Tint) + SwiftShader, then build + run the native executables.
+        source .ci/scripts/setup-webgpu-linux-deps.sh
+        bash backends/webgpu/scripts/test_webgpu_native_ci.sh
diff --git a/backends/webgpu/CMakeLists.txt b/backends/webgpu/CMakeLists.txt
index 719d86b3008..d6ad80fdd20 100644
--- a/backends/webgpu/CMakeLists.txt
+++ b/backends/webgpu/CMakeLists.txt
@@ -54,29 +54,14 @@ target_include_directories(
 
 target_link_libraries(webgpu_backend PRIVATE vulkan_schema executorch_core)
 
-# Native build: link against wgpu-native
-set(WGPU_NATIVE_DIR
-    "${CMAKE_CURRENT_SOURCE_DIR}/third-party/wgpu-native"
-    CACHE PATH "Path to wgpu-native installation"
-)
-
-# Link the shared lib; the static .a carries LLVM bitcode that breaks LTO.
-# Suffix resolves per platform: .so on Linux, .dylib on macOS.
-set(WGPU_LIB_NAME "libwgpu_native${CMAKE_SHARED_LIBRARY_SUFFIX}")
-set(WGPU_LIB "${WGPU_NATIVE_DIR}/lib/${WGPU_LIB_NAME}")
-if(NOT EXISTS "${WGPU_LIB}")
-  message(FATAL_ERROR "wgpu-native not found at ${WGPU_NATIVE_DIR}. "
-                      "Run: bash backends/webgpu/scripts/setup-wgpu-native.sh"
-  )
-endif()
-
-add_library(wgpu_native SHARED IMPORTED)
-set_target_properties(wgpu_native PROPERTIES IMPORTED_LOCATION "${WGPU_LIB}")
-
-target_include_directories(
-  webgpu_backend PUBLIC $<BUILD_INTERFACE:${WGPU_NATIVE_DIR}/include>
-)
-target_link_libraries(webgpu_backend PRIVATE wgpu_native)
+# Native WebGPU backend: Dawn (Tint) + SwiftShader; deps script sets Dawn_DIR.
+# Native-only: browser/Emscripten builds use the system webgpu.h and never reach
+# this find_package (root CMake gates it via EXECUTORCH_BUILD_WEBGPU).
+# dawn::webgpu_dawn's link interface references Threads::Threads.
+find_package(Threads REQUIRED)
+find_package(Dawn REQUIRED)
+set(WEBGPU_GPU_LIB dawn::webgpu_dawn)
+target_link_libraries(webgpu_backend PUBLIC ${WEBGPU_GPU_LIB})
 
 if(APPLE)
   target_link_libraries(
@@ -100,50 +85,17 @@ install(
   DESTINATION ${CMAKE_INSTALL_LIBDIR}
 )
 
-# Native test target
-if(EXECUTORCH_BUILD_WEBGPU_TEST)
-  add_executable(webgpu_native_test test/test_webgpu_native.cpp)
-
-  target_include_directories(
-    webgpu_native_test PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
-                               "${WGPU_NATIVE_DIR}/include"
-  )
-
-  target_link_libraries(
-    webgpu_native_test
-    PRIVATE webgpu_backend
-            wgpu_native
-            executorch_core
-            extension_module_static
-            extension_data_loader
-            extension_tensor
-            portable_kernels
-            portable_ops_lib
-  )
-
-  if(APPLE)
-    target_link_libraries(
-      webgpu_native_test PRIVATE "-framework Metal" "-framework QuartzCore"
-                                 "-framework CoreGraphics"
-    )
-  else()
-    target_link_libraries(webgpu_native_test PRIVATE dl m pthread)
-  endif()
-
-  target_compile_options(webgpu_native_test PRIVATE -fexceptions)
-  set_property(TARGET webgpu_native_test PROPERTY CXX_STANDARD 17)
-
-  add_executable(webgpu_rms_norm_test test/native/test_rms_norm.cpp)
-
+# Native test targets. Helper mirrors backends/vulkan's vulkan_op_test: every
+# test executable links the same backend + runtime libs.
+function(add_webgpu_native_test test_name test_src)
+  add_executable(${test_name} ${test_src})
   target_include_directories(
-    webgpu_rms_norm_test PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
-                                 "${WGPU_NATIVE_DIR}/include"
+    ${test_name} PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
   )
-
   target_link_libraries(
-    webgpu_rms_norm_test
+    ${test_name}
     PRIVATE webgpu_backend
-            wgpu_native
+            ${WEBGPU_GPU_LIB}
             executorch_core
             extension_module_static
             extension_data_loader
@@ -151,16 +103,19 @@ if(EXECUTORCH_BUILD_WEBGPU_TEST)
             portable_kernels
             portable_ops_lib
   )
-
   if(APPLE)
     target_link_libraries(
-      webgpu_rms_norm_test PRIVATE "-framework Metal" "-framework QuartzCore"
-                                   "-framework CoreGraphics"
+      ${test_name} PRIVATE "-framework Metal" "-framework QuartzCore"
+                           "-framework CoreGraphics"
     )
   else()
-    target_link_libraries(webgpu_rms_norm_test PRIVATE dl m pthread)
+    target_link_libraries(${test_name} PRIVATE dl m pthread)
   endif()
+  target_compile_options(${test_name} PRIVATE -fexceptions)
+  set_property(TARGET ${test_name} PROPERTY CXX_STANDARD 17)
+endfunction()
 
-  target_compile_options(webgpu_rms_norm_test PRIVATE -fexceptions)
-  set_property(TARGET webgpu_rms_norm_test PROPERTY CXX_STANDARD 17)
+if(EXECUTORCH_BUILD_WEBGPU_TEST)
+  add_webgpu_native_test(webgpu_native_test test/test_webgpu_native.cpp)
+  add_webgpu_native_test(webgpu_rms_norm_test test/native/test_rms_norm.cpp)
 endif()
diff --git a/backends/webgpu/runtime/WebGPUCompat.h b/backends/webgpu/runtime/WebGPUCompat.h
new file mode 100644
index 00000000000..06715e0fc81
--- /dev/null
+++ b/backends/webgpu/runtime/WebGPUCompat.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <webgpu/webgpu.h>
+
+#include <cstdint>
+
+namespace executorch::backends::webgpu {
+
+// Caller's instance must enable TimedWaitAny; returns the WaitAny status.
+inline WGPUWaitStatus webgpu_wait(WGPUInstance instance, WGPUFuture future) {
+  WGPUFutureWaitInfo info = {};
+  info.future = future;
+  return wgpuInstanceWaitAny(instance, 1, &info, UINT64_MAX);
+}
+
+} // namespace executorch::backends::webgpu
diff --git a/backends/webgpu/runtime/WebGPUDevice.cpp b/backends/webgpu/runtime/WebGPUDevice.cpp
index a5bbf8e5806..041cbe5a703 100644
--- a/backends/webgpu/runtime/WebGPUDevice.cpp
+++ b/backends/webgpu/runtime/WebGPUDevice.cpp
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/backends/webgpu/runtime/WebGPUCompat.h>
 #include <executorch/backends/webgpu/runtime/WebGPUDevice.h>
 
 #include <cstdio>
@@ -21,12 +22,10 @@ namespace {
 
 struct AdapterResult {
   WGPUAdapter adapter = nullptr;
-  bool done = false;
 };
 
 struct DeviceResult {
   WGPUDevice device = nullptr;
-  bool done = false;
 };
 
 void on_adapter_request(
@@ -46,7 +45,6 @@ void on_adapter_request(
         static_cast<int>(message.length),
         message.data);
   }
-  result->done = true;
 }
 
 void on_device_request(
@@ -66,7 +64,6 @@ void on_device_request(
         static_cast<int>(message.length),
         message.data);
   }
-  result->done = true;
 }
 
 void on_device_error(
@@ -88,25 +85,36 @@ void on_device_error(
 WebGPUContext create_webgpu_context() {
   WebGPUContext ctx;
 
-  ctx.instance = wgpuCreateInstance(nullptr);
+  // TimedWaitAny lets webgpu_wait() block on futures via wgpuInstanceWaitAny.
+  WGPUInstanceDescriptor instance_desc = {};
+#if defined(__EMSCRIPTEN__)
+  instance_desc.capabilities.timedWaitAnyEnable = true;
+  instance_desc.capabilities.timedWaitAnyMaxCount = 1;
+#else
+  WGPUInstanceFeatureName features[1] = {WGPUInstanceFeatureName_TimedWaitAny};
+  instance_desc.requiredFeatureCount = 1;
+  instance_desc.requiredFeatures = features;
+#endif
+  ctx.instance = wgpuCreateInstance(&instance_desc);
   if (!ctx.instance) {
     throw std::runtime_error("Failed to create WebGPU instance");
   }
 
-  // Request adapter using AllowSpontaneous mode (fires during
-  // wgpuInstanceProcessEvents or any other API call).
   AdapterResult adapter_result;
   WGPURequestAdapterCallbackInfo adapter_cb = {};
-  adapter_cb.mode = WGPUCallbackMode_AllowSpontaneous;
+  adapter_cb.mode = WGPUCallbackMode_WaitAnyOnly;
   adapter_cb.callback = on_adapter_request;
   adapter_cb.userdata1 = &adapter_result;
 
-  wgpuInstanceRequestAdapter(ctx.instance, nullptr, adapter_cb);
-  while (!adapter_result.done) {
-    wgpuInstanceProcessEvents(ctx.instance);
-  }
+  // No backend pin or forced fallback; Dawn auto-selects the adapter.
+  WGPURequestAdapterOptions adapter_opts = {};
+  adapter_opts.powerPreference = WGPUPowerPreference_HighPerformance;
+  adapter_opts.forceFallbackAdapter = false;
+  WGPUWaitStatus adapter_wait = webgpu_wait(
+      ctx.instance,
+      wgpuInstanceRequestAdapter(ctx.instance, &adapter_opts, adapter_cb));
 
-  if (!adapter_result.adapter) {
+  if (adapter_wait != WGPUWaitStatus_Success || !adapter_result.adapter) {
     wgpuInstanceRelease(ctx.instance);
     ctx.instance = nullptr;
     throw std::runtime_error(
@@ -118,7 +126,7 @@ WebGPUContext create_webgpu_context() {
   // Request device
   DeviceResult device_result;
   WGPURequestDeviceCallbackInfo device_cb = {};
-  device_cb.mode = WGPUCallbackMode_AllowSpontaneous;
+  device_cb.mode = WGPUCallbackMode_WaitAnyOnly;
   device_cb.callback = on_device_request;
   device_cb.userdata1 = &device_result;
 
@@ -131,12 +139,11 @@ WebGPUContext create_webgpu_context() {
   }
   device_desc.uncapturedErrorCallbackInfo.callback = on_device_error;
 
-  wgpuAdapterRequestDevice(ctx.adapter, &device_desc, device_cb);
-  while (!device_result.done) {
-    wgpuInstanceProcessEvents(ctx.instance);
-  }
+  WGPUWaitStatus device_wait = webgpu_wait(
+      ctx.instance,
+      wgpuAdapterRequestDevice(ctx.adapter, &device_desc, device_cb));
 
-  if (!device_result.device) {
+  if (device_wait != WGPUWaitStatus_Success || !device_result.device) {
     wgpuAdapterRelease(ctx.adapter);
     wgpuInstanceRelease(ctx.instance);
     ctx.adapter = nullptr;
diff --git a/backends/webgpu/runtime/WebGPUGraph.cpp b/backends/webgpu/runtime/WebGPUGraph.cpp
index a11b188f428..a60bfc18e3b 100644
--- a/backends/webgpu/runtime/WebGPUGraph.cpp
+++ b/backends/webgpu/runtime/WebGPUGraph.cpp
@@ -12,8 +12,8 @@
 #include <executorch/backends/vulkan/serialization/schema_generated.h>
 #include <executorch/runtime/core/named_data_map.h>
 
+#include <executorch/backends/webgpu/runtime/WebGPUCompat.h>
 #include <executorch/backends/webgpu/runtime/WebGPUDevice.h>
-#include <webgpu/wgpu.h>
 
 #include <cstring>
 #include <stdexcept>
@@ -471,7 +471,6 @@ void WebGPUGraph::execute() {
 namespace {
 
 struct MapCallbackData {
-  bool done = false;
   WGPUMapAsyncStatus status = WGPUMapAsyncStatus_Error;
 };
 
@@ -482,7 +481,6 @@ void buffer_map_callback(
     void* /*userdata2*/) {
   auto* data = static_cast<MapCallbackData*>(userdata1);
   data->status = status;
-  data->done = true;
 }
 
 } // namespace
@@ -491,18 +489,18 @@ void WebGPUGraph::copy_outputs(std::vector<std::pair<void*, size_t>>& outputs) {
   const size_t count = std::min(outputs.size(), output_staging_buffers_.size());
 
   std::vector<MapCallbackData> cb_data(count);
+  std::vector<WGPUFuture> map_futures(count, WGPUFuture{});
 
   for (size_t i = 0; i < count; i++) {
     if (outputs[i].second == 0) {
-      cb_data[i].done = true;
       cb_data[i].status = WGPUMapAsyncStatus_Success;
       continue;
     }
     WGPUBufferMapCallbackInfo cb_info = {};
-    cb_info.mode = WGPUCallbackMode_AllowSpontaneous;
+    cb_info.mode = WGPUCallbackMode_WaitAnyOnly;
     cb_info.callback = buffer_map_callback;
     cb_info.userdata1 = &cb_data[i];
-    wgpuBufferMapAsync(
+    map_futures[i] = wgpuBufferMapAsync(
         output_staging_buffers_[i],
         WGPUMapMode_Read,
         0,
@@ -510,7 +508,12 @@ void WebGPUGraph::copy_outputs(std::vector<std::pair<void*, size_t>>& outputs) {
         cb_info);
   }
 
-  wgpuDevicePoll(device_, true, nullptr);
+  for (size_t i = 0; i < count; i++) {
+    if (outputs[i].second != 0 &&
+        webgpu_wait(instance_, map_futures[i]) != WGPUWaitStatus_Success) {
+      throw std::runtime_error("WebGPU: WaitAny failed for output map");
+    }
+  }
 
   for (size_t i = 0; i < count; i++) {
     if (outputs[i].second == 0) {
diff --git a/backends/webgpu/scripts/setup-wgpu-native.sh b/backends/webgpu/scripts/setup-wgpu-native.sh
deleted file mode 100755
index 12ca2afdc46..00000000000
--- a/backends/webgpu/scripts/setup-wgpu-native.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# Download prebuilt wgpu-native binaries for native (non-browser) WebGPU testing.
-# Usage: bash backends/webgpu/scripts/setup-wgpu-native.sh
-
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-WGPU_DIR="${SCRIPT_DIR}/../third-party/wgpu-native"
-
-WGPU_VERSION="v27.0.4.0"
-WGPU_BASE_URL="https://github.com/gfx-rs/wgpu-native/releases/download/${WGPU_VERSION}"
-
-OS="$(uname -s)"
-case "${OS}" in
-    Darwin) PLATFORM="macos"; LIB_EXT="dylib" ;;
-    Linux)  PLATFORM="linux"; LIB_EXT="so" ;;
-    *)
-        echo "Unsupported OS: ${OS}"
-        exit 1
-        ;;
-esac
-
-if [[ -f "${WGPU_DIR}/lib/libwgpu_native.${LIB_EXT}" ]]; then
-    echo "wgpu-native already installed at ${WGPU_DIR}"
-    exit 0
-fi
-
-ARCH="$(uname -m)"
-
-case "${ARCH}" in
-    x86_64)  WGPU_ARCH="x86_64" ;;
-    aarch64|arm64) WGPU_ARCH="aarch64" ;;
-    *)
-        echo "Unsupported architecture: ${ARCH}"
-        exit 1
-        ;;
-esac
-
-ZIP_NAME="wgpu-${PLATFORM}-${WGPU_ARCH}-release.zip"
-URL="${WGPU_BASE_URL}/${ZIP_NAME}"
-
-echo "Downloading wgpu-native ${WGPU_VERSION} for ${PLATFORM}-${WGPU_ARCH}..."
-TMPDIR_DL="$(mktemp -d)"
-trap "rm -rf ${TMPDIR_DL}" EXIT
-
-curl -sL "${URL}" -o "${TMPDIR_DL}/${ZIP_NAME}"
-
-mkdir -p "${WGPU_DIR}"
-unzip -qo "${TMPDIR_DL}/${ZIP_NAME}" -d "${WGPU_DIR}"
-
-echo "Installed wgpu-native to ${WGPU_DIR}"
-ls -la "${WGPU_DIR}/lib/"
diff --git a/backends/webgpu/scripts/test_webgpu_native_ci.sh b/backends/webgpu/scripts/test_webgpu_native_ci.sh
new file mode 100644
index 00000000000..e8cdfe8a955
--- /dev/null
+++ b/backends/webgpu/scripts/test_webgpu_native_ci.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Build + run the WebGPU native test executables on Dawn (Tint) + SwiftShader.
+# This is the substantive op-coverage gate: unlike the python operators suite
+# (which only delegates add.Tensor to WebGPU, the rest CPU-fallback), these
+# executables run rms_norm / multi-dispatch ordering / scratch through the real
+# WebGPU backend on Dawn.
+#
+# Assumes the Dawn env is already sourced (Dawn_DIR + VK_ICD_FILENAMES +
+# LD_LIBRARY_PATH) via .ci/scripts/setup-webgpu-linux-deps.sh. For local runs:
+#   source .ci/scripts/setup-webgpu-linux-deps.sh
+#   bash backends/webgpu/scripts/test_webgpu_native_ci.sh
+#
+# Builds whatever native test targets are present in the landed tree (NOT a fixed
+# list). This stack lands: webgpu_native_test, webgpu_rms_norm_test (base) +
+# webgpu_dispatch_order_test, webgpu_scratch_buffer_test (D107576199). update_cache
+# / SDPA executables join automatically once their sibling diffs land.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+EXECUTORCH_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
+PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
+NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu)
+BUILD_DIR="${EXECUTORCH_ROOT}/cmake-out-webgpu-dawn-ci"
+
+if [[ -z "${Dawn_DIR:-}" ]]; then
+  echo "ERROR: Dawn_DIR not set. Source .ci/scripts/setup-webgpu-linux-deps.sh first." >&2
+  exit 1
+fi
+
+cd "${EXECUTORCH_ROOT}"
+
+# ── Exports for the model-driven executables (best-effort) ───────────────────
+# native_test + rms_norm read .pte/golden inputs via WEBGPU_TEST_* env and
+# self-skip if absent; dispatch_order + scratch are standalone (no exports).
+PTE_MODEL="/tmp/webgpu_add_test.pte"
+PTE_CHAINED_MODEL="/tmp/webgpu_chained_add_test.pte"
+RMS_NORM_DIR="/tmp/rmsn"
+RMS_NORM_OK=1
+
+$PYTHON_EXECUTABLE -c "
+from executorch.backends.webgpu.test.ops.add.test_add import export_add_model, export_chained_add_model
+export_add_model('${PTE_MODEL}')
+export_chained_add_model('${PTE_CHAINED_MODEL}')
+" || echo "WARN: add export failed; webgpu_native_test self-skips models whose .pte is absent"
+
+$PYTHON_EXECUTABLE -c "
+from executorch.backends.webgpu.test.ops.rms_norm.test_rms_norm import export_rms_norm_cases
+export_rms_norm_cases('${RMS_NORM_DIR}')
+" || { echo "WARN: rms_norm export failed; skipping rms_norm native test"; RMS_NORM_OK=0; }
+
+# ── Configure (Dawn-only: no -DWEBGPU_IMPL; Dawn is the sole backend) ─────────
+echo "=== Configure WebGPU native tests on Dawn ==="
+rm -rf "${BUILD_DIR}"
+cmake \
+    -DEXECUTORCH_BUILD_WEBGPU=ON \
+    -DEXECUTORCH_BUILD_WEBGPU_TEST=ON \
+    -DDawn_DIR="${Dawn_DIR}" \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
+    -DCMAKE_BUILD_TYPE=Release \
+    -B "${BUILD_DIR}" \
+    "${EXECUTORCH_ROOT}"
+
+# ── Build + run every native test target that exists in this tree ────────────
+TARGETS=(webgpu_native_test webgpu_rms_norm_test webgpu_dispatch_order_test webgpu_scratch_buffer_test)
+BIN_DIR="${BUILD_DIR}/backends/webgpu"
+
+# Which targets are defined depends on which diffs are landed (native_test +
+# rms_norm here; dispatch_order + scratch from D107576199). Query the configured
+# target list ONCE so a not-yet-landed target is skipped WITHOUT masking a real
+# compile failure of a target that IS defined (CI uses the Make generator).
+DEFINED_TARGETS="$(cmake --build "${BUILD_DIR}" --target help 2>/dev/null || true)"
+
+# Fail loud if the probe found nothing (e.g. a non-Make generator or a cmake
+# regression): otherwise every target would skip and the job would go green
+# having tested nothing. webgpu_native_test is always defined at/after this diff.
+if ! printf '%s\n' "${DEFINED_TARGETS}" | grep -qw webgpu_native_test; then
+  echo "ERROR: cmake target probe returned no webgpu_native_test; aborting" >&2
+  exit 1
+fi
+
+for t in "${TARGETS[@]}"; do
+  if printf '%s\n' "${DEFINED_TARGETS}" | grep -qw "${t}"; then
+    # Defined target: build with stderr visible; set -e fails the job on a real
+    # build error (never silently skipped).
+    cmake --build "${BUILD_DIR}" --target "${t}" -j"${NPROC}"
+    echo "built ${t}"
+  else
+    echo "(target ${t} not defined in this tree — skipping)"
+  fi
+done
+
+echo "=== Run native tests on Dawn + SwiftShader ==="
+# native_test is model-driven; only run it if the export produced its .pte
+# (CI's setup-linux.sh provides the executorch wheel so exports succeed; a bare
+# local run without the wheel self-skips here rather than hard-failing on load).
+if [[ -x "${BIN_DIR}/webgpu_native_test" && -f "${PTE_MODEL}" ]]; then
+  env WEBGPU_TEST_MODEL="${PTE_MODEL}" \
+      WEBGPU_TEST_CHAINED_MODEL="${PTE_CHAINED_MODEL}" \
+      WEBGPU_TEST_SDPA_DIR=/tmp/ \
+      "${BIN_DIR}/webgpu_native_test"
+else
+  echo "(skipping webgpu_native_test: no exported .pte — needs the executorch python wheel)"
+fi
+if [[ "${RMS_NORM_OK}" == "1" && -x "${BIN_DIR}/webgpu_rms_norm_test" ]]; then
+  "${BIN_DIR}/webgpu_rms_norm_test" "${RMS_NORM_DIR}"
+fi
+[[ -x "${BIN_DIR}/webgpu_dispatch_order_test" ]] && "${BIN_DIR}/webgpu_dispatch_order_test"
+[[ -x "${BIN_DIR}/webgpu_scratch_buffer_test" ]] && "${BIN_DIR}/webgpu_scratch_buffer_test"
+
+echo "=== WebGPU native tests on Dawn: all run targets passed ==="
diff --git a/backends/webgpu/test/test_build_webgpu.sh b/backends/webgpu/test/test_build_webgpu.sh
index 5e3a20e96ac..1b90bdcb593 100755
--- a/backends/webgpu/test/test_build_webgpu.sh
+++ b/backends/webgpu/test/test_build_webgpu.sh
@@ -5,7 +5,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# End-to-end build and test script for the WebGPU backend (native via wgpu-native).
+# End-to-end build and test script for the WebGPU backend (native via Dawn).
 # Usage: bash backends/webgpu/test/test_build_webgpu.sh
 
 set -e
@@ -51,22 +51,20 @@ export_rms_norm_cases('${RMS_NORM_DIR}')
 " || { echo "WARN: rms_norm export failed; skipping rms_norm native test"; RMS_NORM_PYTEST_OK=0; }
 fi
 
-# ── Step 3: Native build + test (wgpu-native) ────────────────────────────────
+# ── Step 3: Native build + test (Dawn + SwiftShader) ─────────────────────────
 
-WGPU_DIR="${EXECUTORCH_ROOT}/backends/webgpu/third-party/wgpu-native"
+# Vendor Dawn (Tint) + SwiftShader and export Dawn_DIR/VK_ICD_FILENAMES. Set
+# DAWN_PREBUILT_DIR to an existing Dawn install to skip the download locally.
+echo "=== Installing Dawn (Tint) + SwiftShader ==="
+source "${EXECUTORCH_ROOT}/.ci/scripts/setup-webgpu-linux-deps.sh"
 
-# Auto-download wgpu-native if not present
-if [[ ! -d "${WGPU_DIR}/lib" ]]; then
-    echo "=== Installing wgpu-native ==="
-    bash "${EXECUTORCH_ROOT}/backends/webgpu/scripts/setup-wgpu-native.sh"
-fi
-
-echo "=== Step 3: Native build with wgpu-native ==="
+echo "=== Step 3: Native build with Dawn ==="
 NATIVE_BUILD_DIR="${EXECUTORCH_ROOT}/cmake-out-webgpu-native"
 rm -rf "${NATIVE_BUILD_DIR}"
 
 cmake \
     -DEXECUTORCH_BUILD_WEBGPU=ON \
+    -DDawn_DIR="${Dawn_DIR}" \
     -DEXECUTORCH_BUILD_WEBGPU_TEST=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \