From 30b996dfbd97f05dbf757c4faa65bc5b245929b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 5 May 2026 10:23:09 +0200
Subject: [PATCH 01/11] fix(docker): link script

---
 docker/README.md               |  3 ++-
 docker/link-editable-source.sh | 25 ++++++++++---------------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/docker/README.md b/docker/README.md
index 78bd0da4..35b4bd13 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -23,6 +23,7 @@ Notes:
 - `~/zed_models` is mounted into `/usr/local/zed/resources` to match the direct `docker run` setup.
 - `/dev/dri` is masked inside the container so host Mesa/AMD render nodes do not override the NVIDIA runtime devices.
 - NVIDIA PRIME/GLX environment variables are exported to bias OpenGL/EGL selection toward the NVIDIA stack when using X11 forwarding.
-- Python source changes are picked up from the mounted repo, including `extensions/rcs_zed`.
+- The simulator still bootstraps EGL for offscreen MuJoCo camera rendering; set `RCS_MUJOCO_DISABLE_EGL=1` only if you intentionally want to disable that path.
+- Python source changes are picked up from the mounted repo.
 - If you change C++ code in `rcs` or `rcs_fr3`, rebuild the image.
 - For non-GPU hosts, comment out the GPU-related lines in `docker/compose/dev.yml`.
diff --git a/docker/link-editable-source.sh b/docker/link-editable-source.sh
index 0b1b967a..0a93db47 100644
--- a/docker/link-editable-source.sh
+++ b/docker/link-editable-source.sh
@@ -10,26 +10,21 @@ fi
 
 SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
 
-link_mixed_package() {
+link_compiled_package() {
     src_dir="$1"
     dst_dir="$2"
-    keep_dir_name="${3:-}"
 
     if [ ! -d "$src_dir" ] || [ ! -d "$dst_dir" ]; then
         return
     fi
 
-    # Replace only the Python sources from the mounted repo and keep compiled
-    # artifacts that were installed into site-packages during image build.
-    for path in "$src_dir"/* "$src_dir"/.[!.]* "$src_dir"/..?*; do
-        [ -e "$path" ] || continue
-        name="$(basename "$path")"
-        if [ -n "$keep_dir_name" ] && [ "$name" = "$keep_dir_name" ]; then
-            continue
-        fi
-        rm -rf "$dst_dir/$name"
-        cp -as "$path" "$dst_dir/$name"
-    done
+    tmp_keep="$(mktemp -d)"
+    find "$dst_dir" -maxdepth 1 \( -name '_core*.so' -o -name 'lib*.so*' \) -exec mv {} "$tmp_keep/" \;
+    rm -rf "$dst_dir"
+    mkdir -p "$dst_dir"
+    cp -as "$src_dir/." "$dst_dir/"
+    find "$tmp_keep" -maxdepth 1 -type f -exec mv {} "$dst_dir/" \;
+    rmdir "$tmp_keep"
 }
 
 link_pure_python_package() {
@@ -44,8 +39,8 @@ link_pure_python_package() {
     ln -s "$src_dir" "$dst_dir"
 }
 
-link_mixed_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs" "_core"
-link_mixed_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3" "_core"
+link_compiled_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs"
+link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed"

From fce0ff68741139518a07f30ee4abd066b820e9cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 5 May 2026 10:29:43 +0200
Subject: [PATCH 02/11] feat: zed cli rgb snapshot

---
 extensions/rcs_zed/src/rcs_zed/__main__.py | 104 ++++++++++++++++-----
 1 file changed, 81 insertions(+), 23 deletions(-)

diff --git a/extensions/rcs_zed/src/rcs_zed/__main__.py b/extensions/rcs_zed/src/rcs_zed/__main__.py
index cfe51fa5..8e2558f4 100644
--- a/extensions/rcs_zed/src/rcs_zed/__main__.py
+++ b/extensions/rcs_zed/src/rcs_zed/__main__.py
@@ -1,4 +1,6 @@
 import logging
+from pathlib import Path
+from time import monotonic
 
 import cv2
 import typer
@@ -14,6 +16,36 @@ def _display_frame(window_name: str, frame):
     cv2.imshow(window_name, frame.camera.color.data[:, :, ::-1])
 
 
+def _resolve_serial(serial: str | None) -> str:
+    if serial is not None:
+        return serial
+
+    try:
+        devices = ZEDCameraSet.enumerate_connected_devices()
+    except RuntimeError as exc:
+        msg = str(exc)
+        raise typer.BadParameter(msg) from exc
+    if len(devices) == 0:
+        msg = "No ZED devices connected."
+        raise typer.BadParameter(msg)
+    return next(iter(devices))
+
+
+def _create_rgb_camera(serial: str, width: int, height: int, fps: int) -> ZEDCameraSet:
+    return ZEDCameraSet(
+        cameras={
+            "viewer": common.BaseCameraConfig(
+                identifier=serial,
+                resolution_width=width,
+                resolution_height=height,
+                frame_rate=fps,
+            )
+        },
+        enable_depth=False,
+        enable_imu=False,
+    )
+
+
 @zed_app.command()
 def serials():
     """Reads out the serial numbers and models of connected ZED devices via the SDK."""
@@ -39,29 +71,8 @@ def rgb_view(
     window_name: str = typer.Option("ZED RGB", help="OpenCV window title."),
 ):
     """Open a live RGB window using the RCS ZED camera interface."""
-    if serial is None:
-        try:
-            devices = ZEDCameraSet.enumerate_connected_devices()
-        except RuntimeError as exc:
-            msg = str(exc)
-            raise typer.BadParameter(msg) from exc
-        if len(devices) == 0:
-            msg = "No ZED devices connected."
-            raise typer.BadParameter(msg)
-        serial = next(iter(devices))
-
-    camera = ZEDCameraSet(
-        cameras={
-            "viewer": common.BaseCameraConfig(
-                identifier=serial,
-                resolution_width=width,
-                resolution_height=height,
-                frame_rate=fps,
-            )
-        },
-        enable_depth=False,
-        enable_imu=False,
-    )
+    serial = _resolve_serial(serial)
+    camera = _create_rgb_camera(serial, width, height, fps)
 
     try:
         camera.open()
@@ -81,6 +92,53 @@ def rgb_view(
         cv2.destroyAllWindows()
 
 
+@zed_app.command("rgb-snapshot")
+def rgb_snapshot(
+    serial: str | None = typer.Argument(None, help="Optional ZED serial number. Uses the first device if omitted."),
+    output: Path = typer.Option(Path("zed_latest.png"), "--output", "-o", help="PNG file to write."),
+    width: int = typer.Option(1280, help="Requested capture width."),
+    height: int = typer.Option(720, help="Requested capture height."),
+    fps: int = typer.Option(30, help="Requested capture frame rate."),
+    duration: float = typer.Option(1.0, "--duration", "-d", help="Seconds to read frames before saving."),
+):
+    """Open a ZED camera briefly and save the latest RGB frame as a PNG."""
+    if duration <= 0:
+        msg = "Duration must be greater than zero."
+        raise typer.BadParameter(msg)
+    if output.suffix.lower() != ".png":
+        msg = "Output path must end in .png."
+        raise typer.BadParameter(msg)
+
+    serial = _resolve_serial(serial)
+    camera = _create_rgb_camera(serial, width, height, fps)
+
+    try:
+        camera.open()
+    except Exception as exc:
+        msg = f"Could not start ZED camera {serial}: {exc}"
+        raise typer.BadParameter(msg) from exc
+
+    latest_frame = None
+    frames_read = 0
+    deadline = monotonic() + duration
+    try:
+        while latest_frame is None or monotonic() < deadline:
+            latest_frame = camera.poll_frame("viewer")
+            frames_read += 1
+    finally:
+        camera.close()
+
+    assert latest_frame is not None
+    output.parent.mkdir(parents=True, exist_ok=True)
+    image_bgr = latest_frame.camera.color.data[:, :, ::-1]
+    print(latest_frame.camera.color.intrinsics)
+    if not cv2.imwrite(str(output), image_bgr):
+        msg = f"Could not write PNG to {output}."
+        raise typer.BadParameter(msg)
+
+    typer.echo(f"Saved {output} from ZED {serial} after reading {frames_read} frame(s).")
+
+
 def main():
     zed_app()
 

From 2f7fe19f9f9db6d1cc8e0373c676d85789f19cbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 5 May 2026 18:56:24 +0200
Subject: [PATCH 03/11] fix(teleop): depth in cameras

---
 examples/teleop/franka.py                  | 2 +-
 extensions/rcs_fr3/src/rcs_fr3/creators.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/teleop/franka.py b/examples/teleop/franka.py
index 2f0d6ede..3736d55b 100644
--- a/examples/teleop/franka.py
+++ b/examples/teleop/franka.py
@@ -117,7 +117,7 @@ def get_env():
                     for name, identifier in ZED_CAMERA_DICT.items()
                 },
                 kwargs={
-                    "enable_depth": False,
+                    "enable_depth": INCLUDE_DEPTH,
                     "enable_imu": False,
                 },
             )
diff --git a/extensions/rcs_fr3/src/rcs_fr3/creators.py b/extensions/rcs_fr3/src/rcs_fr3/creators.py
index 2225a577..e28401e4 100644
--- a/extensions/rcs_fr3/src/rcs_fr3/creators.py
+++ b/extensions/rcs_fr3/src/rcs_fr3/creators.py
@@ -203,7 +203,7 @@ def create_env(self, cfg: FR3HardwareEnvCreatorConfig) -> gym.Env:
             camera_set.start()
             camera_set.wait_for_frames()
             logger.info("CameraSet started")
-            env = CameraSetWrapper(env, camera_set)
+            env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth)
 
         if cfg.relative_to != RelativeTo.NONE:
             env = RelativeActionSpace(env, max_mov=cfg.max_relative_movement, relative_to=cfg.relative_to)
@@ -236,7 +236,7 @@ def create_env(self, cfg: FR3MultiHardwareEnvCreatorConfig) -> gym.Env:
             camera_set.start()
             camera_set.wait_for_frames()
             logger.info("CameraSet started")
-            env = CameraSetWrapper(env, camera_set)
+            env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth)
         return CoverWrapper(env)
 
     def config(self) -> FR3MultiHardwareEnvCreatorConfig:

From 9d44dc5e124d969b9d8a8ad54ded3a5dd1dd0088 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 5 May 2026 18:56:52 +0200
Subject: [PATCH 04/11] chore(quest): simplify right/left swapping logic

---
 python/rcs/operator/quest.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/python/rcs/operator/quest.py b/python/rcs/operator/quest.py
index 322b870a..158850f0 100644
--- a/python/rcs/operator/quest.py
+++ b/python/rcs/operator/quest.py
@@ -128,6 +128,14 @@ def _reset_state(self):
                 self._last_controller_pose[controller] = Pose()
                 self._grp_pos[controller] = 1
 
+    def _swap_controller_input(self, input_data: dict[str, Any]) -> dict[str, Any]:
+        if not self.config.switched_left_right:
+            return input_data
+        swapped = copy.copy(input_data)
+        swapped["left"] = input_data["right"]
+        swapped["right"] = input_data["left"]
+        return swapped
+
     @staticmethod
     def _normalize_axis(value: bool | float | int) -> float:
         if isinstance(value, bool):
@@ -139,13 +147,6 @@ def consume_commands(self) -> TeleopCommands:
         with self._cmd_lock:
             cmds = copy.copy(self._commands)
             self._commands = TeleopCommands()
-            if self.config.switched_left_right:
-                swapped_reset_origin_to_current = {}
-                if "left" in cmds.reset_origin_to_current:
-                    swapped_reset_origin_to_current["right"] = cmds.reset_origin_to_current["left"]
-                if "right" in cmds.reset_origin_to_current:
-                    swapped_reset_origin_to_current["left"] = cmds.reset_origin_to_current["right"]
-                cmds.reset_origin_to_current = swapped_reset_origin_to_current
             return cmds
 
     def reset_operator_state(self):
@@ -176,11 +177,7 @@ def consume_action(self) -> dict[str, ArmWithGripper]:
                     tquat=np.concatenate([transform.translation(), transform.rotation_q()]),
                     gripper=np.array([self._grp_pos[controller]]),
                 )
-        return (
-            {"left": transforms["right"], "right": transforms["left"]}
-            if self.config.switched_left_right
-            else transforms
-        )
+        return transforms
 
     def set_camera(self, observation: dict) -> None:
         if not self.config.display_cameras:
@@ -241,6 +238,8 @@ def run(self):
                 logger.warning("[Quest Reader] packets arriving again")
                 warning_raised = False
 
+            input_data = self._swap_controller_input(input_data)
+
             # === Update Semantic Commands ===
             with self._cmd_lock:
                 if input_data[self._start_btn] and (self._prev_data is None or not self._prev_data[self._start_btn]):
@@ -293,7 +292,8 @@ def run(self):
 
                 gripper_axis = self._normalize_axis(input_data[controller][self._grp_btn[controller]])
                 # convert from IRIS to RCS gripper logic
-                self._grp_pos[controller] = 1.0 - gripper_axis
+                with self._resource_lock:
+                    self._grp_pos[controller] = 1.0 - gripper_axis
 
             self._prev_data = input_data
             rate_limiter()

From 4af54bbe3c193c60a6c05915751c651d5ded1fb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 5 May 2026 18:58:28 +0200
Subject: [PATCH 05/11] impr(robotiq): physical reset only in constructor

---
 extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
index a5ed5736..4bb10d4e 100644
--- a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
+++ b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
@@ -42,7 +42,8 @@ class RobotiQ2F85Gripper(Gripper):
     def __init__(self, cfg: RobotiQ2F85GripperConfig):
         super().__init__()
         self._cfg: RobotiQ2F85GripperConfig = cfg
-        self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number)
+        self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number, ignore_read_frequency=True)
+        self.gripper.reset()
 
     def get_normalized_width(self) -> float:
         # value between 0 and 1 (0 is closed)
@@ -61,7 +62,7 @@ def open(self) -> None:
         self.set_normalized_width(1.0)
 
     def reset(self) -> None:
-        self.gripper.reset()
+        self.open()
 
     def set_normalized_width(self, width: float, force: float = 0) -> None:
         """

From 2e7802192b2f5f714fa702ab4e6fa5619d3d6040 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 12 May 2026 11:55:05 +0200
Subject: [PATCH 06/11] feat: robotiq in teleop script

---
 .dockerignore                             |  2 ++
 docker/Dockerfile                         |  3 +-
 examples/teleop/franka.py                 | 35 ++++++++++++++-------
 extensions/rcs_fr3/src/rcs_fr3/configs.py |  1 +
 python/tests/test_quest_operator.py       | 38 ++++++++++++++++++++++-
 5 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index bdf319e7..8b767f67 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,3 @@
 **/build/
+real*
+test*
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2b03ad71..b72bde92 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -53,7 +53,8 @@ RUN chmod +x /usr/local/bin/link-editable-source \
     && uv pip install --no-build-isolation /opt/rcs-src/extensions/rcs_fr3 \
     && uv pip install /opt/rcs-src/extensions/rcs_realsense \
     && uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \
-    && uv pip install /opt/rcs-src/extensions/rcs_zed
+    && uv pip install /opt/rcs-src/extensions/rcs_zed \
+    && uv pip install /opt/rcs-src/examples/teleop/SimPublisher
 
 WORKDIR /workspace/robot-control-stack
 
diff --git a/examples/teleop/franka.py b/examples/teleop/franka.py
index 3736d55b..f3951c98 100644
--- a/examples/teleop/franka.py
+++ b/examples/teleop/franka.py
@@ -26,22 +26,26 @@
     "right": "0",
 }
 
-
-ROBOT_INSTANCE = RobotPlatform.SIMULATION
-# ROBOT_INSTANCE = RobotPlatform.HARDWARE
+# use `udevadm info -a -n /dev/ttyUSB0 | grep serial`
+# to find out the serial numbers
+ROBOTIQ_SERIAL = {
+    "left": "DAAQMPDC",
+    "right": "DAAQMJHX",
+}
 
 RECORD_FPS = 30
 # set camera dict to none disable cameras
-# CAMERA_DICT = {
-#     "left_wrist": "230422272017",
-#     "right_wrist": "230422271040",
-#     "side": "243522070385",
-#     "bird_eye": "243522070364",
-# }
-CAMERA_DICT = None
+CAMERA_DICT = {
+    "right_wrist": "230422272017",
+    "left_wrist": "230422271040",
+    # "side": "243522070385",
+    # "bird_eye": "243522070364",
+}
+# CAMERA_DICT = None
 ZED_CAMERA_DICT = {
-    "zed": "19928076",
+    "head": "19928076",
 }
+# ZED_CAMERA_DICT = None
 MQ3_ADDR = "10.42.0.1"
 INCLUDE_DEPTH = False
 
@@ -119,6 +123,7 @@ def get_env():
                 kwargs={
                     "enable_depth": INCLUDE_DEPTH,
                     "enable_imu": False,
+                    "include_right": True,
                 },
             )
         if DIGIT_DICT is not None:
@@ -137,11 +142,19 @@ def get_env():
         hw_cfg.camera_cfgs = camera_cfgs or None
         hw_cfg.control_mode = config.operator_class.control_mode[0]
         hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
+        hw_cfg.wrapper_cfg.binary_gripper = False
+        # hw_cfg.wrapper_cfg.binary_gripper = True
         hw_cfg.max_relative_movement = (
             0.5 if config.operator_class.control_mode[0] == ControlMode.JOINTS else (0.5, np.deg2rad(90))
         )
         hw_cfg.relative_to = config.operator_class.control_mode[1]
         hw_cfg.robot_to_shared_base_frame = robot2world
+        hw_cfg.robot_cfgs["left"].ignore_realtime = True
+        hw_cfg.robot_cfgs["right"].ignore_realtime = True
+        hw_cfg.robot_cfgs["left"].speed_factor = 0.3
+        hw_cfg.robot_cfgs["right"].speed_factor = 0.3
+        hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"]
+        hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"]
         env_rel = env_creator.create_env(hw_cfg)
         operator = GelloOperator(config) if isinstance(config, GelloConfig) else QuestOperator(config)
     else:
diff --git a/extensions/rcs_fr3/src/rcs_fr3/configs.py b/extensions/rcs_fr3/src/rcs_fr3/configs.py
index 3eaa8b0a..baf47eaf 100644
--- a/extensions/rcs_fr3/src/rcs_fr3/configs.py
+++ b/extensions/rcs_fr3/src/rcs_fr3/configs.py
@@ -152,6 +152,7 @@ def config(self) -> FR3MultiHardwareEnvCreatorConfig:
 
 
 class FrankaDuoEnv(DefaultFR3MultiHardwareEnv):
+    # use `udevadm info -a -n /dev/ttyUSB0 | grep serial` to find out the serial numbers
     left_gripper_serial_number = "DAAQMJHX"
     right_gripper_serial_number = "DAAQMPDC"
 
diff --git a/python/tests/test_quest_operator.py b/python/tests/test_quest_operator.py
index d5af2a78..55db719a 100644
--- a/python/tests/test_quest_operator.py
+++ b/python/tests/test_quest_operator.py
@@ -1,6 +1,9 @@
 import threading
 from typing import Any, cast
 
+import numpy as np
+
+from rcs._core.common import Pose
 from rcs.operator.interface import TeleopCommands
 from rcs.operator.quest import QuestOperator
 
@@ -9,13 +12,46 @@ class _Config:
     switched_left_right = True
 
 
+def test_swap_controller_input_swaps_left_and_right_packets():
+    operator = QuestOperator.__new__(QuestOperator)
+    operator.config = cast(Any, _Config())
+
+    input_data = {
+        "left": {"hand_trigger": 0.1, "index_trigger": 0.2},
+        "right": {"hand_trigger": 0.9, "index_trigger": 0.8},
+        "A": True,
+    }
+
+    swapped = QuestOperator._swap_controller_input(operator, input_data)
+
+    assert swapped["left"] == input_data["right"]
+    assert swapped["right"] == input_data["left"]
+    assert swapped["A"] is True
+
+
 def test_consume_commands_swaps_both_reset_origin_keys():
     operator = QuestOperator.__new__(QuestOperator)
     operator.config = cast(Any, _Config())
     operator._cmd_lock = threading.Lock()
-    operator._commands = TeleopCommands(reset_origin_to_current={"left": True, "right": False})
+    operator._commands = TeleopCommands(reset_origin_to_current={"right": True, "left": False})
 
     cmds = QuestOperator.consume_commands(operator)
 
     assert cmds.reset_origin_to_current == {"right": True, "left": False}
     assert operator._commands.reset_origin_to_current == {}
+
+
+def test_consume_action_swaps_gripper_with_controller():
+    operator = QuestOperator.__new__(QuestOperator)
+    operator.config = cast(Any, _Config())
+    operator._resource_lock = threading.Lock()
+    operator.controller_names = ["left", "right"]
+    operator._last_controller_pose = {key: Pose() for key in operator.controller_names}
+    operator._offset_pose = {key: Pose() for key in operator.controller_names}
+    operator._set_frame = {key: Pose() for key in operator.controller_names}
+    operator._grp_pos = {"left": 0.75, "right": 0.25}
+
+    actions = QuestOperator.consume_action(operator)
+
+    assert np.allclose(actions["left"]["gripper"], np.array([0.75]))
+    assert np.allclose(actions["right"]["gripper"], np.array([0.25]))

From 7b1889110425fb8114b77f061ca86ac95db1281a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 12 May 2026 11:58:26 +0200
Subject: [PATCH 07/11] impr(gripper): improved change detection

---
 .../rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py |  8 ++++---
 python/rcs/envs/base.py                       | 21 +++++++++++++------
 python/rcs/utils.py                           |  1 -
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
index 4bb10d4e..db4be1fa 100644
--- a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
+++ b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
@@ -42,12 +42,13 @@ class RobotiQ2F85Gripper(Gripper):
     def __init__(self, cfg: RobotiQ2F85GripperConfig):
         super().__init__()
         self._cfg: RobotiQ2F85GripperConfig = cfg
-        self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number, ignore_read_frequency=True)
+        self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number)
+        self._last_normalized_width = 1.0
         self.gripper.reset()
 
     def get_normalized_width(self) -> float:
-        # value between 0 and 1 (0 is closed)
-        return self.gripper.opening / 85
+        # Return the last commanded width to avoid a synchronous Modbus read on every env step.
+        return self._last_normalized_width
 
     def grasp(self) -> None:
         """
@@ -71,6 +72,7 @@ def set_normalized_width(self, width: float, force: float = 0) -> None:
         if not (0 <= width <= 1):
             msg = f"Width must be between 0 and 1, got {width}."
             raise ValueError(msg)
+        self._last_normalized_width = width
         abs_width = width * 85
         self.gripper.go_to(
             opening=float(abs_width),
diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py
index 71a210f0..7d12b336 100644
--- a/python/rcs/envs/base.py
+++ b/python/rcs/envs/base.py
@@ -935,6 +935,14 @@ def __init__(self, env, gripper: common.Gripper, binary: bool = True):
         self.gripper = gripper
         self._last_gripper_cmd = None
 
+    def _command_changed(self, gripper_action: np.ndarray) -> bool:
+        if self._last_gripper_cmd is None:
+            return True
+        last_action = np.asarray(self._last_gripper_cmd, dtype=np.float32)
+        if self.binary:
+            return not np.array_equal(gripper_action, last_action)
+        return not np.allclose(gripper_action, last_action, atol=1e-3, rtol=0.0)
+
     def close(self):
         self.gripper.close()
         super().close()
@@ -966,13 +974,14 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             gripper_action = [gripper_action]  # type: ignore
         if self.binary:
             gripper_action = np.round(gripper_action)
-        gripper_action = np.clip(gripper_action, 0.0, 1.0)
+        gripper_action = np.clip(np.asarray(gripper_action, dtype=np.float32), 0.0, 1.0)
 
-        if self.binary:
-            self.gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self.gripper.open()
-        else:
-            self.gripper.set_normalized_width(gripper_action[0])
-        self._last_gripper_cmd = gripper_action
+        if self._command_changed(gripper_action):
+            if self.binary:
+                self.gripper.grasp() if gripper_action[0] < 0.5 else self.gripper.open()
+            else:
+                self.gripper.set_normalized_width(float(gripper_action[0]))
+            self._last_gripper_cmd = gripper_action.tolist()
         del action[self.gripper_key]
         return action
 
diff --git a/python/rcs/utils.py b/python/rcs/utils.py
index 9f9b7e7f..25ef2abb 100644
--- a/python/rcs/utils.py
+++ b/python/rcs/utils.py
@@ -27,7 +27,6 @@ def __call__(self):
         if self.t is None:
             self.t = perf_counter()
             self._last_print = self.t
-            sleep(1 / self.frame_rate)
             return
         sleep_time = 1 / self.frame_rate - (perf_counter() - self.t)
         if sleep_time > 0:

From 785acb143c76697db27d7f5e299e45103f1526e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 12 May 2026 11:59:23 +0200
Subject: [PATCH 08/11] feat(zed): add option to record both eyes

---
 extensions/rcs_zed/src/rcs_zed/camera.py      | 156 +++++++++++++-----
 .../rcs_zed/tests/test_zed_extension.py       |  58 ++++++-
 2 files changed, 171 insertions(+), 43 deletions(-)

diff --git a/extensions/rcs_zed/src/rcs_zed/camera.py b/extensions/rcs_zed/src/rcs_zed/camera.py
index e9fd61dd..881a1368 100644
--- a/extensions/rcs_zed/src/rcs_zed/camera.py
+++ b/extensions/rcs_zed/src/rcs_zed/camera.py
@@ -30,6 +30,8 @@ class ZEDFrameBundle:
     color: np.ndarray
     timestamp: float
     color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]]
+    right_color: np.ndarray | None = None
+    right_color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] | None = None
     depth: np.ndarray | None = None
     accel: np.ndarray | None = None
     gyro: np.ndarray | None = None
@@ -47,12 +49,20 @@ def _intrinsics_matrix(fx: float, fy: float, cx: float, cy: float) -> np.ndarray
 
 
 class PyZEDCameraHandle:
-    def __init__(self, camera: typing.Any, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray):
+    def __init__(
+        self,
+        camera: typing.Any,
+        device_info: ZEDDeviceInfo,
+        color_intrinsics: np.ndarray,
+        right_color_intrinsics: np.ndarray | None = None,
+    ):
         self.camera = camera
         self.device_info = device_info
         self.color_intrinsics = color_intrinsics
+        self.right_color_intrinsics = right_color_intrinsics
         self.runtime_parameters = sl.RuntimeParameters()  # type: ignore[union-attr]
         self.image_mat = sl.Mat()  # type: ignore[union-attr]
+        self.right_image_mat = sl.Mat()  # type: ignore[union-attr]
         self.depth_mat = sl.Mat()  # type: ignore[union-attr]
         self.sensors_data = sl.SensorsData()  # type: ignore[union-attr]
 
@@ -69,7 +79,7 @@ def _timestamp_seconds(self) -> float:
             pass
         return time()
 
-    def grab_frame(self) -> ZEDFrameBundle:
+    def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle:
         err = self.camera.grab(self.runtime_parameters)
         if err != sl.ERROR_CODE.SUCCESS:  # type: ignore[union-attr]
             msg = f"Failed to grab ZED frame: {err}"
@@ -82,6 +92,15 @@ def grab_frame(self) -> ZEDFrameBundle:
             raise RuntimeError(msg)
         color_rgb = color_raw[:, :, :3][:, :, ::-1] if color_raw.shape[2] == 4 else color_raw[:, :, ::-1]
 
+        right_color = None
+        if include_right:
+            self.camera.retrieve_image(self.right_image_mat, sl.VIEW.RIGHT)  # type: ignore[union-attr]
+            right_raw = np.array(self.right_image_mat.get_data(), copy=True)
+            if right_raw.ndim != 3:
+                msg = f"Unexpected ZED right image shape {right_raw.shape}"
+                raise RuntimeError(msg)
+            right_color = right_raw[:, :, :3][:, :, ::-1] if right_raw.shape[2] == 4 else right_raw[:, :, ::-1]
+
         depth = None
         if self.device_info.has_depth:
             self.camera.retrieve_measure(self.depth_mat, sl.MEASURE.DEPTH)  # type: ignore[union-attr]
@@ -105,6 +124,8 @@ def grab_frame(self) -> ZEDFrameBundle:
 
         return ZEDFrameBundle(
             color=color_rgb,
+            right_color=right_color,
+            right_color_intrinsics=self.right_color_intrinsics if right_color is not None else None,
             depth=depth,
             accel=accel,
             gyro=gyro,
@@ -185,6 +206,7 @@ def open_camera(
         *,
         enable_depth: bool,
         enable_imu: bool,
+        include_right: bool,
     ) -> PyZEDCameraHandle:
         cls._require_sdk()
         assert sl is not None
@@ -206,6 +228,7 @@ def open_camera(
         information = camera.get_camera_information()
         calibration = information.camera_configuration.calibration_parameters
         left_cam = calibration.left_cam
+        right_cam = calibration.right_cam
         info = ZEDDeviceInfo(
             serial=str(config.identifier),
             model=cls._model_to_string(information.camera_model),
@@ -213,7 +236,15 @@ def open_camera(
             has_imu=enable_imu and cls._device_has_imu(information),
         )
         intrinsics = _intrinsics_matrix(left_cam.fx, left_cam.fy, left_cam.cx, left_cam.cy)
-        return PyZEDCameraHandle(camera=camera, device_info=info, color_intrinsics=intrinsics)
+        right_intrinsics = None
+        if include_right:
+            right_intrinsics = _intrinsics_matrix(right_cam.fx, right_cam.fy, right_cam.cx, right_cam.cy)
+        return PyZEDCameraHandle(
+            camera=camera,
+            device_info=info,
+            color_intrinsics=intrinsics,
+            right_color_intrinsics=right_intrinsics,
+        )
 
     def __init__(
         self,
@@ -221,6 +252,7 @@ def __init__(
         calibration_strategy: dict[str, CalibrationStrategy] | None = None,
         enable_depth: bool = True,
         enable_imu: bool = True,
+        include_right: bool = False,
     ) -> None:
         self.cameras = cameras
         if calibration_strategy is None:
@@ -228,12 +260,14 @@ def __init__(
         self.calibration_strategy = calibration_strategy
         self.enable_depth = enable_depth
         self.enable_imu = enable_imu
+        self.include_right = include_right
         self._logger = logging.getLogger(__name__)
-        self._camera_names = list(self.cameras.keys())
+        self._camera_names = self._logical_camera_names()
         self._available_devices: dict[str, ZEDDeviceInfo] = {}
         self._enabled_devices: dict[str, PyZEDCameraHandle] = {}
         self._frame_buffer_lock: dict[str, threading.Lock] = {}
         self._frame_buffer: dict[str, list[Frame]] = {}
+        self._pending_right_bundles: dict[str, ZEDFrameBundle] = {}
 
         assert (
             len({camera.resolution_width for camera in self.cameras.values()}) == 1
@@ -246,7 +280,63 @@ def camera_names(self) -> list[str]:
         return self._camera_names
 
     def config(self, camera_name) -> common.BaseCameraConfig:
-        return self.cameras[camera_name]
+        return self.cameras[self._physical_camera_name(camera_name)]
+
+    @staticmethod
+    def _right_camera_name(camera_name: str) -> str:
+        return f"{camera_name}_right"
+
+    def _physical_camera_name(self, camera_name: str) -> str:
+        if camera_name in self.cameras:
+            return camera_name
+        if self.include_right and camera_name.endswith("_right"):
+            physical_name = camera_name.removesuffix("_right")
+            if physical_name in self.cameras:
+                return physical_name
+        msg = f"Camera {camera_name} not found in the enabled devices"
+        raise AssertionError(msg)
+
+    def _is_right_camera_name(self, camera_name: str) -> bool:
+        return camera_name not in self.cameras and self._physical_camera_name(camera_name) != camera_name
+
+    def _logical_camera_names(self) -> list[str]:
+        names = list(self.cameras.keys())
+        if self.include_right:
+            names.extend(self._right_camera_name(camera_name) for camera_name in self.cameras)
+        return names
+
+    def _frame_from_bundle(self, camera_name: str, bundle: ZEDFrameBundle) -> Frame:
+        physical_name = self._physical_camera_name(camera_name)
+        extrinsics = self.calibration_strategy[physical_name].get_extrinsics()
+        is_right = self._is_right_camera_name(camera_name)
+        color_data = bundle.right_color if is_right else bundle.color
+        color_intrinsics = bundle.right_color_intrinsics if is_right else bundle.color_intrinsics
+        if color_data is None or color_intrinsics is None:
+            msg = f"Missing {'right' if is_right else 'left'} image data for {camera_name}"
+            raise RuntimeError(msg)
+
+        color = DataFrame(
+            data=color_data,
+            timestamp=bundle.timestamp,
+            intrinsics=color_intrinsics,
+            extrinsics=extrinsics,
+        )
+        depth = None
+        if not is_right and bundle.depth is not None:
+            depth = DataFrame(
+                data=bundle.depth,
+                timestamp=bundle.timestamp,
+                intrinsics=bundle.color_intrinsics,
+                extrinsics=extrinsics,
+            )
+
+        accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None
+        gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None
+        return Frame(
+            camera=CameraFrame(color=color, depth=depth),
+            imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None,
+            avg_timestamp=bundle.timestamp,
+        )
 
     def update_available_devices(self):
         self._available_devices = self.enumerate_connected_devices()
@@ -256,6 +346,7 @@ def open(self):
         self._enabled_devices = {}
         self._frame_buffer = {}
         self._frame_buffer_lock = {}
+        self._pending_right_bundles = {}
 
         for camera_name, camera_cfg in self.cameras.items():
             if camera_cfg.identifier not in self._available_devices:
@@ -267,6 +358,7 @@ def open(self):
                 camera_cfg,
                 enable_depth=self.enable_depth and device_info.has_depth,
                 enable_imu=self.enable_imu and device_info.has_imu,
+                include_right=self.include_right,
             )
             self._enabled_devices[camera_name] = opened
             self._frame_buffer[camera_name] = []
@@ -274,44 +366,34 @@ def open(self):
 
     def poll_frame(self, camera_name: str) -> Frame:
         assert camera_name in self.camera_names, f"Camera {camera_name} not found in the enabled devices"
-        device = self._enabled_devices[camera_name]
-        bundle = device.grab_frame()
-        extrinsics = self.calibration_strategy[camera_name].get_extrinsics()
-
-        color = DataFrame(
-            data=bundle.color,
-            timestamp=bundle.timestamp,
-            intrinsics=bundle.color_intrinsics,
-            extrinsics=extrinsics,
-        )
-        depth = None
-        if bundle.depth is not None:
-            depth = DataFrame(
-                data=bundle.depth,
-                timestamp=bundle.timestamp,
-                intrinsics=bundle.color_intrinsics,
-                extrinsics=extrinsics,
-            )
-
-        accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None
-        gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None
-
-        frame = Frame(
-            camera=CameraFrame(color=color, depth=depth),
-            imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None,
-            avg_timestamp=bundle.timestamp,
-        )
-
-        with self._frame_buffer_lock[camera_name]:
-            if len(self._frame_buffer[camera_name]) >= self.CALIBRATION_FRAME_SIZE:
-                self._frame_buffer[camera_name].pop(0)
-            self._frame_buffer[camera_name].append(copy.deepcopy(frame))
+        physical_name = self._physical_camera_name(camera_name)
+        device = self._enabled_devices[physical_name]
+
+        if self._is_right_camera_name(camera_name):
+            bundle = self._pending_right_bundles.pop(physical_name, None)
+            if bundle is None:
+                bundle = device.grab_frame(include_right=True)
+        else:
+            bundle = device.grab_frame(include_right=self.include_right)
+            if self.include_right and bundle.right_color is not None:
+                self._pending_right_bundles[physical_name] = bundle
+            else:
+                self._pending_right_bundles.pop(physical_name, None)
+
+        frame = self._frame_from_bundle(camera_name, bundle)
+
+        if not self._is_right_camera_name(camera_name):
+            with self._frame_buffer_lock[physical_name]:
+                if len(self._frame_buffer[physical_name]) >= self.CALIBRATION_FRAME_SIZE:
+                    self._frame_buffer[physical_name].pop(0)
+                self._frame_buffer[physical_name].append(copy.deepcopy(frame))
         return frame
 
     def close(self):
         for device in self._enabled_devices.values():
             device.close()
         self._enabled_devices = {}
+        self._pending_right_bundles = {}
 
     def calibrate(self) -> bool:
         for camera_name in self.cameras:
diff --git a/extensions/rcs_zed/tests/test_zed_extension.py b/extensions/rcs_zed/tests/test_zed_extension.py
index 32e8294f..cb17acec 100644
--- a/extensions/rcs_zed/tests/test_zed_extension.py
+++ b/extensions/rcs_zed/tests/test_zed_extension.py
@@ -20,8 +20,10 @@ def __init__(self, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray, fra
         self.color_intrinsics = color_intrinsics
         self._frame_bundle = frame_bundle
         self.closed = False
+        self.grab_calls: list[bool] = []
 
-    def grab_frame(self) -> ZEDFrameBundle:
+    def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle:
+        self.grab_calls.append(include_right)
         return self._frame_bundle
 
     def close(self):
@@ -31,7 +33,7 @@ def close(self):
 class PatchZedState(TypedDict):
     devices: dict[str, ZEDDeviceInfo]
     opened: dict[str, FakeOpenedZEDCamera]
-    open_calls: list[tuple[str, bool, bool]]
+    open_calls: list[tuple[str, bool, bool, bool]]
 
 
 @pytest.fixture()
@@ -41,8 +43,8 @@ def patch_zed(monkeypatch) -> PatchZedState:
     def fake_enumerate(_cls):
         return state["devices"]
 
-    def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool):
-        state["open_calls"].append((config.identifier, enable_depth, enable_imu))
+    def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool, include_right: bool):
+        state["open_calls"].append((config.identifier, enable_depth, enable_imu, include_right))
         return state["opened"][config.identifier]
 
     monkeypatch.setattr(ZEDCameraSet, "enumerate_connected_devices", classmethod(fake_enumerate))
@@ -78,7 +80,7 @@ def test_zed_frame_mapping_depth_scaling_and_imu_downgrade(patch_zed):
     frame = camera_set.poll_frame("wrist")
     assert frame.camera.color.intrinsics is not None
 
-    assert patch_zed["open_calls"] == [("123", True, False)]
+    assert patch_zed["open_calls"] == [("123", True, False, False)]
     assert np.array_equal(frame.camera.color.data, color)
     assert np.array_equal(frame.camera.depth.data, depth)  # type: ignore[union-attr]
     assert np.array_equal(frame.camera.color.intrinsics, intrinsics)
@@ -113,7 +115,51 @@ def test_zed_enumeration_and_multi_camera_open(patch_zed):
         },
     )
     camera_set.open()
-    assert patch_zed["open_calls"] == [("111", True, False), ("222", True, True)]
+    assert patch_zed["open_calls"] == [("111", True, False, False), ("222", True, True, False)]
     camera_set.close()
     assert opened["111"].closed
     assert opened["222"].closed
+
+
+def test_zed_include_right_adds_logical_right_camera_without_double_grab(patch_zed):
+    left_intrinsics = np.array([[100.0, 0.0, 10.0, 0.0], [0.0, 110.0, 20.0, 0.0], [0.0, 0.0, 1.0, 0.0]])
+    right_intrinsics = np.array([[120.0, 0.0, 12.0, 0.0], [0.0, 130.0, 22.0, 0.0], [0.0, 0.0, 1.0, 0.0]])
+    left_color = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
+    right_color = np.arange(27, 54, dtype=np.uint8).reshape(3, 3, 3)
+    frame_bundle = ZEDFrameBundle(
+        color=left_color,
+        right_color=right_color,
+        timestamp=12.5,
+        color_intrinsics=left_intrinsics,
+        right_color_intrinsics=right_intrinsics,
+    )
+    device_info = ZEDDeviceInfo(serial="123", model="ZED Mini", has_depth=True, has_imu=False)
+    opened = FakeOpenedZEDCamera(device_info=device_info, color_intrinsics=left_intrinsics, frame_bundle=frame_bundle)
+    patch_zed["devices"] = {"123": device_info}
+    patch_zed["opened"] = {"123": opened}
+
+    camera_set = ZEDCameraSet(
+        cameras={
+            "wrist": common.BaseCameraConfig(
+                identifier="123", resolution_width=1280, resolution_height=720, frame_rate=30
+            )
+        },
+        include_right=True,
+    )
+
+    assert camera_set.camera_names == ["wrist", "wrist_right"]
+    assert camera_set.config("wrist_right").identifier == "123"
+
+    camera_set.open()
+    assert patch_zed["open_calls"] == [("123", True, False, True)]
+    left_frame = camera_set.poll_frame("wrist")
+    right_frame = camera_set.poll_frame("wrist_right")
+
+    assert opened.grab_calls == [True]
+    assert np.array_equal(left_frame.camera.color.data, left_color)
+    assert np.array_equal(right_frame.camera.color.data, right_color)
+    assert np.array_equal(left_frame.camera.color.intrinsics, left_intrinsics)
+    assert np.array_equal(right_frame.camera.color.intrinsics, right_intrinsics)
+    assert left_frame.avg_timestamp == right_frame.avg_timestamp == 12.5
+    assert left_frame.camera.depth is None
+    assert right_frame.camera.depth is None

From 07a8d3d4e5542e205ba584eec438994365bfd3e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 8 May 2026 16:57:31 +0200
Subject: [PATCH 09/11] feat(inference): add initial inference loop script

---
 examples/inference/README.md        |   0
 examples/inference/franka.py        | 236 ++++++++++++++++++++++++++++
 examples/inference/requirements.txt |   1 +
 3 files changed, 237 insertions(+)
 create mode 100644 examples/inference/README.md
 create mode 100644 examples/inference/franka.py
 create mode 100644 examples/inference/requirements.txt

diff --git a/examples/inference/README.md b/examples/inference/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/inference/franka.py b/examples/inference/franka.py
new file mode 100644
index 00000000..8ac52572
--- /dev/null
+++ b/examples/inference/franka.py
@@ -0,0 +1,236 @@
+from dataclasses import dataclass
+import datetime
+import logging
+from typing import Any
+
+import numpy as np
+from rcs._core.common import BaseCameraConfig, RobotPlatform
+from rcs._core.sim import SimConfig
+from rcs.envs.base import ControlMode, RelativeTo
+from rcs.envs.configs import EmptyWorldFR3Duo
+import gymnasium as gym
+
+import rcs
+from rcs.envs.tasks import PickTaskConfig
+from vlagents.client import RemoteAgent
+from vlagents.policies import Act, Obs
+
+logger = logging.getLogger(__name__)
+
+
+ROBOT2IP = {
+    "right": "192.168.102.1",
+    "left": "192.168.101.1",
+}
+ROBOT2ID = {
+    "left": "1",
+    "right": "0",
+}
+
+
+ROBOT_INSTANCE = RobotPlatform.SIMULATION
+# ROBOT_INSTANCE = RobotPlatform.HARDWARE
+
+# set camera dict to none disable cameras
+# CAMERA_DICT = {
+#     "left_wrist": "230422272017",
+#     "right_wrist": "230422271040",
+#     "side": "243522070385",
+#     "bird_eye": "243522070364",
+# }
+CAMERA_DICT = None
+ZED_CAMERA_DICT = {
+    "zed": "19928076",
+}
+INCLUDE_DEPTH = False
+
+
+# DIGIT_DICT = {
+#     "digit_right_left": "D21182",
+#     "digit_right_right": "D21193"
+# }
+DIGIT_DICT = None
+
+
+DATASET_PATH = "test_iris"
+INSTRUCTION = "pick up cube"
+RECORD_FPS = 30
+CONTROL_MODE = ControlMode.JOINTS
+RELATIVETO = RelativeTo.NONE
+DEBUG = True
+VIDEO_PATH = "videos"
+MODEL = "pi05"
+IP = ""
+PORT = 20997
+
+
+robot2world = {
+    "right": rcs.common.Pose(
+        translation=np.array([0, 0, 0]), rpy_vector=np.array([0.89360858, -0.17453293, 0.46425758])
+    ),
+    "left": rcs.common.Pose(
+        translation=np.array([0, 0, 0]), rpy_vector=np.array([-0.89360858, -0.17453293, -0.46425758])
+    ),
+}
+
+@dataclass
+class InferenceConfig:
+    vlagents_host: str
+    vlagents_port: int
+    vlagents_model: str
+    instruction: str
+    robot_keys: list[str]
+    jpeg_encoding: bool = True
+    on_same_machine: bool = False
+
+class ModelInference:
+    def __init__(self, env: gym.Env, cfg: InferenceConfig):
+        self.env = env
+        self.gripper_state = 1
+        self._cfg = cfg
+        self.remote_agent = RemoteAgent(cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding)
+
+
+    def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs:
+        cameras = {}
+        for frame in obs["frames"]:
+            cameras[frame] = obs["frames"][frame]["rgb"]["data"]
+        state = []
+        for robot in self._cfg.robot_keys:
+            # TODO: currently hardcoded for joints
+            state.append(obs[robot]["joints"])
+            state.append(obs[robot]["gripper"])
+
+        return Obs(cameras=None, gripper=None, info=info, state=np.concatenate(state))
+
+    def action_agents2rcs(self, action: Act) -> dict[str, Any]:
+        act = {}
+        for idx, robot in enumerate(self._cfg.robot_keys):
+            # TODO: this is currently hard coded for franka joints
+            act[robot] = {}
+            act[robot]["joints"] = action.action[idx*8:idx*8+7]
+            act[robot]["gripper"] = action.action[idx*8+7]
+        return act
+
+
+    def loop(self):
+        obs, _ = self.env.reset()
+
+        obs_dict = self.obs_rcs2agents(obs)
+
+        self.remote_agent.reset(obs_dict, instruction=self._cfg.instruction)
+
+        while True:
+            action = self.remote_agent.act(obs_dict)
+            if action.done:
+                logger.info("done issued by agent, shutting down")
+                break
+            obs, _, _, _, info = self.env.step(self.action_agents2rcs(action))
+
+            obs_dict = self.obs_rcs2agents(obs)
+
+
+def get_env():
+    if ROBOT_INSTANCE == RobotPlatform.HARDWARE:
+        from rcs_fr3.configs import FrankaDuoEnv
+        from rcs_fr3.creators import HardwareCameraCreatorConfig
+
+        env_creator = FrankaDuoEnv()
+        env_creator.left_ip = ROBOT2IP["left"]
+        env_creator.right_ip = ROBOT2IP["right"]
+        hw_cfg = env_creator.config()
+        camera_cfgs: dict[str, HardwareCameraCreatorConfig] = {}
+        if CAMERA_DICT is not None:
+            camera_cfgs["realsense"] = HardwareCameraCreatorConfig(
+                camera_type_id="realsense",
+                camera_cfgs={
+                    name: BaseCameraConfig(
+                        identifier=identifier,
+                        resolution_width=1280,
+                        resolution_height=720,
+                        frame_rate=30,
+                    )
+                    for name, identifier in CAMERA_DICT.items()
+                },
+            )
+        if ZED_CAMERA_DICT is not None:
+            camera_cfgs["zed"] = HardwareCameraCreatorConfig(
+                camera_type_id="zed",
+                camera_cfgs={
+                    name: BaseCameraConfig(
+                        identifier=identifier,
+                        resolution_width=1280,
+                        resolution_height=720,
+                        frame_rate=30,
+                    )
+                    for name, identifier in ZED_CAMERA_DICT.items()
+                },
+                kwargs={
+                    "enable_depth": False,
+                    "enable_imu": False,
+                },
+            )
+        if DIGIT_DICT is not None:
+            camera_cfgs["digit"] = HardwareCameraCreatorConfig(
+                camera_type_id="digit",
+                camera_cfgs={
+                    name: BaseCameraConfig(
+                        identifier=identifier,
+                        resolution_width=320,
+                        resolution_height=240,
+                        frame_rate=30,
+                    )
+                    for name, identifier in DIGIT_DICT.items()
+                },
+            )
+        hw_cfg.camera_cfgs = camera_cfgs or None
+        hw_cfg.control_mode = CONTROL_MODE
+        hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
+        hw_cfg.max_relative_movement = (
+            0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90))
+        )
+        hw_cfg.relative_to = RELATIVETO
+        hw_cfg.robot_to_shared_base_frame = robot2world
+        env_rel = env_creator.create_env(hw_cfg)
+    else:
+        # FR3
+
+        scene = EmptyWorldFR3Duo()
+        sim_cfg_data = scene.config()
+        sim_cfg_data.sim_cfg = SimConfig(
+            async_control=True, realtime=True, frequency=RECORD_FPS, max_convergence_steps=500
+        )
+        sim_cfg_data.relative_to = RelativeTo.CONFIGURED_ORIGIN
+        sim_cfg_data.wrapper_cfg.include_depth = INCLUDE_DEPTH
+        if sim_cfg_data.root_frame_objects is None:
+            sim_cfg_data.root_frame_objects = {}
+        sim_cfg_data.task_cfg = PickTaskConfig(robot_name="right")
+
+        env_rel = scene.create_env(sim_cfg_data)
+
+    return env_rel
+
+
+def main():
+    env_rel  = get_env()
+    env_rel.reset()
+
+    VIDEO_PATH.mkdir(parents=True, exist_ok=True)
+    timestamp =  str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+
+
+    camera_set = env_rel.get_wrapper_attr("camera_set")
+    camera_set.record_video(VIDEO_PATH, timestamp)
+
+    # env = RHCWrapper(env, exec_horizon=1)
+
+    cfg = InferenceConfig(IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"])
+    controller = ModelInference(env_rel, cfg)
+    input("robot is about to be controlled by AI, press enter to start")
+    with env_rel:
+        controller.loop()
+
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/examples/inference/requirements.txt b/examples/inference/requirements.txt
new file mode 100644
index 00000000..cb843744
--- /dev/null
+++ b/examples/inference/requirements.txt
@@ -0,0 +1 @@
+vlagents @ git+https://github.com/RobotControlStack/vlagents.git@lerobot
\ No newline at end of file

From f36b8e1a0e4c14f416f10e08374347c28370655c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 14 May 2026 14:42:39 +0200
Subject: [PATCH 10/11] style: format inference franka script

---
 examples/inference/franka.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/examples/inference/franka.py b/examples/inference/franka.py
index 8ac52572..bbd0c053 100644
--- a/examples/inference/franka.py
+++ b/examples/inference/franka.py
@@ -1,17 +1,17 @@
-from dataclasses import dataclass
 import datetime
 import logging
+from dataclasses import dataclass
 from typing import Any
 
+import gymnasium as gym
 import numpy as np
 from rcs._core.common import BaseCameraConfig, RobotPlatform
 from rcs._core.sim import SimConfig
 from rcs.envs.base import ControlMode, RelativeTo
 from rcs.envs.configs import EmptyWorldFR3Duo
-import gymnasium as gym
+from rcs.envs.tasks import PickTaskConfig
 
 import rcs
-from rcs.envs.tasks import PickTaskConfig
 from vlagents.client import RemoteAgent
 from vlagents.policies import Act, Obs
 
@@ -73,6 +73,7 @@
     ),
 }
 
+
 @dataclass
 class InferenceConfig:
     vlagents_host: str
@@ -83,13 +84,15 @@ class InferenceConfig:
     jpeg_encoding: bool = True
     on_same_machine: bool = False
 
+
 class ModelInference:
     def __init__(self, env: gym.Env, cfg: InferenceConfig):
         self.env = env
         self.gripper_state = 1
         self._cfg = cfg
-        self.remote_agent = RemoteAgent(cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding)
-
+        self.remote_agent = RemoteAgent(
+            cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding
+        )
 
     def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs:
         cameras = {}
@@ -108,11 +111,10 @@ def action_agents2rcs(self, action: Act) -> dict[str, Any]:
         for idx, robot in enumerate(self._cfg.robot_keys):
             # TODO: this is currently hard coded for franka joints
             act[robot] = {}
-            act[robot]["joints"] = action.action[idx*8:idx*8+7]
-            act[robot]["gripper"] = action.action[idx*8+7]
+            act[robot]["joints"] = action.action[idx * 8 : idx * 8 + 7]
+            act[robot]["gripper"] = action.action[idx * 8 + 7]
         return act
 
-
     def loop(self):
         obs, _ = self.env.reset()
 
@@ -186,9 +188,7 @@ def get_env():
         hw_cfg.camera_cfgs = camera_cfgs or None
         hw_cfg.control_mode = CONTROL_MODE
         hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
-        hw_cfg.max_relative_movement = (
-            0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90))
-        )
+        hw_cfg.max_relative_movement = 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90))
         hw_cfg.relative_to = RELATIVETO
         hw_cfg.robot_to_shared_base_frame = robot2world
         env_rel = env_creator.create_env(hw_cfg)
@@ -212,25 +212,25 @@ def get_env():
 
 
 def main():
-    env_rel  = get_env()
+    env_rel = get_env()
     env_rel.reset()
 
     VIDEO_PATH.mkdir(parents=True, exist_ok=True)
-    timestamp =  str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
-
+    timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
 
     camera_set = env_rel.get_wrapper_attr("camera_set")
     camera_set.record_video(VIDEO_PATH, timestamp)
 
     # env = RHCWrapper(env, exec_horizon=1)
 
-    cfg = InferenceConfig(IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"])
+    cfg = InferenceConfig(
+        IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"]
+    )
     controller = ModelInference(env_rel, cfg)
     input("robot is about to be controlled by AI, press enter to start")
     with env_rel:
         controller.loop()
 
 
-
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From 844d78b8f5ea6fd35b9bad7e06abc3f4a9f159af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Thu, 14 May 2026 19:22:37 +0200
Subject: [PATCH 11/11] fix(franka): inference script after testing

Co-authored-by: Copilot <copilot@github.com>
---
 docker/Dockerfile              |  4 +-
 docker/link-editable-source.sh |  1 +
 examples/inference/franka.py   | 67 +++++++++++++++++++++-------------
 3 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index b72bde92..9c88e253 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -54,7 +54,9 @@ RUN chmod +x /usr/local/bin/link-editable-source \
     && uv pip install /opt/rcs-src/extensions/rcs_realsense \
     && uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \
     && uv pip install /opt/rcs-src/extensions/rcs_zed \
-    && uv pip install /opt/rcs-src/examples/teleop/SimPublisher
+    && uv pip install /opt/rcs-src/examples/teleop/SimPublisher \
+    && uv pip install /opt/rcs-src/2f85-python-driver \
+    && uv pip install /opt/rcs-src/vlagents
 
 WORKDIR /workspace/robot-control-stack
 
diff --git a/docker/link-editable-source.sh b/docker/link-editable-source.sh
index 0a93db47..02bf007e 100644
--- a/docker/link-editable-source.sh
+++ b/docker/link-editable-source.sh
@@ -44,3 +44,4 @@ link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGE
 link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed"
+link_pure_python_package "$REPO_ROOT/vlagents" "$SITE_PACKAGES/vlagents"
diff --git a/examples/inference/franka.py b/examples/inference/franka.py
index bbd0c053..37bb8bae 100644
--- a/examples/inference/franka.py
+++ b/examples/inference/franka.py
@@ -1,8 +1,13 @@
+import copy
 import datetime
 import logging
 from dataclasses import dataclass
+from pathlib import Path
+from time import sleep
 from typing import Any
 
+from rcs.utils import SimpleFrameRate
+
 import gymnasium as gym
 import numpy as np
 from rcs._core.common import BaseCameraConfig, RobotPlatform
@@ -28,22 +33,26 @@
 }
 
 
-ROBOT_INSTANCE = RobotPlatform.SIMULATION
-# ROBOT_INSTANCE = RobotPlatform.HARDWARE
+# ROBOT_INSTANCE = RobotPlatform.SIMULATION
+ROBOT_INSTANCE = RobotPlatform.HARDWARE
 
 # set camera dict to none disable cameras
-# CAMERA_DICT = {
-#     "left_wrist": "230422272017",
-#     "right_wrist": "230422271040",
-#     "side": "243522070385",
-#     "bird_eye": "243522070364",
-# }
-CAMERA_DICT = None
+CAMERA_DICT = {
+    "left_wrist": "230422272017",
+    "right_wrist": "230422271040",
+    # "side": "243522070385",
+    # "bird_eye": "243522070364",
+}
+# CAMERA_DICT = None
 ZED_CAMERA_DICT = {
-    "zed": "19928076",
+    "head": "19928076",
 }
 INCLUDE_DEPTH = False
 
+ROBOTIQ_SERIAL = {
+    "left": "DAAQMPDC",
+    "right": "DAAQMJHX",
+}
 
 # DIGIT_DICT = {
 #     "digit_right_left": "D21182",
@@ -52,16 +61,15 @@
 DIGIT_DICT = None
 
 
-DATASET_PATH = "test_iris"
-INSTRUCTION = "pick up cube"
+INSTRUCTION = "pick up the black cube with the right arm and place it into the black bowl; pick up the white cube with the left arm and place it into the white bowl"
 RECORD_FPS = 30
 CONTROL_MODE = ControlMode.JOINTS
 RELATIVETO = RelativeTo.NONE
 DEBUG = True
 VIDEO_PATH = "videos"
-MODEL = "pi05"
-IP = ""
-PORT = 20997
+MODEL = "lerobot"
+IP = "172.29.5.16"
+PORT = 20000
 
 
 robot2world = {
@@ -93,6 +101,7 @@ def __init__(self, env: gym.Env, cfg: InferenceConfig):
         self.remote_agent = RemoteAgent(
             cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding
         )
+        self.frame_rate = SimpleFrameRate(RECORD_FPS)
 
     def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs:
         cameras = {}
@@ -104,7 +113,7 @@ def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs:
             state.append(obs[robot]["joints"])
             state.append(obs[robot]["gripper"])
 
-        return Obs(cameras=None, gripper=None, info=info, state=np.concatenate(state))
+        return Obs(cameras=cameras, gripper=None, info=info, state=np.concatenate(state))
 
     def action_agents2rcs(self, action: Act) -> dict[str, Any]:
         act = {}
@@ -112,7 +121,7 @@ def action_agents2rcs(self, action: Act) -> dict[str, Any]:
             # TODO: this is currently hard coded for franka joints
             act[robot] = {}
             act[robot]["joints"] = action.action[idx * 8 : idx * 8 + 7]
-            act[robot]["gripper"] = action.action[idx * 8 + 7]
+            act[robot]["gripper"] = action.action[idx * 8 + 7 : idx * 8 + 8]
         return act
 
     def loop(self):
@@ -120,16 +129,18 @@ def loop(self):
 
         obs_dict = self.obs_rcs2agents(obs)
 
-        self.remote_agent.reset(obs_dict, instruction=self._cfg.instruction)
+        self.remote_agent.reset(copy.deepcopy(obs_dict), instruction=self._cfg.instruction)
 
         while True:
-            action = self.remote_agent.act(obs_dict)
+            action = self.remote_agent.act(copy.deepcopy(obs_dict))
             if action.done:
                 logger.info("done issued by agent, shutting down")
                 break
-            obs, _, _, _, info = self.env.step(self.action_agents2rcs(action))
+            a = self.action_agents2rcs(action)
+            obs, _, _, _, info = self.env.step(a)
 
             obs_dict = self.obs_rcs2agents(obs)
+            self.frame_rate()
 
 
 def get_env():
@@ -191,6 +202,12 @@ def get_env():
         hw_cfg.max_relative_movement = 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90))
         hw_cfg.relative_to = RELATIVETO
         hw_cfg.robot_to_shared_base_frame = robot2world
+        hw_cfg.robot_cfgs["left"].ignore_realtime = True
+        hw_cfg.robot_cfgs["right"].ignore_realtime = True
+        hw_cfg.robot_cfgs["left"].speed_factor = 0.3
+        hw_cfg.robot_cfgs["right"].speed_factor = 0.3
+        hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"]
+        hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"]
         env_rel = env_creator.create_env(hw_cfg)
     else:
         # FR3
@@ -215,16 +232,16 @@ def main():
     env_rel = get_env()
     env_rel.reset()
 
-    VIDEO_PATH.mkdir(parents=True, exist_ok=True)
-    timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+    # Path(VIDEO_PATH).mkdir(parents=True, exist_ok=True)
+    # timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
 
-    camera_set = env_rel.get_wrapper_attr("camera_set")
-    camera_set.record_video(VIDEO_PATH, timestamp)
+    # camera_set = env_rel.get_wrapper_attr("camera_set")
+    # camera_set.record_video(Path(VIDEO_PATH), timestamp)
 
     # env = RHCWrapper(env, exec_horizon=1)
 
     cfg = InferenceConfig(
-        IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"]
+        IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=False, robot_keys=["left", "right"]
     )
     controller = ModelInference(env_rel, cfg)
     input("robot is about to be controlled by AI, press enter to start")