From 30b996dfbd97f05dbf757c4faa65bc5b245929b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 5 May 2026 10:23:09 +0200 Subject: [PATCH 01/11] fix(docker): link script --- docker/README.md | 3 ++- docker/link-editable-source.sh | 25 ++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/docker/README.md b/docker/README.md index 78bd0da4..35b4bd13 100644 --- a/docker/README.md +++ b/docker/README.md @@ -23,6 +23,7 @@ Notes: - `~/zed_models` is mounted into `/usr/local/zed/resources` to match the direct `docker run` setup. - `/dev/dri` is masked inside the container so host Mesa/AMD render nodes do not override the NVIDIA runtime devices. - NVIDIA PRIME/GLX environment variables are exported to bias OpenGL/EGL selection toward the NVIDIA stack when using X11 forwarding. -- Python source changes are picked up from the mounted repo, including `extensions/rcs_zed`. +- The simulator still bootstraps EGL for offscreen MuJoCo camera rendering; set `RCS_MUJOCO_DISABLE_EGL=1` only if you intentionally want to disable that path. +- Python source changes are picked up from the mounted repo. - If you change C++ code in `rcs` or `rcs_fr3`, rebuild the image. - For non-GPU hosts, comment out the GPU-related lines in `docker/compose/dev.yml`. diff --git a/docker/link-editable-source.sh b/docker/link-editable-source.sh index 0b1b967a..0a93db47 100644 --- a/docker/link-editable-source.sh +++ b/docker/link-editable-source.sh @@ -10,26 +10,21 @@ fi SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" -link_mixed_package() { +link_compiled_package() { src_dir="$1" dst_dir="$2" - keep_dir_name="${3:-}" if [ ! -d "$src_dir" ] || [ ! -d "$dst_dir" ]; then return fi - # Replace only the Python sources from the mounted repo and keep compiled - # artifacts that were installed into site-packages during image build. - for path in "$src_dir"/* "$src_dir"/.[!.]* "$src_dir"/..?*; do - [ -e "$path" ] || continue - name="$(basename "$path")" - if [ -n "$keep_dir_name" ] && [ "$name" = "$keep_dir_name" ]; then - continue - fi - rm -rf "$dst_dir/$name" - cp -as "$path" "$dst_dir/$name" - done + tmp_keep="$(mktemp -d)" + find "$dst_dir" -maxdepth 1 \( -name '_core*.so' -o -name 'lib*.so*' \) -exec mv {} "$tmp_keep/" \; + rm -rf "$dst_dir" + mkdir -p "$dst_dir" + cp -as "$src_dir/." "$dst_dir/" + find "$tmp_keep" -maxdepth 1 -type f -exec mv {} "$dst_dir/" \; + rmdir "$tmp_keep" } link_pure_python_package() { @@ -44,8 +39,8 @@ link_pure_python_package() { ln -s "$src_dir" "$dst_dir" } -link_mixed_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs" "_core" -link_mixed_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3" "_core" +link_compiled_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs" +link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3" link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense" link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85" link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed" From fce0ff68741139518a07f30ee4abd066b820e9cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 5 May 2026 10:29:43 +0200 Subject: [PATCH 02/11] feat: zed cli rgb snapshot --- extensions/rcs_zed/src/rcs_zed/__main__.py | 104 ++++++++++++++++----- 1 file changed, 81 insertions(+), 23 deletions(-) diff --git a/extensions/rcs_zed/src/rcs_zed/__main__.py b/extensions/rcs_zed/src/rcs_zed/__main__.py index cfe51fa5..8e2558f4 100644 --- a/extensions/rcs_zed/src/rcs_zed/__main__.py +++ b/extensions/rcs_zed/src/rcs_zed/__main__.py @@ -1,4 +1,6 @@ import logging +from pathlib import Path +from time import monotonic import cv2 import typer @@ -14,6 +16,36 @@ def _display_frame(window_name: str, frame): cv2.imshow(window_name, frame.camera.color.data[:, :, ::-1]) +def _resolve_serial(serial: str | None) -> str: + if serial is not None: + return serial + + try: + devices = ZEDCameraSet.enumerate_connected_devices() + except RuntimeError as exc: + msg = str(exc) + raise typer.BadParameter(msg) from exc + if len(devices) == 0: + msg = "No ZED devices connected." + raise typer.BadParameter(msg) + return next(iter(devices)) + + +def _create_rgb_camera(serial: str, width: int, height: int, fps: int) -> ZEDCameraSet: + return ZEDCameraSet( + cameras={ + "viewer": common.BaseCameraConfig( + identifier=serial, + resolution_width=width, + resolution_height=height, + frame_rate=fps, + ) + }, + enable_depth=False, + enable_imu=False, + ) + + @zed_app.command() def serials(): """Reads out the serial numbers and models of connected ZED devices via the SDK.""" @@ -39,29 +71,8 @@ def rgb_view( window_name: str = typer.Option("ZED RGB", help="OpenCV window title."), ): """Open a live RGB window using the RCS ZED camera interface.""" - if serial is None: - try: - devices = ZEDCameraSet.enumerate_connected_devices() - except RuntimeError as exc: - msg = str(exc) - raise typer.BadParameter(msg) from exc - if len(devices) == 0: - msg = "No ZED devices connected." - raise typer.BadParameter(msg) - serial = next(iter(devices)) - - camera = ZEDCameraSet( - cameras={ - "viewer": common.BaseCameraConfig( - identifier=serial, - resolution_width=width, - resolution_height=height, - frame_rate=fps, - ) - }, - enable_depth=False, - enable_imu=False, - ) + serial = _resolve_serial(serial) + camera = _create_rgb_camera(serial, width, height, fps) try: camera.open() @@ -81,6 +92,53 @@ def rgb_view( cv2.destroyAllWindows() +@zed_app.command("rgb-snapshot") +def rgb_snapshot( + serial: str | None = typer.Argument(None, help="Optional ZED serial number. Uses the first device if omitted."), + output: Path = typer.Option(Path("zed_latest.png"), "--output", "-o", help="PNG file to write."), + width: int = typer.Option(1280, help="Requested capture width."), + height: int = typer.Option(720, help="Requested capture height."), + fps: int = typer.Option(30, help="Requested capture frame rate."), + duration: float = typer.Option(1.0, "--duration", "-d", help="Seconds to read frames before saving."), +): + """Open a ZED camera briefly and save the latest RGB frame as a PNG.""" + if duration <= 0: + msg = "Duration must be greater than zero." + raise typer.BadParameter(msg) + if output.suffix.lower() != ".png": + msg = "Output path must end in .png." + raise typer.BadParameter(msg) + + serial = _resolve_serial(serial) + camera = _create_rgb_camera(serial, width, height, fps) + + try: + camera.open() + except Exception as exc: + msg = f"Could not start ZED camera {serial}: {exc}" + raise typer.BadParameter(msg) from exc + + latest_frame = None + frames_read = 0 + deadline = monotonic() + duration + try: + while latest_frame is None or monotonic() < deadline: + latest_frame = camera.poll_frame("viewer") + frames_read += 1 + finally: + camera.close() + + assert latest_frame is not None + output.parent.mkdir(parents=True, exist_ok=True) + image_bgr = latest_frame.camera.color.data[:, :, ::-1] + print(latest_frame.camera.color.intrinsics) + if not cv2.imwrite(str(output), image_bgr): + msg = f"Could not write PNG to {output}." + raise typer.BadParameter(msg) + + typer.echo(f"Saved {output} from ZED {serial} after reading {frames_read} frame(s).") + + def main(): zed_app() From 2f7fe19f9f9db6d1cc8e0373c676d85789f19cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 5 May 2026 18:56:24 +0200 Subject: [PATCH 03/11] fix(teleop): depth in cameras --- examples/teleop/franka.py | 2 +- extensions/rcs_fr3/src/rcs_fr3/creators.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/teleop/franka.py b/examples/teleop/franka.py index 2f0d6ede..3736d55b 100644 --- a/examples/teleop/franka.py +++ b/examples/teleop/franka.py @@ -117,7 +117,7 @@ def get_env(): for name, identifier in ZED_CAMERA_DICT.items() }, kwargs={ - "enable_depth": False, + "enable_depth": INCLUDE_DEPTH, "enable_imu": False, }, ) diff --git a/extensions/rcs_fr3/src/rcs_fr3/creators.py b/extensions/rcs_fr3/src/rcs_fr3/creators.py index 2225a577..e28401e4 100644 --- a/extensions/rcs_fr3/src/rcs_fr3/creators.py +++ b/extensions/rcs_fr3/src/rcs_fr3/creators.py @@ -203,7 +203,7 @@ def create_env(self, cfg: FR3HardwareEnvCreatorConfig) -> gym.Env: camera_set.start() camera_set.wait_for_frames() logger.info("CameraSet started") - env = CameraSetWrapper(env, camera_set) + env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth) if cfg.relative_to != RelativeTo.NONE: env = RelativeActionSpace(env, max_mov=cfg.max_relative_movement, relative_to=cfg.relative_to) @@ -236,7 +236,7 @@ def create_env(self, cfg: FR3MultiHardwareEnvCreatorConfig) -> gym.Env: camera_set.start() camera_set.wait_for_frames() logger.info("CameraSet started") - env = CameraSetWrapper(env, camera_set) + env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth) return CoverWrapper(env) def config(self) -> FR3MultiHardwareEnvCreatorConfig: From 9d44dc5e124d969b9d8a8ad54ded3a5dd1dd0088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 5 May 2026 18:56:52 +0200 Subject: [PATCH 04/11] chore(quest): simplify right/left swapping logic --- python/rcs/operator/quest.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/python/rcs/operator/quest.py b/python/rcs/operator/quest.py index 322b870a..158850f0 100644 --- a/python/rcs/operator/quest.py +++ b/python/rcs/operator/quest.py @@ -128,6 +128,14 @@ def _reset_state(self): self._last_controller_pose[controller] = Pose() self._grp_pos[controller] = 1 + def _swap_controller_input(self, input_data: dict[str, Any]) -> dict[str, Any]: + if not self.config.switched_left_right: + return input_data + swapped = copy.copy(input_data) + swapped["left"] = input_data["right"] + swapped["right"] = input_data["left"] + return swapped + @staticmethod def _normalize_axis(value: bool | float | int) -> float: if isinstance(value, bool): @@ -139,13 +147,6 @@ def consume_commands(self) -> TeleopCommands: with self._cmd_lock: cmds = copy.copy(self._commands) self._commands = TeleopCommands() - if self.config.switched_left_right: - swapped_reset_origin_to_current = {} - if "left" in cmds.reset_origin_to_current: - swapped_reset_origin_to_current["right"] = cmds.reset_origin_to_current["left"] - if "right" in cmds.reset_origin_to_current: - swapped_reset_origin_to_current["left"] = cmds.reset_origin_to_current["right"] - cmds.reset_origin_to_current = swapped_reset_origin_to_current return cmds def reset_operator_state(self): @@ -176,11 +177,7 @@ def consume_action(self) -> dict[str, ArmWithGripper]: tquat=np.concatenate([transform.translation(), transform.rotation_q()]), gripper=np.array([self._grp_pos[controller]]), ) - return ( - {"left": transforms["right"], "right": transforms["left"]} - if self.config.switched_left_right - else transforms - ) + return transforms def set_camera(self, observation: dict) -> None: if not self.config.display_cameras: @@ -241,6 +238,8 @@ def run(self): logger.warning("[Quest Reader] packets arriving again") warning_raised = False + input_data = self._swap_controller_input(input_data) + # === Update Semantic Commands === with self._cmd_lock: if input_data[self._start_btn] and (self._prev_data is None or not self._prev_data[self._start_btn]): @@ -293,7 +292,8 @@ def run(self): gripper_axis = self._normalize_axis(input_data[controller][self._grp_btn[controller]]) # convert from IRIS to RCS gripper logic - self._grp_pos[controller] = 1.0 - gripper_axis + with self._resource_lock: + self._grp_pos[controller] = 1.0 - gripper_axis self._prev_data = input_data rate_limiter() From 4af54bbe3c193c60a6c05915751c651d5ded1fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 5 May 2026 18:58:28 +0200 Subject: [PATCH 05/11] impr(robotiq): physical reset only in constructor --- extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py index a5ed5736..4bb10d4e 100644 --- a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py +++ b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py @@ -42,7 +42,8 @@ class RobotiQ2F85Gripper(Gripper): def __init__(self, cfg: RobotiQ2F85GripperConfig): super().__init__() self._cfg: RobotiQ2F85GripperConfig = cfg - self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number) + self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number, ignore_read_frequency=True) + self.gripper.reset() def get_normalized_width(self) -> float: # value between 0 and 1 (0 is closed) @@ -61,7 +62,7 @@ def open(self) -> None: self.set_normalized_width(1.0) def reset(self) -> None: - self.gripper.reset() + self.open() def set_normalized_width(self, width: float, force: float = 0) -> None: """ From 2e7802192b2f5f714fa702ab4e6fa5619d3d6040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 12 May 2026 11:55:05 +0200 Subject: [PATCH 06/11] feat: robotiq in teleop script --- .dockerignore | 2 ++ docker/Dockerfile | 3 +- examples/teleop/franka.py | 35 ++++++++++++++------- extensions/rcs_fr3/src/rcs_fr3/configs.py | 1 + python/tests/test_quest_operator.py | 38 ++++++++++++++++++++++- 5 files changed, 66 insertions(+), 13 deletions(-) diff --git a/.dockerignore b/.dockerignore index bdf319e7..8b767f67 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,3 @@ **/build/ +real* +test* diff --git a/docker/Dockerfile b/docker/Dockerfile index 2b03ad71..b72bde92 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -53,7 +53,8 @@ RUN chmod +x /usr/local/bin/link-editable-source \ && uv pip install --no-build-isolation /opt/rcs-src/extensions/rcs_fr3 \ && uv pip install /opt/rcs-src/extensions/rcs_realsense \ && uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \ - && uv pip install /opt/rcs-src/extensions/rcs_zed + && uv pip install /opt/rcs-src/extensions/rcs_zed \ + && uv pip install /opt/rcs-src/examples/teleop/SimPublisher WORKDIR /workspace/robot-control-stack diff --git a/examples/teleop/franka.py b/examples/teleop/franka.py index 3736d55b..f3951c98 100644 --- a/examples/teleop/franka.py +++ b/examples/teleop/franka.py @@ -26,22 +26,26 @@ "right": "0", } - -ROBOT_INSTANCE = RobotPlatform.SIMULATION -# ROBOT_INSTANCE = RobotPlatform.HARDWARE +# use `udevadm info -a -n /dev/ttyUSB0 | grep serial` +# to find out the serial numbers +ROBOTIQ_SERIAL = { + "left": "DAAQMPDC", + "right": "DAAQMJHX", +} RECORD_FPS = 30 # set camera dict to none disable cameras -# CAMERA_DICT = { -# "left_wrist": "230422272017", -# "right_wrist": "230422271040", -# "side": "243522070385", -# "bird_eye": "243522070364", -# } -CAMERA_DICT = None +CAMERA_DICT = { + "right_wrist": "230422272017", + "left_wrist": "230422271040", + # "side": "243522070385", + # "bird_eye": "243522070364", +} +# CAMERA_DICT = None ZED_CAMERA_DICT = { - "zed": "19928076", + "head": "19928076", } +# ZED_CAMERA_DICT = None MQ3_ADDR = "10.42.0.1" INCLUDE_DEPTH = False @@ -119,6 +123,7 @@ def get_env(): kwargs={ "enable_depth": INCLUDE_DEPTH, "enable_imu": False, + "include_right": True, }, ) if DIGIT_DICT is not None: @@ -137,11 +142,19 @@ def get_env(): hw_cfg.camera_cfgs = camera_cfgs or None hw_cfg.control_mode = config.operator_class.control_mode[0] hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH + hw_cfg.wrapper_cfg.binary_gripper = False + # hw_cfg.wrapper_cfg.binary_gripper = True hw_cfg.max_relative_movement = ( 0.5 if config.operator_class.control_mode[0] == ControlMode.JOINTS else (0.5, np.deg2rad(90)) ) hw_cfg.relative_to = config.operator_class.control_mode[1] hw_cfg.robot_to_shared_base_frame = robot2world + hw_cfg.robot_cfgs["left"].ignore_realtime = True + hw_cfg.robot_cfgs["right"].ignore_realtime = True + hw_cfg.robot_cfgs["left"].speed_factor = 0.3 + hw_cfg.robot_cfgs["right"].speed_factor = 0.3 + hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"] + hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"] env_rel = env_creator.create_env(hw_cfg) operator = GelloOperator(config) if isinstance(config, GelloConfig) else QuestOperator(config) else: diff --git a/extensions/rcs_fr3/src/rcs_fr3/configs.py b/extensions/rcs_fr3/src/rcs_fr3/configs.py index 3eaa8b0a..baf47eaf 100644 --- a/extensions/rcs_fr3/src/rcs_fr3/configs.py +++ b/extensions/rcs_fr3/src/rcs_fr3/configs.py @@ -152,6 +152,7 @@ def config(self) -> FR3MultiHardwareEnvCreatorConfig: class FrankaDuoEnv(DefaultFR3MultiHardwareEnv): + # use `udevadm info -a -n /dev/ttyUSB0 | grep serial` to find out the serial numbers left_gripper_serial_number = "DAAQMJHX" right_gripper_serial_number = "DAAQMPDC" diff --git a/python/tests/test_quest_operator.py b/python/tests/test_quest_operator.py index d5af2a78..55db719a 100644 --- a/python/tests/test_quest_operator.py +++ b/python/tests/test_quest_operator.py @@ -1,6 +1,9 @@ import threading from typing import Any, cast +import numpy as np + +from rcs._core.common import Pose from rcs.operator.interface import TeleopCommands from rcs.operator.quest import QuestOperator @@ -9,13 +12,46 @@ class _Config: switched_left_right = True +def test_swap_controller_input_swaps_left_and_right_packets(): + operator = QuestOperator.__new__(QuestOperator) + operator.config = cast(Any, _Config()) + + input_data = { + "left": {"hand_trigger": 0.1, "index_trigger": 0.2}, + "right": {"hand_trigger": 0.9, "index_trigger": 0.8}, + "A": True, + } + + swapped = QuestOperator._swap_controller_input(operator, input_data) + + assert swapped["left"] == input_data["right"] + assert swapped["right"] == input_data["left"] + assert swapped["A"] is True + + def test_consume_commands_swaps_both_reset_origin_keys(): operator = QuestOperator.__new__(QuestOperator) operator.config = cast(Any, _Config()) operator._cmd_lock = threading.Lock() - operator._commands = TeleopCommands(reset_origin_to_current={"left": True, "right": False}) + operator._commands = TeleopCommands(reset_origin_to_current={"right": True, "left": False}) cmds = QuestOperator.consume_commands(operator) assert cmds.reset_origin_to_current == {"right": True, "left": False} assert operator._commands.reset_origin_to_current == {} + + +def test_consume_action_swaps_gripper_with_controller(): + operator = QuestOperator.__new__(QuestOperator) + operator.config = cast(Any, _Config()) + operator._resource_lock = threading.Lock() + operator.controller_names = ["left", "right"] + operator._last_controller_pose = {key: Pose() for key in operator.controller_names} + operator._offset_pose = {key: Pose() for key in operator.controller_names} + operator._set_frame = {key: Pose() for key in operator.controller_names} + operator._grp_pos = {"left": 0.75, "right": 0.25} + + actions = QuestOperator.consume_action(operator) + + assert np.allclose(actions["left"]["gripper"], np.array([0.75])) + assert np.allclose(actions["right"]["gripper"], np.array([0.25])) From 7b1889110425fb8114b77f061ca86ac95db1281a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 12 May 2026 11:58:26 +0200 Subject: [PATCH 07/11] impr(gripper): improved change detection --- .../rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py | 8 ++++--- python/rcs/envs/base.py | 21 +++++++++++++------ python/rcs/utils.py | 1 - 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py index 4bb10d4e..db4be1fa 100644 --- a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py +++ b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py @@ -42,12 +42,13 @@ class RobotiQ2F85Gripper(Gripper): def __init__(self, cfg: RobotiQ2F85GripperConfig): super().__init__() self._cfg: RobotiQ2F85GripperConfig = cfg - self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number, ignore_read_frequency=True) + self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number) + self._last_normalized_width = 1.0 self.gripper.reset() def get_normalized_width(self) -> float: - # value between 0 and 1 (0 is closed) - return self.gripper.opening / 85 + # Return the last commanded width to avoid a synchronous Modbus read on every env step. + return self._last_normalized_width def grasp(self) -> None: """ @@ -71,6 +72,7 @@ def set_normalized_width(self, width: float, force: float = 0) -> None: if not (0 <= width <= 1): msg = f"Width must be between 0 and 1, got {width}." raise ValueError(msg) + self._last_normalized_width = width abs_width = width * 85 self.gripper.go_to( opening=float(abs_width), diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py index 71a210f0..7d12b336 100644 --- a/python/rcs/envs/base.py +++ b/python/rcs/envs/base.py @@ -935,6 +935,14 @@ def __init__(self, env, gripper: common.Gripper, binary: bool = True): self.gripper = gripper self._last_gripper_cmd = None + def _command_changed(self, gripper_action: np.ndarray) -> bool: + if self._last_gripper_cmd is None: + return True + last_action = np.asarray(self._last_gripper_cmd, dtype=np.float32) + if self.binary: + return not np.array_equal(gripper_action, last_action) + return not np.allclose(gripper_action, last_action, atol=1e-3, rtol=0.0) + def close(self): self.gripper.close() super().close() @@ -966,13 +974,14 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]: gripper_action = [gripper_action] # type: ignore if self.binary: gripper_action = np.round(gripper_action) - gripper_action = np.clip(gripper_action, 0.0, 1.0) + gripper_action = np.clip(np.asarray(gripper_action, dtype=np.float32), 0.0, 1.0) - if self.binary: - self.gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self.gripper.open() - else: - self.gripper.set_normalized_width(gripper_action[0]) - self._last_gripper_cmd = gripper_action + if self._command_changed(gripper_action): + if self.binary: + self.gripper.grasp() if gripper_action[0] < 0.5 else self.gripper.open() + else: + self.gripper.set_normalized_width(float(gripper_action[0])) + self._last_gripper_cmd = gripper_action.tolist() del action[self.gripper_key] return action diff --git a/python/rcs/utils.py b/python/rcs/utils.py index 9f9b7e7f..25ef2abb 100644 --- a/python/rcs/utils.py +++ b/python/rcs/utils.py @@ -27,7 +27,6 @@ def __call__(self): if self.t is None: self.t = perf_counter() self._last_print = self.t - sleep(1 / self.frame_rate) return sleep_time = 1 / self.frame_rate - (perf_counter() - self.t) if sleep_time > 0: From 785acb143c76697db27d7f5e299e45103f1526e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 12 May 2026 11:59:23 +0200 Subject: [PATCH 08/11] feat(zed): add option to record both eyes --- extensions/rcs_zed/src/rcs_zed/camera.py | 156 +++++++++++++----- .../rcs_zed/tests/test_zed_extension.py | 58 ++++++- 2 files changed, 171 insertions(+), 43 deletions(-) diff --git a/extensions/rcs_zed/src/rcs_zed/camera.py b/extensions/rcs_zed/src/rcs_zed/camera.py index e9fd61dd..881a1368 100644 --- a/extensions/rcs_zed/src/rcs_zed/camera.py +++ b/extensions/rcs_zed/src/rcs_zed/camera.py @@ -30,6 +30,8 @@ class ZEDFrameBundle: color: np.ndarray timestamp: float color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] + right_color: np.ndarray | None = None + right_color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] | None = None depth: np.ndarray | None = None accel: np.ndarray | None = None gyro: np.ndarray | None = None @@ -47,12 +49,20 @@ def _intrinsics_matrix(fx: float, fy: float, cx: float, cy: float) -> np.ndarray class PyZEDCameraHandle: - def __init__(self, camera: typing.Any, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray): + def __init__( + self, + camera: typing.Any, + device_info: ZEDDeviceInfo, + color_intrinsics: np.ndarray, + right_color_intrinsics: np.ndarray | None = None, + ): self.camera = camera self.device_info = device_info self.color_intrinsics = color_intrinsics + self.right_color_intrinsics = right_color_intrinsics self.runtime_parameters = sl.RuntimeParameters() # type: ignore[union-attr] self.image_mat = sl.Mat() # type: ignore[union-attr] + self.right_image_mat = sl.Mat() # type: ignore[union-attr] self.depth_mat = sl.Mat() # type: ignore[union-attr] self.sensors_data = sl.SensorsData() # type: ignore[union-attr] @@ -69,7 +79,7 @@ def _timestamp_seconds(self) -> float: pass return time() - def grab_frame(self) -> ZEDFrameBundle: + def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle: err = self.camera.grab(self.runtime_parameters) if err != sl.ERROR_CODE.SUCCESS: # type: ignore[union-attr] msg = f"Failed to grab ZED frame: {err}" @@ -82,6 +92,15 @@ def grab_frame(self) -> ZEDFrameBundle: raise RuntimeError(msg) color_rgb = color_raw[:, :, :3][:, :, ::-1] if color_raw.shape[2] == 4 else color_raw[:, :, ::-1] + right_color = None + if include_right: + self.camera.retrieve_image(self.right_image_mat, sl.VIEW.RIGHT) # type: ignore[union-attr] + right_raw = np.array(self.right_image_mat.get_data(), copy=True) + if right_raw.ndim != 3: + msg = f"Unexpected ZED right image shape {right_raw.shape}" + raise RuntimeError(msg) + right_color = right_raw[:, :, :3][:, :, ::-1] if right_raw.shape[2] == 4 else right_raw[:, :, ::-1] + depth = None if self.device_info.has_depth: self.camera.retrieve_measure(self.depth_mat, sl.MEASURE.DEPTH) # type: ignore[union-attr] @@ -105,6 +124,8 @@ def grab_frame(self) -> ZEDFrameBundle: return ZEDFrameBundle( color=color_rgb, + right_color=right_color, + right_color_intrinsics=self.right_color_intrinsics if right_color is not None else None, depth=depth, accel=accel, gyro=gyro, @@ -185,6 +206,7 @@ def open_camera( *, enable_depth: bool, enable_imu: bool, + include_right: bool, ) -> PyZEDCameraHandle: cls._require_sdk() assert sl is not None @@ -206,6 +228,7 @@ def open_camera( information = camera.get_camera_information() calibration = information.camera_configuration.calibration_parameters left_cam = calibration.left_cam + right_cam = calibration.right_cam info = ZEDDeviceInfo( serial=str(config.identifier), model=cls._model_to_string(information.camera_model), @@ -213,7 +236,15 @@ def open_camera( has_imu=enable_imu and cls._device_has_imu(information), ) intrinsics = _intrinsics_matrix(left_cam.fx, left_cam.fy, left_cam.cx, left_cam.cy) - return PyZEDCameraHandle(camera=camera, device_info=info, color_intrinsics=intrinsics) + right_intrinsics = None + if include_right: + right_intrinsics = _intrinsics_matrix(right_cam.fx, right_cam.fy, right_cam.cx, right_cam.cy) + return PyZEDCameraHandle( + camera=camera, + device_info=info, + color_intrinsics=intrinsics, + right_color_intrinsics=right_intrinsics, + ) def __init__( self, @@ -221,6 +252,7 @@ def __init__( calibration_strategy: dict[str, CalibrationStrategy] | None = None, enable_depth: bool = True, enable_imu: bool = True, + include_right: bool = False, ) -> None: self.cameras = cameras if calibration_strategy is None: @@ -228,12 +260,14 @@ def __init__( self.calibration_strategy = calibration_strategy self.enable_depth = enable_depth self.enable_imu = enable_imu + self.include_right = include_right self._logger = logging.getLogger(__name__) - self._camera_names = list(self.cameras.keys()) + self._camera_names = self._logical_camera_names() self._available_devices: dict[str, ZEDDeviceInfo] = {} self._enabled_devices: dict[str, PyZEDCameraHandle] = {} self._frame_buffer_lock: dict[str, threading.Lock] = {} self._frame_buffer: dict[str, list[Frame]] = {} + self._pending_right_bundles: dict[str, ZEDFrameBundle] = {} assert ( len({camera.resolution_width for camera in self.cameras.values()}) == 1 @@ -246,7 +280,63 @@ def camera_names(self) -> list[str]: return self._camera_names def config(self, camera_name) -> common.BaseCameraConfig: - return self.cameras[camera_name] + return self.cameras[self._physical_camera_name(camera_name)] + + @staticmethod + def _right_camera_name(camera_name: str) -> str: + return f"{camera_name}_right" + + def _physical_camera_name(self, camera_name: str) -> str: + if camera_name in self.cameras: + return camera_name + if self.include_right and camera_name.endswith("_right"): + physical_name = camera_name.removesuffix("_right") + if physical_name in self.cameras: + return physical_name + msg = f"Camera {camera_name} not found in the enabled devices" + raise AssertionError(msg) + + def _is_right_camera_name(self, camera_name: str) -> bool: + return camera_name not in self.cameras and self._physical_camera_name(camera_name) != camera_name + + def _logical_camera_names(self) -> list[str]: + names = list(self.cameras.keys()) + if self.include_right: + names.extend(self._right_camera_name(camera_name) for camera_name in self.cameras) + return names + + def _frame_from_bundle(self, camera_name: str, bundle: ZEDFrameBundle) -> Frame: + physical_name = self._physical_camera_name(camera_name) + extrinsics = self.calibration_strategy[physical_name].get_extrinsics() + is_right = self._is_right_camera_name(camera_name) + color_data = bundle.right_color if is_right else bundle.color + color_intrinsics = bundle.right_color_intrinsics if is_right else bundle.color_intrinsics + if color_data is None or color_intrinsics is None: + msg = f"Missing {'right' if is_right else 'left'} image data for {camera_name}" + raise RuntimeError(msg) + + color = DataFrame( + data=color_data, + timestamp=bundle.timestamp, + intrinsics=color_intrinsics, + extrinsics=extrinsics, + ) + depth = None + if not is_right and bundle.depth is not None: + depth = DataFrame( + data=bundle.depth, + timestamp=bundle.timestamp, + intrinsics=bundle.color_intrinsics, + extrinsics=extrinsics, + ) + + accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None + gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None + return Frame( + camera=CameraFrame(color=color, depth=depth), + imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None, + avg_timestamp=bundle.timestamp, + ) def update_available_devices(self): self._available_devices = self.enumerate_connected_devices() @@ -256,6 +346,7 @@ def open(self): self._enabled_devices = {} self._frame_buffer = {} self._frame_buffer_lock = {} + self._pending_right_bundles = {} for camera_name, camera_cfg in self.cameras.items(): if camera_cfg.identifier not in self._available_devices: @@ -267,6 +358,7 @@ def open(self): camera_cfg, enable_depth=self.enable_depth and device_info.has_depth, enable_imu=self.enable_imu and device_info.has_imu, + include_right=self.include_right, ) self._enabled_devices[camera_name] = opened self._frame_buffer[camera_name] = [] @@ -274,44 +366,34 @@ def open(self): def poll_frame(self, camera_name: str) -> Frame: assert camera_name in self.camera_names, f"Camera {camera_name} not found in the enabled devices" - device = self._enabled_devices[camera_name] - bundle = device.grab_frame() - extrinsics = self.calibration_strategy[camera_name].get_extrinsics() - - color = DataFrame( - data=bundle.color, - timestamp=bundle.timestamp, - intrinsics=bundle.color_intrinsics, - extrinsics=extrinsics, - ) - depth = None - if bundle.depth is not None: - depth = DataFrame( - data=bundle.depth, - timestamp=bundle.timestamp, - intrinsics=bundle.color_intrinsics, - extrinsics=extrinsics, - ) - - accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None - gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None - - frame = Frame( - camera=CameraFrame(color=color, depth=depth), - imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None, - avg_timestamp=bundle.timestamp, - ) - - with self._frame_buffer_lock[camera_name]: - if len(self._frame_buffer[camera_name]) >= self.CALIBRATION_FRAME_SIZE: - self._frame_buffer[camera_name].pop(0) - self._frame_buffer[camera_name].append(copy.deepcopy(frame)) + physical_name = self._physical_camera_name(camera_name) + device = self._enabled_devices[physical_name] + + if self._is_right_camera_name(camera_name): + bundle = self._pending_right_bundles.pop(physical_name, None) + if bundle is None: + bundle = device.grab_frame(include_right=True) + else: + bundle = device.grab_frame(include_right=self.include_right) + if self.include_right and bundle.right_color is not None: + self._pending_right_bundles[physical_name] = bundle + else: + self._pending_right_bundles.pop(physical_name, None) + + frame = self._frame_from_bundle(camera_name, bundle) + + if not self._is_right_camera_name(camera_name): + with self._frame_buffer_lock[physical_name]: + if len(self._frame_buffer[physical_name]) >= self.CALIBRATION_FRAME_SIZE: + self._frame_buffer[physical_name].pop(0) + self._frame_buffer[physical_name].append(copy.deepcopy(frame)) return frame def close(self): for device in self._enabled_devices.values(): device.close() self._enabled_devices = {} + self._pending_right_bundles = {} def calibrate(self) -> bool: for camera_name in self.cameras: diff --git a/extensions/rcs_zed/tests/test_zed_extension.py b/extensions/rcs_zed/tests/test_zed_extension.py index 32e8294f..cb17acec 100644 --- a/extensions/rcs_zed/tests/test_zed_extension.py +++ b/extensions/rcs_zed/tests/test_zed_extension.py @@ -20,8 +20,10 @@ def __init__(self, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray, fra self.color_intrinsics = color_intrinsics self._frame_bundle = frame_bundle self.closed = False + self.grab_calls: list[bool] = [] - def grab_frame(self) -> ZEDFrameBundle: + def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle: + self.grab_calls.append(include_right) return self._frame_bundle def close(self): @@ -31,7 +33,7 @@ def close(self): class PatchZedState(TypedDict): devices: dict[str, ZEDDeviceInfo] opened: dict[str, FakeOpenedZEDCamera] - open_calls: list[tuple[str, bool, bool]] + open_calls: list[tuple[str, bool, bool, bool]] @pytest.fixture() @@ -41,8 +43,8 @@ def patch_zed(monkeypatch) -> PatchZedState: def fake_enumerate(_cls): return state["devices"] - def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool): - state["open_calls"].append((config.identifier, enable_depth, enable_imu)) + def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool, include_right: bool): + state["open_calls"].append((config.identifier, enable_depth, enable_imu, include_right)) return state["opened"][config.identifier] monkeypatch.setattr(ZEDCameraSet, "enumerate_connected_devices", classmethod(fake_enumerate)) @@ -78,7 +80,7 @@ def test_zed_frame_mapping_depth_scaling_and_imu_downgrade(patch_zed): frame = camera_set.poll_frame("wrist") assert frame.camera.color.intrinsics is not None - assert patch_zed["open_calls"] == [("123", True, False)] + assert patch_zed["open_calls"] == [("123", True, False, False)] assert np.array_equal(frame.camera.color.data, color) assert np.array_equal(frame.camera.depth.data, depth) # type: ignore[union-attr] assert np.array_equal(frame.camera.color.intrinsics, intrinsics) @@ -113,7 +115,51 @@ def test_zed_enumeration_and_multi_camera_open(patch_zed): }, ) camera_set.open() - assert patch_zed["open_calls"] == [("111", True, False), ("222", True, True)] + assert patch_zed["open_calls"] == [("111", True, False, False), ("222", True, True, False)] camera_set.close() assert opened["111"].closed assert opened["222"].closed + + +def test_zed_include_right_adds_logical_right_camera_without_double_grab(patch_zed): + left_intrinsics = np.array([[100.0, 0.0, 10.0, 0.0], [0.0, 110.0, 20.0, 0.0], [0.0, 0.0, 1.0, 0.0]]) + right_intrinsics = np.array([[120.0, 0.0, 12.0, 0.0], [0.0, 130.0, 22.0, 0.0], [0.0, 0.0, 1.0, 0.0]]) + left_color = np.arange(27, dtype=np.uint8).reshape(3, 3, 3) + right_color = np.arange(27, 54, dtype=np.uint8).reshape(3, 3, 3) + frame_bundle = ZEDFrameBundle( + color=left_color, + right_color=right_color, + timestamp=12.5, + color_intrinsics=left_intrinsics, + right_color_intrinsics=right_intrinsics, + ) + device_info = ZEDDeviceInfo(serial="123", model="ZED Mini", has_depth=True, has_imu=False) + opened = FakeOpenedZEDCamera(device_info=device_info, color_intrinsics=left_intrinsics, frame_bundle=frame_bundle) + patch_zed["devices"] = {"123": device_info} + patch_zed["opened"] = {"123": opened} + + camera_set = ZEDCameraSet( + cameras={ + "wrist": common.BaseCameraConfig( + identifier="123", resolution_width=1280, resolution_height=720, frame_rate=30 + ) + }, + include_right=True, + ) + + assert camera_set.camera_names == ["wrist", "wrist_right"] + assert camera_set.config("wrist_right").identifier == "123" + + camera_set.open() + assert patch_zed["open_calls"] == [("123", True, False, True)] + left_frame = camera_set.poll_frame("wrist") + right_frame = camera_set.poll_frame("wrist_right") + + assert opened.grab_calls == [True] + assert np.array_equal(left_frame.camera.color.data, left_color) + assert np.array_equal(right_frame.camera.color.data, right_color) + assert np.array_equal(left_frame.camera.color.intrinsics, left_intrinsics) + assert np.array_equal(right_frame.camera.color.intrinsics, right_intrinsics) + assert left_frame.avg_timestamp == right_frame.avg_timestamp == 12.5 + assert left_frame.camera.depth is None + assert right_frame.camera.depth is None From 07a8d3d4e5542e205ba584eec438994365bfd3e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 8 May 2026 16:57:31 +0200 Subject: [PATCH 09/11] feat(inference): add initial inference loop script --- examples/inference/README.md | 0 examples/inference/franka.py | 236 ++++++++++++++++++++++++++++ examples/inference/requirements.txt | 1 + 3 files changed, 237 insertions(+) create mode 100644 examples/inference/README.md create mode 100644 examples/inference/franka.py create mode 100644 examples/inference/requirements.txt diff --git a/examples/inference/README.md b/examples/inference/README.md new file mode 100644 index 00000000..e69de29b diff --git a/examples/inference/franka.py b/examples/inference/franka.py new file mode 100644 index 00000000..8ac52572 --- /dev/null +++ b/examples/inference/franka.py @@ -0,0 +1,236 @@ +from dataclasses import dataclass +import datetime +import logging +from typing import Any + +import numpy as np +from rcs._core.common import BaseCameraConfig, RobotPlatform +from rcs._core.sim import SimConfig +from rcs.envs.base import ControlMode, RelativeTo +from rcs.envs.configs import EmptyWorldFR3Duo +import gymnasium as gym + +import rcs +from rcs.envs.tasks import PickTaskConfig +from vlagents.client import RemoteAgent +from vlagents.policies import Act, Obs + +logger = logging.getLogger(__name__) + + +ROBOT2IP = { + "right": "192.168.102.1", + "left": "192.168.101.1", +} +ROBOT2ID = { + "left": "1", + "right": "0", +} + + +ROBOT_INSTANCE = RobotPlatform.SIMULATION +# ROBOT_INSTANCE = RobotPlatform.HARDWARE + +# set camera dict to none disable cameras +# CAMERA_DICT = { +# "left_wrist": "230422272017", +# "right_wrist": "230422271040", +# "side": "243522070385", +# "bird_eye": "243522070364", +# } +CAMERA_DICT = None +ZED_CAMERA_DICT = { + "zed": "19928076", +} +INCLUDE_DEPTH = False + + +# DIGIT_DICT = { +# "digit_right_left": "D21182", +# "digit_right_right": "D21193" +# } +DIGIT_DICT = None + + +DATASET_PATH = "test_iris" +INSTRUCTION = "pick up cube" +RECORD_FPS = 30 +CONTROL_MODE = ControlMode.JOINTS +RELATIVETO = RelativeTo.NONE +DEBUG = True +VIDEO_PATH = "videos" +MODEL = "pi05" +IP = "" +PORT = 20997 + + +robot2world = { + "right": rcs.common.Pose( + translation=np.array([0, 0, 0]), rpy_vector=np.array([0.89360858, -0.17453293, 0.46425758]) + ), + "left": rcs.common.Pose( + translation=np.array([0, 0, 0]), rpy_vector=np.array([-0.89360858, -0.17453293, -0.46425758]) + ), +} + +@dataclass +class InferenceConfig: + vlagents_host: str + vlagents_port: int + vlagents_model: str + instruction: str + robot_keys: list[str] + jpeg_encoding: bool = True + on_same_machine: bool = False + +class ModelInference: + def __init__(self, env: gym.Env, cfg: InferenceConfig): + self.env = env + self.gripper_state = 1 + self._cfg = cfg + self.remote_agent = RemoteAgent(cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding) + + + def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs: + cameras = {} + for frame in obs["frames"]: + cameras[frame] = obs["frames"][frame]["rgb"]["data"] + state = [] + for robot in self._cfg.robot_keys: + # TODO: currently hardcoded for joints + state.append(obs[robot]["joints"]) + state.append(obs[robot]["gripper"]) + + return Obs(cameras=None, gripper=None, info=info, state=np.concatenate(state)) + + def action_agents2rcs(self, action: Act) -> dict[str, Any]: + act = {} + for idx, robot in enumerate(self._cfg.robot_keys): + # TODO: this is currently hard coded for franka joints + act[robot] = {} + act[robot]["joints"] = action.action[idx*8:idx*8+7] + act[robot]["gripper"] = action.action[idx*8+7] + return act + + + def loop(self): + obs, _ = self.env.reset() + + obs_dict = self.obs_rcs2agents(obs) + + self.remote_agent.reset(obs_dict, instruction=self._cfg.instruction) + + while True: + action = self.remote_agent.act(obs_dict) + if action.done: + logger.info("done issued by agent, shutting down") + break + obs, _, _, _, info = self.env.step(self.action_agents2rcs(action)) + + obs_dict = self.obs_rcs2agents(obs) + + +def get_env(): + if ROBOT_INSTANCE == RobotPlatform.HARDWARE: + from rcs_fr3.configs import FrankaDuoEnv + from rcs_fr3.creators import HardwareCameraCreatorConfig + + env_creator = FrankaDuoEnv() + env_creator.left_ip = ROBOT2IP["left"] + env_creator.right_ip = ROBOT2IP["right"] + hw_cfg = env_creator.config() + camera_cfgs: dict[str, HardwareCameraCreatorConfig] = {} + if CAMERA_DICT is not None: + camera_cfgs["realsense"] = HardwareCameraCreatorConfig( + camera_type_id="realsense", + camera_cfgs={ + name: BaseCameraConfig( + identifier=identifier, + resolution_width=1280, + resolution_height=720, + frame_rate=30, + ) + for name, identifier in CAMERA_DICT.items() + }, + ) + if ZED_CAMERA_DICT is not None: + camera_cfgs["zed"] = HardwareCameraCreatorConfig( + camera_type_id="zed", + camera_cfgs={ + name: BaseCameraConfig( + identifier=identifier, + resolution_width=1280, + resolution_height=720, + frame_rate=30, + ) + for name, identifier in ZED_CAMERA_DICT.items() + }, + kwargs={ + "enable_depth": False, + "enable_imu": False, + }, + ) + if DIGIT_DICT is not None: + camera_cfgs["digit"] = HardwareCameraCreatorConfig( + camera_type_id="digit", + camera_cfgs={ + name: BaseCameraConfig( + identifier=identifier, + resolution_width=320, + resolution_height=240, + frame_rate=30, + ) + for name, identifier in DIGIT_DICT.items() + }, + ) + hw_cfg.camera_cfgs = camera_cfgs or None + hw_cfg.control_mode = CONTROL_MODE + hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH + hw_cfg.max_relative_movement = ( + 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90)) + ) + hw_cfg.relative_to = RELATIVETO + hw_cfg.robot_to_shared_base_frame = robot2world + env_rel = env_creator.create_env(hw_cfg) + else: + # FR3 + + scene = EmptyWorldFR3Duo() + sim_cfg_data = scene.config() + sim_cfg_data.sim_cfg = SimConfig( + async_control=True, realtime=True, frequency=RECORD_FPS, max_convergence_steps=500 + ) + sim_cfg_data.relative_to = RelativeTo.CONFIGURED_ORIGIN + sim_cfg_data.wrapper_cfg.include_depth = INCLUDE_DEPTH + if sim_cfg_data.root_frame_objects is None: + sim_cfg_data.root_frame_objects = {} + sim_cfg_data.task_cfg = PickTaskConfig(robot_name="right") + + env_rel = scene.create_env(sim_cfg_data) + + return env_rel + + +def main(): + env_rel = get_env() + env_rel.reset() + + VIDEO_PATH.mkdir(parents=True, exist_ok=True) + timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + + + camera_set = env_rel.get_wrapper_attr("camera_set") + camera_set.record_video(VIDEO_PATH, timestamp) + + # env = RHCWrapper(env, exec_horizon=1) + + cfg = InferenceConfig(IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"]) + controller = ModelInference(env_rel, cfg) + input("robot is about to be controlled by AI, press enter to start") + with env_rel: + controller.loop() + + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/inference/requirements.txt b/examples/inference/requirements.txt new file mode 100644 index 00000000..cb843744 --- /dev/null +++ b/examples/inference/requirements.txt @@ -0,0 +1 @@ +vlagents @ git+https://github.com/RobotControlStack/vlagents.git@lerobot \ No newline at end of file From f36b8e1a0e4c14f416f10e08374347c28370655c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Thu, 14 May 2026 14:42:39 +0200 Subject: [PATCH 10/11] style: format inference franka script --- examples/inference/franka.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/inference/franka.py b/examples/inference/franka.py index 8ac52572..bbd0c053 100644 --- a/examples/inference/franka.py +++ b/examples/inference/franka.py @@ -1,17 +1,17 @@ -from dataclasses import dataclass import datetime import logging +from dataclasses import dataclass from typing import Any +import gymnasium as gym import numpy as np from rcs._core.common import BaseCameraConfig, RobotPlatform from rcs._core.sim import SimConfig from rcs.envs.base import ControlMode, RelativeTo from rcs.envs.configs import EmptyWorldFR3Duo -import gymnasium as gym +from rcs.envs.tasks import PickTaskConfig import rcs -from rcs.envs.tasks import PickTaskConfig from vlagents.client import RemoteAgent from vlagents.policies import Act, Obs @@ -73,6 +73,7 @@ ), } + @dataclass class InferenceConfig: vlagents_host: str @@ -83,13 +84,15 @@ class InferenceConfig: jpeg_encoding: bool = True on_same_machine: bool = False + class ModelInference: def __init__(self, env: gym.Env, cfg: InferenceConfig): self.env = env self.gripper_state = 1 self._cfg = cfg - self.remote_agent = RemoteAgent(cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding) - + self.remote_agent = RemoteAgent( + cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding + ) def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs: cameras = {} @@ -108,11 +111,10 @@ def action_agents2rcs(self, action: Act) -> dict[str, Any]: for idx, robot in enumerate(self._cfg.robot_keys): # TODO: this is currently hard coded for franka joints act[robot] = {} - act[robot]["joints"] = action.action[idx*8:idx*8+7] - act[robot]["gripper"] = action.action[idx*8+7] + act[robot]["joints"] = action.action[idx * 8 : idx * 8 + 7] + act[robot]["gripper"] = action.action[idx * 8 + 7] return act - def loop(self): obs, _ = self.env.reset() @@ -186,9 +188,7 @@ def get_env(): hw_cfg.camera_cfgs = camera_cfgs or None hw_cfg.control_mode = CONTROL_MODE hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH - hw_cfg.max_relative_movement = ( - 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90)) - ) + hw_cfg.max_relative_movement = 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90)) hw_cfg.relative_to = RELATIVETO hw_cfg.robot_to_shared_base_frame = robot2world env_rel = env_creator.create_env(hw_cfg) @@ -212,25 +212,25 @@ def get_env(): def main(): - env_rel = get_env() + env_rel = get_env() env_rel.reset() VIDEO_PATH.mkdir(parents=True, exist_ok=True) - timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) - + timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) camera_set = env_rel.get_wrapper_attr("camera_set") camera_set.record_video(VIDEO_PATH, timestamp) # env = RHCWrapper(env, exec_horizon=1) - cfg = InferenceConfig(IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"]) + cfg = InferenceConfig( + IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"] + ) controller = ModelInference(env_rel, cfg) input("robot is about to be controlled by AI, press enter to start") with env_rel: controller.loop() - if __name__ == "__main__": - main() \ No newline at end of file + main() From 844d78b8f5ea6fd35b9bad7e06abc3f4a9f159af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Thu, 14 May 2026 19:22:37 +0200 Subject: [PATCH 11/11] fix(franka): inference script after testing Co-authored-by: Copilot --- docker/Dockerfile | 4 +- docker/link-editable-source.sh | 1 + examples/inference/franka.py | 67 +++++++++++++++++++++------------- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b72bde92..9c88e253 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -54,7 +54,9 @@ RUN chmod +x /usr/local/bin/link-editable-source \ && uv pip install /opt/rcs-src/extensions/rcs_realsense \ && uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \ && uv pip install /opt/rcs-src/extensions/rcs_zed \ - && uv pip install /opt/rcs-src/examples/teleop/SimPublisher + && uv pip install /opt/rcs-src/examples/teleop/SimPublisher \ + && uv pip install /opt/rcs-src/2f85-python-driver \ + && uv pip install /opt/rcs-src/vlagents WORKDIR /workspace/robot-control-stack diff --git a/docker/link-editable-source.sh b/docker/link-editable-source.sh index 0a93db47..02bf007e 100644 --- a/docker/link-editable-source.sh +++ b/docker/link-editable-source.sh @@ -44,3 +44,4 @@ link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGE link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense" link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85" link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed" +link_pure_python_package "$REPO_ROOT/vlagents" "$SITE_PACKAGES/vlagents" diff --git a/examples/inference/franka.py b/examples/inference/franka.py index bbd0c053..37bb8bae 100644 --- a/examples/inference/franka.py +++ b/examples/inference/franka.py @@ -1,8 +1,13 @@ +import copy import datetime import logging from dataclasses import dataclass +from pathlib import Path +from time import sleep from typing import Any +from rcs.utils import SimpleFrameRate + import gymnasium as gym import numpy as np from rcs._core.common import BaseCameraConfig, RobotPlatform @@ -28,22 +33,26 @@ } -ROBOT_INSTANCE = RobotPlatform.SIMULATION -# ROBOT_INSTANCE = RobotPlatform.HARDWARE +# ROBOT_INSTANCE = RobotPlatform.SIMULATION +ROBOT_INSTANCE = RobotPlatform.HARDWARE # set camera dict to none disable cameras -# CAMERA_DICT = { -# "left_wrist": "230422272017", -# "right_wrist": "230422271040", -# "side": "243522070385", -# "bird_eye": "243522070364", -# } -CAMERA_DICT = None +CAMERA_DICT = { + "left_wrist": "230422272017", + "right_wrist": "230422271040", + # "side": "243522070385", + # "bird_eye": "243522070364", +} +# CAMERA_DICT = None ZED_CAMERA_DICT = { - "zed": "19928076", + "head": "19928076", } INCLUDE_DEPTH = False +ROBOTIQ_SERIAL = { + "left": "DAAQMPDC", + "right": "DAAQMJHX", +} # DIGIT_DICT = { # "digit_right_left": "D21182", @@ -52,16 +61,15 @@ DIGIT_DICT = None -DATASET_PATH = "test_iris" -INSTRUCTION = "pick up cube" +INSTRUCTION = "pick up the black cube with the right arm and place it into the black bowl; pick up the white cube with the left arm and place it into the white bowl" RECORD_FPS = 30 CONTROL_MODE = ControlMode.JOINTS RELATIVETO = RelativeTo.NONE DEBUG = True VIDEO_PATH = "videos" -MODEL = "pi05" -IP = "" -PORT = 20997 +MODEL = "lerobot" +IP = "172.29.5.16" +PORT = 20000 robot2world = { @@ -93,6 +101,7 @@ def __init__(self, env: gym.Env, cfg: InferenceConfig): self.remote_agent = RemoteAgent( cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding ) + self.frame_rate = SimpleFrameRate(RECORD_FPS) def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs: cameras = {} @@ -104,7 +113,7 @@ def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs: state.append(obs[robot]["joints"]) state.append(obs[robot]["gripper"]) - return Obs(cameras=None, gripper=None, info=info, state=np.concatenate(state)) + return Obs(cameras=cameras, gripper=None, info=info, state=np.concatenate(state)) def action_agents2rcs(self, action: Act) -> dict[str, Any]: act = {} @@ -112,7 +121,7 @@ def action_agents2rcs(self, action: Act) -> dict[str, Any]: # TODO: this is currently hard coded for franka joints act[robot] = {} act[robot]["joints"] = action.action[idx * 8 : idx * 8 + 7] - act[robot]["gripper"] = action.action[idx * 8 + 7] + act[robot]["gripper"] = action.action[idx * 8 + 7 : idx * 8 + 8] return act def loop(self): @@ -120,16 +129,18 @@ def loop(self): obs_dict = self.obs_rcs2agents(obs) - self.remote_agent.reset(obs_dict, instruction=self._cfg.instruction) + self.remote_agent.reset(copy.deepcopy(obs_dict), instruction=self._cfg.instruction) while True: - action = self.remote_agent.act(obs_dict) + action = self.remote_agent.act(copy.deepcopy(obs_dict)) if action.done: logger.info("done issued by agent, shutting down") break - obs, _, _, _, info = self.env.step(self.action_agents2rcs(action)) + a = self.action_agents2rcs(action) + obs, _, _, _, info = self.env.step(a) obs_dict = self.obs_rcs2agents(obs) + self.frame_rate() def get_env(): @@ -191,6 +202,12 @@ def get_env(): hw_cfg.max_relative_movement = 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90)) hw_cfg.relative_to = RELATIVETO hw_cfg.robot_to_shared_base_frame = robot2world + hw_cfg.robot_cfgs["left"].ignore_realtime = True + hw_cfg.robot_cfgs["right"].ignore_realtime = True + hw_cfg.robot_cfgs["left"].speed_factor = 0.3 + hw_cfg.robot_cfgs["right"].speed_factor = 0.3 + hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"] + hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"] env_rel = env_creator.create_env(hw_cfg) else: # FR3 @@ -215,16 +232,16 @@ def main(): env_rel = get_env() env_rel.reset() - VIDEO_PATH.mkdir(parents=True, exist_ok=True) - timestamp = str(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + # Path(VIDEO_PATH).mkdir(parents=True, exist_ok=True) + # timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) - camera_set = env_rel.get_wrapper_attr("camera_set") - camera_set.record_video(VIDEO_PATH, timestamp) + # camera_set = env_rel.get_wrapper_attr("camera_set") + # camera_set.record_video(Path(VIDEO_PATH), timestamp) # env = RHCWrapper(env, exec_horizon=1) cfg = InferenceConfig( - IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=True, robot_keys=["left", "right"] + IP, PORT, MODEL, INSTRUCTION, jpeg_encoding=True, on_same_machine=False, robot_keys=["left", "right"] ) controller = ModelInference(env_rel, cfg) input("robot is about to be controlled by AI, press enter to start")