Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
**/build/
real*
test*
5 changes: 4 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ RUN chmod +x /usr/local/bin/link-editable-source \
&& uv pip install --no-build-isolation /opt/rcs-src/extensions/rcs_fr3 \
&& uv pip install /opt/rcs-src/extensions/rcs_realsense \
&& uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \
&& uv pip install /opt/rcs-src/extensions/rcs_zed
&& uv pip install /opt/rcs-src/extensions/rcs_zed \
&& uv pip install /opt/rcs-src/examples/teleop/SimPublisher \
&& uv pip install /opt/rcs-src/2f85-python-driver \
&& uv pip install /opt/rcs-src/vlagents

WORKDIR /workspace/robot-control-stack

Expand Down
3 changes: 2 additions & 1 deletion docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Notes:
- `~/zed_models` is mounted into `/usr/local/zed/resources` to match the direct `docker run` setup.
- `/dev/dri` is masked inside the container so host Mesa/AMD render nodes do not override the NVIDIA runtime devices.
- NVIDIA PRIME/GLX environment variables are exported to bias OpenGL/EGL selection toward the NVIDIA stack when using X11 forwarding.
- Python source changes are picked up from the mounted repo, including `extensions/rcs_zed`.
- The simulator still bootstraps EGL for offscreen MuJoCo camera rendering; set `RCS_MUJOCO_DISABLE_EGL=1` only if you intentionally want to disable that path.
- Python source changes are picked up from the mounted repo.
- If you change C++ code in `rcs` or `rcs_fr3`, rebuild the image.
- For non-GPU hosts, comment out the GPU-related lines in `docker/compose/dev.yml`.
26 changes: 11 additions & 15 deletions docker/link-editable-source.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,21 @@ fi

SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"

# Replace a compiled (mixed Python/C-extension) package in site-packages with
# symlinks into the mounted repo, while preserving the compiled artifacts
# (_core*.so / lib*.so*) that were installed during the image build.
# The diff scrape had interleaved the deleted old implementation with the new
# one; this is the clean new version.
link_compiled_package() {
    src_dir="$1"
    dst_dir="$2"

    # Nothing to do if either side is missing (e.g. extension not installed).
    if [ ! -d "$src_dir" ] || [ ! -d "$dst_dir" ]; then
        return
    fi

    # Stash the compiled artifacts out of the way before wiping the package.
    tmp_keep="$(mktemp -d)"
    find "$dst_dir" -maxdepth 1 \( -name '_core*.so' -o -name 'lib*.so*' \) -exec mv {} "$tmp_keep/" \;
    rm -rf "$dst_dir"
    mkdir -p "$dst_dir"
    # cp -as creates a symlink farm mirroring the repo's Python sources.
    cp -as "$src_dir/." "$dst_dir/"
    # Restore the build-time compiled artifacts on top of the symlinks.
    find "$tmp_keep" -maxdepth 1 -type f -exec mv {} "$dst_dir/" \;
    rmdir "$tmp_keep"
}

link_pure_python_package() {
Expand All @@ -44,8 +39,9 @@ link_pure_python_package() {
ln -s "$src_dir" "$dst_dir"
}

# Link each editable package from the mounted repo into site-packages.
# Compiled packages keep their build-time .so artifacts; pure-Python packages
# are replaced by a single directory symlink. (The diff scrape had interleaved
# the removed old link_mixed_package calls here; this is the clean new set.)
link_compiled_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs"
link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3"
link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense"
link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85"
link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed"
link_pure_python_package "$REPO_ROOT/vlagents" "$SITE_PACKAGES/vlagents"
Empty file added examples/inference/README.md
Empty file.
253 changes: 253 additions & 0 deletions examples/inference/franka.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
import copy
import datetime
import logging
from dataclasses import dataclass
from pathlib import Path
from time import sleep
from typing import Any

from rcs.utils import SimpleFrameRate

import gymnasium as gym
import numpy as np
from rcs._core.common import BaseCameraConfig, RobotPlatform
from rcs._core.sim import SimConfig
from rcs.envs.base import ControlMode, RelativeTo
from rcs.envs.configs import EmptyWorldFR3Duo
from rcs.envs.tasks import PickTaskConfig

import rcs
from vlagents.client import RemoteAgent
from vlagents.policies import Act, Obs

logger = logging.getLogger(__name__)


# FCI IP address of each FR3 arm, keyed by side.
ROBOT2IP = {
    "right": "192.168.102.1",
    "left": "192.168.101.1",
}
# Robot id string per side (note: ids are strings, not ints).
ROBOT2ID = {
    "left": "1",
    "right": "0",
}


# Toggle between simulation and real hardware by (un)commenting.
# ROBOT_INSTANCE = RobotPlatform.SIMULATION
ROBOT_INSTANCE = RobotPlatform.HARDWARE

# RealSense cameras: name -> serial number. Set the dict to None to disable.
CAMERA_DICT = {
    "left_wrist": "230422272017",
    "right_wrist": "230422271040",
    # "side": "243522070385",
    # "bird_eye": "243522070364",
}
# CAMERA_DICT = None
# ZED cameras: name -> serial number.
ZED_CAMERA_DICT = {
    "head": "19928076",
}
# Global switch for depth streams in the observation wrapper.
INCLUDE_DEPTH = False

# Robotiq 2F-85 gripper serial numbers per side.
ROBOTIQ_SERIAL = {
    "left": "DAAQMPDC",
    "right": "DAAQMJHX",
}

# DIGIT tactile sensors: name -> serial number. None disables them.
# DIGIT_DICT = {
#     "digit_right_left": "D21182",
#     "digit_right_right": "D21193"
# }
DIGIT_DICT = None


# Natural-language task instruction sent to the remote agent on reset.
INSTRUCTION = "pick up the black cube with the right arm and place it into the black bowl; pick up the white cube with the left arm and place it into the white bowl"
# Control-loop / recording frequency in Hz.
RECORD_FPS = 30
CONTROL_MODE = ControlMode.JOINTS
RELATIVETO = RelativeTo.NONE
DEBUG = True
VIDEO_PATH = "videos"
# Remote vlagents model name and server address.
MODEL = "lerobot"
IP = "172.29.5.16"
PORT = 20000


# NOTE(review): presumably the pose of each robot base in the shared base
# frame; rpy values look hand-calibrated — confirm against the rig.
robot2world = {
    "right": rcs.common.Pose(
        translation=np.array([0, 0, 0]), rpy_vector=np.array([0.89360858, -0.17453293, 0.46425758])
    ),
    "left": rcs.common.Pose(
        translation=np.array([0, 0, 0]), rpy_vector=np.array([-0.89360858, -0.17453293, -0.46425758])
    ),
}


@dataclass
class InferenceConfig:
    """Connection and task settings for remote vlagents inference."""

    # Host and port of the vlagents server.
    vlagents_host: str
    vlagents_port: int
    # Model identifier requested from the server.
    vlagents_model: str
    # Natural-language task instruction sent to the agent on reset.
    instruction: str
    # Order of robots in the flat state/action vectors (e.g. ["left", "right"]).
    robot_keys: list[str]
    # JPEG-encode camera frames before sending to reduce bandwidth.
    jpeg_encoding: bool = True
    # Skip encoding/transport optimizations when the server is local.
    on_same_machine: bool = False


class ModelInference:
    """Closed-loop controller that queries a remote vlagents policy.

    Converts RCS observations to the agent's ``Obs`` format, sends them to the
    remote agent, translates the returned flat action vector back into per-robot
    RCS commands, and steps the environment until the agent reports done.
    """

    def __init__(self, env: gym.Env, cfg: InferenceConfig):
        self.env = env
        # NOTE(review): gripper_state is never read or written elsewhere in this
        # class; kept so any external code inspecting it keeps working — confirm
        # whether it can be removed.
        self.gripper_state = 1
        self._cfg = cfg
        self.remote_agent = RemoteAgent(
            cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding
        )
        # Paces the control loop at the recording frequency.
        self.frame_rate = SimpleFrameRate(RECORD_FPS)

    def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs:
        """Convert an RCS observation dict into a vlagents ``Obs``.

        Camera frames are taken from ``obs["frames"][name]["rgb"]["data"]``;
        the state vector concatenates joints and gripper per robot, in
        ``cfg.robot_keys`` order.
        """
        cameras = {name: obs["frames"][name]["rgb"]["data"] for name in obs["frames"]}
        state = []
        for robot in self._cfg.robot_keys:
            # TODO: currently hardcoded for joint-space state
            state.append(obs[robot]["joints"])
            state.append(obs[robot]["gripper"])

        return Obs(cameras=cameras, gripper=None, info=info, state=np.concatenate(state))

    def action_agents2rcs(self, action: Act) -> dict[str, Any]:
        """Split the agent's flat action vector into per-robot RCS commands.

        Assumes 8 values per robot: 7 joint targets followed by 1 gripper value,
        ordered by ``cfg.robot_keys``.
        """
        act: dict[str, Any] = {}
        for idx, robot in enumerate(self._cfg.robot_keys):
            # TODO: this is currently hard coded for franka joints
            base = idx * 8
            act[robot] = {
                "joints": action.action[base : base + 7],
                "gripper": action.action[base + 7 : base + 8],
            }
        return act

    def loop(self):
        """Run the control loop until the agent signals completion."""
        obs, _ = self.env.reset()

        obs_dict = self.obs_rcs2agents(obs)

        self.remote_agent.reset(copy.deepcopy(obs_dict), instruction=self._cfg.instruction)

        while True:
            action = self.remote_agent.act(copy.deepcopy(obs_dict))
            if action.done:
                logger.info("done issued by agent, shutting down")
                break
            a = self.action_agents2rcs(action)
            # NOTE(review): terminated/truncated/info from the env are ignored;
            # the loop only ends when the agent reports done — confirm intended.
            obs, _, _, _, _ = self.env.step(a)

            obs_dict = self.obs_rcs2agents(obs)
            self.frame_rate()


def get_env():
    """Build the FR3 duo environment for hardware or simulation.

    The branch is selected by the module-level ``ROBOT_INSTANCE`` constant;
    cameras, grippers, control mode and frames come from the other module-level
    toggles. Returns the created gym environment.
    """
    if ROBOT_INSTANCE == RobotPlatform.HARDWARE:
        from rcs_fr3.configs import FrankaDuoEnv
        from rcs_fr3.creators import HardwareCameraCreatorConfig

        env_creator = FrankaDuoEnv()
        env_creator.left_ip = ROBOT2IP["left"]
        env_creator.right_ip = ROBOT2IP["right"]
        hw_cfg = env_creator.config()
        camera_cfgs: dict[str, HardwareCameraCreatorConfig] = {}
        if CAMERA_DICT is not None:
            camera_cfgs["realsense"] = HardwareCameraCreatorConfig(
                camera_type_id="realsense",
                camera_cfgs={
                    name: BaseCameraConfig(
                        identifier=identifier,
                        resolution_width=1280,
                        resolution_height=720,
                        frame_rate=30,
                    )
                    for name, identifier in CAMERA_DICT.items()
                },
            )
        if ZED_CAMERA_DICT is not None:
            camera_cfgs["zed"] = HardwareCameraCreatorConfig(
                camera_type_id="zed",
                camera_cfgs={
                    name: BaseCameraConfig(
                        identifier=identifier,
                        resolution_width=1280,
                        resolution_height=720,
                        frame_rate=30,
                    )
                    for name, identifier in ZED_CAMERA_DICT.items()
                },
                kwargs={
                    # Keep ZED depth capture consistent with the global depth
                    # switch (was hard-coded to False while
                    # wrapper_cfg.include_depth followed INCLUDE_DEPTH).
                    "enable_depth": INCLUDE_DEPTH,
                    "enable_imu": False,
                },
            )
        if DIGIT_DICT is not None:
            camera_cfgs["digit"] = HardwareCameraCreatorConfig(
                camera_type_id="digit",
                camera_cfgs={
                    name: BaseCameraConfig(
                        identifier=identifier,
                        resolution_width=320,
                        resolution_height=240,
                        frame_rate=30,
                    )
                    for name, identifier in DIGIT_DICT.items()
                },
            )
        # None (rather than an empty dict) fully disables cameras downstream.
        hw_cfg.camera_cfgs = camera_cfgs or None
        hw_cfg.control_mode = CONTROL_MODE
        hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
        # Joint mode takes a scalar bound; Cartesian takes (translation, rotation).
        hw_cfg.max_relative_movement = 0.5 if CONTROL_MODE == ControlMode.JOINTS else (0.5, np.deg2rad(90))
        hw_cfg.relative_to = RELATIVETO
        hw_cfg.robot_to_shared_base_frame = robot2world
        hw_cfg.robot_cfgs["left"].ignore_realtime = True
        hw_cfg.robot_cfgs["right"].ignore_realtime = True
        hw_cfg.robot_cfgs["left"].speed_factor = 0.3
        hw_cfg.robot_cfgs["right"].speed_factor = 0.3
        hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"]
        hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"]
        env_rel = env_creator.create_env(hw_cfg)
    else:
        # Simulation: FR3 duo in an empty world.
        scene = EmptyWorldFR3Duo()
        sim_cfg_data = scene.config()
        sim_cfg_data.sim_cfg = SimConfig(
            async_control=True, realtime=True, frequency=RECORD_FPS, max_convergence_steps=500
        )
        sim_cfg_data.relative_to = RelativeTo.CONFIGURED_ORIGIN
        sim_cfg_data.wrapper_cfg.include_depth = INCLUDE_DEPTH
        if sim_cfg_data.root_frame_objects is None:
            sim_cfg_data.root_frame_objects = {}
        sim_cfg_data.task_cfg = PickTaskConfig(robot_name="right")

        env_rel = scene.create_env(sim_cfg_data)

    return env_rel


def main():
    """Build the environment, confirm with the operator, then run inference."""
    environment = get_env()
    environment.reset()

    # Video recording and receding-horizon wrapping are currently disabled;
    # see repo history for the previous VIDEO_PATH / RHCWrapper setup.

    inference_cfg = InferenceConfig(
        IP,
        PORT,
        MODEL,
        INSTRUCTION,
        jpeg_encoding=True,
        on_same_machine=False,
        robot_keys=["left", "right"],
    )
    controller = ModelInference(environment, inference_cfg)
    # Safety gate: require an explicit keypress before the policy moves the robots.
    input("robot is about to be controlled by AI, press enter to start")
    with environment:
        controller.loop()


# Script entry point.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions examples/inference/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
vlagents @ git+https://github.com/RobotControlStack/vlagents.git@lerobot
37 changes: 25 additions & 12 deletions examples/teleop/franka.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,26 @@
"right": "0",
}


ROBOT_INSTANCE = RobotPlatform.SIMULATION
# ROBOT_INSTANCE = RobotPlatform.HARDWARE
# use `udevadm info -a -n /dev/ttyUSB0 | grep serial`
# to find out the serial numbers
ROBOTIQ_SERIAL = {
"left": "DAAQMPDC",
"right": "DAAQMJHX",
}

RECORD_FPS = 30
# set camera dict to None to disable cameras
# CAMERA_DICT = {
# "left_wrist": "230422272017",
# "right_wrist": "230422271040",
# "side": "243522070385",
# "bird_eye": "243522070364",
# }
CAMERA_DICT = None
CAMERA_DICT = {
"right_wrist": "230422272017",
"left_wrist": "230422271040",
# "side": "243522070385",
# "bird_eye": "243522070364",
}
# CAMERA_DICT = None
ZED_CAMERA_DICT = {
"zed": "19928076",
"head": "19928076",
}
# ZED_CAMERA_DICT = None
MQ3_ADDR = "10.42.0.1"
INCLUDE_DEPTH = False

Expand Down Expand Up @@ -117,8 +121,9 @@ def get_env():
for name, identifier in ZED_CAMERA_DICT.items()
},
kwargs={
"enable_depth": False,
"enable_depth": INCLUDE_DEPTH,
"enable_imu": False,
"include_right": True,
},
)
if DIGIT_DICT is not None:
Expand All @@ -137,11 +142,19 @@ def get_env():
hw_cfg.camera_cfgs = camera_cfgs or None
hw_cfg.control_mode = config.operator_class.control_mode[0]
hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
hw_cfg.wrapper_cfg.binary_gripper = False
# hw_cfg.wrapper_cfg.binary_gripper = True
hw_cfg.max_relative_movement = (
0.5 if config.operator_class.control_mode[0] == ControlMode.JOINTS else (0.5, np.deg2rad(90))
)
hw_cfg.relative_to = config.operator_class.control_mode[1]
hw_cfg.robot_to_shared_base_frame = robot2world
hw_cfg.robot_cfgs["left"].ignore_realtime = True
hw_cfg.robot_cfgs["right"].ignore_realtime = True
hw_cfg.robot_cfgs["left"].speed_factor = 0.3
hw_cfg.robot_cfgs["right"].speed_factor = 0.3
hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"]
hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"]
env_rel = env_creator.create_env(hw_cfg)
operator = GelloOperator(config) if isinstance(config, GelloConfig) else QuestOperator(config)
else:
Expand Down
Loading
Loading