diff --git a/tools/env_setup/bash_utils.sh b/tools/env_setup/bash_utils.sh index 9b944c56..05a2da49 100644 --- a/tools/env_setup/bash_utils.sh +++ b/tools/env_setup/bash_utils.sh @@ -27,6 +27,11 @@ check_conda_env() { } check_nvidia_gpu() { + # skip check if building docker image + if [ "$BUILD_DOCKER_IMAGE" = "true" ]; then + return + fi + if ! nvidia-smi &> /dev/null; then echo "Error: NVIDIA GPU not found or driver not installed" exit 1 diff --git a/tools/env_setup/install_isaac.sh b/tools/env_setup/install_isaac.sh index 00e7783d..1ad18e24 100755 --- a/tools/env_setup/install_isaac.sh +++ b/tools/env_setup/install_isaac.sh @@ -85,4 +85,7 @@ echo "Installing IsaacLab ..." yes Yes | ./isaaclab.sh --install popd +# ---- Apply dependency package version patch ---- +pip install 'warp-lang==1.7.2' + echo "IsaacSim and dependencies installed successfully!" diff --git a/tools/install_deps.py b/tools/install_deps.py index 7d5a7432..2e566eeb 100644 --- a/tools/install_deps.py +++ b/tools/install_deps.py @@ -21,7 +21,7 @@ def install_dependencies(workflow_name: str = "robotic_ultrasound"): """Install project dependencies from requirements.txt""" - if workflow_name not in ["robotic_ultrasound", "robotic_surgery"]: + if workflow_name not in ["robotic_ultrasound", "robotic_surgery", "none"]: raise ValueError(f"Invalid workflow name: {workflow_name}") try: diff --git a/tools/run_all_tests.py b/tools/run_all_tests.py index 3f8bdea8..cc332469 100644 --- a/tools/run_all_tests.py +++ b/tools/run_all_tests.py @@ -146,6 +146,9 @@ def _run_test_process(cmd, env, test_path, timeout=1200): if return_code == 0: print(f"\nTEST PASSED in {elapsed_time} seconds") return True + elif return_code == -6: + print("\n The process crashes at shutdown because of native async code that does not finalize safely.") + return True else: print(f"\nTEST FAILED with return code {return_code} after {elapsed_time} seconds") @@ -280,26 +283,31 @@ def run_integration_tests(workflow_name, timeout=1200): 
print(f"Test timeout: {timeout} seconds ({timeout//60} minutes)") project_root = f"workflows/{workflow_name}" - default_license_file = os.path.join(os.getcwd(), project_root, "scripts", "dds", "rti_license.dat") - os.environ["RTI_LICENSE_FILE"] = os.environ.get("RTI_LICENSE_FILE", default_license_file) - all_tests_passed = True - tests_dir = os.path.join(project_root, "tests") - print(f"Looking for tests in {tests_dir}") - tests = get_tests(tests_dir, pattern="test_integration_*.py") - env = _setup_test_env(project_root, tests_dir) - - for test_path in tests: - cmd = [ - sys.executable, - "-m", - "unittest", - test_path, - ] - if "cosmos_transfer1" in test_path: - env = _setup_test_cosmos_transfer1_env(os.getcwd(), project_root, tests_dir) - - if not _run_test_process(cmd, env, test_path): - all_tests_passed = False + try: + default_license_file = os.path.join(os.getcwd(), project_root, "scripts", "dds", "rti_license.dat") + os.environ["RTI_LICENSE_FILE"] = os.environ.get("RTI_LICENSE_FILE", default_license_file) + all_tests_passed = True + tests_dir = os.path.join(project_root, "tests") + print(f"Looking for tests in {tests_dir}") + tests = get_tests(tests_dir, pattern="test_integration_*.py") + env = _setup_test_env(project_root, tests_dir) + + for test_path in tests: + cmd = [ + sys.executable, + "-m", + "unittest", + test_path, + ] + if "cosmos_transfer1" in test_path: + env = _setup_test_cosmos_transfer1_env(os.getcwd(), project_root, tests_dir) + + if not _run_test_process(cmd, env, test_path): + all_tests_passed = False + except Exception as e: + print(f"Error running integration tests: {e}") + print(traceback.format_exc()) + return 1 return 0 if all_tests_passed else 1 diff --git a/workflows/robotic_ultrasound/docker/Dockerfile b/workflows/robotic_ultrasound/docker/Dockerfile index 4d7f8bb6..1fc50250 100644 --- a/workflows/robotic_ultrasound/docker/Dockerfile +++ b/workflows/robotic_ultrasound/docker/Dockerfile @@ -15,104 +15,71 @@ # See the License for the 
specific language governing permissions and # limitations under the License. -FROM nvcr.io/nvidia/isaac-lab:2.1.0 +FROM nvidia/cuda:12.8.1-devel-ubuntu24.04 -# Apply patches to IsaacLab -COPY tools/env_setup/patches/events_random_texture.patch /tmp/ -COPY tools/env_setup/patches/from_files_semantic_tags.patch /tmp/ +SHELL ["/bin/bash", "-c"] -RUN cd /workspace/isaaclab && \ - patch -p1 < /tmp/events_random_texture.patch && \ - patch -p1 < /tmp/from_files_semantic_tags.patch && \ - rm /tmp/events_random_texture.patch /tmp/from_files_semantic_tags.patch - -WORKDIR /workspace - -# Fix livestream public endpoint address issue in 2.0.2/2.1.0 -RUN sed -i '/--\/app\/livestream\/publicEndpointAddress=/d' /workspace/isaaclab/source/isaaclab/isaaclab/app/app_launcher.py - -# Install uv using curl for openpi +# Install all packages in a single layer to avoid caching issues RUN apt-get update && \ - apt-get install -y software-properties-common && \ - add-apt-repository ppa:ubuntu-toolchain-r/test && \ - apt-get update && \ apt-get install -y \ - curl \ - openssh-client \ - cmake \ wget \ + curl \ + jq \ + vim \ + git \ + xvfb \ build-essential \ - pybind11-dev \ + cmake \ + vulkan-tools \ + unzip \ lsb-release \ libglib2.0-0 \ libdbus-1-3 \ libopengl0 \ libxcb-keysyms1 \ - libxcb-cursor0 \ - ninja-build \ - libgl1-mesa-dev \ - ffmpeg \ - gcc-12 \ - g++-12 && \ - update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ - mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts - -# Install CUDA 12.8 - -WORKDIR /tmp - -RUN apt-get update && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - apt-get update && \ - apt-get -y install cuda-toolkit-12-8 - -ENV PATH=/usr/local/cuda-12.8/bin${PATH:+:${PATH}} -ENV 
LD_LIBRARY_PATH=/usr/local/cuda-12.8/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - -COPY tools/env_setup/install_lerobot.sh /tmp/env_setup/ -COPY tools/env_setup/install_pi0.sh /tmp/env_setup/ -COPY tools/env_setup/install_holoscan.sh /tmp/env_setup/ -COPY tools/env_setup/install_robotic_us_ext.sh /tmp/env_setup/ - -COPY workflows/robotic_ultrasound/scripts /workspace/robotic_ultrasound/scripts - -ENV PYTHON_EXECUTABLE=/workspace/isaaclab/_isaac_sim/python.sh - -# # Set up the Simulation + libglu1-mesa && \ + rm -rf /var/lib/apt/lists/* + +# Add github.com to list of known hosts for git clone with ssh +# use COPY command instead +RUN mkdir -p ~/.ssh && \ + curl --silent https://api.github.com/meta \ + | jq --raw-output '"github.com "+.ssh_keys[]' >> ~/.ssh/known_hosts +#RUN mkdir -p ~/.ssh +#COPY ./github /root/.ssh/known_hosts + +# all devices should be visible +ENV NVIDIA_VISIBLE_DEVICES=all +# set 'compute' driver cap to use Cuda +# set 'video' driver cap to use the video encoder +# set 'graphics' driver cap to use OpenGL/EGL +# set 'display' to allow use of virtual display +ENV NVIDIA_DRIVER_CAPABILITIES=graphics,video,compute,utility,display + +# Install Everything in conda environment + +WORKDIR /workspace/i4h-workflows + +COPY tools /workspace/i4h-workflows/tools +COPY tutorials /workspace/i4h-workflows/tutorials +COPY workflows /workspace/i4h-workflows/workflows +COPY holoscan_i4h /workspace/i4h-workflows/holoscan_i4h + +# Install miniconda3 and create robotic_ultrasound RUN --mount=type=ssh \ - $PYTHON_EXECUTABLE -m pip install --no-deps \ - git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@v0.2.0rc1 && \ - $PYTHON_EXECUTABLE -m pip install \ - rti.connext==7.3.0 \ - pyrealsense2==2.55.1.6486 \ - toml==0.10.2 \ - dearpygui==2.0.0 \ - setuptools==75.8.0 \ - pydantic==2.10.6 - -RUN mkdir -p /workspace/third_party - -RUN /tmp/env_setup/install_robotic_us_ext.sh 
/workspace/robotic_ultrasound/scripts/simulation - -RUN /tmp/env_setup/install_lerobot.sh /workspace/third_party/lerobot - -RUN /tmp/env_setup/install_pi0.sh /workspace/third_party/openpi - -RUN /tmp/env_setup/install_holoscan.sh /workspace/robotic_ultrasound/scripts/holoscan_apps - -COPY tools/env_setup/install_cosmos_transfer1.sh /tmp/env_setup/ - -COPY tools/env_setup/install_cudnn.sh /tmp/env_setup/ - -RUN /tmp/env_setup/install_cudnn.sh + mkdir -p ~/miniconda3 && \ + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh && \ + bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 && \ + rm ~/miniconda3/miniconda.sh && \ + source ~/miniconda3/bin/activate && \ + conda init --all && \ + conda create -n robotic_ultrasound python=3.10.14 -y RUN --mount=type=ssh \ - /tmp/env_setup/install_cosmos_transfer1.sh /workspace/third_party/cosmos-transfer1 - -WORKDIR /workspace/robotic_ultrasound/scripts - -ENV PYTHONPATH=/workspace/robotic_ultrasound/scripts:/workspace/third_party/cosmos-transfer1 + source ~/miniconda3/bin/activate && \ + conda activate robotic_ultrasound && \ + cd /workspace/i4h-workflows && \ + BUILD_DOCKER_IMAGE=true bash tools/env_setup_robot_us.sh +ENV PYTHONPATH=/workspace/i4h-workflows/workflows/robotic_ultrasound/scripts ENV RTI_LICENSE_FILE=/root/rti/rti_license.dat diff --git a/workflows/robotic_ultrasound/docker/README.md b/workflows/robotic_ultrasound/docker/README.md index cabd1d0b..b9e72a8d 100644 --- a/workflows/robotic_ultrasound/docker/README.md +++ b/workflows/robotic_ultrasound/docker/README.md @@ -14,7 +14,7 @@ To build the docker image, you will need to set up the SSH agent and add your SS export DOCKER_BUILDKIT=1 eval "$(ssh-agent -s)" ssh-add ~/.ssh/id_ed25519 # Replace with your SSH key -docker build --ssh default -f workflows/robotic_ultrasound/docker/Dockerfile -t robot_us:latest . 
+docker build --ssh default --no-cache -f workflows/robotic_ultrasound/docker/Dockerfile -t robot_us:latest . ``` ## Prepare the RTI License Locally @@ -23,14 +23,24 @@ Please refer to the [Environment Setup](../README.md#environment-setup) for inst The license file `rti_license.dat` should be saved in a directory in your host file system, (e.g. `~/docker/rti`), which can be mounted to the docker container. +## Prepare Ultrasound Raytracing Simulator (Optional) + +For ultrasound simulation capabilities, you'll need the `raysim` module. Please refer to the [Environment Setup - Install the raytracing ultrasound simulator](../README.md#install-the-raytracing-ultrasound-simulator) instructions to set up the raytracing ultrasound simulator locally. + +The `raysim` directory should be available on your host file system (e.g., `~/raysim`) to be mounted to the docker container. + ## Run the Container Since we need to run multiple instances (policy runner, simulation, etc.), we need to use `-d` to run the container in detached mode. ```bash xhost +local:docker -docker run --name isaac-sim --entrypoint bash -itd --runtime=nvidia --gpus all -e "ACCEPT_EULA=Y" --rm --network=host \ +docker run --name isaac-sim -itd --gpus all --rm --network=host \ + --runtime=nvidia \ + --entrypoint=bash \ -e DISPLAY=$DISPLAY \ + -e "OMNI_KIT_ACCEPT_EULA=Y" \ + -e "ACCEPT_EULA=Y" \ -e "PRIVACY_CONSENT=Y" \ -v /tmp/.X11-unix:/tmp/.X11-unix \ -v ~/docker/isaac-sim/cache/kit:/isaac-sim/kit/cache:rw \ @@ -46,20 +56,110 @@ docker run --name isaac-sim --entrypoint bash -itd --runtime=nvidia --gpus all - robot_us:latest ``` -### Run Policy +**Note:** The `:/workspace/i4h-workflows/workflows/robotic_ultrasound/scripts/raysim:ro` mount is required for ultrasound raytracing simulation. If you haven't downloaded the raysim module yet, follow the [Environment Setup - Install the raytracing ultrasound simulator](../README.md#install-the-raytracing-ultrasound-simulator) instructions to set it up. 
If you don't need ultrasound simulation, you can omit this mount. + +## Running the Simulation + +### Run Policy (Background Process) + +First, start the policy runner in the background. This process will wait for simulation data and provide control commands. ```bash docker exec -it isaac-sim bash -# Inside the container, run the policy -python policy_runner/run_policy.py +# Inside the container +conda activate robotic_ultrasound +python workflows/robotic_ultrasound/scripts/policy_runner/run_policy.py ``` -The policy runner will be running in an environment managed by `uv` located in `/workspace/openpi/.venv`. +**Note:** The policy runner should be started first since it will continuously run and communicate with the simulation via DDS. ### Run Simulation +In a separate terminal session, start the main simulation: + ```bash docker exec -it isaac-sim bash # Inside the container, run the simulation -python simulation/environments/sim_with_dds.py --enable_camera --livestream 2 +conda activate robotic_ultrasound +python workflows/robotic_ultrasound/scripts/simulation/environments/sim_with_dds.py --enable_camera --livestream 2 +``` + +**Note:** This will launch the IsaacSim main window where you can visualize the robotic ultrasound simulation in real-time. The `--livestream 2` parameter enables WebRTC streaming for remote viewing. + +### Check the WebRTC Streaming + +You can open the WebRTC streaming client to check the streaming status now after the simulation has connected to the policy runner. + +### Ultrasound Raytracing Simulation (Optional) + +For realistic ultrasound image generation, you can run the ultrasound raytracing simulator: + +```bash +docker exec -it isaac-sim bash +# Inside the container, run the ultrasound raytracing simulator +python workflows/robotic_ultrasound/scripts/simulation/examples/ultrasound_raytracing.py +``` + +This will generate and stream ultrasound images via DDS communication. 
+ +### Visualization Utility (Optional) + +To visualize the ultrasound images and other sensor data, you can use the visualization utility: + +```bash +docker exec -it isaac-sim bash +# Inside the container, run the visualization utility +python workflows/robotic_ultrasound/scripts/simulation/utils/visualization.py +``` + +This utility will display real-time ultrasound images and other sensor data streams. + +## Troubleshooting + +### GPU Device Errors + +- **"Failed to create any GPU devices" or "omni.gpu_foundation_factory.plugin" errors**: This indicates GPU device access issues. Try these fixes in order: + + **Verify NVIDIA drivers and container toolkit installation**: + ```bash + # Check NVIDIA driver + nvidia-smi + + # Check Docker can access GPU + docker run --rm --gpus all --runtime=nvidia nvidia/cuda:12.8.1-devel-ubuntu24.04 nvidia-smi + ``` + If the `--runtime=nvidia` option is not working, you can try configuring the Docker daemon for the NVIDIA runtime by editing `/etc/docker/daemon.json`. The file should contain the following content: + ```json + { + "default-runtime": "nvidia", + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + } + } + ``` + +- **Policy not responding**: Ensure the policy runner is started before the simulation and is running in the background + +- **No ultrasound images**: Verify that the `raysim` directory is properly mounted and the ultrasound raytracing simulator is running + +- **Display issues**: Make sure `xhost +local:docker` was run before starting the container and that the terminal is not running in a headless mode (e.g. 
in an SSH connection without the `-X` option) + +- **Missing assets**: Verify that the I4H assets and RTI license are properly mounted and accessible + +### Verification Commands + +After applying fixes, test with these commands: + +```bash +# Test basic GPU access +docker run --rm --gpus all nvidia/cuda:12.8.1-devel-ubuntu24.04 nvidia-smi + +# Test Vulkan support +docker run --rm --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY robot_us:latest vulkaninfo + +# Test OpenGL support +docker run --rm --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY robot_us:latest glxinfo | head -20 ``` diff --git a/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py b/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py index f0c3cf85..174328a8 100644 --- a/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py +++ b/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py @@ -42,14 +42,17 @@ app_launcher = AppLauncher(args_cli) simulation_app = app_launcher.app - +# isort: off import gymnasium as gym import torch from isaaclab_tasks.utils.parse_cfg import parse_env_cfg + # Import extensions to set up environment tasks from robotic_us_ext import tasks # noqa: F401 from simulation.environments.state_machine.utils import reset_scene_to_initial_state, validate_hdf5_path +# isort: on + + def main(): """Main function."""